From 53d1cd6b125fb9d69303516a1179ebc3b72f797a Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Tue, 7 Nov 2017 15:17:55 +0530 Subject: [PATCH 001/333] cpupowerutils: bench - Fix cpu online check cpupower_is_cpu_online was incorrectly checking for 0. This patch fixes this by checking for 1 when the cpu is online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan --- tools/power/cpupower/bench/system.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c index c25a74ae51ba..2bb3eef7d5c1 100644 --- a/tools/power/cpupower/bench/system.c +++ b/tools/power/cpupower/bench/system.c @@ -61,7 +61,7 @@ int set_cpufreq_governor(char *governor, unsigned int cpu) dprintf("set %s as cpufreq governor\n", governor); - if (cpupower_is_cpu_online(cpu) != 0) { + if (cpupower_is_cpu_online(cpu) != 1) { perror("cpufreq_cpu_exists"); fprintf(stderr, "error: cpu %u does not exist\n", cpu); return -1; From dbdc468f35ee827cab2753caa1c660bdb832243a Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Wed, 15 Nov 2017 14:10:02 +0530 Subject: [PATCH 002/333] cpupower : Fix cpupower working when cpu0 is offline cpuidle_monitor used to assume that cpu0 is always online which is not a valid assumption on POWER machines. This patch fixes this by getting the cpu on which the current thread is running, instead of always using cpu0 for monitoring which may not be online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 1b5da0066ebf..5b3205f16217 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -130,15 +130,18 @@ static struct cpuidle_monitor *cpuidle_register(void) { int num; char *tmp; + int this_cpu; + + this_cpu = sched_getcpu(); /* Assume idle state count is the same for all CPUs */ - cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0); + cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(this_cpu); if (cpuidle_sysfs_monitor.hw_states_num <= 0) return NULL; for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { - tmp = cpuidle_state_name(0, num); + tmp = cpuidle_state_name(this_cpu, num); if (tmp == NULL) continue; @@ -146,7 +149,7 @@ static struct cpuidle_monitor *cpuidle_register(void) strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); free(tmp); - tmp = cpuidle_state_desc(0, num); + tmp = cpuidle_state_desc(this_cpu, num); if (tmp == NULL) continue; strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1); From 51c4b8bba674cfd2260d173602c4dac08e4c3a99 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:11:30 +0200 Subject: [PATCH 003/333] KVM: x86: pvclock: Handle first-time write to pvclock-page contains random junk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When guest passes KVM it's pvclock-page GPA via WRMSR to MSR_KVM_SYSTEM_TIME / MSR_KVM_SYSTEM_TIME_NEW, KVM don't initialize pvclock-page to some start-values. It just requests a clock-update which will happen before entering to guest. The clock-update logic will call kvm_setup_pvclock_page() to update the pvclock-page with info. However, kvm_setup_pvclock_page() *wrongly* assumes that the version-field is initialized to an even number. This is wrong because at first-time write, field could be any-value. Fix simply makes sure that if first-time version-field is odd, increment it once more to make it even and only then start standard logic. This follows same logic as done in other pvclock shared-pages (See kvm_write_wall_clock() and record_steal_time()). Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Paolo Bonzini Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 34c85aa2e2d1..1d492b3660d5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1830,6 +1830,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) */ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + if (guest_hv_clock.version & 1) + ++guest_hv_clock.version; /* first time write, random junk */ + vcpu->hv_clock.version = guest_hv_clock.version + 1; kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, From ac9b305caa0df6f5b75d294e4b86c1027648991e Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 6 Nov 2017 16:15:10 +0200 Subject: [PATCH 004/333] KVM: nVMX/nSVM: Don't intercept #UD when running L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running L2, #UD should be intercepted by L1 or just forwarded directly to L2. It should not reach L0 x86 emulator. Therefore, set intercept for #UD only based on L1 exception-bitmap. Also add WARN_ON_ONCE() on L0 #UD intercept handlers to make sure it is never reached while running L2. This improves commit ae1f57670703 ("KVM: nVMX: Do not emulate #UD while in guest mode") by removing an unnecessary exit from L2 to L0 on #UD when L1 doesn't intercept it. In addition, SVM L0 #UD intercept handler doesn't handle correctly the case it is raised from L2. In this case, it should forward the #UD to guest instead of x86 emulator. As done in VMX #UD intercept handler. This commit fixes this issue as-well. Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Paolo Bonzini Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 9 ++++++++- arch/x86/kvm/vmx.c | 9 ++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 59e13a79c2e3..1bf7c09d97e6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -361,6 +361,7 @@ static void recalc_intercepts(struct vcpu_svm *svm) { struct vmcb_control_area *c, *h; struct nested_state *g; + u32 h_intercept_exceptions; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -371,9 +372,14 @@ static void recalc_intercepts(struct vcpu_svm *svm) h = &svm->nested.hsave->control; g = &svm->nested; + /* No need to intercept #UD if L1 doesn't intercept it */ + h_intercept_exceptions = + h->intercept_exceptions & ~(1U << UD_VECTOR); + c->intercept_cr = h->intercept_cr | g->intercept_cr; c->intercept_dr = h->intercept_dr | g->intercept_dr; - c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; + c->intercept_exceptions = + h_intercept_exceptions | g->intercept_exceptions; c->intercept = h->intercept | g->intercept; } @@ -2196,6 +2202,7 @@ static int ud_interception(struct vcpu_svm *svm) { int er; + WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 714a0673ec3c..d319e2666ad5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1887,7 +1887,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | + eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == @@ -1905,6 +1905,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) */ if (is_guest_mode(vcpu)) eb |= get_vmcs12(vcpu)->exception_bitmap; + else + eb |= 1u << UD_VECTOR; vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -5915,10 +5917,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; /* already handled by vmx_vcpu_run() */ if (is_invalid_opcode(intr_info)) { - if (is_guest_mode(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } + WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); From 61cb57c9ed631c95b54f8e9090c89d18b3695b3c Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:32 +0200 Subject: [PATCH 005/333] KVM: x86: Exit to user-mode on #UD intercept when emulator requires MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instruction emulation after trapping a #UD exception can result in an MMIO access, for example when emulating a MOVBE on a processor that doesn't support the instruction. In this case, the #UD vmexit handler must exit to user mode, but there wasn't any code to do so. Add it for both VMX and SVM. Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Paolo Bonzini Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 2 ++ arch/x86/kvm/vmx.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1bf7c09d97e6..eb714f1cdf7e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2204,6 +2204,8 @@ static int ud_interception(struct vcpu_svm *svm) WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d319e2666ad5..65f1f06f6aaa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5919,6 +5919,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (is_invalid_opcode(intr_info)) { WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); return 1; From 1f4dcb3b213235e642088709a1c54964d23365e9 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:33 +0200 Subject: [PATCH 006/333] KVM: x86: emulator: Return to user-mode on L1 CPL=0 emulation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On this case, handle_emulation_failure() fills kvm_run with internal-error information which it expects to be delivered to user-mode for further processing. However, the code reports a wrong return-value which makes KVM to never return to user-mode on this scenario. Fixes: 6d77dbfc88e3 ("KVM: inject #UD if instruction emulation fails and exit to userspace") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d492b3660d5..e5a7c53a19c6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5433,7 +5433,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_FAIL; + r = EMULATE_USER_EXIT; } kvm_queue_exception(vcpu, UD_VECTOR); From 9b8ae63798cb97e785a667ff27e43fa6220cb734 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:34 +0200 Subject: [PATCH 007/333] KVM: x86: Don't re-execute instruction when not passing CR2 value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of instruction-decode failure or emulation failure, x86_emulate_instruction() will call reexecute_instruction() which will attempt to use the cr2 value passed to x86_emulate_instruction(). However, when x86_emulate_instruction() is called from emulate_instruction(), cr2 is not passed (passed as 0) and therefore it doesn't make sense to execute reexecute_instruction() logic at all. Fixes: 51d8b66199e9 ("KVM: cleanup emulate_instruction") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1bfb99770c34..977de5fb968b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1161,7 +1161,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); + return x86_emulate_instruction(vcpu, 0, + emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); } void kvm_enable_efer_bits(u64); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 65f1f06f6aaa..6e4a0f822766 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6603,7 +6603,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); + err = emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; From 3853be2603191829b442b64dac6ae8ba0c027bf9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:47 -0800 Subject: [PATCH 008/333] KVM: X86: Fix operand/address-size during instruction decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pedro reported: During tests that we conducted on KVM, we noticed that executing a "PUSH %ES" instruction under KVM produces different results on both memory and the SP register depending on whether EPT support is enabled. With EPT the SP is reduced by 4 bytes (and the written value is 0-padded) but without EPT support it is only reduced by 2 bytes. The difference can be observed when the CS.DB field is 1 (32-bit) but not when it's 0 (16-bit). The internal segment descriptor cache exist even in real/vm8096 mode. The CS.D also should be respected instead of just default operand/address-size/66H prefix/67H prefix during instruction decoding. This patch fixes it by also adjusting operand/address-size according to CS.D. Reported-by: Pedro Fonseca Tested-by: Pedro Fonseca Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Pedro Fonseca Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/emulate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8079d141792a..b4a87debbb4b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5000,6 +5000,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) bool op_prefix = false; bool has_seg_override = false; struct opcode opcode; + u16 dummy; + struct desc_struct desc; ctxt->memop.type = OP_NONE; ctxt->memopp = NULL; @@ -5018,6 +5020,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) switch (mode) { case X86EMUL_MODE_REAL: case X86EMUL_MODE_VM86: + def_op_bytes = def_ad_bytes = 2; + ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); + if (desc.d) + def_op_bytes = def_ad_bytes = 4; + break; case X86EMUL_MODE_PROT16: def_op_bytes = def_ad_bytes = 2; break; From f1b026a3310a441f504640dd3d9765eb533386b8 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:48 -0800 Subject: [PATCH 009/333] KVM: nVMX: Validate the IA32_BNDCFGS on nested VM-entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the SDM, if the "load IA32_BNDCFGS" VM-entry controls is 1, the following checks are performed on the field for the IA32_BNDCFGS MSR: - Bits reserved in the IA32_BNDCFGS MSR must be 0. - The linear address in bits 63:12 must be canonical. Reviewed-by: Konrad Rzeszutek Wilk Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jim Mattson Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6e4a0f822766..707aaa954b3d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10876,6 +10876,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 1; } + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && + (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || + (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) + return 1; + return 0; } From 5af4157388adad82c339e3742fb6b67840721347 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:49 -0800 Subject: [PATCH 010/333] KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4f350c6dbcb (kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly) can result in L1(run kvm-unit-tests/run_tests.sh vmx_controls in L1) null pointer deference and also L0 calltrace when EPT=0 on both L0 and L1. In L1: BUG: unable to handle kernel paging request at ffffffffc015bf8f IP: vmx_vcpu_run+0x202/0x510 [kvm_intel] PGD 146e13067 P4D 146e13067 PUD 146e15067 PMD 3d2686067 PTE 3d4af9161 Oops: 0003 [#1] PREEMPT SMP CPU: 2 PID: 1798 Comm: qemu-system-x86 Not tainted 4.14.0-rc4+ #6 RIP: 0010:vmx_vcpu_run+0x202/0x510 [kvm_intel] Call Trace: WARNING: kernel stack frame pointer at ffffb86f4988bc18 in qemu-system-x86:1798 has bad value 0000000000000002 In L0: -----------[ cut here ]------------ WARNING: CPU: 6 PID: 4460 at /home/kernel/linux/arch/x86/kvm//vmx.c:9845 vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel] CPU: 6 PID: 4460 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc7+ #25 RIP: 0010:vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel] Call Trace: paging64_page_fault+0x500/0xde0 [kvm] ? paging32_gva_to_gpa_nested+0x120/0x120 [kvm] ? nonpaging_page_fault+0x3b0/0x3b0 [kvm] ? __asan_storeN+0x12/0x20 ? paging64_gva_to_gpa+0xb0/0x120 [kvm] ? paging64_walk_addr_generic+0x11a0/0x11a0 [kvm] ? lock_acquire+0x2c0/0x2c0 ? vmx_read_guest_seg_ar+0x97/0x100 [kvm_intel] ? vmx_get_segment+0x2a6/0x310 [kvm_intel] ? sched_clock+0x1f/0x30 ? check_chain_key+0x137/0x1e0 ? __lock_acquire+0x83c/0x2420 ? kvm_multiple_exception+0xf2/0x220 [kvm] ? debug_check_no_locks_freed+0x240/0x240 ? debug_smp_processor_id+0x17/0x20 ? __lock_is_held+0x9e/0x100 kvm_mmu_page_fault+0x90/0x180 [kvm] kvm_handle_page_fault+0x15c/0x310 [kvm] ? __lock_is_held+0x9e/0x100 handle_exception+0x3c7/0x4d0 [kvm_intel] vmx_handle_exit+0x103/0x1010 [kvm_intel] ? kvm_arch_vcpu_ioctl_run+0x1628/0x2e20 [kvm] The commit avoids to load host state of vmcs12 as vmcs01's guest state since vmcs12 is not modified (except for the VM-instruction error field) if the checking of vmcs control area fails. However, the mmu context is switched to nested mmu in prepare_vmcs02() and it will not be reloaded since load_vmcs12_host_state() is skipped when nested VMLAUNCH/VMRESUME fails. This patch fixes it by reloading mmu context when nested VMLAUNCH/VMRESUME fails. Reviewed-by: Jim Mattson Reviewed-by: Krish Sadhukhan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jim Mattson Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 707aaa954b3d..10474d26a000 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11330,6 +11330,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, kvm_clear_interrupt_queue(vcpu); } +static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 entry_failure_code; + + nested_ept_uninit_mmu_context(vcpu); + + /* + * Only PDPTE load can fail as the value of cr3 was checked on entry and + * couldn't have changed. + */ + if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) + nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); + + if (!enable_ept) + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; +} + /* * A part of what we need to when the nested L2 guest exits and we want to * run its L1 parent, is to reset L1's guest state to the host state specified @@ -11343,7 +11361,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct kvm_segment seg; - u32 entry_failure_code; if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; @@ -11370,17 +11387,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); vmx_set_cr4(vcpu, vmcs12->host_cr4); - nested_ept_uninit_mmu_context(vcpu); - - /* - * Only PDPTE load can fail as the value of cr3 was checked on entry and - * couldn't have changed. - */ - if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) - nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); - - if (!enable_ept) - vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; + load_vmcs12_mmu_host_state(vcpu, vmcs12); if (enable_vpid) { /* @@ -11610,6 +11617,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, * accordingly. */ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + + load_vmcs12_mmu_host_state(vcpu, vmcs12); + /* * The emulated instruction was already skipped in * nested_vmx_run, but the updated RIP was never From 4d772cb85f64c16eca00177089ecb3cd5d292120 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Nov 2017 18:04:05 +0100 Subject: [PATCH 011/333] KVM: x86: fix em_fxstor() sleeping while in atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9d643f63128b ("KVM: x86: avoid large stack allocations in em_fxrstor") optimize the stack size, but introduced a guest memory access which might sleep while in atomic. Fix it by introducing, again, a second fxregs_state. Try to avoid large stacks by using noinline. Add some helpful comments. Reported by syzbot: in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109 2 locks held by syzkaller879109/2909: #0: (&vcpu->mutex){+.+.}, at: [] vcpu_load+0x1c/0x70 arch/x86/kvm/../../../virt/kvm/kvm_main.c:154 #1: (&kvm->srcu){....}, at: [] vcpu_enter_guest arch/x86/kvm/x86.c:6983 [inline] #1: (&kvm->srcu){....}, at: [] vcpu_run arch/x86/kvm/x86.c:7061 [inline] #1: (&kvm->srcu){....}, at: [] kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222 CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted 4.13.0-rc4-next-20170811 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014 __might_sleep+0x95/0x190 kernel/sched/core.c:5967 __might_fault+0xab/0x1d0 mm/memory.c:4383 __copy_from_user include/linux/uaccess.h:71 [inline] __kvm_read_guest_page+0x58/0xa0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771 kvm_vcpu_read_guest_page+0x44/0x60 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791 kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407 kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466 segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819 em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022 x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471 x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698 kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854 handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400 vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718 vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline] vcpu_run arch/x86/kvm/x86.c:7061 [inline] kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222 kvm_vcpu_ioctl+0x64c/0x1010 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x437fc9 RSP: 002b:00007ffc7b4d5ab8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000437fc9 RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000005 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000020ae8000 R10: 0000000000009120 R11: 0000000000000206 R12: 0000000000000000 R13: 0000000000000004 R14: 0000000000000004 R15: 0000000020077000 Fixes: 9d643f63128b ("KVM: x86: avoid large stack allocations in em_fxrstor") Signed-off-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/emulate.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b4a87debbb4b..e7d04d0c8008 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4014,6 +4014,26 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) fxstate_size(ctxt)); } +/* + * FXRSTOR might restore XMM registers not provided by the guest. Fill + * in the host registers (via FXSAVE) instead, so they won't be modified. + * (preemption has to stay disabled until FXRSTOR). + * + * Use noinline to keep the stack for other functions called by callers small. + */ +static noinline int fxregs_fixup(struct fxregs_state *fx_state, + const size_t used_size) +{ + struct fxregs_state fx_tmp; + int rc; + + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp)); + memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size, + __fxstate_size(16) - used_size); + + return rc; +} + static int em_fxrstor(struct x86_emulate_ctxt *ctxt) { struct fxregs_state fx_state; @@ -4024,19 +4044,19 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + size = fxstate_size(ctxt); + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->get_fpu(ctxt); - size = fxstate_size(ctxt); if (size < __fxstate_size(16)) { - rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) goto out; } - rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); - if (rc != X86EMUL_CONTINUE) - goto out; - if (fx_state.mxcsr >> 16) { rc = emulate_gp(ctxt, 0); goto out; From fab0aa3b776f0a3af1db1f50e04f1884015f9082 Mon Sep 17 00:00:00 2001 From: Eyal Moscovici Date: Wed, 8 Nov 2017 14:32:08 +0200 Subject: [PATCH 012/333] KVM: x86: Allow suppressing prints on RDMSR/WRMSR of unhandled MSRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some guests use these unhandled MSRs very frequently. This cause dmesg to be populated with lots of aggregated messages on usage of ignored MSRs. As ignore_msrs=true means that the user is well-aware his guest use ignored MSRs, allow to also disable the prints on their usage. An example of such guest is ESXi which tends to access a lot to MSR 0x34 (MSR_SMI_COUNT) very frequently. In addition, we have observed this to cause unnecessary delays to guest execution. Such an example is ESXi which experience networking delays in it's guests (L2 guests) because of these prints (even when prints are rate-limited). This can easily be reproduced by pinging from one L2 guest to another. Once in a while, a peak in ping RTT will be observed. Removing these unhandled MSR prints solves the issue. Because these prints can help diagnose issues with guests, this commit only suppress them by a module parameter instead of removing them from code entirely. Signed-off-by: Eyal Moscovici Reviewed-by: Liran Alon Reviewed-by: Krish Sadhukhan Signed-off-by: Krish Sadhukhan Signed-off-by: Konrad Rzeszutek Wilk [Changed suppress_ignore_msrs_prints to report_ignored_msrs - Radim] Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5a7c53a19c6..0c5b141927b8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); +static bool __read_mostly report_ignored_msrs = true; +module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); + unsigned int min_timer_period_us = 500; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); @@ -2325,7 +2328,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Drop writes to this legacy MSR -- see rdmsr * counterpart for further detail. */ - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", + msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) @@ -2362,8 +2367,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr, data); return 1; } else { - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", - msr, data); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, + "ignored wrmsr: 0x%x data 0x%llx\n", + msr, data); break; } } @@ -2581,7 +2588,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->index); return 1; } else { - vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", + msr_info->index); msr_info->data = 0; } break; From 6ea6e84309ca7e0e850b3083e6b09344ee15c290 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 10 Nov 2017 10:49:38 +0100 Subject: [PATCH 013/333] KVM: x86: inject exceptions produced by x86_decode_insn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes, a processor might execute an instruction while another processor is updating the page tables for that instruction's code page, but before the TLB shootdown completes. The interesting case happens if the page is in the TLB. In general, the processor will succeed in executing the instruction and nothing bad happens. However, what if the instruction is an MMIO access? If *that* happens, KVM invokes the emulator, and the emulator gets the updated page tables. If the update side had marked the code page as non present, the page table walk then will fail and so will x86_decode_insn. Unfortunately, even though kvm_fetch_guest_virt is correctly returning X86EMUL_PROPAGATE_FAULT, x86_decode_insn's caller treats the failure as a fatal error if the instruction cannot simply be reexecuted (as is the case for MMIO). And this in fact happened sometimes when rebooting Windows 2012r2 guests. Just checking ctxt->have_exception and injecting the exception if true is enough to fix the case. Thanks to Eduardo Habkost for helping in the debugging of this issue. Reported-by: Yanan Fu Cc: Eduardo Habkost Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c5b141927b8..4552427105f6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5734,6 +5734,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) return EMULATE_DONE; + if (ctxt->have_exception && inject_emulated_exception(vcpu)) + return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; return handle_emulation_failure(vcpu); From 0fc5a36dd6b345eb0d251a65c236e53bead3eef7 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:29 +0200 Subject: [PATCH 014/333] KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM uses ioapic_handled_vectors to track vectors that need to notify the IOAPIC on EOI. The problem is that IOAPIC can be reconfigured while an interrupt with old configuration is pending or running and ioapic_handled_vectors only remembers the newest configuration; thus EOI from the old interrupt is not delievered to the IOAPIC. A previous commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") addressed this issue by adding pending edge-triggered interrupts to ioapic_handled_vectors, fixing this race for edge-triggered interrupts. The commit explicitly ignored level-triggered interrupts, but this race applies to them as well: 1) IOAPIC sends a level triggered interrupt vector to VCPU0 2) VCPU0's handler deasserts the irq line and reconfigures the IOAPIC to route the vector to VCPU1. The reconfiguration rewrites only the upper 32 bits of the IOREDTBLn register. (Causes KVM to update ioapic_handled_vectors for VCPU0 and it no longer includes the vector.) 3) VCPU0 sends EOI for the vector, but it's not delievered to the IOAPIC because the ioapic_handled_vectors doesn't include the vector. 4) New interrupts are not delievered to VCPU1 because remote_irr bit is set forever. Therefore, the correct behavior is to add all pending and running interrupts to ioapic_handled_vectors. This commit introduces a slight performance hit similar to commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") for the rare case that the vector is reused by a non-IOAPIC source on VCPU0. We prefer to keep solution simple and not handle this case just as the original commit does. Fixes: db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index bdff437acbcb..ae0a7dc318b2 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) index == RTC_GSI) { if (kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, e->fields.dest_mode) || - (e->fields.trig_mode == IOAPIC_EDGE_TRIG && - kvm_apic_pending_eoi(vcpu, e->fields.vector))) + kvm_apic_pending_eoi(vcpu, e->fields.vector)) __set_bit(e->fields.vector, ioapic_handled_vectors); } From da3fe7bdfada217bf02ecd0477fcdb55da50944c Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:30 +0200 Subject: [PATCH 015/333] KVM: x86: ioapic: Don't fire level irq when Remote IRR set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid firing a level-triggered interrupt that has the Remote IRR bit set, because that means that some CPU is already processing it. The Remote IRR bit will be cleared after an EOI and the interrupt will refire if the irq line is still asserted. This behavior is aligned with QEMU's IOAPIC implementation that was introduced by commit f99b86b94987 ("x86: ioapic: ignore level irq during processing") in QEMU. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index ae0a7dc318b2..5c9231139243 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -323,7 +323,9 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) struct kvm_lapic_irq irqe; int ret; - if (entry->fields.mask) + if (entry->fields.mask || + (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG && + entry->fields.remote_irr)) return -1; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " From 7d2253684dd10eb800ee1898ad7904044ae88ed6 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:31 +0200 Subject: [PATCH 016/333] KVM: x86: ioapic: Remove redundant check for Remote IRR in ioapic_set_irq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remote IRR for level-triggered interrupts was previously checked in ioapic_set_irq, but since we now have a check in ioapic_service we can remove the redundant check from ioapic_set_irq. This commit doesn't change semantics. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 5c9231139243..6df150eaaa78 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -209,12 +209,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, old_irr = ioapic->irr; ioapic->irr |= mask; - if (edge) + if (edge) { ioapic->irr_delivered &= ~mask; - if ((edge && old_irr == ioapic->irr) || - (!edge && entry.fields.remote_irr)) { - ret = 0; - goto out; + if (old_irr == ioapic->irr) { + ret = 0; + goto out; + } } ret = ioapic_service(ioapic, irq, line_status); From a8bfec2930525808c01f038825d1df3904638631 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:32 +0200 Subject: [PATCH 017/333] KVM: x86: ioapic: Clear Remote IRR when entry is switched to edge-triggered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some OSes (Linux, Xen) use this behavior to clear the Remote IRR bit for IOAPICs without an EOI register. They simulate the EOI message manually by changing the trigger mode to edge and then back to level, with the entry being masked during this. QEMU implements this feature in commit ed1263c363c9 ("ioapic: clear remote irr bit for edge-triggered interrupts") As a side effect, this commit removes an incorrect behavior where Remote IRR was cleared when the redirection table entry was rewritten. This is not consistent with the manual and also opens an opportunity for a strange behavior when a redirection table entry is modified from an interrupt handler that handles the same entry: The modification will clear the Remote IRR bit even though the interrupt handler is still running. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6df150eaaa78..163d340ee5f8 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -304,8 +304,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) } else { e->bits &= ~0xffffffffULL; e->bits |= (u32) val; - e->fields.remote_irr = 0; } + + /* + * Some OSes (Linux, Xen) assume that Remote IRR bit will + * be cleared by IOAPIC hardware when the entry is configured + * as edge-triggered. This behavior is used to simulate an + * explicit EOI on IOAPICs that don't have the EOI register. + */ + if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) + e->fields.remote_irr = 0; + mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); From b200dded0a6974a3b69599832b2203483920ab25 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:33 +0200 Subject: [PATCH 018/333] KVM: x86: ioapic: Preserve read-only values in the redirection table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to 82093AA (IOAPIC) manual, Remote IRR and Delivery Status are read-only. QEMU implements the bits as RO in commit 479c2a1cb7fb ("ioapic: keep RO bits for IOAPIC entry"). Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 163d340ee5f8..4e822ad363f3 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -276,6 +276,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; + int old_remote_irr, old_delivery_status; union kvm_ioapic_redirect_entry *e; switch (ioapic->ioregsel) { @@ -298,6 +299,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) return; e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; + /* Preserve read-only fields */ + old_remote_irr = e->fields.remote_irr; + old_delivery_status = e->fields.delivery_status; if (ioapic->ioregsel & 1) { e->bits &= 0xffffffff; e->bits |= (u64) val << 32; @@ -305,6 +309,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) e->bits &= ~0xffffffffULL; e->bits |= (u32) val; } + e->fields.remote_irr = old_remote_irr; + e->fields.delivery_status = old_delivery_status; /* * Some OSes (Linux, Xen) assume that Remote IRR bit will From 917dc6068bc12a2dafffcf0e9d405ddb1b8780cb Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:07:43 +0200 Subject: [PATCH 019/333] KVM: nVMX: Fix vmx_check_nested_events() return value in case an event was reinjected to L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmx_check_nested_events() should return -EBUSY only in case there is a pending L1 event which requires a VMExit from L2 to L1 but such a VMExit is currently blocked. Such VMExits are blocked either because nested_run_pending=1 or an event was reinjected to L2. vmx_check_nested_events() should return 0 in case there are no pending L1 events which requires a VMExit from L2 to L1 or if a VMExit from L2 to L1 was done internally. However, upstream commit which introduced blocking in case an event was reinjected to L2 (commit acc9ab601327 ("KVM: nVMX: Fix pending events injection")) contains a bug: It returns -EBUSY even if there are no pending L1 events which requires VMExit from L2 to L1. This commit fix this issue. Fixes: acc9ab601327 ("KVM: nVMX: Fix pending events injection") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 10474d26a000..be4724b5d434 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11105,13 +11105,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qual; - - if (kvm_event_needs_reinjection(vcpu)) - return -EBUSY; + bool block_nested_events = + vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); if (vcpu->arch.exception.pending && nested_vmx_check_exception(vcpu, &exit_qual)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); vcpu->arch.exception.pending = false; @@ -11120,14 +11119,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); return 0; } if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | @@ -11143,7 +11142,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && nested_exit_on_intr(vcpu)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); return 0; From 50a671d4d15b859f447fa527191073019b6ce9cb Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Mon, 6 Nov 2017 11:44:23 -0600 Subject: [PATCH 020/333] KVM: x86: Fix CPUID function for word 6 (80000001_ECX) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function for CPUID 80000001 ECX is set to 0xc0000001. Set it to 0x80000001. Signed-off-by: Janakarajan Natarajan Reviewed-by: Jim Mattson Reviewed-by: Krish Sadhukhan Reviewed-by: Borislav Petkov Fixes: d6321d493319 ("KVM: x86: generalize guest_cpuid_has_ helpers") Signed-off-by: Radim Krčmář --- arch/x86/kvm/cpuid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3a6583..c2cea6651279 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -44,7 +44,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, - [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, + [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, From c4ad77e0d49b10b412a9fa7f47a3a23177870bc7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 13 Nov 2017 14:23:59 +0100 Subject: [PATCH 021/333] KVM: vmx: use X86_CR4_UMIP and X86_FEATURE_UMIP These bits were not defined until now in common code, but they are now that the kernel supports UMIP too. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index be4724b5d434..0d59dbe430c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9801,8 +9801,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); - /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */ - cr4_fixed1_update(bit(11), ecx, bit(2)); + cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); #undef cr4_fixed1_update } From 97f07697932e6faf2a708f7aef7cba8e6b0cab96 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Tue, 31 Oct 2017 18:37:54 +0800 Subject: [PATCH 022/333] bdi: convert bdi_debug_register to int Convert bdi_debug_register to int and then do error handle for it. Reviewed-by: Jan Kara Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- mm/backing-dev.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 74b52dfd5852..b5f940ce0143 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -113,11 +113,23 @@ static const struct file_operations bdi_debug_stats_fops = { .release = single_release, }; -static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) +static int bdi_debug_register(struct backing_dev_info *bdi, const char *name) { + if (!bdi_debug_root) + return -ENOMEM; + bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); + if (!bdi->debug_dir) + return -ENOMEM; + bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir, bdi, &bdi_debug_stats_fops); + if (!bdi->debug_stats) { + debugfs_remove(bdi->debug_dir); + return -ENOMEM; + } + + return 0; } static void bdi_debug_unregister(struct backing_dev_info *bdi) @@ -129,9 +141,10 @@ static void bdi_debug_unregister(struct backing_dev_info *bdi) static inline void bdi_debug_init(void) { } -static inline void bdi_debug_register(struct backing_dev_info *bdi, +static inline int bdi_debug_register(struct backing_dev_info *bdi, const char *name) { + return 0; } static inline void bdi_debug_unregister(struct backing_dev_info *bdi) { From a0747a859ef6d3cc5b6cd50eb694499b78dd0025 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Fri, 17 Nov 2017 23:06:04 +0800 Subject: [PATCH 023/333] bdi: add error handle for bdi_debug_register In order to make error handle more cleaner we call bdi_debug_register before set state to WB_registered, that we can avoid call bdi_unregister in release_bdi(). Reviewed-by: Jan Kara Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- mm/backing-dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b5f940ce0143..84b2dc76f140 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -882,10 +882,13 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) if (IS_ERR(dev)) return PTR_ERR(dev); + if (bdi_debug_register(bdi, dev_name(dev))) { + device_destroy(bdi_class, dev->devt); + return -ENOMEM; + } cgwb_bdi_register(bdi); bdi->dev = dev; - bdi_debug_register(bdi, dev_name(dev)); set_bit(WB_registered, &bdi->wb.state); spin_lock_bh(&bdi_lock); From 3a92168bc8a113098b44a95d499557a88bb387da Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Tue, 31 Oct 2017 18:38:59 +0800 Subject: [PATCH 024/333] block: add WARN_ON if bdi register fail device_add_disk need do more safety error handle, so this patch just add WARN_ON. Reviewed-by: Jan Kara Signed-off-by: weiping zhang Adapted for current series by me. Signed-off-by: Jens Axboe --- block/genhd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index c2223f12a805..c143a2274238 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -671,10 +671,13 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; disk->flags |= GENHD_FL_NO_PART_SCAN; } else { + int ret; + /* Register BDI before referencing it from bdev */ disk_to_dev(disk)->devt = devt; - bdi_register_owner(disk->queue->backing_dev_info, - disk_to_dev(disk)); + ret = bdi_register_owner(disk->queue->backing_dev_info, + disk_to_dev(disk)); + WARN_ON(ret); blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); } From 7fb526212f16fcec4e92121ea86dc28fba493177 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Nov 2017 17:43:38 -0800 Subject: [PATCH 025/333] block: genhd.c: fix message typo Fix typo in error message. Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/genhd.c b/block/genhd.c index c143a2274238..96a66f671720 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1392,7 +1392,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) if (minors > DISK_MAX_PARTS) { printk(KERN_ERR - "block: can't allocated more than %d partitions\n", + "block: can't allocate more than %d partitions\n", DISK_MAX_PARTS); minors = DISK_MAX_PARTS; } From 1690102de5651bb85b23d5eeaff682a6b96d705b Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Sun, 19 Nov 2017 16:48:13 -0200 Subject: [PATCH 026/333] blktrace: Use blk_trace_bio_get_cgid inside blk_add_trace_bio We always pass in blk_trace_bio_get_cgid(q, bio) to blk_add_trace_bio(). Since both are readily available in the function already, kill the argument. Signed-off-by: Marcos Paulo de Souza Rewrote commit message. Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 206e0e2ace53..c5987d4c5f23 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -872,7 +872,7 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq, * **/ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, - u32 what, int error, union kernfs_node_id *cgid) + u32 what, int error) { struct blk_trace *bt = q->blk_trace; @@ -880,22 +880,21 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, return; __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, what, error, 0, NULL, cgid); + bio_op(bio), bio->bi_opf, what, error, 0, NULL, + blk_trace_bio_get_cgid(q, bio)); } static void blk_add_trace_bio_bounce(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } static void blk_add_trace_bio_complete(void *ignore, struct request_queue *q, struct bio *bio, int error) { - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); } static void blk_add_trace_bio_backmerge(void *ignore, @@ -903,8 +902,7 @@ static void blk_add_trace_bio_backmerge(void *ignore, struct request *rq, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); } static void blk_add_trace_bio_frontmerge(void *ignore, @@ -912,15 +910,13 @@ static void blk_add_trace_bio_frontmerge(void *ignore, struct request *rq, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); } static void blk_add_trace_bio_queue(void *ignore, struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); } static void blk_add_trace_getrq(void *ignore, @@ -928,8 +924,7 @@ static void blk_add_trace_getrq(void *ignore, struct bio *bio, int rw) { if (bio) - blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); else { struct blk_trace *bt = q->blk_trace; @@ -945,8 +940,7 @@ static void blk_add_trace_sleeprq(void *ignore, struct bio *bio, int rw) { if (bio) - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0, - blk_trace_bio_get_cgid(q, bio)); + blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); else { struct blk_trace *bt = q->blk_trace; From 48832f8d58cfedb2f9bee11bbfbb657efb42e7e7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:20 +0300 Subject: [PATCH 027/333] nvme-fabrics: introduce init command check for a queue that is not alive When the fabrics queue is not alive and fully functional, no commands should be allowed to pass but connect (which moves the queue to a fully functional state). Any other command should be failed, with either temporary status BLK_STS_RESOUCE or permanent status BLK_STS_IOERR. This is shared across all fabrics, hence move the check to fabrics library. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.h | 30 ++++++++++++++++++++++++++++++ drivers/nvme/host/rdma.c | 30 +++++------------------------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 42232e731f19..9ba614953607 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -156,4 +156,34 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); +static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl, + struct request *rq) +{ + struct nvme_command *cmd = nvme_req(rq)->cmd; + + /* + * We cannot accept any other command until the connect command has + * completed, so only allow connect to pass. + */ + if (!blk_rq_is_passthrough(rq) || + cmd->common.opcode != nvme_fabrics_command || + cmd->fabrics.fctype != nvme_fabrics_type_connect) { + /* + * Reconnecting state means transport disruption, which can take + * a long time and even might fail permanently, fail fast to + * give upper layers a chance to failover. + * Deleting state means that the ctrl will never accept commands + * again, fail it permanently. + */ + if (ctrl->state == NVME_CTRL_RECONNECTING || + ctrl->state == NVME_CTRL_DELETING) { + nvme_req(rq)->status = NVME_SC_ABORT_REQ; + return BLK_STS_IOERR; + } + return BLK_STS_RESOURCE; /* try again later */ + } + + return BLK_STS_OK; +} + #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4f9bf2f815c3..2c597105a6bf 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1591,31 +1591,11 @@ nvme_rdma_timeout(struct request *rq, bool reserved) * We cannot accept any other command until the Connect command has completed. */ static inline blk_status_t -nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq) +nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq) { - if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { - struct nvme_command *cmd = nvme_req(rq)->cmd; - - if (!blk_rq_is_passthrough(rq) || - cmd->common.opcode != nvme_fabrics_command || - cmd->fabrics.fctype != nvme_fabrics_type_connect) { - /* - * reconnecting state means transport disruption, which - * can take a long time and even might fail permanently, - * fail fast to give upper layers a chance to failover. - * deleting state means that the ctrl will never accept - * commands again, fail it permanently. - */ - if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING || - queue->ctrl->ctrl.state == NVME_CTRL_DELETING) { - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; - } - return BLK_STS_RESOURCE; /* try again later */ - } - } - - return 0; + if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; } static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, @@ -1634,7 +1614,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); - ret = nvme_rdma_queue_is_ready(queue, rq); + ret = nvme_rdma_is_ready(queue, rq); if (unlikely(ret)) return ret; From 9e0ed16ab9a9aaf670b81c9cd05b5e50defed654 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:21 +0300 Subject: [PATCH 028/333] nvme-fc: check if queue is ready in queue_rq In case the queue is not LIVE (fully functional and connected at the nvmf level), we cannot allow any commands other than connect to pass through. Add a new queue state flag NVME_FC_Q_LIVE which is set after nvmf connect and cleared in queue teardown. Signed-off-by: Sagi Grimberg Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7ab0be55c7d0..e0577bf33f45 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -32,6 +32,7 @@ enum nvme_fc_queue_flags { NVME_FC_Q_CONNECTED = (1 << 0), + NVME_FC_Q_LIVE = (1 << 1), }; #define NVMEFC_QUEUE_DELAY 3 /* ms units */ @@ -1927,6 +1928,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) return; + clear_bit(NVME_FC_Q_LIVE, &queue->flags); /* * Current implementation never disconnects a single queue. * It always terminates a whole association. So there is never @@ -1934,7 +1936,6 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) */ queue->connection_id = 0; - clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); } static void @@ -2013,6 +2014,8 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) break; + + set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags); } return ret; @@ -2320,6 +2323,14 @@ busy: return BLK_STS_RESOURCE; } +static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; +} + static blk_status_t nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) @@ -2335,6 +2346,10 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, u32 data_len; blk_status_t ret; + ret = nvme_fc_is_ready(queue, rq); + if (unlikely(ret)) + return ret; + ret = nvme_setup_cmd(ns, rq, sqe); if (ret) return ret; @@ -2727,6 +2742,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (ret) goto out_disconnect_admin_queue; + set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); + /* * Check controller capabilities * From 9d7fab04b95e8c26014a9bfc1c943b8360b44c17 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:22 +0300 Subject: [PATCH 029/333] nvme-loop: check if queue is ready in queue_rq In case the queue is not LIVE (fully functional and connected at the nvmf level), we cannot allow any commands other than connect to pass through. Add a new queue state flag NVME_LOOP_Q_LIVE which is set after nvmf connect and cleared in queue teardown. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 96d390416789..1e21b286f299 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -52,10 +52,15 @@ static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) return container_of(ctrl, struct nvme_loop_ctrl, ctrl); } +enum nvme_loop_queue_flags { + NVME_LOOP_Q_LIVE = 0, +}; + struct nvme_loop_queue { struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; struct nvme_loop_ctrl *ctrl; + unsigned long flags; }; static struct nvmet_port *nvmet_loop_port; @@ -144,6 +149,14 @@ nvme_loop_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } +static inline blk_status_t nvme_loop_is_ready(struct nvme_loop_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_LOOP_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; +} + static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -153,6 +166,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); blk_status_t ret; + ret = nvme_loop_is_ready(queue, req); + if (unlikely(ret)) + return ret; + ret = nvme_setup_cmd(ns, req, &iod->cmd); if (ret) return ret; @@ -267,6 +284,7 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = { static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); @@ -297,8 +315,10 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->ctrl.queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) { + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + } } static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) @@ -338,6 +358,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) return ret; + set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); } return 0; @@ -380,6 +401,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) if (error) goto out_cleanup_queue; + set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); if (error) { dev_err(ctrl->ctrl.device, From 8427bbc224863e14d905c87920d4005cb3e88ac3 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 9 Nov 2017 01:12:03 -0500 Subject: [PATCH 030/333] nvme-pci: disable APST on Samsung SSD 960 EVO + ASUS PRIME B350M-A The NVMe device in question drops off the PCIe bus after system suspend. I've tried several approaches to workaround this issue, but none of them works: - NVME_QUIRK_DELAY_BEFORE_CHK_RDY - NVME_QUIRK_NO_DEEPEST_PS - Disable APST before controller shutdown - Delay between controller shutdown and system suspend - Explicitly set power state to 0 before controller shutdown Fortunately it's a desktop, so disable APST won't hurt the battery. Also, change the quirk function name to reflect it's for vendor combination quirks. BugLink: https://bugs.launchpad.net/bugs/1705748 Signed-off-by: Kai-Heng Feng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a11cfd470089..8a15aa50b8e0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2428,7 +2428,7 @@ static int nvme_dev_map(struct nvme_dev *dev) return -ENODEV; } -static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) +static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) { if (pdev->vendor == 0x144d && pdev->device == 0xa802) { /* @@ -2443,6 +2443,14 @@ static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") || dmi_match(DMI_PRODUCT_NAME, "Precision 5510"))) return NVME_QUIRK_NO_DEEPEST_PS; + } else if (pdev->vendor == 0x144d && pdev->device == 0xa804) { + /* + * Samsung SSD 960 EVO drops off the PCIe bus after system + * suspend on a Ryzen board, ASUS PRIME B350M-A. + */ + if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") && + dmi_match(DMI_BOARD_NAME, "PRIME B350M-A")) + return NVME_QUIRK_NO_APST; } return 0; @@ -2482,7 +2490,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto unmap; - quirks |= check_dell_samsung_bug(pdev); + quirks |= check_vendor_combination_bug(pdev); result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); From 244a8fe40a09c218622eb9927b9090b0a9b73a1a Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Fri, 17 Nov 2017 01:34:24 +0900 Subject: [PATCH 031/333] nvme-pci: avoid hmb desc array idx out-of-bound when hmmaxd set. hmb descriptor idx out-of-bound occurs in case of below conditions. preferred = 128MiB chunk_size = 4MiB hmmaxd = 1 Current code will not allow rmmod which will free hmb descriptors to be done successfully in above case. "descs[i]" will be set in for-loop without seeing any conditions related to "max_entries" after a single "descs" was allocated by (max_entries = 1) in this case. Added a condition into for-loop to check index of descriptors. Fixes: 044a9df1("nvme-pci: implement the HMB entry number and size limitations") Signed-off-by: Minwoo Im Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8a15aa50b8e0..58dbe684007b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1787,7 +1787,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, if (!bufs) goto out_free_descs; - for (size = 0; size < preferred; size += len) { + for (size = 0; size < preferred && i < max_entries; size += len) { dma_addr_t dma_addr; len = min_t(u64, chunk_size, preferred - size); From 89c4aff6d4f71726f22e567f046dc1dd73c35de1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Nov 2017 14:26:27 +0000 Subject: [PATCH 032/333] nvme: fix spelling mistake: "requeing" -> "requeuing" Trivial fix to spelling mistake in dev_warn_ratelimited message text Signed-off-by: Colin Ian King Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 78d92151a904..1218a9fca846 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -131,7 +131,7 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, bio->bi_opf |= REQ_NVME_MPATH; ret = direct_make_request(bio); } else if (!list_empty_careful(&head->list)) { - dev_warn_ratelimited(dev, "no path available - requeing I/O\n"); + dev_warn_ratelimited(dev, "no path available - requeuing I/O\n"); spin_lock_irq(&head->requeue_lock); bio_list_add(&head->requeue_list, bio); From b0d61d586f09fd814a45a5d778fe0d6123f67c2a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 16 Nov 2017 13:36:49 -0700 Subject: [PATCH 033/333] nvme: Fix NULL dereference on reservation request This fixes using the NULL 'head' before getting the reference. It is however possible the head will always be NULL, so this patch uses the struct nvme_ns to get the ns_id field. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 25da74d310d1..a2ab4e440bea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1449,19 +1449,19 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10, int srcu_idx, ret; u8 data[16] = { 0, }; + ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); + if (unlikely(!ns)) + return -EWOULDBLOCK; + put_unaligned_le64(key, &data[0]); put_unaligned_le64(sa_key, &data[8]); memset(&c, 0, sizeof(c)); c.common.opcode = op; - c.common.nsid = cpu_to_le32(head->ns_id); + c.common.nsid = cpu_to_le32(ns->head->ns_id); c.common.cdw10[0] = cpu_to_le32(cdw10); - ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); - if (unlikely(!ns)) - ret = -EWOULDBLOCK; - else - ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); + ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); nvme_put_ns_from_disk(head, srcu_idx); return ret; } From 9941a862cc92e448df95709ff40a618964b25e33 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 16 Nov 2017 13:36:50 -0700 Subject: [PATCH 034/333] nvme: Suppress static analyis warning The ns->head is always valid, so we don't need to check for NULL. Reported-by: Dan Carpenter Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a2ab4e440bea..f837d666cbd4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2961,8 +2961,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) static void nvme_ns_remove(struct nvme_ns *ns) { - struct nvme_ns_head *head = ns->head; - if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; @@ -2980,15 +2978,14 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); nvme_mpath_clear_current_path(ns); - if (head) - list_del_rcu(&ns->siblings); + list_del_rcu(&ns->siblings); mutex_unlock(&ns->ctrl->subsys->lock); mutex_lock(&ns->ctrl->namespaces_mutex); list_del_init(&ns->list); mutex_unlock(&ns->ctrl->namespaces_mutex); - synchronize_srcu(&head->srcu); + synchronize_srcu(&ns->head->srcu); nvme_put_ns(ns); } From 619c62dcc62b957d17cccde2081cad527b020883 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 10 Nov 2017 15:38:45 -0800 Subject: [PATCH 035/333] nvmet-fc: correct ref counting error when deferred rcv used Whenever a cmd is received a reference is taken while looking up the queue. The reference is removed after the cmd is done as the iod is returned for reuse. The fod may be reused for a deferred (recevied but no job context) cmd. Existing code removes the reference only if the fod is not reused for another command. Given the fod may be used for one or more ios, although a reference was taken per io, it won't be matched on the frees. Remove the reference on every fod free. This pairs the references to each io. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 664d3013f68f..5fd86039e353 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -533,15 +533,15 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); + /* release the queue lookup reference on the completed IO */ + nvmet_fc_tgt_q_put(queue); + spin_lock_irqsave(&queue->qlock, flags); deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, struct nvmet_fc_defer_fcp_req, req_list); if (!deferfcp) { list_add_tail(&fod->fcp_list, &fod->queue->fod_list); spin_unlock_irqrestore(&queue->qlock, flags); - - /* Release reference taken at queue lookup and fod allocation */ - nvmet_fc_tgt_q_put(queue); return; } @@ -760,6 +760,9 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) tgtport->ops->fcp_req_release(&tgtport->fc_target_port, deferfcp->fcp_req); + /* release the queue lookup reference */ + nvmet_fc_tgt_q_put(queue); + kfree(deferfcp); spin_lock_irqsave(&queue->qlock, flags); From 38389ec84e835fa31a59b7dabb18343106a6d0d5 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 16 Nov 2017 14:26:36 +0100 Subject: [PATCH 036/333] s390/topology: fix compile error in file arch/s390/kernel/smp.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1887aa07b676 ("s390/topology: add detection of dedicated vs shared CPUs") introduced following compiler error when CONFIG_SCHED_TOPOLOGY is not set. CC arch/s390/kernel/smp.o ... arch/s390/kernel/smp.c: In function ‘smp_start_secondary’: arch/s390/kernel/smp.c:812:6: error: implicit declaration of function ‘topology_cpu_dedicated’; did you mean ‘topology_cpu_init’? This patch fixes the compiler error by adding function topology_cpu_dedicated() to return false when this config option is not defined. Signed-off-by: Thomas Richter Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/topology.h | 1 + arch/s390/kernel/smp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 1807229b292f..cca406fdbe51 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -53,6 +53,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu); static inline void topology_init_early(void) { } static inline void topology_schedule_update(void) { } static inline int topology_cpu_init(struct cpu *cpu) { return 0; } +static inline int topology_cpu_dedicated(int cpu_nr) { return 0; } static inline void topology_expect_change(void) { } #endif /* CONFIG_SCHED_TOPOLOGY */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index cd4334e80b64..b8c1a85bcf2d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -55,6 +55,7 @@ #include #include #include +#include #include "entry.h" enum { From 3241d3eb7d9bb7a134bedf49db196cf98b58834d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Nov 2017 14:54:04 +0100 Subject: [PATCH 037/333] s390: rework __switch_to() to allow larger task_struct offsets If GCC_PLUGIN_RANDSTRUCT is enabled the members of task_struct will be shuffled around. The offsets of the "pid" and "stack" members within task_struct may not necessarily fit into 12 bits anymore, which causes compile errors within __switch_to, since instructions are used, which only have a 12 bit displacement field. Therefore rework __switch_to, to allow for larger offsets. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a316cd6999ad..9e5f6cd8e4c2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -180,18 +180,17 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) */ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task - lgr %r1,%r2 - aghi %r1,__TASK_thread # thread_struct of prev task - lg %r5,__TASK_stack(%r3) # start of kernel stack of next - stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev - lgr %r1,%r3 - aghi %r1,__TASK_thread # thread_struct of next task + lghi %r4,__TASK_stack + lghi %r1,__TASK_thread + lg %r5,0(%r4,%r3) # start of kernel stack of next + stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev lgr %r15,%r5 aghi %r15,STACK_INIT # end of kernel stack of next stg %r3,__LC_CURRENT # store task struct of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack - lg %r15,__THREAD_ksp(%r1) # load kernel stack of next - mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next + lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next + aghi %r3,__TASK_pid + mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP bzr %r14 From de35089cc82c33f8be1dec5a0c3d7a05cf91720d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 17 Nov 2017 09:50:40 +0100 Subject: [PATCH 038/333] s390/disassembler: remove confusing code When searching the opcode offset table within find_insn() the check "entry->opcode == 0" was intended to clarify that 1-byte opcodes, the first one being 0, are special. However there is no mnemonic for an illegal opcode starting with 0. Therefore there is also no opcode offset table entry that matches, which again means that the check never is true. Therefore just remove the confusing check, and add a comment which hopefully explains how this works. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 3be829721cf9..465d52b5e470 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -396,9 +396,14 @@ struct s390_insn *find_insn(unsigned char *code) unsigned char opfrag; int i; + /* Search the opcode offset table to find an entry which + * matches the beginning of the opcode. If there is no match + * the last entry will be used, which is the default entry for + * unknown instructions as well as 1-byte opcode instructions. + */ for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) { entry = &opcode_offset[i]; - if (entry->opcode == code[0] || entry->opcode == 0) + if (entry->opcode == code[0]) break; } From 0b0882672640ced4deeebf84da0b88b6389619c4 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 17 Nov 2017 16:32:22 +0100 Subject: [PATCH 039/333] s390/zcrypt: Fix wrong comparison leading to strange load balancing The function to decide if one zcrypt queue is better than another one compared two pointers instead of comparing the values where the pointers refer to. So within the same zcrypt card when load of each queue was equal just one queue was used. This effect only appears on relatively lite load, typically with one thread applications. This patch fixes the wrong comparison and now the counters show that requests are balanced equally over all available queues within the cards. There is no performance improvement coming with this fix. As long as the queue depth for an APQN queue is not touched, processing is not faster when requests are spread over queues within the same card hardware. So this fix only beautifies the lszcrypt counter printouts. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index b5f4006198b9..a9a56aa9c26b 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -218,8 +218,8 @@ static inline bool zcrypt_queue_compare(struct zcrypt_queue *zq, weight += atomic_read(&zq->load); pref_weight += atomic_read(&pref_zq->load); if (weight == pref_weight) - return &zq->queue->total_request_count > - &pref_zq->queue->total_request_count; + return zq->queue->total_request_count > + pref_zq->queue->total_request_count; return weight > pref_weight; } From 10809bb976648ac58194a629e3d7af99e7400297 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 15 Oct 2017 21:24:49 +0200 Subject: [PATCH 040/333] ACPI / bus: Leave modalias empty for devices which are not present Most Bay and Cherry Trail devices use a generic DSDT with all possible peripheral devices present in the DSDT, with their _STA returning 0x00 or 0x0f based on AML variables which describe what is actually present on the board. Since ACPI device objects with a 0x00 status (not present) still get an entry under /sys/bus/acpi/devices, and those entry had an acpi:PNPID modalias, userspace would end up loading modules for non present hardware. This commit fixes this by leaving the modalias empty for non present devices. This results in 10 modules less being loaded with a generic distro kernel config on my Cherry Trail test-device (a GPD pocket). Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 24418932612e..a041689e5701 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -146,6 +146,10 @@ static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, int count; struct acpi_hardware_id *id; + /* Avoid unnecessarily loading modules for non present devices. */ + if (!acpi_device_is_present(acpi_dev)) + return 0; + /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the From a64a62ce9a380213dc9e192f762266d70c9b40ec Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 26 Sep 2017 16:54:09 +0800 Subject: [PATCH 041/333] ACPI / EC: Fix regression related to PM ops support in ECDT device On platforms (ASUS X550ZE and possibly all ASUS X series) with valid ECDT EC but invalid DSDT EC, EC PM ops won't be invoked as ECDT EC is not an ACPI device. Thus the following commit actually removed post-resume acpi_ec_enable_event() invocation for such platforms, and triggered a regression on them that after being resumed, EC (actually should be ECDT) driver stops handling EC events: Commit: c2b46d679b30c5c0d7eb47a21085943242bdd8dc Subject: ACPI / EC: Add PM operations to improve event handling for resume process Notice that the root cause actually is "ECDT is not an ACPI device" rather than "the timing of acpi_ec_enable_event() invocation", this patch fixes this issue by enumerating ECDT EC as an ACPI device. Due to the existence of the noirq stage, the ability of tuning the timing of acpi_ec_enable_event() invocation is still meaningful. This patch is a little bit different from the posted fix by moving acpi_config_boot_ec() from acpi_ec_ecdt_start() to acpi_ec_add() to make sure that EC event handling won't be stopped as long as the ACPI EC driver is bound. Thus the following sequence shouldn't disable EC event handling: unbind,suspend,resume,bind. Fixes: c2b46d679b30 (ACPI / EC: Add PM operations to improve event handling for resume process) Link: https://bugzilla.kernel.org/show_bug.cgi?id=196847 Reported-by: Luya Tshimbalanga Tested-by: Luya Tshimbalanga Cc: 4.9+ # 4.9+ Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 69 ++++++++++++++++++++++++------------- drivers/acpi/internal.h | 1 + drivers/acpi/scan.c | 21 +++++++++++ include/acpi/acpi_bus.h | 1 + include/acpi/acpi_drivers.h | 1 + 5 files changed, 69 insertions(+), 24 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 82b3ce5e937e..df842465634a 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1597,32 +1597,41 @@ static int acpi_ec_add(struct acpi_device *device) { struct acpi_ec *ec = NULL; int ret; + bool is_ecdt = false; + acpi_status status; strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_EC_CLASS); - ec = acpi_ec_alloc(); - if (!ec) - return -ENOMEM; - if (ec_parse_device(device->handle, 0, ec, NULL) != - AE_CTRL_TERMINATE) { + if (!strcmp(acpi_device_hid(device), ACPI_ECDT_HID)) { + is_ecdt = true; + ec = boot_ec; + } else { + ec = acpi_ec_alloc(); + if (!ec) + return -ENOMEM; + status = ec_parse_device(device->handle, 0, ec, NULL); + if (status != AE_CTRL_TERMINATE) { ret = -EINVAL; goto err_alloc; + } } if (acpi_is_boot_ec(ec)) { - boot_ec_is_ecdt = false; - /* - * Trust PNP0C09 namespace location rather than ECDT ID. - * - * But trust ECDT GPE rather than _GPE because of ASUS quirks, - * so do not change boot_ec->gpe to ec->gpe. - */ - boot_ec->handle = ec->handle; - acpi_handle_debug(ec->handle, "duplicated.\n"); - acpi_ec_free(ec); - ec = boot_ec; - ret = acpi_config_boot_ec(ec, ec->handle, true, false); + boot_ec_is_ecdt = is_ecdt; + if (!is_ecdt) { + /* + * Trust PNP0C09 namespace location rather than + * ECDT ID. But trust ECDT GPE rather than _GPE + * because of ASUS quirks, so do not change + * boot_ec->gpe to ec->gpe. + */ + boot_ec->handle = ec->handle; + acpi_handle_debug(ec->handle, "duplicated.\n"); + acpi_ec_free(ec); + ec = boot_ec; + } + ret = acpi_config_boot_ec(ec, ec->handle, true, is_ecdt); } else ret = acpi_ec_setup(ec, true); if (ret) @@ -1635,8 +1644,10 @@ static int acpi_ec_add(struct acpi_device *device) ret = !!request_region(ec->command_addr, 1, "EC cmd"); WARN(!ret, "Could not request EC cmd io port 0x%lx", ec->command_addr); - /* Reprobe devices depending on the EC */ - acpi_walk_dep_device_list(ec->handle); + if (!is_ecdt) { + /* Reprobe devices depending on the EC */ + acpi_walk_dep_device_list(ec->handle); + } acpi_handle_debug(ec->handle, "enumerated.\n"); return 0; @@ -1692,6 +1703,7 @@ ec_parse_io_ports(struct acpi_resource *resource, void *context) static const struct acpi_device_id ec_device_ids[] = { {"PNP0C09", 0}, + {ACPI_ECDT_HID, 0}, {"", 0}, }; @@ -1764,11 +1776,14 @@ static int __init acpi_ec_ecdt_start(void) * Note: ec->handle can be valid if this function is called after * acpi_ec_add(), hence the fast path. */ - if (boot_ec->handle != ACPI_ROOT_OBJECT) - handle = boot_ec->handle; - else if (!acpi_ec_ecdt_get_handle(&handle)) - return -ENODEV; - return acpi_config_boot_ec(boot_ec, handle, true, true); + if (boot_ec->handle == ACPI_ROOT_OBJECT) { + if (!acpi_ec_ecdt_get_handle(&handle)) + return -ENODEV; + boot_ec->handle = handle; + } + + /* Register to ACPI bus with PM ops attached */ + return acpi_bus_register_early_device(ACPI_BUS_TYPE_ECDT_EC); } #if 0 @@ -2020,6 +2035,12 @@ int __init acpi_ec_init(void) /* Drivers must be started after acpi_ec_query_init() */ dsdt_fail = acpi_bus_register_driver(&acpi_ec_driver); + /* + * Register ECDT to ACPI bus only when PNP0C09 probe fails. This is + * useful for platforms (confirmed on ASUS X550ZE) with valid ECDT + * settings but invalid DSDT settings. + * https://bugzilla.kernel.org/show_bug.cgi?id=196847 + */ ecdt_fail = acpi_ec_ecdt_start(); return ecdt_fail && dsdt_fail ? -ENODEV : 0; } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 4361c4415b4f..ede83d38beed 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -115,6 +115,7 @@ bool acpi_device_is_present(const struct acpi_device *adev); bool acpi_device_is_battery(struct acpi_device *adev); bool acpi_device_is_first_physical_node(struct acpi_device *adev, const struct device *dev); +int acpi_bus_register_early_device(int type); /* -------------------------------------------------------------------------- Device Matching and Notification diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 602f8ff212f2..2f2f50322ffb 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1024,6 +1024,9 @@ static void acpi_device_get_busid(struct acpi_device *device) case ACPI_BUS_TYPE_SLEEP_BUTTON: strcpy(device->pnp.bus_id, "SLPF"); break; + case ACPI_BUS_TYPE_ECDT_EC: + strcpy(device->pnp.bus_id, "ECDT"); + break; default: acpi_get_name(device->handle, ACPI_SINGLE_NAME, &buffer); /* Clean up trailing underscores (if any) */ @@ -1304,6 +1307,9 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, case ACPI_BUS_TYPE_SLEEP_BUTTON: acpi_add_id(pnp, ACPI_BUTTON_HID_SLEEPF); break; + case ACPI_BUS_TYPE_ECDT_EC: + acpi_add_id(pnp, ACPI_ECDT_HID); + break; } } @@ -2049,6 +2055,21 @@ void acpi_bus_trim(struct acpi_device *adev) } EXPORT_SYMBOL_GPL(acpi_bus_trim); +int acpi_bus_register_early_device(int type) +{ + struct acpi_device *device = NULL; + int result; + + result = acpi_add_single_object(&device, NULL, + type, ACPI_STA_DEFAULT); + if (result) + return result; + + device->flags.match_driver = true; + return device_attach(&device->dev); +} +EXPORT_SYMBOL_GPL(acpi_bus_register_early_device); + static int acpi_bus_scan_fixed(void) { int result = 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index fa1505292f6c..324a04df3785 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -105,6 +105,7 @@ enum acpi_bus_device_type { ACPI_BUS_TYPE_THERMAL, ACPI_BUS_TYPE_POWER_BUTTON, ACPI_BUS_TYPE_SLEEP_BUTTON, + ACPI_BUS_TYPE_ECDT_EC, ACPI_BUS_DEVICE_TYPE_COUNT }; diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 29c691265b49..14499757338f 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -58,6 +58,7 @@ #define ACPI_VIDEO_HID "LNXVIDEO" #define ACPI_BAY_HID "LNXIOBAY" #define ACPI_DOCK_HID "LNXDOCK" +#define ACPI_ECDT_HID "LNXEC" /* Quirk for broken IBM BIOSes */ #define ACPI_SMBUS_IBM_HID "SMBUSIBM" From 0d307935fefa6389eb726c6362351c162c949101 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 15 Nov 2017 21:17:55 +0000 Subject: [PATCH 042/333] cpufreq: Add Loongson machine dependencies The MIPS loongson cpufreq drivers don't build unless configured for the correct machine type, due to dependency on machine specific architecture headers and symbols in machine specific platform code. More specifically loongson1-cpufreq.c uses RST_CPU_EN and RST_CPU, neither of which is defined in asm/mach-loongson32/regs-clk.h unless CONFIG_LOONGSON1_LS1B=y, and loongson2_cpufreq.c references loongson2_clockmod_table[], which is only defined in arch/mips/loongson64/lemote-2f/clock.c, i.e. when CONFIG_LEMOTE_MACH2F=y. Add these dependencies to Kconfig to avoid randconfig / allyesconfig build failures (e.g. when based on BMIPS which also has a cpufreq driver). Signed-off-by: James Hogan Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 4ebae43118ef..d8addbce40bc 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -275,6 +275,7 @@ config BMIPS_CPUFREQ config LOONGSON2_CPUFREQ tristate "Loongson2 CPUFreq Driver" + depends on LEMOTE_MACH2F help This option adds a CPUFreq driver for loongson processors which support software configurable cpu frequency. @@ -287,6 +288,7 @@ config LOONGSON2_CPUFREQ config LOONGSON1_CPUFREQ tristate "Loongson1 CPUFreq Driver" + depends on LOONGSON1_LS1B help This option adds a CPUFreq driver for loongson1 processors which support software configurable cpu frequency. From 7e8a09e05a0f0ccaf6b2a16f12ed6edc8e62c47d Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 13:32:01 -0800 Subject: [PATCH 043/333] cpufreq: mediatek: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/cpufreq/mediatek-cpufreq.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/mediatek-cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 18c4bd9a5c65..e0d5090b303d 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -620,3 +620,7 @@ static int __init mtk_cpufreq_driver_init(void) return 0; } device_initcall(mtk_cpufreq_driver_init); + +MODULE_DESCRIPTION("MediaTek CPUFreq driver"); +MODULE_AUTHOR("Pi-Cheng Chen "); +MODULE_LICENSE("GPL v2"); From 1addb798e93893d33c8dfab743cd44f09fd7719a Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 8 Nov 2017 17:29:44 +0100 Subject: [PATCH 044/333] null_blk: fix dev->badblocks leak null_alloc_dev() allocates memory for dev->badblocks, but cleanup currently only occurs in the configfs release codepath, missing a number of other places. This bug was found running the blktests block/010 test, alongside kmemleak: rapido1:/blktests# ./check block/010 ... rapido1:/blktests# echo scan > /sys/kernel/debug/kmemleak [ 306.966708] kmemleak: 32 new suspected memory leaks (see /sys/kernel/debug/kmemleak) rapido1:/blktests# cat /sys/kernel/debug/kmemleak unreferenced object 0xffff88001f86d000 (size 4096): comm "modprobe", pid 231, jiffies 4294892415 (age 318.252s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x49/0xa0 [] kmem_cache_alloc+0x9f/0xe0 [] badblocks_init+0x2f/0x60 [] 0xffffffffa0019fae [] nullb_device_badblocks_store+0x63/0x130 [null_blk] [] do_one_initcall+0x3d/0x170 [] do_init_module+0x56/0x1e9 [] load_module+0x1c47/0x26a0 [] SyS_finit_module+0xa9/0xd0 [] entry_SYSCALL_64_fastpath+0x13/0x94 Fixes: 2f54a613c942 ("nullb: badbblocks support") Reviewed-by: Shaohua Li Signed-off-by: David Disseldorp Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index c61960deb74a..ccb9975a97fa 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -471,7 +471,6 @@ static void nullb_device_release(struct config_item *item) { struct nullb_device *dev = to_nullb_device(item); - badblocks_exit(&dev->badblocks); null_free_device_storage(dev, false); null_free_dev(dev); } @@ -582,6 +581,10 @@ static struct nullb_device *null_alloc_dev(void) static void null_free_dev(struct nullb_device *dev) { + if (!dev) + return; + + badblocks_exit(&dev->badblocks); kfree(dev); } From f341a4d384f7fced2ca0d9472ed88fe94de32726 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 22 Nov 2017 13:18:05 -0500 Subject: [PATCH 045/333] block: remove useless assignment in bio_split Remove useless assignment to the variable "split" because the variable is unconditionally assigned later. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 228229f3bb76..8bfdea58159b 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1819,7 +1819,7 @@ EXPORT_SYMBOL(bio_endio); struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) { - struct bio *split = NULL; + struct bio *split; BUG_ON(sectors <= 0); BUG_ON(sectors >= bio_sectors(bio)); From ded13fc11b71fd1351e57c68a130d89a0285f1b6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 22 Nov 2017 14:38:53 +1100 Subject: [PATCH 046/333] KVM: PPC: Book3S HV: Fix migration and HPT resizing of HPT guests on radix hosts This fixes two errors that prevent a guest using the HPT MMU from successfully migrating to a POWER9 host in radix MMU mode, or resizing its HPT when running on a radix host. The first bug was that commit 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page size information", 2017-09-11) missed two uses of hpte_base_page_size(), one in the HPT rehashing code and one in kvm_htab_write() (which is used on the destination side in migrating a HPT guest). Instead we use kvmppc_hpte_base_page_shift(). Having the shift count means that we can use left and right shifts instead of multiplication and division in a few places. Along the way, this adds a check in kvm_htab_write() to ensure that the page size encoding in the incoming HPTEs is recognized, and if not return an EINVAL error to userspace. The second bug was that kvm_htab_write was performing some but not all of the functions of kvmhv_setup_mmu(), resulting in the destination VM being left in radix mode as far as the hardware is concerned. The simplest fix for now is make kvm_htab_write() call kvmppc_setup_partition_table() like kvmppc_hv_setup_htab_rma() does. In future it would be better to refactor the code more extensively to remove the duplication. Fixes: 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page size information") Fixes: 7a84084c6054 ("KVM: PPC: Book3S HV: Set partition table rather than SDR1 on POWER9") Reported-by: Suraj Jitindar Singh Tested-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 37 ++++++++++++++++++----------- arch/powerpc/kvm/book3s_hv.c | 3 +-- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 96753f3aac6d..941c2a3f231b 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -180,6 +180,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, struct iommu_group *grp); extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm); extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm); +extern void kvmppc_setup_partition_table(struct kvm *kvm); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 235319c2574e..966097232d21 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1238,8 +1238,9 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, unsigned long vpte, rpte, guest_rpte; int ret; struct revmap_entry *rev; - unsigned long apsize, psize, avpn, pteg, hash; + unsigned long apsize, avpn, pteg, hash; unsigned long new_idx, new_pteg, replace_vpte; + int pshift; hptep = (__be64 *)(old->virt + (idx << 4)); @@ -1298,8 +1299,8 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, goto out; rpte = be64_to_cpu(hptep[1]); - psize = hpte_base_page_size(vpte, rpte); - avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23); + pshift = kvmppc_hpte_base_page_shift(vpte, rpte); + avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23); pteg = idx / HPTES_PER_GROUP; if (vpte & HPTE_V_SECONDARY) pteg = ~pteg; @@ -1311,20 +1312,20 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, offset = (avpn & 0x1f) << 23; vsid = avpn >> 5; /* We can find more bits from the pteg value */ - if (psize < (1ULL << 23)) - offset |= ((vsid ^ pteg) & old_hash_mask) * psize; + if (pshift < 23) + offset |= ((vsid ^ pteg) & old_hash_mask) << pshift; - hash = vsid ^ (offset / psize); + hash = vsid ^ (offset >> pshift); } else { unsigned long offset, vsid; /* We only have 40 - 23 bits of seg_off in avpn */ offset = (avpn & 0x1ffff) << 23; vsid = avpn >> 17; - if (psize < (1ULL << 23)) - offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize; + if (pshift < 23) + offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift; - hash = vsid ^ (vsid << 25) ^ (offset / psize); + hash = vsid ^ (vsid << 25) ^ (offset >> pshift); } new_pteg = hash & new_hash_mask; @@ -1801,6 +1802,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, ssize_t nb; long int err, ret; int mmu_ready; + int pshift; if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; @@ -1855,6 +1857,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, err = -EINVAL; if (!(v & HPTE_V_VALID)) goto out; + pshift = kvmppc_hpte_base_page_shift(v, r); + if (pshift <= 0) + goto out; lbuf += 2; nb += HPTE_SIZE; @@ -1869,14 +1874,18 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, goto out; } if (!mmu_ready && is_vrma_hpte(v)) { - unsigned long psize = hpte_base_page_size(v, r); - unsigned long senc = slb_pgsize_encoding(psize); - unsigned long lpcr; + unsigned long senc, lpcr; + senc = slb_pgsize_encoding(1ul << pshift); kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | (VRMA_VSID << SLB_VSID_SHIFT_1T); - lpcr = senc << (LPCR_VRMASD_SH - 4); - kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + lpcr = senc << (LPCR_VRMASD_SH - 4); + kvmppc_update_lpcr(kvm, lpcr, + LPCR_VRMASD); + } else { + kvmppc_setup_partition_table(kvm); + } mmu_ready = 1; } ++i; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 79ea3d9269db..2d46037ce936 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); -static void kvmppc_setup_partition_table(struct kvm *kvm); static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, int *ip) @@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) return; } -static void kvmppc_setup_partition_table(struct kvm *kvm) +void kvmppc_setup_partition_table(struct kvm *kvm) { unsigned long dw0, dw1; From 8c97eeccf0ad8783c057830119467b877bdfced7 Mon Sep 17 00:00:00 2001 From: Jeff Lien Date: Tue, 21 Nov 2017 10:44:37 -0600 Subject: [PATCH 047/333] nvme-pci: add quirk for delay before CHK RDY for WDC SN200 And increase the existing delay to cover this device as well. Cc: stable@vger.kernel.org Signed-off-by: Jeff Lien Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/pci.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index c0873a68872f..ea1aa5283e8e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -114,7 +114,7 @@ static inline struct nvme_request *nvme_req(struct request *req) * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was * found empirically. */ -#define NVME_QUIRK_DELAY_AMOUNT 2000 +#define NVME_QUIRK_DELAY_AMOUNT 2300 enum nvme_ctrl_state { NVME_CTRL_NEW, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 58dbe684007b..617374762b7c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2673,6 +2673,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ From 12841f87b7a8ceb3d54f171660f72a86941bfcb3 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 23 Nov 2017 09:08:57 +0530 Subject: [PATCH 048/333] cxl: Check if vphb exists before iterating over AFU devices During an eeh a kernel-oops is reported if no vPHB is allocated to the AFU. This happens as during AFU init, an error in creation of vPHB is a non-fatal error. Hence afu->phb should always be checked for NULL before iterating over it for the virtual AFU pci devices. This patch fixes the kenel-oops by adding a NULL pointer check for afu->phb before it is dereferenced. Fixes: 9e8df8a21963 ("cxl: EEH support") Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Vaibhav Jain Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index bb7fd3f4edab..19969ee86d6f 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -2083,6 +2083,9 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, /* There should only be one entry, but go through the list * anyway */ + if (afu->phb == NULL) + return result; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { if (!afu_dev->driver) continue; @@ -2124,8 +2127,7 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, * Tell the AFU drivers; but we don't care what they * say, we're going away. */ - if (afu->phb != NULL) - cxl_vphb_error_detected(afu, state); + cxl_vphb_error_detected(afu, state); } return PCI_ERS_RESULT_DISCONNECT; } @@ -2265,6 +2267,9 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) if (cxl_afu_select_best_mode(afu)) goto err; + if (afu->phb == NULL) + continue; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { /* Reset the device context. * TODO: make this less disruptive @@ -2327,6 +2332,9 @@ static void cxl_pci_resume(struct pci_dev *pdev) for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; + if (afu->phb == NULL) + continue; + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { if (afu_dev->driver && afu_dev->driver->err_handler && afu_dev->driver->err_handler->resume) From a3961f824cdbe7eb431254dc7d8f6f6767f474aa Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 22 Nov 2017 23:02:07 +0530 Subject: [PATCH 049/333] powerpc/powernv: Fix kexec crashes caused by tlbie tracing Rebooting into a new kernel with kexec fails in trace_tlbie() which is called from native_hpte_clear(). This happens if the running kernel has CONFIG_LOCKDEP enabled. With lockdep enabled, the tracepoints always execute few RCU checks regardless of whether tracing is on or off. We are already in the last phase of kexec sequence in real mode with HILE_BE set. At this point the RCU check ends up in RCU_LOCKDEP_WARN and causes kexec to fail. Fix this by not calling trace_tlbie() from native_hpte_clear(). mpe: It's not safe to call trace points at this point in the kexec path, even if we could avoid the RCU checks/warnings. The only solution is to not call them. Fixes: 0428491cba92 ("powerpc/mm: Trace tlbie(l) instructions") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Mahesh Salgaonkar Reported-by: Aneesh Kumar K.V Suggested-by: Michael Ellerman Acked-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_native_64.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 3848af167df9..640cf566e986 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -47,7 +47,8 @@ DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) +static inline unsigned long ___tlbie(unsigned long vpn, int psize, + int apsize, int ssize) { unsigned long va; unsigned int penc; @@ -100,7 +101,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) : "memory"); break; } - trace_tlbie(0, 0, va, 0, 0, 0, 0); + return va; +} + +static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ + unsigned long rb; + + rb = ___tlbie(vpn, psize, apsize, ssize); + trace_tlbie(0, 0, rb, 0, 0, 0, 0); } static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) @@ -652,7 +661,7 @@ static void native_hpte_clear(void) if (hpte_v & HPTE_V_VALID) { hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn); hptep->v = 0; - __tlbie(vpn, psize, apsize, ssize); + ___tlbie(vpn, psize, apsize, ssize); } } From 34c089e806793a66e450b11bd167db6047399fcd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:27 +0200 Subject: [PATCH 050/333] mmc: block: Fix missing blk_put_request() Ensure blk_get_request() is paired with blk_put_request(). Fixes: 0493f6fe5bde ("mmc: block: Move boot partition locking into a driver op") Fixes: 627c3ccfb46a ("mmc: debugfs: Move block debugfs into block module") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index ea80ff4cd7f9..f60939858586 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -236,6 +236,7 @@ static ssize_t power_ro_lock_store(struct device *dev, req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP; blk_execute_rq(mq->queue, NULL, req, 0); ret = req_to_mmc_queue_req(req)->drv_op_result; + blk_put_request(req); if (!ret) { pr_info("%s: Locking boot partition ro until next power on\n", @@ -2557,6 +2558,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val) *val = ret; ret = 0; } + blk_put_request(req); return ret; } @@ -2587,6 +2589,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp) req_to_mmc_queue_req(req)->drv_op_data = &ext_csd; blk_execute_rq(mq->queue, NULL, req, 0); err = req_to_mmc_queue_req(req)->drv_op_result; + blk_put_request(req); if (err) { pr_err("FAILED %d\n", err); goto out_free; From fb8e456e547ed2c699f64665bd8a3b9bde7b9728 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:28 +0200 Subject: [PATCH 051/333] mmc: block: Check return value of blk_get_request() blk_get_request() can fail, always check the return value. Fixes: 0493f6fe5bde ("mmc: block: Move boot partition locking into a driver op") Fixes: 3ecd8cf23f88 ("mmc: block: move multi-ioctl() to use block layer") Fixes: 614f0388f580 ("mmc: block: move single ioctl() commands to block requests") Fixes: 627c3ccfb46a ("mmc: debugfs: Move block debugfs into block module") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index f60939858586..4a319ddbd956 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -233,6 +233,10 @@ static ssize_t power_ro_lock_store(struct device *dev, /* Dispatch locking to the block layer */ req = blk_get_request(mq->queue, REQ_OP_DRV_OUT, __GFP_RECLAIM); + if (IS_ERR(req)) { + count = PTR_ERR(req); + goto out_put; + } req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP; blk_execute_rq(mq->queue, NULL, req, 0); ret = req_to_mmc_queue_req(req)->drv_op_result; @@ -249,7 +253,7 @@ static ssize_t power_ro_lock_store(struct device *dev, set_disk_ro(part_md->disk, 1); } } - +out_put: mmc_blk_put(md); return count; } @@ -625,6 +629,10 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md, req = blk_get_request(mq->queue, idata->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto cmd_done; + } idatas[0] = idata; req_to_mmc_queue_req(req)->drv_op = rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL; @@ -692,6 +700,10 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md, req = blk_get_request(mq->queue, idata[0]->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto cmd_err; + } req_to_mmc_queue_req(req)->drv_op = rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL; req_to_mmc_queue_req(req)->drv_op_data = idata; @@ -2551,6 +2563,8 @@ static int mmc_dbg_card_status_get(void *data, u64 *val) /* Ask the block layer about the card status */ req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) + return PTR_ERR(req); req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS; blk_execute_rq(mq->queue, NULL, req, 0); ret = req_to_mmc_queue_req(req)->drv_op_result; @@ -2585,6 +2599,10 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp) /* Ask the block layer for the EXT CSD */ req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out_free; + } req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_EXT_CSD; req_to_mmc_queue_req(req)->drv_op_data = &ext_csd; blk_execute_rq(mq->queue, NULL, req, 0); From ebe7dd45cf49e3b49cacbaace17f9f878f21fbea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:29 +0200 Subject: [PATCH 052/333] mmc: core: Do not leave the block driver in a suspended state The block driver must be resumed if the mmc bus fails to suspend the card. Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index a4b49e25fe96..7586ff2ad1f1 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -157,6 +157,9 @@ static int mmc_bus_suspend(struct device *dev) return ret; ret = host->bus_ops->suspend(host); + if (ret) + pm_generic_resume(dev); + return ret; } From f9f0da98819503b06b35e61869d18cf3a8cd3323 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 15:42:30 +0200 Subject: [PATCH 053/333] mmc: block: Ensure that debugfs files are removed The card is not necessarily being removed, but the debugfs files must be removed when the driver is removed, otherwise they will continue to exist after unbinding the card from the driver. e.g. # echo "mmc1:0001" > /sys/bus/mmc/drivers/mmcblk/unbind # cat /sys/kernel/debug/mmc1/mmc1\:0001/ext_csd [ 173.634584] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 [ 173.643356] IP: mmc_ext_csd_open+0x5e/0x170 A complication is that the debugfs_root may have already been removed, so check for that too. Fixes: 627c3ccfb46a ("mmc: debugfs: Move block debugfs into block module") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # 4.14+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 44 ++++++++++++++++++++++++++++++++------ drivers/mmc/core/debugfs.c | 1 + 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4a319ddbd956..ccfa98af1dd3 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -122,6 +122,10 @@ struct mmc_blk_data { struct device_attribute force_ro; struct device_attribute power_ro_lock; int area_type; + + /* debugfs files (only in main mmc_blk_data) */ + struct dentry *status_dentry; + struct dentry *ext_csd_dentry; }; /* Device type for RPMB character devices */ @@ -2653,7 +2657,7 @@ static const struct file_operations mmc_dbg_ext_csd_fops = { .llseek = default_llseek, }; -static int mmc_blk_add_debugfs(struct mmc_card *card) +static int mmc_blk_add_debugfs(struct mmc_card *card, struct mmc_blk_data *md) { struct dentry *root; @@ -2663,28 +2667,53 @@ static int mmc_blk_add_debugfs(struct mmc_card *card) root = card->debugfs_root; if (mmc_card_mmc(card) || mmc_card_sd(card)) { - if (!debugfs_create_file("status", S_IRUSR, root, card, - &mmc_dbg_card_status_fops)) + md->status_dentry = + debugfs_create_file("status", S_IRUSR, root, card, + &mmc_dbg_card_status_fops); + if (!md->status_dentry) return -EIO; } if (mmc_card_mmc(card)) { - if (!debugfs_create_file("ext_csd", S_IRUSR, root, card, - &mmc_dbg_ext_csd_fops)) + md->ext_csd_dentry = + debugfs_create_file("ext_csd", S_IRUSR, root, card, + &mmc_dbg_ext_csd_fops); + if (!md->ext_csd_dentry) return -EIO; } return 0; } +static void mmc_blk_remove_debugfs(struct mmc_card *card, + struct mmc_blk_data *md) +{ + if (!card->debugfs_root) + return; + + if (!IS_ERR_OR_NULL(md->status_dentry)) { + debugfs_remove(md->status_dentry); + md->status_dentry = NULL; + } + + if (!IS_ERR_OR_NULL(md->ext_csd_dentry)) { + debugfs_remove(md->ext_csd_dentry); + md->ext_csd_dentry = NULL; + } +} #else -static int mmc_blk_add_debugfs(struct mmc_card *card) +static int mmc_blk_add_debugfs(struct mmc_card *card, struct mmc_blk_data *md) { return 0; } +static void mmc_blk_remove_debugfs(struct mmc_card *card, + struct mmc_blk_data *md) +{ +} + #endif /* CONFIG_DEBUG_FS */ static int mmc_blk_probe(struct mmc_card *card) @@ -2724,7 +2753,7 @@ static int mmc_blk_probe(struct mmc_card *card) } /* Add two debugfs entries */ - mmc_blk_add_debugfs(card); + mmc_blk_add_debugfs(card, md); pm_runtime_set_autosuspend_delay(&card->dev, 3000); pm_runtime_use_autosuspend(&card->dev); @@ -2750,6 +2779,7 @@ static void mmc_blk_remove(struct mmc_card *card) { struct mmc_blk_data *md = dev_get_drvdata(&card->dev); + mmc_blk_remove_debugfs(card, md); mmc_blk_remove_parts(card, md); pm_runtime_get_sync(&card->dev); mmc_claim_host(card->host); diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c index 01e459a34f33..0f4a7d7b2626 100644 --- a/drivers/mmc/core/debugfs.c +++ b/drivers/mmc/core/debugfs.c @@ -314,4 +314,5 @@ err: void mmc_remove_card_debugfs(struct mmc_card *card) { debugfs_remove_recursive(card->debugfs_root); + card->debugfs_root = NULL; } From 52884f8f66c893e92a73787b5bef32b2ef52e1bc Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 20 Nov 2017 11:56:47 -0800 Subject: [PATCH 054/333] mmc: sdhci-msm: Optionally wait for signal level changes Not all instances of the SDCC core supports changing signal voltage and as such will not generate a power interrupt when the software attempts to change the voltage. This results in probing the eMMC on some devices to take over 2 minutes. Check that the SWITCHABLE_SIGNALING_VOLTAGE bit in MCI_GENERICS is set before waiting for the power interrupt. Cc: Sahitya Tummala Cc: Vijay Viswanath Fixes: c0309b3803fe ("mmc: sdhci-msm: Add sdhci msm register write APIs which wait for pwr irq") Signed-off-by: Bjorn Andersson Tested-by: Luca Weiss Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 3fb7d2eec93f..c283291db705 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -29,6 +29,9 @@ #define CORE_VERSION_MAJOR_MASK (0xf << CORE_VERSION_MAJOR_SHIFT) #define CORE_VERSION_MINOR_MASK 0xff +#define CORE_MCI_GENERICS 0x70 +#define SWITCHABLE_SIGNALING_VOLTAGE BIT(29) + #define CORE_HC_MODE 0x78 #define HC_MODE_EN 0x1 #define CORE_POWER 0x0 @@ -1028,11 +1031,22 @@ static void sdhci_msm_check_power_status(struct sdhci_host *host, u32 req_type) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); bool done = false; + u32 val; pr_debug("%s: %s: request %d curr_pwr_state %x curr_io_level %x\n", mmc_hostname(host->mmc), __func__, req_type, msm_host->curr_pwr_state, msm_host->curr_io_level); + /* + * The power interrupt will not be generated for signal voltage + * switches if SWITCHABLE_SIGNALING_VOLTAGE in MCI_GENERICS is not set. + */ + val = readl(msm_host->core_mem + CORE_MCI_GENERICS); + if ((req_type & REQ_IO_HIGH || req_type & REQ_IO_LOW) && + !(val & SWITCHABLE_SIGNALING_VOLTAGE)) { + return; + } + /* * The IRQ for request type IO High/LOW will be generated when - * there is a state change in 1.8V enable bit (bit 3) of From 612ea091fc77770d659b82ea44a1d5646e0af54c Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:39:03 +0800 Subject: [PATCH 055/333] blk-wbt: remove duplicated setting in wbt_init rwb->wc and rwb->queue_depth were overwritten by wbt_set_write_cache and wbt_set_queue_depth, remove the default setting. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-wbt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index b252da0e4c11..ec903532a5d1 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -723,8 +723,6 @@ int wbt_init(struct request_queue *q) init_waitqueue_head(&rwb->rq_wait[i].wait); } - rwb->wc = 1; - rwb->queue_depth = RWB_DEF_DEPTH; rwb->last_comp = rwb->last_issue = jiffies; rwb->queue = q; rwb->win_nsec = RWB_WINDOW_NSEC; From f680474345f1fd6329d1806d6358066fd3b07f02 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:39:40 +0800 Subject: [PATCH 056/333] blk-sysfs: remove NULL pointer checking in queue_wb_lat_store wbt_init doesn't set q->rq_wb to NULL, if wbt_init return 0, so check return value is enough, remove NULL checking. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e54be402899d..870484eaed1f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -450,12 +450,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, ret = wbt_init(q); if (ret) return ret; - - rwb = q->rq_wb; - if (!rwb) - return -EINVAL; } + rwb = q->rq_wb; if (val == -1) rwb->min_lat_nsec = wbt_default_latency_nsec(q); else if (val >= 0) From 62d772fa9d86856d828cd24dd8fff5c83275c7e1 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:39:55 +0800 Subject: [PATCH 057/333] blk-wbt: move wbt_clear_stat to common place in wbt_done wbt_done call wbt_clear_stat no matter current stat was tracked or not, move it to common place. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-wbt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index ec903532a5d1..19faecc5f8f6 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -178,12 +178,11 @@ void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat) if (wbt_is_read(stat)) wb_timestamp(rwb, &rwb->last_comp); - wbt_clear_state(stat); } else { WARN_ON_ONCE(stat == rwb->sync_cookie); __wbt_done(rwb, wbt_stat_to_mask(stat)); - wbt_clear_state(stat); } + wbt_clear_state(stat); } /* From 3dfbdc44d69b2cd7e382fd084a5c860d2cea24f6 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Thu, 23 Nov 2017 21:40:10 +0800 Subject: [PATCH 058/333] blk-wbt: fix comments typo Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-wbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 19faecc5f8f6..ae8de9780085 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -481,7 +481,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) /* * At this point we know it's a buffered write. If this is - * kswapd trying to free memory, or REQ_SYNC is set, set, then + * kswapd trying to free memory, or REQ_SYNC is set, then * it's WB_SYNC_ALL writeback, and we'll use the max limit for * that. If the write is marked as a background write, then use * the idle limit, or go to normal if we haven't had competing From 2621e945fbf1d6df5f3f0ba7be5bae3d2cf9b6a5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 24 Nov 2017 14:51:02 +1100 Subject: [PATCH 059/333] powerpc/kexec: Fix kexec/kdump in P9 guest kernels The code that cleans up the IAMR/AMOR before kexec'ing failed to remember that when we're running as a guest AMOR is not writable, it's hypervisor privileged. They symptom is that the kexec stops before entering purgatory and nothing else is seen on the console. If you examine the state of the system all threads will be in the 0x700 program check handler. Fix it by making the write to AMOR dependent on HV mode. Fixes: 1e2a516e89fc ("powerpc/kexec: Fix radix to hash kexec due to IAMR/AMOR") Cc: stable@vger.kernel.org # v4.10+ Reported-by: Yilin Zhang Debugged-by: David Gibson Signed-off-by: Michael Ellerman Acked-by: Balbir Singh Reviewed-by: David Gibson Tested-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/misc_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 8ac0bd2bddb0..3280953a82cf 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -623,7 +623,9 @@ BEGIN_FTR_SECTION * NOTE, we rely on r0 being 0 from above. */ mtspr SPRN_IAMR,r0 +BEGIN_FTR_SECTION_NESTED(42) mtspr SPRN_AMOR,r0 +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) /* save regs for local vars on new stack. From 26f4e759ef9b8a2bab1823d692ed6d56d40b66e3 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 23 Nov 2017 10:50:23 +0100 Subject: [PATCH 060/333] s390/disassembler: correct disassembly lines alignment 176.718956 Krnl Code: 00000000004d38b0: a54c0018 llihh %r4,24 176.718956 00000000004d38b4: b9080014 agr %r1,%r4 ^ Using a tab to align disassembly lines which follow the first line with "Krnl Code: " doesn't always work, e.g. if there is a prefix (timestamp or syslog prefix) which is not 8 chars aligned. Go back to alignment with spaces. Fixes: b192571d1ae3 ("s390/disassembler: increase show_code buffer size") Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 465d52b5e470..0a0e14335a04 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -548,7 +548,7 @@ void show_code(struct pt_regs *regs) start += opsize; pr_cont("%s", buffer); ptr = buffer; - ptr += sprintf(ptr, "\n\t "); + ptr += sprintf(ptr, "\n "); hops++; } pr_cont("\n"); From 53c4ab70c11c3ba1b9e3caa8e8c17e9c16d9cbc0 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 22 Nov 2017 17:19:32 +0100 Subject: [PATCH 061/333] s390: fix alloc_pgste check in init_new_context again git commit badb8bb983e9 "fix alloc_pgste check in init_new_context" fixed the problem of 'current->mm == NULL' in init_new_context back in 2011. git commit 3eabaee998c7 "KVM: s390: allow sie enablement for multi- threaded programs" completely removed the check against alloc_pgste. git commit 23fefe119ceb "s390/kvm: avoid global config of vm.alloc_pgste=1" re-added a check against the alloc_pgste flag but without the required check for current->mm != NULL. For execve() called by a kernel thread init_new_context() reads from ((struct mm_struct *) NULL)->context.alloc_pgste to decide between 2K vs 4K page tables. If the bit happens to be set for the init process it will be created with large page tables. This decision is inherited by all the children of init, this waste quite some memory. Re-add the check for 'current->mm != NULL'. Fixes: 23fefe119ceb ("s390/kvm: avoid global config of vm.alloc_pgste=1") Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index f4a07f788f78..65154eaa3714 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -28,7 +28,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste || test_thread_flag(TIF_PGSTE) || - current->mm->context.alloc_pgste; + (current->mm && current->mm->context.alloc_pgste); mm->context.has_pgste = 0; mm->context.use_skey = 0; mm->context.use_cmma = 0; From febae9bc946de2c85e324b20bb7dc7b2d4e49d37 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Mon, 27 Mar 2017 16:20:57 +0100 Subject: [PATCH 062/333] drm: hdlcd: Update PM code to save/restore console. Update the PM code to suspend/resume the fbdev_cma console. Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index 764d0c83710c..45b174fea36b 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -432,9 +433,11 @@ static int __maybe_unused hdlcd_pm_suspend(struct device *dev) return 0; drm_kms_helper_poll_disable(drm); + drm_fbdev_cma_set_suspend_unlocked(hdlcd->fbdev, 1); hdlcd->state = drm_atomic_helper_suspend(drm); if (IS_ERR(hdlcd->state)) { + drm_fbdev_cma_set_suspend_unlocked(hdlcd->fbdev, 0); drm_kms_helper_poll_enable(drm); return PTR_ERR(hdlcd->state); } @@ -451,8 +454,8 @@ static int __maybe_unused hdlcd_pm_resume(struct device *dev) return 0; drm_atomic_helper_resume(drm, hdlcd->state); + drm_fbdev_cma_set_suspend_unlocked(hdlcd->fbdev, 0); drm_kms_helper_poll_enable(drm); - pm_runtime_set_active(dev); return 0; } From 860ec7c6e21cecca8c508a66ee211b56d87f5bd7 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 23 Nov 2017 19:26:20 +0100 Subject: [PATCH 063/333] s390/debug: use pK for kernel pointers the s390 debug feature (/sys/kernel/debug/s390dbf/) shows the kernel pointer of the calling function even for kptr_restrict == 2. Let us use pK instead of p. This hides the kernel addresses for kptr_restrict == 2: root@host $ echo 2 > /proc/sys/kernel/kptr_restrict root@host $ tail -n1 /sys/kernel/debug/s390dbf/cio_msg/sprintf 00 01511461280:386645 2 - 00 0000000000000000 snsid: device 0.0.3f68: rc=0 3990/e9 3390/0c root@host $ echo 1 > /proc/sys/kernel/kptr_restrict root@host $ tail -n1 /sys/kernel/debug/s390dbf/cio_msg/sprintf 00 01511461280:386645 2 - 00 000000000071171c snsid: device 0.0.3f68: rc=0 3990/e9 3390/0c root@host $ echo 0 > /proc/sys/kernel/kptr_restrict root@host $ tail -n1 /sys/kernel/debug/s390dbf/cio_msg/sprintf 00 01511461280:386645 2 - 00 000000000071171c snsid: device 0.0.3f68: rc=0 3990/e9 3390/0c Signed-off-by: Christian Borntraeger Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 58b9e127b615..80e974adb9e8 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1392,7 +1392,7 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %p ", + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ", area, sec, usec, level, except_str, entry->id.fields.cpuid, (void *)caller); return rc; From 6a55d2cdf1bc140665cbfaed14de79acaf3758c4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:00 +0100 Subject: [PATCH 064/333] s390: block: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/block/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Stefan Haberland Cc: Jan Hoeppner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 1 + drivers/s390/block/dasd_devmap.c | 1 + drivers/s390/block/dasd_diag.c | 1 + drivers/s390/block/dasd_eckd.c | 1 + drivers/s390/block/dasd_fba.c | 1 + drivers/s390/block/dcssblk.c | 1 + drivers/s390/block/scm_blk.c | 1 + drivers/s390/block/xpram.c | 1 + 8 files changed, 8 insertions(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 29f35e29d480..fdb3b133e5e0 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Horst Hummel diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index c95a4784c191..e7cd28ff1984 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Horst Hummel diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 98fb28e49d2c..f035c2f25d35 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Based on.......: linux/drivers/s390/block/mdisk.c diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 8eafcd5fa004..1a41ef496338 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Horst Hummel diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 6168ccdb389c..a6b132f7e869 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Author(s)......: Holger Smolinski * Bugreports.to..: diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 7abb240847c0..6aaefb780436 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * dcssblk.c -- the S/390 block driver for dcss memory * diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index eb51893c74a4..b4130c7880d8 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Block driver for s390 storage class memory. * diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 571a0709e1e5..2a6334ca750e 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Xpram.c -- the S/390 expanded memory RAM-disk * From 812141a9fe61446c948a71456c58090ac11a6d14 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:01 +0100 Subject: [PATCH 065/333] s390: crypto: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/crypto/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Harald Freudenberger Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 1 + drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/pkey_api.c | 1 + drivers/s390/crypto/zcrypt_api.c | 1 + drivers/s390/crypto/zcrypt_api.h | 1 + drivers/s390/crypto/zcrypt_card.c | 1 + drivers/s390/crypto/zcrypt_cca_key.h | 1 + drivers/s390/crypto/zcrypt_cex2a.c | 1 + drivers/s390/crypto/zcrypt_cex2a.h | 1 + drivers/s390/crypto/zcrypt_cex4.c | 1 + drivers/s390/crypto/zcrypt_error.h | 1 + drivers/s390/crypto/zcrypt_msgtype50.c | 1 + drivers/s390/crypto/zcrypt_msgtype50.h | 1 + drivers/s390/crypto/zcrypt_msgtype6.c | 1 + drivers/s390/crypto/zcrypt_msgtype6.h | 1 + drivers/s390/crypto/zcrypt_pcixcc.c | 1 + drivers/s390/crypto/zcrypt_pcixcc.h | 1 + drivers/s390/crypto/zcrypt_queue.c | 1 + 18 files changed, 18 insertions(+) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index faeba9db3d95..658626039171 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright IBM Corp. 2006, 2012 * Author(s): Cornelia Huck diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 7e45c4d08cad..b43e4d650940 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright IBM Corp. 2006, 2012 * Author(s): Cornelia Huck diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 8dda5bb34a2f..869ace34972d 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * pkey device driver * diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index a9a56aa9c26b..620407ac2faa 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 73541a798db7..4883d51521fd 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index f85dacf1c284..c0e7072f18a9 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index 12cff6262566..ebb9fbcb9bc0 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index b97c5d5ee5a4..e00ffa44f85c 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h index 0dce4b9af184..6f6830e0bab3 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.h +++ b/drivers/s390/crypto/zcrypt_cex2a.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index e2eebc775a37..f305538334ad 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * Author(s): Holger Dengler diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 13df60209ed3..8d8892d14bd0 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index db5bde47dfb0..8fdf5d65c4d2 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index 5cc280318ee7..f6ce799c2239 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 785620d30504..3a19b69b846b 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 7a0d5b57821f..52d140889377 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index 600604782b65..9963018a6bbc 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h index eacafc8962f2..a85778d5763c 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.h +++ b/drivers/s390/crypto/zcrypt_pcixcc.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 4742be0eec24..5103727b4633 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * zcrypt 2.1.0 * From 724117b77bbe2b28f27728d58a432ed22630e33f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:02 +0100 Subject: [PATCH 066/333] s390: cio: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/cio/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Sebastian Ott Cc: Peter Oberparleiter Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Cornelia Huck Cc: Dong Jia Shi Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/ccwgroup.c | 1 + drivers/s390/cio/chp.c | 1 + drivers/s390/cio/chsc.c | 1 + drivers/s390/cio/chsc_sch.c | 1 + drivers/s390/cio/cio.c | 1 + drivers/s390/cio/cmf.c | 1 + drivers/s390/cio/css.c | 1 + drivers/s390/cio/device.c | 1 + drivers/s390/cio/device_fsm.c | 1 + drivers/s390/cio/device_ops.c | 1 + drivers/s390/cio/eadm_sch.c | 1 + drivers/s390/cio/isc.c | 1 + drivers/s390/cio/qdio_main.c | 1 + drivers/s390/cio/qdio_setup.c | 1 + drivers/s390/cio/scm.c | 1 + drivers/s390/cio/vfio_ccw_drv.c | 1 + 16 files changed, 16 insertions(+) diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index e2f7b6e93efd..bfec1485ca23 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bus driver for ccwgroup * diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index f4166f80c4d4..5c94a3aec4dd 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 1999, 2010 * Author(s): Cornelia Huck (cornelia.huck@de.ibm.com) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 7b0b295b2313..c08fc5a8df0c 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S/390 common I/O routines -- channel subsystem call * diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 8e7e19b9e92c..0015729d917d 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for s390 chsc subchannels * diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 89216174fcbb..987bf9a8c9f7 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S/390 common I/O routines -- low level i/o calls * diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 7d59230e88bb..320ce009ba41 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Linux on zSeries Channel Measurement Facility support * diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index d3e504c3c362..f4c2e71e7a56 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * driver for channel subsystem * diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 318d8269f5de..e4ed6c651bca 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * bus driver for ccw devices * diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index dd7d79d30edc..1319122e9d12 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * finite state machine for device handling * diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index cf8c4ac6323a..aa125d92cc65 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * Copyright IBM Corp. 2002, 2009 * diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c index ce16e4f45d44..53468ae64b99 100644 --- a/drivers/s390/cio/eadm_sch.c +++ b/drivers/s390/cio/eadm_sch.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for s390 eadm subchannels * diff --git a/drivers/s390/cio/isc.c b/drivers/s390/cio/isc.c index c592087be0f1..77fde9f5ea8b 100644 --- a/drivers/s390/cio/isc.c +++ b/drivers/s390/cio/isc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Functions for registration of I/O interruption subclasses on s390. * diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index ed4852fab44b..59b4a3370cd5 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux for s390 qdio support, buffer handling, qdio API and module support. * diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 9ae1380cbc31..98f3cfdc0d02 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * qdio queue initialization * diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c index 1fa53ecdc2aa..6bca1d5455d4 100644 --- a/drivers/s390/cio/scm.c +++ b/drivers/s390/cio/scm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Recognize and maintain s390 storage class memory. * diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 82f05c4b8c52..ea6a2d0b2894 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * VFIO based Physical Subchannel device driver * From 6f05e69e44d8167d3c97c3b3b657a94f3cb41d71 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:03 +0100 Subject: [PATCH 067/333] s390: char: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/char/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/fs3270.c | 1 + drivers/s390/char/hmcdrv_mod.c | 1 + drivers/s390/char/monreader.c | 1 + drivers/s390/char/monwriter.c | 1 + drivers/s390/char/raw3270.c | 1 + drivers/s390/char/sclp_async.c | 1 + drivers/s390/char/tape_34xx.c | 1 + drivers/s390/char/tape_3590.c | 1 + drivers/s390/char/tape_class.c | 1 + drivers/s390/char/tape_core.c | 1 + drivers/s390/char/tty3270.c | 1 + drivers/s390/char/vmlogrdr.c | 1 + drivers/s390/char/vmur.c | 1 + drivers/s390/char/zcore.c | 1 + 14 files changed, 14 insertions(+) diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index c4518168fd02..61822480a2a0 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IBM/3270 Driver - fullscreen driver. * diff --git a/drivers/s390/char/hmcdrv_mod.c b/drivers/s390/char/hmcdrv_mod.c index 251a318a9b75..1447d0887225 100644 --- a/drivers/s390/char/hmcdrv_mod.c +++ b/drivers/s390/char/hmcdrv_mod.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * HMC Drive DVD Module * diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c index 027ac6ae5eea..bf4ab4efed73 100644 --- a/drivers/s390/char/monreader.c +++ b/drivers/s390/char/monreader.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Character device driver for reading z/VM *MONITOR service records. * diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index 571a7e352755..76c158c41510 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Character device driver for writing z/VM *MONITOR service records. * diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 5d4f053d7c38..f8cd2935fbfd 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IBM/3270 Driver - core functions. * diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c index 19c25427f27f..ee6f3b563728 100644 --- a/drivers/s390/char/sclp_async.c +++ b/drivers/s390/char/sclp_async.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Enable Asynchronous Notification via SCLP. * diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index de69f0ddc321..6d73ee3f827a 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * tape device discipline for 3480/3490 tapes. * diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index e352047ed9f7..37e65a05517f 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * tape device discipline for 3590 tapes. * diff --git a/drivers/s390/char/tape_class.c b/drivers/s390/char/tape_class.c index e7d23048d3f0..a07102472ce9 100644 --- a/drivers/s390/char/tape_class.c +++ b/drivers/s390/char/tape_class.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2004 * diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 32503a60ee85..8d3370da2dfc 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * basic function of the tape device driver * diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index e417ccd9e299..1c98023cffd4 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IBM/3270 Driver - tty functions. * diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 62559dc0169f..069b9ef08206 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * character device driver for reading z/VM system service records * diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index fa90ef05afc0..52aa89424318 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux driver for System z and s390 unit record devices * (z/VM virtual punch, reader, printer) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index aaed778f67c4..dd86559ce7fb 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * zcore module to export memory content and register sets for creating system * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same From ab9953ff0f2e37092dc247ddd7c62fe6f03618dc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:04 +0100 Subject: [PATCH 068/333] s390: net: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/net/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Julian Wiedmann Cc: Ursula Braun Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/net/ctcm_main.c | 1 + drivers/s390/net/fsm.c | 1 + drivers/s390/net/lcs.c | 1 + drivers/s390/net/netiucv.c | 1 + drivers/s390/net/qeth_core_main.c | 1 + drivers/s390/net/qeth_core_sys.c | 1 + drivers/s390/net/qeth_l2_main.c | 1 + drivers/s390/net/qeth_l3_main.c | 1 + drivers/s390/net/smsgiucv.c | 1 + drivers/s390/net/smsgiucv_app.c | 1 + 10 files changed, 10 insertions(+) diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index be9f17218531..7ce98b70cad3 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2001, 2009 * Author(s): diff --git a/drivers/s390/net/fsm.c b/drivers/s390/net/fsm.c index 8c14c6c3ad3d..f0c7c182b077 100644 --- a/drivers/s390/net/fsm.c +++ b/drivers/s390/net/fsm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /** * A generic FSM based on fsm used in isdn4linux * diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index e131a03262ad..36899e94d268 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Linux for S/390 Lan Channel Station Network Driver * diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index b9c7c1e61da2..150053131baf 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * IUCV network driver * diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 49b9efeba1bd..98a7f84540ab 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2009 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index b22ed2a57acd..ae81534de912 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index d2537c09126d..93d7e345d180 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2009 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index aadd384316a3..0f8c12738b06 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2009 * Author(s): Utz Bacher , diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c index a851d34c642b..93405e0bad3c 100644 --- a/drivers/s390/net/smsgiucv.c +++ b/drivers/s390/net/smsgiucv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * IUCV special message driver * diff --git a/drivers/s390/net/smsgiucv_app.c b/drivers/s390/net/smsgiucv_app.c index 32515a201bbc..0a263999f7ae 100644 --- a/drivers/s390/net/smsgiucv_app.c +++ b/drivers/s390/net/smsgiucv_app.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Deliver z/VM CP special messages (SMSG) as uevents. * From 40bf411ee6d1f2d9833486a9d4318734180a997e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:05 +0100 Subject: [PATCH 069/333] s390: scsi: zfcp_aux: add SPDX identifier It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/scsi/zfcp_aux.c file with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Steffen Maier Cc: Benjamin Block Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/scsi/zfcp_aux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 84752152d41f..a3a8c8d9d717 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * zfcp device driver * From 31fb16730a0d8baf0c04c93ebf606340c9ea8393 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:06 +0100 Subject: [PATCH 070/333] s390: virtio: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/s390/virtio/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Cornelia Huck Cc: Halil Pasic Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/virtio/Makefile | 1 + drivers/s390/virtio/virtio_ccw.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile index f68af1f317f1..7c6f0c8db883 100644 --- a/drivers/s390/virtio/Makefile +++ b/drivers/s390/virtio/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # Makefile for kvm guest drivers on s390 # # Copyright IBM Corp. 2008 diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index b18fe2014cf2..56346caadb21 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * ccw based virtio transport * From 0b622e60bc6c4eca75d517b10f15914ecd58e6b1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:07 +0100 Subject: [PATCH 071/333] s390: crypto: Remove redundant license text Now that the SPDX tag is in all drivers/s390/crypto/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Harald Freudenberger Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 14 -------------- drivers/s390/crypto/ap_bus.h | 14 -------------- drivers/s390/crypto/pkey_api.c | 5 ----- drivers/s390/crypto/zcrypt_api.c | 14 -------------- drivers/s390/crypto/zcrypt_api.h | 14 -------------- drivers/s390/crypto/zcrypt_card.c | 10 ---------- drivers/s390/crypto/zcrypt_cca_key.h | 14 -------------- drivers/s390/crypto/zcrypt_cex2a.c | 14 -------------- drivers/s390/crypto/zcrypt_cex2a.h | 14 -------------- drivers/s390/crypto/zcrypt_error.h | 14 -------------- drivers/s390/crypto/zcrypt_msgtype50.c | 14 -------------- drivers/s390/crypto/zcrypt_msgtype50.h | 14 -------------- drivers/s390/crypto/zcrypt_msgtype6.c | 14 -------------- drivers/s390/crypto/zcrypt_msgtype6.h | 14 -------------- drivers/s390/crypto/zcrypt_pcixcc.c | 14 -------------- drivers/s390/crypto/zcrypt_pcixcc.h | 14 -------------- drivers/s390/crypto/zcrypt_queue.c | 10 ---------- 17 files changed, 221 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 658626039171..48d55dc9e986 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -8,20 +8,6 @@ * Holger Dengler * * Adjunct processor bus. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "ap" diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index b43e4d650940..e0827eaa42f1 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -8,20 +8,6 @@ * Holger Dengler * * Adjunct processor bus header file. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _AP_BUS_H_ diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 869ace34972d..e7c2e4f9529a 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -4,11 +4,6 @@ * * Copyright IBM Corp. 2017 * Author(s): Harald Freudenberger - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #define KMSG_COMPONENT "pkey" diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 620407ac2faa..ce15f101ee28 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -11,20 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 4883d51521fd..9fff8912f6e3 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -11,20 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_API_H_ diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index c0e7072f18a9..233e1e695208 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -11,16 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index ebb9fbcb9bc0..011d61d8a4ae 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -8,20 +8,6 @@ * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_CCA_KEY_H_ diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index e00ffa44f85c..e701194d3611 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h index 6f6830e0bab3..c3c116777c93 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.h +++ b/drivers/s390/crypto/zcrypt_cex2a.h @@ -8,20 +8,6 @@ * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_CEX2A_H_ diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 8d8892d14bd0..01598d83c60a 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -8,20 +8,6 @@ * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_ERROR_H_ diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 8fdf5d65c4d2..afe1b2bcd7ec 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "zcrypt" diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index f6ce799c2239..0a36545cfb8e 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -9,20 +9,6 @@ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_MSGTYPE50_H_ diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 3a19b69b846b..f54bef4a928e 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "zcrypt" diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 52d140889377..d314f4525518 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -9,20 +9,6 @@ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_MSGTYPE6_H_ diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index 9963018a6bbc..159b0a0dd211 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -10,20 +10,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h index a85778d5763c..d678a3af83a7 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.h +++ b/drivers/s390/crypto/zcrypt_pcixcc.h @@ -9,20 +9,6 @@ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) * Major cleanup & driver split: Martin Schwidefsky * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _ZCRYPT_PCIXCC_H_ diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 5103727b4633..720434e18007 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -11,16 +11,6 @@ * Major cleanup & driver split: Martin Schwidefsky * Ralph Wuerthner * MSGTYPE restruct: Holger Dengler - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include From 13d1d559f04a893b4a32ec04fb9d7210ec4d9597 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Nov 2017 18:38:08 +0100 Subject: [PATCH 072/333] s390: drivers: Remove redundant license text Now that the SPDX tag is in all drivers/s390/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Sebastian Ott Cc: Peter Oberparleiter Cc: Julian Wiedmann Cc: Ursula Braun Cc: Cornelia Huck Cc: Halil Pasic Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/zcore.c | 1 - drivers/s390/cio/cmf.c | 14 -------------- drivers/s390/cio/css.c | 2 -- drivers/s390/cio/device.c | 2 -- drivers/s390/cio/device_ops.c | 2 -- drivers/s390/net/lcs.c | 14 -------------- drivers/s390/net/netiucv.c | 15 --------------- drivers/s390/net/smsgiucv.c | 14 -------------- drivers/s390/virtio/Makefile | 4 ---- drivers/s390/virtio/virtio_ccw.c | 4 ---- 10 files changed, 72 deletions(-) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index dd86559ce7fb..4369662cfff5 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -8,7 +8,6 @@ * * Copyright IBM Corp. 2003, 2008 * Author(s): Michael Holzheu - * License: GPL */ #define KMSG_COMPONENT "zdump" diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 320ce009ba41..5e495c62cfa7 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -8,20 +8,6 @@ * Cornelia Huck * * original idea from Natarajan Krishnaswami - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "cio" diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index f4c2e71e7a56..0f11dce6e224 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -6,8 +6,6 @@ * * Author(s): Arnd Bergmann (arndb@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) - * - * License: GPL */ #define KMSG_COMPONENT "cio" diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e4ed6c651bca..75a245f38e2e 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -6,8 +6,6 @@ * Author(s): Arnd Bergmann (arndb@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * License: GPL */ #define KMSG_COMPONENT "cio" diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index aa125d92cc65..1caf6a398760 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -4,8 +4,6 @@ * * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) - * - * License: GPL */ #include #include diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 36899e94d268..92ae84a927fc 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -8,20 +8,6 @@ * Rewritten by * Frank Pavlic and * Martin Schwidefsky - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define KMSG_COMPONENT "lcs" diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 150053131baf..5ce2424ca729 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -19,21 +19,6 @@ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) * Alan Altmark (Alan_Altmark@us.ibm.com) Sept. 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * */ #define KMSG_COMPONENT "netiucv" diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c index 93405e0bad3c..3b0c8b8a7634 100644 --- a/drivers/s390/net/smsgiucv.c +++ b/drivers/s390/net/smsgiucv.c @@ -5,20 +5,6 @@ * Copyright IBM Corp. 2003, 2009 * * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile index 7c6f0c8db883..2dc4d9aab634 100644 --- a/drivers/s390/virtio/Makefile +++ b/drivers/s390/virtio/Makefile @@ -2,9 +2,5 @@ # Makefile for kvm guest drivers on s390 # # Copyright IBM Corp. 2008 -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License (version 2 only) -# as published by the Free Software Foundation. obj-$(CONFIG_S390_GUEST) += virtio_ccw.o diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 56346caadb21..ba2e0856d22c 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2012, 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Cornelia Huck */ From b1c0de0e51504b84cf4a3eb46541e98557bb460d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Nov 2017 15:19:32 +0100 Subject: [PATCH 073/333] s390: sthyi: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/kernel/sthyi file with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sthyi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 12981e197f01..db163cf3606a 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * store hypervisor information instruction emulation functions. * From a17ae4c3a6add7579e9962df5dd12cb1f3bed431 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:32 +0100 Subject: [PATCH 074/333] s390: kernel: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/kernel/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 1 + arch/s390/kernel/dumpstack.c | 1 + arch/s390/kernel/ipl.c | 1 + arch/s390/kernel/kprobes.c | 1 + arch/s390/kernel/lgr.c | 1 + arch/s390/kernel/module.c | 1 + arch/s390/kernel/nmi.c | 1 + arch/s390/kernel/perf_cpum_cf.c | 1 + arch/s390/kernel/perf_cpum_sf.c | 1 + arch/s390/kernel/perf_event.c | 1 + arch/s390/kernel/setup.c | 1 + arch/s390/kernel/stacktrace.c | 1 + arch/s390/kernel/time.c | 1 + arch/s390/kernel/topology.c | 1 + arch/s390/kernel/vdso.c | 1 + arch/s390/kernel/vdso32/clock_getres.S | 1 + arch/s390/kernel/vdso32/clock_gettime.S | 1 + arch/s390/kernel/vdso32/gettimeofday.S | 1 + arch/s390/kernel/vdso64/clock_getres.S | 1 + arch/s390/kernel/vdso64/clock_gettime.S | 1 + arch/s390/kernel/vdso64/gettimeofday.S | 1 + arch/s390/kernel/vtime.c | 1 + 22 files changed, 22 insertions(+) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 0a0e14335a04..b2c68fbf2634 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Disassemble s390 instructions. * diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 2aa545dca4d5..5b23c4f6e50c 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Stack dumping functions * diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 310e59e6eb4b..8ecb8726ac47 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * ipl/reipl/dump support for Linux on s390. * diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1a6521af1751..ee055f7bdb10 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Kernel Probes (KProbes) * diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index bf9622f0e6b1..452502f9a0d9 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux Guest Relocation (LGR) detection * diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 7b87991416fd..10d104589978 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Kernel module help for s390. * diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 6ff169253cae..c7a627620e5e 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Machine check handler * diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 746d03423333..8f6296860186 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for s390x - CPU-measurement Counter Facility * diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 227b38bd82c9..6e404caac856 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for the System z CPU-measurement Sampling Facility * diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 93a386f4a3b5..0ef308851374 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for s390x * diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 090053cf279b..793da97f9a6e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S390 version * Copyright IBM Corp. 1999, 2012 diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index e66687dc6144..460dcfba7d4e 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Stack trace management functions * diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 5cbd52169348..44c012c866d2 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Time of day based timer functions. * diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index f9b393d4a078..4d5b65e527b5 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 * Author(s): Heiko Carstens diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 39a218703c50..88f6fab68eb2 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * vdso setup for s390 * diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index eca3f001f081..eb9f22cbfd47 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_getres() for 32 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index a5769b83d90e..b642c0382238 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_gettime() for 32 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index 63b86dceb0bf..8a89b54246d7 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of gettimeofday() for 32 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index c8513deb8c66..416bf711c2aa 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_getres() for 64 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 5d7b56b49458..d2dee331b7cd 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_gettime() for 64 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index b02e62f3bc12..cf62a1611054 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of gettimeofday() for 64 bits processes in a * s390 kernel for use in the vDSO diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index dd7178fbb4f3..f24395a01918 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Virtual cpu timer based timer functions. * From 20a884f5e0180a6f67bd650bdb9d703c415c1436 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:34 +0100 Subject: [PATCH 075/333] s390: crypto: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/crypto/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Herbert Xu Cc: "David S. Miller" Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 1 + arch/s390/crypto/arch_random.c | 1 + arch/s390/crypto/crc32-vx.c | 1 + arch/s390/crypto/des_s390.c | 1 + arch/s390/crypto/ghash_s390.c | 1 + arch/s390/crypto/paes_s390.c | 1 + arch/s390/crypto/prng.c | 1 + arch/s390/crypto/sha.h | 1 + arch/s390/crypto/sha256_s390.c | 1 + arch/s390/crypto/sha512_s390.c | 1 + arch/s390/crypto/sha_common.c | 1 + 11 files changed, 11 insertions(+) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index b48e20dd94e9..5a7b60a65f96 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 36aefc07d10c..7ad00354fb25 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * s390 arch random implementation. * diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 992e630c227b..436865926c26 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Crypto-API module for CRC-32 algorithms implemented with the * z/Architecture Vector Extension Facility. diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 0d296662bbf0..c871b1086408 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 564616d48d8b..3b7f96c9eead 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Cryptographic API. * diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index a4e903ed7e21..53926a2b7285 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Cryptographic API. * diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 3e47c4a0f18b..a97a1802cfb4 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2006, 2015 * Author(s): Jan Glauber diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 10f200790079..19d344d49392 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Cryptographic API. * diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 53c277999a28..4a31d6416d41 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 2f4caa1ef123..e22ed139d0b6 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index c740f77285b2..d359ec7fdd6b 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * From ac41aaeedc62648208eb1b32cff768a9ffcfdd23 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:35 +0100 Subject: [PATCH 076/333] s390: mm: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/mm/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/mm/cmm.c | 1 + arch/s390/mm/gmap.c | 1 + arch/s390/mm/mmap.c | 1 + arch/s390/mm/pgtable.c | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 2dbdcd85b68f..3596ef91cf68 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Collaborative memory management interface. * diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index b2c140193b0a..05d459b638f5 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * KVM guest address space mapping code * diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5bea139517a2..5ec9a8c2b768 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * flexible mmap layout support * diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index ae677f814bc0..4f2b65d01a70 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 * Author(s): Martin Schwidefsky From adbb3901685fb94a4a0ac62f7859724964054287 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:36 +0100 Subject: [PATCH 077/333] s390: pci: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/pci/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Sebastian Ott Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 1 + arch/s390/pci/pci_debug.c | 1 + arch/s390/pci/pci_dma.c | 1 + arch/s390/pci/pci_insn.c | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 0fe649c0d542..4902fed221c0 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index c2f786f0ea06..b482e95b6249 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012,2015 * diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 0d300ee00f4e..f7aa5a77827e 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 81b840bc6e4e..19bcb3b45a70 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * s390 specific pci instructions * From 0caa8cdf1a02a8b45dbd8641c7b3e896051c1254 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:37 +0100 Subject: [PATCH 078/333] s390: appldata: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the arch/s390/appldata/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 1 + arch/s390/appldata/appldata_mem.c | 1 + arch/s390/appldata/appldata_net_sum.c | 1 + arch/s390/appldata/appldata_os.c | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ef3fb1b9201f..cb6e8066b1ad 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1. * Exports appldata_register_ops() and appldata_unregister_ops() for the diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 598df5708501..e68136c3c23a 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects data related to memory management. diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 66037d2622b4..8bc14b0d1def 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects accumulated network statistics (Packets received/transmitted, diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 45b3178200ab..433a994b1a89 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects misc. OS related data (CPU utilization, running processes). From 0b73214f8a4d7790efd68b9cc183d26e472ccefd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:38 +0100 Subject: [PATCH 079/333] s390: add SPDX identifiers to the remaining files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the remaining arch/s390/ files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 1 + arch/s390/boot/install.sh | 1 + arch/s390/hypfs/inode.c | 1 + arch/s390/include/asm/cpu_mf.h | 1 + arch/s390/include/asm/kprobes.h | 1 + arch/s390/include/asm/kvm_host.h | 1 + arch/s390/include/asm/kvm_para.h | 1 + arch/s390/include/asm/livepatch.h | 1 + arch/s390/include/asm/syscall.h | 1 + arch/s390/include/asm/sysinfo.h | 1 + 10 files changed, 10 insertions(+) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 6b3f41985f28..ffd95861f9ac 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # s390/Makefile # diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index aed3069699bd..32a9367b9bf0 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0 # # arch/s390x/boot/install.sh # diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index cf8a2d92467f..05d62cdb78b5 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * Hypervisor filesystem for Linux on s390. * diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 792cda339af1..a9ba5e322bf3 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * CPU-measurement facilities * diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 921391f2341e..396b065ebb2c 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _ASM_S390_KPROBES_H #define _ASM_S390_KPROBES_H /* diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f3a9b5a445b6..66efb2979351 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for kernel virtual machines on s390 * diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 41393052ac57..2c623bf5c095 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for paravirtual devices on s390 * diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 6de5c6cb0061..d1f97fafcc20 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * livepatch.h - s390-specific Kernel Live Patching Core * diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 6bc941be6921..4d6463eb7a2d 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Access to user system call parameters and results * diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index a702cb9d4269..c2137e7d9310 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for store system information stsi * From 53634237e72b4f3c2dff2f92ec35792207730a98 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:40 +0100 Subject: [PATCH 080/333] s390: kernel: Remove redundant license text Now that the SPDX tag is in all arch/s390/kernel/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kprobes.c | 14 -------------- arch/s390/kernel/module.c | 14 -------------- arch/s390/kernel/perf_cpum_cf.c | 4 ---- arch/s390/kernel/perf_cpum_sf.c | 4 ---- arch/s390/kernel/perf_event.c | 4 ---- arch/s390/kernel/sthyi.c | 4 ---- arch/s390/kernel/vdso.c | 4 ---- arch/s390/kernel/vdso32/clock_getres.S | 4 ---- arch/s390/kernel/vdso32/clock_gettime.S | 4 ---- arch/s390/kernel/vdso32/gettimeofday.S | 4 ---- arch/s390/kernel/vdso64/clock_getres.S | 4 ---- arch/s390/kernel/vdso64/clock_gettime.S | 4 ---- arch/s390/kernel/vdso64/gettimeofday.S | 4 ---- 13 files changed, 72 deletions(-) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index ee055f7bdb10..af3722c28fd9 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -2,20 +2,6 @@ /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright IBM Corp. 2002, 2006 * * s390 port, used ppc64 as template. Mike Grundy diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 10d104589978..b7abfad4fd7d 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -9,20 +9,6 @@ * * based on i386 version * Copyright (C) 2001 Rusty Russell. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 8f6296860186..cc085e2d2ce9 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2012, 2017 * Author(s): Hendrik Brueckner - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #define KMSG_COMPONENT "cpum_cf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 6e404caac856..1c9ddd7aa5ec 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2013 * Author(s): Hendrik Brueckner - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #define KMSG_COMPONENT "cpum_sf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 0ef308851374..0d770e513abf 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2012, 2013 * Author(s): Hendrik Brueckner - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #define KMSG_COMPONENT "perf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index db163cf3606a..80b862e9c53c 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -2,10 +2,6 @@ /* * store hypervisor information instruction emulation functions. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Copyright IBM Corp. 2016 * Author(s): Janosch Frank */ diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 88f6fab68eb2..f3a1c7c6824e 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index eb9f22cbfd47..f61df5253c23 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index b642c0382238..2d6ec3abe095 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index 8a89b54246d7..aa8bf13a2edb 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 416bf711c2aa..faf5213b15df 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index d2dee331b7cd..6046b3bfca46 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index cf62a1611054..cc9dbc27da6f 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -5,10 +5,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include #include From 94bf2f28c9923abe7a1fccc126a86a2401cb5a47 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:41 +0100 Subject: [PATCH 081/333] s390: include: Remove redundant license text Now that the SPDX tag is in all arch/s390/include/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Halil Pasic Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 4 ---- arch/s390/include/asm/kprobes.h | 14 -------------- arch/s390/include/asm/kvm_host.h | 4 ---- arch/s390/include/asm/kvm_para.h | 6 ------ arch/s390/include/asm/livepatch.h | 7 ------- arch/s390/include/asm/syscall.h | 4 ---- arch/s390/include/asm/sysinfo.h | 4 ---- arch/s390/include/uapi/asm/kvm.h | 4 ---- arch/s390/include/uapi/asm/kvm_para.h | 4 ---- arch/s390/include/uapi/asm/kvm_perf.h | 4 ---- arch/s390/include/uapi/asm/virtio-ccw.h | 4 ---- arch/s390/include/uapi/asm/zcrypt.h | 14 -------------- 12 files changed, 73 deletions(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index a9ba5e322bf3..dd08db491b89 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -5,10 +5,6 @@ * Copyright IBM Corp. 2012 * Author(s): Hendrik Brueckner * Jan Glauber - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #ifndef _ASM_S390_CPU_MF_H #define _ASM_S390_CPU_MF_H diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 396b065ebb2c..13de80cf741c 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -4,20 +4,6 @@ /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright IBM Corp. 2002, 2006 * * 2002-Oct Created by Vamsi Krishna S Kernel diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 66efb2979351..e14f381757f6 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008, 2009 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte */ diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 2c623bf5c095..74eeec9c0a80 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Christian Borntraeger */ /* @@ -21,8 +17,6 @@ * * Copyright IBM Corp. 2007,2008 * Author(s): Christian Borntraeger - * - * This work is licensed under the terms of the GNU GPL, version 2. */ #ifndef __S390_KVM_PARA_H #define __S390_KVM_PARA_H diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index d1f97fafcc20..672f95b12d40 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -8,13 +8,6 @@ * Jiri Slaby */ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - #ifndef ASM_LIVEPATCH_H #define ASM_LIVEPATCH_H diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 4d6463eb7a2d..96f9a9151fde 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #ifndef _ASM_SYSCALL_H diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index c2137e7d9310..25057c118d56 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2001, 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Ulrich Weigand * Christian Borntraeger */ diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 9ad172dcd912..38535a57fef8 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -6,10 +6,6 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger */ diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h index 0dc86b3a7cb0..b9ab584adf43 100644 --- a/arch/s390/include/uapi/asm/kvm_para.h +++ b/arch/s390/include/uapi/asm/kvm_para.h @@ -4,9 +4,5 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Christian Borntraeger */ diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h index c36c97ffdc6f..84606b8cc49e 100644 --- a/arch/s390/include/uapi/asm/kvm_perf.h +++ b/arch/s390/include/uapi/asm/kvm_perf.h @@ -4,10 +4,6 @@ * * Copyright 2014 IBM Corp. * Author(s): Alexander Yarygin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #ifndef __LINUX_KVM_PERF_S390_H diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h index 967aad390105..3a77833c74dc 100644 --- a/arch/s390/include/uapi/asm/virtio-ccw.h +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2013 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Cornelia Huck */ #ifndef __KVM_VIRTIO_CCW_H diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 137ef473584e..d568307321fc 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -9,20 +9,6 @@ * Eric Rossman (edrossma@us.ibm.com) * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __ASM_S390_ZCRYPT_H From a876ca4ddef01e8737da5c672e878c67798cb975 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:42 +0100 Subject: [PATCH 082/333] s390: crypto: Remove redundant license text Now that the SPDX tag is in all arch/s390/crypto/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Herbert Xu Cc: "David S. Miller" Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 6 ------ arch/s390/crypto/arch_random.c | 5 ----- arch/s390/crypto/des_s390.c | 6 ------ arch/s390/crypto/paes_s390.c | 5 ----- arch/s390/crypto/sha.h | 6 ------ arch/s390/crypto/sha256_s390.c | 6 ------ arch/s390/crypto/sha512_s390.c | 6 ------ arch/s390/crypto/sha_common.c | 6 ------ 8 files changed, 46 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 5a7b60a65f96..d60798737d86 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -12,12 +12,6 @@ * Harald Freudenberger * * Derived from "crypto/aes_generic.c" - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #define KMSG_COMPONENT "aes_s390" diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 7ad00354fb25..8720e9203ecf 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -4,11 +4,6 @@ * * Copyright IBM Corp. 2017 * Author(s): Harald Freudenberger - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #include diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index c871b1086408..5346b5a80bb6 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -7,12 +7,6 @@ * Copyright IBM Corp. 2003, 2011 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * */ #include diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 53926a2b7285..003932db8d12 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -8,11 +8,6 @@ * Copyright IBM Corp. 2017 * Author(s): Martin Schwidefsky * Harald Freudenberger - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #define KMSG_COMPONENT "paes_s390" diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 19d344d49392..d6f8258b44df 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -6,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #ifndef _CRYPTO_ARCH_S390_SHA_H #define _CRYPTO_ARCH_S390_SHA_H diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 4a31d6416d41..944aa6b237cd 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -7,12 +7,6 @@ * s390 Version: * Copyright IBM Corp. 2005, 2011 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include #include diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index e22ed139d0b6..b17eded532b1 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -6,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include #include diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index d359ec7fdd6b..cf0718d121bc 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -6,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include From fec37202e8a5c060e0c46cd353bf0b6d26b9cb02 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 15:00:43 +0100 Subject: [PATCH 083/333] s390: Remove redundant license text Now that the SPDX tag is in all arch/s390/ files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording in the remaining files can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 4 ---- arch/s390/boot/install.sh | 4 ---- arch/s390/hypfs/inode.c | 1 - arch/s390/mm/mmap.c | 15 --------------- 4 files changed, 24 deletions(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ffd95861f9ac..de54cfc6109d 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -7,10 +7,6 @@ # for "archclean" and "archdep" for cleaning up and making dependencies for # this architecture # -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# # Copyright (C) 1994 by Linus Torvalds # diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index 32a9367b9bf0..bed227f267ae 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -3,10 +3,6 @@ # # arch/s390x/boot/install.sh # -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# # Copyright (C) 1995 by Linus Torvalds # # Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 05d62cdb78b5..43bbe63e2992 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 2006, 2008 * Author(s): Michael Holzheu - * License: GPL */ #define KMSG_COMPONENT "hypfs" diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5ec9a8c2b768..831bdcf407bb 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -5,21 +5,6 @@ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. * All Rights Reserved. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * * Started by Ingo Molnar */ From 345f8f34bb473241d62803951c18a844dd705f8d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Nov 2017 16:23:15 +0100 Subject: [PATCH 084/333] s390: revert ELF_ET_DYN_BASE base changes This reverts commit a73dc5370e153ac63718d850bddf0c9aa9d871e6. Reducing the base address for 31-bit PIE executables from (STACK_TOP/3)*2 to 4MB broke several compat programs which use -fpie to move the executable out of the lower 16MB. Cc: # 4.13+ Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 9a3cb3983c01..1a61b1b997f2 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -194,13 +194,14 @@ struct arch_elf_state { #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE -/* - * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is raised to 4GB to leave the entire 32-bit address - * space open for things that want to use the area for 32-bit pointers. - */ -#define ELF_ET_DYN_BASE (is_compat_task() ? 0x000400000UL : \ - 0x100000000UL) +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. 64-bit + tasks are aligned to 4GB. */ +#define ELF_ET_DYN_BASE (is_compat_task() ? \ + (STACK_TOP / 3 * 2) : \ + (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ From c2cc215cde5b3fd4102c9595df66c6e4ff237be4 Mon Sep 17 00:00:00 2001 From: Cihangir Akturk Date: Fri, 11 Aug 2017 15:32:48 +0300 Subject: [PATCH 085/333] drm: mali-dp: switch to drm_*_get(), drm_*_put() helpers Use drm_*_get() and drm_*_put() helpers instead of drm_*_reference() and drm_*_unreference() helpers. drm_*_reference() and drm_*_unreference() functions are just compatibility alias for drm_*_get() and drm_*_put() and should not be used by new code. So convert all users of compatibility functions to use the new APIs. Generated by: scripts/coccinelle/api/drm-get-put.cocci Signed-off-by: Cihangir Akturk Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_planes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 94e7e3fa3408..f12f8eb126be 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -57,7 +57,7 @@ static void malidp_de_plane_destroy(struct drm_plane *plane) struct malidp_plane *mp = to_malidp_plane(plane); if (mp->base.fb) - drm_framebuffer_unreference(mp->base.fb); + drm_framebuffer_put(mp->base.fb); drm_plane_helper_disable(plane); drm_plane_cleanup(plane); From 0970d7a2f5b0dcdd520c7655210d677f6e9a878e Mon Sep 17 00:00:00 2001 From: Srishti Sharma Date: Fri, 29 Sep 2017 15:30:40 +0530 Subject: [PATCH 086/333] drm/arm: Replace instances of drm_dev_unref with drm_dev_put. Replace drm_dev_unref with drm_dev_put as it is more consistent with kernel coding style. Done using the following semantic patch by coccinelle. @r@ expression e; @@ -drm_dev_unref(); +drm_dev_put(); Signed-off-by: Srishti Sharma [split patch into hdlcd and mali-dp versions] Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index b8944666a18f..a2b698c983b4 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -671,7 +671,7 @@ query_hw_fail: malidp_runtime_pm_suspend(dev); drm->dev_private = NULL; dev_set_drvdata(dev, NULL); - drm_dev_unref(drm); + drm_dev_put(drm); alloc_fail: of_reserved_mem_device_release(dev); @@ -704,7 +704,7 @@ static void malidp_unbind(struct device *dev) malidp_runtime_pm_suspend(dev); drm->dev_private = NULL; dev_set_drvdata(dev, NULL); - drm_dev_unref(drm); + drm_dev_put(drm); of_reserved_mem_device_release(dev); } From a6993b215a719ad5758c1bced5f8df95add070bf Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 31 Aug 2017 15:48:43 +0100 Subject: [PATCH 087/333] drm: mali-dp: Separate static internal data into a read-only structure. The malidp_hw_device structure that the driver uses to handle the differences between versions of the IP contains both non-changeable data and fields that get updated at probe time. Previously we were copying the read-only part into allocated memory, but that can be completely avoided by splitting the structure into a read-only part and keeping the runtime modifiable fields into the old structure. Reviewed-by: Brian Starkey Reviewed-by: Gustavo Padovan Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_crtc.c | 13 +++--- drivers/gpu/drm/arm/malidp_drv.c | 30 ++++++------- drivers/gpu/drm/arm/malidp_hw.c | 46 ++++++++++---------- drivers/gpu/drm/arm/malidp_hw.h | 65 +++++++++++++++++------------ drivers/gpu/drm/arm/malidp_planes.c | 19 ++++----- 5 files changed, 93 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 3615d18a7ddf..153a49670626 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -65,8 +65,8 @@ static void malidp_crtc_atomic_enable(struct drm_crtc *crtc, /* We rely on firmware to set mclk to a sensible level. */ clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000); - hwdev->modeset(hwdev, &vm); - hwdev->leave_config_mode(hwdev); + hwdev->hw->modeset(hwdev, &vm); + hwdev->hw->leave_config_mode(hwdev); drm_crtc_vblank_on(crtc); } @@ -78,7 +78,8 @@ static void malidp_crtc_atomic_disable(struct drm_crtc *crtc, int err; drm_crtc_vblank_off(crtc); - hwdev->enter_config_mode(hwdev); + hwdev->hw->enter_config_mode(hwdev); + clk_disable_unprepare(hwdev->pxlclk); err = pm_runtime_put(crtc->dev->dev); @@ -319,7 +320,7 @@ static int malidp_crtc_atomic_check_scaling(struct drm_crtc *crtc, mclk_calc: drm_display_mode_to_videomode(&state->adjusted_mode, &vm); - ret = hwdev->se_calc_mclk(hwdev, s, &vm); + ret = hwdev->hw->se_calc_mclk(hwdev, s, &vm); if (ret < 0) return -EINVAL; return 0; @@ -475,7 +476,7 @@ static int malidp_crtc_enable_vblank(struct drm_crtc *crtc) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_enable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.vsync_irq); + hwdev->hw->map.de_irq_map.vsync_irq); return 0; } @@ -485,7 +486,7 @@ static void malidp_crtc_disable_vblank(struct drm_crtc *crtc) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_disable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.vsync_irq); + hwdev->hw->map.de_irq_map.vsync_irq); } static const struct drm_crtc_funcs malidp_crtc_funcs = { diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index a2b698c983b4..91f2b0191368 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -47,10 +47,10 @@ static void malidp_write_gamma_table(struct malidp_hw_device *hwdev, * directly. */ malidp_hw_write(hwdev, gamma_write_mask, - hwdev->map.coeffs_base + MALIDP_COEF_TABLE_ADDR); + hwdev->hw->map.coeffs_base + MALIDP_COEF_TABLE_ADDR); for (i = 0; i < MALIDP_COEFFTAB_NUM_COEFFS; ++i) malidp_hw_write(hwdev, data[i], - hwdev->map.coeffs_base + + hwdev->hw->map.coeffs_base + MALIDP_COEF_TABLE_DATA); } @@ -103,7 +103,7 @@ void malidp_atomic_commit_update_coloradj(struct drm_crtc *crtc, for (i = 0; i < MALIDP_COLORADJ_NUM_COEFFS; ++i) malidp_hw_write(hwdev, mc->coloradj_coeffs[i], - hwdev->map.coeffs_base + + hwdev->hw->map.coeffs_base + MALIDP_COLOR_ADJ_COEF + 4 * i); malidp_hw_setbits(hwdev, MALIDP_DISP_FUNC_CADJ, @@ -120,8 +120,8 @@ static void malidp_atomic_commit_se_config(struct drm_crtc *crtc, struct malidp_hw_device *hwdev = malidp->dev; struct malidp_se_config *s = &cs->scaler_config; struct malidp_se_config *old_s = &old_cs->scaler_config; - u32 se_control = hwdev->map.se_base + - ((hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? + u32 se_control = hwdev->hw->map.se_base + + ((hwdev->hw->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? 0x10 : 0xC); u32 layer_control = se_control + MALIDP_SE_LAYER_CONTROL; u32 scr = se_control + MALIDP_SE_SCALING_CONTROL; @@ -135,7 +135,7 @@ static void malidp_atomic_commit_se_config(struct drm_crtc *crtc, return; } - hwdev->se_set_scaling_coeffs(hwdev, s, old_s); + hwdev->hw->se_set_scaling_coeffs(hwdev, s, old_s); val = malidp_hw_read(hwdev, se_control); val |= MALIDP_SE_SCALING_EN | MALIDP_SE_ALPHA_EN; @@ -170,9 +170,9 @@ static int malidp_set_and_wait_config_valid(struct drm_device *drm) int ret; atomic_set(&malidp->config_valid, 0); - hwdev->set_config_valid(hwdev); + hwdev->hw->set_config_valid(hwdev); /* don't wait for config_valid flag if we are in config mode */ - if (hwdev->in_config_mode(hwdev)) + if (hwdev->hw->in_config_mode(hwdev)) return 0; ret = wait_event_interruptible_timeout(malidp->wq, @@ -455,7 +455,7 @@ static int malidp_runtime_pm_suspend(struct device *dev) struct malidp_hw_device *hwdev = malidp->dev; /* we can only suspend if the hardware is in config mode */ - WARN_ON(!hwdev->in_config_mode(hwdev)); + WARN_ON(!hwdev->hw->in_config_mode(hwdev)); hwdev->pm_suspended = true; clk_disable_unprepare(hwdev->mclk); @@ -500,11 +500,7 @@ static int malidp_bind(struct device *dev) if (!hwdev) return -ENOMEM; - /* - * copy the associated data from malidp_drm_of_match to avoid - * having to keep a reference to the OF node after binding - */ - memcpy(hwdev, of_device_get_match_data(dev), sizeof(*hwdev)); + hwdev->hw = (struct malidp_hw *)of_device_get_match_data(dev); malidp->dev = hwdev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -568,13 +564,13 @@ static int malidp_bind(struct device *dev) goto query_hw_fail; } - ret = hwdev->query_hw(hwdev); + ret = hwdev->hw->query_hw(hwdev); if (ret) { DRM_ERROR("Invalid HW configuration\n"); goto query_hw_fail; } - version = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_DE_CORE_ID); + version = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_DE_CORE_ID); DRM_INFO("found ARM Mali-DP%3x version r%dp%d\n", version >> 16, (version >> 12) & 0xf, (version >> 8) & 0xf); @@ -589,7 +585,7 @@ static int malidp_bind(struct device *dev) for (i = 0; i < MAX_OUTPUT_CHANNELS; i++) out_depth = (out_depth << 8) | (output_width[i] & 0xf); - malidp_hw_write(hwdev, out_depth, hwdev->map.out_depth_base); + malidp_hw_write(hwdev, out_depth, hwdev->hw->map.out_depth_base); atomic_set(&malidp->config_valid, 0); init_waitqueue_head(&malidp->wq); diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c index 17bca99e8ac8..2bfb542135ac 100644 --- a/drivers/gpu/drm/arm/malidp_hw.c +++ b/drivers/gpu/drm/arm/malidp_hw.c @@ -183,7 +183,7 @@ static void malidp500_enter_config_mode(struct malidp_hw_device *hwdev) malidp_hw_setbits(hwdev, MALIDP500_DC_CONFIG_REQ, MALIDP500_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP500_DC_CONFIG_REQ) == MALIDP500_DC_CONFIG_REQ) break; /* @@ -203,7 +203,7 @@ static void malidp500_leave_config_mode(struct malidp_hw_device *hwdev) malidp_hw_clearbits(hwdev, MALIDP_CFG_VALID, MALIDP500_CONFIG_VALID); malidp_hw_clearbits(hwdev, MALIDP500_DC_CONFIG_REQ, MALIDP500_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP500_DC_CONFIG_REQ) == 0) break; usleep_range(100, 1000); @@ -216,7 +216,7 @@ static bool malidp500_in_config_mode(struct malidp_hw_device *hwdev) { u32 status; - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP500_DC_CONFIG_REQ) == MALIDP500_DC_CONFIG_REQ) return true; @@ -407,7 +407,7 @@ static void malidp550_enter_config_mode(struct malidp_hw_device *hwdev) malidp_hw_setbits(hwdev, MALIDP550_DC_CONFIG_REQ, MALIDP550_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP550_DC_CONFIG_REQ) == MALIDP550_DC_CONFIG_REQ) break; /* @@ -427,7 +427,7 @@ static void malidp550_leave_config_mode(struct malidp_hw_device *hwdev) malidp_hw_clearbits(hwdev, MALIDP_CFG_VALID, MALIDP550_CONFIG_VALID); malidp_hw_clearbits(hwdev, MALIDP550_DC_CONFIG_REQ, MALIDP550_DC_CONTROL); while (count) { - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP550_DC_CONFIG_REQ) == 0) break; usleep_range(100, 1000); @@ -440,7 +440,7 @@ static bool malidp550_in_config_mode(struct malidp_hw_device *hwdev) { u32 status; - status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); + status = malidp_hw_read(hwdev, hwdev->hw->map.dc_base + MALIDP_REG_STATUS); if ((status & MALIDP550_DC_CONFIG_REQ) == MALIDP550_DC_CONFIG_REQ) return true; @@ -616,7 +616,7 @@ static int malidp650_query_hw(struct malidp_hw_device *hwdev) return 0; } -const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES] = { +const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] = { [MALIDP_500] = { .map = { .coeffs_base = MALIDP500_COEFFS_BASE, @@ -751,7 +751,7 @@ static void malidp_hw_clear_irq(struct malidp_hw_device *hwdev, u8 block, u32 ir { u32 base = malidp_get_block_base(hwdev, block); - if (hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) + if (hwdev->hw->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) malidp_hw_write(hwdev, irq, base + MALIDP_REG_CLEARIRQ); else malidp_hw_write(hwdev, irq, base + MALIDP_REG_STATUS); @@ -762,12 +762,14 @@ static irqreturn_t malidp_de_irq(int irq, void *arg) struct drm_device *drm = arg; struct malidp_drm *malidp = drm->dev_private; struct malidp_hw_device *hwdev; + struct malidp_hw *hw; const struct malidp_irq_map *de; u32 status, mask, dc_status; irqreturn_t ret = IRQ_NONE; hwdev = malidp->dev; - de = &hwdev->map.de_irq_map; + hw = hwdev->hw; + de = &hw->map.de_irq_map; /* * if we are suspended it is likely that we were invoked because @@ -778,8 +780,8 @@ static irqreturn_t malidp_de_irq(int irq, void *arg) return IRQ_NONE; /* first handle the config valid IRQ */ - dc_status = malidp_hw_read(hwdev, hwdev->map.dc_base + MALIDP_REG_STATUS); - if (dc_status & hwdev->map.dc_irq_map.vsync_irq) { + dc_status = malidp_hw_read(hwdev, hw->map.dc_base + MALIDP_REG_STATUS); + if (dc_status & hw->map.dc_irq_map.vsync_irq) { /* we have a page flip event */ atomic_set(&malidp->config_valid, 1); malidp_hw_clear_irq(hwdev, MALIDP_DC_BLOCK, dc_status); @@ -832,11 +834,11 @@ int malidp_de_irq_init(struct drm_device *drm, int irq) /* first enable the DC block IRQs */ malidp_hw_enable_irq(hwdev, MALIDP_DC_BLOCK, - hwdev->map.dc_irq_map.irq_mask); + hwdev->hw->map.dc_irq_map.irq_mask); /* now enable the DE block IRQs */ malidp_hw_enable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.irq_mask); + hwdev->hw->map.de_irq_map.irq_mask); return 0; } @@ -847,9 +849,9 @@ void malidp_de_irq_fini(struct drm_device *drm) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_disable_irq(hwdev, MALIDP_DE_BLOCK, - hwdev->map.de_irq_map.irq_mask); + hwdev->hw->map.de_irq_map.irq_mask); malidp_hw_disable_irq(hwdev, MALIDP_DC_BLOCK, - hwdev->map.dc_irq_map.irq_mask); + hwdev->hw->map.dc_irq_map.irq_mask); } static irqreturn_t malidp_se_irq(int irq, void *arg) @@ -857,6 +859,8 @@ static irqreturn_t malidp_se_irq(int irq, void *arg) struct drm_device *drm = arg; struct malidp_drm *malidp = drm->dev_private; struct malidp_hw_device *hwdev = malidp->dev; + struct malidp_hw *hw = hwdev->hw; + const struct malidp_irq_map *se = &hw->map.se_irq_map; u32 status, mask; /* @@ -867,12 +871,12 @@ static irqreturn_t malidp_se_irq(int irq, void *arg) if (hwdev->pm_suspended) return IRQ_NONE; - status = malidp_hw_read(hwdev, hwdev->map.se_base + MALIDP_REG_STATUS); - if (!(status & hwdev->map.se_irq_map.irq_mask)) + status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS); + if (!(status & se->irq_mask)) return IRQ_NONE; - mask = malidp_hw_read(hwdev, hwdev->map.se_base + MALIDP_REG_MASKIRQ); - status = malidp_hw_read(hwdev, hwdev->map.se_base + MALIDP_REG_STATUS); + mask = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_MASKIRQ); + status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS); status &= mask; /* ToDo: status decoding and firing up of VSYNC and page flip events */ @@ -905,7 +909,7 @@ int malidp_se_irq_init(struct drm_device *drm, int irq) } malidp_hw_enable_irq(hwdev, MALIDP_SE_BLOCK, - hwdev->map.se_irq_map.irq_mask); + hwdev->hw->map.se_irq_map.irq_mask); return 0; } @@ -916,5 +920,5 @@ void malidp_se_irq_fini(struct drm_device *drm) struct malidp_hw_device *hwdev = malidp->dev; malidp_hw_disable_irq(hwdev, MALIDP_SE_BLOCK, - hwdev->map.se_irq_map.irq_mask); + hwdev->hw->map.se_irq_map.irq_mask); } diff --git a/drivers/gpu/drm/arm/malidp_hw.h b/drivers/gpu/drm/arm/malidp_hw.h index 849ad9a30c3a..b0690ebb3565 100644 --- a/drivers/gpu/drm/arm/malidp_hw.h +++ b/drivers/gpu/drm/arm/malidp_hw.h @@ -120,18 +120,14 @@ struct malidp_hw_regmap { /* Unlike DP550/650, DP500 has 3 stride registers in its video layer. */ #define MALIDP_DEVICE_LV_HAS_3_STRIDES BIT(0) -struct malidp_hw_device { - const struct malidp_hw_regmap map; - void __iomem *regs; +struct malidp_hw_device; - /* APB clock */ - struct clk *pclk; - /* AXI clock */ - struct clk *aclk; - /* main clock for display core */ - struct clk *mclk; - /* pixel clock for display core */ - struct clk *pxlclk; +/* + * Static structure containing hardware specific data and pointers to + * functions that behave differently between various versions of the IP. + */ +struct malidp_hw { + const struct malidp_hw_regmap map; /* * Validate the driver instance against the hardware bits @@ -182,15 +178,6 @@ struct malidp_hw_device { struct videomode *vm); u8 features; - - u8 min_line_size; - u16 max_line_size; - - /* track the device PM state */ - bool pm_suspended; - - /* size of memory used for rotating layers, up to two banks available */ - u32 rotation_memory[2]; }; /* Supported variants of the hardware */ @@ -202,7 +189,33 @@ enum { MALIDP_MAX_DEVICES }; -extern const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES]; +extern const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES]; + +/* + * Structure used by the driver during runtime operation. + */ +struct malidp_hw_device { + struct malidp_hw *hw; + void __iomem *regs; + + /* APB clock */ + struct clk *pclk; + /* AXI clock */ + struct clk *aclk; + /* main clock for display core */ + struct clk *mclk; + /* pixel clock for display core */ + struct clk *pxlclk; + + u8 min_line_size; + u16 max_line_size; + + /* track the device PM state */ + bool pm_suspended; + + /* size of memory used for rotating layers, up to two banks available */ + u32 rotation_memory[2]; +}; static inline u32 malidp_hw_read(struct malidp_hw_device *hwdev, u32 reg) { @@ -240,9 +253,9 @@ static inline u32 malidp_get_block_base(struct malidp_hw_device *hwdev, { switch (block) { case MALIDP_SE_BLOCK: - return hwdev->map.se_base; + return hwdev->hw->map.se_base; case MALIDP_DC_BLOCK: - return hwdev->map.dc_base; + return hwdev->hw->map.dc_base; } return 0; @@ -275,7 +288,7 @@ u8 malidp_hw_get_format_id(const struct malidp_hw_regmap *map, static inline bool malidp_hw_pitch_valid(struct malidp_hw_device *hwdev, unsigned int pitch) { - return !(pitch & (hwdev->map.bus_align_bytes - 1)); + return !(pitch & (hwdev->hw->map.bus_align_bytes - 1)); } /* U16.16 */ @@ -308,8 +321,8 @@ static inline void malidp_se_set_enh_coeffs(struct malidp_hw_device *hwdev) }; u32 val = MALIDP_SE_SET_ENH_LIMIT_LOW(MALIDP_SE_ENH_LOW_LEVEL) | MALIDP_SE_SET_ENH_LIMIT_HIGH(MALIDP_SE_ENH_HIGH_LEVEL); - u32 image_enh = hwdev->map.se_base + - ((hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? + u32 image_enh = hwdev->hw->map.se_base + + ((hwdev->hw->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ? 0x10 : 0xC) + MALIDP_SE_IMAGE_ENH; u32 enh_coeffs = image_enh + MALIDP_SE_ENH_COEFF0; int i; diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index f12f8eb126be..e7419797bbd1 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -185,8 +185,9 @@ static int malidp_de_plane_check(struct drm_plane *plane, fb = state->fb; - ms->format = malidp_hw_get_format_id(&mp->hwdev->map, mp->layer->id, - fb->format->format); + ms->format = malidp_hw_get_format_id(&mp->hwdev->hw->map, + mp->layer->id, + fb->format->format); if (ms->format == MALIDP_INVALID_FORMAT_ID) return -EINVAL; @@ -211,7 +212,7 @@ static int malidp_de_plane_check(struct drm_plane *plane, * third plane stride register. */ if (ms->n_planes == 3 && - !(mp->hwdev->features & MALIDP_DEVICE_LV_HAS_3_STRIDES) && + !(mp->hwdev->hw->features & MALIDP_DEVICE_LV_HAS_3_STRIDES) && (state->fb->pitches[1] != state->fb->pitches[2])) return -EINVAL; @@ -229,9 +230,9 @@ static int malidp_de_plane_check(struct drm_plane *plane, if (state->rotation & MALIDP_ROTATED_MASK) { int val; - val = mp->hwdev->rotmem_required(mp->hwdev, state->crtc_h, - state->crtc_w, - fb->format->format); + val = mp->hwdev->hw->rotmem_required(mp->hwdev, state->crtc_h, + state->crtc_w, + fb->format->format); if (val < 0) return val; @@ -251,7 +252,7 @@ static void malidp_de_set_plane_pitches(struct malidp_plane *mp, return; if (num_planes == 3) - num_strides = (mp->hwdev->features & + num_strides = (mp->hwdev->hw->features & MALIDP_DEVICE_LV_HAS_3_STRIDES) ? 3 : 2; for (i = 0; i < num_strides; ++i) @@ -264,13 +265,11 @@ static void malidp_de_plane_update(struct drm_plane *plane, struct drm_plane_state *old_state) { struct malidp_plane *mp; - const struct malidp_hw_regmap *map; struct malidp_plane_state *ms = to_malidp_plane_state(plane->state); u32 src_w, src_h, dest_w, dest_h, val; int i; mp = to_malidp_plane(plane); - map = &mp->hwdev->map; /* convert src values from Q16 fixed point to integer */ src_w = plane->state->src_w >> 16; @@ -363,7 +362,7 @@ static const struct drm_plane_helper_funcs malidp_de_plane_helper_funcs = { int malidp_de_planes_init(struct drm_device *drm) { struct malidp_drm *malidp = drm->dev_private; - const struct malidp_hw_regmap *map = &malidp->dev->map; + const struct malidp_hw_regmap *map = &malidp->dev->hw->map; struct malidp_plane *plane = NULL; enum drm_plane_type plane_type; unsigned long crtcs = 1 << drm->mode_config.num_crtc; From 54243016ae35a0912a680f884835237fd6176820 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 31 Aug 2017 17:39:24 +0100 Subject: [PATCH 088/333] drm: mali-dp: Disable planes when their CRTC gets disabled. Make sure only the planes on the active CRTCs get committed and that all planes on the disabled CRTCs get turned off. Reviewed-by: Brian Starkey Reviewed-by: Gustavo Padovan Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_crtc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 153a49670626..904fff80917b 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -77,6 +77,9 @@ static void malidp_crtc_atomic_disable(struct drm_crtc *crtc, struct malidp_hw_device *hwdev = malidp->dev; int err; + /* always disable planes on the CRTC that is being turned off */ + drm_atomic_helper_disable_planes_on_crtc(old_state, false); + drm_crtc_vblank_off(crtc); hwdev->hw->enter_config_mode(hwdev); From 26c0a26d78bc7c2943d55121a32cb85a4594f8ea Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Nov 2017 10:12:33 -0700 Subject: [PATCH 089/333] nvme-fc: don't use bit masks for set/test_bit() numbers So far harmless, but it's confusing and a bug waiting to happen if the shifts grow larger than 4. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e0577bf33f45..0a8af4daef89 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -31,8 +31,8 @@ enum nvme_fc_queue_flags { - NVME_FC_Q_CONNECTED = (1 << 0), - NVME_FC_Q_LIVE = (1 << 1), + NVME_FC_Q_CONNECTED = 0, + NVME_FC_Q_LIVE, }; #define NVMEFC_QUEUE_DELAY 3 /* ms units */ From bb22cafd75686d799dabfe422571fac4b5c2ed94 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Fri, 24 Nov 2017 15:14:24 -0800 Subject: [PATCH 090/333] bcache: add a comment in journal bucket reading Journal bucket is a circular buffer, the bucket can be like YYYNNNYY, which means the first valid journal in the 7th bucket, and the latest valid journal in third bucket, in this case, if we do not try we the zero index first, We may get a valid journal in the 7th bucket, then we call find_next_bit(bitmap,ca->sb.njournal_buckets, l + 1) to get the first invalid bucket after the 7th bucket, because all these buckets is valid, so no bit 1 in bitmap, thus find_next_bit() function would return with ca->sb.njournal_buckets (8). So, after that, bcache only read journal in 7th and 8the bucket, the first to the third buckets are lost. So, it is important to let developer know that, we need to try the zero index at first in the hash-search, and avoid any breaks in future's code modification. [ML: Fixed whitespace & formatting & file permissions] Signed-off-by: Tang Junhui Signed-off-by: Michael Lyle Reviewed-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/journal.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 02a98ddb592d..5018c56ebb67 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -170,6 +170,11 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) * find a sequence of buckets with valid journal entries */ for (i = 0; i < ca->sb.njournal_buckets; i++) { + /* + * We must try the index l with ZERO first for + * correctness due to the scenario that the journal + * bucket is circular buffer which might have wrapped + */ l = (i * 2654435769U) % ca->sb.njournal_buckets; if (test_bit(l, bitmap)) From cf33c1ee5254c6a430bc1538232b49c3ea13e613 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 24 Nov 2017 15:14:25 -0800 Subject: [PATCH 091/333] bcache: Fix building error on MIPS This patch try to fix the building error on MIPS. The reason is MIPS has already defined the PTR macro, which conflicts with the PTR macro in include/uapi/linux/bcache.h. [fixed by mlyle: corrected a line-length issue] Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Reviewed-by: Michael Lyle Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/alloc.c | 2 +- drivers/md/bcache/extents.c | 2 +- drivers/md/bcache/journal.c | 2 +- include/uapi/linux/bcache.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index a27d85232ce1..a0cc1bc6d884 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -490,7 +490,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, if (b == -1) goto err; - k->ptr[i] = PTR(ca->buckets[b].gen, + k->ptr[i] = MAKE_PTR(ca->buckets[b].gen, bucket_to_sector(c, b), ca->sb.nr_this_dev); diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 41c238fc3733..f9d391711595 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -585,7 +585,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey return false; for (i = 0; i < KEY_PTRS(l); i++) - if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || + if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) return false; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 5018c56ebb67..a87165c1d8e5 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -512,7 +512,7 @@ static void journal_reclaim(struct cache_set *c) continue; ja->cur_idx = next; - k->ptr[n++] = PTR(0, + k->ptr[n++] = MAKE_PTR(0, bucket_to_sector(c, ca->sb.d[ja->cur_idx]), ca->sb.nr_this_dev); } diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 90fc490f973f..821f71a2e48f 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -91,7 +91,7 @@ PTR_FIELD(PTR_GEN, 0, 8) #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) -#define PTR(gen, offset, dev) \ +#define MAKE_PTR(gen, offset, dev) \ ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) /* Bkey utility code */ From e393aa2446150536929140739f09c6ecbcbea7f0 Mon Sep 17 00:00:00 2001 From: Rui Hua Date: Fri, 24 Nov 2017 15:14:26 -0800 Subject: [PATCH 092/333] bcache: recover data from backing when data is clean When we send a read request and hit the clean data in cache device, there is a situation called cache read race in bcache(see the commit in the tail of cache_look_up(), the following explaination just copy from there): The bucket we're reading from might be reused while our bio is in flight, and we could then end up reading the wrong data. We guard against this by checking (in bch_cache_read_endio()) if the pointer is stale again; if so, we treat it as an error (s->iop.error = -EINTR) and reread from the backing device (but we don't pass that error up anywhere) It should be noted that cache read race happened under normal circumstances, not the circumstance when SSD failed, it was counted and shown in /sys/fs/bcache/XXX/internal/cache_read_races. Without this patch, when we use writeback mode, we will never reread from the backing device when cache read race happened, until the whole cache device is clean, because the condition (s->recoverable && (dc && !atomic_read(&dc->has_dirty))) is false in cached_dev_read_error(). In this situation, the s->iop.error(= -EINTR) will be passed up, at last, user will receive -EINTR when it's bio end, this is not suitable, and wield to up-application. In this patch, we use s->read_dirty_data to judge whether the read request hit dirty data in cache device, it is safe to reread data from the backing device when the read request hit clean data. This can not only handle cache read race, but also recover data when failed read request from cache device. [edited by mlyle to fix up whitespace, commit log title, comment spelling] Fixes: d59b23795933 ("bcache: only permit to recovery read error when cache device is clean") Cc: # 4.14 Signed-off-by: Hua Rui Reviewed-by: Michael Lyle Reviewed-by: Coly Li Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 3a7aed7282b2..643c3021624f 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -708,16 +708,15 @@ static void cached_dev_read_error(struct closure *cl) { struct search *s = container_of(cl, struct search, cl); struct bio *bio = &s->bio.bio; - struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); /* - * If cache device is dirty (dc->has_dirty is non-zero), then - * recovery a failed read request from cached device may get a - * stale data back. So read failure recovery is only permitted - * when cache device is clean. + * If read request hit dirty data (s->read_dirty_data is true), + * then recovery a failed read request from cached device may + * get a stale data back. So read failure recovery is only + * permitted when read request hit clean data in cache device, + * or when cache read race happened. */ - if (s->recoverable && - (dc && !atomic_read(&dc->has_dirty))) { + if (s->recoverable && !s->read_dirty_data) { /* Retry from the backing device: */ trace_bcache_read_retry(s->orig_bio); From 6c4ca1e36cdc1a0a7a84797804b87920ccbebf51 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Fri, 24 Nov 2017 15:14:27 -0800 Subject: [PATCH 093/333] bcache: check return value of register_shrinker register_shrinker is now __must_check, so check it to kill a warning. Caller of bch_btree_cache_alloc in super.c appropriately checks return value so this is fully plumbed through. This V2 fixes checkpatch warnings and improves the commit description, as I was too hasty getting the previous version out. Signed-off-by: Michael Lyle Reviewed-by: Vojtech Pavlik Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 11c5503d31dc..81e8dc3dbe5e 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -807,7 +807,10 @@ int bch_btree_cache_alloc(struct cache_set *c) c->shrink.scan_objects = bch_mca_scan; c->shrink.seeks = 4; c->shrink.batch = c->btree_pages * 2; - register_shrinker(&c->shrink); + + if (register_shrinker(&c->shrink)) + pr_warn("bcache: %s: could not register shrinker", + __func__); return 0; } From cf21654b40968609779751b34e7923180968fe5b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:55 -0400 Subject: [PATCH 094/333] drm/amdgpu: Fix SDMA load/unload sequence on HWS disabled mode Fix the SDMA load and unload sequence as suggested by HW document. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 47d1c132ac40..1e3e9be7d77e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -379,29 +379,50 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; + unsigned long end_jiffies; uint32_t sdma_base_addr; + uint32_t data; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, - m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, - m->sdma_rlc_rb_base); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, - m->sdma_rlc_rb_base_hi); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, - m->sdma_rlc_rb_rptr_addr_lo); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, - m->sdma_rlc_rb_rptr_addr_hi); + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - + m->sdma_rlc_doorbell); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdma_rlc_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdma_rlc_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdma_rlc_rb_rptr_addr_hi); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl); @@ -574,9 +595,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); return 0; } From d12fb13f23199faa7e536acec1db49068e5a067d Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 1 Nov 2017 19:21:56 -0400 Subject: [PATCH 095/333] drm/amdkfd: Fix SDMA ring buffer size calculation ffs function return the position of the first bit set on 1 based. (bit zero returns 1). Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 4859d263fa2a..4728fad3fd74 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -202,8 +202,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, struct cik_sdma_rlc_registers *m; m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) << - SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; From 8c946b8988acec785bcf67088b6bd0747f36d2d3 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:57 -0400 Subject: [PATCH 096/333] drm/amdkfd: Fix SDMA oversubsription handling SDMA only supports a fixed number of queues. HWS cannot handle oversubscription. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_process_queue_manager.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 2bec902fc939..a3f1e62c60ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -191,6 +191,24 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: + if (dev->dqm->queue_count >= + CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) { + pr_err("Over-subscription is not allowed for SDMA.\n"); + retval = -EPERM; + goto err_create_queue; + } + + retval = create_cp_queue(pqm, dev, &q, properties, f, *qid); + if (retval != 0) + goto err_create_queue; + pqn->q = q; + pqn->kq = NULL; + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, + &q->properties.vmid); + pr_debug("DQM returned %d for create_queue\n", retval); + print_queue(q); + break; + case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && From c393e9b2d51540b74e18e555df14706098dbf2cc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 13 Nov 2017 18:08:48 +0200 Subject: [PATCH 097/333] drm/amdkfd: fix amdkfd use-after-free GP fault Fix GP fault caused by dev_info() reference to a struct device* after the device has been freed (use after free). kfd_chardev_exit() frees the device so 'kfd_device' should not be used after calling kfd_chardev_exit(). Signed-off-by: Randy Dunlap Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 6c5a9cab55de..f744caeaee04 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "kfd_priv.h" #define KFD_DRIVER_AUTHOR "AMD Inc. and others" @@ -132,7 +133,7 @@ static void __exit kfd_module_exit(void) kfd_process_destroy_wq(); kfd_topology_shutdown(); kfd_chardev_exit(); - dev_info(kfd_device, "Removed module\n"); + pr_info("amdkfd: Removed module\n"); } module_init(kfd_module_init); From b4d085201d86af69cbda2214c6dafc0be240ef9f Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 13 Nov 2017 03:35:27 +0300 Subject: [PATCH 098/333] uapi: fix linux/kfd_ioctl.h userspace compilation errors Consistently use types provided by via to fix the following linux/kfd_ioctl.h userspace compilation errors: /usr/include/linux/kfd_ioctl.h:236:2: error: unknown type name 'uint64_t' uint64_t va_addr; /* to KFD */ /usr/include/linux/kfd_ioctl.h:237:2: error: unknown type name 'uint32_t' uint32_t gpu_id; /* to KFD */ /usr/include/linux/kfd_ioctl.h:238:2: error: unknown type name 'uint32_t' uint32_t pad; /usr/include/linux/kfd_ioctl.h:243:2: error: unknown type name 'uint64_t' uint64_t tile_config_ptr; /usr/include/linux/kfd_ioctl.h:245:2: error: unknown type name 'uint64_t' uint64_t macro_tile_config_ptr; /usr/include/linux/kfd_ioctl.h:249:2: error: unknown type name 'uint32_t' uint32_t num_tile_configs; /usr/include/linux/kfd_ioctl.h:253:2: error: unknown type name 'uint32_t' uint32_t num_macro_tile_configs; /usr/include/linux/kfd_ioctl.h:255:2: error: unknown type name 'uint32_t' uint32_t gpu_id; /* to KFD */ /usr/include/linux/kfd_ioctl.h:256:2: error: unknown type name 'uint32_t' uint32_t gb_addr_config; /* from KFD */ /usr/include/linux/kfd_ioctl.h:257:2: error: unknown type name 'uint32_t' uint32_t num_banks; /* from KFD */ /usr/include/linux/kfd_ioctl.h:258:2: error: unknown type name 'uint32_t' uint32_t num_ranks; /* from KFD */ Fixes: 6a1c9510694fe ("drm/amdkfd: Adding new IOCTL for scratch memory v2") Fixes: 5d71dbc3a5886 ("drm/amdkfd: Implement image tiling mode support v2") Signed-off-by: Dmitry V. Levin Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 731d0df722e3..6e80501368ae 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -233,29 +233,29 @@ struct kfd_ioctl_wait_events_args { }; struct kfd_ioctl_set_scratch_backing_va_args { - uint64_t va_addr; /* to KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u64 va_addr; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_get_tile_config_args { /* to KFD: pointer to tile array */ - uint64_t tile_config_ptr; + __u64 tile_config_ptr; /* to KFD: pointer to macro tile array */ - uint64_t macro_tile_config_ptr; + __u64 macro_tile_config_ptr; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - uint32_t num_tile_configs; + __u32 num_tile_configs; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - uint32_t num_macro_tile_configs; + __u32 num_macro_tile_configs; - uint32_t gpu_id; /* to KFD */ - uint32_t gb_addr_config; /* from KFD */ - uint32_t num_banks; /* from KFD */ - uint32_t num_ranks; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 gb_addr_config; /* from KFD */ + __u32 num_banks; /* from KFD */ + __u32 num_ranks; /* from KFD */ /* struct size can be extended later if needed * without breaking ABI compatibility */ From b4b591c87f2b0f4ebaf3a68d4f13873b241aa584 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:21 +0200 Subject: [PATCH 099/333] nvme-rdma: don't suppress send completions The entire completions suppress mechanism is currently broken because the HCA might retry a send operation (due to dropped ack) after the nvme transaction has completed. In order to handle this, we signal all send completions and introduce a separate done handler for async events as they will be handled differently (as they don't include in-capsule data by definition). Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 54 +++++++++++----------------------------- 1 file changed, 14 insertions(+), 40 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 2c597105a6bf..61511bed8aca 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -77,7 +77,6 @@ enum nvme_rdma_queue_flags { struct nvme_rdma_queue { struct nvme_rdma_qe *rsp_ring; - atomic_t sig_count; int queue_size; size_t cmnd_capsule_len; struct nvme_rdma_ctrl *ctrl; @@ -510,7 +509,6 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, queue->cmnd_capsule_len = sizeof(struct nvme_command); queue->queue_size = queue_size; - atomic_set(&queue->sig_count, 0); queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue, RDMA_PS_TCP, IB_QPT_RC); @@ -1204,21 +1202,9 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) nvme_rdma_wr_error(cq, wc, "SEND"); } -/* - * We want to signal completion at least every queue depth/2. This returns the - * largest power of two that is not above half of (queue size + 1) to optimize - * (avoid divisions). - */ -static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) -{ - int limit = 1 << ilog2((queue->queue_size + 1) / 2); - - return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0; -} - static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, - struct ib_send_wr *first, bool flush) + struct ib_send_wr *first) { struct ib_send_wr wr, *bad_wr; int ret; @@ -1227,31 +1213,12 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, sge->length = sizeof(struct nvme_command), sge->lkey = queue->device->pd->local_dma_lkey; - qe->cqe.done = nvme_rdma_send_done; - wr.next = NULL; wr.wr_cqe = &qe->cqe; wr.sg_list = sge; wr.num_sge = num_sge; wr.opcode = IB_WR_SEND; - wr.send_flags = 0; - - /* - * Unsignalled send completions are another giant desaster in the - * IB Verbs spec: If we don't regularly post signalled sends - * the send queue will fill up and only a QP reset will rescue us. - * Would have been way to obvious to handle this in hardware or - * at least the RDMA stack.. - * - * Always signal the flushes. The magic request used for the flush - * sequencer is not allocated in our driver's tagset and it's - * triggered to be freed by blk_cleanup_queue(). So we need to - * always mark it as signaled to ensure that the "wr_cqe", which is - * embedded in request's payload, is not freed when __ib_process_cq() - * calls wr_cqe->done(). - */ - if (nvme_rdma_queue_sig_limit(queue) || flush) - wr.send_flags |= IB_SEND_SIGNALED; + wr.send_flags = IB_SEND_SIGNALED; if (first) first->next = ≀ @@ -1301,6 +1268,12 @@ static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue) return queue->ctrl->tag_set.tags[queue_idx - 1]; } +static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc) +{ + if (unlikely(wc->status != IB_WC_SUCCESS)) + nvme_rdma_wr_error(cq, wc, "ASYNC"); +} + static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg); @@ -1319,10 +1292,12 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) cmd->common.flags |= NVME_CMD_SGL_METABUF; nvme_rdma_set_sg_null(cmd); + sqe->cqe.done = nvme_rdma_async_done; + ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd), DMA_TO_DEVICE); - ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL, false); + ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL); WARN_ON_ONCE(ret); } @@ -1607,7 +1582,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_qe *sqe = &req->sqe; struct nvme_command *c = sqe->data; - bool flush = false; struct ib_device *dev; blk_status_t ret; int err; @@ -1636,13 +1610,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, goto err; } + sqe->cqe.done = nvme_rdma_send_done; + ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); - if (req_op(rq) == REQ_OP_FLUSH) - flush = true; err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, - req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); + req->mr->need_inval ? &req->reg_wr.wr : NULL); if (unlikely(err)) { nvme_rdma_unmap_data(queue, rq); goto err; From 4af7f7ff92a42b6c713293c99e7982bcfcf51a70 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:22 +0200 Subject: [PATCH 100/333] nvme-rdma: don't complete requests before a send work request has completed In order to guarantee that the HCA will never get an access violation (either from invalidated rkey or from iommu) when retrying a send operation we must complete a request only when both send completion and the nvme cqe has arrived. We need to set the send/recv completions flags atomically because we might have more than a single context accessing the request concurrently (one is cq irq-poll context and the other is user-polling used in IOCB_HIPRI). Only then we are safe to invalidate the rkey (if needed), unmap the host buffers, and complete the IO. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 61511bed8aca..502f7c515a99 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -59,6 +59,9 @@ struct nvme_rdma_request { struct nvme_request req; struct ib_mr *mr; struct nvme_rdma_qe sqe; + union nvme_result result; + __le16 status; + refcount_t ref; struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; u32 num_sge; int nents; @@ -1162,6 +1165,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->num_sge = 1; req->inline_data = false; req->mr->need_inval = false; + refcount_set(&req->ref, 2); /* send and recv completions */ c->common.flags |= NVME_CMD_SGL_METABUF; @@ -1198,8 +1202,19 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) { - if (unlikely(wc->status != IB_WC_SUCCESS)) + struct nvme_rdma_qe *qe = + container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe); + struct nvme_rdma_request *req = + container_of(qe, struct nvme_rdma_request, sqe); + struct request *rq = blk_mq_rq_from_pdu(req); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { nvme_rdma_wr_error(cq, wc, "SEND"); + return; + } + + if (refcount_dec_and_test(&req->ref)) + nvme_end_request(rq, req->status, req->result); } static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, @@ -1318,14 +1333,19 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, } req = blk_mq_rq_to_pdu(rq); - if (rq->tag == tag) - ret = 1; + req->status = cqe->status; + req->result = cqe->result; if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && wc->ex.invalidate_rkey == req->mr->rkey) req->mr->need_inval = false; - nvme_end_request(rq, cqe->status, cqe->result); + if (refcount_dec_and_test(&req->ref)) { + if (rq->tag == tag) + ret = 1; + nvme_end_request(rq, req->status, req->result); + } + return ret; } From 2f122e4f5107cf8ab0e592d63ed816a00110b4fe Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:23 +0200 Subject: [PATCH 101/333] nvme-rdma: wait for local invalidation before completing a request We must not complete a request before the host memory region is invalidated. Luckily we have send with invalidate protocol support so we usually don't need to execute it, but in case the target did not invalidate a memory region for us, we must wait for the invalidation to complete before unmapping host memory and completing the I/O. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 502f7c515a99..c03460d0ca94 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1019,8 +1019,18 @@ static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc) static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) { - if (unlikely(wc->status != IB_WC_SUCCESS)) + struct nvme_rdma_request *req = + container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe); + struct request *rq = blk_mq_rq_from_pdu(req); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { nvme_rdma_wr_error(cq, wc, "LOCAL_INV"); + return; + } + + if (refcount_dec_and_test(&req->ref)) + nvme_end_request(rq, req->status, req->result); + } static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, @@ -1031,7 +1041,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, .opcode = IB_WR_LOCAL_INV, .next = NULL, .num_sge = 0, - .send_flags = 0, + .send_flags = IB_SEND_SIGNALED, .ex.invalidate_rkey = req->mr->rkey, }; @@ -1045,24 +1055,12 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_rdma_ctrl *ctrl = queue->ctrl; struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; - int res; if (!blk_rq_bytes(rq)) return; - if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) { - res = nvme_rdma_inv_rkey(queue, req); - if (unlikely(res < 0)) { - dev_err(ctrl->ctrl.device, - "Queueing INV WR for rkey %#x failed (%d)\n", - req->mr->rkey, res); - nvme_rdma_error_recovery(queue->ctrl); - } - } - ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -1337,8 +1335,19 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->result = cqe->result; if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && - wc->ex.invalidate_rkey == req->mr->rkey) + wc->ex.invalidate_rkey == req->mr->rkey) { req->mr->need_inval = false; + } else if (req->mr->need_inval) { + ret = nvme_rdma_inv_rkey(queue, req); + if (unlikely(ret < 0)) { + dev_err(queue->ctrl->ctrl.device, + "Queueing INV WR for rkey %#x failed (%d)\n", + req->mr->rkey, ret); + nvme_rdma_error_recovery(queue->ctrl); + } + /* the local invalidation completion will end the request */ + return 0; + } if (refcount_dec_and_test(&req->ref)) { if (rq->tag == tag) From 3ef0279bb0031f67537bd8972899a6a23d3064d7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:24 +0200 Subject: [PATCH 102/333] nvme-rdma: Check remotely invalidated rkey matches our expected rkey If we got a remote invalidation on a bogus rkey, this is a protocol error. Fail the connection in this case. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c03460d0ca94..3a952d458e7c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1334,8 +1334,13 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->status = cqe->status; req->result = cqe->result; - if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && - wc->ex.invalidate_rkey == req->mr->rkey) { + if (wc->wc_flags & IB_WC_WITH_INVALIDATE) { + if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) { + dev_err(queue->ctrl->ctrl.device, + "Bogus remote invalidation for rkey %#x\n", + req->mr->rkey); + nvme_rdma_error_recovery(queue->ctrl); + } req->mr->need_inval = false; } else if (req->mr->need_inval) { ret = nvme_rdma_inv_rkey(queue, req); From f41725bbe16b0773302c0cc7dc2e89f54828712d Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Sun, 26 Nov 2017 10:40:55 +0000 Subject: [PATCH 103/333] nvme-rdma: Use mr pool Currently, blk_mq_tagset_iter() iterate over initial hctx tags only. If an I/O scheduler is used, it doesn't iterate the hctx scheduler tags and the static request aren't been updated. For example, while using NVMe over Fabrics RDMA host, this cause us not to reinit the scheduler requests and thus not re-register all the memory regions during the tagset re-initialization in the reconnect flow. This may lead to a memory registration error: "MEMREG for CQE 0xffff88044c14dce8 failed with status memory management operation error (6)" With this commit we don't need to reinit the requests, and thus fix this failure. Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 95 ++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 58 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3a952d458e7c..02ef0771f6b9 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -260,32 +261,6 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) return ret; } -static int nvme_rdma_reinit_request(void *data, struct request *rq) -{ - struct nvme_rdma_ctrl *ctrl = data; - struct nvme_rdma_device *dev = ctrl->device; - struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - int ret = 0; - - if (WARN_ON_ONCE(!req->mr)) - return 0; - - ib_dereg_mr(req->mr); - - req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, - ctrl->max_fr_pages); - if (IS_ERR(req->mr)) { - ret = PTR_ERR(req->mr); - req->mr = NULL; - goto out; - } - - req->mr->need_inval = false; - -out: - return ret; -} - static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx) { @@ -295,9 +270,6 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; struct nvme_rdma_device *dev = queue->device; - if (req->mr) - ib_dereg_mr(req->mr); - nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); } @@ -319,21 +291,9 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, if (ret) return ret; - req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, - ctrl->max_fr_pages); - if (IS_ERR(req->mr)) { - ret = PTR_ERR(req->mr); - goto out_free_qe; - } - req->queue = queue; return 0; - -out_free_qe: - nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command), - DMA_TO_DEVICE); - return -ENOMEM; } static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, @@ -433,6 +393,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; + ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); + rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->ib_cq); @@ -442,6 +404,12 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) nvme_rdma_dev_put(dev); } +static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) +{ + return min_t(u32, NVME_RDMA_MAX_SEGMENTS, + ibdev->attrs.max_fast_reg_page_list_len); +} + static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) { struct ib_device *ibdev; @@ -484,8 +452,22 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_qp; } + ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, + queue->queue_size, + IB_MR_TYPE_MEM_REG, + nvme_rdma_get_max_fr_pages(ibdev)); + if (ret) { + dev_err(queue->ctrl->ctrl.device, + "failed to initialize MR pool sized %d for QID %d\n", + queue->queue_size, idx); + goto out_destroy_ring; + } + return 0; +out_destroy_ring: + nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, + sizeof(struct nvme_completion), DMA_FROM_DEVICE); out_destroy_qp: rdma_destroy_qp(queue->cm_id); out_destroy_ib_cq: @@ -757,8 +739,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->device = ctrl->queues[0].device; - ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ctrl->device->dev->attrs.max_fast_reg_page_list_len); + ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); if (new) { ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); @@ -772,10 +753,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, error = PTR_ERR(ctrl->ctrl.admin_q); goto out_free_tagset; } - } else { - error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); - if (error) - goto out_free_queue; } error = nvme_rdma_start_queue(ctrl, 0); @@ -855,10 +832,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) goto out_free_tag_set; } } else { - ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); - if (ret) - goto out_free_io_queues; - blk_mq_update_nr_hw_queues(&ctrl->tag_set, ctrl->ctrl.queue_count - 1); } @@ -1061,6 +1034,11 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if (!blk_rq_bytes(rq)) return; + if (req->mr) { + ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); + req->mr = NULL; + } + ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -1117,12 +1095,18 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; int nr; + req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs); + if (WARN_ON_ONCE(!req->mr)) + return -EAGAIN; + /* * Align the MR to a 4K page size to match the ctrl page size and * the block virtual boundary. */ nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); if (unlikely(nr < count)) { + ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); + req->mr = NULL; if (nr < 0) return nr; return -EINVAL; @@ -1141,8 +1125,6 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - req->mr->need_inval = true; - sg->addr = cpu_to_le64(req->mr->iova); put_unaligned_le24(req->mr->length, sg->length); put_unaligned_le32(req->mr->rkey, sg->key); @@ -1162,7 +1144,6 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->num_sge = 1; req->inline_data = false; - req->mr->need_inval = false; refcount_set(&req->ref, 2); /* send and recv completions */ c->common.flags |= NVME_CMD_SGL_METABUF; @@ -1341,8 +1322,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->mr->rkey); nvme_rdma_error_recovery(queue->ctrl); } - req->mr->need_inval = false; - } else if (req->mr->need_inval) { + } else if (req->mr) { ret = nvme_rdma_inv_rkey(queue, req); if (unlikely(ret < 0)) { dev_err(queue->ctrl->ctrl.device, @@ -1650,7 +1630,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, sizeof(struct nvme_command), DMA_TO_DEVICE); err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, - req->mr->need_inval ? &req->reg_wr.wr : NULL); + req->mr ? &req->reg_wr.wr : NULL); if (unlikely(err)) { nvme_rdma_unmap_data(queue, rq); goto err; @@ -1798,7 +1778,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, - .reinit_request = nvme_rdma_reinit_request, }; static inline bool From ed81cc612c92167fe7606773e036ee5ccef6c190 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 21 Nov 2017 17:09:59 +0100 Subject: [PATCH 104/333] hwmon: Drop reference to Jean's tree This tree has not been used for over a year, Guenter is taking all the hwmon patches in practice. Signed-off-by: Jean Delvare Cc: Guenter Roeck Signed-off-by: Guenter Roeck --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..533fbb03a95c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6174,7 +6174,6 @@ M: Jean Delvare M: Guenter Roeck L: linux-hwmon@vger.kernel.org W: http://hwmon.wiki.kernel.org/ -T: quilt http://jdelvare.nerim.net/devel/linux/jdelvare-hwmon/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git S: Maintained F: Documentation/hwmon/ From 517f56839f581618d24f2e67a35738a5c6cbaecb Mon Sep 17 00:00:00 2001 From: Mirza Krak Date: Wed, 15 Nov 2017 08:24:46 +0000 Subject: [PATCH 105/333] drm/rockchip: dw-mipi-dsi: fix possible un-balanced runtime PM enable In the case where the bind gets deferred we would end up with a un-balanced runtime PM enable call. Fix this by simply moving the pm_runtime_enable call to the end of the bind function when all paths have succeeded. Signed-off-by: Mirza Krak Signed-off-by: Sandy Huang Link: https://patchwork.freedesktop.org/patch/msgid/1510734286-37434-1-git-send-email-mirza.krak@endian.se --- drivers/gpu/drm/rockchip/dw-mipi-dsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c index 9a20b9dc27c8..f7fc652b0027 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c @@ -1275,8 +1275,6 @@ static int dw_mipi_dsi_bind(struct device *dev, struct device *master, goto err_pllref; } - pm_runtime_enable(dev); - dsi->dsi_host.ops = &dw_mipi_dsi_host_ops; dsi->dsi_host.dev = dev; ret = mipi_dsi_host_register(&dsi->dsi_host); @@ -1291,6 +1289,7 @@ static int dw_mipi_dsi_bind(struct device *dev, struct device *master, } dev_set_drvdata(dev, dsi); + pm_runtime_enable(dev); return 0; err_mipi_dsi_host: From fa56b3f83177e1ad60815f750dd3816bd4c25677 Mon Sep 17 00:00:00 2001 From: Vitor Massaru Iha Date: Mon, 30 Oct 2017 11:36:54 -0200 Subject: [PATCH 106/333] drm: Fix checkpatch issue: "WARNING: braces {} are not necessary for single statement blocks." Signed-off-by: Vitor Massaru Iha Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_crtc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index 72b22b805412..5a5427bbd70e 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -317,9 +317,8 @@ static struct drm_plane *hdlcd_plane_init(struct drm_device *drm) formats, ARRAY_SIZE(formats), NULL, DRM_PLANE_TYPE_PRIMARY, NULL); - if (ret) { + if (ret) return ERR_PTR(ret); - } drm_plane_helper_add(plane, &hdlcd_plane_helper_funcs); hdlcd->plane = plane; From f73e8b82531573a198a4d0e5bff0d3256cbbd1d8 Mon Sep 17 00:00:00 2001 From: Srishti Sharma Date: Fri, 29 Sep 2017 15:30:40 +0530 Subject: [PATCH 107/333] drm/arm: Replace instances of drm_dev_unref with drm_dev_put. Replace drm_dev_unref with drm_dev_put as it is more consistent with kernel coding style. Done using the following semantic patch by coccinelle. @r@ expression e; @@ -drm_dev_unref(); +drm_dev_put(); Signed-off-by: Srishti Sharma [split into hdlcd and malidp specific patches] Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/hdlcd_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index 45b174fea36b..0afb53b1f4e9 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -355,7 +355,7 @@ err_unload: err_free: drm_mode_config_cleanup(drm); dev_set_drvdata(dev, NULL); - drm_dev_unref(drm); + drm_dev_put(drm); return ret; } @@ -380,7 +380,7 @@ static void hdlcd_drm_unbind(struct device *dev) pm_runtime_disable(drm->dev); of_reserved_mem_device_release(drm->dev); drm_mode_config_cleanup(drm); - drm_dev_unref(drm); + drm_dev_put(drm); drm->dev_private = NULL; dev_set_drvdata(dev, NULL); } From 3aaf33bebda8d4ffcc0fc8ef39e6c1ac68823b11 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Nov 2017 11:22:42 +0000 Subject: [PATCH 108/333] ARM: avoid faulting on qemu When qemu starts a kernel in a bare environment, the default SCR has the AW and FW bits clear, which means that the kernel can't modify the PSR A or PSR F bits, and means that FIQs and imprecise aborts are always masked. When running uboot under qemu, the AW and FW SCR bits are set, and the kernel functions normally - and this is how real hardware behaves. Fix this for qemu by ignoring the FIQ bit. Fixes: 8bafae202c82 ("ARM: BUG if jumping to usermode address in kernel mode") Signed-off-by: Russell King --- arch/arm/kernel/entry-header.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 75f7a4e8541a..e056c9a9aa9d 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -299,7 +299,7 @@ mov r2, sp ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr ldr lr, [r2, #\offset + S_PC]! @ get pc - tst r1, #0xcf + tst r1, #PSR_I_BIT | 0x0f bne 1f msr spsr_cxsf, r1 @ save in spsr_svc #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @@ -331,7 +331,7 @@ ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC] @ get pc add sp, sp, #\offset + S_SP - tst r1, #0xcf + tst r1, #PSR_I_BIT | 0x0f bne 1f msr spsr_cxsf, r1 @ save in spsr_svc From b7e5a591502b03b4a1408bb93a15968d6ea446ce Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 20 Nov 2017 10:33:09 -0800 Subject: [PATCH 109/333] RISC-V: Remove __vdso_cmpxchg{32,64} symbol versions These were left over from an earlier version of the port. Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/vdso.lds.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 8c9dce95c11d..3ac08eebd11d 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -70,8 +70,6 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; - __vdso_cmpxchg32; - __vdso_cmpxchg64; local: *; }; } From 28dfbe6ed483e8a589cce88095d7787d61bf9c16 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 23 Oct 2017 15:42:14 -0700 Subject: [PATCH 110/333] RISC-V: Add VDSO entries for clock_get/gettimeofday/getcpu For now these are just placeholders that execute the syscall. We will later optimize them to avoid kernel crossings, but we'd like to have the VDSO entries from the first released kernel version to make the ABI simpler. Signed-off-by: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 6 +++++- arch/riscv/kernel/vdso/clock_getres.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/clock_gettime.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/getcpu.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/gettimeofday.S | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/vdso/vdso.lds.S | 4 ++++ 6 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/kernel/vdso/clock_getres.S create mode 100644 arch/riscv/kernel/vdso/clock_gettime.S create mode 100644 arch/riscv/kernel/vdso/getcpu.S create mode 100644 arch/riscv/kernel/vdso/gettimeofday.S diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 523d0a8ac8db..2dcc4f3070bc 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -1,7 +1,11 @@ # Copied from arch/tile/kernel/vdso/Makefile # Symbols present in the vdso -vdso-syms = rt_sigreturn +vdso-syms = rt_sigreturn +vdso-syms += gettimeofday +vdso-syms += clock_gettime +vdso-syms += clock_getres +vdso-syms += getcpu # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) diff --git a/arch/riscv/kernel/vdso/clock_getres.S b/arch/riscv/kernel/vdso/clock_getres.S new file mode 100644 index 000000000000..edf7e2339648 --- /dev/null +++ b/arch/riscv/kernel/vdso/clock_getres.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */ +ENTRY(__vdso_clock_getres) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_clock_getres + ecall + ret + .cfi_endproc +ENDPROC(__vdso_clock_getres) diff --git a/arch/riscv/kernel/vdso/clock_gettime.S b/arch/riscv/kernel/vdso/clock_gettime.S new file mode 100644 index 000000000000..aac65676c6d5 --- /dev/null +++ b/arch/riscv/kernel/vdso/clock_gettime.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */ +ENTRY(__vdso_clock_gettime) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_clock_gettime + ecall + ret + .cfi_endproc +ENDPROC(__vdso_clock_gettime) diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S new file mode 100644 index 000000000000..cc7e98924484 --- /dev/null +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ +ENTRY(__vdso_getcpu) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_getcpu + ecall + ret + .cfi_endproc +ENDPROC(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/gettimeofday.S b/arch/riscv/kernel/vdso/gettimeofday.S new file mode 100644 index 000000000000..da85d33e8990 --- /dev/null +++ b/arch/riscv/kernel/vdso/gettimeofday.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + + .text +/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */ +ENTRY(__vdso_gettimeofday) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_gettimeofday + ecall + ret + .cfi_endproc +ENDPROC(__vdso_gettimeofday) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 3ac08eebd11d..c7543c6a00f9 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -70,6 +70,10 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; + __vdso_gettimeofday; + __vdso_clock_gettime; + __vdso_clock_getres; + __vdso_getcpu; local: *; }; } From 9f97df50c52c2887432debb6238f4e43567386a5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Oct 2017 11:20:15 -0400 Subject: [PATCH 111/333] reiserfs: remove unneeded i_version bump The i_version field in reiserfs is not initialized and is only ever updated here. Nothing ever views it, so just remove it. Signed-off-by: Jeff Layton Signed-off-by: Jan Kara --- fs/reiserfs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5464ec517702..4885c7b6e44f 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2591,7 +2591,6 @@ out: return err; if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); - inode->i_version++; inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); return len - towrite; From e872fa94662d0644057c7c80b3071bdb9249e5ab Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 17 Nov 2017 11:52:49 +0000 Subject: [PATCH 112/333] KVM: lapic: Split out x2apic ldr calculation Split out the ldr calculation from kvm_apic_set_x2apic_id since we're about to reuse it in the following patch. Signed-off-by: Dr. David Alan Gilbert Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 943acbf00c69..e2edb1103002 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -266,9 +266,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) recalculate_apic_map(apic->vcpu->kvm); } +static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) +{ + return ((id >> 4) << 16) | (1 << (id & 0xf)); +} + static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) { - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); + u32 ldr = kvm_apic_calc_x2apic_ldr(id); WARN_ON_ONCE(id != apic->vcpu->vcpu_id); From 12806ba937382fdfdbad62a399aa2dce65c10fcd Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 17 Nov 2017 11:52:50 +0000 Subject: [PATCH 113/333] KVM: lapic: Fixup LDR on load in x2apic In x2apic mode the LDR is fixed based on the ID rather than separately loadable like it was before x2. When kvm_apic_set_state is called, the base is set, and if it has the X2APIC_ENABLE flag set then the LDR is calculated; however that value gets overwritten by the memcpy a few lines below overwriting it with the value that came from userland. The symptom is a lack of EOI after loading the state (e.g. after a QEMU migration) and is due to the EOI bitmap being wrong due to the incorrect LDR. This was seen with a Win2016 guest under Qemu with irqchip=split whose USB mouse didn't work after a VM migration. This corresponds to RH bug: https://bugzilla.redhat.com/show_bug.cgi?id=1502591 Reported-by: Yiqian Wei Signed-off-by: Dr. David Alan Gilbert Cc: stable@vger.kernel.org [Applied fixup from Liran Alon. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e2edb1103002..e2c1fb8d35ce 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2250,6 +2250,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, { if (apic_x2apic_mode(vcpu->arch.apic)) { u32 *id = (u32 *)(s->regs + APIC_ID); + u32 *ldr = (u32 *)(s->regs + APIC_LDR); if (vcpu->kvm->arch.x2apic_format) { if (*id != vcpu->vcpu_id) @@ -2260,6 +2261,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, else *id <<= 24; } + + /* In x2APIC mode, the LDR is fixed and based on the id */ + if (set) + *ldr = kvm_apic_calc_x2apic_ldr(*id); } return 0; From e70b57a6ce4e8b92a56a615ae79bdb2bd66035e7 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:55:05 -0800 Subject: [PATCH 114/333] KVM: X86: Fix softlockup when get the current kvmclock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [qemu-system-x86:10185] CPU: 6 PID: 10185 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc4+ #4 RIP: 0010:kvm_get_time_scale+0x4e/0xa0 [kvm] Call Trace: get_time_ref_counter+0x5a/0x80 [kvm] kvm_hv_process_stimers+0x120/0x5f0 [kvm] kvm_arch_vcpu_ioctl_run+0x4b4/0x1690 [kvm] kvm_vcpu_ioctl+0x33a/0x620 [kvm] do_vfs_ioctl+0xa1/0x5d0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x1e/0xa9 This can be reproduced when running kvm-unit-tests/hyperv_stimer.flat and cpu-hotplug stress simultaneously. __this_cpu_read(cpu_tsc_khz) returns 0 (set in kvmclock_cpu_down_prep()) when the pCPU is unhotplug which results in kvm_get_time_scale() gets into an infinite loop. This patch fixes it by treating the unhotplug pCPU as not using master clock. Reviewed-by: Radim Krčmář Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4552427105f6..f49fe514d1b2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1798,10 +1798,13 @@ u64 get_kvmclock_ns(struct kvm *kvm) /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, - &hv_clock.tsc_shift, - &hv_clock.tsc_to_system_mul); - ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + if (__this_cpu_read(cpu_tsc_khz)) { + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + } else + ret = ktime_get_boot_ns() + ka->kvmclock_offset; put_cpu(); From c37c28730bb031cc8a44a130c2555c0f3efbe2d0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:52:21 -0800 Subject: [PATCH 115/333] KVM: VMX: Fix rflags cache during vCPU reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: *** Guest State *** CR0: actual=0x0000000080010031, shadow=0x0000000060000010, gh_mask=fffffffffffffff7 CR4: actual=0x0000000000002061, shadow=0x0000000000000000, gh_mask=ffffffffffffe8f1 CR3 = 0x000000002081e000 RSP = 0x000000000000fffa RIP = 0x0000000000000000 RFLAGS=0x00023000 DR7 = 0x00000000000000 ^^^^^^^^^^ ------------[ cut here ]------------ WARNING: CPU: 6 PID: 24431 at /home/kernel/linux/arch/x86/kvm//x86.c:7302 kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] CPU: 6 PID: 24431 Comm: reprotest Tainted: G W OE 4.14.0+ #26 RIP: 0010:kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] RSP: 0018:ffff880291d179e0 EFLAGS: 00010202 Call Trace: kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The failed vmentry is triggered by the following beautified testcase: #include #include #include #include #include #include #include long r[5]; int main() { struct kvm_debugregs dr = { 0 }; r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); struct kvm_guest_debug debug = { .control = 0xf0403, .arch = { .debugreg[6] = 0x2, .debugreg[7] = 0x2 } }; ioctl(r[4], KVM_SET_GUEST_DEBUG, &debug); ioctl(r[4], KVM_RUN, 0); } which testcase tries to setup the processor specific debug registers and configure vCPU for handling guest debug events through KVM_SET_GUEST_DEBUG. The KVM_SET_GUEST_DEBUG ioctl will get and set rflags in order to set TF bit if single step is needed. All regs' caches are reset to avail and GUEST_RFLAGS vmcs field is reset to 0x2 during vCPU reset. However, the cache of rflags is not reset during vCPU reset. The function vmx_get_rflags() returns an unreset rflags cache value since the cache is marked avail, it is 0 after boot. Vmentry fails if the rflags reserved bit 1 is 0. This patch fixes it by resetting both the GUEST_RFLAGS vmcs field and its cache to 0x2 during vCPU reset. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Dmitry Vyukov Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0d59dbe430c8..04f19b099617 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5602,7 +5602,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - vmcs_writel(GUEST_RFLAGS, 0x02); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); From b74558259c5149e5edd79348b70eb34177cbeea0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 22 Nov 2017 14:04:00 -0800 Subject: [PATCH 116/333] KVM: VMX: Fix vmx->nested freeing when no SMI handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: ------------[ cut here ]------------ WARNING: CPU: 5 PID: 2939 at arch/x86/kvm/vmx.c:3844 free_loaded_vmcs+0x77/0x80 [kvm_intel] CPU: 5 PID: 2939 Comm: repro Not tainted 4.14.0+ #26 RIP: 0010:free_loaded_vmcs+0x77/0x80 [kvm_intel] Call Trace: vmx_free_vcpu+0xda/0x130 [kvm_intel] kvm_arch_destroy_vm+0x192/0x290 [kvm] kvm_put_kvm+0x262/0x560 [kvm] kvm_vm_release+0x2c/0x30 [kvm] __fput+0x190/0x370 task_work_run+0xa1/0xd0 do_exit+0x4d2/0x13e0 do_group_exit+0x89/0x140 get_signal+0x318/0xb80 do_signal+0x8c/0xb40 exit_to_usermode_loop+0xe4/0x140 syscall_return_slowpath+0x206/0x230 entry_SYSCALL_64_fastpath+0x98/0x9a The syzkaller testcase will execute VMXON/VMLAUCH instructions, so the vmx->nested stuff is populated, it will also issue KVM_SMI ioctl. However, the testcase is just a simple c program and not be lauched by something like seabios which implements smi_handler. Commit 05cade71cf (KVM: nSVM: fix SMI injection in guest mode) gets out of guest mode and set nested.vmxon to false for the duration of SMM according to SDM 34.14.1 "leave VMX operation" upon entering SMM. We can't alloc/free the vmx->nested stuff each time when entering/exiting SMM since it will induce more overhead. So the function vmx_pre_enter_smm() marks nested.vmxon false even if vmx->nested stuff is still populated. What it expected is em_rsm() can mark nested.vmxon to be true again. However, the smi_handler/rsm will not execute since there is no something like seabios in this scenario. The function free_nested() fails to free the vmx->nested stuff since the vmx->nested.vmxon is false which results in the above warning. This patch fixes it by also considering the no SMI handler case, luckily vmx->nested.smm.vmxon is marked according to the value of vmx->nested.vmxon in vmx_pre_enter_smm(), we can take advantage of it and free vmx->nested stuff when L1 goes down. Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Reviewed-by: Liran Alon Fixes: 05cade71cf (KVM: nSVM: fix SMI injection in guest mode) Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 04f19b099617..4704aaf6d19e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7415,10 +7415,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) */ static void free_nested(struct vcpu_vmx *vmx) { - if (!vmx->nested.vmxon) + if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; vmx->nested.vmxon = false; + vmx->nested.smm.vmxon = false; free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; From 20b7035c66bacc909ae3ffe92c1a1ea7db99fe4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Fri, 24 Nov 2017 22:39:01 +0100 Subject: [PATCH 117/333] KVM: Let KVM_SET_SIGNAL_MASK work as advertised MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM API says for the signal mask you set via KVM_SET_SIGNAL_MASK, that "any unblocked signal received [...] will cause KVM_RUN to return with -EINTR" and that "the signal will only be delivered if not blocked by the original signal mask". This, however, is only true, when the calling task has a signal handler registered for a signal. If not, signal evaluation is short-circuited for SIG_IGN and SIG_DFL, and the signal is either ignored without KVM_RUN returning or the whole process is terminated. Make KVM_SET_SIGNAL_MASK behave as advertised by utilizing logic similar to that in do_sigtimedwait() to avoid short-circuiting of signals. Signed-off-by: Jan H. Schönherr Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 7 ++----- arch/powerpc/kvm/powerpc.c | 7 ++----- arch/s390/kvm/kvm-s390.c | 7 ++----- arch/x86/kvm/x86.c | 7 ++----- include/linux/kvm_host.h | 3 +++ virt/kvm/arm/arm.c | 8 +++----- virt/kvm/kvm_main.c | 23 +++++++++++++++++++++++ 7 files changed, 37 insertions(+), 25 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d535edc01434..75fdeaa8c62f 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -445,10 +445,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r = -EINTR; - sigset_t sigsaved; - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (vcpu->mmio_needed) { if (!vcpu->mmio_is_write) @@ -480,8 +478,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) local_irq_enable(); out: - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6b6c53c42ac9..1915e86cef6f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1407,7 +1407,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; - sigset_t sigsaved; if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; @@ -1448,16 +1447,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) #endif } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (run->immediate_exit) r = -EINTR; else r = kvmppc_vcpu_run(run, vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 98ad8b9e0360..9614aea5839b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3372,7 +3372,6 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int rc; - sigset_t sigsaved; if (kvm_run->immediate_exit) return -EINTR; @@ -3382,8 +3381,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { kvm_s390_vcpu_start(vcpu); @@ -3417,8 +3415,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) disable_cpu_timer_accounting(vcpu); store_regs(vcpu, kvm_run); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); vcpu->stat.exit_userspace++; return rc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f49fe514d1b2..eee8e7faf1af 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7267,12 +7267,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct fpu *fpu = ¤t->thread.fpu; int r; - sigset_t sigsaved; fpu__initialize(fpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { @@ -7315,8 +7313,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) out: post_kvm_run_save(vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2e754b7c282c..893d6d606cd0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -715,6 +715,9 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); +void kvm_sigset_activate(struct kvm_vcpu *vcpu); +void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); + void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a6524ff27de4..a67c106d73f5 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -615,7 +615,6 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int ret; - sigset_t sigsaved; if (unlikely(!kvm_vcpu_initialized(vcpu))) return -ENOEXEC; @@ -633,8 +632,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (run->immediate_exit) return -EINTR; - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); ret = 1; run->exit_reason = KVM_EXIT_UNKNOWN; @@ -769,8 +767,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_pmu_update_run(vcpu); } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); + return ret; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2dd1a9ca4599..c01cff064ec5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2065,6 +2065,29 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); +void kvm_sigset_activate(struct kvm_vcpu *vcpu) +{ + if (!vcpu->sigset_active) + return; + + /* + * This does a lockless modification of ->real_blocked, which is fine + * because, only current can change ->real_blocked and all readers of + * ->real_blocked don't care as long ->real_blocked is always a subset + * of ->blocked. + */ + sigprocmask(SIG_SETMASK, &vcpu->sigset, ¤t->real_blocked); +} + +void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) +{ + if (!vcpu->sigset_active) + return; + + sigprocmask(SIG_SETMASK, ¤t->real_blocked, NULL); + sigemptyset(¤t->real_blocked); +} + static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) { unsigned int old, val, grow; From 98c4f78dcdd8cec112d1cbc5e9a792ee6e5ab7a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 22 Nov 2017 12:21:07 -0800 Subject: [PATCH 118/333] xfs: always free inline data before resetting inode fork during ifree In xfs_ifree, we reset the data/attr forks to extents format without bothering to free any inline data buffer that might still be around after all the blocks have been truncated off the file. Prior to commit 43518812d2 ("xfs: remove support for inlining data/extents into the inode fork") nobody noticed because the leftover inline data after truncation was small enough to fit inside the inline buffer inside the fork itself. However, now that we've removed the inline buffer, we /always/ have to free the inline data buffer or else we leak them like crazy. This test was found by turning on kmemleak for generic/001 or generic/388. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 61d1cb7dc10d..801274126648 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2400,6 +2400,24 @@ retry: return 0; } +/* + * Free any local-format buffers sitting around before we reset to + * extents format. + */ +static inline void +xfs_ifree_local_data( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +} + /* * This is called to return an inode to the inode free list. * The inode should already be truncated to 0 length and have @@ -2437,6 +2455,9 @@ xfs_ifree( if (error) return error; + xfs_ifree_local_data(ip, XFS_DATA_FORK); + xfs_ifree_local_data(ip, XFS_ATTR_FORK); + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; ip->i_d.di_dmevmask = 0; From 509955823cc9cc225c05673b1b83d70ca70c5c60 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Nov 2017 20:53:02 -0800 Subject: [PATCH 119/333] xfs: log recovery should replay deferred ops in order As part of testing log recovery with dm_log_writes, Amir Goldstein discovered an error in the deferred ops recovery that lead to corruption of the filesystem metadata if a reflink+rmap filesystem happened to shut down midway through a CoW remap: "This is what happens [after failed log recovery]: "Phase 1 - find and verify superblock... "Phase 2 - using internal log " - zero log... " - scan filesystem freespace and inode maps... " - found root inode chunk "Phase 3 - for each AG... " - scan (but don't clear) agi unlinked lists... " - process known inodes and perform inode discovery... " - agno = 0 "data fork in regular inode 134 claims CoW block 376 "correcting nextents for inode 134 "bad data fork in inode 134 "would have cleared inode 134" Hou Tao dissected the log contents of exactly such a crash: "According to the implementation of xfs_defer_finish(), these ops should be completed in the following sequence: "Have been done: "(1) CUI: Oper (160) "(2) BUI: Oper (161) "(3) CUD: Oper (194), for CUI Oper (160) "(4) RUI A: Oper (197), free rmap [0x155, 2, -9] "Should be done: "(5) BUD: for BUI Oper (161) "(6) RUI B: add rmap [0x155, 2, 137] "(7) RUD: for RUI A "(8) RUD: for RUI B "Actually be done by xlog_recover_process_intents() "(5) BUD: for BUI Oper (161) "(6) RUI B: add rmap [0x155, 2, 137] "(7) RUD: for RUI B "(8) RUD: for RUI A "So the rmap entry [0x155, 2, -9] for COW should be freed firstly, then a new rmap entry [0x155, 2, 137] will be added. However, as we can see from the log record in post_mount.log (generated after umount) and the trace print, the new rmap entry [0x155, 2, 137] are added firstly, then the rmap entry [0x155, 2, -9] are freed." When reconstructing the internal log state from the log items found on disk, it's required that deferred ops replay in exactly the same order that they would have had the filesystem not gone down. However, replaying unfinished deferred ops can create /more/ deferred ops. These new deferred ops are finished in the wrong order. This causes fs corruption and replay crashes, so let's create a single defer_ops to handle the subsequent ops created during replay, then use one single transaction at the end of log recovery to ensure that everything is replayed in the same order as they're supposed to be. Reported-by: Amir Goldstein Analyzed-by: Hou Tao Reviewed-by: Christoph Hellwig Tested-by: Amir Goldstein Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_item.c | 23 ++++-------- fs/xfs/xfs_bmap_item.h | 3 +- fs/xfs/xfs_log_recover.c | 75 ++++++++++++++++++++++++++++++++++---- fs/xfs/xfs_refcount_item.c | 21 ++++------- fs/xfs/xfs_refcount_item.h | 3 +- 5 files changed, 85 insertions(+), 40 deletions(-) diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index dd136f7275e4..e5fb008d75e8 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -389,7 +389,8 @@ xfs_bud_init( int xfs_bui_recover( struct xfs_mount *mp, - struct xfs_bui_log_item *buip) + struct xfs_bui_log_item *buip, + struct xfs_defer_ops *dfops) { int error = 0; unsigned int bui_type; @@ -404,9 +405,7 @@ xfs_bui_recover( xfs_exntst_t state; struct xfs_trans *tp; struct xfs_inode *ip = NULL; - struct xfs_defer_ops dfops; struct xfs_bmbt_irec irec; - xfs_fsblock_t firstfsb; ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)); @@ -464,7 +463,6 @@ xfs_bui_recover( if (VFS_I(ip)->i_nlink == 0) xfs_iflags_set(ip, XFS_IRECOVERY); - xfs_defer_init(&dfops, &firstfsb); /* Process deferred bmap item. */ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? @@ -479,16 +477,16 @@ xfs_bui_recover( break; default: error = -EFSCORRUPTED; - goto err_dfops; + goto err_inode; } xfs_trans_ijoin(tp, ip, 0); count = bmap->me_len; - error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type, + error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type, ip, whichfork, bmap->me_startoff, bmap->me_startblock, &count, state); if (error) - goto err_dfops; + goto err_inode; if (count > 0) { ASSERT(type == XFS_BMAP_UNMAP); @@ -496,16 +494,11 @@ xfs_bui_recover( irec.br_blockcount = count; irec.br_startoff = bmap->me_startoff; irec.br_state = state; - error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec); + error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec); if (error) - goto err_dfops; + goto err_inode; } - /* Finish transaction, free inodes. */ - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto err_dfops; - set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -513,8 +506,6 @@ xfs_bui_recover( return error; -err_dfops: - xfs_defer_cancel(&dfops); err_inode: xfs_trans_cancel(tp); if (ip) { diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index c867daae4a3c..24b354a2c836 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -93,6 +93,7 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, struct xfs_bui_log_item *); void xfs_bui_item_free(struct xfs_bui_log_item *); void xfs_bui_release(struct xfs_bui_log_item *); -int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip); +int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip, + struct xfs_defer_ops *dfops); #endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 87b1c331f9eb..28d1abfe835e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -24,6 +24,7 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" +#include "xfs_defer.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" @@ -4716,7 +4717,8 @@ STATIC int xlog_recover_process_cui( struct xfs_mount *mp, struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip, + struct xfs_defer_ops *dfops) { struct xfs_cui_log_item *cuip; int error; @@ -4729,7 +4731,7 @@ xlog_recover_process_cui( return 0; spin_unlock(&ailp->xa_lock); - error = xfs_cui_recover(mp, cuip); + error = xfs_cui_recover(mp, cuip, dfops); spin_lock(&ailp->xa_lock); return error; @@ -4756,7 +4758,8 @@ STATIC int xlog_recover_process_bui( struct xfs_mount *mp, struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip, + struct xfs_defer_ops *dfops) { struct xfs_bui_log_item *buip; int error; @@ -4769,7 +4772,7 @@ xlog_recover_process_bui( return 0; spin_unlock(&ailp->xa_lock); - error = xfs_bui_recover(mp, buip); + error = xfs_bui_recover(mp, buip, dfops); spin_lock(&ailp->xa_lock); return error; @@ -4805,6 +4808,46 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip) } } +/* Take all the collected deferred ops and finish them in order. */ +static int +xlog_finish_defer_ops( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops) +{ + struct xfs_trans *tp; + int64_t freeblks; + uint resblks; + int error; + + /* + * We're finishing the defer_ops that accumulated as a result of + * recovering unfinished intent items during log recovery. We + * reserve an itruncate transaction because it is the largest + * permanent transaction type. Since we're the only user of the fs + * right now, take 93% (15/16) of the available free blocks. Use + * weird math to avoid a 64-bit division. + */ + freeblks = percpu_counter_sum(&mp->m_fdblocks); + if (freeblks <= 0) + return -ENOSPC; + resblks = min_t(int64_t, UINT_MAX, freeblks); + resblks = (resblks * 15) >> 4; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, + 0, XFS_TRANS_RESERVE, &tp); + if (error) + return error; + + error = xfs_defer_finish(&tp, dfops); + if (error) + goto out_cancel; + + return xfs_trans_commit(tp); + +out_cancel: + xfs_trans_cancel(tp); + return error; +} + /* * When this is called, all of the log intent items which did not have * corresponding log done items should be in the AIL. What we do now @@ -4825,10 +4868,12 @@ STATIC int xlog_recover_process_intents( struct xlog *log) { - struct xfs_log_item *lip; - int error = 0; + struct xfs_defer_ops dfops; struct xfs_ail_cursor cur; + struct xfs_log_item *lip; struct xfs_ail *ailp; + xfs_fsblock_t firstfsb; + int error = 0; #if defined(DEBUG) || defined(XFS_WARN) xfs_lsn_t last_lsn; #endif @@ -4839,6 +4884,7 @@ xlog_recover_process_intents( #if defined(DEBUG) || defined(XFS_WARN) last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); #endif + xfs_defer_init(&dfops, &firstfsb); while (lip != NULL) { /* * We're done when we see something other than an intent. @@ -4859,6 +4905,12 @@ xlog_recover_process_intents( */ ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0); + /* + * NOTE: If your intent processing routine can create more + * deferred ops, you /must/ attach them to the dfops in this + * routine or else those subsequent intents will get + * replayed in the wrong order! + */ switch (lip->li_type) { case XFS_LI_EFI: error = xlog_recover_process_efi(log->l_mp, ailp, lip); @@ -4867,10 +4919,12 @@ xlog_recover_process_intents( error = xlog_recover_process_rui(log->l_mp, ailp, lip); break; case XFS_LI_CUI: - error = xlog_recover_process_cui(log->l_mp, ailp, lip); + error = xlog_recover_process_cui(log->l_mp, ailp, lip, + &dfops); break; case XFS_LI_BUI: - error = xlog_recover_process_bui(log->l_mp, ailp, lip); + error = xlog_recover_process_bui(log->l_mp, ailp, lip, + &dfops); break; } if (error) @@ -4880,6 +4934,11 @@ xlog_recover_process_intents( out: xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); + if (error) + xfs_defer_cancel(&dfops); + else + error = xlog_finish_defer_ops(log->l_mp, &dfops); + return error; } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 8f2e2fac4255..3a55d6fc271b 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -393,7 +393,8 @@ xfs_cud_init( int xfs_cui_recover( struct xfs_mount *mp, - struct xfs_cui_log_item *cuip) + struct xfs_cui_log_item *cuip, + struct xfs_defer_ops *dfops) { int i; int error = 0; @@ -405,11 +406,9 @@ xfs_cui_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; enum xfs_refcount_intent_type type; - xfs_fsblock_t firstfsb; xfs_fsblock_t new_fsb; xfs_extlen_t new_len; struct xfs_bmbt_irec irec; - struct xfs_defer_ops dfops; bool requeue_only = false; ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)); @@ -465,7 +464,6 @@ xfs_cui_recover( return error; cudp = xfs_trans_get_cud(tp, cuip); - xfs_defer_init(&dfops, &firstfsb); for (i = 0; i < cuip->cui_format.cui_nextents; i++) { refc = &cuip->cui_format.cui_extents[i]; refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; @@ -485,7 +483,7 @@ xfs_cui_recover( new_len = refc->pe_len; } else error = xfs_trans_log_finish_refcount_update(tp, cudp, - &dfops, type, refc->pe_startblock, refc->pe_len, + dfops, type, refc->pe_startblock, refc->pe_len, &new_fsb, &new_len, &rcur); if (error) goto abort_error; @@ -497,21 +495,21 @@ xfs_cui_recover( switch (type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_increase_extent( - tp->t_mountp, &dfops, &irec); + tp->t_mountp, dfops, &irec); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_decrease_extent( - tp->t_mountp, &dfops, &irec); + tp->t_mountp, dfops, &irec); break; case XFS_REFCOUNT_ALLOC_COW: error = xfs_refcount_alloc_cow_extent( - tp->t_mountp, &dfops, + tp->t_mountp, dfops, irec.br_startblock, irec.br_blockcount); break; case XFS_REFCOUNT_FREE_COW: error = xfs_refcount_free_cow_extent( - tp->t_mountp, &dfops, + tp->t_mountp, dfops, irec.br_startblock, irec.br_blockcount); break; @@ -525,17 +523,12 @@ xfs_cui_recover( } xfs_refcount_finish_one_cleanup(tp, rcur, error); - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto abort_defer; set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); error = xfs_trans_commit(tp); return error; abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); -abort_defer: - xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index 5b74dddfa64b..0e5327349a13 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -96,6 +96,7 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, struct xfs_cui_log_item *); void xfs_cui_item_free(struct xfs_cui_log_item *); void xfs_cui_release(struct xfs_cui_log_item *); -int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip); +int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip, + struct xfs_defer_ops *dfops); #endif /* __XFS_REFCOUNT_ITEM_H__ */ From 6e0c9507bf51e1517a80ad0ac171e5402528fcef Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Nov 2017 12:28:17 +0100 Subject: [PATCH 120/333] i2c: i801: Fix Failed to allocate irq -2147483648 error On Apollo Lake devices the BIOS does not set up IRQ routing for the i801 SMBUS controller IRQ, so we end up with dev->irq set to IRQ_NOTCONNECTED. Detect this and do not try to use the irq in this case silencing: i801_smbus 0000:00:1f.1: Failed to allocate irq -2147483648: -107 Cc: stable@vger.kernel.org BugLink: https://communities.intel.com/thread/114759 Signed-off-by: Hans de Goede Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 9e12a53ef7b8..8eac00efadc1 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1617,6 +1617,9 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Default timeout in interrupt mode: 200 ms */ priv->adapter.timeout = HZ / 5; + if (dev->irq == IRQ_NOTCONNECTED) + priv->features &= ~FEATURE_IRQ; + if (priv->features & FEATURE_IRQ) { u16 pcictl, pcists; From 66a7c84d677e8e4a5a2ef4afdb9bd52e1399a866 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 22 Nov 2017 17:52:24 +0000 Subject: [PATCH 121/333] i2c: i2c-boardinfo: fix memory leaks on devinfo Currently when an error occurs devinfo is still allocated but is unused when the error exit paths break out of the for-loop. Fix this by kfree'ing devinfo to avoid the leak. Detected by CoverityScan, CID#1416590 ("Resource Leak") Fixes: 4124c4eba402 ("i2c: allow attaching IRQ resources to i2c_board_info") Fixes: 0daaf99d8424 ("i2c: copy device properties when using i2c_register_board_info()") Signed-off-by: Colin Ian King Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-boardinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c index 31186ead5a40..509a6007cdf6 100644 --- a/drivers/i2c/i2c-boardinfo.c +++ b/drivers/i2c/i2c-boardinfo.c @@ -86,6 +86,7 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig property_entries_dup(info->properties); if (IS_ERR(devinfo->board_info.properties)) { status = PTR_ERR(devinfo->board_info.properties); + kfree(devinfo); break; } } @@ -98,6 +99,7 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig GFP_KERNEL); if (!devinfo->board_info.resources) { status = -ENOMEM; + kfree(devinfo); break; } } From 2967acbb257a6a9bf912f4778b727e00972eac9b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 19 Nov 2017 11:52:55 -0700 Subject: [PATCH 122/333] blktrace: fix trace mutex deadlock A previous commit changed the locking around registration/cleanup, but direct callers of blk_trace_remove() were missed. This means that if we hit the error path in setup, we will deadlock on attempting to re-acquire the queue trace mutex. Fixes: 1f2cac107c59 ("blktrace: fix unlocked access to init/start-stop/teardown") Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c5987d4c5f23..987d9a9ae283 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -591,7 +591,7 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev, return ret; if (copy_to_user(arg, &buts, sizeof(buts))) { - blk_trace_remove(q); + __blk_trace_remove(q); return -EFAULT; } return 0; @@ -637,7 +637,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, return ret; if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { - blk_trace_remove(q); + __blk_trace_remove(q); return -EFAULT; } From a39e17b2d842938e19997d2fdc0443fdd4cd8d10 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Nov 2017 12:10:23 -0800 Subject: [PATCH 123/333] bpf: offload: add a license header I forgot to add a license on kernel/bpf/offload.c. Luckily I'm still the only author so make it explicitly GPLv2. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: Daniel Borkmann --- kernel/bpf/offload.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 68ec884440b7..8455b89d1bbf 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -1,3 +1,18 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree. + * + * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE + * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME + * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + */ + #include #include #include From b12cbb21586277f72533769832c24cc6c1d60ab3 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 22 Nov 2017 07:33:38 -0800 Subject: [PATCH 124/333] apparmor: fix oops in audit_signal_cb hook The apparmor_audit_data struct ordering got messed up during a merge conflict, resulting in the signal integer and peer pointer being in a union instead of a struct. For most of the 4.13 and 4.14 life cycle, this was hidden by commit 651e28c5537a ("apparmor: add base infastructure for socket mediation") which fixed the apparmor_audit_data struct when its data was added. When that commit was reverted in -rc7 the signal audit bug was exposed, and unfortunately it never showed up in any of the testing until after 4.14 was released. Shaun Khan, Zephaniah E. Loss-Cutler-Hull filed nearly simultaneous bug reports (with different oopes, the smaller of which is included below). Full credit goes to Tetsuo Handa for jumping on this as well and noticing the audit data struct problem and reporting it. [ 76.178568] BUG: unable to handle kernel paging request at ffffffff0eee3bc0 [ 76.178579] IP: audit_signal_cb+0x6c/0xe0 [ 76.178581] PGD 1a640a067 P4D 1a640a067 PUD 0 [ 76.178586] Oops: 0000 [#1] PREEMPT SMP [ 76.178589] Modules linked in: fuse rfcomm bnep usblp uvcvideo btusb btrtl btbcm btintel bluetooth ecdh_generic ip6table_filter ip6_tables xt_tcpudp nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack iptable_filter ip_tables x_tables intel_rapl joydev wmi_bmof serio_raw iwldvm iwlwifi shpchp kvm_intel kvm irqbypass autofs4 algif_skcipher nls_iso8859_1 nls_cp437 crc32_pclmul ghash_clmulni_intel [ 76.178620] CPU: 0 PID: 10675 Comm: pidgin Not tainted 4.14.0-f1-dirty #135 [ 76.178623] Hardware name: Hewlett-Packard HP EliteBook Folio 9470m/18DF, BIOS 68IBD Ver. F.62 10/22/2015 [ 76.178625] task: ffff9c7a94c31dc0 task.stack: ffffa09b02a4c000 [ 76.178628] RIP: 0010:audit_signal_cb+0x6c/0xe0 [ 76.178631] RSP: 0018:ffffa09b02a4fc08 EFLAGS: 00010292 [ 76.178634] RAX: ffffa09b02a4fd60 RBX: ffff9c7aee0741f8 RCX: 0000000000000000 [ 76.178636] RDX: ffffffffee012290 RSI: 0000000000000006 RDI: ffff9c7a9493d800 [ 76.178638] RBP: ffffa09b02a4fd40 R08: 000000000000004d R09: ffffa09b02a4fc46 [ 76.178641] R10: ffffa09b02a4fcb8 R11: ffff9c7ab44f5072 R12: ffffa09b02a4fd40 [ 76.178643] R13: ffffffff9e447be0 R14: ffff9c7a94c31dc0 R15: 0000000000000001 [ 76.178646] FS: 00007f8b11ba2a80(0000) GS:ffff9c7afea00000(0000) knlGS:0000000000000000 [ 76.178648] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 76.178650] CR2: ffffffff0eee3bc0 CR3: 00000003d5209002 CR4: 00000000001606f0 [ 76.178652] Call Trace: [ 76.178660] common_lsm_audit+0x1da/0x780 [ 76.178665] ? d_absolute_path+0x60/0x90 [ 76.178669] ? aa_check_perms+0xcd/0xe0 [ 76.178672] aa_check_perms+0xcd/0xe0 [ 76.178675] profile_signal_perm.part.0+0x90/0xa0 [ 76.178679] aa_may_signal+0x16e/0x1b0 [ 76.178686] apparmor_task_kill+0x51/0x120 [ 76.178690] security_task_kill+0x44/0x60 [ 76.178695] group_send_sig_info+0x25/0x60 [ 76.178699] kill_pid_info+0x36/0x60 [ 76.178703] SYSC_kill+0xdb/0x180 [ 76.178707] ? preempt_count_sub+0x92/0xd0 [ 76.178712] ? _raw_write_unlock_irq+0x13/0x30 [ 76.178716] ? task_work_run+0x6a/0x90 [ 76.178720] ? exit_to_usermode_loop+0x80/0xa0 [ 76.178723] entry_SYSCALL_64_fastpath+0x13/0x94 [ 76.178727] RIP: 0033:0x7f8b0e58b767 [ 76.178729] RSP: 002b:00007fff19efd4d8 EFLAGS: 00000206 ORIG_RAX: 000000000000003e [ 76.178732] RAX: ffffffffffffffda RBX: 0000557f3e3c2050 RCX: 00007f8b0e58b767 [ 76.178735] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000000263b [ 76.178737] RBP: 0000000000000000 R08: 0000557f3e3c2270 R09: 0000000000000001 [ 76.178739] R10: 000000000000022d R11: 0000000000000206 R12: 0000000000000000 [ 76.178741] R13: 0000000000000001 R14: 0000557f3e3c13c0 R15: 0000000000000000 [ 76.178745] Code: 48 8b 55 18 48 89 df 41 b8 20 00 08 01 5b 5d 48 8b 42 10 48 8b 52 30 48 63 48 4c 48 8b 44 c8 48 31 c9 48 8b 70 38 e9 f4 fd 00 00 <48> 8b 14 d5 40 27 e5 9e 48 c7 c6 7d 07 19 9f 48 89 df e8 fd 35 [ 76.178794] RIP: audit_signal_cb+0x6c/0xe0 RSP: ffffa09b02a4fc08 [ 76.178796] CR2: ffffffff0eee3bc0 [ 76.178799] ---[ end trace 514af9529297f1a3 ]--- Fixes: cd1dbf76b23d ("apparmor: add the ability to mediate signals") Reported-by: Zephaniah E. Loss-Cutler-Hull Reported-by: Shuah Khan Suggested-by: Tetsuo Handa Tested-by: Ivan Kozik Tested-by: Zephaniah E. Loss-Cutler-Hull Tested-by: Christian Boltz Tested-by: Shuah Khan Cc: stable@vger.kernel.org Signed-off-by: John Johansen --- security/apparmor/include/audit.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 620e81169659..4ac095118717 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -121,17 +121,19 @@ struct apparmor_audit_data { /* these entries require a custom callback fn */ struct { struct aa_label *peer; - struct { - const char *target; - kuid_t ouid; - } fs; + union { + struct { + const char *target; + kuid_t ouid; + } fs; + int signal; + }; }; struct { struct aa_profile *profile; const char *ns; long pos; } iface; - int signal; struct { int rlim; unsigned long max; From bd467e4eababe4c04272c1e646f066db02734c79 Mon Sep 17 00:00:00 2001 From: Robert Lippert Date: Mon, 27 Nov 2017 15:51:55 -0800 Subject: [PATCH 125/333] hwmon: (pmbus) Use 64bit math for DIRECT format values Power values in the 100s of watt range can easily blow past 32bit math limits when processing everything in microwatts. Use 64bit math instead to avoid these issues on common 32bit ARM BMC platforms. Fixes: 442aba78728e ("hwmon: PMBus device driver") Signed-off-by: Robert Lippert Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 52a58b8b6e1b..a139940cd991 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -499,8 +500,8 @@ static long pmbus_reg2data_linear(struct pmbus_data *data, static long pmbus_reg2data_direct(struct pmbus_data *data, struct pmbus_sensor *sensor) { - long val = (s16) sensor->data; - long m, b, R; + s64 b, val = (s16)sensor->data; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -528,11 +529,12 @@ static long pmbus_reg2data_direct(struct pmbus_data *data, R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val = div_s64(val + 5LL, 10L); /* round closest */ R++; } - return (val - b) / m; + val = div_s64(val - b, m); + return clamp_val(val, LONG_MIN, LONG_MAX); } /* @@ -656,7 +658,8 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, static u16 pmbus_data2reg_direct(struct pmbus_data *data, struct pmbus_sensor *sensor, long val) { - long m, b, R; + s64 b, val64 = val; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -673,18 +676,18 @@ static u16 pmbus_data2reg_direct(struct pmbus_data *data, R -= 3; /* Adjust R and b for data in milli-units */ b *= 1000; } - val = val * m + b; + val64 = val64 * m + b; while (R > 0) { - val *= 10; + val64 *= 10; R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val64 = div_s64(val64 + 5LL, 10L); /* round closest */ R++; } - return val; + return (u16)clamp_val(val64, S16_MIN, S16_MAX); } static u16 pmbus_data2reg_vid(struct pmbus_data *data, From ae5c631e605a452a5a0e73205a92810c01ed954b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Nov 2017 21:41:56 +0200 Subject: [PATCH 126/333] drm/i915: Don't try indexed reads to alternate slave addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can only specify the one slave address to indexed reads/writes. Make sure the messages we check are destined to the same slave address before deciding to do an indexed transfer. Cc: stable@vger.kernel.org Cc: Daniel Kurtz Cc: Chris Wilson Cc: Daniel Vetter Cc: Sean Paul Fixes: 56f9eac05489 ("drm/i915/intel_i2c: use INDEX cycles for i2c read transactions") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171123194157.25367-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit c4deb62d7821672265b87952bcd1c808f3bf3e8f) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index eb5827110d8f..8affd47b98b8 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -438,6 +438,7 @@ static bool gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && + msgs[i].addr == msgs[i + 1].addr && !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && (msgs[i + 1].flags & I2C_M_RD)); } From 56350fb8978bbf4aafe08f21234e161dd128b417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Nov 2017 21:41:57 +0200 Subject: [PATCH 127/333] drm/i915: Prevent zero length "index" write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware always writes one or two bytes in the index portion of an indexed transfer. Make sure the message we send as the index doesn't have a zero length. Cc: stable@vger.kernel.org Cc: Daniel Kurtz Cc: Chris Wilson Cc: Daniel Vetter Cc: Sean Paul Fixes: 56f9eac05489 ("drm/i915/intel_i2c: use INDEX cycles for i2c read transactions") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171123194157.25367-3-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit bb9e0d4bca50f429152e74a459160b41f3d60fb2) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_i2c.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 8affd47b98b8..49fdf09f9919 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -439,7 +439,8 @@ gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && msgs[i].addr == msgs[i + 1].addr && - !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && + !(msgs[i].flags & I2C_M_RD) && + (msgs[i].len == 1 || msgs[i].len == 2) && (msgs[i + 1].flags & I2C_M_RD)); } From a45b30a6c5db631e2ba680304bd5edd0cd1f9643 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Nov 2017 19:41:55 +0000 Subject: [PATCH 128/333] drm/i915/fbdev: Serialise early hotplug events with async fbdev config As both the hotplug event and fbdev configuration run asynchronously, it is possible for them to run concurrently. If configuration fails, we were freeing the fbdev causing a use-after-free in the hotplug event. <7>[ 3069.935211] [drm:intel_fb_initial_config [i915]] Not using firmware configuration <7>[ 3069.935225] [drm:drm_setup_crtcs] looking for cmdline mode on connector 77 <7>[ 3069.935229] [drm:drm_setup_crtcs] looking for preferred mode on connector 77 0 <7>[ 3069.935233] [drm:drm_setup_crtcs] found mode 3200x1800 <7>[ 3069.935236] [drm:drm_setup_crtcs] picking CRTCs for 8192x8192 config <7>[ 3069.935253] [drm:drm_setup_crtcs] desired mode 3200x1800 set on crtc 43 (0,0) <7>[ 3069.935323] [drm:intelfb_create [i915]] no BIOS fb, allocating a new one <4>[ 3069.967737] general protection fault: 0000 [#1] PREEMPT SMP <0>[ 3069.977453] --------------------------------- <4>[ 3069.977457] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm r8169 mei_me mii prime_numbers mei i2c_hid pinctrl_geminilake pinctrl_intel [last unloaded: i915] <4>[ 3069.977492] CPU: 1 PID: 15414 Comm: kworker/1:0 Tainted: G U 4.14.0-CI-CI_DRM_3388+ #1 <4>[ 3069.977497] Hardware name: Intel Corp. Geminilake/GLK RVP1 DDR4 (05), BIOS GELKRVPA.X64.0062.B30.1708222146 08/22/2017 <4>[ 3069.977508] Workqueue: events output_poll_execute <4>[ 3069.977512] task: ffff880177734e40 task.stack: ffffc90001fe4000 <4>[ 3069.977519] RIP: 0010:__lock_acquire+0x109/0x1b60 <4>[ 3069.977523] RSP: 0018:ffffc90001fe7bb0 EFLAGS: 00010002 <4>[ 3069.977526] RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000282 RCX: 0000000000000000 <4>[ 3069.977530] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880170d4efd0 <4>[ 3069.977534] RBP: ffffc90001fe7c70 R08: 0000000000000001 R09: 0000000000000000 <4>[ 3069.977538] R10: 0000000000000000 R11: ffffffff81899609 R12: ffff880170d4efd0 <4>[ 3069.977542] R13: ffff880177734e40 R14: 0000000000000001 R15: 0000000000000000 <4>[ 3069.977547] FS: 0000000000000000(0000) GS:ffff88017fc80000(0000) knlGS:0000000000000000 <4>[ 3069.977551] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 3069.977555] CR2: 00007f7e8b7bcf04 CR3: 0000000003e0f000 CR4: 00000000003406e0 <4>[ 3069.977559] Call Trace: <4>[ 3069.977565] ? mark_held_locks+0x64/0x90 <4>[ 3069.977571] ? _raw_spin_unlock_irq+0x24/0x50 <4>[ 3069.977575] ? _raw_spin_unlock_irq+0x24/0x50 <4>[ 3069.977579] ? trace_hardirqs_on_caller+0xde/0x1c0 <4>[ 3069.977583] ? _raw_spin_unlock_irq+0x2f/0x50 <4>[ 3069.977588] ? finish_task_switch+0xa5/0x210 <4>[ 3069.977592] ? lock_acquire+0xaf/0x200 <4>[ 3069.977596] lock_acquire+0xaf/0x200 <4>[ 3069.977600] ? __mutex_lock+0x5e9/0x9b0 <4>[ 3069.977604] _raw_spin_lock+0x2a/0x40 <4>[ 3069.977608] ? __mutex_lock+0x5e9/0x9b0 <4>[ 3069.977612] __mutex_lock+0x5e9/0x9b0 <4>[ 3069.977616] ? drm_fb_helper_hotplug_event.part.19+0x16/0xa0 <4>[ 3069.977621] ? drm_fb_helper_hotplug_event.part.19+0x16/0xa0 <4>[ 3069.977625] drm_fb_helper_hotplug_event.part.19+0x16/0xa0 <4>[ 3069.977630] output_poll_execute+0x8d/0x180 <4>[ 3069.977635] process_one_work+0x22e/0x660 <4>[ 3069.977640] worker_thread+0x48/0x3a0 <4>[ 3069.977644] ? _raw_spin_unlock_irqrestore+0x4c/0x60 <4>[ 3069.977649] kthread+0x102/0x140 <4>[ 3069.977653] ? process_one_work+0x660/0x660 <4>[ 3069.977657] ? kthread_create_on_node+0x40/0x40 <4>[ 3069.977662] ret_from_fork+0x27/0x40 <4>[ 3069.977666] Code: 8d 62 f8 c3 49 81 3c 24 e0 fa 3c 82 41 be 00 00 00 00 45 0f 45 f0 83 fe 01 77 86 89 f0 49 8b 44 c4 08 48 85 c0 0f 84 76 ff ff ff ff 80 38 01 00 00 8b 1d 62 f9 e8 01 45 8b 85 b8 08 00 00 85 <1>[ 3069.977707] RIP: __lock_acquire+0x109/0x1b60 RSP: ffffc90001fe7bb0 <4>[ 3069.977712] ---[ end trace 4ad012eb3af62df7 ]--- In order to keep the dev_priv->ifbdev alive after failure, we have to avoid the free and leave it empty until we unload the module (which is less than ideal, but a necessary evil for simplicity). Then we can use intel_fbdev_sync() to serialise the hotplug event with the configuration. The serialisation between the two was removed in commit 934458c2c95d ("Revert "drm/i915: Fix races on fbdev""), but the use after free is much older, commit 366e39b4d2c5 ("drm/i915: Tear down fbdev if initialization fails") Fixes: 366e39b4d2c5 ("drm/i915: Tear down fbdev if initialization fails") Fixes: 934458c2c95d ("Revert "drm/i915: Fix races on fbdev"") Signed-off-by: Chris Wilson Cc: Lukas Wunner Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: stable@vger.kernel.org Reviewed-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/20171125194155.355-1-chris@chris-wilson.co.uk (cherry picked from commit ad88d7fc6c032ddfb32b8d496a070ab71de3a64f) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_fbdev.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index b8af35187d22..ea96682568e8 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -697,10 +697,8 @@ static void intel_fbdev_initial_config(void *data, async_cookie_t cookie) /* Due to peculiar init order wrt to hpd handling this is separate. */ if (drm_fb_helper_initial_config(&ifbdev->helper, - ifbdev->preferred_bpp)) { + ifbdev->preferred_bpp)) intel_fbdev_unregister(to_i915(ifbdev->helper.dev)); - intel_fbdev_fini(to_i915(ifbdev->helper.dev)); - } } void intel_fbdev_initial_config_async(struct drm_device *dev) @@ -800,7 +798,11 @@ void intel_fbdev_output_poll_changed(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; - if (ifbdev) + if (!ifbdev) + return; + + intel_fbdev_sync(ifbdev); + if (ifbdev->vma) drm_fb_helper_hotplug_event(&ifbdev->helper); } From ac29fc66855b79c2960c63a4a66952d5b721d698 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Nov 2017 16:10:27 +0100 Subject: [PATCH 129/333] drm/i915: fix intel_backlight_device_register declaration The alternative intel_backlight_device_register() definition apparently never got used, but I have now run into a case of i915 being compiled without CONFIG_BACKLIGHT_CLASS_DEVICE, resulting in a number of identical warnings: drivers/gpu/drm/i915/intel_drv.h:1739:12: error: 'intel_backlight_device_register' defined but not used [-Werror=unused-function] This marks the function as 'inline', which was surely the original intention here. Fixes: 1ebaa0b9c2d4 ("drm/i915: Move backlight registration to connector registration") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171127151239.1813673-1-arnd@arndb.de (cherry picked from commit 2de2d0b063b08becb2c67a2c338c44e37bdcffee) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7bc60c848940..6c7f8bca574e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1736,7 +1736,7 @@ extern struct drm_display_mode *intel_find_panel_downclock( int intel_backlight_device_register(struct intel_connector *connector); void intel_backlight_device_unregister(struct intel_connector *connector); #else /* CONFIG_BACKLIGHT_CLASS_DEVICE */ -static int intel_backlight_device_register(struct intel_connector *connector) +static inline int intel_backlight_device_register(struct intel_connector *connector) { return 0; } From 679fd3ebabc23dec33d42525b19479f16f355e61 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 13 Nov 2017 14:58:31 +0800 Subject: [PATCH 130/333] drm/i915/gvt: Fix unsafe locking caused by spin_unlock_bh The caller of shadow_context_status_change may disable irqs. So it is not safe to use spin_unlock_bh in such context. Let's switch to irqsave version for safety. ------------[ cut here ]------------ WARNING: CPU: 2 PID: 4504 at kernel/softirq.c:161 __local_bh_enable_ip+0x46/0x60 [ 168.797710] Hardware name: Dell Inc. OptiPlex 7040/0Y7WYT, BIOS 1.2.8 01/26/2016 [ 168.797712] task: ffff8c693d22db80 task.stack: ffffb51b482bc000 [ 168.797718] RIP: 0010:__local_bh_enable_ip+0x46/0x60 [ 168.797721] RSP: 0018:ffffb51b482bfa10 EFLAGS: 00010046 [ 168.797724] RAX: 0000000000000046 RBX: ffff8c6900278000 RCX: 00000000ffffffff [ 168.797726] RDX: 0000000000000001 RSI: 0000000000000200 RDI: ffffffffc06a0330 [ 168.797728] RBP: ffffb51b482bfa10 R08: 0000000000000000 R09: ffff8c690027cb90 [ 168.797730] R10: ffffb51b482bfa40 R11: 00000004072f0001 R12: 0000000000000000 [ 168.797732] R13: 0000000000000000 R14: ffff8c690027ca9c R15: 0000000000000000 [ 168.797735] FS: 00007ff187c56700(0000) GS:ffff8c6959d00000(0000) knlGS:0000000000000000 [ 168.797738] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 168.797740] CR2: 0000562bc0c3991f CR3: 0000000430614006 CR4: 00000000003606e0 [ 168.797742] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 168.797744] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 168.797745] Call Trace: [ 168.797755] _raw_spin_unlock_bh+0x1e/0x20 [ 168.797826] shadow_context_status_change+0x120/0x1e0 [i915] [ 168.797831] notifier_call_chain+0x4a/0x70 [ 168.797834] atomic_notifier_call_chain+0x1a/0x20 [ 168.797896] execlists_cancel_port_requests+0x4f/0x80 [i915] [ 168.797956] reset_common_ring+0x30/0x100 [i915] [ 168.798007] i915_gem_reset_engine+0x114/0x330 [i915] [ 168.798060] ? i915_gem_retire_requests+0x75/0x180 [i915] [ 168.798111] i915_gem_reset+0x3e/0xb0 [i915] [ 168.798149] i915_reset+0x10b/0x1c0 [i915] [ 168.798187] i915_reset_device+0x209/0x220 [i915] [ 168.798225] ? gen8_gt_irq_ack+0x170/0x170 [i915] [ 168.798229] ? __queue_work+0x430/0x430 [ 168.798270] i915_handle_error+0x285/0x420 [i915] [ 168.798275] ? mntput+0x24/0x40 [ 168.798281] ? terminate_walk+0x8e/0xf0 [ 168.798328] i915_wedged_set+0x84/0xc0 [i915] [ 168.798333] simple_attr_write+0xab/0xc0 [ 168.798337] full_proxy_write+0x54/0x90 [ 168.798343] __vfs_write+0x37/0x170 [ 168.798349] ? common_file_perm+0x4c/0x100 [ 168.798355] ? apparmor_file_permission+0x1a/0x20 [ 168.798361] ? security_file_permission+0x3b/0xc0 [ 168.798365] vfs_write+0xb8/0x1b0 [ 168.798370] SyS_write+0x55/0xc0 [ 168.798376] entry_SYSCALL_64_fastpath+0x1e/0xa9 Fixes: 0e86cc9 ("drm/i915/gvt: implement per-vm mmio switching optimization") Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f6ded475bb2c..f031234290f6 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -140,9 +140,10 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; enum intel_engine_id ring_id = req->engine->id; struct intel_vgpu_workload *workload; + unsigned long flags; if (!is_gvt_request(req)) { - spin_lock_bh(&scheduler->mmio_context_lock); + spin_lock_irqsave(&scheduler->mmio_context_lock, flags); if (action == INTEL_CONTEXT_SCHEDULE_IN && scheduler->engine_owner[ring_id]) { /* Switch ring from vGPU to host. */ @@ -150,7 +151,7 @@ static int shadow_context_status_change(struct notifier_block *nb, NULL, ring_id); scheduler->engine_owner[ring_id] = NULL; } - spin_unlock_bh(&scheduler->mmio_context_lock); + spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); return NOTIFY_OK; } @@ -161,7 +162,7 @@ static int shadow_context_status_change(struct notifier_block *nb, switch (action) { case INTEL_CONTEXT_SCHEDULE_IN: - spin_lock_bh(&scheduler->mmio_context_lock); + spin_lock_irqsave(&scheduler->mmio_context_lock, flags); if (workload->vgpu != scheduler->engine_owner[ring_id]) { /* Switch ring from host to vGPU or vGPU to vGPU. */ intel_gvt_switch_mmio(scheduler->engine_owner[ring_id], @@ -170,7 +171,7 @@ static int shadow_context_status_change(struct notifier_block *nb, } else gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n", ring_id, workload->vgpu->id); - spin_unlock_bh(&scheduler->mmio_context_lock); + spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: From bf3a26b3cba59e2eee91cec3de6fdb4e8e670295 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Tue, 21 Nov 2017 10:54:41 +0800 Subject: [PATCH 131/333] drm/i915/gvt: remove skl_misc_ctl_write handler With different settings of compressed data hash mode between VMs and host may cause gpu issues. Commit: 1999f108c ("drm/i915/gvt: Disable compression workaround for Gen9") disable compression workaround of guest in gvt host to align with host. Commit: 93564044f ("drm/i915: Switch over to the LLC/eLLC hotspot avoidance hash mode for CCS") add compression workaround, then we can remove the skl_misc_ctl_write hanlder. Better solution should be always keeping same settings as host, and bypass the write request from VMs, but it need to fetch data from host's "Context". Cc: Zhi Wang Signed-off-by: Weinan Li Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 45 ++++------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a5bed2e71b92..44cd5ff5e97d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1381,40 +1381,6 @@ static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, return intel_vgpu_default_mmio_write(vgpu, offset, &v, bytes); } -static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, - void *p_data, unsigned int bytes) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - u32 v = *(u32 *)p_data; - - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) - return intel_vgpu_default_mmio_write(vgpu, - offset, p_data, bytes); - - switch (offset) { - case 0x4ddc: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 31); - break; - case 0x42080: - /* bypass WaCompressedResourceDisplayNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 15); - break; - case 0xe194: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 8); - break; - case 0x7014: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 13); - break; - default: - return -EINVAL; - } - - return 0; -} - static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1671,8 +1637,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, - skl_misc_ctl_write); + MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2564,8 +2530,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0x6e570, D_BDW_PLUS); MMIO_D(0x65f10, D_BDW_PLUS); - MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, - skl_misc_ctl_write); + MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2615,8 +2580,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write); - MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write); + MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, NULL); + MMIO_DH(0x42080, D_SKL_PLUS, NULL, NULL); MMIO_D(0x45504, D_SKL_PLUS); MMIO_D(0x45520, D_SKL_PLUS); MMIO_D(0x46000, D_SKL_PLUS); From c3c80f0736bb2767fab983d1ae53252b2ddd405d Mon Sep 17 00:00:00 2001 From: fred gao Date: Tue, 14 Nov 2017 17:09:35 +0800 Subject: [PATCH 132/333] drm/i915/gvt: Move request alloc to dispatch_workload path only Previously the performance is improved through the workload auditing and shadowing ahead of vGPU scheduling, however, there is the case that more requests are allocated in submit_context before the previous request is added, the timeline will hold its seqno which is later. This patch is to move the request alloc to dispatch_workload function, where is the same place as request is added. It will fix the issue of kernel BUG for (timeline->seqno != request->fence.seqno) check when add_request. Fixes: 89ea20b930cb ("drm/i915/gvt: Factor out scan and shadow from workload dispatch") Signed-off-by: Chuanxiao Dong Signed-off-by: fred gao Signed-off-by: Zhenyu Wang (cherry picked from commit f2880e04f3a5419366926182fc97a3c2e4fd8f2a) --- drivers/gpu/drm/i915/gvt/execlist.c | 6 ++++++ drivers/gpu/drm/i915/gvt/scheduler.c | 24 ++++++++++++++++++++---- drivers/gpu/drm/i915/gvt/scheduler.h | 3 +++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 4427be18e4a9..940cdaaa3f24 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -496,6 +496,12 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload) goto err_unpin_mm; } + ret = intel_gvt_generate_request(workload); + if (ret) { + gvt_vgpu_err("fail to generate request\n"); + goto err_unpin_mm; + } + ret = prepare_shadow_batch_buffer(workload); if (ret) { gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f031234290f6..3ac1dc97a7a0 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -254,7 +254,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct drm_i915_gem_request *rq; struct intel_vgpu *vgpu = workload->vgpu; struct intel_ring *ring; int ret; @@ -300,6 +299,26 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ret = populate_shadow_context(workload); if (ret) goto err_unpin; + workload->shadowed = true; + return 0; + +err_unpin: + engine->context_unpin(engine, shadow_ctx); +err_shadow: + release_shadow_wa_ctx(&workload->wa_ctx); +err_scan: + return ret; +} + +int intel_gvt_generate_request(struct intel_vgpu_workload *workload) +{ + int ring_id = workload->ring_id; + struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; + struct drm_i915_gem_request *rq; + struct intel_vgpu *vgpu = workload->vgpu; + struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx; + int ret; rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { @@ -314,14 +333,11 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) ret = copy_workload_to_ring_buffer(workload); if (ret) goto err_unpin; - workload->shadowed = true; return 0; err_unpin: engine->context_unpin(engine, shadow_ctx); -err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); -err_scan: return ret; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 2d694f6c0907..b9f872204d7e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -142,4 +142,7 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu); void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu); void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx); + +int intel_gvt_generate_request(struct intel_vgpu_workload *workload); + #endif From 7e60590208942de856fd43956ccebd19e71d686f Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Thu, 16 Nov 2017 16:54:23 +0800 Subject: [PATCH 133/333] drm/i915/gvt: enabled pipe A default on creating vgpu when i915 driver unloading, it will shutdown all CRTCs and it will introudce kernel panic when conducting igt drv_module_reload test case under guest environment (bug reported by XENGT-468) as below: BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 IP: intel_edp_backlight_off+0xe/0x7c [i915] RIP: 0010:intel_edp_backlight_off+0xe/0x7c [i915] Call Trace: intel_disable_ddi+0xb3/0xbc [i915] intel_modeset_setup_hw_state+0x654/0xb4c [i915] intel_modeset_init+0x9f1/0xe69 [i915] ? intel_i2c_reset+0x3d/0x40 [i915] ? intel_setup_gmbus+0xba/0x249 [i915] i915_driver_load+0xae5/0xcc0 [i915] i915_pci_probe+0x3a/0x3c [i915] local_pci_probe+0x38/0x7b pci_device_probe+0xec/0x12b driver_probe_device+0x134/0x294 __driver_attach+0x6a/0x8c ? driver_probe_device+0x294/0x294 bus_for_each_dev+0x68/0x80 driver_attach+0x19/0x1b bus_add_driver+0xea/0x1d3 ? 0xffffffffa03cd000 driver_register+0x85/0xc1 ? 0xffffffffa03cd000 __pci_register_driver+0x55/0x57 i915_init+0x57/0x5a [i915] do_one_initcall+0x8a/0x12e ? __vunmap+0x8d/0x93 ? kmem_cache_alloc_trace+0x96/0x11c do_init_module+0x5a/0x1e1 in this case, active connector detected but no active pipe available, so it will hang to disable connector. to fix, on vgpu creating, to report active pipe available for guest. Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 3c318439a659..355120865efd 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -282,6 +282,7 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, int type, unsigned int resolution) { + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); if (WARN_ON(resolution >= GVT_EDID_NUM)) @@ -307,6 +308,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->type = type; emulate_monitor_status_change(vgpu); + vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; return 0; } From b721b65af4eb46df6a1d9e34b14003225e403565 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Tue, 28 Nov 2017 07:29:54 +0800 Subject: [PATCH 134/333] drm/i915/gvt: Correct ADDR_4K/2M/1G_MASK definition For ADDR_4K_MASK, bit[45..12] should be 1, all other bits should be 0. The current definition wrongly set bit[46] as 1 also. This path fixes this. v2: Add commit message, fixes and cc stable.(Zhenyu) Fixes: 2707e4446688("drm/i915/gvt: vGPU graphics memory virtualization") Signed-off-by: Xiong Zhang Cc: stable@vger.kernel.org Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 2801d70579d8..8e331142badb 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -311,9 +311,9 @@ static inline int gtt_set_entry64(void *pt, #define GTT_HAW 46 -#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30 + 1)) - 1) << 30) -#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21 + 1)) - 1) << 21) -#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12 + 1)) - 1) << 12) +#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30)) - 1) << 30) +#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21)) - 1) << 21) +#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12)) - 1) << 12) static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) { From 1a6152d36dee08da2be2a3030dceb45ef680460a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 28 Nov 2017 23:01:44 +0800 Subject: [PATCH 135/333] quota: propagate error from __dquot_initialize In commit 6184fc0b8dd7 ("quota: Propagate error from ->acquire_dquot()"), we have propagated error from __dquot_initialize to caller, but we forgot to handle such error in add_dquot_ref(), so, currently, during quota accounting information initialization flow, if we failed for some of inodes, we just ignore such error, and do account for others, which is not a good implementation. In this patch, we choose to let user be aware of such error, so after turning on quota successfully, we can make sure all inodes disk usage can be accounted, which will be more reasonable. Suggested-by: Jan Kara Signed-off-by: Chao Yu Signed-off-by: Jan Kara --- fs/quota/dquot.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 39f1b0b0c76f..d38684ff2ebb 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -941,12 +941,13 @@ static int dqinit_needed(struct inode *inode, int type) } /* This routine is guarded by s_umount semaphore */ -static void add_dquot_ref(struct super_block *sb, int type) +static int add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; #ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif + int err = 0; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { @@ -966,7 +967,11 @@ static void add_dquot_ref(struct super_block *sb, int type) reserved = 1; #endif iput(old_inode); - __dquot_initialize(inode, type); + err = __dquot_initialize(inode, type); + if (err) { + iput(inode); + goto out; + } /* * We hold a reference to 'inode' so it couldn't have been @@ -981,7 +986,7 @@ static void add_dquot_ref(struct super_block *sb, int type) } spin_unlock(&sb->s_inode_list_lock); iput(old_inode); - +out: #ifdef CONFIG_QUOTA_DEBUG if (reserved) { quota_error(sb, "Writes happened before quota was turned on " @@ -989,6 +994,7 @@ static void add_dquot_ref(struct super_block *sb, int type) "Please run quotacheck(8)"); } #endif + return err; } /* @@ -2379,10 +2385,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, dqopt->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); - add_dquot_ref(sb, type); - - return 0; + error = add_dquot_ref(sb, type); + if (error) + dquot_disable(sb, type, flags); + return error; out_file_init: dqopt->files[type] = NULL; iput(inode); From 9d0ca444d0b317d31acf6f8fc18ac6f478518924 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Nov 2017 17:20:53 +0100 Subject: [PATCH 136/333] s390/gs: add compat regset for the guarded storage broadcast control block git commit e525f8a6e696210d15f8b8277d4da12fc4add299 "s390/gs: add regset for the guarded storage broadcast control block" added the missing regset to the s390_regsets array but failed to add it to the s390_compat_regsets array. Fixes: e525f8a6e696 ("add compat regset for the guarded storage broadcast control block") Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 26c0523c1488..cd3df5514552 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1650,6 +1650,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_gs_cb_get, .set = s390_gs_cb_set, }, + { + .core_note_type = NT_S390_GS_BC, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, { .core_note_type = NT_S390_RI_CB, .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), From eb1bd249ba016284ed762d87c1989dd822500773 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 28 Nov 2017 18:28:44 +0200 Subject: [PATCH 137/333] nvme-rdma: fix memory leak during queue allocation In case nvme_rdma_wait_for_cm timeout expires before we get an established or rejected event (rdma_connect succeeded) from rdma_cm, we end up with leaking the ib transport resources for dedicated queue. This scenario can easily reproduced using traffic test during port toggling. Also, in order to protect from parallel ib queue destruction, that may be invoked from different context's, introduce new flag that stands for transport readiness. While we're here, protect also against a situation that we can receive rdma_cm events during ib queue destruction. Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 02ef0771f6b9..37af56596be6 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -77,6 +77,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_ALLOCATED = 0, NVME_RDMA_Q_LIVE = 1, + NVME_RDMA_Q_TR_READY = 2, }; struct nvme_rdma_queue { @@ -390,12 +391,23 @@ out_err: static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) { - struct nvme_rdma_device *dev = queue->device; - struct ib_device *ibdev = dev->dev; + struct nvme_rdma_device *dev; + struct ib_device *ibdev; + + if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags)) + return; + + dev = queue->device; + ibdev = dev->dev; ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); - rdma_destroy_qp(queue->cm_id); + /* + * The cm_id object might have been destroyed during RDMA connection + * establishment error flow to avoid getting other cma events, thus + * the destruction of the QP shouldn't use rdma_cm API. + */ + ib_destroy_qp(queue->qp); ib_free_cq(queue->ib_cq); nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, @@ -463,6 +475,8 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_ring; } + set_bit(NVME_RDMA_Q_TR_READY, &queue->flags); + return 0; out_destroy_ring: @@ -529,6 +543,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, out_destroy_cm_id: rdma_destroy_id(queue->cm_id); + nvme_rdma_destroy_queue_ib(queue); return ret; } From 7e5dd57ef3081ff6c03908d786ed5087f6fbb7ae Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 25 Nov 2017 03:03:00 +0900 Subject: [PATCH 138/333] nvme-pci: fix NULL pointer dereference in nvme_free_host_mem() Following condition which will cause NULL pointer dereference will occur in nvme_free_host_mem() when it tries to remove pci device via nvme_remove() especially after a failure of host memory allocation for HMB. "(host_mem_descs == NULL) && (nr_host_mem_descs != 0)" It's because __nr_host_mem_descs__ is not cleared to 0 unlike __host_mem_descs__ is so. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 617374762b7c..f5800c3c9082 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1759,6 +1759,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev) dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), dev->host_mem_descs, dev->host_mem_descs_dma); dev->host_mem_descs = NULL; + dev->nr_host_mem_descs = 0; } static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, From d210a9874b8f6166579408131cb74495caff1958 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 23 Nov 2017 17:13:40 +0100 Subject: [PATCH 139/333] xfs: fortify xfs_alloc_buftarg error handling percpu_counter_init failure path doesn't clean up &btp->bt_lru list. Call list_lru_destroy in that error path. Similarly register_shrinker error path is not handled. While it is unlikely to trigger these error path, it is not impossible especially the later might fail with large NUMAs. Let's handle the failure to make the code more robust. Noticed-by: Tetsuo Handa Signed-off-by: Michal Hocko Acked-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4db6e8d780f6..4c6e86d861fd 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1815,22 +1815,27 @@ xfs_alloc_buftarg( btp->bt_daxdev = dax_dev; if (xfs_setsize_buftarg_early(btp, bdev)) - goto error; + goto error_free; if (list_lru_init(&btp->bt_lru)) - goto error; + goto error_free; if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) - goto error; + goto error_lru; btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&btp->bt_shrinker); + if (register_shrinker(&btp->bt_shrinker)) + goto error_pcpu; return btp; -error: +error_pcpu: + percpu_counter_destroy(&btp->bt_io_count); +error_lru: + list_lru_destroy(&btp->bt_lru); +error_free: kmem_free(btp); return NULL; } From d41c6172bd4031979eab722c265a2e5764383c3c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Nov 2017 18:23:32 -0800 Subject: [PATCH 140/333] xfs: fix leaks on corruption errors in xfs_bmap.c Use _GOTO instead of _RETURN so we can free the allocated cursor on error. Fixes: bf80628 ("xfs: remove xfs_bmse_shift_one") Fixes-coverity-id: 1423813, 1423676 Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 08df809e2315..1210f684d3c2 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5662,7 +5662,8 @@ xfs_bmap_collapse_extents( *done = true; goto del_cursor; } - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); new_startoff = got.br_startoff - offset_shift_fsb; if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) { @@ -5767,7 +5768,8 @@ xfs_bmap_insert_extents( goto del_cursor; } } - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); if (stop_fsb >= got.br_startoff + got.br_blockcount) { error = -EIO; From eda6bc27ccc852d34393739009486932f3ba70ae Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Nov 2017 18:23:32 -0800 Subject: [PATCH 141/333] xfs: fix uninitialized variable in xfs_scrub_quota On the first pass through the while(1) loop, we get to xfs_scrub_should_terminate() which can test the uninitialized error variable. Fixes-coverity-id: 1423737 Fixes: c2fc338c ("xfs: scrub quota information") Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 8e58ba842946..613def9692a1 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -207,7 +207,7 @@ xfs_scrub_quota( xfs_dqid_t id = 0; uint dqtype; int nimaps; - int error; + int error = 0; if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return -ENOENT; From 712d361d59efa6349a9538f4fd9a49073f0e8127 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Nov 2017 18:23:33 -0800 Subject: [PATCH 142/333] xfs: calculate correct offset in xfs_scrub_quota_item It's only used for tracepoints so it's relatively harmless, but the offset is calculated incorrectly in xfs_scrub_quota_item. qi_dqperchunk is the nr. of dquots per "chunk" which we have conveniently *cough* defined to always be 1 FSB. Therefore block_offset * qi_dqperchunk == first id in that chunk, and so offset = id / qi_dqperchunk id * dqperchunk is ... meaningless. Fixes-coverity-id: 1423965 Fixes: c2fc338c ("xfs: scrub quota information") Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 613def9692a1..3d9037eceaf1 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -107,7 +107,7 @@ xfs_scrub_quota_item( unsigned long long rcount; xfs_ino_t fs_icount; - offset = id * qi->qi_dqperchunk; + offset = id / qi->qi_dqperchunk; /* * We fed $id and DQNEXT into the xfs_qm_dqget call, which means From f81a348728ec5ac43f3bbcf81c97d52baba253f7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 21 Nov 2017 11:59:13 +0000 Subject: [PATCH 143/333] arm64: mm: cleanup stale AIVIVT references Since commit: 155433cb365ee466 ("arm64: cache: Remove support for ASID-tagged VIVT I-caches") ... the kernel no longer cares about AIVIVT I-caches, as these were removed from the architecture. This patch removes the stale references to such I-caches. The comment in flush_context() is also updated to clarify when and where the TLB invalidation occurs. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/mm/context.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 76d1cc85d5b1..955130762a3c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -38,7 +38,7 @@ * * See Documentation/cachetlb.txt for more information. Please note that * the implementation assumes non-aliasing VIPT D-cache and (aliasing) - * VIPT or ASID-tagged VIVT I-cache. + * VIPT I-cache. * * flush_cache_mm(mm) * diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index ab9f5f0fb2c7..28a45a19aae7 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -117,7 +117,10 @@ static void flush_context(unsigned int cpu) per_cpu(reserved_asids, i) = asid; } - /* Queue a TLB invalidate and flush the I-cache if necessary. */ + /* + * Queue a TLB invalidation for each CPU to perform on next + * context-switch + */ cpumask_setall(&tlb_flush_pending); } From 250dcd11466e06df64b92520e2c56bdae453581b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 27 Nov 2017 11:28:50 +0100 Subject: [PATCH 144/333] mmc: sdhci: Avoid swiotlb buffer being full The commit de3ee99b097d ("mmc: Delete bounce buffer handling") deletes the bounce buffer handling, but also causes the max_req_size for sdhci to be increased, in case when max_segs == 1. This causes errors for sdhci-pci Ricoh variant, about the swiotlb buffer to become full. Fix the issue, by taking IO_TLB_SEGSIZE and IO_TLB_SHIFT into account when deciding the max_req_size for sdhci. Reported-by: Jiri Slaby Fixes: de3ee99b097d ("mmc: Delete bounce buffer handling") Cc: # v4.14+ Signed-off-by: Ulf Hansson Tested-by: Jiri Slaby Acked-by: Adrian Hunter --- drivers/mmc/host/sdhci.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 2f14334e42df..e9290a3439d5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -3650,17 +3651,6 @@ int sdhci_setup_host(struct sdhci_host *host) spin_lock_init(&host->lock); - /* - * Maximum number of segments. Depends on if the hardware - * can do scatter/gather or not. - */ - if (host->flags & SDHCI_USE_ADMA) - mmc->max_segs = SDHCI_MAX_SEGS; - else if (host->flags & SDHCI_USE_SDMA) - mmc->max_segs = 1; - else /* PIO */ - mmc->max_segs = SDHCI_MAX_SEGS; - /* * Maximum number of sectors in one transfer. Limited by SDMA boundary * size (512KiB). Note some tuning modes impose a 4MiB limit, but this @@ -3668,6 +3658,24 @@ int sdhci_setup_host(struct sdhci_host *host) */ mmc->max_req_size = 524288; + /* + * Maximum number of segments. Depends on if the hardware + * can do scatter/gather or not. + */ + if (host->flags & SDHCI_USE_ADMA) { + mmc->max_segs = SDHCI_MAX_SEGS; + } else if (host->flags & SDHCI_USE_SDMA) { + mmc->max_segs = 1; + if (swiotlb_max_segment()) { + unsigned int max_req_size = (1 << IO_TLB_SHIFT) * + IO_TLB_SEGSIZE; + mmc->max_req_size = min(mmc->max_req_size, + max_req_size); + } + } else { /* PIO */ + mmc->max_segs = SDHCI_MAX_SEGS; + } + /* * Maximum segment size. Could be one segment with the maximum number * of bytes. When doing hardware scatter/gather, each entry cannot From 4650d02ad2d9b2c1c7aa36055166db6aee68f72e Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 14 Nov 2017 11:35:37 -0800 Subject: [PATCH 145/333] RISC-V: Remove unused arguments from ATOMIC_OP Our atomics are generated from a complicated series of preprocessor macros, each of which is slightly different from the last. When writing the macros I'd accidentally left some unused arguments floating around. This patch removes the unused macro arguments. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/atomic.h | 94 ++++++++++++++++----------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index e2e37c57cbeb..40c73dd59c15 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -50,30 +50,30 @@ static __always_inline void atomic64_set(atomic64_t *v, long i) * have the AQ or RL bits set. These don't return anything, so there's only * one version to worry about. */ -#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix) \ -static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ -{ \ - __asm__ __volatile__ ( \ - "amo" #asm_op "." #asm_type " zero, %1, %0" \ - : "+A" (v->counter) \ - : "r" (I) \ - : "memory"); \ +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \ +static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ +{ \ + __asm__ __volatile__ ( \ + "amo" #asm_op "." #asm_type " zero, %1, %0" \ + : "+A" (v->counter) \ + : "r" (I) \ + : "memory"); \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, w, int, ) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP (op, asm_op, I, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, w, int, ) \ - ATOMIC_OP (op, asm_op, c_op, I, d, long, 64) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP (op, asm_op, I, w, int, ) \ + ATOMIC_OP (op, asm_op, I, d, long, 64) #endif -ATOMIC_OPS(add, add, +, i) -ATOMIC_OPS(sub, add, +, -i) -ATOMIC_OPS(and, and, &, i) -ATOMIC_OPS( or, or, |, i) -ATOMIC_OPS(xor, xor, ^, i) +ATOMIC_OPS(add, add, i) +ATOMIC_OPS(sub, add, -i) +ATOMIC_OPS(and, and, i) +ATOMIC_OPS( or, or, i) +ATOMIC_OPS(xor, xor, i) #undef ATOMIC_OP #undef ATOMIC_OPS @@ -83,7 +83,7 @@ ATOMIC_OPS(xor, xor, ^, i) * There's two flavors of these: the arithmatic ops have both fetch and return * versions, while the logical ops only have fetch versions. */ -#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \ +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \ static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \ { \ register c_type ret; \ @@ -103,13 +103,13 @@ static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, ato #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) #else #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, d, long, 64) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) #endif @@ -126,28 +126,28 @@ ATOMIC_OPS(sub, add, +, -i, .aqrl, ) #undef ATOMIC_OPS #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) +#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) +#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64) #endif -ATOMIC_OPS(and, and, &, i, , _relaxed) -ATOMIC_OPS(and, and, &, i, .aq , _acquire) -ATOMIC_OPS(and, and, &, i, .rl , _release) -ATOMIC_OPS(and, and, &, i, .aqrl, ) +ATOMIC_OPS(and, and, i, , _relaxed) +ATOMIC_OPS(and, and, i, .aq , _acquire) +ATOMIC_OPS(and, and, i, .rl , _release) +ATOMIC_OPS(and, and, i, .aqrl, ) -ATOMIC_OPS( or, or, |, i, , _relaxed) -ATOMIC_OPS( or, or, |, i, .aq , _acquire) -ATOMIC_OPS( or, or, |, i, .rl , _release) -ATOMIC_OPS( or, or, |, i, .aqrl, ) +ATOMIC_OPS( or, or, i, , _relaxed) +ATOMIC_OPS( or, or, i, .aq , _acquire) +ATOMIC_OPS( or, or, i, .rl , _release) +ATOMIC_OPS( or, or, i, .aqrl, ) -ATOMIC_OPS(xor, xor, ^, i, , _relaxed) -ATOMIC_OPS(xor, xor, ^, i, .aq , _acquire) -ATOMIC_OPS(xor, xor, ^, i, .rl , _release) -ATOMIC_OPS(xor, xor, ^, i, .aqrl, ) +ATOMIC_OPS(xor, xor, i, , _relaxed) +ATOMIC_OPS(xor, xor, i, .aq , _acquire) +ATOMIC_OPS(xor, xor, i, .rl , _release) +ATOMIC_OPS(xor, xor, i, .aqrl, ) #undef ATOMIC_OPS @@ -182,13 +182,13 @@ ATOMIC_OPS(add_negative, add, <, 0) #undef ATOMIC_OP #undef ATOMIC_OPS -#define ATOMIC_OP(op, func_op, c_op, I, c_type, prefix) \ +#define ATOMIC_OP(op, func_op, I, c_type, prefix) \ static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \ { \ atomic##prefix##_##func_op(I, v); \ } -#define ATOMIC_FETCH_OP(op, func_op, c_op, I, c_type, prefix) \ +#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \ static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \ { \ return atomic##prefix##_fetch_##func_op(I, v); \ @@ -202,16 +202,16 @@ static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \ + ATOMIC_OP (op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) #else #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \ + ATOMIC_OP (op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \ - ATOMIC_OP (op, asm_op, c_op, I, long, 64) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, long, 64) \ + ATOMIC_OP (op, asm_op, I, long, 64) \ + ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64) #endif From 8286d51a6c244738aeb071fcd7d2e36a3374e150 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:02:50 -0800 Subject: [PATCH 146/333] RISC-V: Comment on why {,cmp}xchg is ordered how it is This is another memory model FIXME. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/atomic.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index 40c73dd59c15..e65d1cd89e28 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -300,8 +300,13 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v) /* * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as - * {cmp,}xchg and the operations that return, so they need a barrier. We just - * use the other implementations directly. + * {cmp,}xchg and the operations that return, so they need a barrier. + */ +/* + * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by + * assigning the same barrier to both the LR and SC operations, but that might + * not make any sense. We're waiting on a memory model specification to + * determine exactly what the right thing to do is here. */ #define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \ static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \ From 61a60d35b7d1b0b3a31bc21d15805a3654f60920 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:03:48 -0800 Subject: [PATCH 147/333] RISC-V: Remove __smp_bp__{before,after}_atomic These duplicate the asm-generic definitions are therefor aren't useful. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 183534b7c39b..455ee16127fb 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,21 +38,6 @@ #define smp_rmb() RISCV_FENCE(r,r) #define smp_wmb() RISCV_FENCE(w,w) -/* - * These fences exist to enforce ordering around the relaxed AMOs. The - * documentation defines that - * " - * atomic_fetch_add(); - * is equivalent to: - * smp_mb__before_atomic(); - * atomic_fetch_add_relaxed(); - * smp_mb__after_atomic(); - * " - * So we emit full fences on both sides. - */ -#define __smb_mb__before_atomic() smp_mb() -#define __smb_mb__after_atomic() smp_mb() - /* * These barriers prevent accesses performed outside a spinlock from being moved * inside a spinlock. Since RISC-V sets the aq/rl bits on our spinlock only From 3343eb6806f365b9e3d451040671fa9336e57513 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:03:55 -0800 Subject: [PATCH 148/333] RISC-V: Remove smb_mb__{before,after}_spinlock() These are obselete. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 455ee16127fb..773c4e039cd7 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,14 +38,6 @@ #define smp_rmb() RISCV_FENCE(r,r) #define smp_wmb() RISCV_FENCE(w,w) -/* - * These barriers prevent accesses performed outside a spinlock from being moved - * inside a spinlock. Since RISC-V sets the aq/rl bits on our spinlock only - * enforce release consistency, we need full fences here. - */ -#define smb_mb__before_spinlock() smp_mb() -#define smb_mb__after_spinlock() smp_mb() - #include #endif /* __ASSEMBLY__ */ From 9347ce54cd699db92d37e66191aa4b9a0a92304e Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:04:05 -0800 Subject: [PATCH 149/333] RISC-V: __test_and_op_bit_ord should be strongly ordered I mis-read the documentation. After looking at it again the documentation is actually as clear as it can be, it's just that I didn't actually read it in order and therefor did the wrong thing. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 7c281ef1d583..f30daf26f08f 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -67,7 +67,7 @@ : "memory"); #define __test_and_op_bit(op, mod, nr, addr) \ - __test_and_op_bit_ord(op, mod, nr, addr, ) + __test_and_op_bit_ord(op, mod, nr, addr, .aqrl) #define __op_bit(op, mod, nr, addr) \ __op_bit_ord(op, mod, nr, addr, ) From 21db403660d1433b8a02b26d5d4084921b857c40 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:05:04 -0800 Subject: [PATCH 150/333] RISC-V: Add READ_ONCE in arch_spin_is_locked() This was just incorrect in the original version. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index 04c71d938afd..a6a005c4f2fb 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -24,7 +24,7 @@ /* FIXME: Replace this with a ticket lock, like MIPS. */ -#define arch_spin_is_locked(x) ((x)->lock != 0) +#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0) static inline void arch_spin_unlock(arch_spinlock_t *lock) { From c901e45a999a1935d7adf653e1cf12dfbcd737aa Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:06:17 -0800 Subject: [PATCH 151/333] RISC-V: `sfence.vma` orderes the instruction cache This is just a comment change, but it's one that bit me on the mailing list. It turns out that issuing a `sfence.vma` enforces instruction cache ordering in addition to TLB ordering. This isn't explicitly called out in the ISA manual, but Andrew will be making that more clear in a future revision. CC: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlbflush.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 5ee4ae370b5e..c79fab3d377d 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -17,7 +17,10 @@ #ifdef CONFIG_MMU -/* Flush entire local TLB */ +/* + * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction + * cache as well, so a 'fence.i' is not necessary. + */ static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); From bf730552734372e45b10fe056726de1950fdfdde Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:06:31 -0800 Subject: [PATCH 152/333] RISC-V: remove spin_unlock_wait() This was removed from the other architectures in commit 952111d7db02 ("arch: Remove spin_unlock_wait() arch-specific definitions"). That landed between when we got upstream and when our patches were reviewed, so this is a followup patch. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/spinlock.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index a6a005c4f2fb..2fd27e8ef1fd 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -58,15 +58,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) } } -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_rmb(); - do { - cpu_relax(); - } while (arch_spin_is_locked(lock)); - smp_acquire__after_ctrl_dep(); -} - /***********************************************************/ static inline void arch_read_lock(arch_rwlock_t *lock) From b43aaee69d4327d05e7624d9471c17d015b4d67d Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Tue, 21 Nov 2017 09:08:07 -0500 Subject: [PATCH 153/333] drm/amdgpu: move UVD/VCE and VCN structure out from union MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the enablement of VCN Dec and Enc from user space, User space queries kernel for the IP information, if HW has UVD/VCE, the info comes from these IP blocks, but this could end up mis-interpret for VCN when they are in the union, the other way same when HW with VCN block. Signed-off-by: Leo Liu Acked-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Fixes: 95d0906f8506 ("drm/amdgpu: add initial vcn support and decode tests") Cc: stable@vger.kernel.org Reviewed-and-Tested-by: Michel Dänzer --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5afaf6016b4a..c25cedff4915 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1572,18 +1572,14 @@ struct amdgpu_device { /* sdma */ struct amdgpu_sdma sdma; - union { - struct { - /* uvd */ - struct amdgpu_uvd uvd; + /* uvd */ + struct amdgpu_uvd uvd; - /* vce */ - struct amdgpu_vce vce; - }; + /* vce */ + struct amdgpu_vce vce; - /* vcn */ - struct amdgpu_vcn vcn; - }; + /* vcn */ + struct amdgpu_vcn vcn; /* firmwares */ struct amdgpu_firmware firmware; From ed162fe7643023b52cc21998a6b3eff15a233c5e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Nov 2017 15:41:30 -0500 Subject: [PATCH 154/333] drm/amdgpu/gfx7: cache raster_config values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We did this for gfx6 and 8, but somehow missed gfx7. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 5c8a7a48a4ad..419ba0ce7ee5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1819,6 +1819,22 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) adev->gfx.config.backend_enable_mask, num_rb_pipes); } + + /* cache the values for userspace */ + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + adev->gfx.config.rb_config[i][j].rb_backend_disable = + RREG32(mmCC_RB_BACKEND_DISABLE); + adev->gfx.config.rb_config[i][j].user_rb_backend_disable = + RREG32(mmGC_USER_RB_BACKEND_DISABLE); + adev->gfx.config.rb_config[i][j].raster_config = + RREG32(mmPA_SC_RASTER_CONFIG); + adev->gfx.config.rb_config[i][j].raster_config_1 = + RREG32(mmPA_SC_RASTER_CONFIG_1); + } + } + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } From aca31681b1a594dd1a25a51ff2c58f4f4a165446 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 21 Nov 2017 13:53:29 -0500 Subject: [PATCH 155/333] drm/amdgpu: used cached gca values for cik_read_register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the cached values has less latency for bare metal and prevents reading back bogus values if the engine is powergated. This was implemented for VI and SI, but somehow CIK got missed. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik.c | 111 ++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 793b1470284d..a296f7bbe57c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1023,22 +1023,101 @@ static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {mmPA_SC_RASTER_CONFIG_1, true}, }; -static uint32_t cik_read_indexed_register(struct amdgpu_device *adev, - u32 se_num, u32 sh_num, - u32 reg_offset) + +static uint32_t cik_get_register_value(struct amdgpu_device *adev, + bool indexed, u32 se_num, + u32 sh_num, u32 reg_offset) { - uint32_t val; + if (indexed) { + uint32_t val; + unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num; + unsigned sh_idx = (sh_num == 0xffffffff) ? 0 : sh_num; - mutex_lock(&adev->grbm_idx_mutex); - if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + switch (reg_offset) { + case mmCC_RB_BACKEND_DISABLE: + return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable; + case mmGC_USER_RB_BACKEND_DISABLE: + return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable; + case mmPA_SC_RASTER_CONFIG: + return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config; + case mmPA_SC_RASTER_CONFIG_1: + return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config_1; + } - val = RREG32(reg_offset); + mutex_lock(&adev->grbm_idx_mutex); + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); - if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); - return val; + val = RREG32(reg_offset); + + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + return val; + } else { + unsigned idx; + + switch (reg_offset) { + case mmGB_ADDR_CONFIG: + return adev->gfx.config.gb_addr_config; + case mmMC_ARB_RAMCFG: + return adev->gfx.config.mc_arb_ramcfg; + case mmGB_TILE_MODE0: + case mmGB_TILE_MODE1: + case mmGB_TILE_MODE2: + case mmGB_TILE_MODE3: + case mmGB_TILE_MODE4: + case mmGB_TILE_MODE5: + case mmGB_TILE_MODE6: + case mmGB_TILE_MODE7: + case mmGB_TILE_MODE8: + case mmGB_TILE_MODE9: + case mmGB_TILE_MODE10: + case mmGB_TILE_MODE11: + case mmGB_TILE_MODE12: + case mmGB_TILE_MODE13: + case mmGB_TILE_MODE14: + case mmGB_TILE_MODE15: + case mmGB_TILE_MODE16: + case mmGB_TILE_MODE17: + case mmGB_TILE_MODE18: + case mmGB_TILE_MODE19: + case mmGB_TILE_MODE20: + case mmGB_TILE_MODE21: + case mmGB_TILE_MODE22: + case mmGB_TILE_MODE23: + case mmGB_TILE_MODE24: + case mmGB_TILE_MODE25: + case mmGB_TILE_MODE26: + case mmGB_TILE_MODE27: + case mmGB_TILE_MODE28: + case mmGB_TILE_MODE29: + case mmGB_TILE_MODE30: + case mmGB_TILE_MODE31: + idx = (reg_offset - mmGB_TILE_MODE0); + return adev->gfx.config.tile_mode_array[idx]; + case mmGB_MACROTILE_MODE0: + case mmGB_MACROTILE_MODE1: + case mmGB_MACROTILE_MODE2: + case mmGB_MACROTILE_MODE3: + case mmGB_MACROTILE_MODE4: + case mmGB_MACROTILE_MODE5: + case mmGB_MACROTILE_MODE6: + case mmGB_MACROTILE_MODE7: + case mmGB_MACROTILE_MODE8: + case mmGB_MACROTILE_MODE9: + case mmGB_MACROTILE_MODE10: + case mmGB_MACROTILE_MODE11: + case mmGB_MACROTILE_MODE12: + case mmGB_MACROTILE_MODE13: + case mmGB_MACROTILE_MODE14: + case mmGB_MACROTILE_MODE15: + idx = (reg_offset - mmGB_MACROTILE_MODE0); + return adev->gfx.config.macrotile_mode_array[idx]; + default: + return RREG32(reg_offset); + } + } } static int cik_read_register(struct amdgpu_device *adev, u32 se_num, @@ -1048,13 +1127,13 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num, *value = 0; for (i = 0; i < ARRAY_SIZE(cik_allowed_read_registers); i++) { + bool indexed = cik_allowed_read_registers[i].grbm_indexed; + if (reg_offset != cik_allowed_read_registers[i].reg_offset) continue; - *value = cik_allowed_read_registers[i].grbm_indexed ? - cik_read_indexed_register(adev, se_num, - sh_num, reg_offset) : - RREG32(reg_offset); + *value = cik_get_register_value(adev, indexed, se_num, sh_num, + reg_offset); return 0; } return -EINVAL; From b693fc1f83bc9aa5e86c87ac7da48870e45bf486 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Nov 2017 17:46:50 -0500 Subject: [PATCH 156/333] Revert "drm/amdgpu: fix rmmod KCQ disable failed error" This reverts commit 446947b44fb8cabc0213ff4efd706931e36b1963. this patch is incorrrect, amdgpu_ucode_bo_fini always called after gfx_hw_fini. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 --- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2c574374d9b6..3573ecdb06ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1837,9 +1837,6 @@ static int amdgpu_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) - amdgpu_ucode_fini_bo(adev); - for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 033fba2def6f..5f5aa5fddc16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -164,6 +164,9 @@ static int amdgpu_pp_hw_fini(void *handle) ret = adev->powerplay.ip_funcs->hw_fini( adev->powerplay.pp_handle); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + amdgpu_ucode_fini_bo(adev); + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7714f4a6c8b0..447d446b5015 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -442,6 +442,8 @@ static int psp_hw_fini(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + amdgpu_ucode_fini_bo(adev); + psp_ring_destroy(psp, PSP_RING_TYPE__KM); amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); From 89ce6e0afee8eafa679093207dabd717af9d09c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 22 Nov 2017 15:55:21 +0100 Subject: [PATCH 157/333] drm/amdgpu: Set adev->vcn.irq.num_types for VCN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were setting adev->uvd.irq.num_types instead. Fixes: 9b257116e784 ("drm/amdgpu: add vcn enc irq support") Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 1eb4d79d6e30..0450ac5ba6b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1175,7 +1175,7 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->uvd.irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 1; adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; } From fa7c7939b4bf112cd06ba166b739244077898990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 22 Nov 2017 15:55:21 +0100 Subject: [PATCH 158/333] drm/amdgpu: Use unsigned ring indices in amdgpu_queue_mgr_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This matches the corresponding UAPI fields. Treating the ring index as signed could result in accessing random unrelated memory if the MSB was set. Fixes: effd924d2f3b ("drm/amdgpu: untie user ring ids from kernel ring ids v6") Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c25cedff4915..0b14b5373783 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -717,7 +717,7 @@ int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr); int amdgpu_queue_mgr_map(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr, - int hw_ip, int instance, int ring, + u32 hw_ip, u32 instance, u32 ring, struct amdgpu_ring **out_ring); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index 190e28cb827e..93d86619e802 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -63,7 +63,7 @@ static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper, static int amdgpu_identity_map(struct amdgpu_device *adev, struct amdgpu_queue_mapper *mapper, - int ring, + u32 ring, struct amdgpu_ring **out_ring) { switch (mapper->hw_ip) { @@ -121,7 +121,7 @@ static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) static int amdgpu_lru_map(struct amdgpu_device *adev, struct amdgpu_queue_mapper *mapper, - int user_ring, bool lru_pipe_order, + u32 user_ring, bool lru_pipe_order, struct amdgpu_ring **out_ring) { int r, i, j; @@ -208,7 +208,7 @@ int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, */ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr, - int hw_ip, int instance, int ring, + u32 hw_ip, u32 instance, u32 ring, struct amdgpu_ring **out_ring) { int r, ip_num_rings; From 6edc6910ba4cd6eab309263539c8f09b8ad772bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 24 Nov 2017 11:39:30 +0100 Subject: [PATCH 159/333] drm/amdgpu: don't try to move pinned BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Never try to move pinned BOs during CS. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a57cec737c18..57abf7abd7a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -409,6 +409,10 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, if (candidate->robj == validated) break; + /* We can't move pinned BOs here */ + if (bo->pin_count) + continue; + other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); /* Check if this BO is in one of the domains we need space for */ From acc34503bd22bd826bbf7b7f95c84a06bb6380e4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Jun 2017 14:50:01 -0400 Subject: [PATCH 160/333] drm/amdgpu: drop experimental flag for raven Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ec96bb1f9eaf..c2f414ffb2cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -536,7 +536,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, /* Raven */ - {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0, 0, 0} }; From 1b6c80674192bd6b235de7bc58f0bd03eb7f89df Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 13 Oct 2017 12:13:02 -0400 Subject: [PATCH 161/333] drm/amd/display: Add null check for 24BPP (xfm and dpp) Fixes Nullptr error when trying 24BPP Signed-off-by: Bhawanpreet Lakha Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d1cdf9f8853d..a2f9be3716cf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -856,6 +856,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; + /* Taps calculations */ if (pipe_ctx->plane_res.xfm != NULL) res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( @@ -864,16 +865,21 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) if (pipe_ctx->plane_res.dpp != NULL) res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); - if (!res) { /* Try 24 bpp linebuffer */ pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_24BPP; - res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( - pipe_ctx->plane_res.xfm, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); + if (pipe_ctx->plane_res.xfm != NULL) + res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( + pipe_ctx->plane_res.xfm, + &pipe_ctx->plane_res.scl_data, + &plane_state->scaling_quality); - res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( - pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); + if (pipe_ctx->plane_res.dpp != NULL) + res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( + pipe_ctx->plane_res.dpp, + &pipe_ctx->plane_res.scl_data, + &plane_state->scaling_quality); } if (res) From 827f11e97dbc591f0c9619151b9a89f082e8ecb8 Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Tue, 17 Oct 2017 17:18:24 -0400 Subject: [PATCH 162/333] drm/amd/display: Should disable when new stream is null core_link_disable_stream should be called when the new stream is null (i.e. want to disable). Modify the if condition to reflect that. Signed-off-by: Leo (Sunpeng) Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 1229a3315018..be14a2665072 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1868,8 +1868,10 @@ static void dce110_reset_hw_ctx_wrap( pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) { struct clock_source *old_clk = pipe_ctx_old->clock_source; - /* disable already, no need to disable again */ - if (pipe_ctx->stream && !pipe_ctx->stream->dpms_off) + /* Disable if new stream is null. O/w, if stream is + * disabled already, no need to disable again. + */ + if (!pipe_ctx->stream || !pipe_ctx->stream->dpms_off) core_link_disable_stream(pipe_ctx_old, FREE_ACQUIRED_RESOURCE); pipe_ctx_old->stream_res.tg->funcs->set_blank(pipe_ctx_old->stream_res.tg, true); From 6bffebc90c23e2341a1f8371e7b496ec94136e47 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 18 Oct 2017 20:22:40 -0400 Subject: [PATCH 163/333] drm/amd/display: Add timing validation against dongle cap For DP active dongles, the dpcd dongle caps are read but not used to validate mode timing. This addresses this. In particular, this change fixes light up on the HDMI 4k TV connected through DP active dongle. Since the 4k TV defaults to YCbCr420, which the dongle don't support. This change does not address MST cases, a more generalized approach must be taken for that. Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 1 + drivers/gpu/drm/amd/display/dc/core/dc_link.c | 70 ++++++++++++++++++- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 21 +++++- .../gpu/drm/amd/display/dc/inc/core_status.h | 2 +- 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index fe63f5894d43..15625fd94455 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -128,6 +128,7 @@ static bool create_links( link->link_id.id = CONNECTOR_ID_VIRTUAL; link->link_id.enum_id = ENUM_ID_1; link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL); + link->link_status.dpcd_caps = &link->dpcd_caps; enc_init.ctx = dc->ctx; enc_init.channel = CHANNEL_ID_UNKNOWN; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 0602610489d7..73077869151c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1801,12 +1801,75 @@ static void disable_link(struct dc_link *link, enum signal_type signal) link->link_enc->funcs->disable_output(link->link_enc, signal, link); } +bool dp_active_dongle_validate_timing( + const struct dc_crtc_timing *timing, + const struct dc_dongle_caps *dongle_caps) +{ + unsigned int required_pix_clk = timing->pix_clk_khz; + + if (dongle_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER || + dongle_caps->extendedCapValid == false) + return true; + + /* Check Pixel Encoding */ + switch (timing->pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + break; + case PIXEL_ENCODING_YCBCR422: + if (!dongle_caps->is_dp_hdmi_ycbcr422_pass_through) + return false; + break; + case PIXEL_ENCODING_YCBCR420: + if (!dongle_caps->is_dp_hdmi_ycbcr420_pass_through) + return false; + break; + default: + /* Invalid Pixel Encoding*/ + return false; + } + + + /* Check Color Depth and Pixel Clock */ + if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + required_pix_clk /= 2; + + switch (timing->display_color_depth) { + case COLOR_DEPTH_666: + case COLOR_DEPTH_888: + /*888 and 666 should always be supported*/ + break; + case COLOR_DEPTH_101010: + if (dongle_caps->dp_hdmi_max_bpc < 10) + return false; + required_pix_clk = required_pix_clk * 10 / 8; + break; + case COLOR_DEPTH_121212: + if (dongle_caps->dp_hdmi_max_bpc < 12) + return false; + required_pix_clk = required_pix_clk * 12 / 8; + break; + + case COLOR_DEPTH_141414: + case COLOR_DEPTH_161616: + default: + /* These color depths are currently not supported */ + return false; + } + + if (required_pix_clk > dongle_caps->dp_hdmi_max_pixel_clk) + return false; + + return true; +} + enum dc_status dc_link_validate_mode_timing( const struct dc_stream_state *stream, struct dc_link *link, const struct dc_crtc_timing *timing) { uint32_t max_pix_clk = stream->sink->dongle_max_pix_clk; + struct dc_dongle_caps *dongle_caps = &link->link_status.dpcd_caps->dongle_caps; /* A hack to avoid failing any modes for EDID override feature on * topology change such as lower quality cable for DP or different dongle @@ -1814,8 +1877,13 @@ enum dc_status dc_link_validate_mode_timing( if (link->remote_sinks[0]) return DC_OK; + /* Passive Dongle */ if (0 != max_pix_clk && timing->pix_clk_khz > max_pix_clk) - return DC_EXCEED_DONGLE_MAX_CLK; + return DC_EXCEED_DONGLE_CAP; + + /* Active Dongle*/ + if (!dp_active_dongle_validate_timing(timing, dongle_caps)) + return DC_EXCEED_DONGLE_CAP; switch (stream->signal) { case SIGNAL_TYPE_EDP: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index ced42484dcfc..8e97b42a03a2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2062,6 +2062,24 @@ bool is_dp_active_dongle(const struct dc_link *link) (dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER); } +static int translate_dpcd_max_bpc(enum dpcd_downstream_port_max_bpc bpc) +{ + switch (bpc) { + case DOWN_STREAM_MAX_8BPC: + return 8; + case DOWN_STREAM_MAX_10BPC: + return 10; + case DOWN_STREAM_MAX_12BPC: + return 12; + case DOWN_STREAM_MAX_16BPC: + return 16; + default: + break; + } + + return -1; +} + static void get_active_converter_info( uint8_t data, struct dc_link *link) { @@ -2131,7 +2149,8 @@ static void get_active_converter_info( hdmi_caps.bits.YCrCr420_CONVERSION; link->dpcd_caps.dongle_caps.dp_hdmi_max_bpc = - hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT; + translate_dpcd_max_bpc( + hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT); link->dpcd_caps.dongle_caps.extendedCapValid = true; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h index 01df85641684..94fc31080fda 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h @@ -38,7 +38,7 @@ enum dc_status { DC_FAIL_DETACH_SURFACES = 8, DC_FAIL_SURFACE_VALIDATE = 9, DC_NO_DP_LINK_BANDWIDTH = 10, - DC_EXCEED_DONGLE_MAX_CLK = 11, + DC_EXCEED_DONGLE_CAP = 11, DC_SURFACE_PIXEL_FORMAT_UNSUPPORTED = 12, DC_FAIL_BANDWIDTH_VALIDATE = 13, /* BW and Watermark validation */ DC_FAIL_SCALING = 14, From 3eb4eba42263cc1e810d79b4970cbcb096c210ab Mon Sep 17 00:00:00 2001 From: Roman Li Date: Fri, 20 Oct 2017 10:15:18 -0400 Subject: [PATCH 164/333] drm/amd/display: Fix S3 topology change Clean fake sink flag on resume if real sink connected. Fixing S3 topology change problem like this: 1) x desktop with 1 or > displays 2) unplug display 3) suspend 4) replug same display 5) resume without this change replugged display doesn't light up Signed-off-by: Roman Li Reviewed-by: Sun peng Li Acked-by: Harry Wentland Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 889ed24084e8..009aaeb263e3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -677,6 +677,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) mutex_lock(&aconnector->hpd_lock); dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); + + if (aconnector->fake_enable && aconnector->dc_link->local_sink) + aconnector->fake_enable = false; + aconnector->dc_sink = NULL; amdgpu_dm_update_connector_after_detect(aconnector); mutex_unlock(&aconnector->hpd_lock); From 4ddd76d1ce00bb0d78e73e29fd9062ecd6bad611 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 23 Oct 2017 15:37:37 -0400 Subject: [PATCH 165/333] drm/amd/display: fix split recout calculation Recout split rounding code was wrong Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index a2f9be3716cf..ced339a145c6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -580,14 +580,12 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) { if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) { - pipe_ctx->plane_res.scl_data.recout.height /= 2; - pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height; /* Floor primary pipe, ceil 2ndary pipe */ - pipe_ctx->plane_res.scl_data.recout.height += pipe_ctx->plane_res.scl_data.recout.height % 2; + pipe_ctx->plane_res.scl_data.recout.height = (pipe_ctx->plane_res.scl_data.recout.height + 1) / 2; + pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height; } else { - pipe_ctx->plane_res.scl_data.recout.width /= 2; + pipe_ctx->plane_res.scl_data.recout.width = (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2; pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width; - pipe_ctx->plane_res.scl_data.recout.width += pipe_ctx->plane_res.scl_data.recout.width % 2; } } else if (pipe_ctx->bottom_pipe && pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state) { From 58fe8990fc29336c9ea6531d9ecc44466d3b9221 Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Mon, 23 Oct 2017 09:11:46 -0400 Subject: [PATCH 166/333] drm/amd/display: Handle as MST first and then DP dongle if sink support both Signed-off-by: Hersen Wu Reviewed-by: Tony Cheng Reviewed-by: Wenjing Liu Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 73077869151c..e27ed4a45265 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -480,22 +480,6 @@ static void detect_dp( sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT; detect_dp_sink_caps(link); - /* DP active dongles */ - if (is_dp_active_dongle(link)) { - link->type = dc_connection_active_dongle; - if (!link->dpcd_caps.sink_count.bits.SINK_COUNT) { - /* - * active dongle unplug processing for short irq - */ - link_disconnect_sink(link); - return; - } - - if (link->dpcd_caps.dongle_type != - DISPLAY_DONGLE_DP_HDMI_CONVERTER) { - *converter_disable_audio = true; - } - } if (is_mst_supported(link)) { sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT_MST; link->type = dc_connection_mst_branch; @@ -535,6 +519,22 @@ static void detect_dp( sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT; } } + + if (link->type != dc_connection_mst_branch && + is_dp_active_dongle(link)) { + /* DP active dongles */ + link->type = dc_connection_active_dongle; + if (!link->dpcd_caps.sink_count.bits.SINK_COUNT) { + /* + * active dongle unplug processing for short irq + */ + link_disconnect_sink(link); + return; + } + + if (link->dpcd_caps.dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER) + *converter_disable_audio = true; + } } else { /* DP passive dongles */ sink_caps->signal = dp_passive_dongle_detection(link->ddc, From 1c72be981e8a18b4b4ad9d7fc4620bd19c2aba7f Mon Sep 17 00:00:00 2001 From: Andrew Jiang Date: Tue, 24 Oct 2017 12:00:55 -0400 Subject: [PATCH 167/333] drm/amd/display: Don't reject 3D timings Signed-off-by: Andrew Jiang Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c index c7333cdf1802..fced178c8c79 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c @@ -496,9 +496,6 @@ static bool tgn10_validate_timing( timing->timing_3d_format != TIMING_3D_FORMAT_INBAND_FA) return false; - if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE && - tg->ctx->dc->debug.disable_stereo_support) - return false; /* Temporarily blocking interlacing mode until it's supported */ if (timing->flags.INTERLACE == 1) return false; From 116b2632ab033b2a755c1c7b703fede6860b3140 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Tue, 24 Oct 2017 17:57:38 -0400 Subject: [PATCH 168/333] drm/amd/display: fix split recout offset Previous recout calculation fix changed recout size rounding and affected the offset when it should not have Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index ced339a145c6..1cd15d1cc3f4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -580,12 +580,12 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) { if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) { + pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height / 2; /* Floor primary pipe, ceil 2ndary pipe */ pipe_ctx->plane_res.scl_data.recout.height = (pipe_ctx->plane_res.scl_data.recout.height + 1) / 2; - pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height; } else { + pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width / 2; pipe_ctx->plane_res.scl_data.recout.width = (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2; - pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width; } } else if (pipe_ctx->bottom_pipe && pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state) { From 30ec2b9717c1c6616332f8ce7c0cb3dd72032a2e Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Fri, 3 Nov 2017 16:04:34 -0400 Subject: [PATCH 169/333] drm/amd/display: Check aux channel before MST resume It is to fix: MST display failed to resume from S3 At the beginning of resume from S3, need to check if mgr->aux is NULL. Fake MST encoder doesn't have real aux channel. Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Roman Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 009aaeb263e3..5293604a894b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -520,7 +520,8 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev) list_for_each_entry(connector, &dev->mode_config.connector_list, head) { aconnector = to_amdgpu_dm_connector(connector); - if (aconnector->dc_link->type == dc_connection_mst_branch) { + if (aconnector->dc_link->type == dc_connection_mst_branch && + aconnector->mst_mgr.aux) { DRM_DEBUG_DRIVER("DM_MST: starting TM on aconnector: %p [id: %d]\n", aconnector, aconnector->base.base.id); From 16fb754a294d25f4077e1f475d930c22698f442a Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Fri, 10 Nov 2017 13:53:53 -0500 Subject: [PATCH 170/333] drm/amd/display: fix split viewport rounding error Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 4 ++++ drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 10 ++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 3dce35e66b09..983987f7c983 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -890,6 +890,10 @@ bool dcn_validate_bandwidth( + pipe->bottom_pipe->plane_res.scl_data.recout.width; } + ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value + || v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]); + ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value + || v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]); v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no; v->source_pixel_format[input_idx] = tl_pixel_format_to_bw_defs( pipe->plane_state->format); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 1cd15d1cc3f4..655e08df48a0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -516,13 +516,11 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx) right_view = (plane_state->rotation == ROTATION_ANGLE_270) != sec_split; if (right_view) { - data->viewport.width /= 2; - data->viewport_c.width /= 2; - data->viewport.x += data->viewport.width; - data->viewport_c.x += data->viewport_c.width; + data->viewport.x += data->viewport.width / 2; + data->viewport_c.x += data->viewport_c.width / 2; /* Ceil offset pipe */ - data->viewport.width += data->viewport.width % 2; - data->viewport_c.width += data->viewport_c.width % 2; + data->viewport.width = (data->viewport.width + 1) / 2; + data->viewport_c.width = (data->viewport_c.width + 1) / 2; } else { data->viewport.width /= 2; data->viewport_c.width /= 2; From 1b61973f9e0e7724bdc779ef3f9b55bd21b08db4 Mon Sep 17 00:00:00 2001 From: Jordan Lazare Date: Mon, 13 Nov 2017 17:57:33 -0400 Subject: [PATCH 171/333] drm/amd/display: Revert noisy assert messages This partially reverts commit 4fb48bb66211 ("dc: fix split viewport rounding error"). Signed-off-by: Jordan Lazare Reviewed-by: Dmytro Laktyushkin Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 983987f7c983..3dce35e66b09 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -890,10 +890,6 @@ bool dcn_validate_bandwidth( + pipe->bottom_pipe->plane_res.scl_data.recout.width; } - ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value - || v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]); - ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value - || v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]); v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no; v->source_pixel_format[input_idx] = tl_pixel_format_to_bw_defs( pipe->plane_state->format); From 8ffca5dca093cdd25264e782cc0c4539cb318925 Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Fri, 10 Nov 2017 15:02:19 -0500 Subject: [PATCH 172/333] drm/amd/display: Do DC mode-change check when adding CRTCs Within atomic check, dm_update_crtcs_state is called twice. First to remove from the dc_state, and subsequently to add to it. In both calls, a secondary mode-change check is done using dc-level states. We shouldn't be doing this while removing, since a new dc_stream_state has not been created to do the necessary comparison. Because of this, the mode_changed flag within the DRM state can be mistakenly set to false. Doing so only when adding prevents this. We are also guaranteed that a call to add will come after remove, or else the atomic check fails (and a commit will not happen). Signed-off-by: Leo (Sunpeng) Li Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5293604a894b..8b0fb25d84a6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4460,7 +4460,7 @@ static int dm_update_crtcs_state(struct dc *dc, } } - if (dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && + if (enable && dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) { new_crtc_state->mode_changed = false; From 93984bbc70b3f321b8f6a3ec303e31b084e54230 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Mon, 20 Nov 2017 10:37:08 +0530 Subject: [PATCH 173/333] drm/amd/display: check plane state before validating fbc While validation fbc, array_mode of the pipe is accessed without checking plane_state exists for it. Causing to null pointer dereferencing followed by reboot when a crtc associated with external display(not connected) is page flipped. This patch adds a check for plane_state before using it to validate fbc. Signed-off-by: Shirish S Reviewed-by: Roman Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index be14a2665072..c3de7f305cd3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1774,6 +1774,10 @@ static enum dc_status validate_fbc(struct dc *dc, if (pipe_ctx->stream->sink->link->psr_enabled) return DC_ERROR_UNEXPECTED; + /* Nothing to compress */ + if (!pipe_ctx->plane_state) + return DC_ERROR_UNEXPECTED; + /* Only for non-linear tiling */ if (pipe_ctx->plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL) return DC_ERROR_UNEXPECTED; From 2f2c3b36fc5a3dc9fd2cf22a2f368502a70eea5d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 22 Nov 2017 16:47:35 +0000 Subject: [PATCH 174/333] drm/amd/display: fix memory leaks on error exit return Currently in the case where some of the allocations fail for dce110_tgv, dce110_xfmv, dce110_miv or dce110_oppv then the exit return path ends up leaking allocated objects. Fix this by kfree'ing them before returning. Also re-work the comparison of the null pointers to use the !ptr idiom. Detected by CoverityScan, CID#1460246, 1460325, 1460324, 1460392 ("Resource Leak") Fixes: c4562236b3bc ("drm/amd/dc: Add dc display driver (v2)") Signed-off-by: Colin Ian King Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce110/dce110_resource.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index db96d2b47ff1..61adb8174ce0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1037,11 +1037,13 @@ static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool) struct dce110_opp *dce110_oppv = kzalloc(sizeof(*dce110_oppv), GFP_KERNEL); - if ((dce110_tgv == NULL) || - (dce110_xfmv == NULL) || - (dce110_miv == NULL) || - (dce110_oppv == NULL)) - return false; + if (!dce110_tgv || !dce110_xfmv || !dce110_miv || !dce110_oppv) { + kfree(dce110_tgv); + kfree(dce110_xfmv); + kfree(dce110_miv); + kfree(dce110_oppv); + return false; + } dce110_opp_v_construct(dce110_oppv, ctx); From 2b7c97d687e81db07d8c67b32ff920e7bf59444e Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 15 Nov 2017 18:27:31 -0500 Subject: [PATCH 175/333] drm/amd/display: fix seq issue: turn on clock before programming afmt. Signed-off-by: Charlene Liu Reviewed-by: Krunoslav Kovac Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../amd/display/dc/dce/dce_stream_encoder.c | 5 +++++ .../display/dc/dce110/dce110_hw_sequencer.c | 22 +++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 4fd49a16c3b6..c04d67db9cea 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -87,6 +87,11 @@ static void dce110_update_generic_info_packet( */ uint32_t max_retries = 50; + REG_GET(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, ®val); + /*we need turn on clock before programming AFMT block*/ + if (regval != 1) + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + if (REG(AFMT_VBI_PACKET_CONTROL1)) { if (packet_index >= 8) ASSERT(0); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index c3de7f305cd3..07ff8d2faf3f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -991,6 +991,16 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) struct dc_link *link = stream->sink->link; struct dc *dc = pipe_ctx->stream->ctx->dc; + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets( + pipe_ctx->stream_res.stream_enc); + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->stop_dp_info_packets( + pipe_ctx->stream_res.stream_enc); + + pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( + pipe_ctx->stream_res.stream_enc, true); if (pipe_ctx->stream_res.audio) { pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio); @@ -1015,18 +1025,6 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) */ } - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets( - pipe_ctx->stream_res.stream_enc); - - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->stop_dp_info_packets( - pipe_ctx->stream_res.stream_enc); - - pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( - pipe_ctx->stream_res.stream_enc, true); - - /* blank at encoder level */ if (dc_is_dp_signal(pipe_ctx->stream->signal)) { if (pipe_ctx->stream->sink->link->connector_signal == SIGNAL_TYPE_EDP) From cfb071f7a9673109415d097125b3c12c16836acc Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 15 Nov 2017 18:55:57 -0500 Subject: [PATCH 176/333] drm/amd/display: try to find matching audio inst for enc inst first [Description] in eDP+ HDMI/DP clone or extended configuration, audio inst changed from inst 1 to inst0. No failure related this though, just playback device endpoint inst changed. Also remove one addition register read. Signed-off-by: Charlene Liu Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 9 +++++++-- drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c | 4 +--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 655e08df48a0..546fe99d34f2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1449,11 +1449,16 @@ static struct stream_encoder *find_first_free_match_stream_enc_for_link( static struct audio *find_first_free_audio( struct resource_context *res_ctx, - const struct resource_pool *pool) + const struct resource_pool *pool, + enum engine_id id) { int i; for (i = 0; i < pool->audio_count; i++) { if ((res_ctx->is_audio_acquired[i] == false) && (res_ctx->is_stream_enc_acquired[i] == true)) { + /*we have enough audio endpoint, find the matching inst*/ + if (id != i) + continue; + return pool->audios[i]; } } @@ -1702,7 +1707,7 @@ enum dc_status resource_map_pool_resources( dc_is_audio_capable_signal(pipe_ctx->stream->signal) && stream->audio_info.mode_count) { pipe_ctx->stream_res.audio = find_first_free_audio( - &context->res_ctx, pool); + &context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id); /* * Audio assigned in order first come first get. diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index c04d67db9cea..e42b6eb1c1f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -87,10 +87,8 @@ static void dce110_update_generic_info_packet( */ uint32_t max_retries = 50; - REG_GET(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, ®val); /*we need turn on clock before programming AFMT block*/ - if (regval != 1) - REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); if (REG(AFMT_VBI_PACKET_CONTROL1)) { if (packet_index >= 8) From 70e8ffc55b98f31af700eae525fef5d0b8fcb6e1 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 11:19:02 -0500 Subject: [PATCH 177/333] drm/amd/display: Fix amdgpu_dm bugs found by smatch drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:2760 create_eml_sink() warn: variable dereferenced before check 'aconnector->base.edid_blob_ptr' (see line 2758) drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:4270 amdgpu_dm_atomic_commit_tail() warn: variable dereferenced before check 'dm_new_crtc_state->stream' (see line 4266) drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:4417 dm_restore_drm_connector_state() warn: variable dereferenced before check 'disconnected_acrtc' (see line 4415) Signed-off-by: Harry Wentland Reviewed-by: Sun peng Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8b0fb25d84a6..5025dafb0517 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2709,7 +2709,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_VIRTUAL }; - struct edid *edid = (struct edid *) aconnector->base.edid_blob_ptr->data; + struct edid *edid; if (!aconnector->base.edid_blob_ptr || !aconnector->base.edid_blob_ptr->data) { @@ -2721,6 +2721,8 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) return; } + edid = (struct edid *) aconnector->base.edid_blob_ptr->data; + aconnector->edid = edid; aconnector->dc_em_sink = dc_link_add_remote_sink( @@ -4198,13 +4200,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) update_stream_scaling_settings(&dm_new_con_state->base.crtc->mode, dm_new_con_state, (struct dc_stream_state *)dm_new_crtc_state->stream); + if (!dm_new_crtc_state->stream) + continue; + status = dc_stream_get_status(dm_new_crtc_state->stream); WARN_ON(!status); WARN_ON(!status->plane_count); - if (!dm_new_crtc_state->stream) - continue; - /*TODO How it works with MPO ?*/ if (!dc_commit_planes_to_stream( dm->dc, @@ -4337,9 +4339,11 @@ void dm_restore_drm_connector_state(struct drm_device *dev, return; disconnected_acrtc = to_amdgpu_crtc(connector->encoder->crtc); - acrtc_state = to_dm_crtc_state(disconnected_acrtc->base.state); + if (!disconnected_acrtc) + return; - if (!disconnected_acrtc || !acrtc_state->stream) + acrtc_state = to_dm_crtc_state(disconnected_acrtc->base.state); + if (!acrtc_state->stream) return; /* From a8f9764731968d6a63f6f98c5b0d4e7a0e4154e2 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 11:56:15 -0500 Subject: [PATCH 178/333] drm/amd/display: Bunch of smatch error and warning fixes in DC drivers/gpu/drm/amd/amdgpu/../display/dc/basics/log_helpers.c:79 dc_conn_log() error: buffer overflow 'signal_type_info_tbl' 10 <= 10 drivers/gpu/drm/amd/amdgpu/../display/dc/bios/bios_parser.c:266 bios_parser_get_dst_obj() error: uninitialized symbol 'id'. drivers/gpu/drm/amd/amdgpu/../display/dc/dce/dce_audio.c:357 dce_aud_az_enable() warn: inconsistent indenting drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:958 dcn10_acquire_idle_pipe_for_layer() error: we previously assumed 'head_pipe' could be null (see line 952) Signed-off-by: Harry Wentland Reviewed-by: Sun peng Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/basics/log_helpers.c | 5 +++++ drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dce/dce_audio.c | 10 +++++----- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 6 ++++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c index 785b943b60ed..6e43168fbdd6 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c @@ -75,6 +75,9 @@ void dc_conn_log(struct dc_context *ctx, if (signal == signal_type_info_tbl[i].type) break; + if (i == NUM_ELEMENTS(signal_type_info_tbl)) + goto fail; + dm_logger_append(&entry, "[%s][ConnIdx:%d] ", signal_type_info_tbl[i].name, link->link_index); @@ -96,6 +99,8 @@ void dc_conn_log(struct dc_context *ctx, dm_logger_append(&entry, "^\n"); dm_helpers_dc_conn_log(ctx, &entry, event); + +fail: dm_logger_close(&entry); va_end(args); diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index aaaebd06d7ee..86e6438c5cf3 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -249,7 +249,7 @@ static enum bp_result bios_parser_get_dst_obj(struct dc_bios *dcb, struct graphics_object_id *dest_object_id) { uint32_t number; - uint16_t *id; + uint16_t *id = NULL; ATOM_OBJECT *object; struct bios_parser *bp = BP_FROM_DCB(dcb); @@ -260,7 +260,7 @@ static enum bp_result bios_parser_get_dst_obj(struct dc_bios *dcb, number = get_dest_obj_list(bp, object, &id); - if (number <= index) + if (number <= index || !id) return BP_RESULT_BADINPUT; *dest_object_id = object_id_from_bios_object_id(id[index]); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index 81c40f8864db..0df9ecb2710c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -352,11 +352,11 @@ void dce_aud_az_enable(struct audio *audio) uint32_t value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); set_reg_field_value(value, 1, - AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, - CLOCK_GATING_DISABLE); - set_reg_field_value(value, 1, - AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, - AUDIO_ENABLED); + AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + CLOCK_GATING_DISABLE); + set_reg_field_value(value, 1, + AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + AUDIO_ENABLED); AZ_REG_WRITE(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, value); value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 4c4bd72d4e40..9fc8f827f2a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -912,11 +912,13 @@ static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer( struct pipe_ctx *head_pipe = resource_get_head_pipe_for_stream(res_ctx, stream); struct pipe_ctx *idle_pipe = find_idle_secondary_pipe(res_ctx, pool); - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } if (!idle_pipe) - return false; + return NULL; idle_pipe->stream = head_pipe->stream; idle_pipe->stream_res.tg = head_pipe->stream_res.tg; From b001965d4eb5e4c0e3de7dc6dce1d45ca25232a7 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 12:11:44 -0500 Subject: [PATCH 179/333] drm/amd/display: Fix use before NULL check in validate_timing Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/dce110/dce110_timing_generator.c:1124 dce110_timing_generator_validate_timing() warn: variable dereferenced before check 'timing' (see line 1116) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dce110/dce110_timing_generator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 67ac737eaa7e..4befce6cd87a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -1112,10 +1112,7 @@ bool dce110_timing_generator_validate_timing( enum signal_type signal) { uint32_t h_blank; - uint32_t h_back_porch; - uint32_t hsync_offset = timing->h_border_right + - timing->h_front_porch; - uint32_t h_sync_start = timing->h_addressable + hsync_offset; + uint32_t h_back_porch, hsync_offset, h_sync_start; struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); @@ -1124,6 +1121,9 @@ bool dce110_timing_generator_validate_timing( if (!timing) return false; + hsync_offset = timing->h_border_right + timing->h_front_porch; + h_sync_start = timing->h_addressable + hsync_offset; + /* Currently we don't support 3D, so block all 3D timings */ if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE) return false; From b3fb2b4e21a995c4fa511627088bd55b88f6be11 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 19:49:44 -0500 Subject: [PATCH 180/333] drm/amd/display: Fix hubp check in set_cursor_position Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stream.c:298 dc_stream_set_cursor_position() error: we previously assumed 'hubp' could be null (see line 294) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 9 ++++----- drivers/gpu/drm/amd/display/dc/inc/hw/transform.h | 7 ------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index b00a6040a697..e230cc44a0a7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -263,7 +263,6 @@ bool dc_stream_set_cursor_position( struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp; struct mem_input *mi = pipe_ctx->plane_res.mi; struct hubp *hubp = pipe_ctx->plane_res.hubp; - struct transform *xfm = pipe_ctx->plane_res.xfm; struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_cursor_position pos_cpy = *position; struct dc_cursor_mi_param param = { @@ -294,11 +293,11 @@ bool dc_stream_set_cursor_position( if (mi != NULL && mi->funcs->set_cursor_position != NULL) mi->funcs->set_cursor_position(mi, &pos_cpy, ¶m); - if (hubp != NULL && hubp->funcs->set_cursor_position != NULL) - hubp->funcs->set_cursor_position(hubp, &pos_cpy, ¶m); + if (!hubp) + continue; - if (xfm != NULL && xfm->funcs->set_cursor_position != NULL) - xfm->funcs->set_cursor_position(xfm, &pos_cpy, ¶m, hubp->curs_attr.width); + if (hubp->funcs->set_cursor_position != NULL) + hubp->funcs->set_cursor_position(hubp, &pos_cpy, ¶m); if (dpp != NULL && dpp->funcs->set_cursor_position != NULL) dpp->funcs->set_cursor_position(dpp, &pos_cpy, ¶m, hubp->curs_attr.width); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 7c08bc62c1f5..ea88997e1bbd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -259,13 +259,6 @@ struct transform_funcs { struct transform *xfm_base, const struct dc_cursor_attributes *attr); - void (*set_cursor_position)( - struct transform *xfm_base, - const struct dc_cursor_position *pos, - const struct dc_cursor_mi_param *param, - uint32_t width - ); - }; const uint16_t *get_filter_2tap_16p(void); From edf38b58ecd847df75723fb90d4a1c3669b1e670 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 20:11:37 -0500 Subject: [PATCH 181/333] drm/amd/display: Fix potential NULL and mem leak in create_links Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:148 create_links() error: potential null dereference 'link->link_enc'. (kzalloc returns null) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 15625fd94455..7240db2e6f09 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -121,6 +121,10 @@ static bool create_links( goto failed_alloc; } + link->link_index = dc->link_count; + dc->links[dc->link_count] = link; + dc->link_count++; + link->ctx = dc->ctx; link->dc = dc; link->connector_signal = SIGNAL_TYPE_VIRTUAL; @@ -128,6 +132,12 @@ static bool create_links( link->link_id.id = CONNECTOR_ID_VIRTUAL; link->link_id.enum_id = ENUM_ID_1; link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL); + + if (!link->link_enc) { + BREAK_TO_DEBUGGER(); + goto failed_alloc; + } + link->link_status.dpcd_caps = &link->dpcd_caps; enc_init.ctx = dc->ctx; @@ -139,10 +149,6 @@ static bool create_links( enc_init.encoder.id = ENCODER_ID_INTERNAL_VIRTUAL; enc_init.encoder.enum_id = ENUM_ID_1; virtual_link_encoder_construct(link->link_enc, &enc_init); - - link->link_index = dc->link_count; - dc->links[dc->link_count] = link; - dc->link_count++; } return true; From e41ab0309a1b75d31c360d1d2b0de58e8d7958ad Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 10 Nov 2017 20:35:27 -0500 Subject: [PATCH 182/333] drm/amd/display: Fix couple more inconsistent NULL checks in dc_resource Found by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_resource.c:1001 acquire_free_pipe_for_stream() error: we previously assumed 'head_pipe' could be null (see line 998) drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_resource.c:1808 dc_validate_global_state() error: we previously assumed 'new_ctx' could be null (see line 1778) Signed-off-by: Harry Wentland Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 546fe99d34f2..b7422d3b71ef 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -993,8 +993,10 @@ static struct pipe_ctx *acquire_free_pipe_for_stream( head_pipe = resource_get_head_pipe_for_stream(res_ctx, stream); - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } if (!head_pipe->plane_state) return head_pipe; @@ -1772,13 +1774,16 @@ enum dc_status dc_validate_global_state( enum dc_status result = DC_ERROR_UNEXPECTED; int i, j; + if (!new_ctx) + return DC_ERROR_UNEXPECTED; + if (dc->res_pool->funcs->validate_global) { result = dc->res_pool->funcs->validate_global(dc, new_ctx); if (result != DC_OK) return result; } - for (i = 0; new_ctx && i < new_ctx->stream_count; i++) { + for (i = 0; i < new_ctx->stream_count; i++) { struct dc_stream_state *stream = new_ctx->streams[i]; for (j = 0; j < dc->res_pool->pipe_count; j++) { From 0a24bfcb2cc2cd161a0c8aae8fcbb58239d5da2b Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Thu, 16 Nov 2017 15:17:27 -0500 Subject: [PATCH 183/333] drm/amd/display: Do not put drm_atomic_state on resume drm_atomic_helper_resume now puts it for us. See relevant patch here: https://lists.freedesktop.org/archives/dri-devel/2017-October/154268.html Signed-off-by: Leo (Sunpeng) Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5025dafb0517..3d6fbc38d95f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -716,7 +716,6 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) ret = drm_atomic_helper_resume(ddev, adev->dm.cached_state); - drm_atomic_state_put(adev->dm.cached_state); adev->dm.cached_state = NULL; amdgpu_dm_irq_resume_late(adev); From d5c9cb6e00047b194d54cadc1e91156f2875a3e5 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 16 Nov 2017 17:22:39 -0500 Subject: [PATCH 184/333] drm/amd/display: fix gamma setting Adding gamma changed check as condition for affected plane. We ignored adding plane as affected if modeset was not required. But for color management change we still need it. Signed-off-by: Roman Li Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3d6fbc38d95f..8d94dfc7ba3f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4717,7 +4717,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } } else { for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) + if (!drm_atomic_crtc_needs_modeset(new_crtc_state) && + !new_crtc_state->color_mgmt_changed) continue; if (!new_crtc_state->enable) From 320a127437e5d3cbb7fc444f8769eb510d11d3b9 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 14 Nov 2017 20:45:52 -0500 Subject: [PATCH 185/333] drm/amd/display: Switch to drm_atomic_helper_wait_for_flip_done MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new helper function is advised to be used for drviers that use the nonblocking commit tracking support instead of drm_atomic_helper_wait_for_vblanks. Signed-off-by: Andrey Grodzovsky Reviewed-by: Harry Wentland Reviewed-and-Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8d94dfc7ba3f..f71fe6d2ddda 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4259,7 +4259,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_commit_hw_done(state); if (wait_for_vblank) - drm_atomic_helper_wait_for_vblanks(dev, state); + drm_atomic_helper_wait_for_flip_done(dev, state); drm_atomic_helper_cleanup_planes(dev, state); } From 97011249c3b2ba6b038a2af7025063d62d1448ec Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Mon, 20 Nov 2017 12:45:54 -0500 Subject: [PATCH 186/333] drm/amd/display: USB-C / thunderbolt dock specific workaround reading dpcd 0x600 cause link loss for a particular USB-C dock with thurderbolt. workaround by avoiding dcpd 0x600 read unless it's necessary. Signed-off-by: Hersen Wu Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 85 +++++++++---------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 8e97b42a03a2..e6bf05d76a94 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1512,7 +1512,7 @@ static bool hpd_rx_irq_check_link_loss_status( struct dc_link *link, union hpd_irq_data *hpd_irq_dpcd_data) { - uint8_t irq_reg_rx_power_state; + uint8_t irq_reg_rx_power_state = 0; enum dc_status dpcd_result = DC_ERROR_UNEXPECTED; union lane_status lane_status; uint32_t lane; @@ -1524,60 +1524,55 @@ static bool hpd_rx_irq_check_link_loss_status( if (link->cur_link_settings.lane_count == 0) return return_code; - /*1. Check that we can handle interrupt: Not in FS DOS, - * Not in "Display Timeout" state, Link is trained. - */ - dpcd_result = core_link_read_dpcd(link, - DP_SET_POWER, - &irq_reg_rx_power_state, - sizeof(irq_reg_rx_power_state)); + /*1. Check that Link Status changed, before re-training.*/ - if (dpcd_result != DC_OK) { - irq_reg_rx_power_state = DP_SET_POWER_D0; - dm_logger_write(link->ctx->logger, LOG_HW_HPD_IRQ, - "%s: DPCD read failed to obtain power state.\n", - __func__); + /*parse lane status*/ + for (lane = 0; lane < link->cur_link_settings.lane_count; lane++) { + /* check status of lanes 0,1 + * changed DpcdAddress_Lane01Status (0x202) + */ + lane_status.raw = get_nibble_at_index( + &hpd_irq_dpcd_data->bytes.lane01_status.raw, + lane); + + if (!lane_status.bits.CHANNEL_EQ_DONE_0 || + !lane_status.bits.CR_DONE_0 || + !lane_status.bits.SYMBOL_LOCKED_0) { + /* if one of the channel equalization, clock + * recovery or symbol lock is dropped + * consider it as (link has been + * dropped) dp sink status has changed + */ + sink_status_changed = true; + break; + } } - if (irq_reg_rx_power_state == DP_SET_POWER_D0) { + /* Check interlane align.*/ + if (sink_status_changed || + !hpd_irq_dpcd_data->bytes.lane_status_updated.bits.INTERLANE_ALIGN_DONE) { - /*2. Check that Link Status changed, before re-training.*/ + dm_logger_write(link->ctx->logger, LOG_HW_HPD_IRQ, + "%s: Link Status changed.\n", __func__); - /*parse lane status*/ - for (lane = 0; - lane < link->cur_link_settings.lane_count; - lane++) { + return_code = true; - /* check status of lanes 0,1 - * changed DpcdAddress_Lane01Status (0x202)*/ - lane_status.raw = get_nibble_at_index( - &hpd_irq_dpcd_data->bytes.lane01_status.raw, - lane); - - if (!lane_status.bits.CHANNEL_EQ_DONE_0 || - !lane_status.bits.CR_DONE_0 || - !lane_status.bits.SYMBOL_LOCKED_0) { - /* if one of the channel equalization, clock - * recovery or symbol lock is dropped - * consider it as (link has been - * dropped) dp sink status has changed*/ - sink_status_changed = true; - break; - } - - } - - /* Check interlane align.*/ - if (sink_status_changed || - !hpd_irq_dpcd_data->bytes.lane_status_updated.bits. - INTERLANE_ALIGN_DONE) { + /*2. Check that we can handle interrupt: Not in FS DOS, + * Not in "Display Timeout" state, Link is trained. + */ + dpcd_result = core_link_read_dpcd(link, + DP_SET_POWER, + &irq_reg_rx_power_state, + sizeof(irq_reg_rx_power_state)); + if (dpcd_result != DC_OK) { dm_logger_write(link->ctx->logger, LOG_HW_HPD_IRQ, - "%s: Link Status changed.\n", + "%s: DPCD read failed to obtain power state.\n", __func__); - - return_code = true; + } else { + if (irq_reg_rx_power_state != DP_SET_POWER_D0) + return_code = false; } } From aca7573fde95152378361cba734996b384f3b1d3 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Tue, 28 Nov 2017 08:23:04 +0530 Subject: [PATCH 187/333] powerpc: Avoid signed to unsigned conversion in set_thread_tidr() There is an unsafe signed to unsigned conversion in set_thread_tidr() that may cause an error value to be assigned to SPRN_TIDR register and used as thread-id. The issue happens as assign_thread_tidr() returns an int and thread.tidr is an unsigned-long. So a negative error code returned from assign_thread_tidr() will fail the error check and gets assigned as tidr as a large positive value. To fix this the patch assigns the return value of assign_thread_tidr() to a temporary int and assigns it to thread.tidr iff its '> 0'. The patch shouldn't impact the calling convention of set_thread_tidr() i.e all -ve return-values are error codes and a return value of '0' indicates success. Fixes: ec233ede4c86("powerpc: Add support for setting SPRN_TIDR") Signed-off-by: Vaibhav Jain Reviewed-by: Christophe Lombard clombard@linux.vnet.ibm.com Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bfdd783e3916..d205b52e3850 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1569,16 +1569,19 @@ void arch_release_task_struct(struct task_struct *t) */ int set_thread_tidr(struct task_struct *t) { + int rc; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -EINVAL; if (t != current) return -EINVAL; - t->thread.tidr = assign_thread_tidr(); - if (t->thread.tidr < 0) - return t->thread.tidr; + rc = assign_thread_tidr(); + if (rc < 0) + return rc; + t->thread.tidr = rc; mtspr(SPRN_TIDR, t->thread.tidr); return 0; From 7e4d4233260be0611c7fbdb2730c12731c4b8dc0 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Fri, 24 Nov 2017 14:03:38 +0530 Subject: [PATCH 188/333] powerpc: Do not assign thread.tidr if already assigned If set_thread_tidr() is called twice for same task_struct then it will allocate a new tidr value to it leaving the previous value still dangling in the vas_thread_ida table. To fix this the patch changes set_thread_tidr() to check if a tidr value is already assigned to the task_struct and if yes then returns zero. Fixes: ec233ede4c86("powerpc: Add support for setting SPRN_TIDR") Signed-off-by: Vaibhav Jain Reviewed-by: Andrew Donnellan [mpe: Modify to return 0 in the success case, not the TID value] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d205b52e3850..5acb5a176dbe 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1577,6 +1577,9 @@ int set_thread_tidr(struct task_struct *t) if (t != current) return -EINVAL; + if (t->thread.tidr) + return 0; + rc = assign_thread_tidr(); if (rc < 0) return rc; From 80a780a167d9267c72867b806142bd6ec69ba123 Mon Sep 17 00:00:00 2001 From: Bastian Stender Date: Tue, 28 Nov 2017 09:24:06 +0100 Subject: [PATCH 189/333] mmc: core: prepend 0x to pre_eol_info entry in sysfs The sysfs entry "pre_eol_info" was missing the 0x prefix to identify it as hex formatted. Fixes: 46bc5c408e4e ("mmc: core: Export device lifetime information through sysfs") Signed-off-by: Bastian Stender Cc: # v4.11+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a552f61060d2..b8259fcb4bda 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -781,7 +781,7 @@ MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); MMC_DEV_ATTR(prv, "0x%x\n", card->cid.prv); MMC_DEV_ATTR(rev, "0x%x\n", card->ext_csd.rev); -MMC_DEV_ATTR(pre_eol_info, "%02x\n", card->ext_csd.pre_eol_info); +MMC_DEV_ATTR(pre_eol_info, "0x%02x\n", card->ext_csd.pre_eol_info); MMC_DEV_ATTR(life_time, "0x%02x 0x%02x\n", card->ext_csd.device_life_time_est_typ_a, card->ext_csd.device_life_time_est_typ_b); From c892b0d81705c566f575e489efc3c50762db1bde Mon Sep 17 00:00:00 2001 From: Bastian Stender Date: Tue, 28 Nov 2017 09:24:07 +0100 Subject: [PATCH 190/333] mmc: core: prepend 0x to OCR entry in sysfs The sysfs entry "ocr" was missing the 0x prefix to identify it as hex formatted. Fixes: 5fb06af7a33b ("mmc: core: Extend sysfs with OCR register") Signed-off-by: Bastian Stender Cc: # v4.8+ [Ulf: Amended change to also cover SD-cards] Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- drivers/mmc/core/sd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index b8259fcb4bda..d209fb466979 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -791,7 +791,7 @@ MMC_DEV_ATTR(enhanced_area_offset, "%llu\n", MMC_DEV_ATTR(enhanced_area_size, "%u\n", card->ext_csd.enhanced_area_size); MMC_DEV_ATTR(raw_rpmb_size_mult, "%#x\n", card->ext_csd.raw_rpmb_size_mult); MMC_DEV_ATTR(rel_sectors, "%#x\n", card->ext_csd.rel_sectors); -MMC_DEV_ATTR(ocr, "%08x\n", card->ocr); +MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); MMC_DEV_ATTR(cmdq_en, "%d\n", card->ext_csd.cmdq_en); static ssize_t mmc_fwrev_show(struct device *dev, diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 45bf78f32716..62b84dd8f9fe 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -675,7 +675,7 @@ MMC_DEV_ATTR(manfid, "0x%06x\n", card->cid.manfid); MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); MMC_DEV_ATTR(serial, "0x%08x\n", card->cid.serial); -MMC_DEV_ATTR(ocr, "%08x\n", card->ocr); +MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); static ssize_t mmc_dsr_show(struct device *dev, From a76dfe350c1fcb466b18595e65b33fa6abf65472 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 29 Nov 2017 12:04:31 +0100 Subject: [PATCH 191/333] drm/atomic: make drm_atomic_helper_wait_for_vblanks more agressive drm_atomic_helper_setup_commit expects that flipping of previous commits has happened when it is called to set up a new commit. This can be violated by commits where userspace doesn't get a flip completion event to synchronize against i.e. legacy modesets and property changes. The expectation is that those are done by blocking commits, which wait for completion. Most drivers call drm_atomic_helper_wait_for_vblanks in the commit_tail to ensure completion, but the wait for next vblank might not actually happen if the commit didn't change any planes. Make the wait more agressive by also waiting if no planes changed. This is the minimal regression fix for the 4.15 kernel series. Long term drivers should switch away from drm_atomic_helper_wait_for_vblanks and use drm_atomic_helper_wait_for_flip_done instead. Fixes: de39bec1a0c4 ("drm/atomic: Remove waits in drm_atomic_helper_commit_cleanup_done, v2.") Signed-off-by: Lucas Stach Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171129110431.6300-1-l.stach@pengutronix.de --- drivers/gpu/drm/drm_atomic_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 71d712f1b56a..b16f1d69a0bb 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1225,7 +1225,7 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev, return; for_each_oldnew_crtc_in_state(old_state, crtc, old_crtc_state, new_crtc_state, i) { - if (!new_crtc_state->active || !new_crtc_state->planes_changed) + if (!new_crtc_state->active) continue; ret = drm_crtc_vblank_get(crtc); From 5478e478eee3b096b8d998d4ed445da30da2dfbc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 27 Nov 2017 22:06:13 +0100 Subject: [PATCH 192/333] eeprom: at24: correctly set the size for at24mac402 There's an ilog2() expansion in AT24_DEVICE_MAGIC() which rounds down the actual size of EUI-48 byte array in at24mac402 eeproms to 4 from 6, making it impossible to read it all. Fix it by manually adjusting the value in probe(). This patch contains a temporary fix that is suitable for stable branches. Eventually we'll probably remove the call to ilog2() while converting the magic values to actual structs. Cc: stable@vger.kernel.org Fixes: 0b813658c115 ("eeprom: at24: add support for at24mac series") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- drivers/misc/eeprom/at24.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index e0b4b36ef010..783244b485cc 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -730,6 +730,16 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_warn(&client->dev, "page_size looks suspicious (no power of 2)!\n"); + /* + * REVISIT: the size of the EUI-48 byte array is 6 in at24mac402, while + * the call to ilog2() in AT24_DEVICE_MAGIC() rounds it down to 4. + * + * Eventually we'll get rid of the magic values altoghether in favor of + * real structs, but for now just manually set the right size. + */ + if (chip.flags & AT24_FLAG_MAC && chip.byte_len == 4) + chip.byte_len = 6; + /* Use I2C operations unless we're stuck with SMBus extensions. */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { if (chip.flags & AT24_FLAG_ADDR16) From 52dd06506e9bbc2a37b352df7dfc5468f8da21fd Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 28 Nov 2017 12:16:03 +0100 Subject: [PATCH 193/333] drm/fb_helper: Disable all crtc's when initial setup fails. Some drivers like i915 start with crtc's enabled, but with deferred fbcon setup they were no longer disabled as part of fbdev setup. Headless units could no longer enter pc3 state because the crtc was still enabled. Fix this by calling restore_fbdev_mode when we would have called it otherwise once during initial fbdev setup. Signed-off-by: Maarten Lankhorst Fixes: ca91a2758fce ("drm/fb-helper: Support deferred setup") Cc: # v4.14+ Reported-by: Thomas Voegtle Tested-by: Thomas Voegtle Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171128111603.62757-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/drm_fb_helper.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 07374008f146..e56166334455 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1809,6 +1809,10 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, if (crtc_count == 0 || sizes.fb_width == -1 || sizes.fb_height == -1) { DRM_INFO("Cannot find any crtc or sizes\n"); + + /* First time: disable all crtc's.. */ + if (!fb_helper->deferred_setup && !READ_ONCE(fb_helper->dev->master)) + restore_fbdev_mode(fb_helper); return -EAGAIN; } From 3d7682af228fd78dc46bc6bf40e0268ad04521ec Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Nov 2017 14:25:50 +0000 Subject: [PATCH 194/333] rxrpc: Clean up whitespace Clean up some whitespace from rxrpc. Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- net/rxrpc/conn_object.c | 2 +- net/rxrpc/input.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index bda952ffe6a6..555274ddc514 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -426,7 +426,7 @@ recheck_state: next = call->expect_rx_by; #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } - + set(call->expect_req_by); set(call->expect_term_by); set(call->ack_at); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 1aad04a32d5e..c628351eb900 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -424,7 +424,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (earliest != now + MAX_JIFFY_OFFSET) { _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); - rxrpc_set_service_reap_timer(rxnet, earliest); + rxrpc_set_service_reap_timer(rxnet, earliest); } while (!list_empty(&graveyard)) { diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 23a5e61d8f79..6fc61400337f 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -976,7 +976,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, rxrpc_reduce_call_timer(call, expect_rx_by, now, rxrpc_timer_set_for_normal); } - + switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: rxrpc_input_data(call, skb, skew); @@ -1213,7 +1213,7 @@ void rxrpc_data_ready(struct sock *udp_sk) goto reupgrade; conn->service_id = sp->hdr.serviceId; } - + if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); From 5fc62f6a139a7b06b027bf442cd4205619506f59 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Nov 2017 14:40:41 +0000 Subject: [PATCH 195/333] rxrpc: Fix ACK generation from the connection event processor Repeat terminal ACKs and now terminal ACKs are now generated from the connection event processor rather from call handling as this allows us to discard client call structures as soon as possible and free up the channel for a follow on call. However, in ACKs so generated, the additional information trailer is malformed because the padding that's meant to be in the middle isn't included in what's transmitted. Fix it so that the 3 bytes of padding are included in the transmission. Further, the trailer is misaligned because of the padding, so assigment to the u16 and u32 fields inside it might cause problems on some arches, so fix this by breaking the padding and the trailer out of the packed struct. (This also deals with potential compiler weirdies where some of the nested structs are packed and some aren't). The symptoms can be seen in wireshark as terminal DUPLICATE or IDLE ACK packets in which the Max MTU, Interface MTU and rwind fields have weird values and the Max Packets field is apparently missing. Reported-by: Jeffrey Altman Signed-off-by: David Howells --- net/rxrpc/conn_event.c | 50 ++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 9e9a8db1bc9c..4ca11be6be3c 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -30,22 +30,18 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL; struct rxrpc_channel *chan; struct msghdr msg; - struct kvec iov; + struct kvec iov[3]; struct { struct rxrpc_wire_header whdr; union { - struct { - __be32 code; - } abort; - struct { - struct rxrpc_ackpacket ack; - u8 padding[3]; - struct rxrpc_ackinfo info; - }; + __be32 abort_code; + struct rxrpc_ackpacket ack; }; } __attribute__((packed)) pkt; + struct rxrpc_ackinfo ack_info; size_t len; - u32 serial, mtu, call_id; + int ioc; + u32 serial, mtu, call_id, padding; _enter("%d", conn->debug_id); @@ -66,6 +62,13 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, msg.msg_controllen = 0; msg.msg_flags = 0; + iov[0].iov_base = &pkt; + iov[0].iov_len = sizeof(pkt.whdr); + iov[1].iov_base = &padding; + iov[1].iov_len = 3; + iov[2].iov_base = &ack_info; + iov[2].iov_len = sizeof(ack_info); + pkt.whdr.epoch = htonl(conn->proto.epoch); pkt.whdr.cid = htonl(conn->proto.cid); pkt.whdr.callNumber = htonl(call_id); @@ -80,8 +83,10 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, len = sizeof(pkt.whdr); switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: - pkt.abort.code = htonl(chan->last_abort); - len += sizeof(pkt.abort); + pkt.abort_code = htonl(chan->last_abort); + iov[0].iov_len += sizeof(pkt.abort_code); + len += sizeof(pkt.abort_code); + ioc = 1; break; case RXRPC_PACKET_TYPE_ACK: @@ -94,13 +99,19 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0); pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE; pkt.ack.nAcks = 0; - pkt.info.rxMTU = htonl(rxrpc_rx_mtu); - pkt.info.maxMTU = htonl(mtu); - pkt.info.rwind = htonl(rxrpc_rx_window_size); - pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + ack_info.rxMTU = htonl(rxrpc_rx_mtu); + ack_info.maxMTU = htonl(mtu); + ack_info.rwind = htonl(rxrpc_rx_window_size); + ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max); pkt.whdr.flags |= RXRPC_SLOW_START_OK; - len += sizeof(pkt.ack) + sizeof(pkt.info); + padding = 0; + iov[0].iov_len += sizeof(pkt.ack); + len += sizeof(pkt.ack) + 3 + sizeof(ack_info); + ioc = 3; break; + + default: + return; } /* Resync with __rxrpc_disconnect_call() and check that the last call @@ -110,9 +121,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, if (READ_ONCE(chan->last_call) != call_id) return; - iov.iov_base = &pkt; - iov.iov_len = len; - serial = atomic_inc_return(&conn->serial); pkt.whdr.serial = htonl(serial); @@ -127,7 +135,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, break; } - kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len); + kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); _leave(""); return; } From 282ef4729195c8503f7101d574acfb5e7c8a8209 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 28 Nov 2017 11:28:52 -0600 Subject: [PATCH 196/333] rxrpc: Fix variable overwrite Values assigned to both variable resend_at and ack_at are overwritten before they can be used. The correct fix here is to add 'now' to the previously computed value in resend_at and ack_at. Addresses-Coverity-ID: 1462262 Addresses-Coverity-ID: 1462263 Addresses-Coverity-ID: 1462264 Fixes: beb8e5e4f38c ("rxrpc: Express protocol timeouts in terms of RTT") Link: https://marc.info/?i=17004.1511808959%40warthog.procyon.org.uk Signed-off-by: Gustavo A. R. Silva Signed-off-by: David Howells --- net/rxrpc/call_event.c | 2 +- net/rxrpc/sendmsg.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 555274ddc514..ad2ab1103189 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -123,7 +123,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, else ack_at = expiry; - ack_at = jiffies + expiry; + ack_at += now; if (time_before(ack_at, call->ack_at)) { WRITE_ONCE(call->ack_at, ack_at); rxrpc_reduce_call_timer(call, ack_at, now, diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index a1c53ac066a1..09f2a3e05221 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -233,7 +233,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, if (resend_at < 1) resend_at = 1; - resend_at = now + rxrpc_resend_timeout; + resend_at += now; WRITE_ONCE(call->resend_at, resend_at); rxrpc_reduce_call_timer(call, resend_at, now, rxrpc_timer_set_for_send); From 644a1f19c6c8393d0c4168a5adf79056da6822eb Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 27 Nov 2017 20:46:22 +0100 Subject: [PATCH 197/333] eeprom: at24: fix reading from 24MAC402/24MAC602 Chip datasheet mentions that word addresses other than the actual start position of the MAC delivers undefined results. So fix this. Current implementation doesn't work due to this wrong offset. Cc: stable@vger.kernel.org Fixes: 0b813658c115 ("eeprom: at24: add support for at24mac series") Signed-off-by: Heiner Kallweit Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 783244b485cc..8ca6772b3baf 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -425,7 +425,8 @@ static ssize_t at24_eeprom_read_mac(struct at24_data *at24, char *buf, memset(msg, 0, sizeof(msg)); msg[0].addr = client->addr; msg[0].buf = addrbuf; - addrbuf[0] = 0x90 + offset; + /* EUI-48 starts from 0x9a, EUI-64 from 0x98 */ + addrbuf[0] = 0xa0 - at24->chip.byte_len + offset; msg[0].len = 1; msg[1].addr = client->addr; msg[1].flags = I2C_M_RD; From d9bcd462daf34aebb8de9ad7f76de0198bb5a0f0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 24 Nov 2017 07:47:50 +0100 Subject: [PATCH 198/333] eeprom: at24: check at24_read/write arguments So far we completely rely on the caller to provide valid arguments. To be on the safe side perform an own sanity check. Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 8ca6772b3baf..305a7a464d09 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -569,6 +569,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) if (unlikely(!count)) return count; + if (off + count > at24->chip.byte_len) + return -EINVAL; + client = at24_translate_offset(at24, &off); ret = pm_runtime_get_sync(&client->dev); @@ -614,6 +617,9 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) if (unlikely(!count)) return -EINVAL; + if (off + count > at24->chip.byte_len) + return -EINVAL; + client = at24_translate_offset(at24, &off); ret = pm_runtime_get_sync(&client->dev); From 88bc0ede8d35edc969350852894dc864a2dc1859 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 29 Nov 2017 22:34:50 +0900 Subject: [PATCH 199/333] quota: Check for register_shrinker() failure. register_shrinker() might return -ENOMEM error since Linux 3.12. Call panic() as with other failure checks in this function if register_shrinker() failed. Fixes: 1d3d4437eae1 ("vmscan: per-node deferred work") Signed-off-by: Tetsuo Handa Cc: Jan Kara Cc: Michal Hocko Reviewed-by: Michal Hocko Signed-off-by: Jan Kara --- fs/quota/dquot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d38684ff2ebb..020c597ef9b6 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2992,7 +2992,8 @@ static int __init dquot_init(void) pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); - register_shrinker(&dqcache_shrinker); + if (register_shrinker(&dqcache_shrinker)) + panic("Cannot register dquot shrinker"); return 0; } From 445f288d706cb5a418b13c340280b47d4585d667 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 18 Nov 2017 13:50:11 -0500 Subject: [PATCH 200/333] NFSv4: Ensure gcc 4.4.4 can compile initialiser for "invalid_stateid" gcc 4.4.4 is too old to have full C11 anonymous union support, so the current initialiser fails to compile. Reported-by: Boris Ostrovsky Signed-off-by: Trond Myklebust (compile-)Tested-by: Boris Ostrovsky Reviewed-by: Geert Uytterhoeven Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 54fd56d715a8..e4f4a09ed9f4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -71,8 +71,8 @@ const nfs4_stateid zero_stateid = { }; const nfs4_stateid invalid_stateid = { { - .seqid = cpu_to_be32(0xffffffffU), - .other = { 0 }, + /* Funky initialiser keeps older gcc versions happy */ + .data = { 0xff, 0xff, 0xff, 0xff, 0 }, }, .type = NFS4_INVALID_STATEID_TYPE, }; From 4ba161a793d5f43757c35feff258d9f20a082940 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Nov 2017 12:00:24 -0500 Subject: [PATCH 201/333] SUNRPC: Allow connect to return EHOSTUNREACH Reported-by: Dmitry Vyukov Signed-off-by: Trond Myklebust Tested-by: Dmitry Vyukov Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 684e356b40e4..c2b2d489b57b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2440,6 +2440,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: /* From 1569d651f152870663fabd8f1c80af353f967ad5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 27 Nov 2017 13:12:35 +0100 Subject: [PATCH 202/333] drm/ttm: fix populate_and_map() functions once more MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts "drm/ttm: Fix configuration error around populate_and_map() functions". This fix has gone into the wrong direction. Those helpers should be available even when neither CONFIG_INTEL_IOMMU nor CONFIG_SWIOTLB are set. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 2 -- include/drm/ttm/ttm_page_alloc.h | 32 +++++++++------------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 316f831ad5f0..6d48ccca0b38 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -1058,7 +1058,6 @@ void ttm_pool_unpopulate(struct ttm_tt *ttm) } EXPORT_SYMBOL(ttm_pool_unpopulate); -#if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) { unsigned i, j; @@ -1129,7 +1128,6 @@ void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt) ttm_pool_unpopulate(&tt->ttm); } EXPORT_SYMBOL(ttm_unmap_and_unpopulate_pages); -#endif int ttm_page_alloc_debugfs(struct seq_file *m, void *data) { diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h index 38a2b4770c35..593811362a91 100644 --- a/include/drm/ttm/ttm_page_alloc.h +++ b/include/drm/ttm/ttm_page_alloc.h @@ -58,12 +58,21 @@ int ttm_pool_populate(struct ttm_tt *ttm); */ void ttm_pool_unpopulate(struct ttm_tt *ttm); +/** + * Populates and DMA maps pages to fullfil a ttm_dma_populate() request + */ +int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt); + +/** + * Unpopulates and DMA unmaps pages as part of a + * ttm_dma_unpopulate() request */ +void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt); + /** * Output the state of pools to debugfs file */ int ttm_page_alloc_debugfs(struct seq_file *m, void *data); - #if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) /** * Initialize pool allocator. @@ -83,17 +92,6 @@ int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data); int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev); void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev); - -/** - * Populates and DMA maps pages to fullfil a ttm_dma_populate() request - */ -int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt); - -/** - * Unpopulates and DMA unmaps pages as part of a - * ttm_dma_unpopulate() request */ -void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt); - #else static inline int ttm_dma_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) @@ -116,16 +114,6 @@ static inline void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) { } - -static inline int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) -{ - return -ENOMEM; -} - -static inline void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt) -{ -} - #endif #endif From 7fdf165a52505392eb059902b0df55e79a45c25f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 29 Nov 2017 17:26:56 +0100 Subject: [PATCH 203/333] drm/radeon: remove init of CIK VMIDs 8-16 for amdkfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMIDs 8-16 in Kaveri were reserved for use by the amdkfd driver. Because we removed amdkfd support from radeon, those VMIDs are now used by radeon and are initialized by radeon. This patch removes the function that initialized those VMIDs for amdkfd use. This initialization overridden the radeon initialization and caused GPU faults and GUI crashed. Fixes: f4fa88ab28ab ("drm/radeon: deprecate and remove KFD interface") Rported-by: Michel Dänzer Acked-by: Christian König Reviewed-and-Tested-by: Michel Dänzer Signed-off-by: Oded Gabbay Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 898f9a078830..a6511918f632 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5451,28 +5451,6 @@ void cik_pcie_gart_tlb_flush(struct radeon_device *rdev) WREG32(VM_INVALIDATE_REQUEST, 0x1); } -static void cik_pcie_init_compute_vmid(struct radeon_device *rdev) -{ - int i; - uint32_t sh_mem_bases, sh_mem_config; - - sh_mem_bases = 0x6000 | 0x6000 << 16; - sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED); - sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED); - - mutex_lock(&rdev->srbm_mutex); - for (i = 8; i < 16; i++) { - cik_srbm_select(rdev, 0, 0, 0, i); - /* CP and shaders */ - WREG32(SH_MEM_CONFIG, sh_mem_config); - WREG32(SH_MEM_APE1_BASE, 1); - WREG32(SH_MEM_APE1_LIMIT, 0); - WREG32(SH_MEM_BASES, sh_mem_bases); - } - cik_srbm_select(rdev, 0, 0, 0, 0); - mutex_unlock(&rdev->srbm_mutex); -} - /** * cik_pcie_gart_enable - gart enable * @@ -5586,8 +5564,6 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) cik_srbm_select(rdev, 0, 0, 0, 0); mutex_unlock(&rdev->srbm_mutex); - cik_pcie_init_compute_vmid(rdev); - cik_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(rdev->mc.gtt_size >> 20), From 9b85c2d4508563f4bb1de0d971ed02fea0d0d757 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:28 -0800 Subject: [PATCH 204/333] tools: bpftool: fix crash on bad parameters with JSON If bad or unrecognised parameters are specified after JSON output is requested, `usage()` will try to output null JSON object before the writer is created. To prevent this, create the writer as soon as the `--json` option is parsed. Fixes: 004b45c0e51a ("tools: bpftool: provide JSON output for all possible commands") Reported-by: Jakub Kicinski Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d6e4762170a4..14ad54a1c404 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -291,7 +291,15 @@ int main(int argc, char **argv) pretty_output = true; /* fall through */ case 'j': - json_output = true; + if (!json_output) { + json_wtr = jsonw_new(stdout); + if (!json_wtr) { + p_err("failed to create JSON writer"); + return -1; + } + json_output = true; + } + jsonw_pretty(json_wtr, pretty_output); break; case 'f': show_pinned = true; @@ -306,15 +314,6 @@ int main(int argc, char **argv) if (argc < 0) usage(); - if (json_output) { - json_wtr = jsonw_new(stdout); - if (!json_wtr) { - p_err("failed to create JSON writer"); - return -1; - } - jsonw_pretty(json_wtr, pretty_output); - } - bfd_init(); ret = cmd_select(cmds, argc, argv, do_help); From 7868620a3c15dbc661fb5d849de403ac04624c50 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:29 -0800 Subject: [PATCH 205/333] tools: bpftool: clean up the JSON writer before exiting in usage() The writer is cleaned at the end of the main function, but not if the program exits sooner in usage(). Let's keep it clean and destroy the writer before exiting. Destruction and actual call to exit() are moved to another function so that clean exit can also be performed without printing usage() hints. Fixes: d35efba99d92 ("tools: bpftool: introduce --json and --pretty options") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.c | 10 +++++++++- tools/bpf/bpftool/main.h | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 14ad54a1c404..d72dd73a4016 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -58,11 +58,19 @@ bool show_pinned; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; +static void __noreturn clean_and_exit(int i) +{ + if (json_output) + jsonw_destroy(&json_wtr); + + exit(i); +} + void usage(void) { last_do_help(last_argc - 1, last_argv + 1); - exit(-1); + clean_and_exit(-1); } static int do_help(int argc, char **argv) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 9c191e222d6f..0b60ddfb2b93 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -80,7 +81,7 @@ void p_info(const char *fmt, ...); bool is_prefix(const char *pfx, const char *str); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); -void usage(void) __attribute__((noreturn)); +void usage(void) __noreturn; struct pinned_obj_table { DECLARE_HASHTABLE(table, 16); From 146882a37da7aa566c7ec088b42c6495d769f2ba Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:30 -0800 Subject: [PATCH 206/333] tools: bpftool: make error message from getopt_long() JSON-friendly If `getopt_long()` meets an unknown option, it prints its own error message to standard error output. While this does not strictly break JSON output, it is the only case bpftool prints something to standard error output if JSON output is required. All other errors are printed on standard output as JSON objects, so that an external program does not have to parse stderr. This is changed by setting the global variable `opterr` to 0. Furthermore, p_err() is used to reproduce the error message in a more JSON-friendly way, so that users still get to know what the erroneous option is. Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d72dd73a4016..d294bc8168be 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -288,6 +288,7 @@ int main(int argc, char **argv) hash_init(prog_table.table); hash_init(map_table.table); + opterr = 0; while ((opt = getopt_long(argc, argv, "Vhpjf", options, NULL)) >= 0) { switch (opt) { @@ -313,7 +314,11 @@ int main(int argc, char **argv) show_pinned = true; break; default: - usage(); + p_err("unrecognized option '%s'", argv[optind - 1]); + if (json_output) + clean_and_exit(-1); + else + usage(); } } From 0d954eeb99eee63964a07a68fcac61e1df8488e7 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:31 -0800 Subject: [PATCH 207/333] tools: bpftool: remove spurious line break from error message The end-of-line character inside the string would break JSON compliance. Remove it, `p_err()` already adds a '\n' character for plain output anyway. Fixes: 9a5ab8bf1d6d ("tools: bpftool: turn err() and info() macros into functions") Reported-by: Jakub Kicinski Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 0b60ddfb2b93..bff330b49791 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -51,7 +51,7 @@ #define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); }) #define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) -#define BAD_ARG() ({ p_err("what is '%s'?\n", *argv); -1; }) +#define BAD_ARG() ({ p_err("what is '%s'?", *argv); -1; }) #define ERR_MAX_LEN 1024 From 507e590da398a0e3438d563b5e736c3f2a7749d7 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:32 -0800 Subject: [PATCH 208/333] tools: bpftool: unify installation directories Programs and documentation not managed by package manager are generally installed under /usr/local/, instead of the user's home directory. In particular, `man` is generally able to find manual pages under `/usr/local/share/man`. bpftool generally follows perf's example, and perf installs to home directory. However bpftool requires root credentials, so it seems sensible to follow the more common convention of installing files under /usr/local instead. So, make /usr/local the default prefix for installing the binary with `make install`, and the documentation with `make doc-install`. Also, create /usr/local/sbin if it does not exist. Note that the bash-completion file, however, is still installed under /usr/share/bash-completion/completions, as the default setup for bash does not attempt to load completion files under /usr/local/. Reported-by: David Beckett Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/Makefile | 2 +- tools/bpf/bpftool/Makefile | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index bde77d7c4390..37292bb5ce60 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -6,7 +6,7 @@ RM ?= rm -f # Make the path relative to DESTDIR, not prefix ifndef DESTDIR -prefix?=$(HOME) +prefix ?= /usr/local endif mandir ?= $(prefix)/share/man man8dir = $(mandir)/man8 diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 813826c50936..c5b21f2cbca5 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -45,8 +45,8 @@ $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null -prefix = /usr -bash_compdir ?= $(prefix)/share/bash-completion/completions +prefix = /usr/local +bash_compdir ?= /usr/share/bash-completion/completions CC = gcc @@ -76,6 +76,7 @@ clean: $(LIBBPF)-clean $(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d install: + install -m 0755 -d $(prefix)/sbin install $(OUTPUT)bpftool $(prefix)/sbin/bpftool install -m 0755 -d $(bash_compdir) install -m 0644 bash-completion/bpftool $(bash_compdir) From ad3cda064402b69148faf5f7cd1ac8c2eee52645 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 28 Nov 2017 17:44:33 -0800 Subject: [PATCH 209/333] tools: bpftool: declare phony targets as such In the Makefile, targets install, doc and doc-install should be added to .PHONY. Let's fix this. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index c5b21f2cbca5..ec3052c0b004 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -89,5 +89,5 @@ doc-install: FORCE: -.PHONY: all clean FORCE +.PHONY: all clean FORCE install doc doc-install .DEFAULT_GOAL := all From 23721a755f98ac846897a013c92cccb281c1bcc8 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Thu, 30 Nov 2017 09:41:29 +0800 Subject: [PATCH 210/333] trace/xdp: fix compile warning: 'struct bpf_map' declared inside parameter list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We meet this compile warning, which caused by missing bpf.h in xdp.h. In file included from ./include/trace/events/xdp.h:10:0, from ./include/linux/bpf_trace.h:6, from drivers/net/ethernet/intel/i40e/i40e_txrx.c:29: ./include/trace/events/xdp.h:93:17: warning: ‘struct bpf_map’ declared inside parameter list will not be visible outside of this definition or declaration const struct bpf_map *map, u32 map_index), ^ ./include/linux/tracepoint.h:187:34: note: in definition of macro ‘__DECLARE_TRACE’ static inline void trace_##name(proto) \ ^~~~~ ./include/linux/tracepoint.h:352:24: note: in expansion of macro ‘PARAMS’ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ ^~~~~~ ./include/linux/tracepoint.h:477:2: note: in expansion of macro ‘DECLARE_TRACE’ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) ^~~~~~~~~~~~~ ./include/linux/tracepoint.h:477:22: note: in expansion of macro ‘PARAMS’ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) ^~~~~~ ./include/trace/events/xdp.h:89:1: note: in expansion of macro ‘DEFINE_EVENT’ DEFINE_EVENT(xdp_redirect_template, xdp_redirect, ^~~~~~~~~~~~ ./include/trace/events/xdp.h:90:2: note: in expansion of macro ‘TP_PROTO’ TP_PROTO(const struct net_device *dev, ^~~~~~~~ ./include/trace/events/xdp.h:93:17: warning: ‘struct bpf_map’ declared inside parameter list will not be visible outside of this definition or declaration const struct bpf_map *map, u32 map_index), ^ ./include/linux/tracepoint.h:203:38: note: in definition of macro ‘__DECLARE_TRACE’ register_trace_##name(void (*probe)(data_proto), void *data) \ ^~~~~~~~~~ ./include/linux/tracepoint.h:354:4: note: in expansion of macro ‘PARAMS’ PARAMS(void *__data, proto), \ ^~~~~~ Reported-by: Huang Daode Cc: Hanjun Guo Fixes: 8d3b778ff544 ("xdp: tracepoint xdp_redirect also need a map argument") Signed-off-by: Xie XiuQi Acked-by: Jesper Dangaard Brouer Acked-by: Steven Rostedt (VMware) Signed-off-by: Daniel Borkmann --- include/trace/events/xdp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 4cd0f05d0113..8989a92c571a 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -8,6 +8,7 @@ #include #include #include +#include #define __XDP_ACT_MAP(FN) \ FN(ABORTED) \ From 4b81cb2ff69c8a8e297a147d2eb4d9b5e8d7c435 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 29 Nov 2017 16:09:54 -0800 Subject: [PATCH 211/333] mm, memory_hotplug: do not back off draining pcp free pages from kworker context drain_all_pages backs off when called from a kworker context since commit 0ccce3b92421 ("mm, page_alloc: drain per-cpu pages from workqueue context") because the original IPI based pcp draining has been replaced by a WQ based one and the check wanted to prevent from recursion and inter workers dependencies. This has made some sense at the time because the system WQ has been used and one worker holding the lock could be blocked while waiting for new workers to emerge which can be a problem under OOM conditions. Since then commit ce612879ddc7 ("mm: move pcp and lru-pcp draining into single wq") has moved draining to a dedicated (mm_percpu_wq) WQ with a rescuer so we shouldn't depend on any other WQ activity to make a forward progress so calling drain_all_pages from a worker context is safe as long as this doesn't happen from mm_percpu_wq itself which is not the case because all workers are required to _not_ depend on any MM locks. Why is this a problem in the first place? ACPI driven memory hot-remove (acpi_device_hotplug) is executed from the worker context. We end up calling __offline_pages to free all the pages and that requires both lru_add_drain_all_cpuslocked and drain_all_pages to do their job otherwise we can have dangling pages on pcp lists and fail the offline operation (__test_page_isolated_in_pageblock would see a page with 0 ref count but without PageBuddy set). Fix the issue by removing the worker check in drain_all_pages. lru_add_drain_all_cpuslocked doesn't have this restriction so it works as expected. Link: http://lkml.kernel.org/r/20170828093341.26341-1-mhocko@kernel.org Fixes: 0ccce3b924212 ("mm, page_alloc: drain per-cpu pages from workqueue context") Signed-off-by: Michal Hocko Cc: Mel Gorman Cc: Tejun Heo Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d4096f4a5c1f..145f5181e4cd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2507,10 +2507,6 @@ void drain_all_pages(struct zone *zone) if (WARN_ON_ONCE(!mm_percpu_wq)) return; - /* Workqueues cannot recurse */ - if (current->flags & PF_WQ_WORKER) - return; - /* * Do not drain if one is already in progress unless it's specific to * a zone. Such callers are primarily CMA and memory hotplug and need From 687cb0884a714ff484d038e9190edc874edcf146 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 29 Nov 2017 16:09:58 -0800 Subject: [PATCH 212/333] mm, oom_reaper: gather each vma to prevent leaking TLB entry tlb_gather_mmu(&tlb, mm, 0, -1) means gathering the whole virtual memory space. In this case, tlb->fullmm is true. Some archs like arm64 doesn't flush TLB when tlb->fullmm is true: commit 5a7862e83000 ("arm64: tlbflush: avoid flushing when fullmm == 1"). Which causes leaking of tlb entries. Will clarifies his patch: "Basically, we tag each address space with an ASID (PCID on x86) which is resident in the TLB. This means we can elide TLB invalidation when pulling down a full mm because we won't ever assign that ASID to another mm without doing TLB invalidation elsewhere (which actually just nukes the whole TLB). I think that means that we could potentially not fault on a kernel uaccess, because we could hit in the TLB" There could be a window between complete_signal() sending IPI to other cores and all threads sharing this mm are really kicked off from cores. In this window, the oom reaper may calls tlb_flush_mmu_tlbonly() to flush TLB then frees pages. However, due to the above problem, the TLB entries are not really flushed on arm64. Other threads are possible to access these pages through TLB entries. Moreover, a copy_to_user() can also write to these pages without generating page fault, causes use-after-free bugs. This patch gathers each vma instead of gathering full vm space. In this case tlb->fullmm is not true. The behavior of oom reaper become similar to munmapping before do_exit, which should be safe for all archs. Link: http://lkml.kernel.org/r/20171107095453.179940-1-wangnan0@huawei.com Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Wang Nan Acked-by: Michal Hocko Acked-by: David Rientjes Cc: Minchan Kim Cc: Will Deacon Cc: Bob Liu Cc: Ingo Molnar Cc: Roman Gushchin Cc: Konstantin Khlebnikov Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c86fbd1b590e..c957be32b27a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -550,7 +550,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) */ set_bit(MMF_UNSTABLE, &mm->flags); - tlb_gather_mmu(&tlb, mm, 0, -1); for (vma = mm->mmap ; vma; vma = vma->vm_next) { if (!can_madv_dontneed_vma(vma)) continue; @@ -565,11 +564,13 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) * we do not want to block exit_mmap by keeping mm ref * count elevated without a good reason. */ - if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { + tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end); unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, NULL); + tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end); + } } - tlb_finish_mmu(&tlb, 0, -1); pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), From 63cd448908b5eb51d84c52f02b31b9b4ccd1cb5a Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 29 Nov 2017 16:10:01 -0800 Subject: [PATCH 213/333] mm/cma: fix alloc_contig_range ret code/potential leak If the call __alloc_contig_migrate_range() in alloc_contig_range returns -EBUSY, processing continues so that test_pages_isolated() is called where there is a tracepoint to identify the busy pages. However, it is possible for busy pages to become available between the calls to these two routines. In this case, the range of pages may be allocated. Unfortunately, the original return code (ret == -EBUSY) is still set and returned to the caller. Therefore, the caller believes the pages were not allocated and they are leaked. Update the comment to indicate that allocation is still possible even if __alloc_contig_migrate_range returns -EBUSY. Also, clear return code in this case so that it is not accidentally used or returned to caller. Link: http://lkml.kernel.org/r/20171122185214.25285-1-mike.kravetz@oracle.com Fixes: 8ef5849fa8a2 ("mm/cma: always check which page caused allocation failure") Signed-off-by: Mike Kravetz Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Joonsoo Kim Cc: Michal Nazarewicz Cc: Laura Abbott Cc: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 145f5181e4cd..73f5d4556b3d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7652,11 +7652,18 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* * In case of -EBUSY, we'd like to know which page causes problem. - * So, just fall through. We will check it in test_pages_isolated(). + * So, just fall through. test_pages_isolated() has a tracepoint + * which will report the busy page. + * + * It is possible that busy pages could become available before + * the call to test_pages_isolated, and the range will actually be + * allocated. So, if we fall through be sure to clear ret so that + * -EBUSY is not accidentally used or returned to caller. */ ret = __alloc_contig_migrate_range(&cc, start, end); if (ret && ret != -EBUSY) goto done; + ret =0; /* * Pages from [start, end) are within a MAX_ORDER_NR_PAGES From 1501899a898dfb5477c55534bdfd734c046da06d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:06 -0800 Subject: [PATCH 214/333] mm: fix device-dax pud write-faults triggered by get_user_pages() Currently only get_user_pages_fast() can safely handle the writable gup case due to its use of pud_access_permitted() to check whether the pud entry is writable. In the gup slow path pud_write() is used instead of pud_access_permitted() and to date it has been unimplemented, just calls BUG_ON(). kernel BUG at ./include/linux/hugetlb.h:244! [..] RIP: 0010:follow_devmap_pud+0x482/0x490 [..] Call Trace: follow_page_mask+0x28c/0x6e0 __get_user_pages+0xe4/0x6c0 get_user_pages_unlocked+0x130/0x1b0 get_user_pages_fast+0x89/0xb0 iov_iter_get_pages_alloc+0x114/0x4a0 nfs_direct_read_schedule_iovec+0xd2/0x350 ? nfs_start_io_direct+0x63/0x70 nfs_file_direct_read+0x1e0/0x250 nfs_file_read+0x90/0xc0 For now this just implements a simple check for the _PAGE_RW bit similar to pmd_write. However, this implies that the gup-slow-path check is missing the extra checks that the gup-fast-path performs with pud_access_permitted. Later patches will align all checks to use the 'access_permitted' helper if the architecture provides it. Note that the generic 'access_permitted' helper fallback is the simple _PAGE_RW check on architectures that do not define the 'access_permitted' helper(s). [dan.j.williams@intel.com: fix powerpc compile error] Link: http://lkml.kernel.org/r/151129126165.37405.16031785266675461397.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/151043109938.2842.14834662818213616199.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Signed-off-by: Dan Williams Reported-by: Stephen Rothwell Acked-by: Thomas Gleixner [x86] Cc: Kirill A. Shutemov Cc: Catalin Marinas Cc: "David S. Miller" Cc: Dave Hansen Cc: Will Deacon Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Arnd Bergmann Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 6 ++++++ include/asm-generic/pgtable.h | 8 ++++++++ include/linux/hugetlb.h | 8 -------- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 09f9e1e00e3b..dcce76ee4aa7 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1088,6 +1088,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return pud_flags(pud) & _PAGE_RW; +} + /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 757dc6ffc7ba..1ac457511f4e 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -814,6 +814,14 @@ static inline int pmd_write(pmd_t pmd) #endif /* __HAVE_ARCH_PMD_WRITE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifndef pud_write +static inline int pud_write(pud_t pud) +{ + BUG(); + return 0; +} +#endif /* pud_write */ + #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \ (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index fbf5b31d47ee..82a25880714a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -239,14 +239,6 @@ static inline int pgd_write(pgd_t pgd) } #endif -#ifndef pud_write -static inline int pud_write(pud_t pud) -{ - BUG(); - return 0; -} -#endif - #define HUGETLB_ANON_FILE "anon_hugepage" enum { From e4e40e0263ea6a3bfefbfd15d1b6ff5c03f2b95e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:10 -0800 Subject: [PATCH 215/333] mm: switch to 'define pmd_write' instead of __HAVE_ARCH_PMD_WRITE In response to compile breakage introduced by a series that added the pud_write helper to x86, Stephen notes: did you consider using the other paradigm: In arch include files: #define pud_write pud_write static inline int pud_write(pud_t pud) ..... Then in include/asm-generic/pgtable.h: #ifndef pud_write tatic inline int pud_write(pud_t pud) { .... } #endif If you had, then the powerpc code would have worked ... ;-) and many of the other interfaces in include/asm-generic/pgtable.h are protected that way ... Given that some architecture already define pmd_write() as a macro, it's a net reduction to drop the definition of __HAVE_ARCH_PMD_WRITE. Link: http://lkml.kernel.org/r/151129126721.37405.13339850900081557813.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Suggested-by: Stephen Rothwell Cc: Benjamin Herrenschmidt Cc: "Aneesh Kumar K.V" Cc: Oliver OHalloran Cc: Chris Metcalf Cc: Russell King Cc: Ralf Baechle Cc: "H. Peter Anvin" Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/pgtable-3level.h | 1 - arch/arm64/include/asm/pgtable.h | 1 - arch/mips/include/asm/pgtable.h | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 - arch/s390/include/asm/pgtable.h | 2 +- arch/sparc/include/asm/pgtable_64.h | 2 +- arch/tile/include/asm/pgtable.h | 1 - arch/x86/include/asm/pgtable.h | 2 +- include/asm-generic/pgtable.h | 4 ++-- 9 files changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 2a029bceaf2f..1a7a17b2a1ba 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -221,7 +221,6 @@ static inline pte_t pte_mkspecial(pte_t pte) } #define __HAVE_ARCH_PTE_SPECIAL -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY)) #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) #define pud_page(pud) pmd_page(__pmd(pud_val(pud))) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c9530b5b5ca8..149d05fb9421 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -345,7 +345,6 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 9e9e94415d08..1a508a74d48d 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -552,7 +552,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_WRITE); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9a677cd5997f..44697817ccc6 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd) } #endif /* CONFIG_NUMA_BALANCING */ -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index d7fe9838084d..0a6b0286c32e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -709,7 +709,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 5a9e96be1665..9937c5ff94a9 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -715,7 +715,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return pte_pfn(pte); } -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline unsigned long pmd_write(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 2a26cc4fefc2..adfa21b18488 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -475,7 +475,6 @@ static inline void pmd_clear(pmd_t *pmdp) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd)) #define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) -#define __HAVE_ARCH_PMD_WRITE #define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot))) #define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index dcce76ee4aa7..95e2dfd75521 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1061,7 +1061,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_RW; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 1ac457511f4e..b234d54f2cb6 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -805,13 +805,13 @@ static inline int pmd_trans_huge(pmd_t pmd) { return 0; } -#ifndef __HAVE_ARCH_PMD_WRITE +#ifndef pmd_write static inline int pmd_write(pmd_t pmd) { BUG(); return 0; } -#endif /* __HAVE_ARCH_PMD_WRITE */ +#endif /* pmd_write */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifndef pud_write From e7fe7b5cae90cf85bb6fed5ec5d4c5cf311a4fe9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:14 -0800 Subject: [PATCH 216/333] mm: replace pud_write with pud_access_permitted in fault + gup paths The 'access_permitted' helper is used in the gup-fast path and goes beyond the simple _PAGE_RW check to also: - validate that the mapping is writable from a protection keys standpoint - validate that the pte has _PAGE_USER set since all fault paths where pud_write is must be referencing user-memory. [dan.j.williams@intel.com: fix powerpc compile error] Link: http://lkml.kernel.org/r/151129127237.37405.16073414520854722485.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/151043110453.2842.2166049702068628177.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Dave Hansen Cc: "David S. Miller" Cc: Kirill A. Shutemov Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/pgtable.h | 6 ++++++ arch/sparc/mm/gup.c | 2 +- mm/huge_memory.c | 2 +- mm/memory.c | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0a6b0286c32e..57d7bc92e0b8 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1264,6 +1264,12 @@ static inline pud_t pud_mkwrite(pud_t pud) return pud; } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0; +} + static inline pud_t pud_mkclean(pud_t pud) { if (pud_large(pud)) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 5335ba3c850e..5ae2d0a01a70 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, if (!(pud_val(pud) & _PAGE_VALID)) return 0; - if (write && !pud_write(pud)) + if (!pud_access_permitted(pud, write)) return 0; refs = 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f22401fd83b5..4fe2491056a0 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1019,7 +1019,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, assert_spin_locked(pud_lockptr(mm, pud)); - if (flags & FOLL_WRITE && !pud_write(*pud)) + if (!pud_access_permitted(*pud, flags & FOLL_WRITE)) return NULL; if (pud_present(*pud) && pud_devmap(*pud)) diff --git a/mm/memory.c b/mm/memory.c index b10c1d26f675..25d283d46ea3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4013,7 +4013,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, /* NUMA case for anonymous PUDs would go here */ - if (dirty && !pud_write(orig_pud)) { + if (dirty && !pud_access_permitted(orig_pud, WRITE)) { ret = wp_huge_pud(&vmf, orig_pud); if (!(ret & VM_FAULT_FALLBACK)) return ret; From c7da82b894e9eef60a04a15f065a8502341bf13b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:18 -0800 Subject: [PATCH 217/333] mm: replace pmd_write with pmd_access_permitted in fault + gup paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'access_permitted' helper is used in the gup-fast path and goes beyond the simple _PAGE_RW check to also: - validate that the mapping is writable from a protection keys standpoint - validate that the pte has _PAGE_USER set since all fault paths where pmd_write is must be referencing user-memory. Link: http://lkml.kernel.org/r/151043111049.2842.15241454964150083466.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: "Jérôme Glisse" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/mm/gup.c | 2 +- fs/dax.c | 3 ++- mm/hmm.c | 4 ++-- mm/huge_memory.c | 4 ++-- mm/memory.c | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 5ae2d0a01a70..33c0f8bb0f33 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, if (!(pmd_val(pmd) & _PAGE_VALID)) return 0; - if (write && !pmd_write(pmd)) + if (!pmd_access_permitted(pmd, write)) return 0; refs = 0; diff --git a/fs/dax.c b/fs/dax.c index 95981591977a..78b72c48374e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -627,7 +627,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, if (pfn != pmd_pfn(*pmdp)) goto unlock_pmd; - if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp)) + if (!pmd_dirty(*pmdp) + && !pmd_access_permitted(*pmdp, WRITE)) goto unlock_pmd; flush_cache_page(vma, address, pfn); diff --git a/mm/hmm.c b/mm/hmm.c index ea19742a5d60..93718a391611 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -391,11 +391,11 @@ again: if (pmd_protnone(pmd)) return hmm_vma_walk_clear(start, end, walk); - if (write_fault && !pmd_write(pmd)) + if (!pmd_access_permitted(pmd, write_fault)) return hmm_vma_walk_clear(start, end, walk); pfn = pmd_pfn(pmd) + pte_index(addr); - flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0; + flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0; for (; addr < end; addr += PAGE_SIZE, i++, pfn++) pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag; return 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4fe2491056a0..05b729f45e8a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -877,7 +877,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, */ WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); - if (flags & FOLL_WRITE && !pmd_write(*pmd)) + if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE)) return NULL; if (pmd_present(*pmd) && pmd_devmap(*pmd)) @@ -1393,7 +1393,7 @@ out_unlock: */ static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) { - return pmd_write(pmd) || + return pmd_access_permitted(pmd, WRITE) || ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); } diff --git a/mm/memory.c b/mm/memory.c index 25d283d46ea3..416e451a707e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4046,7 +4046,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) return do_huge_pmd_numa_page(&vmf, orig_pmd); - if (dirty && !pmd_write(orig_pmd)) { + if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) { ret = wp_huge_pmd(&vmf, orig_pmd); if (!(ret & VM_FAULT_FALLBACK)) return ret; From 5c9d2d5c269c498aa9a546e8d2158a3e4142a1a2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:21 -0800 Subject: [PATCH 218/333] mm: replace pte_write with pte_access_permitted in fault + gup paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'access_permitted' helper is used in the gup-fast path and goes beyond the simple _PAGE_RW check to also: - validate that the mapping is writable from a protection keys standpoint - validate that the pte has _PAGE_USER set since all fault paths where pte_write is must be referencing user-memory. Link: http://lkml.kernel.org/r/151043111604.2842.8051684481794973100.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: "Jérôme Glisse" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 2 +- mm/hmm.c | 4 ++-- mm/memory.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index dfcde13f289a..85cc822fd403 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, */ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) { - return pte_write(pte) || + return pte_access_permitted(pte, WRITE) || ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); } diff --git a/mm/hmm.c b/mm/hmm.c index 93718a391611..3a5c172af560 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -456,11 +456,11 @@ again: continue; } - if (write_fault && !pte_write(pte)) + if (!pte_access_permitted(pte, write_fault)) goto fault; pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag; - pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0; + pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0; continue; fault: diff --git a/mm/memory.c b/mm/memory.c index 416e451a707e..4f07acd1695f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3948,7 +3948,7 @@ static int handle_pte_fault(struct vm_fault *vmf) if (unlikely(!pte_same(*vmf->pte, entry))) goto unlock; if (vmf->flags & FAULT_FLAG_WRITE) { - if (!pte_write(entry)) + if (!pte_access_permitted(entry, WRITE)) return do_wp_page(vmf); entry = pte_mkdirty(entry); } @@ -4336,7 +4336,7 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = *ptep; - if ((flags & FOLL_WRITE) && !pte_write(pte)) + if (!pte_access_permitted(pte, flags & FOLL_WRITE)) goto unlock; *prot = pgprot_val(pte_pgprot(pte)); From 95a87982541932503d3f59aba4c30b0bde0a6294 Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Wed, 29 Nov 2017 16:10:25 -0800 Subject: [PATCH 219/333] scripts/faddr2line: extend usage on generic arch When cross-compiling, fadd2line should use the binary tool used for the target system, rather than that of the host. Link: http://lkml.kernel.org/r/20171121092911.GA150711@sofia Signed-off-by: Liu Changcheng Cc: Kate Stewart Cc: NeilBrown Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/faddr2line | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 1f5ce959f596..39e07d8574dd 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -44,9 +44,16 @@ set -o errexit set -o nounset +READELF="${CROSS_COMPILE}readelf" +ADDR2LINE="${CROSS_COMPILE}addr2line" +SIZE="${CROSS_COMPILE}size" +NM="${CROSS_COMPILE}nm" + command -v awk >/dev/null 2>&1 || die "awk isn't installed" -command -v readelf >/dev/null 2>&1 || die "readelf isn't installed" -command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed" +command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" +command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed" +command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed" +command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed" usage() { echo "usage: faddr2line ..." >&2 @@ -69,10 +76,10 @@ die() { find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return - local file_line=$(addr2line -e $objfile $start_kernel_addr) + local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) [[ -z $file_line ]] && return local prefix=${file_line%init/main.c:*} @@ -104,7 +111,7 @@ __faddr2line() { # Go through each of the object's symbols which match the func name. # In rare cases there might be duplicates. - file_end=$(size -Ax $objfile | awk '$1 == ".text" {print $2}') + file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}') while read symbol; do local fields=($symbol) local sym_base=0x${fields[0]} @@ -156,10 +163,10 @@ __faddr2line() { # pass real address to addr2line echo "$func+$offset/$sym_size:" - addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" + ${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" DONE=1 - done < <(nm -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') + done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') } [[ $# -lt 2 ]] && usage From 31383c6865a578834dd953d9dbc88e6b19fe3997 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:28 -0800 Subject: [PATCH 220/333] mm, hugetlbfs: introduce ->split() to vm_operations_struct Patch series "device-dax: fix unaligned munmap handling" When device-dax is operating in huge-page mode we want it to behave like hugetlbfs and fail attempts to split vmas into unaligned ranges. It would be messy to teach the munmap path about device-dax alignment constraints in the same (hstate) way that hugetlbfs communicates this constraint. Instead, these patches introduce a new ->split() vm operation. This patch (of 2): The device-dax interface has similar constraints as hugetlbfs in that it requires the munmap path to unmap in huge page aligned units. Rather than add more custom vma handling code in __split_vma() introduce a new vm operation to perform this vma specific check. Link: http://lkml.kernel.org/r/151130418135.4029.6783191281930729710.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Signed-off-by: Dan Williams Cc: Jeff Moyer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + mm/hugetlb.c | 8 ++++++++ mm/mmap.c | 8 +++++--- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index ee073146aaa7..b3b6a7e313e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -377,6 +377,7 @@ enum page_entry_size { struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); + int (*split)(struct vm_area_struct * area, unsigned long addr); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_fault *vmf); int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 681b300185c0..698e8fb34031 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3125,6 +3125,13 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) } } +static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) +{ + if (addr & ~(huge_page_mask(hstate_vma(vma)))) + return -EINVAL; + return 0; +} + /* * We cannot handle pagefaults against hugetlb pages at all. They cause * handle_mm_fault() to try to instantiate regular-sized pages in the @@ -3141,6 +3148,7 @@ const struct vm_operations_struct hugetlb_vm_ops = { .fault = hugetlb_vm_op_fault, .open = hugetlb_vm_op_open, .close = hugetlb_vm_op_close, + .split = hugetlb_vm_op_split, }; static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, diff --git a/mm/mmap.c b/mm/mmap.c index 924839fac0e6..a4d546821214 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2555,9 +2555,11 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *new; int err; - if (is_vm_hugetlb_page(vma) && (addr & - ~(huge_page_mask(hstate_vma(vma))))) - return -EINVAL; + if (vma->vm_ops && vma->vm_ops->split) { + err = vma->vm_ops->split(vma, addr); + if (err) + return err; + } new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) From 9702cffdbf2129516db679e4467db81e1cd287da Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:32 -0800 Subject: [PATCH 221/333] device-dax: implement ->split() to catch invalid munmap attempts Similar to how device-dax enforces that the 'address', 'offset', and 'len' parameters to mmap() be aligned to the device's fundamental alignment, the same constraints apply to munmap(). Implement ->split() to fail munmap calls that violate the alignment constraint. Otherwise, we later fail VM_BUG_ON checks in the unmap_page_range() path with crash signatures of the form: vma ffff8800b60c8a88 start 00007f88c0000000 end 00007f88c0e00000 next (null) prev (null) mm ffff8800b61150c0 prot 8000000000000027 anon_vma (null) vm_ops ffffffffa0091240 pgoff 0 file ffff8800b638ef80 private_data (null) flags: 0x380000fb(read|write|shared|mayread|maywrite|mayexec|mayshare|softdirty|mixedmap|hugepage) ------------[ cut here ]------------ kernel BUG at mm/huge_memory.c:2014! [..] RIP: 0010:__split_huge_pud+0x12a/0x180 [..] Call Trace: unmap_page_range+0x245/0xa40 ? __vma_adjust+0x301/0x990 unmap_vmas+0x4c/0xa0 unmap_region+0xae/0x120 ? __vma_rb_erase+0x11a/0x230 do_munmap+0x276/0x410 vm_munmap+0x6a/0xa0 SyS_munmap+0x1d/0x30 Link: http://lkml.kernel.org/r/151130418681.4029.7118245855057952010.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Signed-off-by: Dan Williams Reported-by: Jeff Moyer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dax/device.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 6833ada237ab..7b0bf825c4e7 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -428,9 +428,21 @@ static int dev_dax_fault(struct vm_fault *vmf) return dev_dax_huge_fault(vmf, PE_SIZE_PTE); } +static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr) +{ + struct file *filp = vma->vm_file; + struct dev_dax *dev_dax = filp->private_data; + struct dax_region *dax_region = dev_dax->region; + + if (!IS_ALIGNED(addr, dax_region->align)) + return -EINVAL; + return 0; +} + static const struct vm_operations_struct dax_vm_ops = { .fault = dev_dax_fault, .huge_fault = dev_dax_huge_fault, + .split = dev_dax_split, }; static int dax_mmap(struct file *filp, struct vm_area_struct *vma) From 2bb6d2837083de722bfdc369cb0d76ce188dd9b4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:35 -0800 Subject: [PATCH 222/333] mm: introduce get_user_pages_longterm Patch series "introduce get_user_pages_longterm()", v2. Here is a new get_user_pages api for cases where a driver intends to keep an elevated page count indefinitely. This is distinct from usages like iov_iter_get_pages where the elevated page counts are transient. The iov_iter_get_pages cases immediately turn around and submit the pages to a device driver which will put_page when the i/o operation completes (under kernel control). In the longterm case userspace is responsible for dropping the page reference at some undefined point in the future. This is untenable for filesystem-dax case where the filesystem is in control of the lifetime of the block / page and needs reasonable limits on how long it can wait for pages in a mapping to become idle. Fixing filesystems to actually wait for dax pages to be idle before blocks from a truncate/hole-punch operation are repurposed is saved for a later patch series. Also, allowing longterm registration of dax mappings is a future patch series that introduces a "map with lease" semantic where the kernel can revoke a lease and force userspace to drop its page references. I have also tagged these for -stable to purposely break cases that might assume that longterm memory registrations for filesystem-dax mappings were supported by the kernel. The behavior regression this policy change implies is one of the reasons we maintain the "dax enabled. Warning: EXPERIMENTAL, use at your own risk" notification when mounting a filesystem in dax mode. It is worth noting the device-dax interface does not suffer the same constraints since it does not support file space management operations like hole-punch. This patch (of 4): Until there is a solution to the dma-to-dax vs truncate problem it is not safe to allow long standing memory registrations against filesytem-dax vmas. Device-dax vmas do not have this problem and are explicitly allowed. This is temporary until a "memory registration with layout-lease" mechanism can be implemented for the affected sub-systems (RDMA and V4L2). [akpm@linux-foundation.org: use kcalloc()] Link: http://lkml.kernel.org/r/151068939435.7446.13560129395419350737.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Suggested-by: Christoph Hellwig Cc: Doug Ledford Cc: Hal Rosenstock Cc: Inki Dae Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Ross Zwisler Cc: Sean Hefty Cc: Seung-Woo Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 14 ++++++++++ include/linux/mm.h | 13 ++++++++++ mm/gup.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index bbd92da0946e..9dc498d16cc1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3194,6 +3194,20 @@ static inline bool vma_is_dax(struct vm_area_struct *vma) return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); } +static inline bool vma_is_fsdax(struct vm_area_struct *vma) +{ + struct inode *inode; + + if (!vma->vm_file) + return false; + if (!vma_is_dax(vma)) + return false; + inode = file_inode(vma->vm_file); + if (inode->i_mode == S_IFCHR) + return false; /* device-dax */ + return true; +} + static inline int iocb_flags(struct file *file) { int res = 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index b3b6a7e313e9..ea818ff739cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1380,6 +1380,19 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); +#ifdef CONFIG_FS_DAX +long get_user_pages_longterm(unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas); +#else +static inline long get_user_pages_longterm(unsigned long start, + unsigned long nr_pages, unsigned int gup_flags, + struct page **pages, struct vm_area_struct **vmas) +{ + return get_user_pages(start, nr_pages, gup_flags, pages, vmas); +} +#endif /* CONFIG_FS_DAX */ + int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); diff --git a/mm/gup.c b/mm/gup.c index 85cc822fd403..d3fb60e5bfac 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1095,6 +1095,70 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, } EXPORT_SYMBOL(get_user_pages); +#ifdef CONFIG_FS_DAX +/* + * This is the same as get_user_pages() in that it assumes we are + * operating on the current task's mm, but it goes further to validate + * that the vmas associated with the address range are suitable for + * longterm elevated page reference counts. For example, filesystem-dax + * mappings are subject to the lifetime enforced by the filesystem and + * we need guarantees that longterm users like RDMA and V4L2 only + * establish mappings that have a kernel enforced revocation mechanism. + * + * "longterm" == userspace controlled elevated page count lifetime. + * Contrast this to iov_iter_get_pages() usages which are transient. + */ +long get_user_pages_longterm(unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas_arg) +{ + struct vm_area_struct **vmas = vmas_arg; + struct vm_area_struct *vma_prev = NULL; + long rc, i; + + if (!pages) + return -EINVAL; + + if (!vmas) { + vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *), + GFP_KERNEL); + if (!vmas) + return -ENOMEM; + } + + rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas); + + for (i = 0; i < rc; i++) { + struct vm_area_struct *vma = vmas[i]; + + if (vma == vma_prev) + continue; + + vma_prev = vma; + + if (vma_is_fsdax(vma)) + break; + } + + /* + * Either get_user_pages() failed, or the vma validation + * succeeded, in either case we don't need to put_page() before + * returning. + */ + if (i >= rc) + goto out; + + for (i = 0; i < rc; i++) + put_page(pages[i]); + rc = -EOPNOTSUPP; +out: + if (vmas != vmas_arg) + kfree(vmas); + return rc; +} +EXPORT_SYMBOL(get_user_pages_longterm); +#endif /* CONFIG_FS_DAX */ + /** * populate_vma_page_range() - populate a range of pages in the vma. * @vma: target vma From b7f0554a56f21fb3e636a627450a9add030889be Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:39 -0800 Subject: [PATCH 223/333] mm: fail get_vaddr_frames() for filesystem-dax mappings Until there is a solution to the dma-to-dax vs truncate problem it is not safe to allow V4L2, Exynos, and other frame vector users to create long standing / irrevocable memory registrations against filesytem-dax vmas. [dan.j.williams@intel.com: add comment for vma_is_fsdax() check in get_vaddr_frames(), per Jan] Link: http://lkml.kernel.org/r/151197874035.26211.4061781453123083667.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/151068939985.7446.15684639617389154187.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Reviewed-by: Jan Kara Cc: Inki Dae Cc: Seung-Woo Kim Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Vlastimil Babka Cc: Christoph Hellwig Cc: Doug Ledford Cc: Hal Rosenstock Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Ross Zwisler Cc: Sean Hefty Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/frame_vector.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 2f98df0d460e..297c7238f7d4 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -53,6 +53,18 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, ret = -EFAULT; goto out; } + + /* + * While get_vaddr_frames() could be used for transient (kernel + * controlled lifetime) pinning of memory pages all current + * users establish long term (userspace controlled lifetime) + * page pinning. Treat get_vaddr_frames() like + * get_user_pages_longterm() and disallow it for filesystem-dax + * mappings. + */ + if (vma_is_fsdax(vma)) + return -EOPNOTSUPP; + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) { vec->got_ref = true; vec->is_pfns = false; From b70131de648c2b997d22f4653934438013f407a1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:43 -0800 Subject: [PATCH 224/333] v4l2: disable filesystem-dax mapping support V4L2 memory registrations are incompatible with filesystem-dax that needs the ability to revoke dma access to a mapping at will, or otherwise allow the kernel to wait for completion of DMA. The filesystem-dax implementation breaks the traditional solution of truncate of active file backed mappings since there is no page-cache page we can orphan to sustain ongoing DMA. If v4l2 wants to support long lived DMA mappings it needs to arrange to hold a file lease or use some other mechanism so that the kernel can coordinate revoking DMA access when the filesystem needs to truncate mappings. Link: http://lkml.kernel.org/r/151068940499.7446.12846708245365671207.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Reported-by: Jan Kara Reviewed-by: Jan Kara Cc: Mauro Carvalho Chehab Cc: Christoph Hellwig Cc: Doug Ledford Cc: Hal Rosenstock Cc: Inki Dae Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mel Gorman Cc: Ross Zwisler Cc: Sean Hefty Cc: Seung-Woo Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/v4l2-core/videobuf-dma-sg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 0b5c43f7e020..f412429cf5ba 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -185,12 +185,13 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n", data, size, dma->nr_pages); - err = get_user_pages(data & PAGE_MASK, dma->nr_pages, + err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages, flags, dma->pages, NULL); if (err != dma->nr_pages) { dma->nr_pages = (err >= 0) ? err : 0; - dprintk(1, "get_user_pages: err=%d [%d]\n", err, dma->nr_pages); + dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err, + dma->nr_pages); return err < 0 ? err : -EINVAL; } return 0; From 5f1d43de54164dcfb9bfa542fcc92c1e1a1b6c1d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:47 -0800 Subject: [PATCH 225/333] IB/core: disable memory registration of filesystem-dax vmas Until there is a solution to the dma-to-dax vs truncate problem it is not safe to allow RDMA to create long standing memory registrations against filesytem-dax vmas. Link: http://lkml.kernel.org/r/151068941011.7446.7766030590347262502.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Reported-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Acked-by: Jason Gunthorpe Acked-by: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: Jeff Moyer Cc: Ross Zwisler Cc: Inki Dae Cc: Jan Kara Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Seung-Woo Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 21e60b1e2ff4..130606c3b07c 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -191,7 +191,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, sg_list_start = umem->sg_head.sgl; while (npages) { - ret = get_user_pages(cur_base, + ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), gup_flags, page_list, vma_list); From 04e35f4495dd560db30c25efca4eecae8ec8c375 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 29 Nov 2017 16:10:51 -0800 Subject: [PATCH 226/333] exec: avoid RLIMIT_STACK races with prlimit() While the defense-in-depth RLIMIT_STACK limit on setuid processes was protected against races from other threads calling setrlimit(), I missed protecting it against races from external processes calling prlimit(). This adds locking around the change and makes sure that rlim_max is set too. Link: http://lkml.kernel.org/r/20171127193457.GA11348@beast Fixes: 64701dee4178e ("exec: Use sane stack rlimit under secureexec") Signed-off-by: Kees Cook Reported-by: Ben Hutchings Reported-by: Brad Spengler Acked-by: Serge Hallyn Cc: James Morris Cc: Andy Lutomirski Cc: Oleg Nesterov Cc: Jiri Slaby Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index 1d6243d9f2b6..6be2aa0ab26f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1340,10 +1340,15 @@ void setup_new_exec(struct linux_binprm * bprm) * avoid bad behavior from the prior rlimits. This has to * happen before arch_pick_mmap_layout(), which examines * RLIMIT_STACK, but after the point of no return to avoid - * needing to clean up the change on failure. + * races from other threads changing the limits. This also + * must be protected from races with prlimit() calls. */ + task_lock(current->group_leader); if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM) current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM; + if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM) + current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM; + task_unlock(current->group_leader); } arch_pick_mmap_layout(current->mm); From 6ea8d958a2c95a1d514015d4e29ba21a8c0a1a91 Mon Sep 17 00:00:00 2001 From: chenjie Date: Wed, 29 Nov 2017 16:10:54 -0800 Subject: [PATCH 227/333] mm/madvise.c: fix madvise() infinite loop under special circumstances MADVISE_WILLNEED has always been a noop for DAX (formerly XIP) mappings. Unfortunately madvise_willneed() doesn't communicate this information properly to the generic madvise syscall implementation. The calling convention is quite subtle there. madvise_vma() is supposed to either return an error or update &prev otherwise the main loop will never advance to the next vma and it will keep looping for ever without a way to get out of the kernel. It seems this has been broken since introduction. Nobody has noticed because nobody seems to be using MADVISE_WILLNEED on these DAX mappings. [mhocko@suse.com: rewrite changelog] Link: http://lkml.kernel.org/r/20171127115318.911-1-guoxuenan@huawei.com Fixes: fe77ba6f4f97 ("[PATCH] xip: madvice/fadvice: execute in place") Signed-off-by: chenjie Signed-off-by: guoxuenan Acked-by: Michal Hocko Cc: Minchan Kim Cc: zhangyi (F) Cc: Miao Xie Cc: Mike Rapoport Cc: Shaohua Li Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Kirill A. Shutemov Cc: David Rientjes Cc: Anshuman Khandual Cc: Rik van Riel Cc: Carsten Otte Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 375cf32087e4..751e97aa2210 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -276,15 +276,14 @@ static long madvise_willneed(struct vm_area_struct *vma, { struct file *file = vma->vm_file; + *prev = vma; #ifdef CONFIG_SWAP if (!file) { - *prev = vma; force_swapin_readahead(vma, start, end); return 0; } if (shmem_mapping(file->f_mapping)) { - *prev = vma; force_shm_swapin_readahead(vma, start, end, file->f_mapping); return 0; @@ -299,7 +298,6 @@ static long madvise_willneed(struct vm_area_struct *vma, return 0; } - *prev = vma; start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (end > vma->vm_end) end = vma->vm_end; From 90daf3062fc0f8f919d5496fe167bbd6016a6a63 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 29 Nov 2017 16:10:58 -0800 Subject: [PATCH 228/333] Revert "mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical" This reverts commit 0f6d24f87856 ("mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical") because it causes false positive warnings during OOM situations as noticed by Tetsuo Handa: Node 0 active_anon:3525940kB inactive_anon:8372kB active_file:216kB inactive_file:1872kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:2504kB dirty:52kB writeback:0kB shmem:8660kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 636928kB writeback_tmp:0kB unstable:0kB all_unreclaimable? yes Node 0 DMA free:14848kB min:284kB low:352kB high:420kB active_anon:992kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15988kB managed:15904kB mlocked:0kB kernel_stack:0kB pagetables:24kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB lowmem_reserve[]: 0 2687 3645 3645 Node 0 DMA32 free:53004kB min:49608kB low:62008kB high:74408kB active_anon:2712648kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:3129216kB managed:2773132kB mlocked:0kB kernel_stack:96kB pagetables:5096kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB lowmem_reserve[]: 0 0 958 958 Node 0 Normal free:17140kB min:17684kB low:22104kB high:26524kB active_anon:812300kB inactive_anon:8372kB active_file:1228kB inactive_file:1868kB unevictable:0kB writepending:52kB present:1048576kB managed:981224kB mlocked:0kB kernel_stack:3520kB pagetables:8552kB bounce:0kB free_pcp:120kB local_pcp:120kB free_cma:0kB lowmem_reserve[]: 0 0 0 0 [...] Out of memory: Kill process 8459 (a.out) score 999 or sacrifice child Killed process 8459 (a.out) total-vm:4180kB, anon-rss:88kB, file-rss:0kB, shmem-rss:0kB oom_reaper: reaped process 8459 (a.out), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB vm direct limit must be set greater than background limit. The problem is that both thresh and bg_thresh will be 0 if available_memory is less than 4 pages when evaluating global_dirtyable_memory. While this might be worked around the whole point of the warning is dubious at best. We do rely on admins to do sensible things when changing tunable knobs. Dirty memory writeback knobs are not any special in that regards so revert the warning rather than adding more hacks to work this around. Debugged by Yafang Shao. Link: http://lkml.kernel.org/r/20171127091939.tahb77nznytcxw55@dhcp22.suse.cz Fixes: 0f6d24f87856 ("mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical") Signed-off-by: Michal Hocko Reported-by: Tetsuo Handa Cc: Yafang Shao Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 7 ------- mm/page-writeback.c | 5 +---- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index b920423f88cb..5025ff9307e6 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -158,10 +158,6 @@ Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any value lower than this limit will be ignored and the old configuration will be retained. -Note: the value of dirty_bytes also must be set greater than -dirty_background_bytes or the amount of memory corresponding to -dirty_background_ratio. - ============================================================== dirty_expire_centisecs @@ -181,9 +177,6 @@ generating disk writes will itself start writing out dirty data. The total available memory is not equal to total system memory. -Note: dirty_ratio must be set greater than dirty_background_ratio or -ratio corresponding to dirty_background_bytes. - ============================================================== dirty_writeback_centisecs diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e7095030aa1f..586f31261c83 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -433,11 +433,8 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) else bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; - if (unlikely(bg_thresh >= thresh)) { - pr_warn("vm direct limit must be set greater than background limit.\n"); + if (bg_thresh >= thresh) bg_thresh = thresh / 2; - } - tsk = current; if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; From d5dabd633922ac5ee5bcc67748f7defb8b211469 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Wed, 29 Nov 2017 16:11:01 -0800 Subject: [PATCH 229/333] fs/mbcache.c: make count_objects() more robust When running ltp stress test for 7*24 hours, vmscan occasionally emits the following warning continuously: mb_cache_scan+0x0/0x3f0 negative objects to delete nr=-9232265467809300450 ... Tracing shows the freeable(mb_cache_count returns) is -1, which causes the continuous accumulation and overflow of total_scan. This patch makes sure that mb_cache_count() cannot return a negative value, which makes the mbcache shrinker more robust. Link: http://lkml.kernel.org/r/1511753419-52328-1-git-send-email-jiang.biao2@zte.com.cn Signed-off-by: Jiang Biao Cc: Al Viro Cc: Minchan Kim Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/mbcache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/mbcache.c b/fs/mbcache.c index d818fd236787..b8b8b9ced9f8 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -269,6 +269,9 @@ static unsigned long mb_cache_count(struct shrinker *shrink, struct mb_cache *cache = container_of(shrink, struct mb_cache, c_shrink); + /* Unlikely, but not impossible */ + if (unlikely(cache->c_entry_count < 0)) + return 0; return cache->c_entry_count; } From edbddb83a15b4361d8c3bf00aabee85fd3ef4d80 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 29 Nov 2017 16:11:05 -0800 Subject: [PATCH 230/333] scripts/bloat-o-meter: don't fail with division by 0 Under some circumstances it's possible to get a divider 0 which crashes the script. Traceback (most recent call last): File "linux/scripts/bloat-o-meter", line 98, in print_result("Function", "tTdDbBrR", 2) File "linux/scripts/bloat-o-meter", line 87, in print_result (otot, ntot, (ntot - otot)*100.0/otot)) ZeroDivisionError: float division by zero Hide this by checking the divider first. Link: http://lkml.kernel.org/r/20171123171219.31453-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Alexey Dobriyan Cc: Vaneet Narang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/bloat-o-meter | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 6f099f915dcf..94b664817ad9 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -83,8 +83,11 @@ def print_result(symboltype, symbolformat, argc): for d, n in delta: if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)) - print("Total: Before=%d, After=%d, chg %+.2f%%" % \ - (otot, ntot, (ntot - otot)*100.0/otot)) + if otot: + percent = (ntot - otot) * 100.0 / otot + else: + percent = 0 + print("Total: Before=%d, After=%d, chg %+.2f%%" % (otot, ntot, percent)) if sys.argv[1] == "-c": print_result("Function", "tT", 3) From bde5f6bc68db51128f875a756e9082a6c6ff7b4c Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Wed, 29 Nov 2017 16:11:08 -0800 Subject: [PATCH 231/333] kmemleak: add scheduling point to kmemleak_scan() kmemleak_scan() will scan struct page for each node and it can be really large and resulting in a soft lockup. We have seen a soft lockup when do scan while compile kernel: watchdog: BUG: soft lockup - CPU#53 stuck for 22s! [bash:10287] [...] Call Trace: kmemleak_scan+0x21a/0x4c0 kmemleak_write+0x312/0x350 full_proxy_write+0x5a/0xa0 __vfs_write+0x33/0x150 vfs_write+0xad/0x1a0 SyS_write+0x52/0xc0 do_syscall_64+0x61/0x1a0 entry_SYSCALL64_slow_path+0x25/0x25 Fix this by adding cond_resched every MAX_SCAN_SIZE. Link: http://lkml.kernel.org/r/1511439788-20099-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Suggested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e4738d5e9b8c..3d4781756d50 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1523,6 +1523,8 @@ static void kmemleak_scan(void) if (page_count(page) == 0) continue; scan_block(page, page + 1, NULL); + if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page)))) + cond_resched(); } } put_online_mems(); From 40a899ed16486455f964e46d1af31fd4fded21c1 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 29 Nov 2017 16:11:12 -0800 Subject: [PATCH 232/333] mm: migrate: fix an incorrect call of prep_transhuge_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In https://lkml.org/lkml/2017/11/20/411, Andrea reported that during memory hotplug/hot remove prep_transhuge_page() is called incorrectly on non-THP pages for migration, when THP is on but THP migration is not enabled. This leads to a bad state of target pages for migration. By inspecting the code, if called on a non-THP, prep_transhuge_page() will 1) change the value of the mapping of (page + 2), since it is used for THP deferred list; 2) change the lru value of (page + 1), since it is used for THP's dtor. Both can lead to data corruption of these two pages. Andrea said: "Pragmatically and from the point of view of the memory_hotplug subsys, the effect is a kernel crash when pages are being migrated during a memory hot remove offline and migration target pages are found in a bad state" This patch fixes it by only calling prep_transhuge_page() when we are certain that the target page is THP. Link: http://lkml.kernel.org/r/20171121021855.50525-1-zi.yan@sent.com Fixes: 8135d8926c08 ("mm: memory_hotplug: memory hotremove supports thp migration") Signed-off-by: Zi Yan Reported-by: Andrea Reale Cc: Naoya Horiguchi Cc: Michal Hocko Cc: "Jérôme Glisse" Cc: [4.14] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 895ec0c4942e..a2246cf670ba 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -54,7 +54,7 @@ static inline struct page *new_page_nodemask(struct page *page, new_page = __alloc_pages_nodemask(gfp_mask, order, preferred_nid, nodemask); - if (new_page && PageTransHuge(page)) + if (new_page && PageTransHuge(new_page)) prep_transhuge_page(new_page); return new_page; From d08afa149acfd00871484ada6dabc3880524cd1c Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 29 Nov 2017 16:11:15 -0800 Subject: [PATCH 233/333] mm, memcg: fix mem_cgroup_swapout() for THPs Commit d6810d730022 ("memcg, THP, swap: make mem_cgroup_swapout() support THP") changed mem_cgroup_swapout() to support transparent huge page (THP). However the patch missed one location which should be changed for correctly handling THPs. The resulting bug will cause the memory cgroups whose THPs were swapped out to become zombies on deletion. Link: http://lkml.kernel.org/r/20171128161941.20931-1-shakeelb@google.com Fixes: d6810d730022 ("memcg, THP, swap: make mem_cgroup_swapout() support THP") Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Huang Ying Cc: Vladimir Davydov Cc: Greg Thelen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 50e6906314f8..ac2ffd5e02b9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6044,7 +6044,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) memcg_check_events(memcg, page); if (!mem_cgroup_is_root(memcg)) - css_put(&memcg->css); + css_put_many(&memcg->css, nr_entries); } /** From b6e8e12c0aeb5fbf1bf46c84d58cc93aedede385 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Wed, 29 Nov 2017 16:11:19 -0800 Subject: [PATCH 234/333] fs/fat/inode.c: fix sb_rdonly() change Commit bc98a42c1f7d ("VFS: Convert sb->s_flags & MS_RDONLY to sb_rdonly(sb)") converted fat_remount():new_rdonly from a bool to an int. However fat_remount() depends upon the compiler's conversion of a non-zero integer into boolean `true'. Fix it by switching `new_rdonly' back into a bool. Link: http://lkml.kernel.org/r/87mv3d5x51.fsf@mail.parknet.co.jp Fixes: bc98a42c1f7d0f8 ("VFS: Convert sb->s_flags & MS_RDONLY to sb_rdonly(sb)") Signed-off-by: OGAWA Hirofumi Cc: Joe Perches Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 016c46b5e44c..20a0a89eaca5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -779,7 +779,7 @@ static void __exit fat_destroy_inodecache(void) static int fat_remount(struct super_block *sb, int *flags, char *data) { - int new_rdonly; + bool new_rdonly; struct msdos_sb_info *sbi = MSDOS_SB(sb); *flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME); From 43694d4bf843ddd34519e8e9de983deefeada699 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 29 Nov 2017 16:11:23 -0800 Subject: [PATCH 235/333] autofs: revert "autofs: take more care to not update last_used on path walk" While commit 092a53452bb7 ("autofs: take more care to not update last_used on path walk") helped (partially) resolve a problem where automounts were not expiring due to aggressive accesses from user space it has a side effect for very large environments. This change helps with the expire problem by making the expire more aggressive but, for very large environments, that means more mount requests from clients. When there are a lot of clients that can mean fairly significant server load increases. It turns out I put the last_used in this position to solve this very problem and failed to update my own thinking of the autofs expire policy. So the patch being reverted introduces a regression which should be fixed. Link: http://lkml.kernel.org/r/151174729420.6162.1832622523537052460.stgit@pluto.themaw.net Fixes: 092a53452b ("autofs: take more care to not update last_used on path walk") Signed-off-by: Ian Kent Reviewed-by: NeilBrown Cc: Al Viro Cc: [4.11+] Cc: Colin Walters Cc: David Howells Cc: Ondrej Holy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d79ced925861..82e8f6edfb48 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk) pr_debug("waiting for mount name=%pd\n", path->dentry); status = autofs4_wait(sbi, path, NFY_MOUNT); pr_debug("mount wait done status=%d\n", status); - ino->last_used = jiffies; } + ino->last_used = jiffies; return status; } @@ -321,21 +321,16 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) */ if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { struct dentry *parent = dentry->d_parent; + struct autofs_info *ino; struct dentry *new; new = d_lookup(parent, &dentry->d_name); if (!new) return NULL; - if (new == dentry) - dput(new); - else { - struct autofs_info *ino; - - ino = autofs4_dentry_ino(new); - ino->last_used = jiffies; - dput(path->dentry); - path->dentry = new; - } + ino = autofs4_dentry_ino(new); + ino->last_used = jiffies; + dput(path->dentry); + path->dentry = new; } return path->dentry; } From 5d38f049cee1e1c4a7ac55aa79d37d01ddcc3860 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 29 Nov 2017 16:11:26 -0800 Subject: [PATCH 236/333] autofs: revert "autofs: fix AT_NO_AUTOMOUNT not being honored" Commit 42f461482178 ("autofs: fix AT_NO_AUTOMOUNT not being honored") allowed the fstatat(2) system call to properly honor the AT_NO_AUTOMOUNT flag but introduced a semantic change. In order to honor AT_NO_AUTOMOUNT a semantic change was made to the negative dentry case for stat family system calls in follow_automount(). This changed the unconditional triggering of an automount in this case to no longer be done and an error returned instead. This has caused more problems than I expected so reverting the change is needed. In a discussion with Neil Brown it was concluded that the automount(8) daemon can implement this change without kernel modifications. So that will be done instead and the autofs module documentation updated with a description of the problem and what needs to be done by module users for this specific case. Link: http://lkml.kernel.org/r/151174730120.6162.3848002191530283984.stgit@pluto.themaw.net Fixes: 42f4614821 ("autofs: fix AT_NO_AUTOMOUNT not being honored") Signed-off-by: Ian Kent Cc: Neil Brown Cc: Al Viro Cc: David Howells Cc: Colin Walters Cc: Ondrej Holy Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 15 +++------------ include/linux/fs.h | 3 ++- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f0c7a7b9b6ca..9cc91fb7f156 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1129,18 +1129,9 @@ static int follow_automount(struct path *path, struct nameidata *nd, * of the daemon to instantiate them before they can be used. */ if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE | - LOOKUP_AUTOMOUNT))) { - /* Positive dentry that isn't meant to trigger an - * automount, EISDIR will allow it to be used, - * otherwise there's no mount here "now" so return - * ENOENT. - */ - if (path->dentry->d_inode) - return -EISDIR; - else - return -ENOENT; - } + LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && + path->dentry->d_inode) + return -EISDIR; if (path->dentry->d_sb->s_user_ns != &init_user_ns) return -EACCES; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9dc498d16cc1..511fbaabf624 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3088,7 +3088,8 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat) static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { From f4f0a3d85b50a65a348e2b8635041d6b30f01deb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 29 Nov 2017 16:11:30 -0800 Subject: [PATCH 237/333] mm/hugetlb: fix NULL-pointer dereference on 5-level paging machine I made a mistake during converting hugetlb code to 5-level paging: in huge_pte_alloc() we have to use p4d_alloc(), not p4d_offset(). Otherwise it leads to crash -- NULL-pointer dereference in pud_alloc() if p4d table is not yet allocated. It only can happen in 5-level paging mode. In 4-level paging mode p4d_offset() always returns pgd, so we are fine. Link: http://lkml.kernel.org/r/20171122121921.64822-1-kirill.shutemov@linux.intel.com Fixes: c2febafc6773 ("mm: convert generic code to 5-level paging") Signed-off-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 698e8fb34031..9a334f5fb730 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4635,7 +4635,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - p4d = p4d_offset(pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; pud = pud_alloc(mm, p4d, addr); if (pud) { if (sz == PUD_SIZE) { From 72639e6df4128dfde8fee7638a35be7db8114000 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 29 Nov 2017 16:11:33 -0800 Subject: [PATCH 238/333] fs/hugetlbfs/inode.c: change put_page/unlock_page order in hugetlbfs_fallocate() hugetlfs_fallocate() currently performs put_page() before unlock_page(). This scenario opens a small time window, from the time the page is added to the page cache, until it is unlocked, in which the page might be removed from the page-cache by another core. If the page is removed during this time windows, it might cause a memory corruption, as the wrong page will be unlocked. It is arguable whether this scenario can happen in a real system, and there are several mitigating factors. The issue was found by code inspection (actually grep), and not by actually triggering the flow. Yet, since putting the page before unlocking is incorrect it should be fixed, if only to prevent future breakage or someone copy-pasting this code. Mike said: "I am of the opinion that this does not need to be sent to stable. Although the ordering is current code is incorrect, there is no way for this to be a problem with current locking. In addition, I verified that the perhaps bigger issue with sys_fadvise64(POSIX_FADV_DONTNEED) for hugetlbfs and other filesystems is addressed in 3a77d214807c ("mm: fadvise: avoid fadvise for fs without backing device")" Link: http://lkml.kernel.org/r/20170826191124.51642-1-namit@vmware.com Fixes: 70c3547e36f5c ("hugetlbfs: add hugetlbfs_fallocate()") Signed-off-by: Nadav Amit Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Cc: Eric Biggers Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 1e76730aac0d..8a85f3f53446 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -639,11 +639,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, mutex_unlock(&hugetlb_fault_mutex_table[hash]); /* - * page_put due to reference from alloc_huge_page() * unlock_page because locked by add_to_page_cache() + * page_put due to reference from alloc_huge_page() */ - put_page(page); unlock_page(page); + put_page(page); } if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) From 1b6fba458c0a2e8513071330972c4c587b7d28cc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 21 Nov 2017 09:17:43 +0100 Subject: [PATCH 239/333] drm/bridge: adv7511/33: Fix adv7511_cec_init() failure handling If the device tree for a board did not specify a cec clock, then adv7511_cec_init would return an error, which would cause adv7511_probe() to fail and thus there is no HDMI output. There is no need to have adv7511_probe() fail if the CEC initialization fails, so just change adv7511_cec_init() to a void function. In addition, adv7511_cec_init() should just return silently if the cec clock isn't found and show a message for any other errors. An otherwise correct cleanup patch from Dan Carpenter turned this broken failure handling into a kernel Oops, so bisection points to commit 7af35b0addbc ("drm/kirin: Checking for IS_ERR() instead of NULL") rather than 3b1b975003e4 ("drm: adv7511/33: add HDMI CEC support"). Based on earlier patches from Arnd and John. Reported-by: Naresh Kamboju Cc: Xinliang Liu Cc: Dan Carpenter Cc: Sean Paul Cc: Archit Taneja Cc: John Stultz Link: https://bugs.linaro.org/show_bug.cgi?id=3345 Link: https://lkft.validation.linaro.org/scheduler/job/48017#L3551 Fixes: 7af35b0addbc ("drm/kirin: Checking for IS_ERR() instead of NULL") Fixes: 3b1b975003e4 ("drm: adv7511/33: add HDMI CEC support") Signed-off-by: Hans Verkuil Tested-by: Hans Verkuil Reviewed-by: Laurent Pinchart Tested-by: John Stultz Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/9097b2a4-b6b9-5fca-e039-0a17694b1143@xs4all.nl --- drivers/gpu/drm/bridge/adv7511/adv7511.h | 13 ++++++-- drivers/gpu/drm/bridge/adv7511/adv7511_cec.c | 32 ++++++++++++++------ drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 17 +++-------- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index b4efcbabf7f7..d034b2cb5eee 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -372,9 +372,18 @@ struct adv7511 { }; #ifdef CONFIG_DRM_I2C_ADV7511_CEC -int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511, - unsigned int offset); +int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511); void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1); +#else +static inline int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511) +{ + unsigned int offset = adv7511->type == ADV7533 ? + ADV7533_REG_CEC_OFFSET : 0; + + regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, + ADV7511_CEC_CTRL_POWER_DOWN); + return 0; +} #endif #ifdef CONFIG_DRM_I2C_ADV7533 diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c index b33d730e4d73..a20a45c0b353 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c @@ -300,18 +300,21 @@ static int adv7511_cec_parse_dt(struct device *dev, struct adv7511 *adv7511) return 0; } -int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511, - unsigned int offset) +int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511) { + unsigned int offset = adv7511->type == ADV7533 ? + ADV7533_REG_CEC_OFFSET : 0; int ret = adv7511_cec_parse_dt(dev, adv7511); if (ret) - return ret; + goto err_cec_parse_dt; adv7511->cec_adap = cec_allocate_adapter(&adv7511_cec_adap_ops, adv7511, dev_name(dev), CEC_CAP_DEFAULTS, ADV7511_MAX_ADDRS); - if (IS_ERR(adv7511->cec_adap)) - return PTR_ERR(adv7511->cec_adap); + if (IS_ERR(adv7511->cec_adap)) { + ret = PTR_ERR(adv7511->cec_adap); + goto err_cec_alloc; + } regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, 0); /* cec soft reset */ @@ -329,9 +332,18 @@ int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511, ((adv7511->cec_clk_freq / 750000) - 1) << 2); ret = cec_register_adapter(adv7511->cec_adap, dev); - if (ret) { - cec_delete_adapter(adv7511->cec_adap); - adv7511->cec_adap = NULL; - } - return ret; + if (ret) + goto err_cec_register; + return 0; + +err_cec_register: + cec_delete_adapter(adv7511->cec_adap); + adv7511->cec_adap = NULL; +err_cec_alloc: + dev_info(dev, "Initializing CEC failed with error %d, disabling CEC\n", + ret); +err_cec_parse_dt: + regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, + ADV7511_CEC_CTRL_POWER_DOWN); + return ret == -EPROBE_DEFER ? ret : 0; } diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 0e14f1572d05..efa29db5fc2b 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1084,7 +1084,6 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) struct device *dev = &i2c->dev; unsigned int main_i2c_addr = i2c->addr << 1; unsigned int edid_i2c_addr = main_i2c_addr + 4; - unsigned int offset; unsigned int val; int ret; @@ -1192,24 +1191,16 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) if (adv7511->type == ADV7511) adv7511_set_link_config(adv7511, &link_config); + ret = adv7511_cec_init(dev, adv7511); + if (ret) + goto err_unregister_cec; + adv7511->bridge.funcs = &adv7511_bridge_funcs; adv7511->bridge.of_node = dev->of_node; drm_bridge_add(&adv7511->bridge); adv7511_audio_init(dev, adv7511); - - offset = adv7511->type == ADV7533 ? ADV7533_REG_CEC_OFFSET : 0; - -#ifdef CONFIG_DRM_I2C_ADV7511_CEC - ret = adv7511_cec_init(dev, adv7511, offset); - if (ret) - goto err_unregister_cec; -#else - regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, - ADV7511_CEC_CTRL_POWER_DOWN); -#endif - return 0; err_unregister_cec: From ebe32c3e282a62974b190b9d514864fc0d56716e Mon Sep 17 00:00:00 2001 From: Pierre-Hugues Husson Date: Sat, 25 Nov 2017 21:18:44 +0100 Subject: [PATCH 240/333] drm/bridge: synopsys/dw-hdmi: Enable cec clock Support the "cec" optional clock. The documentation already mentions "cec" optional clock and it is used by several boards, but currently the driver doesn't enable it, thus preventing cec from working on those boards. And even worse: a /dev/cecX device will appear for those boards, but it won't be functioning without configuring this clock. Changes: v4: - Change commit message to stress the importance of this patch v3: - Drop useless braces v2: - Separate ENOENT errors from others - Propagate other errors (especially -EPROBE_DEFER) Signed-off-by: Pierre-Hugues Husson Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20171125201844.11353-1-phh@phh.me --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 25 +++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index bf14214fa464..b72259bf6e2f 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -138,6 +138,7 @@ struct dw_hdmi { struct device *dev; struct clk *isfr_clk; struct clk *iahb_clk; + struct clk *cec_clk; struct dw_hdmi_i2c *i2c; struct hdmi_data_info hdmi_data; @@ -2382,6 +2383,26 @@ __dw_hdmi_probe(struct platform_device *pdev, goto err_isfr; } + hdmi->cec_clk = devm_clk_get(hdmi->dev, "cec"); + if (PTR_ERR(hdmi->cec_clk) == -ENOENT) { + hdmi->cec_clk = NULL; + } else if (IS_ERR(hdmi->cec_clk)) { + ret = PTR_ERR(hdmi->cec_clk); + if (ret != -EPROBE_DEFER) + dev_err(hdmi->dev, "Cannot get HDMI cec clock: %d\n", + ret); + + hdmi->cec_clk = NULL; + goto err_iahb; + } else { + ret = clk_prepare_enable(hdmi->cec_clk); + if (ret) { + dev_err(hdmi->dev, "Cannot enable HDMI cec clock: %d\n", + ret); + goto err_iahb; + } + } + /* Product and revision IDs */ hdmi->version = (hdmi_readb(hdmi, HDMI_DESIGN_ID) << 8) | (hdmi_readb(hdmi, HDMI_REVISION_ID) << 0); @@ -2518,6 +2539,8 @@ err_iahb: cec_notifier_put(hdmi->cec_notifier); clk_disable_unprepare(hdmi->iahb_clk); + if (hdmi->cec_clk) + clk_disable_unprepare(hdmi->cec_clk); err_isfr: clk_disable_unprepare(hdmi->isfr_clk); err_res: @@ -2541,6 +2564,8 @@ static void __dw_hdmi_remove(struct dw_hdmi *hdmi) clk_disable_unprepare(hdmi->iahb_clk); clk_disable_unprepare(hdmi->isfr_clk); + if (hdmi->cec_clk) + clk_disable_unprepare(hdmi->cec_clk); if (hdmi->i2c) i2c_del_adapter(&hdmi->i2c->adap); From dbb58bfd9ae6c885b2ca001a9a5ab8b881fb4ba9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 14 Nov 2017 11:16:47 -0800 Subject: [PATCH 241/333] drm/bridge: Fix lvds-encoder since the panel_bridge rework. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The panel_bridge bridge attaches to the panel's OF node, not the lvds-encoder's node. Put in a little no-op bridge of our own so that our consumers can still find a bridge where they expect. This also fixes an unintended unregistration and leak of the panel-bridge on module remove. Signed-off-by: Eric Anholt Fixes: 13dfc0540a57 ("drm/bridge: Refactor out the panel wrapper from the lvds-encoder bri dge.") Tested-by: Lothar Waßmann Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20171114191647.22207-1-eric@anholt.net --- drivers/gpu/drm/bridge/lvds-encoder.c | 48 +++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/bridge/lvds-encoder.c b/drivers/gpu/drm/bridge/lvds-encoder.c index 0903ba574f61..75b0d3f6e4de 100644 --- a/drivers/gpu/drm/bridge/lvds-encoder.c +++ b/drivers/gpu/drm/bridge/lvds-encoder.c @@ -13,13 +13,37 @@ #include +struct lvds_encoder { + struct drm_bridge bridge; + struct drm_bridge *panel_bridge; +}; + +static int lvds_encoder_attach(struct drm_bridge *bridge) +{ + struct lvds_encoder *lvds_encoder = container_of(bridge, + struct lvds_encoder, + bridge); + + return drm_bridge_attach(bridge->encoder, lvds_encoder->panel_bridge, + bridge); +} + +static struct drm_bridge_funcs funcs = { + .attach = lvds_encoder_attach, +}; + static int lvds_encoder_probe(struct platform_device *pdev) { struct device_node *port; struct device_node *endpoint; struct device_node *panel_node; struct drm_panel *panel; - struct drm_bridge *bridge; + struct lvds_encoder *lvds_encoder; + + lvds_encoder = devm_kzalloc(&pdev->dev, sizeof(*lvds_encoder), + GFP_KERNEL); + if (!lvds_encoder) + return -ENOMEM; /* Locate the panel DT node. */ port = of_graph_get_port_by_id(pdev->dev.of_node, 1); @@ -49,20 +73,30 @@ static int lvds_encoder_probe(struct platform_device *pdev) return -EPROBE_DEFER; } - bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_LVDS); - if (IS_ERR(bridge)) - return PTR_ERR(bridge); + lvds_encoder->panel_bridge = + devm_drm_panel_bridge_add(&pdev->dev, + panel, DRM_MODE_CONNECTOR_LVDS); + if (IS_ERR(lvds_encoder->panel_bridge)) + return PTR_ERR(lvds_encoder->panel_bridge); - platform_set_drvdata(pdev, bridge); + /* The panel_bridge bridge is attached to the panel's of_node, + * but we need a bridge attached to our of_node for our user + * to look up. + */ + lvds_encoder->bridge.of_node = pdev->dev.of_node; + lvds_encoder->bridge.funcs = &funcs; + drm_bridge_add(&lvds_encoder->bridge); + + platform_set_drvdata(pdev, lvds_encoder); return 0; } static int lvds_encoder_remove(struct platform_device *pdev) { - struct drm_bridge *bridge = platform_get_drvdata(pdev); + struct lvds_encoder *lvds_encoder = platform_get_drvdata(pdev); - drm_bridge_remove(bridge); + drm_bridge_remove(&lvds_encoder->bridge); return 0; } From cffd2b16c01c3431a7a7dd62e722af33490fc436 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:19 +0300 Subject: [PATCH 242/333] drm/bridge: tc358767: do no fail on hi-res displays Do not fail data rates higher than 2.7 and more than 2 lanes. Try to fall back to 2.7Gbps and 2 lanes. Acked-by: Philipp Zabel Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-2-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 8571cfd877c5..e1996dbfb0a1 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -603,8 +603,15 @@ static int tc_get_display_props(struct tc_data *tc) ret = drm_dp_link_probe(&tc->aux, &tc->link.base); if (ret < 0) goto err_dpcd_read; - if ((tc->link.base.rate != 162000) && (tc->link.base.rate != 270000)) - goto err_dpcd_inval; + if (tc->link.base.rate != 162000 && tc->link.base.rate != 270000) { + dev_dbg(tc->dev, "Falling to 2.7 Gbps rate\n"); + tc->link.base.rate = 270000; + } + + if (tc->link.base.num_lanes > 2) { + dev_dbg(tc->dev, "Falling to 2 lanes\n"); + tc->link.base.num_lanes = 2; + } ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp); if (ret < 0) @@ -637,9 +644,6 @@ static int tc_get_display_props(struct tc_data *tc) err_dpcd_read: dev_err(tc->dev, "failed to read DPCD: %d\n", ret); return ret; -err_dpcd_inval: - dev_err(tc->dev, "invalid DPCD\n"); - return -EINVAL; } static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) From 99fc8e963a4c0203dba26a77cf737db6081bca14 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:20 +0300 Subject: [PATCH 243/333] drm/bridge: tc358767: filter out too high modes Pixel clock limitation for DPI is 154 MHz. Do not accept modes with higher pixel clock rate. Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-3-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index e1996dbfb0a1..e571b5e29ae7 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1103,7 +1103,10 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, static int tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - /* Accept any mode */ + /* DPI interface clock limitation: upto 154 MHz */ + if (mode->clock > 154000) + return MODE_CLOCK_HIGH; + return MODE_OK; } From f3b8adbe1911f66fd3cab1aaa74f0f66b7ceda25 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:21 +0300 Subject: [PATCH 244/333] drm/bridge: tc358767: fix DP0_MISC register set Remove shift from TU_SIZE_RECOMMENDED define as it used to calculate max_tu_symbols. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-4-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index e571b5e29ae7..7334cb2121a3 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -97,7 +97,7 @@ #define DP0_ACTIVEVAL 0x0650 #define DP0_SYNCVAL 0x0654 #define DP0_MISC 0x0658 -#define TU_SIZE_RECOMMENDED (0x3f << 16) /* LSCLK cycles per TU */ +#define TU_SIZE_RECOMMENDED (63) /* LSCLK cycles per TU */ #define BPC_6 (0 << 5) #define BPC_8 (1 << 5) @@ -716,7 +716,8 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) * Must be less than tu_size. */ max_tu_symbol = TU_SIZE_RECOMMENDED - 1; - tc_write(DP0_MISC, (max_tu_symbol << 23) | TU_SIZE_RECOMMENDED | BPC_8); + tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | + BPC_8); return 0; err: From 66d1c3b94d5d59e4325e61a78d520f92c043d645 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:22 +0300 Subject: [PATCH 245/333] drm/bridge: tc358767: fix timing calculations Fields in HTIM01 and HTIM02 regs should be even. Recomended thresh_dly value is max_tu_symbol. Remove set of VPCTRL0.VSDELAY as it is related to DSI input interface. Currently driver supports only DPI. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-5-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 34 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 7334cb2121a3..b916346b933a 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -659,6 +659,14 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) int lower_margin = mode->vsync_start - mode->vdisplay; int vsync_len = mode->vsync_end - mode->vsync_start; + /* + * Recommended maximum number of symbols transferred in a transfer unit: + * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, + * (output active video bandwidth in bytes)) + * Must be less than tu_size. + */ + max_tu_symbol = TU_SIZE_RECOMMENDED - 1; + dev_dbg(tc->dev, "set mode %dx%d\n", mode->hdisplay, mode->vdisplay); dev_dbg(tc->dev, "H margin %d,%d sync %d\n", @@ -668,13 +676,18 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) dev_dbg(tc->dev, "total: %dx%d\n", mode->htotal, mode->vtotal); - /* LCD Ctl Frame Size */ - tc_write(VPCTRL0, (0x40 << 20) /* VSDELAY */ | + /* + * LCD Ctl Frame Size + * datasheet is not clear of vsdelay in case of DPI + * assume we do not need any delay when DPI is a source of + * sync signals + */ + tc_write(VPCTRL0, (0 << 20) /* VSDELAY */ | OPXLFMT_RGB888 | FRMSYNC_DISABLED | MSF_DISABLED); - tc_write(HTIM01, (left_margin << 16) | /* H back porch */ - (hsync_len << 0)); /* Hsync */ - tc_write(HTIM02, (right_margin << 16) | /* H front porch */ - (mode->hdisplay << 0)); /* width */ + tc_write(HTIM01, (ALIGN(left_margin, 2) << 16) | /* H back porch */ + (ALIGN(hsync_len, 2) << 0)); /* Hsync */ + tc_write(HTIM02, (ALIGN(right_margin, 2) << 16) | /* H front porch */ + (ALIGN(mode->hdisplay, 2) << 0)); /* width */ tc_write(VTIM01, (upper_margin << 16) | /* V back porch */ (vsync_len << 0)); /* Vsync */ tc_write(VTIM02, (lower_margin << 16) | /* V front porch */ @@ -693,7 +706,7 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) /* DP Main Stream Attributes */ vid_sync_dly = hsync_len + left_margin + mode->hdisplay; tc_write(DP0_VIDSYNCDELAY, - (0x003e << 16) | /* thresh_dly */ + (max_tu_symbol << 16) | /* thresh_dly */ (vid_sync_dly << 0)); tc_write(DP0_TOTALVAL, (mode->vtotal << 16) | (mode->htotal)); @@ -709,13 +722,6 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW | DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888); - /* - * Recommended maximum number of symbols transferred in a transfer unit: - * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, - * (output active video bandwidth in bytes)) - * Must be less than tu_size. - */ - max_tu_symbol = TU_SIZE_RECOMMENDED - 1; tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | BPC_8); From 9217c1abbc145a77d65c476cf2004a3df02104c7 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:23 +0300 Subject: [PATCH 246/333] drm/bridge: tc358767: fix AUXDATAn registers access First four bytes should go to DP0_AUXWDATA0. Due to bug if len > 4 first four bytes was writen to DP0_AUXWDATA1 and all data get shifted by 4 bytes. Fix it. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-6-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index b916346b933a..24f495bf0567 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -318,7 +318,7 @@ static ssize_t tc_aux_transfer(struct drm_dp_aux *aux, tmp = (tmp << 8) | buf[i]; i++; if (((i % 4) == 0) || (i == size)) { - tc_write(DP0_AUXWDATA(i >> 2), tmp); + tc_write(DP0_AUXWDATA((i - 1) >> 2), tmp); tmp = 0; } } From 4dbd6c03fbf88299c573d676838896c6e06aade2 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:24 +0300 Subject: [PATCH 247/333] drm/bridge: tc358767: fix 1-lane behavior Use drm_dp_channel_eq_ok helper Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-7-git-send-email-andrey.gusakov@cogentembedded.com --- drivers/gpu/drm/bridge/tc358767.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 24f495bf0567..8636e7eeb731 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -819,8 +819,6 @@ static int tc_main_link_setup(struct tc_data *tc) unsigned int rate; u32 dp_phy_ctrl; int timeout; - bool aligned; - bool ready; u32 value; int ret; u8 tmp[8]; @@ -965,16 +963,15 @@ static int tc_main_link_setup(struct tc_data *tc) ret = drm_dp_dpcd_read_link_status(aux, tmp + 2); if (ret < 0) goto err_dpcd_read; - ready = (tmp[2] == ((DP_CHANNEL_EQ_BITS << 4) | /* Lane1 */ - DP_CHANNEL_EQ_BITS)); /* Lane0 */ - aligned = tmp[4] & DP_INTERLANE_ALIGN_DONE; - } while ((--timeout) && !(ready && aligned)); + } while ((--timeout) && + !(drm_dp_channel_eq_ok(tmp + 2, tc->link.base.num_lanes))); if (timeout == 0) { /* Read DPCD 0x200-0x201 */ ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2); if (ret < 0) goto err_dpcd_read; + dev_err(dev, "channel(s) EQ not ok\n"); dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]); dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n", tmp[1]); @@ -985,10 +982,6 @@ static int tc_main_link_setup(struct tc_data *tc) dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n", tmp[6]); - if (!ready) - dev_err(dev, "Lane0/1 not ready\n"); - if (!aligned) - dev_err(dev, "Lane0/1 not aligned\n"); return -EAGAIN; } From fd50fbb6bfdea5daa4ae4dd7b7082485ac44bdf5 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Mon, 27 Nov 2017 11:12:33 +0200 Subject: [PATCH 248/333] drm/i915: Disable THP until we have a GPU read BW W/A We seem to be missing some W/A for 2M pages and are getting a hit on raw GPU read bandwidths (even 30%) even though the GPU write bandwidths improve (even 10%). For now, disable THP, which is our only practical source of 2M pages until we have a W/A for the issue. v2: - Be explicit that we talk about GPU bandwidths (Eero) - s/deny/never/ because that's why (Chris) Reported-by: Valtteri Rantala Fixes: b901bb89324a ("drm/i915/gemfs: enable THP") Signed-off-by: Joonas Lahtinen Cc: Matthew Auld Cc: Chris Wilson Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Valtteri Rantala Cc: Eero Tamminen Reviewed-by: Chris Wilson Reviewed-by: Matthew Auld Tested-by: Valtteri Rantala Link: https://patchwork.freedesktop.org/patch/msgid/20171127091233.7001-1-joonas.lahtinen@linux.intel.com (cherry picked from commit 9987da4b5dcfc8b94b702d4bb94b30955eb73c75) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gemfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c index e2993857df37..888b7d3f04c3 100644 --- a/drivers/gpu/drm/i915/i915_gemfs.c +++ b/drivers/gpu/drm/i915/i915_gemfs.c @@ -52,7 +52,8 @@ int i915_gemfs_init(struct drm_i915_private *i915) if (has_transparent_hugepage()) { struct super_block *sb = gemfs->mnt_sb; - char options[] = "huge=within_size"; + /* FIXME: Disabled until we get W/A for read BW issue. */ + char options[] = "huge=never"; int flags = 0; int err; From 8677b1ac2db021ab30bb1fa34f1e56ebe0051ec3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Sep 2017 08:01:03 +0200 Subject: [PATCH 249/333] drm/omap: Fix error handling path in 'omap_dmm_probe()' If we don't find a matching device node, we must free the memory allocated in 'omap_dmm' a few lines above. Fixes: 7cb0d6c17b96 ("drm/omap: fix TILER on OMAP5") Signed-off-by: Christophe JAILLET Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index 1dd3dafc59af..c60a85e82c6d 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -638,7 +638,8 @@ static int omap_dmm_probe(struct platform_device *dev) match = of_match_node(dmm_of_match, dev->dev.of_node); if (!match) { dev_err(&dev->dev, "failed to find matching device node\n"); - return -ENODEV; + ret = -ENODEV; + goto fail; } omap_dmm->plat_data = match->data; From 499ec0ed5eb2f6a7fcaab2dd66ffc5993484bda9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Oct 2017 14:06:41 +0200 Subject: [PATCH 250/333] drm/omap: displays: panel-dpi: add backlight dependency The new backlight code causes a link failure when backlight support itself is disabled: drivers/gpu/drm/omapdrm/displays/panel-dpi.o: In function `panel_dpi_probe_of': panel-dpi.c:(.text+0x35c): undefined reference to `of_find_backlight_by_node' This adds a Kconfig dependency like we have for the other OMAP display targets. Fixes: 39135a305a0f ("drm/omap: displays: panel-dpi: Support for handling backlight devices") Signed-off-by: Arnd Bergmann Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/displays/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/omapdrm/displays/Kconfig b/drivers/gpu/drm/omapdrm/displays/Kconfig index c226da145fb3..a349cb61961e 100644 --- a/drivers/gpu/drm/omapdrm/displays/Kconfig +++ b/drivers/gpu/drm/omapdrm/displays/Kconfig @@ -35,6 +35,7 @@ config DRM_OMAP_CONNECTOR_ANALOG_TV config DRM_OMAP_PANEL_DPI tristate "Generic DPI panel" + depends on BACKLIGHT_CLASS_DEVICE help Driver for generic DPI panels. From 23970e150a0a49f9a966c46e5d22fed06226098f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 20 Nov 2017 11:51:40 +0200 Subject: [PATCH 251/333] omapdrm: hdmi4: Correct the SoC revision matching I believe the intention of the commit 2c9fc9bf45f8 ("drm: omapdrm: Move FEAT_HDMI_* features to hdmi4 driver") was to identify omap4430 ES1.x, omap4430 ES2.x and other OMAP4 revisions, like omap4460. By using family=OMAP4 in the match the code will treat omap4460 ES1.x in a same way as it would treat omap4430 ES1.x This breaks HDMI audio on OMAP4460 devices (PandaES for example). Correct the match rule so we are not going to get false positive match. Fixes: 2c9fc9bf45f8 ("drm: omapdrm: Move FEAT_HDMI_* features to hdmi4 driver") Cc: stable@vger.kernel.org # 4.14 Signed-off-by: Peter Ujfalusi Reviewed-by: Laurent Pinchart Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/hdmi4_core.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c index 62e451162d96..b06f9956e733 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c @@ -886,25 +886,36 @@ struct hdmi4_features { bool audio_use_mclk; }; -static const struct hdmi4_features hdmi4_es1_features = { +static const struct hdmi4_features hdmi4430_es1_features = { .cts_swmode = false, .audio_use_mclk = false, }; -static const struct hdmi4_features hdmi4_es2_features = { +static const struct hdmi4_features hdmi4430_es2_features = { .cts_swmode = true, .audio_use_mclk = false, }; -static const struct hdmi4_features hdmi4_es3_features = { +static const struct hdmi4_features hdmi4_features = { .cts_swmode = true, .audio_use_mclk = true, }; static const struct soc_device_attribute hdmi4_soc_devices[] = { - { .family = "OMAP4", .revision = "ES1.?", .data = &hdmi4_es1_features }, - { .family = "OMAP4", .revision = "ES2.?", .data = &hdmi4_es2_features }, - { .family = "OMAP4", .data = &hdmi4_es3_features }, + { + .machine = "OMAP4430", + .revision = "ES1.?", + .data = &hdmi4430_es1_features, + }, + { + .machine = "OMAP4430", + .revision = "ES2.?", + .data = &hdmi4430_es2_features, + }, + { + .family = "OMAP4", + .data = &hdmi4_features, + }, { /* sentinel */ } }; From bf25dac38f71d392a31ec074f55cbc941f1eaf1d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 16 Nov 2017 09:50:19 +0100 Subject: [PATCH 252/333] drm: omapdrm: Fix DPI on platforms using the DSI VDDS Commit d178e034d565 ("drm: omapdrm: Move FEAT_DPI_USES_VDDS_DSI feature to dpi code") replaced usage of platform data version with SoC matching to configure DPI VDDS. The SoC match entries were incorrect, they should have matched on the machine name instead of the SoC family. Fix it. The result was observed on OpenPandora with OMAP3530 where the panel only had the Blue channel and Red&Green were missing. It was not observed on GTA04 with DM3730. Fixes: d178e034d565 ("drm: omapdrm: Move FEAT_DPI_USES_VDDS_DSI feature to dpi code") Signed-off-by: Laurent Pinchart Reported-by: H. Nikolaus Schaller Tested-by: H. Nikolaus Schaller Cc: stable@vger.kernel.org # 4.14 Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/dpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dpi.c b/drivers/gpu/drm/omapdrm/dss/dpi.c index daf286fc8a40..ca1e3b489540 100644 --- a/drivers/gpu/drm/omapdrm/dss/dpi.c +++ b/drivers/gpu/drm/omapdrm/dss/dpi.c @@ -566,8 +566,8 @@ static int dpi_verify_pll(struct dss_pll *pll) } static const struct soc_device_attribute dpi_soc_devices[] = { - { .family = "OMAP3[456]*" }, - { .family = "[AD]M37*" }, + { .machine = "OMAP3[456]*" }, + { .machine = "[AD]M37*" }, { /* sentinel */ } }; From bc2aba905211a8fc6da367e04595d1624f0fa156 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 Oct 2017 09:27:05 +0300 Subject: [PATCH 253/333] omapdrm: hdmi4_cec: signedness bug in hdmi4_cec_init() "ret" needs to be signed for the error handling to work. Fixes: 8d7f934df8d8 ("omapdrm: hdmi4_cec: add OMAP4 HDMI CEC support") Signed-off-by: Dan Carpenter Reviewed-by: Sebastian Reichel Acked-by: Hans Verkuil Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c index d86873f2abe6..e626eddf24d5 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c @@ -352,7 +352,7 @@ int hdmi4_cec_init(struct platform_device *pdev, struct hdmi_core_data *core, { const u32 caps = CEC_CAP_TRANSMIT | CEC_CAP_LOG_ADDRS | CEC_CAP_PASSTHROUGH | CEC_CAP_RC; - unsigned int ret; + int ret; core->adap = cec_allocate_adapter(&hdmi_cec_adap_ops, core, "omap4", caps, CEC_MAX_LOG_ADDRS); From a154f8e399a063137fc42b961f437248d55ece29 Mon Sep 17 00:00:00 2001 From: Yan Markman Date: Thu, 30 Nov 2017 10:49:46 +0100 Subject: [PATCH 254/333] net: mvpp2: allocate zeroed tx descriptors Reserved and unused fields in the Tx descriptors should be 0. The PPv2 driver doesn't clear them at run-time (for performance reasons) but these descriptors aren't zeroed when allocated, which can lead to unpredictable behaviors. This patch fixes this by using dma_zalloc_coherent instead of dma_alloc_coherent. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Yan Markman [Antoine: commit message] Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index d83a78be98a2..fed2b2f909fc 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5598,7 +5598,7 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev, u32 txq_dma; /* Allocate memory for TX descriptors */ - aggr_txq->descs = dma_alloc_coherent(&pdev->dev, + aggr_txq->descs = dma_zalloc_coherent(&pdev->dev, MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE, &aggr_txq->descs_dma, GFP_KERNEL); if (!aggr_txq->descs) From f8821f96ae97260d68228fe53f81848b2ede44d7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Nov 2017 14:33:56 +0100 Subject: [PATCH 255/333] skbuff: Grammar s/are can/can/, s/change/changes/ Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bc486ef23f20..a38c80e9f91e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1406,8 +1406,7 @@ static inline struct sk_buff *skb_get(struct sk_buff *skb) } /* - * If users == 1, we are the only owner and are can avoid redundant - * atomic change. + * If users == 1, we are the only owner and can avoid redundant atomic changes. */ /** From 1c08ac0c4bd8e9d66c4dde29bc496c3b430dd028 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 28 Nov 2017 17:48:22 +0100 Subject: [PATCH 256/333] net: stmmac: dwmac-sun8i: fix allwinner,leds-active-low handling The driver expect "allwinner,leds-active-low" to be in PHY node, but the binding doc expect it to be in MAC node. Since all board DT use it also in MAC node, the driver need to search allwinner,leds-active-low in MAC node. Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index e5ff734d4f9b..9eb7f65d8000 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -808,8 +808,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) val, reg); if (gmac->variant->soc_has_internal_phy) { - if (of_property_read_bool(priv->plat->phy_node, - "allwinner,leds-active-low")) + if (of_property_read_bool(node, "allwinner,leds-active-low")) reg |= H3_EPHY_LED_POL; else reg &= ~H3_EPHY_LED_POL; From a152992062aa3803eeabfda84b5b844721ddf6ed Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 30 Nov 2017 14:31:46 +0100 Subject: [PATCH 257/333] drm/imx: always call wait_for_flip_done in commit_tail drm_atomic_helper_wait_for_vblanks will go away in the future. The new drm_atomic_helper_setup_commit in 4.15 expects that blocking commits have completed flipping before the commit_tail returns. This must be ensured by calling wait_for_vblanks or wait_for_flip_done, where flip_done might do a less agressive wait, which is fine for imx-drm. Fixes: 080de2e5be2d (drm/atomic: Check for busy planes/connectors before setting the commit) Signed-off-by: Lucas Stach Reviewed-by: Daniel Vetter Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/imx-drm-core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 93c7e3f9b4a8..17d2f3a1c562 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -133,9 +133,16 @@ static void imx_drm_atomic_commit_tail(struct drm_atomic_state *state) plane_disabling = true; } - if (plane_disabling) { - drm_atomic_helper_wait_for_vblanks(dev, state); + /* + * The flip done wait is only strictly required by imx-drm if a deferred + * plane disable is in-flight. As the core requires blocking commits + * to wait for the flip it is done here unconditionally. This keeps the + * workitem around a bit longer than required for the majority of + * non-blocking commits, but we accept that for the sake of simplicity. + */ + drm_atomic_helper_wait_for_flip_done(dev, state); + if (plane_disabling) { for_each_old_plane_in_state(state, plane, old_plane_state, i) ipu_plane_disable_deferred(plane); From 90a6ec85351b31449c2c6b5406b5396ac96f191d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 29 Nov 2017 16:07:51 -0800 Subject: [PATCH 258/333] act_sample: get rid of tcf_sample_cleanup_rcu() Similar to commit d7fb60b9cafb ("net_sched: get rid of tcfa_rcu"), TC actions don't need to respect RCU grace period, because it is either just detached from tc filter (standalone case) or it is removed together with tc filter (bound case) in which case RCU grace period is already respected at filter layer. Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action") Reported-by: Eric Dumazet Cc: Jamal Hadi Salim Cc: Jiri Pirko Cc: Yotam Gigi Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tc_act/tc_sample.h | 1 - net/sched/act_sample.c | 16 ++++------------ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index 524cee4f4c81..01dbfea32672 100644 --- a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -14,7 +14,6 @@ struct tcf_sample { struct psample_group __rcu *psample_group; u32 psample_group_num; struct list_head tcfm_list; - struct rcu_head rcu; }; #define to_sample(a) ((struct tcf_sample *)a) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 8b5abcd2f32f..9438969290a6 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -96,21 +96,14 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return ret; } -static void tcf_sample_cleanup_rcu(struct rcu_head *rcu) -{ - struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu); - struct psample_group *psample_group; - - psample_group = rcu_dereference_protected(s->psample_group, 1); - RCU_INIT_POINTER(s->psample_group, NULL); - psample_group_put(psample_group); -} - static void tcf_sample_cleanup(struct tc_action *a, int bind) { struct tcf_sample *s = to_sample(a); + struct psample_group *psample_group; - call_rcu(&s->rcu, tcf_sample_cleanup_rcu); + psample_group = rtnl_dereference(s->psample_group); + RCU_INIT_POINTER(s->psample_group, NULL); + psample_group_put(psample_group); } static bool tcf_sample_dev_ok_push(struct net_device *dev) @@ -264,7 +257,6 @@ static int __init sample_init_module(void) static void __exit sample_cleanup_module(void) { - rcu_barrier(); tcf_unregister_action(&act_sample_ops, &sample_net_ops); } From 3016dad75b48279e579117ee3ed566ba90a3b023 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Nov 2017 17:43:57 -0800 Subject: [PATCH 259/333] tcp: remove buggy call to tcp_v6_restore_cb() tcp_v6_send_reset() expects to receive an skb with skb->cb[] layout as used in TCP stack. MD5 lookup uses tcp_v6_iif() and tcp_v6_sdif() and thus TCP_SKB_CB(skb)->header.h6 This patch probably fixes RST packets sent on behalf of a timewait md5 ipv6 socket. Before Florian patch, tcp_v6_restore_cb() was needed before jumping to no_tcp_socket label. Fixes: 271c3b9b7bda ("tcp: honour SO_BINDTODEVICE for TW_RST case too") Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6bb98c93edfe..be11dc13aa70 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1590,7 +1590,6 @@ do_time_wait: tcp_v6_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v6_restore_cb(skb); tcp_v6_send_reset(sk, skb); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; From f859b4af1c52493ec21173ccc73d0b60029b5b88 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 30 Nov 2017 10:41:14 +0800 Subject: [PATCH 260/333] sit: update frag_off info After parsing the sit netlink change info, we forget to update frag_off in ipip6_tunnel_update(). Fix it by assigning frag_off with new value. Reported-by: Jianlin Shi Signed-off-by: Hangbin Liu Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d60ddcb0bfe2..d7dc23c1b2ca 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1098,6 +1098,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; t->parms.iph.tos = p->iph.tos; + t->parms.iph.frag_off = p->iph.frag_off; if (t->parms.link != p->link || t->fwmark != fwmark) { t->parms.link = p->link; t->fwmark = fwmark; From 22a6c83777ac7c17d6c63891beeeac24cf5da450 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 09:50:17 -0800 Subject: [PATCH 261/333] xfs: ubsan fixes Fix some complaints from the UBSAN about signed integer addition overflows. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a3eeaba156c5..b0cccf8a81a8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -399,7 +399,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + count > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1265,7 +1265,7 @@ xfs_map_trim_size( if (mapping_size > size) mapping_size = size; if (offset < i_size_read(inode) && - offset + mapping_size >= i_size_read(inode)) { + (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, i_blocksize(inode)); @@ -1312,7 +1312,7 @@ xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + size > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); From 2d5f4b5bebccfe983715ebc9255151e611234643 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 09:50:22 -0800 Subject: [PATCH 262/333] xfs: remove unused parameter from xfs_writepage_map The first thing that xfs_writepage_map does is clobber the offset parameter. Since we never use the passed-in value, turn the parameter into a local variable. This gets rid of an UBSAN warning in generic/466. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index b0cccf8a81a8..21e2d70884e1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -896,13 +896,13 @@ xfs_writepage_map( struct writeback_control *wbc, struct inode *inode, struct page *page, - loff_t offset, - uint64_t end_offset) + uint64_t end_offset) { LIST_HEAD(submit_list); struct xfs_ioend *ioend, *next; struct buffer_head *bh, *head; ssize_t len = i_blocksize(inode); + uint64_t offset; int error = 0; int count = 0; int uptodate = 1; @@ -1146,7 +1146,7 @@ xfs_do_writepage( end_offset = offset; } - return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset); + return xfs_writepage_map(wpc, wbc, inode, page, end_offset); redirty: redirty_page_for_writepage(wbc, page); From 3b42d385753c22b29d259ccb9d4c3f419e583b30 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 21:40:19 -0800 Subject: [PATCH 263/333] xfs: scrub inode mode properly Since we've used up all the bits in i_mode, the existing mode check doesn't actually do anything useful. However, we've not used all the bit values in the format portion of i_mode, so we /do/ need to test that for bad values. Fixes: 80e4e1268 ("xfs: scrub inodes") Fixes-coverity-id: 1423992 Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/scrub/inode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 637b7a892313..f120fb20452f 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -318,8 +318,20 @@ xfs_scrub_dinode( /* di_mode */ mode = be16_to_cpu(dip->di_mode); - if (mode & ~(S_IALLUGO | S_IFMT)) + switch (mode & S_IFMT) { + case S_IFLNK: + case S_IFREG: + case S_IFDIR: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + /* mode is recognized */ + break; + default: xfs_scrub_ino_set_corrupt(sc, ino, bp); + break; + } /* v1/v2 fields */ switch (dip->di_version) { From 373b0589dc8d58bc09c9a28d03611ae4fb216057 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Tue, 28 Nov 2017 08:54:10 -0800 Subject: [PATCH 264/333] xfs: Properly retry failed dquot items in case of error during buffer writeback Once the inode item writeback errors is already fixed, it's time to fix the same problem in dquot code. Although there were no reports of users hitting this bug in dquot code (at least none I've seen), the bug is there and I was already planning to fix it when the correct approach to fix the inodes part was decided. This patch aims to fix the same problem in dquot code, regarding failed buffers being unable to be resubmitted once they are flush locked. Tested with the recently test-case sent to fstests list by Hou Tao. Reviewed-by: Brian Foster Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_dquot.c | 14 +++++++++++--- fs/xfs/xfs_dquot_item.c | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index d57c2db64e59..f248708c10ff 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -970,14 +970,22 @@ xfs_qm_dqflush_done( * holding the lock before removing the dquot from the AIL. */ if ((lip->li_flags & XFS_LI_IN_AIL) && - lip->li_lsn == qip->qli_flush_lsn) { + ((lip->li_lsn == qip->qli_flush_lsn) || + (lip->li_flags & XFS_LI_FAILED))) { /* xfs_trans_ail_delete() drops the AIL lock. */ spin_lock(&ailp->xa_lock); - if (lip->li_lsn == qip->qli_flush_lsn) + if (lip->li_lsn == qip->qli_flush_lsn) { xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); - else + } else { + /* + * Clear the failed state since we are about to drop the + * flush lock + */ + if (lip->li_flags & XFS_LI_FAILED) + xfs_clear_li_failed(lip); spin_unlock(&ailp->xa_lock); + } } /* diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 2c7a1629e064..664dea105e76 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait( wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } +/* + * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer + * have been failed during writeback + * + * this informs the AIL that the dquot is already flush locked on the next push, + * and acquires a hold on the buffer to ensure that it isn't reclaimed before + * dirty data makes it to disk. + */ +STATIC void +xfs_dquot_item_error( + struct xfs_log_item *lip, + struct xfs_buf *bp) +{ + struct xfs_dquot *dqp; + + dqp = DQUOT_ITEM(lip)->qli_dquot; + ASSERT(!completion_done(&dqp->q_flush)); + xfs_set_li_failed(lip, bp); +} + STATIC uint xfs_qm_dquot_logitem_push( struct xfs_log_item *lip, @@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push( __acquires(&lip->li_ailp->xa_lock) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - struct xfs_buf *bp = NULL; + struct xfs_buf *bp = lip->li_buf; uint rval = XFS_ITEM_SUCCESS; int error; if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; + /* + * The buffer containing this item failed to be written back + * previously. Resubmit the buffer for IO + */ + if (lip->li_flags & XFS_LI_FAILED) { + if (!xfs_buf_trylock(bp)) + return XFS_ITEM_LOCKED; + + if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + rval = XFS_ITEM_FLUSHING; + + xfs_buf_unlock(bp); + return rval; + } + if (!xfs_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; @@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_unlock = xfs_qm_dquot_logitem_unlock, .iop_committed = xfs_qm_dquot_logitem_committed, .iop_push = xfs_qm_dquot_logitem_push, - .iop_committing = xfs_qm_dquot_logitem_committing + .iop_committing = xfs_qm_dquot_logitem_committing, + .iop_error = xfs_dquot_item_error }; /* From eb5b46faa693470681ec7c28cc2436edd1571198 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 30 Nov 2017 07:21:33 -0500 Subject: [PATCH 265/333] SUNRPC: Handle ENETDOWN errors Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 5 +++++ net/sunrpc/xprtsock.c | 1 + 2 files changed, 6 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a801da812f86..e2a4184f3c5d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1841,6 +1841,7 @@ call_bind_status(struct rpc_task *task) case -ECONNABORTED: case -ENOTCONN: case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -ENOBUFS: @@ -1917,6 +1918,7 @@ call_connect_status(struct rpc_task *task) /* fall through */ case -ECONNRESET: case -ECONNABORTED: + case -ENETDOWN: case -ENETUNREACH: case -EHOSTUNREACH: case -EADDRINUSE: @@ -2022,6 +2024,7 @@ call_transmit_status(struct rpc_task *task) */ case -ECONNREFUSED: case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: @@ -2071,6 +2074,7 @@ call_bc_transmit(struct rpc_task *task) switch (task->tk_status) { case 0: /* Success */ + case -ENETDOWN: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: @@ -2139,6 +2143,7 @@ call_status(struct rpc_task *task) task->tk_status = 0; switch(status) { case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c2b2d489b57b..7fa94abfd6b2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2439,6 +2439,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) */ case -ECONNREFUSED: case -ECONNRESET: + case -ENETDOWN: case -ENETUNREACH: case -EHOSTUNREACH: case -EADDRINUSE: From 5ddf755e4439833847a21bd5e2dc82d686679911 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:12 -0800 Subject: [PATCH 266/333] RISC-V: use generic serial.h Fixes this from allmodconfig: drivers/tty/serial/earlycon.c:27:10: fatal error: asm/serial.h: No such file or directory Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 18158be62a2b..970460a0b492 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -40,6 +40,7 @@ generic-y += resource.h generic-y += scatterlist.h generic-y += sections.h generic-y += sembuf.h +generic-y += serial.h generic-y += setup.h generic-y += shmbuf.h generic-y += shmparam.h From 5e6f82b0fe7b7b4a204efeb0817fb8b0a2bc0373 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:13 -0800 Subject: [PATCH 267/333] RISC-V: use RISCV_{INT,SHORT} instead of {INT,SHORT} for asm macros INT and SHORT are used by some drivers that pull in the include files, so prefixing helps avoid namespace conflicts. Other constructs in the same file already uses this. Fixes, among others, these warnings with allmodconfig: ../sound/core/pcm_misc.c:43:0: warning: "INT" redefined #define INT __force int Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/asm.h | 12 ++++++------ arch/riscv/include/asm/bug.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 6cbbb6a68d76..5ad4cb622bed 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -58,17 +58,17 @@ #endif #if (__SIZEOF_INT__ == 4) -#define INT __ASM_STR(.word) -#define SZINT __ASM_STR(4) -#define LGINT __ASM_STR(2) +#define RISCV_INT __ASM_STR(.word) +#define RISCV_SZINT __ASM_STR(4) +#define RISCV_LGINT __ASM_STR(2) #else #error "Unexpected __SIZEOF_INT__" #endif #if (__SIZEOF_SHORT__ == 2) -#define SHORT __ASM_STR(.half) -#define SZSHORT __ASM_STR(2) -#define LGSHORT __ASM_STR(1) +#define RISCV_SHORT __ASM_STR(.half) +#define RISCV_SZSHORT __ASM_STR(2) +#define RISCV_LGSHORT __ASM_STR(1) #else #error "Unexpected __SIZEOF_SHORT__" #endif diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index c3e13764a943..bfc7f099ab1f 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -27,8 +27,8 @@ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS -#define __BUG_ENTRY_ADDR INT " 1b - 2b" -#define __BUG_ENTRY_FILE INT " %0 - 2b" +#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b" +#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b" #else #define __BUG_ENTRY_ADDR RISCV_PTR " 1b" #define __BUG_ENTRY_FILE RISCV_PTR " %0" @@ -38,7 +38,7 @@ typedef u32 bug_insn_t; #define __BUG_ENTRY \ __BUG_ENTRY_ADDR "\n\t" \ __BUG_ENTRY_FILE "\n\t" \ - SHORT " %1" + RISCV_SHORT " %1" #else #define __BUG_ENTRY \ __BUG_ENTRY_ADDR From fe2726af9fdc93ecf2469b7c512fc1a8936e128c Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:14 -0800 Subject: [PATCH 268/333] RISC-V: io.h: type fixes for warnings include for __iomem definition. Also, add volatile to iounmap() like other architectures have it to avoid "discarding volatile" warnings from some drivers. Finally, explicitly promote the base address for INB/OUTB functions to avoid some old legacy drivers complaining about int-to-ptr promotions. The drivers are unlikely to work but they're included in allmodconfig so the warnings are noisy. Fixes, among other warnings, these with allmodconfig: ../arch/riscv/include/asm/io.h:24:21: error: expected '=', ',', ';', 'asm' or '__attribute__' before '*' token extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); sound/pci/echoaudio/echoaudio.c: In function 'snd_echo_free': sound/pci/echoaudio/echoaudio.c:1879:10: warning: passing argument 1 of 'iounmap' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers] Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/io.h | 16 +++++++++------- arch/riscv/mm/ioremap.c | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index c1f32cfcc79b..2a5c7c371e2d 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -19,6 +19,8 @@ #ifndef _ASM_RISCV_IO_H #define _ASM_RISCV_IO_H +#include + #ifdef CONFIG_MMU extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); @@ -32,7 +34,7 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); #define ioremap_wc(addr, size) ioremap((addr), (size)) #define ioremap_wt(addr, size) ioremap((addr), (size)) -extern void iounmap(void __iomem *addr); +extern void iounmap(volatile void __iomem *addr); #endif /* CONFIG_MMU */ @@ -266,9 +268,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar()) __io_reads_ins(ins, u8, b, __io_pbr(), __io_par()) __io_reads_ins(ins, u16, w, __io_pbr(), __io_par()) __io_reads_ins(ins, u32, l, __io_pbr(), __io_par()) -#define insb(addr, buffer, count) __insb((void __iomem *)addr, buffer, count) -#define insw(addr, buffer, count) __insw((void __iomem *)addr, buffer, count) -#define insl(addr, buffer, count) __insl((void __iomem *)addr, buffer, count) +#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count) +#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count) +#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count) __io_writes_outs(writes, u8, b, __io_bw(), __io_aw()) __io_writes_outs(writes, u16, w, __io_bw(), __io_aw()) @@ -280,9 +282,9 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw()) __io_writes_outs(outs, u8, b, __io_pbw(), __io_paw()) __io_writes_outs(outs, u16, w, __io_pbw(), __io_paw()) __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw()) -#define outsb(addr, buffer, count) __outsb((void __iomem *)addr, buffer, count) -#define outsw(addr, buffer, count) __outsw((void __iomem *)addr, buffer, count) -#define outsl(addr, buffer, count) __outsl((void __iomem *)addr, buffer, count) +#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count) +#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count) +#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count) #ifdef CONFIG_64BIT __io_reads_ins(reads, u64, q, __io_br(), __io_ar()) diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c index e99194a4077e..70ef2724cdf6 100644 --- a/arch/riscv/mm/ioremap.c +++ b/arch/riscv/mm/ioremap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(ioremap); * * Caller must ensure there is only one unmapping for the same pointer. */ -void iounmap(void __iomem *addr) +void iounmap(volatile void __iomem *addr) { vunmap((void *)((unsigned long)addr & PAGE_MASK)); } From 83e7b8769a08987f47117f3a065de153c839a0a8 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:15 -0800 Subject: [PATCH 269/333] RISC-V: move empty_zero_page definition to C and export it Needed by some modules (exported by other architectures). Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 3 --- arch/riscv/kernel/setup.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 76af908f87c1..78f670d70133 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -152,6 +152,3 @@ END(_start) __PAGE_ALIGNED_BSS /* Empty zero page */ .balign PAGE_SIZE -ENTRY(empty_zero_page) - .fill (empty_zero_page + PAGE_SIZE) - ., 1, 0x00 -END(empty_zero_page) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index de7db114c315..4375375ad168 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -60,6 +60,9 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; unsigned long va_pa_offset; unsigned long pfn_base; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; +EXPORT_SYMBOL(empty_zero_page); + /* The lucky hart to first increment this variable will boot the other cores */ atomic_t hart_lottery; From 24948b7ec0f31d36dc900088b140c4f9551b6f56 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:16 -0800 Subject: [PATCH 270/333] RISC-V: Export some expected symbols for modules These are the ones needed by current allmodconfig, so add them instead of everything other architectures are exporting -- the rest can be added on demand later if needed. Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/riscv_ksyms.c | 3 +++ arch/riscv/kernel/setup.c | 2 ++ arch/riscv/lib/delay.c | 1 + 3 files changed, 6 insertions(+) diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 23cc81ec9e94..551734248748 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,4 +12,7 @@ /* * Assembly functions that may be used (directly or indirectly) by modules */ +EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4375375ad168..8fbb6749910d 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -58,7 +58,9 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; #endif /* CONFIG_CMDLINE_BOOL */ unsigned long va_pa_offset; +EXPORT_SYMBOL(va_pa_offset); unsigned long pfn_base; +EXPORT_SYMBOL(pfn_base); unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c index 1cc4ac3964b4..dce8ae24c6d3 100644 --- a/arch/riscv/lib/delay.c +++ b/arch/riscv/lib/delay.c @@ -84,6 +84,7 @@ void __delay(unsigned long cycles) while ((unsigned long)(get_cycles() - t0) < cycles) cpu_relax(); } +EXPORT_SYMBOL(__delay); void udelay(unsigned long usecs) { From 4bde63286a6c7c76fe05ff0e03ad253f5260b104 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:17 -0800 Subject: [PATCH 271/333] RISC-V: Provide stub of setup_profiling_timer() Fixes the following on allmodconfig build: profile.c:(.text+0x3e4): undefined reference to `setup_profiling_timer' Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b4a71ec5906f..17014c68e9fe 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -38,6 +38,13 @@ enum ipi_message_type { IPI_MAX }; + +/* Unsupported */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + irqreturn_t handle_ipi(void) { unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; From 4a41d5dbb0bbd0c3faffb2ccd8ef1a7aeb12f978 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:18 -0800 Subject: [PATCH 272/333] RISC-V: Use define for get_cycles like other architectures Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/timex.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 3df4932d8964..2f26989cb864 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -18,7 +18,7 @@ typedef unsigned long cycles_t; -static inline cycles_t get_cycles(void) +static inline cycles_t get_cycles_inline(void) { cycles_t n; @@ -27,6 +27,7 @@ static inline cycles_t get_cycles(void) : "=r" (n)); return n; } +#define get_cycles get_cycles_inline #ifdef CONFIG_64BIT static inline uint64_t get_cycles64(void) From 741fc3ff3a49509a7092d6d9eb51da6bd7577278 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 17:55:20 -0800 Subject: [PATCH 273/333] RISC-V: Add missing include Fixes: include/asm-generic/mm_hooks.h:20:11: warning: 'struct vm_area_struct' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/mm_hooks.h:19:38: warning: 'struct mm_struct' declared inside parameter list will not be visible outside of this definition or declaration Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu_context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index de1fc1631fc4..1cd5172882c2 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -14,6 +14,7 @@ #ifndef _ASM_RISCV_MMU_CONTEXT_H #define _ASM_RISCV_MMU_CONTEXT_H +#include #include #include From 2b279419567105d63f1e524bb1ac34ae8f918e5d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 30 Nov 2017 08:52:42 -0800 Subject: [PATCH 274/333] tools/bpf: adjust rlimit RLIMIT_MEMLOCK for test_verifier_log The default rlimit RLIMIT_MEMLOCK is 64KB. In certain cases, e.g. in a test machine mimicking our production system, this test may fail due to unable to charge the required memory for prog load: # ./test_verifier_log Test log_level 0... ERROR: Program load returned: ret:-1/errno:1, expected ret:-1/errno:22 Changing the default rlimit RLIMIT_MEMLOCK to unlimited makes the test always pass. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier_log.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c index 3cc0b561489e..e9626cf5607a 100644 --- a/tools/testing/selftests/bpf/test_verifier_log.c +++ b/tools/testing/selftests/bpf/test_verifier_log.c @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include @@ -131,11 +133,16 @@ static void test_log_bad(char *log, size_t log_len, int log_level) int main(int argc, char **argv) { + struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; char full_log[LOG_SIZE]; char log[LOG_SIZE]; size_t want_len; int i; + /* allow unlimited locked memory to have more consistent error code */ + if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0) + perror("Unable to lift memlock rlimit"); + memset(log, 1, LOG_SIZE); /* Test incorrect attr */ From 6fef90c6b3f6a2b52018e66c0886944ea0c03fcc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 30 Nov 2017 10:45:26 -0800 Subject: [PATCH 275/333] net: dsa: bcm_sf2: Set correct CHAIN_ID and slice number mask When configuring an IPv6 address mask, we should use SLICE_NUM_MASK as the mask in order to make sure all bits are masked by the hardware. Also, we want matching entries to have a CHAIN_ID value set to the same value as the rule index we return to user-space for convenience, so fix that too. Fixes: ba0696c22e7c ("net: dsa: bcm_sf2: Add support for IPv6 CFP rules") Fixes: dd8eff68343d ("net: dsa: bcm_sf2: Allow matching arbitrary IPv6 masks/lengths") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index b721a2009b50..23b45da784cb 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -625,7 +625,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6src, v6_spec->psrc, slice_num, false); bcm_sf2_cfp_slice_ipv6(priv, v6_m_spec->ip6src, v6_m_spec->psrc, - slice_num, true); + SLICE_NUM_MASK, true); /* Insert into TCAM now because we need to insert a second rule */ bcm_sf2_cfp_rule_addr_set(priv, rule_index[0]); @@ -699,7 +699,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, /* Insert into Action and policer RAMs now, set chain ID to * the one we are chained to */ - ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port_num, + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port_num, queue_num, true); if (ret) goto out_err; From 08f051eda33b51e8ee0f45f05bcfe49d0f0caf6b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 Oct 2017 14:30:32 -0700 Subject: [PATCH 276/333] RISC-V: Flush I$ when making a dirty page executable The RISC-V ISA allows for instruction caches that are not coherent WRT stores, even on a single hart. As a result, we need to explicitly flush the instruction cache whenever marking a dirty page as executable in order to preserve the correct system behavior. Local instruction caches aren't that scary (our implementations actually flush the cache, but RISC-V is defined to allow higher-performance implementations to exist), but RISC-V defines no way to perform an instruction cache shootdown. When explicitly asked to do so we can shoot down remote instruction caches via an IPI, but this is a bit on the slow side. Instead of requiring an IPI to all harts whenever marking a page as executable, we simply flush the currently running harts. In order to maintain correct behavior, we additionally mark every other hart as needing a deferred instruction cache which will be taken before anything runs on it. Signed-off-by: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 24 ++++++++++-- arch/riscv/include/asm/mmu.h | 4 ++ arch/riscv/include/asm/mmu_context.h | 44 +++++++++++++++++++++ arch/riscv/include/asm/pgtable.h | 58 +++++++++++++++------------- arch/riscv/include/asm/tlbflush.h | 2 + arch/riscv/kernel/smp.c | 48 +++++++++++++++++++++++ arch/riscv/mm/Makefile | 1 + arch/riscv/mm/cacheflush.c | 23 +++++++++++ 8 files changed, 174 insertions(+), 30 deletions(-) create mode 100644 arch/riscv/mm/cacheflush.c diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 0595585013b0..5c9ed39ee2a2 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -18,21 +18,37 @@ #undef flush_icache_range #undef flush_icache_user_range +#undef flush_dcache_page static inline void local_flush_icache_all(void) { asm volatile ("fence.i" ::: "memory"); } +#define PG_dcache_clean PG_arch_1 + +static inline void flush_dcache_page(struct page *page) +{ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} + +/* + * RISC-V doesn't have an instruction to flush parts of the instruction cache, + * so instead we just flush the whole thing. + */ +#define flush_icache_range(start, end) flush_icache_all() +#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all() + #ifndef CONFIG_SMP -#define flush_icache_range(start, end) local_flush_icache_all() -#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all() +#define flush_icache_all() local_flush_icache_all() +#define flush_icache_mm(mm, local) flush_icache_all() #else /* CONFIG_SMP */ -#define flush_icache_range(start, end) sbi_remote_fence_i(0) -#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0) +#define flush_icache_all() sbi_remote_fence_i(0) +void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 66805cba9a27..5df2dccdba12 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,10 @@ typedef struct { void *vdso; +#ifdef CONFIG_SMP + /* A local icache flush is needed before user execution can resume. */ + cpumask_t icache_stale_mask; +#endif } mm_context_t; #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index de1fc1631fc4..b15b169e3d22 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,6 +20,7 @@ #include #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *task) @@ -46,12 +48,54 @@ static inline void set_pgdir(pgd_t *pgd) csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE); } +/* + * When necessary, performs a deferred icache flush for the given MM context, + * on the local CPU. RISC-V has no direct mechanism for instruction cache + * shoot downs, so instead we send an IPI that informs the remote harts they + * need to flush their local instruction caches. To avoid pathologically slow + * behavior in a common case (a bunch of single-hart processes on a many-hart + * machine, ie 'make -j') we avoid the IPIs for harts that are not currently + * executing a MM context and instead schedule a deferred local instruction + * cache flush to be performed before execution resumes on each hart. This + * actually performs that local instruction cache flush, which implicitly only + * refers to the current hart. + */ +static inline void flush_icache_deferred(struct mm_struct *mm) +{ +#ifdef CONFIG_SMP + unsigned int cpu = smp_processor_id(); + cpumask_t *mask = &mm->context.icache_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + /* + * Ensure the remote hart's writes are visible to this hart. + * This pairs with a barrier in flush_icache_mm. + */ + smp_mb(); + local_flush_icache_all(); + } +#endif +} + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *task) { if (likely(prev != next)) { + /* + * Mark the current MM context as inactive, and the next as + * active. This is at least used by the icache flushing + * routines in order to determine who should + */ + unsigned int cpu = smp_processor_id(); + + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); + set_pgdir(next->pgd); local_flush_tlb_all(); + + flush_icache_deferred(next); } } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 3399257780b2..2cbd92ed1629 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr) #define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) #define pte_unmap(pte) ((void)(pte)) -/* - * Certain architectures need to do special things when PTEs within - * a page table are directly modified. Thus, the following hook is - * made available. - */ -static inline void set_pte(pte_t *ptep, pte_t pteval) -{ - *ptep = pteval; -} - -static inline void set_pte_at(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pte_t pteval) -{ - set_pte(ptep, pteval); -} - -static inline void pte_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - set_pte_at(mm, addr, ptep, __pte(0)); -} - static inline int pte_present(pte_t pte) { return (pte_val(pte) & _PAGE_PRESENT); @@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte) return (pte_val(pte) == 0); } -/* static inline int pte_read(pte_t pte) */ - static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_exec(pte_t pte) +{ + return pte_val(pte) & _PAGE_EXEC; +} + static inline int pte_huge(pte_t pte) { return pte_present(pte) && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); } -/* static inline int pte_exec(pte_t pte) */ - static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; @@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) return pte_val(pte_a) == pte_val(pte_b); } +/* + * Certain architectures need to do special things when PTEs within + * a page table are directly modified. Thus, the following hook is + * made available. + */ +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +void flush_icache_pte(pte_t pte); + +static inline void set_pte_at(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pteval) +{ + if (pte_present(pteval) && pte_exec(pteval)) + flush_icache_pte(pteval); + + set_pte(ptep, pteval); +} + +static inline void pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + set_pte_at(mm, addr, ptep, __pte(0)); +} + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS static inline int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 5ee4ae370b5e..77edf2826c1f 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -17,6 +17,8 @@ #ifdef CONFIG_MMU +#include + /* Flush entire local TLB */ static inline void local_flush_tlb_all(void) { diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b4a71ec5906f..1b27ade437b4 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -108,3 +108,51 @@ void smp_send_reschedule(int cpu) { send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); } + +/* + * Performs an icache flush for the given MM context. RISC-V has no direct + * mechanism for instruction cache shoot downs, so instead we send an IPI that + * informs the remote harts they need to flush their local instruction caches. + * To avoid pathologically slow behavior in a common case (a bunch of + * single-hart processes on a many-hart machine, ie 'make -j') we avoid the + * IPIs for harts that are not currently executing a MM context and instead + * schedule a deferred local instruction cache flush to be performed before + * execution resumes on each hart. + */ +void flush_icache_mm(struct mm_struct *mm, bool local) +{ + unsigned int cpu; + cpumask_t others, *mask; + + preempt_disable(); + + /* Mark every hart's icache as needing a flush for this MM. */ + mask = &mm->context.icache_stale_mask; + cpumask_setall(mask); + /* Flush this hart's I$ now, and mark it as flushed. */ + cpu = smp_processor_id(); + cpumask_clear_cpu(cpu, mask); + local_flush_icache_all(); + + /* + * Flush the I$ of other harts concurrently executing, and mark them as + * flushed. + */ + cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu)); + local |= cpumask_empty(&others); + if (mm != current->active_mm || !local) + sbi_remote_fence_i(others.bits); + else { + /* + * It's assumed that at least one strongly ordered operation is + * performed on this hart between setting a hart's cpumask bit + * and scheduling this MM context on that hart. Sending an SBI + * remote message will do this, but in the case where no + * messages are sent we still need to order this hart's writes + * with flush_icache_deferred(). + */ + smp_mb(); + } + + preempt_enable(); +} diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 81f7d9ce6d88..eb22ab49b3e0 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -2,3 +2,4 @@ obj-y += init.o obj-y += fault.o obj-y += extable.o obj-y += ioremap.o +obj-y += cacheflush.o diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c new file mode 100644 index 000000000000..498c0a0814fe --- /dev/null +++ b/arch/riscv/mm/cacheflush.c @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +void flush_icache_pte(pte_t pte) +{ + struct page *page = pte_page(pte); + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + flush_icache_all(); +} From 921ebd8f2c081b3cf6c3b29ef4103eef3ff26054 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 Oct 2017 14:32:16 -0700 Subject: [PATCH 277/333] RISC-V: Allow userspace to flush the instruction cache Despite RISC-V having a direct 'fence.i' instruction available to userspace (which we can't trap!), that's not actually viable when running on Linux because the kernel might schedule a process on another hart. There is no way for userspace to handle this without invoking the kernel (as it doesn't know the thread->hart mappings), so we've defined a RISC-V specific system call to flush the instruction cache. This patch adds both a system call and a VDSO entry. If possible, we'd like to avoid having the system call be considered part of the user-facing ABI and instead restrict that to the VDSO entry -- both just in general to avoid having additional user-visible ABI to maintain, and because we'd prefer that users just call the VDSO entry because there might be a better way to do this in the future (ie, one that doesn't require entering the kernel). Signed-off-by: Andrew Waterman Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 6 +++++ arch/riscv/include/asm/vdso-syscalls.h | 28 ++++++++++++++++++++++ arch/riscv/include/asm/vdso.h | 4 ++++ arch/riscv/kernel/sys_riscv.c | 32 ++++++++++++++++++++++++++ arch/riscv/kernel/syscall_table.c | 2 ++ arch/riscv/kernel/vdso/Makefile | 1 + arch/riscv/kernel/vdso/flush_icache.S | 31 +++++++++++++++++++++++++ arch/riscv/kernel/vdso/vdso.lds.S | 1 + 8 files changed, 105 insertions(+) create mode 100644 arch/riscv/include/asm/vdso-syscalls.h create mode 100644 arch/riscv/kernel/vdso/flush_icache.S diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 5c9ed39ee2a2..efd89a88d2d0 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -52,4 +52,10 @@ void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +/* + * Bits in sys_riscv_flush_icache()'s flags argument. + */ +#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL +#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) + #endif /* _ASM_RISCV_CACHEFLUSH_H */ diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h new file mode 100644 index 000000000000..a2ccf1894929 --- /dev/null +++ b/arch/riscv/include/asm/vdso-syscalls.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _ASM_RISCV_VDSO_SYSCALLS_H +#define _ASM_RISCV_VDSO_SYSCALLS_H + +#ifdef CONFIG_SMP + +/* These syscalls are only used by the vDSO and are not in the uapi. */ +#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) +__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) + +#endif + +#endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 602f61257553..541544d64c33 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -38,4 +38,8 @@ struct vdso_data { (void __user *)((unsigned long)(base) + __vdso_##name); \ }) +#ifdef CONFIG_SMP +asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); +#endif + #endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 4351be7d0533..c6c037eabaf6 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -16,6 +16,7 @@ #include #include #include +#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -47,3 +48,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12); } #endif /* !CONFIG_64BIT */ + +#ifdef CONFIG_SMP +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * sys_riscv_flush_icache() is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, + uintptr_t, flags) +{ + struct mm_struct *mm = current->mm; + bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0; + + /* Check the reserved flags. */ + if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL)) + return -EINVAL; + + flush_icache_mm(mm, local); + + return 0; +} +#endif diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index 4e30dc5fb593..a5bd6401f95e 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -15,6 +15,7 @@ #include #include #include +#include #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), @@ -22,4 +23,5 @@ void *sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = sys_ni_syscall, #include +#include }; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 2dcc4f3070bc..324568d33921 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -6,6 +6,7 @@ vdso-syms += gettimeofday vdso-syms += clock_gettime vdso-syms += clock_getres vdso-syms += getcpu +vdso-syms += flush_icache # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S new file mode 100644 index 000000000000..b0fbad74e873 --- /dev/null +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include + + .text +/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ +ENTRY(__vdso_flush_icache) + .cfi_startproc +#ifdef CONFIG_SMP + li a7, __NR_riscv_flush_icache + ecall +#else + fence.i + li a0, 0 +#endif + ret + .cfi_endproc +ENDPROC(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index c7543c6a00f9..cd1d47e0724b 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -74,6 +74,7 @@ VERSION __vdso_clock_gettime; __vdso_clock_getres; __vdso_getcpu; + __vdso_flush_icache; local: *; }; } From 0e710ac6521f13c68a5c634471f40ae448c31e0a Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 21 Nov 2017 07:59:28 -0800 Subject: [PATCH 278/333] RISC-V: Clean up an unused include We used to have some cmpxchg syscalls. They're no longer there, so we no longer need the include. CC: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index c6c037eabaf6..a2ae936a093e 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -14,7 +14,6 @@ */ #include -#include #include #include From 68615eb01f82256c19e41967bfb3eef902f77033 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 27 Nov 2017 17:31:00 +0100 Subject: [PATCH 279/333] hwmon: (jc42) optionally try to disable the SMBUS timeout With a nxp,se97 chip on an atmel sama5d31 board, the I2C adapter driver is not always capable of avoiding the 25-35 ms timeout as specified by the SMBUS protocol. This may cause silent corruption of the last bit of any transfer, e.g. a one is read instead of a zero if the sensor chip times out. This also affects the eeprom half of the nxp-se97 chip, where this silent corruption was originally noticed. Other I2C adapters probably suffer similar issues, e.g. bit-banging comes to mind as risky... The SMBUS register in the nxp chip is not a standard Jedec register, but it is not special to the nxp chips either, at least the atmel chips have the same mechanism. Therefore, do not special case this on the manufacturer, it is opt-in via the device property anyway. Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Peter Rosin Acked-by: Rob Herring Signed-off-by: Guenter Roeck --- .../devicetree/bindings/hwmon/jc42.txt | 4 ++++ drivers/hwmon/jc42.c | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/hwmon/jc42.txt b/Documentation/devicetree/bindings/hwmon/jc42.txt index 07a250498fbb..f569db58f64a 100644 --- a/Documentation/devicetree/bindings/hwmon/jc42.txt +++ b/Documentation/devicetree/bindings/hwmon/jc42.txt @@ -34,6 +34,10 @@ Required properties: - reg: I2C address +Optional properties: +- smbus-timeout-disable: When set, the smbus timeout function will be disabled. + This is not supported on all chips. + Example: temp-sensor@1a { diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index 5f11dc014ed6..e5234f953a6d 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -22,6 +22,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include @@ -45,6 +46,7 @@ static const unsigned short normal_i2c[] = { #define JC42_REG_TEMP 0x05 #define JC42_REG_MANID 0x06 #define JC42_REG_DEVICEID 0x07 +#define JC42_REG_SMBUS 0x22 /* NXP and Atmel, possibly others? */ /* Status bits in temperature register */ #define JC42_ALARM_CRIT_BIT 15 @@ -75,6 +77,9 @@ static const unsigned short normal_i2c[] = { #define GT_MANID 0x1c68 /* Giantec */ #define GT_MANID2 0x132d /* Giantec, 2nd mfg ID */ +/* SMBUS register */ +#define SMBUS_STMOUT BIT(7) /* SMBus time-out, active low */ + /* Supported chips */ /* Analog Devices */ @@ -495,6 +500,22 @@ static int jc42_probe(struct i2c_client *client, const struct i2c_device_id *id) data->extended = !!(cap & JC42_CAP_RANGE); + if (device_property_read_bool(dev, "smbus-timeout-disable")) { + int smbus; + + /* + * Not all chips support this register, but from a + * quick read of various datasheets no chip appears + * incompatible with the below attempt to disable + * the timeout. And the whole thing is opt-in... + */ + smbus = i2c_smbus_read_word_swapped(client, JC42_REG_SMBUS); + if (smbus < 0) + return smbus; + i2c_smbus_write_word_swapped(client, JC42_REG_SMBUS, + smbus | SMBUS_STMOUT); + } + config = i2c_smbus_read_word_swapped(client, JC42_REG_CONFIG); if (config < 0) return config; From c8c088ba0edf65044c254b96fc438c91914aaab0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 30 Nov 2017 13:47:54 -0800 Subject: [PATCH 280/333] bpf: set maximum number of attached progs to 64 for a single perf tp cgropu+bpf prog array has a maximum number of 64 programs. Let us apply the same limit here. Fixes: e87c6bc3852b ("bpf: permit multiple bpf attachments for a single perf event") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/bpf/core.c | 3 ++- kernel/trace/bpf_trace.c | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b9f8686a84cf..86b50aa26ee8 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1447,7 +1447,8 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) rcu_read_lock(); prog = rcu_dereference(progs)->progs; for (; *prog; prog++) - cnt++; + if (*prog != &dummy_bpf_prog.prog) + cnt++; rcu_read_unlock(); return cnt; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 27d1f4ffa3de..0ce99c379c30 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -759,6 +759,8 @@ const struct bpf_prog_ops perf_event_prog_ops = { static DEFINE_MUTEX(bpf_event_mutex); +#define BPF_TRACE_MAX_PROGS 64 + int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) { @@ -772,6 +774,12 @@ int perf_event_attach_bpf_prog(struct perf_event *event, goto unlock; old_array = event->tp_event->prog_array; + if (old_array && + bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) { + ret = -E2BIG; + goto unlock; + } + ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); if (ret < 0) goto unlock; From 0ec9552b43b98deb882bf48efd347be4bd7afc9f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 30 Nov 2017 13:47:55 -0800 Subject: [PATCH 281/333] samples/bpf: add error checking for perf ioctl calls in bpf loader load_bpf_file() should fail if ioctl with command PERF_EVENT_IOC_ENABLE and PERF_EVENT_IOC_SET_BPF fails. When they do fail, proper error messages are printed. With this change, the below "syscall_tp" run shows that the maximum number of bpf progs attaching to the same perf tracepoint is indeed enforced. $ ./syscall_tp -i 64 prog #0: map ids 4 5 ... prog #63: map ids 382 383 $ ./syscall_tp -i 65 prog #0: map ids 4 5 ... prog #64: map ids 388 389 ioctl PERF_EVENT_IOC_SET_BPF failed err Argument list too long Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- samples/bpf/bpf_load.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 522ca9252d6c..242631aa4ea2 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -193,8 +193,18 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return -1; } event_fd[prog_cnt - 1] = efd; - ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); - ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); + err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); + if (err < 0) { + printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n", + strerror(errno)); + return -1; + } + err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); + if (err < 0) { + printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n", + strerror(errno)); + return -1; + } return 0; } From 435019b48033138581a6171093b181fc6b4d3d30 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 21 Nov 2017 08:22:26 +0100 Subject: [PATCH 282/333] can: kvaser_usb: free buf in error paths The allocated buffer was not freed if usb_submit_urb() failed. Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 9b18d96ef526..075644591498 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -813,6 +813,7 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv, if (err) { netdev_err(netdev, "Error transmitting URB\n"); usb_unanchor_urb(urb); + kfree(buf); usb_free_urb(urb); return err; } @@ -1768,6 +1769,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, spin_unlock_irqrestore(&priv->tx_contexts_lock, flags); usb_unanchor_urb(urb); + kfree(buf); stats->tx_dropped++; From e84f44eb5523401faeb9cc1c97895b68e3cfb78d Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 21 Nov 2017 08:22:27 +0100 Subject: [PATCH 283/333] can: kvaser_usb: Fix comparison bug in kvaser_usb_read_bulk_callback() The conditon in the while-loop becomes true when actual_length is less than 2 (MSG_HEADER_LEN). In best case we end up with a former, already dispatched msg, that got msg->len greater than actual_length. This will result in a "Format error" error printout. Problem seen when unplugging a Kvaser USB device connected to a vbox guest. warning: comparison between signed and unsigned integer expressions [-Wsign-compare] Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 075644591498..d87e330a20b3 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1334,7 +1334,7 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) goto resubmit_urb; } - while (pos <= urb->actual_length - MSG_HEADER_LEN) { + while (pos <= (int)(urb->actual_length - MSG_HEADER_LEN)) { msg = urb->transfer_buffer + pos; /* The Kvaser firmware can only read and write messages that From 8bd13bd522ff7dfa0eb371921aeb417155f7a3be Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 21 Nov 2017 08:22:28 +0100 Subject: [PATCH 284/333] can: kvaser_usb: ratelimit errors if incomplete messages are received Avoid flooding the kernel log with "Formate error", if incomplete message are received. Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index d87e330a20b3..f95945915d20 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -609,8 +609,8 @@ static int kvaser_usb_wait_msg(const struct kvaser_usb *dev, u8 id, } if (pos + tmp->len > actual_len) { - dev_err(dev->udev->dev.parent, - "Format error\n"); + dev_err_ratelimited(dev->udev->dev.parent, + "Format error\n"); break; } @@ -1353,7 +1353,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) } if (pos + msg->len > urb->actual_length) { - dev_err(dev->udev->dev.parent, "Format error\n"); + dev_err_ratelimited(dev->udev->dev.parent, + "Format error\n"); break; } From f6c23b174c3c96616514827407769cbcfc8005cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=A4bler?= Date: Mon, 20 Nov 2017 14:45:15 +0100 Subject: [PATCH 285/333] can: ti_hecc: Fix napi poll return value for repoll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit d75b1ade567f ("net: less interrupt masking in NAPI") napi repoll is done only when work_done == budget. So we need to return budget if there are still packets to receive. Signed-off-by: Oliver Stäbler Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 4d4941469cfc..db6ea936dc3f 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -637,6 +637,9 @@ static int ti_hecc_rx_poll(struct napi_struct *napi, int quota) mbx_mask = hecc_read(priv, HECC_CANMIM); mbx_mask |= HECC_TX_MBOX_MASK; hecc_write(priv, HECC_CANMIM, mbx_mask); + } else { + /* repoll is done only if whole budget is used */ + num_pkts = quota; } return num_pkts; From 5c2cb02edf79ad79d9b8d07c6d52243a948c4c9f Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 23 Nov 2017 15:44:35 +0100 Subject: [PATCH 286/333] can: peak/pci: fix potential bug when probe() fails PCI/PCIe drivers for PEAK-System CAN/CAN-FD interfaces do some access to the PCI config during probing. In case one of these accesses fails, a POSITIVE PCIBIOS_xxx error code is returned back. This POSITIVE error code MUST be converted into a NEGATIVE errno for the probe() function to indicate it failed. Using the pcibios_err_to_errno() function, we make sure that the return code will always be negative. Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/peak_canfd/peak_pciefd_main.c | 5 ++++- drivers/net/can/sja1000/peak_pci.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c index b4efd711f824..788c3464a3b0 100644 --- a/drivers/net/can/peak_canfd/peak_pciefd_main.c +++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c @@ -825,7 +825,10 @@ err_release_regions: err_disable_pci: pci_disable_device(pdev); - return err; + /* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while + * the probe() function must return a negative errno in case of failure + * (err is unchanged if negative) */ + return pcibios_err_to_errno(err); } /* free the board structure object, as well as its resources: */ diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index 131026fbc2d7..5adc95c922ee 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -717,7 +717,10 @@ failure_release_regions: failure_disable_pci: pci_disable_device(pdev); - return err; + /* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while + * the probe() function must return a negative errno in case of failure + * (err is unchanged if negative) */ + return pcibios_err_to_errno(err); } static void peak_pci_remove(struct pci_dev *pdev) From 658f534c036352a06584c8b2e71a547915173ba9 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 22 Nov 2017 13:01:08 +0100 Subject: [PATCH 287/333] can: flexcan: Update IRQ Err Passive information The flexcan IP cores used on MX25 and MX35 do not generate Error Passive IRQs. Update the IP core overview table in the driver accordingly. Suggested-by: ZHU Yi (ST-FIR/ENG1-Zhu) Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index a13a4896a8bd..eefddae2e99a 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -184,9 +184,9 @@ * Below is some version info we got: * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR re- * Filter? connected? Passive detection ception in MB - * MX25 FlexCAN2 03.00.00.00 no no ? no no + * MX25 FlexCAN2 03.00.00.00 no no no no no * MX28 FlexCAN2 03.00.04.00 yes yes no no no - * MX35 FlexCAN2 03.00.00.00 no no ? no no + * MX35 FlexCAN2 03.00.00.00 no no no no no * MX53 FlexCAN2 03.00.00.00 yes no no no no * MX6s FlexCAN3 10.00.12.00 yes yes no no yes * VF610 FlexCAN3 ? no yes ? yes yes? From 29c64b17a0bc72232acc45e9533221d88a262efb Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 27 Nov 2017 09:18:21 +0100 Subject: [PATCH 288/333] can: flexcan: fix VF610 state transition issue Enable FLEXCAN_QUIRK_BROKEN_PERR_STATE for VF610 to report correct state transitions. Tested-by: Mirza Krak Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index eefddae2e99a..0626dcfd1f3d 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -189,7 +189,7 @@ * MX35 FlexCAN2 03.00.00.00 no no no no no * MX53 FlexCAN2 03.00.00.00 yes no no no no * MX6s FlexCAN3 10.00.12.00 yes yes no no yes - * VF610 FlexCAN3 ? no yes ? yes yes? + * VF610 FlexCAN3 ? no yes no yes yes? * * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. */ @@ -297,7 +297,8 @@ static const struct flexcan_devtype_data fsl_imx6q_devtype_data = { static const struct flexcan_devtype_data fsl_vf610_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | - FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP, + FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | + FLEXCAN_QUIRK_BROKEN_PERR_STATE, }; static const struct can_bittiming_const flexcan_bittiming_const = { From 3fed8dbbc42c0639ac03d2361ab5f0606dd49e28 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Mon, 27 Nov 2017 15:49:15 -0800 Subject: [PATCH 289/333] can: mcba_usb: fix typo Fix typo "analizer" --> "Analyzer". Signed-off-by: Martin Kelly Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 7f0272558bef..c4355f0a20d5 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -862,7 +862,7 @@ static int mcba_usb_probe(struct usb_interface *intf, goto cleanup_unregister_candev; } - dev_info(&intf->dev, "Microchip CAN BUS analizer connected\n"); + dev_info(&intf->dev, "Microchip CAN BUS Analyzer connected\n"); return 0; From 1cb35a33a28394fd711bb26ddf3a564f4e9d9125 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Mon, 27 Nov 2017 15:49:16 -0800 Subject: [PATCH 290/333] can: mcba_usb: fix device disconnect bug Currently, when you disconnect the device, the driver infinitely resubmits all URBs, so you see: Rx URB aborted (-32) in an infinite loop. Fix this by catching -EPIPE (what we get in urb->status when the device disconnects) and not resubmitting. With this patch, I can plug and unplug many times and the driver recovers correctly. Signed-off-by: Martin Kelly Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index c4355f0a20d5..ef417dcddbf7 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -592,6 +592,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb) break; case -ENOENT: + case -EPIPE: case -ESHUTDOWN: return; From 1bcab12521d9b23dbaa22ac71184778dcc43e2f6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:40:43 +0000 Subject: [PATCH 291/333] afs: Fix permit refcounting Fix four refcount bugs in afs_cache_permit(): (1) When checking the result of the kzalloc(), we can't just return, but must put 'permits'. (2) We shouldn't put permits immediately after hashing a new permit as we need to keep the pointer stable so that we can check to see if vnode->permit_cache has changed before we decide whether to assign to it. (3) 'permits' is being put twice. (4) We need to put either the replacement or the thing replaced after the assignment to vnode->permit_cache. Without this, lots of the following are seen: Kernel BUG at ffffffffa039857b [verbose debug info unavailable] ------------[ cut here ]------------ Kernel BUG at ffffffffa039858a [verbose debug info unavailable] ------------[ cut here ]------------ The addresses are in the .text..refcount section of the kafs.ko module. Following the relocation records for the __ex_table section shows one to be due to the decrement in afs_put_permits() and the other to be key_get() in afs_cache_permit(). Occasionally, the following is seen: refcount_t overflow at afs_cache_permit+0x57d/0x5c0 [kafs] in cc1[562], uid/euid: 0/0 WARNING: CPU: 0 PID: 562 at kernel/panic.c:657 refcount_error_report+0x9c/0xac ... Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/security.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index 2b00097101b3..b88b7d45fdaa 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -120,7 +120,7 @@ static void afs_hash_permits(struct afs_permits *permits) void afs_cache_permit(struct afs_vnode *vnode, struct key *key, unsigned int cb_break) { - struct afs_permits *permits, *xpermits, *replacement, *new = NULL; + struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL; afs_access_t caller_access = READ_ONCE(vnode->status.caller_access); size_t size = 0; bool changed = false; @@ -204,7 +204,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, new = kzalloc(sizeof(struct afs_permits) + sizeof(struct afs_permit) * size, GFP_NOFS); if (!new) - return; + goto out_put; refcount_set(&new->usage, 1); new->nr_permits = size; @@ -229,8 +229,6 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, afs_hash_permits(new); - afs_put_permits(permits); - /* Now see if the permit list we want is actually already available */ spin_lock(&afs_permits_lock); @@ -262,11 +260,15 @@ found: kfree(new); spin_lock(&vnode->lock); - if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) || - permits != rcu_access_pointer(vnode->permit_cache)) - goto someone_else_changed_it_unlock; - rcu_assign_pointer(vnode->permit_cache, replacement); + zap = rcu_access_pointer(vnode->permit_cache); + if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) && + zap == permits) + rcu_assign_pointer(vnode->permit_cache, replacement); + else + zap = replacement; spin_unlock(&vnode->lock); + afs_put_permits(zap); +out_put: afs_put_permits(permits); return; From f8de483e7440b0d23ce6372b3ef8358841c8827b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:40:43 +0000 Subject: [PATCH 292/333] afs: Properly reset afs_vnode (inode) fields When an AFS inode is allocated by afs_alloc_inode(), the allocated afs_vnode struct isn't necessarily reset from the last time it was used as an inode because the slab constructor is only invoked once when the memory is obtained from the page allocator. This means that information can leak from one inode to the next because we're not calling kmem_cache_zalloc(). Some of the information isn't reset, in particular the permit cache pointer. Bring the clearances up to date. Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/internal.h | 5 ++++- fs/afs/super.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e03910cebdd4..804d1f905622 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -441,7 +441,10 @@ enum afs_lock_state { }; /* - * AFS inode private data + * AFS inode private data. + * + * Note that afs_alloc_inode() *must* reset anything that could incorrectly + * leak from one inode to another. */ struct afs_vnode { struct inode vfs_inode; /* the VFS's inode record */ diff --git a/fs/afs/super.c b/fs/afs/super.c index d3f97da61bdf..1037dd41a622 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -536,7 +536,9 @@ static void afs_kill_super(struct super_block *sb) } /* - * initialise an inode cache slab element prior to any use + * Initialise an inode cache slab element prior to any use. Note that + * afs_alloc_inode() *must* reset anything that could incorrectly leak from one + * inode to another. */ static void afs_i_init_once(void *_vnode) { @@ -568,11 +570,21 @@ static struct inode *afs_alloc_inode(struct super_block *sb) atomic_inc(&afs_count_active_inodes); + /* Reset anything that shouldn't leak from one inode to the next. */ memset(&vnode->fid, 0, sizeof(vnode->fid)); memset(&vnode->status, 0, sizeof(vnode->status)); vnode->volume = NULL; + vnode->lock_key = NULL; + vnode->permit_cache = NULL; + vnode->cb_interest = NULL; +#ifdef CONFIG_AFS_FSCACHE + vnode->cache = NULL; +#endif + vnode->flags = 1 << AFS_VNODE_UNSET; + vnode->cb_type = 0; + vnode->lock_state = AFS_VNODE_LOCK_NONE; _leave(" = %p", &vnode->vfs_inode); return &vnode->vfs_inode; From 7e8b9c1d2e2f5f45db7d40b50d14f606097c25de Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Nov 2017 17:41:29 +0000 Subject: [PATCH 293/333] arm64: module-plts: factor out PLT generation code for ftrace To allow the ftrace trampoline code to reuse the PLT entry routines, factor it out and move it into asm/module.h. Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/module.h | 44 +++++++++++++++++++++++++++++++++ arch/arm64/kernel/module-plts.c | 38 ++-------------------------- 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 19bd97671bb8..11d4aaee82e1 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -45,4 +45,48 @@ extern u64 module_alloc_base; #define module_alloc_base ((u64)_etext - MODULES_VSIZE) #endif +struct plt_entry { + /* + * A program that conforms to the AArch64 Procedure Call Standard + * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or + * IP1 (x17) may be inserted at any branch instruction that is + * exposed to a relocation that supports long branches. Since that + * is exactly what we are dealing with here, we are free to use x16 + * as a scratch register in the PLT veneers. + */ + __le32 mov0; /* movn x16, #0x.... */ + __le32 mov1; /* movk x16, #0x...., lsl #16 */ + __le32 mov2; /* movk x16, #0x...., lsl #32 */ + __le32 br; /* br x16 */ +}; + +static inline struct plt_entry get_plt_entry(u64 val) +{ + /* + * MOVK/MOVN/MOVZ opcode: + * +--------+------------+--------+-----------+-------------+---------+ + * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | + * +--------+------------+--------+-----------+-------------+---------+ + * + * Rd := 0x10 (x16) + * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) + * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) + * sf := 1 (64-bit variant) + */ + return (struct plt_entry){ + cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), + cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), + cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), + cpu_to_le32(0xd61f0200) + }; +} + +static inline bool plt_entries_equal(const struct plt_entry *a, + const struct plt_entry *b) +{ + return a->mov0 == b->mov0 && + a->mov1 == b->mov1 && + a->mov2 == b->mov2; +} + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index d05dbe658409..ebff6c155cac 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -11,21 +11,6 @@ #include #include -struct plt_entry { - /* - * A program that conforms to the AArch64 Procedure Call Standard - * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or - * IP1 (x17) may be inserted at any branch instruction that is - * exposed to a relocation that supports long branches. Since that - * is exactly what we are dealing with here, we are free to use x16 - * as a scratch register in the PLT veneers. - */ - __le32 mov0; /* movn x16, #0x.... */ - __le32 mov1; /* movk x16, #0x...., lsl #16 */ - __le32 mov2; /* movk x16, #0x...., lsl #32 */ - __le32 br; /* br x16 */ -}; - static bool in_init(const struct module *mod, void *loc) { return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; @@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, int i = pltsec->plt_num_entries; u64 val = sym->st_value + rela->r_addend; - /* - * MOVK/MOVN/MOVZ opcode: - * +--------+------------+--------+-----------+-------------+---------+ - * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | - * +--------+------------+--------+-----------+-------------+---------+ - * - * Rd := 0x10 (x16) - * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) - * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) - * sf := 1 (64-bit variant) - */ - plt[i] = (struct plt_entry){ - cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), - cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), - cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), - cpu_to_le32(0xd61f0200) - }; + plt[i] = get_plt_entry(val); /* * Check if the entry we just created is a duplicate. Given that the * relocations are sorted, this will be the last entry we allocated. * (if one exists). */ - if (i > 0 && - plt[i].mov0 == plt[i - 1].mov0 && - plt[i].mov1 == plt[i - 1].mov1 && - plt[i].mov2 == plt[i - 1].mov2) + if (i > 0 && plt_entries_equal(plt + i, plt + i - 1)) return (u64)&plt[i - 1]; pltsec->plt_num_entries++; From be0f272bfc83797f70d44faca86954df62e2bbc0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Nov 2017 17:41:30 +0000 Subject: [PATCH 294/333] arm64: ftrace: emit ftrace-mod.o contents through code When building the arm64 kernel with both CONFIG_ARM64_MODULE_PLTS and CONFIG_DYNAMIC_FTRACE enabled, the ftrace-mod.o object file is built with the kernel and contains a trampoline that is linked into each module, so that modules can be loaded far away from the kernel and still reach the ftrace entry point in the core kernel with an ordinary relative branch, as is emitted by the compiler instrumentation code dynamic ftrace relies on. In order to be able to build out of tree modules, this object file needs to be included into the linux-headers or linux-devel packages, which is undesirable, as it makes arm64 a special case (although a precedent does exist for 32-bit PPC). Given that the trampoline essentially consists of a PLT entry, let's not bother with a source or object file for it, and simply patch it in whenever the trampoline is being populated, using the existing PLT support routines. Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Makefile | 3 --- arch/arm64/include/asm/module.h | 2 +- arch/arm64/kernel/Makefile | 3 --- arch/arm64/kernel/ftrace-mod.S | 18 ------------------ arch/arm64/kernel/ftrace.c | 14 ++++++++------ arch/arm64/kernel/module-plts.c | 12 ++++++++++++ arch/arm64/kernel/module.lds | 1 + 7 files changed, 22 insertions(+), 31 deletions(-) delete mode 100644 arch/arm64/kernel/ftrace-mod.S diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b35788c909f1..b481b4a7c011 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -83,9 +83,6 @@ endif ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds -ifeq ($(CONFIG_DYNAMIC_FTRACE),y) -KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o -endif endif # Default value diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 11d4aaee82e1..4f766178fa6f 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -32,7 +32,7 @@ struct mod_arch_specific { struct mod_plt_sec init; /* for CONFIG_DYNAMIC_FTRACE */ - void *ftrace_trampoline; + struct plt_entry *ftrace_trampoline; }; #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 8265dd790895..067baace74a0 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" endif - -# will be included by each individual module but not by the core kernel itself -extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S deleted file mode 100644 index 00c4025be4ff..000000000000 --- a/arch/arm64/kernel/ftrace-mod.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) 2017 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - - .section ".text.ftrace_trampoline", "ax" - .align 3 -0: .quad 0 -__ftrace_trampoline: - ldr x16, 0b - br x16 -ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c13b1fca0e5b..50986e388d2b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - unsigned long *trampoline; + struct plt_entry trampoline; struct module *mod; /* @@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * is added in the future, but for now, the pr_err() below * deals with a theoretical issue only. */ - trampoline = (unsigned long *)mod->arch.ftrace_trampoline; - if (trampoline[0] != addr) { - if (trampoline[0] != 0) { + trampoline = get_plt_entry(addr); + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &trampoline)) { + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &(struct plt_entry){})) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; } /* point the trampoline to our ftrace entry point */ module_disable_ro(mod); - trampoline[0] = addr; + *mod->arch.ftrace_trampoline = trampoline; module_enable_ro(mod, true); /* update trampoline before patching in the branch */ smp_wmb(); } - addr = (unsigned long)&trampoline[1]; + addr = (unsigned long)(void *)mod->arch.ftrace_trampoline; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index ebff6c155cac..ea640f92fe5a 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -120,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned long core_plts = 0; unsigned long init_plts = 0; Elf64_Sym *syms = NULL; + Elf_Shdr *tramp = NULL; int i; /* @@ -131,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) mod->arch.init.plt = sechdrs + i; + else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && + !strcmp(secstrings + sechdrs[i].sh_name, + ".text.ftrace_trampoline")) + tramp = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) syms = (Elf64_Sym *)sechdrs[i].sh_addr; } @@ -181,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.init.plt_num_entries = 0; mod->arch.init.plt_max_entries = init_plts; + if (tramp) { + tramp->sh_type = SHT_NOBITS; + tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + tramp->sh_addralign = __alignof__(struct plt_entry); + tramp->sh_size = sizeof(struct plt_entry); + } + return 0; } diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index f7c9781a9d48..22e36a21c113 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,4 +1,5 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } .init.plt (NOLOAD) : { BYTE(0) } + .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } } From a349b30250634da20950eb91e2551dcd81f1805d Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Wed, 22 Nov 2017 21:43:59 +0900 Subject: [PATCH 295/333] arm64: pgd: Mark pgd_cache as __ro_after_init pgd_cache is setup once while init stage and never changed after that, so it is good candidate for __ro_after_init Signed-off-by: Jinbum Park Signed-off-by: Will Deacon --- arch/arm64/mm/pgd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 371c5f03a170..051e71ec3335 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -26,7 +26,7 @@ #include #include -static struct kmem_cache *pgd_cache; +static struct kmem_cache *pgd_cache __ro_after_init; pgd_t *pgd_alloc(struct mm_struct *mm) { From 9de52a755cfb6da5ee21a07e3a868bdc8fbfccb3 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 30 Nov 2017 11:56:37 +0000 Subject: [PATCH 296/333] arm64: fpsimd: Fix failure to restore FPSIMD state after signals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fpsimd_update_current_state() function is responsible for loading the FPSIMD state from the user signal frame into the current task during sigreturn. When implementing support for SVE, conditional code was added to this function in order to handle the case where SVE state need to be loaded for the task and merged with the FPSIMD data from the signal frame; however, the FPSIMD-only case was unintentionally dropped. As a result of this, sigreturn does not currently restore the FPSIMD state of the task, except in the case where the system supports SVE and the signal frame contains SVE state in addition to FPSIMD state. This patch fixes this bug by making the copy-in of the FPSIMD data from the signal frame to thread_struct unconditional. This remains a performance regression from v4.14, since the FPSIMD state is now copied into thread_struct and then loaded back, instead of _only_ being loaded into the CPU FPSIMD registers. However, it is essential to call task_fpsimd_load() here anyway in order to ensure that the SVE enable bit in CPACR_EL1 is set correctly before returning to userspace. This could use some refactoring, but since sigreturn is not a fast path I have kept this patch as a pure fix and left the refactoring for later. Cc: Catalin Marinas Cc: Ard Biesheuvel Fixes: 8cd969d28fd2 ("arm64/sve: Signal handling support") Reported-by: Alex Bennée Tested-by: Alex Bennée Reviewed-by: Alex Bennée Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 143b3e72c25e..5084e699447a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state) local_bh_disable(); - if (system_supports_sve() && test_thread_flag(TIF_SVE)) { - current->thread.fpsimd_state = *state; + current->thread.fpsimd_state = *state; + if (system_supports_sve() && test_thread_flag(TIF_SVE)) fpsimd_to_sve(current); - } + task_fpsimd_load(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { From f8ada189550984ee21f27be736042b74a7da1d68 Mon Sep 17 00:00:00 2001 From: Xu YiPing Date: Wed, 15 Nov 2017 15:39:26 +0800 Subject: [PATCH 297/333] arm64: perf: remove unsupported events for Cortex-A73 bus access read/write events are not supported in A73, based on the Cortex-A73 TRM r0p2, section 11.9 Events (pages 11-457 to 11-460). Fixes: 5561b6c5e981 "arm64: perf: add support for Cortex-A73" Acked-by: Julien Thierry Signed-off-by: Xu YiPing Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9eaef51f83ff..3affca3dd96a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] From 770ba06084f7aeadea120922c775d574f3128ba3 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 29 Nov 2017 17:03:03 +0300 Subject: [PATCH 298/333] arm64: cpu_ops: Add missing 'const' qualifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the kernel with an LTO-enabled GCC spits out the following "const" warning for the cpu_ops code: mm/percpu.c:2168:20: error: pcpu_fc_names causes a section type conflict with dt_supported_cpu_ops const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = { ^ arch/arm64/kernel/cpu_ops.c:34:37: note: ‘dt_supported_cpu_ops’ was declared here static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { Fix it by adding missed const qualifiers. Signed-off-by: Yury Norov Reviewed-by: Nick Desaulniers Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index d16978213c5b..ea001241bdd4 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; -static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = { #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL &acpi_parking_protocol_ops, #endif @@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { static const struct cpu_operations * __init cpu_get_ops(const char *name) { - const struct cpu_operations **ops; + const struct cpu_operations *const *ops; ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; From 3a33c7605750fb6a87613044d16b1455e482414d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 30 Nov 2017 18:25:17 +0000 Subject: [PATCH 299/333] arm64: context: Fix comments and remove pointless smp_wmb() The comments in the ASID allocator incorrectly hint at an MP-style idiom using the asid_generation and the active_asids array. In fact, the synchronisation is achieved using a combination of an xchg operation and a spinlock, so update the comments and remove the pointless smp_wmb(). Cc: James Morse Signed-off-by: Will Deacon --- arch/arm64/mm/context.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 28a45a19aae7..6f4017046323 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu) set_reserved_asid_bits(); - /* - * Ensure the generation bump is observed before we xchg the - * active_asids. - */ - smp_wmb(); - for_each_possible_cpu(i) { asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); /* @@ -205,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) asid = atomic64_read(&mm->context.id); /* - * The memory ordering here is subtle. We rely on the control - * dependency between the generation read and the update of - * active_asids to ensure that we are synchronised with a - * parallel rollover (i.e. this pairs with the smp_wmb() in - * flush_context). + * The memory ordering here is subtle. + * If our ASID matches the current generation, then we update + * our active_asids entry with a relaxed xchg. Racing with a + * concurrent rollover means that either: + * + * - We get a zero back from the xchg and end up waiting on the + * lock. Taking the lock synchronises with the rollover and so + * we are forced to see the updated generation. + * + * - We get a valid ASID back from the xchg, which means the + * relaxed xchg in flush_context will treat us as reserved + * because atomic RmWs are totally ordered for a given location. */ if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) From 23f1b8d938c861ee0bbb786162f7ce0685f722ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 20 Nov 2017 10:55:15 +0100 Subject: [PATCH 300/333] fw_cfg: fix driver remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On driver remove(), all objects created during probe() should be removed, but sysfs qemu_fw_cfg/rev file was left. Also reorder functions to match probe() error cleanup code. Cc: stable@vger.kernel.org Signed-off-by: Marc-André Lureau Signed-off-by: Michael S. Tsirkin --- drivers/firmware/qemu_fw_cfg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 5cfe39f7a45f..deb483064f53 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -582,9 +582,10 @@ static int fw_cfg_sysfs_remove(struct platform_device *pdev) { pr_debug("fw_cfg: unloading.\n"); fw_cfg_sysfs_cache_cleanup(); + sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr); + fw_cfg_io_cleanup(); fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset); fw_cfg_kobj_cleanup(fw_cfg_sel_ko); - fw_cfg_io_cleanup(); return 0; } From e60ea67bb60459b95a50a156296041a13e0e380e Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Wed, 29 Nov 2017 09:23:01 +0800 Subject: [PATCH 301/333] virtio: release virtio index when fail to device_register index can be reused by other virtio device. Cc: stable@vger.kernel.org Signed-off-by: weiping zhang Reviewed-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 48230a5e12f2..bf7ff3934d7f 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -333,6 +333,8 @@ int register_virtio_device(struct virtio_device *dev) /* device_register() causes the bus infrastructure to look for a * matching driver. */ err = device_register(&dev->dev); + if (err) + ida_simple_remove(&virtio_index_ida, dev->index); out: if (err) virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); From d9e427f6ab8142d6868eb719e6a7851aafea56b6 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Fri, 1 Dec 2017 10:50:28 +0100 Subject: [PATCH 302/333] virtio_balloon: fix increment of vb->num_pfns in fill_balloon() commit c7cdff0e8647 ("virtio_balloon: fix deadlock on OOM") changed code to increment vb->num_pfns before call to set_page_pfns(), which used to happen only after. This patch fixes boot hang for me on ppc64le KVM guests. Fixes: c7cdff0e8647 ("virtio_balloon: fix deadlock on OOM") Cc: Michael S. Tsirkin Cc: Tetsuo Handa Cc: Michal Hocko Cc: Wei Wang Cc: stable@vger.kernel.org Signed-off-by: Jan Stancek Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 7960746f7597..a1fb52cb3f0a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -174,13 +174,12 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) while ((page = balloon_page_pop(&pages))) { balloon_page_enqueue(&vb->vb_dev_info, page); - vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; - set_page_pfns(vb, vb->pfns + vb->num_pfns, page); vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) adjust_managed_page_count(page, -1); + vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; } num_allocated_pages = vb->num_pfns; From d30fc5126efb0c33b7adf5966d3051db2c3d7721 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:18:34 +0800 Subject: [PATCH 303/333] sctp: only update outstanding_bytes for transmitted queue when doing prsctp_prune Now outstanding_bytes is only increased when appending chunks into one packet and sending it at 1st time, while decreased when it is about to move into retransmit queue. It means outstanding_bytes value is already decreased for all chunks in retransmit queue. However sctp_prsctp_prune_sent is a common function to check the chunks in both transmitted and retransmit queue, it decrease outstanding_bytes when moving a chunk into abandoned queue from either of them. It could cause outstanding_bytes underflow, as it also decreases it's value for the chunks in retransmit queue. This patch fixes it by only updating outstanding_bytes for transmitted queue when pruning queues for prsctp prio policy, the same fix is also needed in sctp_check_transmitted. Fixes: 8dbdf1f5b09c ("sctp: implement prsctp PRIO policy") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 4db012aa25f7..7029f8b99063 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -377,7 +377,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; - if (!chk->tsn_gap_acked) { + if (queue != &asoc->outqueue.retransmit && + !chk->tsn_gap_acked) { if (chk->transport) chk->transport->flight_size -= sctp_data_size(chk); @@ -1434,7 +1435,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* If this chunk has not been acked, stop * considering it as 'outstanding'. */ - if (!tchunk->tsn_gap_acked) { + if (transmitted_queue != &q->retransmit && + !tchunk->tsn_gap_acked) { if (tchunk->transport) tchunk->transport->flight_size -= sctp_data_size(tchunk); From e5f612969c6f965e3bd1158598e0a3b1c4f389b9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:18:35 +0800 Subject: [PATCH 304/333] sctp: abandon the whole msg if one part of a fragmented message is abandoned As rfc3758#section-3.1 demands: A3) When a TSN is "abandoned", if it is part of a fragmented message, all other TSN's within that fragmented message MUST be abandoned at the same time. Besides, if it couldn't handle this, the rest frags would never get assembled in peer side. This patch supports it by adding abandoned flag in sctp_datamsg, when one chunk is being abandoned, set chunk->msg->abandoned as well. Next time when checking for abandoned, go checking chunk->msg->abandoned first. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 ++- net/sctp/chunk.c | 7 +++++++ net/sctp/outqueue.c | 12 ++++++++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 16f949eef52f..2f8f93da5dc2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -503,7 +503,8 @@ struct sctp_datamsg { /* Did the messenge fail to send? */ int send_error; u8 send_failed:1, - can_delay; /* should this message be Nagle delayed */ + can_delay:1, /* should this message be Nagle delayed */ + abandoned:1; /* should this message be abandoned */ }; struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *, diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7b261afc47b9..9213805b558d 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -53,6 +53,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) msg->send_failed = 0; msg->send_error = 0; msg->can_delay = 1; + msg->abandoned = 0; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); } @@ -304,6 +305,9 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (!chunk->asoc->peer.prsctp_capable) return 0; + if (chunk->msg->abandoned) + return 1; + if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { struct sctp_stream_out *streamout = @@ -316,6 +320,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; } + chunk->msg->abandoned = 1; return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && chunk->sent_count > chunk->sinfo.sinfo_timetolive) { @@ -324,10 +329,12 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + chunk->msg->abandoned = 1; return 1; } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && chunk->msg->expires_at && time_after(jiffies, chunk->msg->expires_at)) { + chunk->msg->abandoned = 1; return 1; } /* PRIO policy is processed by sendmsg, not here */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 7029f8b99063..4ab164b5aad0 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -364,10 +364,12 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, transmitted_list) { struct sctp_stream_out *streamout; - if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) + if (!chk->msg->abandoned && + (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) continue; + chk->msg->abandoned = 1; list_del_init(&chk->transmitted_list); sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); @@ -404,10 +406,12 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, q->sched->unsched_all(&asoc->stream); list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { - if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) + if (!chk->msg->abandoned && + (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) continue; + chk->msg->abandoned = 1; sctp_sched_dequeue_common(q, chk); asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; From 779edd7348878a7376c0e3d0f96485c30b5f1b7d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:18:36 +0800 Subject: [PATCH 305/333] sctp: do not abandon the other frags in unsent outq if one msg has outstanding frags Now for the abandoned chunks in unsent outq, it would just free the chunks. Because no tsn is assigned to them yet, there's no need to send fwd tsn to peer, unlike for the abandoned chunks in sent outq. The problem is when parts of the msg have been sent and the other frags are still in unsent outq, if they are abandoned/dropped, the peer would never get this msg reassembled. So these frags in unsent outq can't be dropped if this msg already has outstanding frags. This patch does the check in sctp_chunk_abandoned and sctp_prsctp_prune_unsent. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/chunk.c | 4 ++++ net/sctp/outqueue.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 9213805b558d..7f8baa48e7c2 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -308,6 +308,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (chunk->msg->abandoned) return 1; + if (!chunk->has_tsn && + !(chunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG)) + return 0; + if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { struct sctp_stream_out *streamout = diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 4ab164b5aad0..7d67feeeffc1 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -407,7 +407,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!chk->msg->abandoned && - (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || + (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) || + !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) continue; From cfac7f836a715b91f08c851df915d401a4d52783 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Dec 2017 10:06:56 -0800 Subject: [PATCH 306/333] tcp/dccp: block bh before arming time_wait timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maciej Żenczykowski reported some panics in tcp_twsk_destructor() that might be caused by the following bug. timewait timer is pinned to the cpu, because we want to transition timwewait refcount from 0 to 4 in one go, once everything has been initialized. At the time commit ed2e92394589 ("tcp/dccp: fix timewait races in timer handling") was merged, TCP was always running from BH habdler. After commit 5413d1babe8f ("net: do not block BH while processing socket backlog") we definitely can run tcp_time_wait() from process context. We need to block BH in the critical section so that the pinned timer has still its purpose. This bug is more likely to happen under stress and when very small RTO are used in datacenter flows. Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog") Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/dccp/minisocks.c | 6 ++++++ net/ipv4/tcp_minisocks.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index abd07a443219..178bb9833311 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; + /* tw_timer is pinned, so we need to make sure BH are disabled + * in following section, otherwise timer handler could run before + * we complete the initialization. + */ + local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); inet_twsk_put(tw); + local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e36eff0403f4..b079b619b60c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -310,10 +310,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (state == TCP_TIME_WAIT) timeo = TCP_TIMEWAIT_LEN; + /* tw_timer is pinned, so we need to make sure BH are disabled + * in following section, otherwise timer handler could run before + * we complete the initialization. + */ + local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); inet_twsk_put(tw); + local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than From c7799c067c2ae33e348508c8afec354f3257ff25 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Wed, 29 Nov 2017 12:48:42 +0200 Subject: [PATCH 307/333] tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv() Remove the second tipc_rcv() call in tipc_udp_recv(). We have just checked that the bearer is not up, and calling tipc_rcv() with a bearer that is not up leads to a TIPC div-by-zero crash in tipc_node_calculate_timer(). The crash is rare in practice, but can happen like this: We're enabling a bearer, but it's not yet up and fully initialized. At the same time we receive a discovery packet, and in tipc_udp_recv() we end up calling tipc_rcv() with the not-yet-initialized bearer, causing later the div-by-zero crash in tipc_node_calculate_timer(). Jon Maloy explains the impact of removing the second tipc_rcv() call: "link setup in the worst case will be delayed until the next arriving discovery messages, 1 sec later, and this is an acceptable delay." As the tipc_rcv() call is removed, just leave the function via the rcu_out label, so that we will kfree_skb(). [ 12.590450] Own node address <1.1.1>, network identity 1 [ 12.668088] divide error: 0000 [#1] SMP [ 12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-dirty #1 [ 12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 12.682095] task: ffff8c2a761edb80 task.stack: ffffa41cc0cac000 [ 12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] [ 12.686486] RSP: 0018:ffff8c2a7fc838a0 EFLAGS: 00010246 [ 12.688451] RAX: 0000000000000000 RBX: ffff8c2a5b382600 RCX: 0000000000000000 [ 12.691197] RDX: 0000000000000000 RSI: ffff8c2a5b382600 RDI: ffff8c2a5b382600 [ 12.693945] RBP: ffff8c2a7fc838b0 R08: 0000000000000001 R09: 0000000000000001 [ 12.696632] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8c2a5d8949d8 [ 12.699491] R13: ffffffff95ede400 R14: 0000000000000000 R15: ffff8c2a5d894800 [ 12.702338] FS: 0000000000000000(0000) GS:ffff8c2a7fc80000(0000) knlGS:0000000000000000 [ 12.705099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 12.706776] CR2: 0000000001bb9440 CR3: 00000000bd009001 CR4: 00000000003606e0 [ 12.708847] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 12.711016] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 12.712627] Call Trace: [ 12.713390] [ 12.714011] tipc_node_check_dest+0x2e8/0x350 [tipc] [ 12.715286] tipc_disc_rcv+0x14d/0x1d0 [tipc] [ 12.716370] tipc_rcv+0x8b0/0xd40 [tipc] [ 12.717396] ? minmax_running_min+0x2f/0x60 [ 12.718248] ? dst_alloc+0x4c/0xa0 [ 12.718964] ? tcp_ack+0xaf1/0x10b0 [ 12.719658] ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc] [ 12.720634] tipc_udp_recv+0x71/0x1d0 [tipc] [ 12.721459] ? dst_alloc+0x4c/0xa0 [ 12.722130] udp_queue_rcv_skb+0x264/0x490 [ 12.722924] __udp4_lib_rcv+0x21e/0x990 [ 12.723670] ? ip_route_input_rcu+0x2dd/0xbf0 [ 12.724442] ? tcp_v4_rcv+0x958/0xa40 [ 12.725039] udp_rcv+0x1a/0x20 [ 12.725587] ip_local_deliver_finish+0x97/0x1d0 [ 12.726323] ip_local_deliver+0xaf/0xc0 [ 12.726959] ? ip_route_input_noref+0x19/0x20 [ 12.727689] ip_rcv_finish+0xdd/0x3b0 [ 12.728307] ip_rcv+0x2ac/0x360 [ 12.728839] __netif_receive_skb_core+0x6fb/0xa90 [ 12.729580] ? udp4_gro_receive+0x1a7/0x2c0 [ 12.730274] __netif_receive_skb+0x1d/0x60 [ 12.730953] ? __netif_receive_skb+0x1d/0x60 [ 12.731637] netif_receive_skb_internal+0x37/0xd0 [ 12.732371] napi_gro_receive+0xc7/0xf0 [ 12.732920] receive_buf+0x3c3/0xd40 [ 12.733441] virtnet_poll+0xb1/0x250 [ 12.733944] net_rx_action+0x23e/0x370 [ 12.734476] __do_softirq+0xc5/0x2f8 [ 12.734922] irq_exit+0xfa/0x100 [ 12.735315] do_IRQ+0x4f/0xd0 [ 12.735680] common_interrupt+0xa2/0xa2 [ 12.736126] [ 12.736416] RIP: 0010:native_safe_halt+0x6/0x10 [ 12.736925] RSP: 0018:ffffa41cc0cafe90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff4d [ 12.737756] RAX: 0000000000000000 RBX: ffff8c2a761edb80 RCX: 0000000000000000 [ 12.738504] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 12.739258] RBP: ffffa41cc0cafe90 R08: 0000014b5b9795e5 R09: ffffa41cc12c7e88 [ 12.740118] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002 [ 12.740964] R13: ffff8c2a761edb80 R14: 0000000000000000 R15: 0000000000000000 [ 12.741831] default_idle+0x2a/0x100 [ 12.742323] arch_cpu_idle+0xf/0x20 [ 12.742796] default_idle_call+0x28/0x40 [ 12.743312] do_idle+0x179/0x1f0 [ 12.743761] cpu_startup_entry+0x1d/0x20 [ 12.744291] start_secondary+0x112/0x120 [ 12.744816] secondary_startup_64+0xa5/0xa5 [ 12.745367] Code: b9 f4 01 00 00 48 89 c2 48 c1 ea 02 48 3d d3 07 00 00 48 0f 47 d1 49 8b 0c 24 48 39 d1 76 07 49 89 14 24 48 89 d1 31 d2 48 89 df <48> f7 f1 89 c6 e8 81 6e ff ff 5b 41 5c 5d c3 66 90 66 2e 0f 1f [ 12.747527] RIP: tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] RSP: ffff8c2a7fc838a0 [ 12.748555] ---[ end trace 1399ab83390650fd ]--- [ 12.749296] Kernel panic - not syncing: Fatal exception in interrupt [ 12.750123] Kernel Offset: 0x13200000 from 0xffffffff82000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 12.751215] Rebooting in 60 seconds.. Fixes: c9b64d492b1f ("tipc: add replicast peer discovery") Signed-off-by: Tommi Rantala Cc: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ecca64fc6a6f..3deabcab4882 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) goto rcu_out; } - tipc_rcv(sock_net(sk), skb, b); - rcu_read_unlock(); - return 0; - rcu_out: rcu_read_unlock(); out: From a0da456bbf95d2a9294799bb05c61bfb24736bb7 Mon Sep 17 00:00:00 2001 From: Max Uvarov Date: Thu, 30 Nov 2017 13:08:29 +0300 Subject: [PATCH 308/333] net: phy-micrel: check return code in flp center function Fix obvious typo that first return value is set but not checked. Signed-off-by: Max Uvarov Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index fdb43dd9b5cd..ab4614113403 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -496,16 +496,18 @@ static int ksz9031_of_load_skew_values(struct phy_device *phydev, return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval); } +/* Center KSZ9031RNX FLP timing at 16ms. */ static int ksz9031_center_flp_timing(struct phy_device *phydev) { int result; - /* Center KSZ9031RNX FLP timing at 16ms. */ result = ksz9031_extended_write(phydev, OP_DATA, 0, MII_KSZ9031RN_FLP_BURST_TX_HI, 0x0006); + if (result) + return result; + result = ksz9031_extended_write(phydev, OP_DATA, 0, MII_KSZ9031RN_FLP_BURST_TX_LO, 0x1A80); - if (result) return result; From acf1c02f023926b8b04672a9e81b1711ae681619 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 30 Nov 2017 13:59:11 +0000 Subject: [PATCH 309/333] sfp: fix RX_LOS signal handling The options word is a be16 quantity, so we need to test the flags having converted the endian-ness. Convert the flag bits to be16, which can be optimised by the compiler, rather than converting a variable at runtime. Reported-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index e381811e5f11..3355141688a6 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -355,7 +355,7 @@ static void sfp_sm_link_check_los(struct sfp *sfp) * SFP_OPTIONS_LOS_NORMAL are set? For now, we assume * the same as SFP_OPTIONS_LOS_NORMAL set. */ - if (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED) + if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED)) los ^= SFP_F_LOS; if (los) @@ -582,7 +582,8 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) if (event == SFP_E_TX_FAULT) sfp_sm_fault(sfp, true); else if (event == - (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ? + (sfp->id.ext.options & + cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ? SFP_E_LOS_HIGH : SFP_E_LOS_LOW)) sfp_sm_link_up(sfp); break; @@ -592,7 +593,8 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) sfp_sm_link_down(sfp); sfp_sm_fault(sfp, true); } else if (event == - (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ? + (sfp->id.ext.options & + cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ? SFP_E_LOS_LOW : SFP_E_LOS_HIGH)) { sfp_sm_link_down(sfp); sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0); From 710dfbb01a8ac0fc8d0cc191131bd84dc3796497 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 30 Nov 2017 13:59:16 +0000 Subject: [PATCH 310/333] sfp: improve RX_LOS handling There are two bits in the option word for the RX_LOS signal. One reports that the RX_LOS signal is active high, the other reports that it is active low. When both or neither are set, the result is not well defined in the specification. Rather than assuming that neither set means normal RX_LOS, take this as meaning no RX_LOS signal available, thereby ignoring the signal. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 3355141688a6..c1aab6a81ce9 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -351,12 +351,13 @@ static void sfp_sm_link_check_los(struct sfp *sfp) { unsigned int los = sfp->state & SFP_F_LOS; - /* FIXME: what if neither SFP_OPTIONS_LOS_INVERTED nor - * SFP_OPTIONS_LOS_NORMAL are set? For now, we assume - * the same as SFP_OPTIONS_LOS_NORMAL set. + /* If neither SFP_OPTIONS_LOS_INVERTED nor SFP_OPTIONS_LOS_NORMAL + * are set, we assume that no LOS signal is available. */ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED)) los ^= SFP_F_LOS; + else if (!(sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL))) + los = 0; if (los) sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0); @@ -364,6 +365,22 @@ static void sfp_sm_link_check_los(struct sfp *sfp) sfp_sm_link_up(sfp); } +static bool sfp_los_event_active(struct sfp *sfp, unsigned int event) +{ + return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) && + event == SFP_E_LOS_LOW) || + (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) && + event == SFP_E_LOS_HIGH); +} + +static bool sfp_los_event_inactive(struct sfp *sfp, unsigned int event) +{ + return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) && + event == SFP_E_LOS_HIGH) || + (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) && + event == SFP_E_LOS_LOW); +} + static void sfp_sm_fault(struct sfp *sfp, bool warn) { if (sfp->sm_retries && !--sfp->sm_retries) { @@ -581,10 +598,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) case SFP_S_WAIT_LOS: if (event == SFP_E_TX_FAULT) sfp_sm_fault(sfp, true); - else if (event == - (sfp->id.ext.options & - cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ? - SFP_E_LOS_HIGH : SFP_E_LOS_LOW)) + else if (sfp_los_event_inactive(sfp, event)) sfp_sm_link_up(sfp); break; @@ -592,10 +606,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) if (event == SFP_E_TX_FAULT) { sfp_sm_link_down(sfp); sfp_sm_fault(sfp, true); - } else if (event == - (sfp->id.ext.options & - cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) ? - SFP_E_LOS_LOW : SFP_E_LOS_HIGH)) { + } else if (sfp_los_event_active(sfp, event)) { sfp_sm_link_down(sfp); sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0); } From ec7681bde6d0e53ea5ef564477c8656fc318023a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 30 Nov 2017 13:59:21 +0000 Subject: [PATCH 311/333] sfp: warn about modules requiring address change sequence We do not support SFP modules which require the address change sequence as detailed by SFF 8472 revision 1.22 section 8.9. Warn when these modules are inserted, and treat them as SFF8079 modules for ethtool. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index c1aab6a81ce9..9dfc1c4c954f 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -487,6 +487,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp) return -EINVAL; } + /* If the module requires address swap mode, warn about it */ + if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) + dev_warn(sfp->dev, + "module address swap to access page 0xA2 is not supported.\n"); + return sfp_module_insert(sfp->sfp_bus, &sfp->id); } @@ -652,7 +657,8 @@ static int sfp_module_info(struct sfp *sfp, struct ethtool_modinfo *modinfo) { /* locking... and check module is present */ - if (sfp->id.ext.sff8472_compliance) { + if (sfp->id.ext.sff8472_compliance && + !(sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)) { modinfo->type = ETH_MODULE_SFF_8472; modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; } else { From 2012b7d6b2868c532f22d9172c8b24611637eb48 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 30 Nov 2017 13:59:26 +0000 Subject: [PATCH 312/333] phylink: ensure we take the link down when phylink_stop() is called Ensure that we tell the MAC to take the link down when phylink_stop() is called, and that this completes prior to phylink_stop() returns. Reported-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index e3bbc70372d3..5dc9668dde34 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -773,6 +773,7 @@ void phylink_stop(struct phylink *pl) sfp_upstream_stop(pl->sfp_bus); set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); + queue_work(system_power_efficient_wq, &pl->resolve); flush_work(&pl->resolve); } EXPORT_SYMBOL_GPL(phylink_stop); From 4db2b604c05afc3d2678fe01d3136c015df313ec Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Nov 2017 11:47:28 +0100 Subject: [PATCH 313/333] move libgcc.h to include/linux Introducing a new include/lib directory just for this file totally messes up tab completion for include/linux, which is highly annoying. Move it to include/linux where we have headers for all kinds of other lib/ code as well. Signed-off-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- include/{lib => linux}/libgcc.h | 0 lib/ashldi3.c | 2 +- lib/ashrdi3.c | 2 +- lib/cmpdi2.c | 2 +- lib/lshrdi3.c | 2 +- lib/muldi3.c | 2 +- lib/ucmpdi2.c | 2 +- 7 files changed, 6 insertions(+), 6 deletions(-) rename include/{lib => linux}/libgcc.h (100%) diff --git a/include/lib/libgcc.h b/include/linux/libgcc.h similarity index 100% rename from include/lib/libgcc.h rename to include/linux/libgcc.h diff --git a/lib/ashldi3.c b/lib/ashldi3.c index 1b6087db95a5..3ffc46e3bb6c 100644 --- a/lib/ashldi3.c +++ b/lib/ashldi3.c @@ -16,7 +16,7 @@ #include -#include +#include long long notrace __ashldi3(long long u, word_type b) { diff --git a/lib/ashrdi3.c b/lib/ashrdi3.c index 2e67c97ac65a..ea054550f0e8 100644 --- a/lib/ashrdi3.c +++ b/lib/ashrdi3.c @@ -16,7 +16,7 @@ #include -#include +#include long long notrace __ashrdi3(long long u, word_type b) { diff --git a/lib/cmpdi2.c b/lib/cmpdi2.c index 6d7ebf6c2b86..2250da7e503e 100644 --- a/lib/cmpdi2.c +++ b/lib/cmpdi2.c @@ -16,7 +16,7 @@ #include -#include +#include word_type notrace __cmpdi2(long long a, long long b) { diff --git a/lib/lshrdi3.c b/lib/lshrdi3.c index 8e845f4bb65f..99cfa5721f2d 100644 --- a/lib/lshrdi3.c +++ b/lib/lshrdi3.c @@ -17,7 +17,7 @@ */ #include -#include +#include long long notrace __lshrdi3(long long u, word_type b) { diff --git a/lib/muldi3.c b/lib/muldi3.c index 88938543e10a..54c8b3123376 100644 --- a/lib/muldi3.c +++ b/lib/muldi3.c @@ -15,7 +15,7 @@ */ #include -#include +#include #define W_TYPE_SIZE 32 diff --git a/lib/ucmpdi2.c b/lib/ucmpdi2.c index 49a53505c8e3..25ca2d4c1e19 100644 --- a/lib/ucmpdi2.c +++ b/lib/ucmpdi2.c @@ -15,7 +15,7 @@ */ #include -#include +#include word_type __ucmpdi2(unsigned long long a, unsigned long long b) { From da894ff100be9044c490f47f61541481b4f42b1f Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Nov 2017 14:15:32 -0800 Subject: [PATCH 314/333] RISC-V: __io_writes should respect the length argument Whoops -- I must have just been being an idiot again. Thanks to Segher for finding the bug :). CC: Segher Boessenkool Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index c1f32cfcc79b..507ee6a16e67 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -250,7 +250,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) const ctype *buf = buffer; \ \ do { \ - __raw_writeq(*buf++, addr); \ + __raw_write ## len(*buf++, addr); \ } while (--count); \ } \ afence; \ From a7f3f939dd7d8398acebecd1ceb2e9e7ffbe91d2 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Fri, 1 Dec 2017 03:13:02 -0500 Subject: [PATCH 315/333] bnxt_en: Need to unconditionally shut down RoCE in bnxt_shutdown The current 'bnxt_shutdown' implementation only invokes 'bnxt_ulp_shutdown' to shut down RoCE in the case when the system is in the path of power off (SYSTEM_POWER_OFF). While this may work in most cases, it does not work in the smart NIC case, when Linux 'reboot' command is initiated from the Linux that runs on the ARM cores of the NIC card. In this particular case, Linux 'reboot' results in a system 'L3' level reset where the entire ARM and associated subsystems are being reset, but at the same time, Nitro core is being kept in sane state (to allow external PCIe connected servers to continue to work). Without properly shutting down RoCE and freeing all associated resources, it results in the ARM core to hang immediately after the 'reboot' By always invoking 'bnxt_ulp_shutdown' in 'bnxt_shutdown', it fixes the above issue Fixes: 0efd2fc65c92 ("bnxt_en: Add a callback to inform RDMA driver during PCI shutdown.") Signed-off-by: Ray Jui Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c5c38d4b7d1c..7f173eb42aa2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8263,8 +8263,9 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (netif_running(dev)) dev_close(dev); + bnxt_ulp_shutdown(bp); + if (system_state == SYSTEM_POWER_OFF) { - bnxt_ulp_shutdown(bp); bnxt_clear_int_mode(bp); pci_wake_from_d3(pdev, bp->wol); pci_set_power_state(pdev, PCI_D3hot); From c8fb7b8259c67b86cd93a71c85e78b34d2c96fdc Mon Sep 17 00:00:00 2001 From: Sunil Challa Date: Fri, 1 Dec 2017 03:13:03 -0500 Subject: [PATCH 316/333] bnxt_en: wildcard smac while creating tunnel decap filter While creating a decap filter the tunnel smac need not (and must not) be specified as we cannot ascertain the neighbor in the recv path. 'ttl' match is also not needed for the decap filter and must be wild-carded. Fixes: f484f6782e01 ("bnxt_en: add hwrm FW cmds for cfa_encap_record and decap_filter") Signed-off-by: Sunil Challa Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index d5031f436f83..96bff48af971 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -532,10 +532,8 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, } if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) { - enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR | - CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR; + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR; ether_addr_copy(req.dst_macaddr, l2_info->dmac); - ether_addr_copy(req.src_macaddr, l2_info->smac); } if (l2_info->num_vlans) { enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID; @@ -1012,10 +1010,9 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, if (rc) goto put_decap; - decap_key->ttl = tun_key.ttl; decap_l2_info = &decap_node->l2_info; + /* decap smac is wildcarded */ ether_addr_copy(decap_l2_info->dmac, l2_info.smac); - ether_addr_copy(decap_l2_info->smac, l2_info.dmac); if (l2_info.num_vlans) { decap_l2_info->num_vlans = l2_info.num_vlans; decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid; From e9ecc731a87912d209d6e9b4ed20ed70451c08cb Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Fri, 1 Dec 2017 03:13:04 -0500 Subject: [PATCH 317/333] bnxt_en: fix dst/src fid for vxlan encap/decap actions For flows that involve a vxlan encap action, the vxlan sock interface may be specified as the outgoing interface. The driver must resolve the outgoing PF interface used by this socket and use the dst_fid of the PF in the hwrm_cfa_encap_record_alloc cmd. Similarily for flows that have a vxlan decap action, the fid of the incoming PF interface must be used as the src_fid in the hwrm_cfa_decap_filter_alloc cmd. Fixes: 8c95f773b4a3 ("bnxt_en: add support for Flower based vxlan encap/decap offload") Signed-off-by: Sathya Perla Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 48 +++++++++++--------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 96bff48af971..3d201d7324bd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -56,7 +56,6 @@ static int bnxt_tc_parse_redir(struct bnxt *bp, { int ifindex = tcf_mirred_ifindex(tc_act); struct net_device *dev; - u16 dst_fid; dev = __dev_get_by_index(dev_net(bp->dev), ifindex); if (!dev) { @@ -64,15 +63,7 @@ static int bnxt_tc_parse_redir(struct bnxt *bp, return -EINVAL; } - /* find the FID from dev */ - dst_fid = bnxt_flow_get_dst_fid(bp, dev); - if (dst_fid == BNXT_FID_INVALID) { - netdev_info(bp->dev, "can't get fid for ifindex=%d", ifindex); - return -EINVAL; - } - actions->flags |= BNXT_TC_ACTION_FLAG_FWD; - actions->dst_fid = dst_fid; actions->dst_dev = dev; return 0; } @@ -160,13 +151,17 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, if (rc) return rc; - /* Tunnel encap/decap action must be accompanied by a redirect action */ - if ((actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP || - actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) && - !(actions->flags & BNXT_TC_ACTION_FLAG_FWD)) { - netdev_info(bp->dev, - "error: no redir action along with encap/decap"); - return -EINVAL; + if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) { + if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) { + /* dst_fid is PF's fid */ + actions->dst_fid = bp->pf.fw_fid; + } else { + /* find the FID from dst_dev */ + actions->dst_fid = + bnxt_flow_get_dst_fid(bp, actions->dst_dev); + if (actions->dst_fid == BNXT_FID_INVALID) + return -EINVAL; + } } return rc; @@ -899,10 +894,10 @@ static void bnxt_tc_put_decap_handle(struct bnxt *bp, static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, struct ip_tunnel_key *tun_key, - struct bnxt_tc_l2_key *l2_info, - struct net_device *real_dst_dev) + struct bnxt_tc_l2_key *l2_info) { #ifdef CONFIG_INET + struct net_device *real_dst_dev = bp->dev; struct flowi4 flow = { {0} }; struct net_device *dst_dev; struct neighbour *nbr; @@ -1006,7 +1001,7 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, */ tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src; tun_key.tp_dst = flow->tun_key.tp_dst; - rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info, bp->dev); + rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info); if (rc) goto put_decap; @@ -1092,8 +1087,7 @@ static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE) goto done; - rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info, - flow->actions.dst_dev); + rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info); if (rc) goto put_encap; @@ -1166,6 +1160,15 @@ static int __bnxt_tc_del_flow(struct bnxt *bp, return 0; } +static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow, + u16 src_fid) +{ + if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) + flow->src_fid = bp->pf.fw_fid; + else + flow->src_fid = src_fid; +} + /* Add a new flow or replace an existing flow. * Notes on locking: * There are essentially two critical sections here. @@ -1201,7 +1204,8 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow); if (rc) goto free_node; - flow->src_fid = src_fid; + + bnxt_tc_set_src_fid(bp, flow, src_fid); if (!bnxt_tc_can_offload(bp, flow)) { rc = -ENOSPC; From ebd5818cc5d4847897d7fe872e2d9799d7b7fcbb Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 1 Dec 2017 03:13:05 -0500 Subject: [PATCH 318/333] bnxt_en: Fix a variable scoping in bnxt_hwrm_do_send_msg() short_input variable is assigned to another data pointer which is referred out of its scope. Fix it by moving short_input definition to the beginning of bnxt_hwrm_do_send_msg() function. No failure has been reported so far due to this issue. Fixes: e605db801bde ("bnxt_en: Support for Short Firmware Message") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7f173eb42aa2..28f5e94274ee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3368,6 +3368,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, u16 cp_ring_id, len = 0; struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr; u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN; + struct hwrm_short_input short_input = {0}; req->seq_id = cpu_to_le16(bp->hwrm_cmd_seq++); memset(resp, 0, PAGE_SIZE); @@ -3376,7 +3377,6 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, if (bp->flags & BNXT_FLAG_SHORT_CMD) { void *short_cmd_req = bp->hwrm_short_cmd_req_addr; - struct hwrm_short_input short_input = {0}; memcpy(short_cmd_req, req, msg_len); memset(short_cmd_req + msg_len, 0, BNXT_HWRM_MAX_REQ_LEN - From 6e474083f3daf3a3546737f5d7d502ad12eb257c Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Fri, 1 Dec 2017 05:10:36 -0500 Subject: [PATCH 319/333] vhost: fix skb leak in handle_rx() Matthew found a roughly 40% tcp throughput regression with commit c67df11f(vhost_net: try batch dequing from skb array) as discussed in the following thread: https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html Eventually we figured out that it was a skb leak in handle_rx() when sending packets to the VM. This usually happens when a guest can not drain out vq as fast as vhost fills in, afterwards it sets off the traffic jam and leaks skb(s) which occurs as no headcount to send on the vq from vhost side. This can be avoided by making sure we have got enough headcount before actually consuming a skb from the batched rx array while transmitting, which is simply done by moving checking the zero headcount a bit ahead. Signed-off-by: Wei Xu Reported-by: Matthew Rosato Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/net.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 8d626d7c2e7e..c7bdeb655646 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -778,16 +778,6 @@ static void handle_rx(struct vhost_net *net) /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) goto out; - if (nvq->rx_array) - msg.msg_control = vhost_net_buf_consume(&nvq->rxq); - /* On overrun, truncate and discard */ - if (unlikely(headcount > UIO_MAXIOV)) { - iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); - err = sock->ops->recvmsg(sock, &msg, - 1, MSG_DONTWAIT | MSG_TRUNC); - pr_debug("Discarded rx packet: len %zd\n", sock_len); - continue; - } /* OK, now we need to know about added descriptors. */ if (!headcount) { if (unlikely(vhost_enable_notify(&net->dev, vq))) { @@ -800,6 +790,16 @@ static void handle_rx(struct vhost_net *net) * they refilled. */ goto out; } + if (nvq->rx_array) + msg.msg_control = vhost_net_buf_consume(&nvq->rxq); + /* On overrun, truncate and discard */ + if (unlikely(headcount > UIO_MAXIOV)) { + iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); + err = sock->ops->recvmsg(sock, &msg, + 1, MSG_DONTWAIT | MSG_TRUNC); + pr_debug("Discarded rx packet: len %zd\n", sock_len); + continue; + } /* We don't need to be notified again. */ iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len); fixup = msg.msg_iter; From c33ee15b3820a03cf8229ba9415084197b827f8c Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Fri, 1 Dec 2017 05:10:37 -0500 Subject: [PATCH 320/333] tun: free skb in early errors tun_recvmsg() supports accepting skb by msg_control after commit ac77cfd4258f ("tun: support receiving skb through msg_control"), the skb if presented should be freed no matter how far it can go along, otherwise it would be leaked. This patch fixes several missed cases. Signed-off-by: Wei Xu Reported-by: Matthew Rosato Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/tun.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 95749006d687..4f4a842a1c9c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1952,8 +1952,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, tun_debug(KERN_INFO, tun, "tun_do_read\n"); - if (!iov_iter_count(to)) + if (!iov_iter_count(to)) { + if (skb) + kfree_skb(skb); return 0; + } if (!skb) { /* Read frames from ring */ @@ -2069,22 +2072,24 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, { struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_struct *tun = tun_get(tfile); + struct sk_buff *skb = m->msg_control; int ret; - if (!tun) - return -EBADFD; + if (!tun) { + ret = -EBADFD; + goto out_free_skb; + } if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { ret = -EINVAL; - goto out; + goto out_put_tun; } if (flags & MSG_ERRQUEUE) { ret = sock_recv_errqueue(sock->sk, m, total_len, SOL_PACKET, TUN_TX_TIMESTAMP); goto out; } - ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, - m->msg_control); + ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb); if (ret > (ssize_t)total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; @@ -2092,6 +2097,13 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, out: tun_put(tun); return ret; + +out_put_tun: + tun_put(tun); +out_free_skb: + if (skb) + kfree_skb(skb); + return ret; } static int tun_peek_len(struct socket *sock) From 61d78537843e676e7f56ac6db333db0c0529b892 Mon Sep 17 00:00:00 2001 From: Wei Xu Date: Fri, 1 Dec 2017 05:10:38 -0500 Subject: [PATCH 321/333] tap: free skb if flags error tap_recvmsg() supports accepting skb by msg_control after commit 3b4ba04acca8 ("tap: support receiving skb from msg_control"), the skb if presented should be freed within the function, otherwise it would be leaked. Signed-off-by: Wei Xu Reported-by: Matthew Rosato Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/tap.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index e9489b88407c..0a886fda0129 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_queue *q, DEFINE_WAIT(wait); ssize_t ret = 0; - if (!iov_iter_count(to)) + if (!iov_iter_count(to)) { + if (skb) + kfree_skb(skb); return 0; + } if (skb) goto put; @@ -1154,11 +1157,14 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct tap_queue *q = container_of(sock, struct tap_queue, sock); + struct sk_buff *skb = m->msg_control; int ret; - if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) { + if (skb) + kfree_skb(skb); return -EINVAL; - ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, - m->msg_control); + } + ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb); if (ret > total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; From bc3ab70584696cb798b9e1e0ac8e6ced5fd4c3b8 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 1 Dec 2017 10:14:49 +0100 Subject: [PATCH 322/333] s390/qeth: fix thinko in IPv4 multicast address tracking Commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") reworked how secondary addresses are managed for qeth devices. Instead of dropping & subsequently re-adding all addresses on every ndo_set_rx_mode() call, qeth now keeps track of the addresses that are currently registered with the HW. On a ndo_set_rx_mode(), we thus only need to do (de-)registration requests for the addresses that have actually changed. On L3 devices, the lookup for IPv4 Multicast addresses checks the wrong hashtable - and thus never finds a match. As a result, we first delete *all* such addresses, and then re-add them again. So each set_rx_mode() causes a short period where the IPv4 Multicast addresses are not registered, and the card stops forwarding inbound traffic for them. Fix this by setting the ->is_multicast flag on the lookup object, thus enabling qeth_l3_ip_from_hash() to search the correct hashtable and find a match there. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index aadd384316a3..e79936b50698 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1376,6 +1376,7 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev) tmp->u.a4.addr = be32_to_cpu(im4->multiaddr); memcpy(tmp->mac, buf, sizeof(tmp->mac)); + tmp->is_multicast = 1; ipm = qeth_l3_ip_from_hash(card, tmp); if (ipm) { From 6d69b1f1eb7a2edf8a3547f361c61f2538e054bb Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 1 Dec 2017 10:14:50 +0100 Subject: [PATCH 323/333] s390/qeth: fix GSO throughput regression Using GSO with small MTUs currently results in a substantial throughput regression - which is caused by how qeth needs to map non-linear skbs into its IO buffer elements: compared to a linear skb, each GSO-segmented skb effectively consumes twice as many buffer elements (ie two instead of one) due to the additional header-only part. This causes the Output Queue to be congested with low-utilized IO buffers. Fix this as follows: If the MSS is low enough so that a non-SG GSO segmentation produces order-0 skbs (currently ~3500 byte), opt out from NETIF_F_SG. This is where we anticipate the biggest savings, since an SG-enabled GSO segmentation produces skbs that always consume at least two buffer elements. Larger MSS values continue to get a SG-enabled GSO segmentation, since 1) the relative overhead of the additional header-only buffer element becomes less noticeable, and 2) the linearization overhead increases. With the throughput regression fixed, re-enable NETIF_F_SG by default to reap the significant CPU savings of GSO. Fixes: 5722963a8e83 ("qeth: do not turn on SG per default") Reported-by: Nils Hoppmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 +++ drivers/s390/net/qeth_core_main.c | 31 +++++++++++++++++++++++++++++++ drivers/s390/net/qeth_l2_main.c | 2 ++ drivers/s390/net/qeth_l3_main.c | 2 ++ 4 files changed, 38 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 9cd569ef43ec..15015a24f8ad 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -987,6 +987,9 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, int qeth_set_features(struct net_device *, netdev_features_t); void qeth_recover_features(struct net_device *dev); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); +netdev_features_t qeth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); int qeth_vm_request_mac(struct qeth_card *card); int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 49b9efeba1bd..d9b0e07d4fa7 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -19,6 +19,11 @@ #include #include #include +#include +#include +#include +#include + #include #include @@ -6438,6 +6443,32 @@ netdev_features_t qeth_fix_features(struct net_device *dev, } EXPORT_SYMBOL_GPL(qeth_fix_features); +netdev_features_t qeth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + /* GSO segmentation builds skbs with + * a (small) linear part for the headers, and + * page frags for the data. + * Compared to a linear skb, the header-only part consumes an + * additional buffer element. This reduces buffer utilization, and + * hurts throughput. So compress small segments into one element. + */ + if (netif_needs_gso(skb, features)) { + /* match skb_segment(): */ + unsigned int doffset = skb->data - skb_mac_header(skb); + unsigned int hsize = skb_shinfo(skb)->gso_size; + unsigned int hroom = skb_headroom(skb); + + /* linearize only if resulting skb allocations are order-0: */ + if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0)) + features &= ~NETIF_F_SG; + } + + return vlan_features_check(skb, features); +} +EXPORT_SYMBOL_GPL(qeth_features_check); + static int __init qeth_core_init(void) { int rc; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index d2537c09126d..85162712d207 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -960,6 +960,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_stop = qeth_l2_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l2_hard_start_xmit, + .ndo_features_check = qeth_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l2_set_rx_mode, .ndo_do_ioctl = qeth_do_ioctl, @@ -1010,6 +1011,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->hw_features = NETIF_F_SG; card->dev->vlan_features = NETIF_F_SG; + card->dev->features |= NETIF_F_SG; /* OSA 3S and earlier has no RX/TX support */ if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) { card->dev->hw_features |= NETIF_F_IP_CSUM; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index e79936b50698..46a841258fc8 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2918,6 +2918,7 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = { .ndo_stop = qeth_l3_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l3_hard_start_xmit, + .ndo_features_check = qeth_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l3_set_multicast_list, .ndo_do_ioctl = qeth_do_ioctl, @@ -2958,6 +2959,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->vlan_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO; + card->dev->features |= NETIF_F_SG; } } } else if (card->info.type == QETH_CARD_TYPE_IQD) { From 0cbff6d4546613330a1c5f139f5c368e4ce33ca1 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 1 Dec 2017 10:14:51 +0100 Subject: [PATCH 324/333] s390/qeth: build max size GSO skbs on L2 devices The current GSO skb size limit was copy&pasted over from the L3 path, where it is needed due to a TSO limitation. As L2 devices don't offer TSO support (and thus all GSO skbs are segmented before they reach the driver), there's no reason to restrict the stack in how large it may build the GSO skbs. Fixes: d52aec97e5bc ("qeth: enable scatter/gather in layer 2 mode") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 2 -- drivers/s390/net/qeth_l3_main.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 85162712d207..f21c94810373 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1030,8 +1030,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); - card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * - PAGE_SIZE; SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); netif_carrier_off(card->dev); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 46a841258fc8..2a25f20566d8 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2987,8 +2987,8 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; netif_keep_dst(card->dev); - card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * - PAGE_SIZE; + netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * + PAGE_SIZE); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); From a98a4ebc8c61d20f0150d6be66e0e65223a347af Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 1 Dec 2017 09:58:42 +0800 Subject: [PATCH 325/333] ipvlan: Add the skb->mark as flow4's member to lookup route Current codes don't use skb->mark to assign flowi4_mark, it would make the policy route rule with fwmark doesn't work as expected. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 11c1e7950fe5..77cc4fbaeace 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -393,6 +393,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb) .flowi4_oif = dev->ifindex, .flowi4_tos = RT_TOS(ip4h->tos), .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, .daddr = ip4h->daddr, .saddr = ip4h->saddr, }; From 45ab4b13e46325d00f4acdb365d406e941a15f81 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Fri, 1 Dec 2017 11:12:44 +0100 Subject: [PATCH 326/333] stmmac: reset last TSO segment size after device open The mss variable tracks the last max segment size sent to the TSO engine. We do not update the hardware as long as we receive skb:s with the same value in gso_size. During a network device down/up cycle (mapped to stmmac_release() and stmmac_open() callbacks) we issue a reset to the hardware and it forgets the setting for mss. However we did not zero out our mss variable so the next transmission of a gso packet happens with an undefined hardware setting. This triggers a hang in the TSO engine and eventuelly the netdev watchdog will bark. Fixes: f748be531d70 ("stmmac: support new GMAC4") Signed-off-by: Lars Persson Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f63c2ddced3c..d7250539d0bd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2588,6 +2588,7 @@ static int stmmac_open(struct net_device *dev) priv->dma_buf_sz = STMMAC_ALIGN(buf_sz); priv->rx_copybreak = STMMAC_RX_COPYBREAK; + priv->mss = 0; ret = alloc_dma_desc_resources(priv); if (ret < 0) { From 886afc1dc489436bf2c4fadf0f3aecacd7269234 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 1 Dec 2017 12:38:11 +0000 Subject: [PATCH 327/333] liquidio: fix incorrect indentation of assignment statement Remove one extraneous level of indentation on assignment statement. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 6aa0eee88ea5..a5eecd895a82 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1113,7 +1113,7 @@ static int liquidio_watchdog(void *param) dev_err(&oct->pci_dev->dev, "ERROR: Octeon core %d crashed or got stuck! See oct-fwdump for details.\n", core); - err_msg_was_printed[core] = true; + err_msg_was_printed[core] = true; } } From c501256406fb19c306504ee1fe41a4ea208d4245 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:09:53 +0000 Subject: [PATCH 328/333] rxrpc: Use correct netns source in rxrpc_release_sock() In rxrpc_release_sock() there may be no rx->local value to access, so we can't unconditionally follow it to the rxrpc network namespace information to poke the connection reapers. Instead, use the socket's namespace pointer to find the namespace. This unfixed code causes the following static checker warning: net/rxrpc/af_rxrpc.c:898 rxrpc_release_sock() error: we previously assumed 'rx->local' could be null (see line 887) Fixes: 3d18cbb7fd0c ("rxrpc: Fix conn expiry timers") Reported-by: Dan Carpenter Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/af_rxrpc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 8f7cf4c042be..dcd818fa837e 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -860,6 +860,7 @@ static void rxrpc_sock_destructor(struct sock *sk) static int rxrpc_release_sock(struct sock *sk) { struct rxrpc_sock *rx = rxrpc_sk(sk); + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); @@ -895,8 +896,8 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); - rxrpc_queue_work(&rx->local->rxnet->service_conn_reaper); - rxrpc_queue_work(&rx->local->rxnet->client_conn_reaper); + rxrpc_queue_work(&rxnet->service_conn_reaper); + rxrpc_queue_work(&rxnet->client_conn_reaper); rxrpc_put_local(rx->local); rx->local = NULL; From bcd1d601e5cc760bf5743a59e4716603490e281c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2017 11:10:37 +0000 Subject: [PATCH 329/333] rxrpc: Fix the MAINTAINERS record Fix the MAINTAINERS record so that it's more obvious who the maintainer for AF_RXRPC is. Reported-by: Joe Perches Reported-by: David Miller Signed-off-by: David Howells Signed-off-by: David S. Miller --- MAINTAINERS | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 77d819b458a9..511b858405bc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -554,13 +554,13 @@ S: Orphan F: Documentation/filesystems/affs.txt F: fs/affs/ -AFS FILESYSTEM & AF_RXRPC SOCKET DOMAIN +AFS FILESYSTEM M: David Howells L: linux-afs@lists.infradead.org S: Supported F: fs/afs/ -F: include/net/af_rxrpc.h -F: net/rxrpc/af_rxrpc.c +F: include/trace/events/afs.h +F: Documentation/filesystems/afs.txt W: https://www.infradead.org/~dhowells/kafs/ AGPGART DRIVER @@ -11777,6 +11777,18 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jes/linux.git rtl8xxxu-deve S: Maintained F: drivers/net/wireless/realtek/rtl8xxxu/ +RXRPC SOCKETS (AF_RXRPC) +M: David Howells +L: linux-afs@lists.infradead.org +S: Supported +F: net/rxrpc/ +F: include/keys/rxrpc-type.h +F: include/net/af_rxrpc.h +F: include/trace/events/rxrpc.h +F: include/uapi/linux/rxrpc.h +F: Documentation/networking/rxrpc.txt +W: https://www.infradead.org/~dhowells/kafs/ + S3 SAVAGE FRAMEBUFFER DRIVER M: Antonino Daplas L: linux-fbdev@vger.kernel.org From ae64f9bd1d3621b5e60d7363bc20afb46aede215 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Dec 2017 11:01:47 -0500 Subject: [PATCH 330/333] Linux 4.15-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f761bf475ba5..c988e46a53cd 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Fearless Coyote # *DOCUMENTATION* From eeea10b83a139451130df1594f26710c8fa390c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 Dec 2017 09:32:59 -0800 Subject: [PATCH 331/333] tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb() James Morris reported kernel stack corruption bug [1] while running the SELinux testsuite, and bisected to a recent commit bffa72cf7f9d ("net: sk_buff rbnode reorg") We believe this commit is fine, but exposes an older bug. SELinux code runs from tcp_filter() and might send an ICMP, expecting IP options to be found in skb->cb[] using regular IPCB placement. We need to defer TCP mangling of skb->cb[] after tcp_filter() calls. This patch adds tcp_v4_fill_cb()/tcp_v4_restore_cb() in a very similar way we added them for IPv6. [1] [ 339.806024] SELinux: failure in selinux_parse_skb(), unable to parse packet [ 339.822505] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffff81745af5 [ 339.822505] [ 339.852250] CPU: 4 PID: 3642 Comm: client Not tainted 4.15.0-rc1-test #15 [ 339.868498] Hardware name: LENOVO 10FGS0VA1L/30BC, BIOS FWKT68A 01/19/2017 [ 339.885060] Call Trace: [ 339.896875] [ 339.908103] dump_stack+0x63/0x87 [ 339.920645] panic+0xe8/0x248 [ 339.932668] ? ip_push_pending_frames+0x33/0x40 [ 339.946328] ? icmp_send+0x525/0x530 [ 339.958861] ? kfree_skbmem+0x60/0x70 [ 339.971431] __stack_chk_fail+0x1b/0x20 [ 339.984049] icmp_send+0x525/0x530 [ 339.996205] ? netlbl_skbuff_err+0x36/0x40 [ 340.008997] ? selinux_netlbl_err+0x11/0x20 [ 340.021816] ? selinux_socket_sock_rcv_skb+0x211/0x230 [ 340.035529] ? security_sock_rcv_skb+0x3b/0x50 [ 340.048471] ? sk_filter_trim_cap+0x44/0x1c0 [ 340.061246] ? tcp_v4_inbound_md5_hash+0x69/0x1b0 [ 340.074562] ? tcp_filter+0x2c/0x40 [ 340.086400] ? tcp_v4_rcv+0x820/0xa20 [ 340.098329] ? ip_local_deliver_finish+0x71/0x1a0 [ 340.111279] ? ip_local_deliver+0x6f/0xe0 [ 340.123535] ? ip_rcv_finish+0x3a0/0x3a0 [ 340.135523] ? ip_rcv_finish+0xdb/0x3a0 [ 340.147442] ? ip_rcv+0x27c/0x3c0 [ 340.158668] ? inet_del_offload+0x40/0x40 [ 340.170580] ? __netif_receive_skb_core+0x4ac/0x900 [ 340.183285] ? rcu_accelerate_cbs+0x5b/0x80 [ 340.195282] ? __netif_receive_skb+0x18/0x60 [ 340.207288] ? process_backlog+0x95/0x140 [ 340.218948] ? net_rx_action+0x26c/0x3b0 [ 340.230416] ? __do_softirq+0xc9/0x26a [ 340.241625] ? do_softirq_own_stack+0x2a/0x40 [ 340.253368] [ 340.262673] ? do_softirq+0x50/0x60 [ 340.273450] ? __local_bh_enable_ip+0x57/0x60 [ 340.285045] ? ip_finish_output2+0x175/0x350 [ 340.296403] ? ip_finish_output+0x127/0x1d0 [ 340.307665] ? nf_hook_slow+0x3c/0xb0 [ 340.318230] ? ip_output+0x72/0xe0 [ 340.328524] ? ip_fragment.constprop.54+0x80/0x80 [ 340.340070] ? ip_local_out+0x35/0x40 [ 340.350497] ? ip_queue_xmit+0x15c/0x3f0 [ 340.361060] ? __kmalloc_reserve.isra.40+0x31/0x90 [ 340.372484] ? __skb_clone+0x2e/0x130 [ 340.382633] ? tcp_transmit_skb+0x558/0xa10 [ 340.393262] ? tcp_connect+0x938/0xad0 [ 340.403370] ? ktime_get_with_offset+0x4c/0xb0 [ 340.414206] ? tcp_v4_connect+0x457/0x4e0 [ 340.424471] ? __inet_stream_connect+0xb3/0x300 [ 340.435195] ? inet_stream_connect+0x3b/0x60 [ 340.445607] ? SYSC_connect+0xd9/0x110 [ 340.455455] ? __audit_syscall_entry+0xaf/0x100 [ 340.466112] ? syscall_trace_enter+0x1d0/0x2b0 [ 340.476636] ? __audit_syscall_exit+0x209/0x290 [ 340.487151] ? SyS_connect+0xe/0x10 [ 340.496453] ? do_syscall_64+0x67/0x1b0 [ 340.506078] ? entry_SYSCALL64_slow_path+0x25/0x25 Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses") Signed-off-by: Eric Dumazet Reported-by: James Morris Tested-by: James Morris Tested-by: Casey Schaufler Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 59 ++++++++++++++++++++++++++++++--------------- net/ipv6/tcp_ipv6.c | 10 +++++--- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c6bc0c4d19c6..77ea45da0fe9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1591,6 +1591,34 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_filter); +static void tcp_v4_restore_cb(struct sk_buff *skb) +{ + memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4, + sizeof(struct inet_skb_parm)); +} + +static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, + const struct tcphdr *th) +{ + /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() + * barrier() makes sure compiler wont play fool^Waliasing games. + */ + memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), + sizeof(struct inet_skb_parm)); + barrier(); + + TCP_SKB_CB(skb)->seq = ntohl(th->seq); + TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + + skb->len - th->doff * 4); + TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_tw_isn = 0; + TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); + TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->has_rxtstamp = + skb->tstamp || skb_hwtstamps(skb)->hwtstamp; +} + /* * From tcp_input.c */ @@ -1631,24 +1659,6 @@ int tcp_v4_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); - /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() - * barrier() makes sure compiler wont play fool^Waliasing games. - */ - memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), - sizeof(struct inet_skb_parm)); - barrier(); - - TCP_SKB_CB(skb)->seq = ntohl(th->seq); - TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - skb->len - th->doff * 4); - TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); - TCP_SKB_CB(skb)->tcp_tw_isn = 0; - TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); - TCP_SKB_CB(skb)->sacked = 0; - TCP_SKB_CB(skb)->has_rxtstamp = - skb->tstamp || skb_hwtstamps(skb)->hwtstamp; - lookup: sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, th->dest, sdif, &refcounted); @@ -1679,14 +1689,19 @@ process: sock_hold(sk); refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb)) + if (!tcp_filter(sk, skb)) { + th = (const struct tcphdr *)skb->data; + iph = ip_hdr(skb); + tcp_v4_fill_cb(skb, iph, th); nsk = tcp_check_req(sk, skb, req, false); + } if (!nsk) { reqsk_put(req); goto discard_and_relse; } if (nsk == sk) { reqsk_put(req); + tcp_v4_restore_cb(skb); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); goto discard_and_relse; @@ -1712,6 +1727,7 @@ process: goto discard_and_relse; th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); + tcp_v4_fill_cb(skb, iph, th); skb->dev = NULL; @@ -1742,6 +1758,8 @@ no_tcp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; + tcp_v4_fill_cb(skb, iph, th); + if (tcp_checksum_complete(skb)) { csum_error: __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); @@ -1768,6 +1786,8 @@ do_time_wait: goto discard_it; } + tcp_v4_fill_cb(skb, iph, th); + if (tcp_checksum_complete(skb)) { inet_twsk_put(inet_twsk(sk)); goto csum_error; @@ -1784,6 +1804,7 @@ do_time_wait: if (sk2) { inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; + tcp_v4_restore_cb(skb); refcounted = false; goto process; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index be11dc13aa70..1f04ec0e4a7a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1454,7 +1454,6 @@ process: struct sock *nsk; sk = req->rsk_listener; - tcp_v6_fill_cb(skb, hdr, th); if (tcp_v6_inbound_md5_hash(sk, skb)) { sk_drops_add(sk, skb); reqsk_put(req); @@ -1467,8 +1466,12 @@ process: sock_hold(sk); refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb)) + if (!tcp_filter(sk, skb)) { + th = (const struct tcphdr *)skb->data; + hdr = ipv6_hdr(skb); + tcp_v6_fill_cb(skb, hdr, th); nsk = tcp_check_req(sk, skb, req, false); + } if (!nsk) { reqsk_put(req); goto discard_and_relse; @@ -1492,8 +1495,6 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - tcp_v6_fill_cb(skb, hdr, th); - if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; @@ -1501,6 +1502,7 @@ process: goto discard_and_relse; th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); + tcp_v6_fill_cb(skb, hdr, th); skb->dev = NULL; From b4d1605a8ea608fd7dc45b926a05d75d340bde4b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 3 Dec 2017 09:33:00 -0800 Subject: [PATCH 332/333] tcp: use IPCB instead of TCP_SKB_CB in inet_exact_dif_match() After this fix : ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()"), socket lookups happen while skb->cb[] has not been mangled yet by TCP. Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev") Signed-off-by: David Ahern Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 4e09398009c1..6998707e81f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -844,12 +844,11 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb) } #endif -/* TCP_SKB_CB reference means this can not be used from early demux */ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) return true; #endif return false; From 8ee5ad1d4c0ba93f96d0db31e98102e61ff7d12b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 4 Dec 2017 10:52:40 -0500 Subject: [PATCH 333/333] arch/tile: mark as orphaned The chip family of TILEPro and TILE-Gx was developed by Tilera, which was eventually acquired by Mellanox. The tile architecture was added to the kernel in 2010 and first appeared in 2.6.36. Now at Mellanox we are developing new chips based on the ARM64 architecture; our last TILE-Gx chip (the Gx72) was released in 2013, and our customers using tile architecture products are not, as far as we know, looking to upgrade to newer kernel releases. In the absence of someone in the community stepping up to take over maintainership, this commit marks the architecture as orphaned. Cc: Chris Metcalf Signed-off-by: Chris Metcalf Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d4fdcb12616c..603d49e877a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13647,10 +13647,8 @@ F: drivers/net/wireless/ti/ F: include/linux/wl12xx.h TILE ARCHITECTURE -M: Chris Metcalf W: http://www.mellanox.com/repository/solutions/tile-scm/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile.git -S: Supported +S: Orphan F: arch/tile/ F: drivers/char/tile-srom.c F: drivers/edac/tile_edac.c