From d76bb7a09bb3b8711077912f3e80cfcf39cd9d0b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 27 May 2020 00:38:38 -0400 Subject: [PATCH 001/710] tools/power turbostat: Print /dev/cpu_dma_latency Users are puzzled when they use tuned performance and all their C-states vanish. Dump /dev/cpu_dma_latency and state whether the value is default, or constraining, to explain this situation. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 33b370865d16..4c679568fda4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4698,6 +4698,32 @@ unsigned int intel_model_duplicates(unsigned int model) } return model; } + +void print_dev_latency(void) +{ + char *path = "/dev/cpu_dma_latency"; + int fd; + int value; + int retval; + + fd = open(path, O_RDONLY); + if (fd < 0) { + warn("fopen %s\n", path); + return; + } + + retval = read(fd, (void *)&value, sizeof(int)); + if (retval != sizeof(int)) { + warn("read %s\n", path); + close(fd); + return; + } + fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", + value, value == 2000000000 ? "default" : "constrained"); + + close(fd); +} + void process_cpuid() { unsigned int eax, ebx, ecx, edx; @@ -4966,6 +4992,8 @@ void process_cpuid() if (!quiet) dump_cstate_pstate_config_info(family, model); + if (!quiet) + print_dev_latency(); if (!quiet) dump_sysfs_cstate_config(); if (!quiet) From 9aefc2cda6353f48708415d9adc5dff4deb73412 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Thu, 26 Mar 2020 13:36:37 -0700 Subject: [PATCH 002/710] tools/power turbostat: Always print idle in the system configuration header If the --quiet option is not used, turbostat prints a useful system configuration header during startup. But inclusion of idle system configuration information in this header is currently a function of inclusion in the columns chosen to be displayed. Always list this idle system configuration. Signed-off-by: Doug Smythies Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4c679568fda4..4edad3fc760a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3530,9 +3530,6 @@ dump_sysfs_cstate_config(void) int state; char *sp; - if (!DO_BIC(BIC_sysfs)) - return; - if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { fprintf(outf, "cpuidle not loaded\n"); return; From 7c2ccc507bd44d17227930181f937b2066565349 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sat, 18 Apr 2020 16:31:47 +0800 Subject: [PATCH 003/710] tools/power turbostat: Make the energy variable to be 64 bit Change the energy variable from 32bit to 64bit, so that it can record long time duration. After this conversion, adjust the DELTA_WRAP32() accordingly. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 30 ++++++++++++--------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4edad3fc760a..29ec1018852d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -211,12 +211,12 @@ struct pkg_data { long long gfx_rc6_ms; unsigned int gfx_mhz; unsigned int package_id; - unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ - unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ - unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */ - unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */ - unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ - unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ + unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */ + unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */ + unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */ + unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */ + unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ + unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; unsigned long long counter[MAX_ADDED_COUNTERS]; } *package_even, *package_odd; @@ -858,13 +858,13 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "pc10: %016llX\n", p->pc10); outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); - outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); - outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores); - outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx); - outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram); - outp += sprintf(outp, "Throttle PKG: %0X\n", + outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg); + outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores); + outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx); + outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram); + outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status); - outp += sprintf(outp, "Throttle RAM: %0X\n", + outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); @@ -1210,11 +1210,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ } #define DELTA_WRAP32(new, old) \ - if (new > old) { \ - old = new - old; \ - } else { \ - old = 0x100000000 + new - old; \ - } + old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); int delta_package(struct pkg_data *new, struct pkg_data *old) From 87e15da95775a2ffb8c444e84f08ca982b758364 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sat, 18 Apr 2020 16:31:57 +0800 Subject: [PATCH 004/710] tools/power turbostat: Introduce functions to accumulate RAPL consumption Since the RAPL Joule Counter is 32 bit, turbostat would only print a *star* instead of printing the actual energy consumed to indicate the overflow due to long duration. This does not meet the requirement from servers as the sampling time of turbostat is usually very long on servers. So maintain a set of MSR buffer, and update them periodically before the 32bit MSR register is wrapped round, so as to avoid the overflow. The idea is similar to the implementation of ktime_get(): Periodical MSR timer: total_rapl_sum += (current_rapl_msr - last_rapl_msr); Using get_msr_sum() to get the accumulated RAPL: return (current_rapl_msr - last_rapl_msr) + total_rapl_sum; The accumulated RAPL mechanism will be turned on in next patch. Originally-by: Aaron Lu Reviewed-by: Doug Smythies Tested-by: Doug Smythies Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 2 +- tools/power/x86/turbostat/turbostat.c | 224 +++++++++++++++++++++++++- 2 files changed, 219 insertions(+), 7 deletions(-) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 2b6551269e43..d08765531bcb 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -16,7 +16,7 @@ override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c @mkdir -p $(BUILD_OUTPUT) - $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap -lrt .PHONY : clean clean : diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 29ec1018852d..c1759f6c84a8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -259,6 +259,113 @@ struct msr_counter { #define SYSFS_PERCPU (1 << 1) }; +/* + * The accumulated sum of MSR is defined as a monotonic + * increasing MSR, it will be accumulated periodically, + * despite its register's bit width. + */ +enum { + IDX_PKG_ENERGY, + IDX_DRAM_ENERGY, + IDX_PP0_ENERGY, + IDX_PP1_ENERGY, + IDX_PKG_PERF, + IDX_DRAM_PERF, + IDX_COUNT, +}; + +int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); + +struct msr_sum_array { + /* get_msr_sum() = sum + (get_msr() - last) */ + struct { + /*The accumulated MSR value is updated by the timer*/ + unsigned long long sum; + /*The MSR footprint recorded in last timer*/ + unsigned long long last; + } entries[IDX_COUNT]; +}; + +/* The percpu MSR sum array.*/ +struct msr_sum_array *per_cpu_msr_sum; + +int idx_to_offset(int idx) +{ + int offset; + + switch (idx) { + case IDX_PKG_ENERGY: + offset = MSR_PKG_ENERGY_STATUS; + break; + case IDX_DRAM_ENERGY: + offset = MSR_DRAM_ENERGY_STATUS; + break; + case IDX_PP0_ENERGY: + offset = MSR_PP0_ENERGY_STATUS; + break; + case IDX_PP1_ENERGY: + offset = MSR_PP1_ENERGY_STATUS; + break; + case IDX_PKG_PERF: + offset = MSR_PKG_PERF_STATUS; + break; + case IDX_DRAM_PERF: + offset = MSR_DRAM_PERF_STATUS; + break; + default: + offset = -1; + } + return offset; +} + +int offset_to_idx(int offset) +{ + int idx; + + switch (offset) { + case MSR_PKG_ENERGY_STATUS: + idx = IDX_PKG_ENERGY; + break; + case MSR_DRAM_ENERGY_STATUS: + idx = IDX_DRAM_ENERGY; + break; + case MSR_PP0_ENERGY_STATUS: + idx = IDX_PP0_ENERGY; + break; + case MSR_PP1_ENERGY_STATUS: + idx = IDX_PP1_ENERGY; + break; + case MSR_PKG_PERF_STATUS: + idx = IDX_PKG_PERF; + break; + case MSR_DRAM_PERF_STATUS: + idx = IDX_DRAM_PERF; + break; + default: + idx = -1; + } + return idx; +} + +int idx_valid(int idx) +{ + switch (idx) { + case IDX_PKG_ENERGY: + return do_rapl & RAPL_PKG; + case IDX_DRAM_ENERGY: + return do_rapl & RAPL_DRAM; + case IDX_PP0_ENERGY: + return do_rapl & RAPL_CORES_ENERGY_STATUS; + case IDX_PP1_ENERGY: + return do_rapl & RAPL_GFX; + case IDX_PKG_PERF: + return do_rapl & RAPL_PKG_PERF_STATUS; + case IDX_DRAM_PERF: + return do_rapl & RAPL_DRAM_PERF_STATUS; + default: + return 0; + } +} struct sys_counters { unsigned int added_thread_counters; unsigned int added_core_counters; @@ -1250,12 +1357,12 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->gfx_mhz = new->gfx_mhz; - DELTA_WRAP32(new->energy_pkg, old->energy_pkg); - DELTA_WRAP32(new->energy_cores, old->energy_cores); - DELTA_WRAP32(new->energy_gfx, old->energy_gfx); - DELTA_WRAP32(new->energy_dram, old->energy_dram); - DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); - DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); + old->energy_pkg = new->energy_pkg - old->energy_pkg; + old->energy_cores = new->energy_cores - old->energy_cores; + old->energy_gfx = new->energy_gfx - old->energy_gfx; + old->energy_dram = new->energy_dram - old->energy_dram; + old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status; + old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -3053,6 +3160,111 @@ void do_sleep(void) } } +int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) +{ + int ret, idx; + unsigned long long msr_cur, msr_last; + + if (!per_cpu_msr_sum) + return 1; + + idx = offset_to_idx(offset); + if (idx < 0) + return idx; + /* get_msr_sum() = sum + (get_msr() - last) */ + ret = get_msr(cpu, offset, &msr_cur); + if (ret) + return ret; + msr_last = per_cpu_msr_sum[cpu].entries[idx].last; + DELTA_WRAP32(msr_cur, msr_last); + *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; + + return 0; +} + +timer_t timerid; + +/* Timer callback, update the sum of MSRs periodically. */ +static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int i, ret; + int cpu = t->cpu_id; + + for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { + unsigned long long msr_cur, msr_last; + int offset; + + if (!idx_valid(i)) + continue; + offset = idx_to_offset(i); + if (offset < 0) + continue; + ret = get_msr(cpu, offset, &msr_cur); + if (ret) { + fprintf(outf, "Can not update msr(0x%x)\n", offset); + continue; + } + + msr_last = per_cpu_msr_sum[cpu].entries[i].last; + per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; + + DELTA_WRAP32(msr_cur, msr_last); + per_cpu_msr_sum[cpu].entries[i].sum += msr_last; + } + return 0; +} + +static void +msr_record_handler(union sigval v) +{ + for_all_cpus(update_msr_sum, EVEN_COUNTERS); +} + +void msr_sum_record(void) +{ + struct itimerspec its; + struct sigevent sev; + + per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); + if (!per_cpu_msr_sum) { + fprintf(outf, "Can not allocate memory for long time MSR.\n"); + return; + } + /* + * Signal handler might be restricted, so use thread notifier instead. + */ + memset(&sev, 0, sizeof(struct sigevent)); + sev.sigev_notify = SIGEV_THREAD; + sev.sigev_notify_function = msr_record_handler; + + sev.sigev_value.sival_ptr = &timerid; + if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { + fprintf(outf, "Can not create timer.\n"); + goto release_msr; + } + + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 1; + /* + * A wraparound time has been calculated early. + * Some sources state that the peak power for a + * microprocessor is usually 1.5 times the TDP rating, + * use 2 * TDP for safety. + */ + its.it_interval.tv_sec = rapl_joule_counter_range / 2; + its.it_interval.tv_nsec = 0; + + if (timer_settime(timerid, 0, &its, NULL) == -1) { + fprintf(outf, "Can not set timer.\n"); + goto release_timer; + } + return; + + release_timer: + timer_delete(timerid); + release_msr: + free(per_cpu_msr_sum); +} void turbostat_loop() { From 9972d5d84d76982606806b2ce887f70c2f8ba60a Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sat, 18 Apr 2020 16:32:05 +0800 Subject: [PATCH 005/710] tools/power turbostat: Enable accumulate RAPL display Enable the accumulated RAPL display by default. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 38 +++++++++++---------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c1759f6c84a8..66c468262020 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1169,14 +1169,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } } - /* - * If measurement interval exceeds minimum RAPL Joule Counter range, - * indicate that results are suspect by printing "**" in fraction place. - */ - if (interval_float < rapl_joule_counter_range) - fmt8 = "%s%.2f"; - else - fmt8 = "%6.0f**"; + fmt8 = "%s%.2f"; if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); @@ -2069,39 +2062,39 @@ retry: p->sys_lpi = cpuidle_cur_sys_lpi_us; if (do_rapl & RAPL_PKG) { - if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; - p->energy_pkg = msr & 0xFFFFFFFF; + p->energy_pkg = msr; } if (do_rapl & RAPL_CORES_ENERGY_STATUS) { - if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr)) return -14; - p->energy_cores = msr & 0xFFFFFFFF; + p->energy_cores = msr; } if (do_rapl & RAPL_DRAM) { - if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) return -15; - p->energy_dram = msr & 0xFFFFFFFF; + p->energy_dram = msr; } if (do_rapl & RAPL_GFX) { - if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr)) return -16; - p->energy_gfx = msr & 0xFFFFFFFF; + p->energy_gfx = msr; } if (do_rapl & RAPL_PKG_PERF_STATUS) { - if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr)) return -16; - p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; + p->rapl_pkg_perf_status = msr; } if (do_rapl & RAPL_DRAM_PERF_STATUS) { - if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr)) return -16; - p->rapl_dram_perf_status = msr & 0xFFFFFFFF; + p->rapl_dram_perf_status = msr; } if (do_rapl & RAPL_AMD_F17H) { - if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr)) + if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr)) return -13; - p->energy_pkg = msr & 0xFFFFFFFF; + p->energy_pkg = msr; } if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) @@ -6101,6 +6094,7 @@ int main(int argc, char **argv) return 0; } + msr_sum_record(); /* * if any params left, it must be a command to fork */ From 8201a0285789fade1c5b031914577e2b27a64f05 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 29 Jun 2020 15:26:57 -0400 Subject: [PATCH 006/710] tools/power turbostat: Use sched_getcpu() instead of hardcoded cpu 0 Disabling cpu 0 results in an error turbostat: /sys/devices/system/cpu/cpu0/topology/thread_siblings: open failed: No such file or directory Use sched_getcpu() instead of a hardcoded cpu 0 to get the max cpu number. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 66c468262020..151a70f2311b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2865,12 +2865,19 @@ void re_initialize(void) void set_max_cpu_num(void) { FILE *filep; + int base_cpu; unsigned long dummy; + char pathname[64]; + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(1, "cannot find calling cpu ID"); + sprintf(pathname, + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", + base_cpu); + + filep = fopen_or_die(pathname, "r"); topo.max_cpu_num = 0; - filep = fopen_or_die( - "/sys/devices/system/cpu/cpu0/topology/thread_siblings", - "r"); while (fscanf(filep, "%lx,", &dummy) == 1) topo.max_cpu_num += BITMASK_SIZE; fclose(filep); From b88cad57d4d32bb5c53cd8e0ce3a1971062142af Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 8 Jul 2020 12:55:30 +0200 Subject: [PATCH 007/710] tools/power turbostat: Replace HTTP links with HTTPS ones: TURBOSTAT UTILITY Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index a6db83a88e85..f6b7e85b121c 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -335,7 +335,7 @@ that they count at TSC rate, which is true on all processors tested to date. .SH REFERENCES Volume 3B: System Programming Guide" -http://www.intel.com/products/processor/manuals/ +https://www.intel.com/products/processor/manuals/ .SH FILES .ta From fecb3bc839df64761cc63c9ee9b45c1cad36aee8 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Mon, 10 Aug 2020 10:43:30 -0400 Subject: [PATCH 008/710] tools/power turbostat: Fix output formatting for ACPI CST enumeration turbostat formatting is broken with ACPI CST for enumeration. The problem is that the CX_ACPI% is eight characters long which does not work with tab formatting. One simple solution is to remove the underbar from the state name such that C1_ACPI will be displayed as C1ACPI. Signed-off-by: David Arcari Cc: Len Brown Cc: linux-kernel@vger.kernel.org Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 151a70f2311b..56b93e0d9415 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3682,6 +3682,20 @@ int has_config_tdp(unsigned int family, unsigned int model) } } +static void +remove_underbar(char *s) +{ + char *to = s; + + while (*s) { + if (*s != '_') + *to++ = *s; + s++; + } + + *to = 0; +} + static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model) { @@ -3764,6 +3778,8 @@ dump_sysfs_cstate_config(void) *sp = '\0'; fclose(input); + remove_underbar(name_buf); + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); input = fopen(path, "r"); @@ -5830,6 +5846,8 @@ void probe_sysfs(void) *sp = '%'; *(sp + 1) = '\0'; + remove_underbar(name_buf); + fclose(input); sprintf(path, "cpuidle/state%d/time", state); @@ -5857,6 +5875,8 @@ void probe_sysfs(void) *sp = '\0'; fclose(input); + remove_underbar(name_buf); + sprintf(path, "cpuidle/state%d/usage", state); if (is_deferred_skip(name_buf)) From e7af1ed3fa4756e8df8270a8635d852a94266061 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 13 Aug 2020 19:06:03 -0400 Subject: [PATCH 009/710] tools/power turbostat: Support additional CPU model numbers Initial support for models recently added to intel-family.h. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 56b93e0d9415..4ee4e3067681 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4906,6 +4906,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ICELAKE_NNPI: case INTEL_FAM6_TIGERLAKE_L: case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_ROCKETLAKE: + case INTEL_FAM6_LAKEFIELD: + case INTEL_FAM6_ALDERLAKE: return INTEL_FAM6_CANNONLAKE_L; case INTEL_FAM6_ATOM_TREMONT_D: @@ -4915,6 +4918,7 @@ unsigned int intel_model_duplicates(unsigned int model) return INTEL_FAM6_ATOM_TREMONT; case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_SAPPHIRERAPIDS_X: return INTEL_FAM6_SKYLAKE_X; } return model; From c315a09b1b0f491c27d46e9d05f397023a44fb81 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 13 Aug 2020 19:18:22 -0400 Subject: [PATCH 010/710] tools/power turbostat: Skip pc8, pc9, pc10 columns, if they are disabled Like we skip PC3 and PC6 columns when the package C-state limit disables them, skip PC8/PC9/CP10 under analogous conditions. Reported-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4ee4e3067681..72c0b19db36e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5186,9 +5186,12 @@ void process_cpuid() BIC_NOT_PRESENT(BIC_Pkgpc7); } if (has_c8910_msrs(family, model)) { - BIC_PRESENT(BIC_Pkgpc8); - BIC_PRESENT(BIC_Pkgpc9); - BIC_PRESENT(BIC_Pkgpc10); + if (pkg_cstate_limit >= PCL__8) + BIC_PRESENT(BIC_Pkgpc8); + if (pkg_cstate_limit >= PCL__9) + BIC_PRESENT(BIC_Pkgpc9); + if (pkg_cstate_limit >= PCL_10) + BIC_PRESENT(BIC_Pkgpc10); } do_irtl_hsw = has_c8910_msrs(family, model); if (has_skl_msrs(family, model)) { From 0936cdfbb527a4fa2559292069ebff2e8cf2c843 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Lyson=C4=9Bk?= Date: Fri, 27 Mar 2020 08:27:12 +0100 Subject: [PATCH 011/710] tools/power x86_energy_perf_policy: Input/output error in a VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've encountered an issue with x86_energy_perf_policy. If I run it on a machine that I'm told is a qemu-kvm virtual machine running inside a privileged container, I get the following error: x86_energy_perf_policy: /dev/cpu/0/msr offset 0x1ad read failed: Input/output error I get the same error in a Digital Ocean droplet, so that might be a similar environment. I created the following patch which is intended to give a more user-friendly message. It's based on a patch for turbostat from Prarit Bhargava that was posted some time ago. The patch is "[v2] turbostat: Running on virtual machine is not supported" [1]. Given my limited knowledge of the topic, I can't say with confidence that this is the right solution, though (that's why this is not an official patch submission). Also, I'm not sure what the convention with exit codes is in this tool. Also, instead of the error message, perhaps the tool should just not print anything in this case, which is how it behaves in a "regular" VM? [1] https://patchwork.kernel.org/patch/9868587/ Signed-off-by: Ondřej Lysoněk Signed-off-by: Len Brown --- .../x86_energy_perf_policy.c | 67 +++++++++++++++---- 1 file changed, 54 insertions(+), 13 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 3fe1eed900d4..ff6c6661f075 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -622,6 +622,57 @@ void cmdline(int argc, char **argv) } } +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, "r"); + + if (!filep) + err(1, "%s: open failed", path); + return filep; +} + +void err_on_hypervisor(void) +{ + FILE *cpuinfo; + char *flags, *hypervisor; + char *buffer; + + /* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */ + cpuinfo = fopen_or_die("/proc/cpuinfo", "ro"); + + buffer = malloc(4096); + if (!buffer) { + fclose(cpuinfo); + err(-ENOMEM, "buffer malloc fail"); + } + + if (!fread(buffer, 1024, 1, cpuinfo)) { + fclose(cpuinfo); + free(buffer); + err(1, "Reading /proc/cpuinfo failed"); + } + + flags = strstr(buffer, "flags"); + rewind(cpuinfo); + fseek(cpuinfo, flags - buffer, SEEK_SET); + if (!fgets(buffer, 4096, cpuinfo)) { + fclose(cpuinfo); + free(buffer); + err(1, "Reading /proc/cpuinfo failed"); + } + fclose(cpuinfo); + + hypervisor = strstr(buffer, "hypervisor"); + + free(buffer); + + if (hypervisor) + err(-1, + "not supported on this virtual machine"); +} int get_msr(int cpu, int offset, unsigned long long *msr) { @@ -635,8 +686,10 @@ int get_msr(int cpu, int offset, unsigned long long *msr) err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); retval = pread(fd, msr, sizeof(*msr), offset); - if (retval != sizeof(*msr)) + if (retval != sizeof(*msr)) { + err_on_hypervisor(); err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); + } if (debug > 1) fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr); @@ -1086,18 +1139,6 @@ int update_cpu_msrs(int cpu) return 0; } -/* - * Open a file, and exit on failure - */ -FILE *fopen_or_die(const char *path, const char *mode) -{ - FILE *filep = fopen(path, "r"); - - if (!filep) - err(1, "%s: open failed", path); - return filep; -} - unsigned int get_pkg_num(int cpu) { FILE *fp; From b4b9156953fea108a9540c262e48eafeeff99ab0 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Wed, 22 Apr 2020 15:02:07 -0700 Subject: [PATCH 012/710] tools/power turbostat: Add a new GFXAMHz column that exposes gt_act_freq_mhz. The column already present called GFXMHz reads from gt_cur_freq_mhz, which represents the GT frequency that was requested, but power management might not be able to do that. So the new column will display what the actual frequency GT is running at. Signed-off-by: Rafael Antognolli Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 72c0b19db36e..7a6e91aedf0f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -79,6 +79,7 @@ unsigned long long gfx_cur_rc6_ms; unsigned long long cpuidle_cur_cpu_lpi_us; unsigned long long cpuidle_cur_sys_lpi_us; unsigned int gfx_cur_mhz; +unsigned int gfx_act_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; double rapl_power_units, rapl_time_units; @@ -210,6 +211,7 @@ struct pkg_data { unsigned long long pkg_both_core_gfxe_c0; long long gfx_rc6_ms; unsigned int gfx_mhz; + unsigned int gfx_act_mhz; unsigned int package_id; unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */ unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */ @@ -558,6 +560,7 @@ struct msr_counter bic[] = { { 0x0, "APIC" }, { 0x0, "X2APIC" }, { 0x0, "Die" }, + { 0x0, "GFXAMHz" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -612,6 +615,7 @@ struct msr_counter bic[] = { #define BIC_APIC (1ULL << 48) #define BIC_X2APIC (1ULL << 49) #define BIC_Die (1ULL << 50) +#define BIC_GFXACTMHz (1ULL << 51) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -831,6 +835,9 @@ void print_header(char *delim) if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFXACTMHz)) + outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); if (DO_BIC(BIC_Any_c0)) @@ -1198,6 +1205,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); + /* GFXACTMHz */ + if (DO_BIC(BIC_GFXACTMHz)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); @@ -1349,6 +1360,7 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; old->gfx_mhz = new->gfx_mhz; + old->gfx_act_mhz = new->gfx_act_mhz; old->energy_pkg = new->energy_pkg - old->energy_pkg; old->energy_cores = new->energy_cores - old->energy_cores; @@ -1565,6 +1577,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->gfx_rc6_ms = 0; p->gfx_mhz = 0; + p->gfx_act_mhz = 0; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) t->counter[i] = 0; @@ -1660,6 +1673,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.gfx_rc6_ms = p->gfx_rc6_ms; average.packages.gfx_mhz = p->gfx_mhz; + average.packages.gfx_act_mhz = p->gfx_act_mhz; average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); @@ -2108,6 +2122,9 @@ retry: if (DO_BIC(BIC_GFXMHz)) p->gfx_mhz = gfx_cur_mhz; + if (DO_BIC(BIC_GFXACTMHz)) + p->gfx_act_mhz = gfx_act_mhz; + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (get_mp(cpu, mp, &p->counter[i])) return -10; @@ -3018,6 +3035,33 @@ int snapshot_gfx_mhz(void) return 0; } +/* + * snapshot_gfx_cur_mhz() + * + * record snapshot of + * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_act_mhz(void) +{ + static FILE *fp; + int retval; + + if (fp == NULL) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r"); + else { + rewind(fp); + fflush(fp); + } + + retval = fscanf(fp, "%d", &gfx_act_mhz); + if (retval != 1) + err(1, "GFX ACT MHz"); + + return 0; +} + /* * snapshot_cpu_lpi() * @@ -3083,6 +3127,9 @@ int snapshot_proc_sysfs_files(void) if (DO_BIC(BIC_GFXMHz)) snapshot_gfx_mhz(); + if (DO_BIC(BIC_GFXACTMHz)) + snapshot_gfx_act_mhz(); + if (DO_BIC(BIC_CPU_LPI)) snapshot_cpu_lpi_us(); @@ -5236,6 +5283,9 @@ void process_cpuid() if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) BIC_PRESENT(BIC_GFXMHz); + if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXACTMHz); + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) BIC_PRESENT(BIC_CPU_LPI); else From 20de0dab238849414d33c81bc96e2db68cc61467 Mon Sep 17 00:00:00 2001 From: Antti Laakso Date: Mon, 17 Aug 2020 18:03:48 +0300 Subject: [PATCH 013/710] tools/power turbostat: Remove empty columns for Jacobsville Jacobsville doesn't have Package C2 and C6. Also Core and DRAM RAPL are not available. Adjust output accordingly. Signed-off-by: Antti Laakso Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 33 ++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7a6e91aedf0f..629b809075c1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2286,6 +2286,7 @@ int has_turbo_ratio_group_limits(int family, int model) case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ATOM_GOLDMONT_D: + case INTEL_FAM6_ATOM_TREMONT_D: return 1; } return 0; @@ -3534,6 +3535,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ pkg_cstate_limits = glm_pkg_cstate_limits; break; default: @@ -3616,6 +3618,17 @@ int is_ehl(unsigned int family, unsigned int model) } return 0; } +int is_jvl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_TREMONT_D: + return 1; + } + return 0; +} int has_turbo_ratio_limit(unsigned int family, unsigned int model) { @@ -4227,6 +4240,14 @@ void rapl_probe_intel(unsigned int family, unsigned int model) BIC_PRESENT(BIC_GFXWatt); } break; + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ + do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + if (rapl_joules) + BIC_PRESENT(BIC_Pkg_J); + else + BIC_PRESENT(BIC_PkgWatt); + break; case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; @@ -4629,6 +4650,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ return 1; } return 0; @@ -4958,9 +4980,6 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ALDERLAKE: return INTEL_FAM6_CANNONLAKE_L; - case INTEL_FAM6_ATOM_TREMONT_D: - return INTEL_FAM6_ATOM_GOLDMONT_D; - case INTEL_FAM6_ATOM_TREMONT_L: return INTEL_FAM6_ATOM_TREMONT; @@ -5214,6 +5233,14 @@ void process_cpuid() BIC_PRESENT(BIC_Mod_c6); use_c1_residency_msr = 1; } + if (is_jvl(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc2); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_Pkgpc6); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } if (is_dnv(family, model)) { BIC_PRESENT(BIC_CPU_c1); BIC_NOT_PRESENT(BIC_CPU_c3); From 33eb82251af9be47a625ca1578f44e596a3a0ca9 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 17 Aug 2020 17:42:15 -0500 Subject: [PATCH 014/710] tools/power turbostat: Support AMD Family 19h Family 19h processors have the same RAPL (Running average power limit) hardware register interface as Family 17h processors. Change the family checks to succeed for Family 17h and above to enable core and package energy measurement on Family 19h machines. Also update the TDP to the largest found at the bottom of the page at amd.com->processors->servers->epyc->2nd-gen-epyc, i.e., the EPYC 7H12. Signed-off-by: Kim Phillips Cc: Len Brown Cc: Len Brown Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 34 +++++++++------------------ 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 629b809075c1..0869f791ed14 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4162,13 +4162,8 @@ double get_tdp_intel(unsigned int model) double get_tdp_amd(unsigned int family) { - switch (family) { - case 0x17: - case 0x18: - default: - /* This is the max stock TDP of HEDT/Server Fam17h chips */ - return 250.0; - } + /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ + return 280.0; } /* @@ -4358,27 +4353,20 @@ void rapl_probe_amd(unsigned int family, unsigned int model) if (max_extended_level >= 0x80000007) { __cpuid(0x80000007, eax, ebx, ecx, edx); - /* RAPL (Fam 17h) */ + /* RAPL (Fam 17h+) */ has_rapl = edx & (1 << 14); } - if (!has_rapl) + if (!has_rapl || family < 0x17) return; - switch (family) { - case 0x17: /* Zen, Zen+ */ - case 0x18: /* Hygon Dhyana */ - do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - } - break; - default: - return; + do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); } if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) From 4be61e6b769fc3f97b58870aa4258e27968f07e1 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Sun, 23 Aug 2020 23:27:02 +0300 Subject: [PATCH 015/710] tools/power turbostat: Build with _FILE_OFFSET_BITS=64 For compatibility reasons, Glibc off_t is a 32-bit type on 32-bit x86 unless _FILE_OFFSET_BITS=64 is defined. Add this define, as otherwise reading MSRs with index 0x80000000 and above attempts a pread with a negative offset, which fails. Signed-off-by: Alexander Monakov Tested-by: Liwei Song Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index d08765531bcb..f3e3c94ab9bd 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -12,6 +12,7 @@ turbostat : turbostat.c override CFLAGS += -O2 -Wall -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' +override CFLAGS += -D_FILE_OFFSET_BITS=64 override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c From 022fc5315b7aff69d3df2c953b892a6232642d50 Mon Sep 17 00:00:00 2001 From: Martijn van de Streek Date: Fri, 16 Oct 2020 08:38:05 +0200 Subject: [PATCH 016/710] HID: uclogic: Add ID for Trust Flex Design Tablet The Trust Flex Design Tablet has an UGTizer USB ID and requires the same initialization as the UGTizer GP0610 to be detected as a graphics tablet instead of a mouse. Signed-off-by: Martijn van de Streek Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-uclogic-core.c | 2 ++ drivers/hid/hid-uclogic-params.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index d69842f79fc6..49d68bbf4852 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1298,6 +1298,7 @@ #define USB_VENDOR_ID_UGTIZER 0x2179 #define USB_DEVICE_ID_UGTIZER_TABLET_GP0610 0x0053 +#define USB_DEVICE_ID_UGTIZER_TABLET_GT5040 0x0077 #define USB_VENDOR_ID_VIEWSONIC 0x0543 #define USB_DEVICE_ID_VIEWSONIC_PD1011 0xe621 diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 86b568037cb8..8e9c9e646cb7 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -385,6 +385,8 @@ static const struct hid_device_id uclogic_devices[] = { USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) }, + { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, + USB_DEVICE_ID_UGTIZER_TABLET_GT5040) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_G5) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index 7d20d1fcf8d2..d26d8cd98efc 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -997,6 +997,8 @@ int uclogic_params_init(struct uclogic_params *params, break; case VID_PID(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610): + case VID_PID(USB_VENDOR_ID_UGTIZER, + USB_DEVICE_ID_UGTIZER_TABLET_GT5040): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_G540): case VID_PID(USB_VENDOR_ID_UGEE, From b59f38dbfd5d19eb7e03d8b639f0c0d385ba8cc5 Mon Sep 17 00:00:00 2001 From: Harry Cutts Date: Wed, 21 Oct 2020 06:56:12 -0700 Subject: [PATCH 017/710] HID: logitech-hidpp: Add PID for MX Anywhere 2 It seems that the PID 0x4072 was missing from the list Logitech gave me for this mouse, as I found one with it in the wild (with which I tested this patch). Fixes: 4435ff2f09a2 ("HID: logitech: Enable high-resolution scrolling on Logitech mice") Signed-off-by: Harry Cutts Acked-by: Peter Hutterer Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index b8b53dc95e86..730036650f7d 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -3947,6 +3947,7 @@ static const struct hid_device_id hidpp_devices[] = { LDJ_DEVICE(0x405e), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, { /* Mouse Logitech MX Anywhere 2 */ LDJ_DEVICE(0x404a), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, + { LDJ_DEVICE(0x4072), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, { LDJ_DEVICE(0xb013), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, { LDJ_DEVICE(0xb018), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, { LDJ_DEVICE(0xb01f), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, From 3c785a06dee99501a17f8e8cf29b2b7e3f1e94ea Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 19 Oct 2020 09:48:14 +0200 Subject: [PATCH 018/710] HID: ite: Replace ABS_MISC 120/121 events with touchpad on/off keypresses The usb-hid keyboard-dock for the Acer Switch 10 SW5-012 model declares an application and hid-usage page of 0x0088 for the INPUT(4) report which it sends. This reports contains 2 8-bit fields which are declared as HID_MAIN_ITEM_VARIABLE. The keyboard-touchpad combo never actually generates this report, except when the touchpad is toggled on/off with the Fn + F7 hotkey combo. The toggle on/off is handled inside the keyboard-dock, when the touchpad is toggled off it simply stops sending events. When the touchpad is toggled on/off an INPUT(4) report is generated with the first content byte set to 120/121, before this commit the kernel would report this as ABS_MISC 120/121 events. Patch the descriptor to replace the HID_MAIN_ITEM_VARIABLE with HID_MAIN_ITEM_RELATIVE (because no key-presss release events are send) and add mappings for the 0x00880078 and 0x00880079 usages to generate touchpad on/off key events when the touchpad is toggled on/off. Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-ite.c | 61 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 044a93f3c117..742c052b0110 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -11,6 +11,48 @@ #include "hid-ids.h" +#define QUIRK_TOUCHPAD_ON_OFF_REPORT BIT(0) + +static __u8 *ite_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) +{ + unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); + + if (quirks & QUIRK_TOUCHPAD_ON_OFF_REPORT) { + if (*rsize == 188 && rdesc[162] == 0x81 && rdesc[163] == 0x02) { + hid_info(hdev, "Fixing up ITE keyboard report descriptor\n"); + rdesc[163] = HID_MAIN_ITEM_RELATIVE; + } + } + + return rdesc; +} + +static int ite_input_mapping(struct hid_device *hdev, + struct hid_input *hi, struct hid_field *field, + struct hid_usage *usage, unsigned long **bit, + int *max) +{ + + unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); + + if ((quirks & QUIRK_TOUCHPAD_ON_OFF_REPORT) && + (usage->hid & HID_USAGE_PAGE) == 0x00880000) { + if (usage->hid == 0x00880078) { + /* Touchpad on, userspace expects F22 for this */ + hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_F22); + return 1; + } + if (usage->hid == 0x00880079) { + /* Touchpad off, userspace expects F23 for this */ + hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_F23); + return 1; + } + return -1; + } + + return 0; +} + static int ite_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { @@ -37,13 +79,27 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field, return 0; } +static int ite_probe(struct hid_device *hdev, const struct hid_device_id *id) +{ + int ret; + + hid_set_drvdata(hdev, (void *)id->driver_data); + + ret = hid_open_report(hdev); + if (ret) + return ret; + + return hid_hw_start(hdev, HID_CONNECT_DEFAULT); +} + static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_SYNAPTICS, - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012), + .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_SYNAPTICS, @@ -55,6 +111,9 @@ MODULE_DEVICE_TABLE(hid, ite_devices); static struct hid_driver ite_driver = { .name = "itetech", .id_table = ite_devices, + .probe = ite_probe, + .report_fixup = ite_report_fixup, + .input_mapping = ite_input_mapping, .event = ite_event, }; module_hid_driver(ite_driver); From 652f3d00de523a17b0cebe7b90debccf13aa8c31 Mon Sep 17 00:00:00 2001 From: Frank Yang Date: Fri, 21 Aug 2020 03:16:50 +0900 Subject: [PATCH 019/710] HID: cypress: Support Varmilo Keyboards' media hotkeys The Varmilo VA104M Keyboard (04b4:07b1, reported as Varmilo Z104M) exposes media control hotkeys as a USB HID consumer control device, but these keys do not work in the current (5.8-rc1) kernel due to the incorrect HID report descriptor. Fix the problem by modifying the internal HID report descriptor. More specifically, the keyboard report descriptor specifies the logical boundary as 572~10754 (0x023c ~ 0x2a02) while the usage boundary is specified as 0~10754 (0x00 ~ 0x2a02). This results in an incorrect interpretation of input reports, causing inputs to be ignored. By setting the Logical Minimum to zero, we align the logical boundary with the Usage ID boundary. Some notes: * There seem to be multiple variants of the VA104M keyboard. This patch specifically targets 04b4:07b1 variant. * The device works out-of-the-box on Windows platform with the generic consumer control device driver (hidserv.inf). This suggests that Windows either ignores the Logical Minimum/Logical Maximum or interprets the Usage ID assignment differently from the linux implementation; Maybe there are other devices out there that only works on Windows due to this problem? Signed-off-by: Frank Yang Signed-off-by: Jiri Kosina --- drivers/hid/hid-cypress.c | 44 ++++++++++++++++++++++++++++++++++----- drivers/hid/hid-ids.h | 2 ++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c index a50ba4a4a1d7..b88f889b3932 100644 --- a/drivers/hid/hid-cypress.c +++ b/drivers/hid/hid-cypress.c @@ -23,19 +23,17 @@ #define CP_2WHEEL_MOUSE_HACK 0x02 #define CP_2WHEEL_MOUSE_HACK_ON 0x04 +#define VA_INVAL_LOGICAL_BOUNDARY 0x08 + /* * Some USB barcode readers from cypress have usage min and usage max in * the wrong order */ -static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, +static __u8 *cp_rdesc_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); unsigned int i; - if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX)) - return rdesc; - if (*rsize < 4) return rdesc; @@ -48,6 +46,40 @@ static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, return rdesc; } +static __u8 *va_logical_boundary_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + /* + * Varmilo VA104M (with VID Cypress and device ID 07B1) incorrectly + * reports Logical Minimum of its Consumer Control device as 572 + * (0x02 0x3c). Fix this by setting its Logical Minimum to zero. + */ + if (*rsize == 25 && + rdesc[0] == 0x05 && rdesc[1] == 0x0c && + rdesc[2] == 0x09 && rdesc[3] == 0x01 && + rdesc[6] == 0x19 && rdesc[7] == 0x00 && + rdesc[11] == 0x16 && rdesc[12] == 0x3c && rdesc[13] == 0x02) { + hid_info(hdev, + "fixing up varmilo VA104M consumer control report descriptor\n"); + rdesc[12] = 0x00; + rdesc[13] = 0x00; + } + return rdesc; +} + +static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); + + if (quirks & CP_RDESC_SWAPPED_MIN_MAX) + rdesc = cp_rdesc_fixup(hdev, rdesc, rsize); + if (quirks & VA_INVAL_LOGICAL_BOUNDARY) + rdesc = va_logical_boundary_fixup(hdev, rdesc, rsize); + + return rdesc; +} + static int cp_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) @@ -128,6 +160,8 @@ static const struct hid_device_id cp_devices[] = { .driver_data = CP_RDESC_SWAPPED_MIN_MAX }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE), .driver_data = CP_2WHEEL_MOUSE_HACK }, + { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_VARMILO_VA104M_07B1), + .driver_data = VA_INVAL_LOGICAL_BOUNDARY }, { } }; MODULE_DEVICE_TABLE(hid, cp_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 49d68bbf4852..81d99c47c2e5 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -331,6 +331,8 @@ #define USB_DEVICE_ID_CYPRESS_BARCODE_4 0xed81 #define USB_DEVICE_ID_CYPRESS_TRUETOUCH 0xc001 +#define USB_DEVICE_ID_CYPRESS_VARMILO_VA104M_07B1 0X07b1 + #define USB_VENDOR_ID_DATA_MODUL 0x7374 #define USB_VENDOR_ID_DATA_MODUL_EASYMAXTOUCH 0x1201 From 1811977cb11354aef8cbd13e35ff50db716728a4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 30 Sep 2020 22:52:31 +0200 Subject: [PATCH 020/710] HID: add support for Sega Saturn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This device needs HID_QUIRK_MULTI_INPUT in order to be presented to userspace in a consistent way. Reported-and-tested-by: David Gámiz Jiménez Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 81d99c47c2e5..28ecd635edc1 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -487,6 +487,7 @@ #define USB_DEVICE_ID_PENPOWER 0x00f4 #define USB_VENDOR_ID_GREENASIA 0x0e8f +#define USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR 0x3010 #define USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD 0x3013 #define USB_VENDOR_ID_GRETAGMACBETH 0x0971 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 7a2be0205dfd..b154e11d43bf 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -83,6 +83,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, From 6ff7cb371c4bea3dba03a56d774da925e78a5087 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 29 Sep 2020 17:28:42 -0400 Subject: [PATCH 021/710] tools/power turbostat: adjust for temperature offset cpu1: MSR_IA32_TEMPERATURE_TARGET: 0x05640000 (95 C) (100 default - 5 offset) Account for the new "offset" field in MSR_TEMPERATURE_TARGET. While this field is usually zero, ignoring it results in over-stating the current temperature, both per-core and per-package. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 62 +++++++++++++-------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0869f791ed14..4122468fb73b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4785,12 +4785,33 @@ double discover_bclk(unsigned int family, unsigned int model) * below this value, including the Digital Thermal Sensor (DTS), * Package Thermal Management Sensor (PTM), and thermal event thresholds. */ -int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int read_tcc_activation_temp() { unsigned long long msr; - unsigned int target_c_local; - int cpu; + unsigned int tcc, target_c, offset_c; + /* Temperature Target MSR is Nehalem and newer only */ + if (!do_nhm_platform_info) + return 0; + + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) + return 0; + + target_c = (msr >> 16) & 0xFF; + + offset_c = (msr >> 24) & 0xF; + + tcc = target_c - offset_c; + + if (!quiet) + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", + base_cpu, msr, tcc, target_c, offset_c); + + return tcc; +} + +int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ /* tcc_activation_temp is used only for dts or ptm */ if (!(do_dts || do_ptm)) return 0; @@ -4799,43 +4820,18 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; - cpu = t->cpu_id; - if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); - return -1; - } - if (tcc_activation_temp_override != 0) { tcc_activation_temp = tcc_activation_temp_override; - fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", - cpu, tcc_activation_temp); + fprintf(outf, "Using cmdline TCC Target (%d C)\n", tcc_activation_temp); return 0; } - /* Temperature Target MSR is Nehalem and newer only */ - if (!do_nhm_platform_info) - goto guess; + tcc_activation_temp = read_tcc_activation_temp(); + if (tcc_activation_temp) + return 0; - if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) - goto guess; - - target_c_local = (msr >> 16) & 0xFF; - - if (!quiet) - fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", - cpu, msr, target_c_local); - - if (!target_c_local) - goto guess; - - tcc_activation_temp = target_c_local; - - return 0; - -guess: tcc_activation_temp = TJMAX_DEFAULT; - fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", - cpu, tcc_activation_temp); + fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp); return 0; } From 3d7772ea5602b88c7c7f0a50d512171a2eed6659 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 30 Sep 2020 20:58:15 -0400 Subject: [PATCH 022/710] tools/power turbostat: harden against cpu hotplug turbostat tends to get confused when CPUs are added and removed while it is running. There are races, such as checking the current cpu, and then reading a sysfs file that depends on that cpu number. Close the two issues that seem to come up the most. First, there is an infinite reset loop detector -- change that to allow more resets before giving up. Secondly, one of those file reads didn't really need to exit the program on failure... Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4122468fb73b..74d2fc6fc78a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1894,7 +1894,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int i; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu); return -1; } @@ -2764,7 +2764,12 @@ int get_thread_siblings(struct cpu_topology *thiscpu) sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); - filep = fopen_or_die(path, "r"); + filep = fopen(path, "r"); + + if (!filep) { + warnx("%s: open failed", path); + return -1; + } do { offset -= BITMASK_SIZE; if (fscanf(filep, "%lx%c", &map, &character) != 2) @@ -2877,7 +2882,7 @@ void re_initialize(void) { free_all_buffers(); setup_all_buffers(); - printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); + fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); } void set_max_cpu_num(void) @@ -3331,7 +3336,7 @@ restart: if (retval < -1) { exit(retval); } else if (retval == -1) { - if (restarted > 1) { + if (restarted > 10) { exit(retval); } re_initialize(); @@ -3926,7 +3931,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); return -1; } @@ -3970,7 +3975,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4058,7 +4063,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4439,7 +4444,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4511,7 +4516,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu = t->cpu_id; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); return -1; } From fbc81ec5b85d43a4b22e49ec0e643fa7dec2ea40 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 3 Oct 2020 17:28:27 +0200 Subject: [PATCH 023/710] efi/arm: set HSCTLR Thumb2 bit correctly for HVC calls from HYP Commit db227c19e68db353 ("ARM: 8985/1: efi/decompressor: deal with HYP mode boot gracefully") updated the EFI entry code to permit firmware to invoke the EFI stub loader in HYP mode, with the MMU either enabled or disabled, neither of which is permitted by the EFI spec, but which does happen in the field. In the MMU on case, we remain in HYP mode as configured by the firmware, and rely on the fact that any HVC instruction issued in this mode will be dispatched via the SVC slot in the HYP vector table. However, this slot will point to a Thumb2 symbol if the kernel is built in Thumb2 mode, and so we have to configure HSCTLR to ensure that the exception handlers are invoked in Thumb2 mode as well. Fixes: db227c19e68db353 ("ARM: 8985/1: efi/decompressor: deal with HYP mode boot gracefully") Signed-off-by: Ard Biesheuvel --- arch/arm/boot/compressed/head.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2e04ec5b5446..caa27322a0ab 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1472,6 +1472,9 @@ ENTRY(efi_enter_kernel) @ issued from HYP mode take us to the correct handler code. We @ will disable the MMU before jumping to the kernel proper. @ + ARM( bic r1, r1, #(1 << 30) ) @ clear HSCTLR.TE + THUMB( orr r1, r1, #(1 << 30) ) @ set HSCTLR.TE + mcr p15, 4, r1, c1, c0, 0 adr r0, __hyp_reentry_vectors mcr p15, 4, r0, c12, c0, 0 @ set HYP vector base (HVBAR) isb From fe5186cf12e30facfe261e9be6c7904a170bd822 Mon Sep 17 00:00:00 2001 From: Vamshi K Sthambamkadi Date: Fri, 23 Oct 2020 17:24:39 +0530 Subject: [PATCH 024/710] efivarfs: fix memory leak in efivarfs_create() kmemleak report: unreferenced object 0xffff9b8915fcb000 (size 4096): comm "efivarfs.sh", pid 2360, jiffies 4294920096 (age 48.264s) hex dump (first 32 bytes): 2d 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -............... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000cc4d897c>] kmem_cache_alloc_trace+0x155/0x4b0 [<000000007d1dfa72>] efivarfs_create+0x6e/0x1a0 [<00000000e6ee18fc>] path_openat+0xe4b/0x1120 [<000000000ad0414f>] do_filp_open+0x91/0x100 [<00000000ce93a198>] do_sys_openat2+0x20c/0x2d0 [<000000002a91be6d>] do_sys_open+0x46/0x80 [<000000000a854999>] __x64_sys_openat+0x20/0x30 [<00000000c50d89c9>] do_syscall_64+0x38/0x90 [<00000000cecd6b5f>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 In efivarfs_create(), inode->i_private is setup with efivar_entry object which is never freed. Cc: Signed-off-by: Vamshi K Sthambamkadi Link: https://lore.kernel.org/r/20201023115429.GA2479@cosmos Signed-off-by: Ard Biesheuvel --- fs/efivarfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 15880a68faad..f943fd0b0699 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -21,6 +21,7 @@ LIST_HEAD(efivarfs_list); static void efivarfs_evict_inode(struct inode *inode) { clear_inode(inode); + kfree(inode->i_private); } static const struct super_operations efivarfs_ops = { From fdc24d722f353610b6aad23d99147632a19a6138 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 9 Oct 2020 09:44:23 +0200 Subject: [PATCH 025/710] =?UTF-8?q?MAINTAINERS:=20Add=20Jernej=20=C5=A0kra?= =?UTF-8?q?bec=20as=20a=20reviewer=20for=20Allwinner=20SoCs=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jernej has helped a lot by reviewing patches recently, so let's make it official. Signed-off-by: Maxime Ripard Acked-by: Chen-Yu Tsai Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201009074423.10708-1-maxime@cerno.tech --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e73636b75f29..b5c4163577fb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1546,6 +1546,7 @@ F: drivers/clk/sunxi/ ARM/Allwinner sunXi SoC support M: Maxime Ripard M: Chen-Yu Tsai +R: Jernej Skrabec L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git From a2089ac7f8dc682ef52ed74b52997d36cde76d05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20P=C3=A9ron?= Date: Sun, 11 Oct 2020 23:15:14 +0200 Subject: [PATCH 026/710] arm64: dts: allwinner: pinetab: Drop unnecessary address/size-cells information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit make dtbs_check warm about unknown address/size-cells property in the pinetab device-tree. This is because these information are not necessary. Drop them. Signed-off-by: Clément Péron Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201011211514.155266-1-peron.clem@gmail.com --- arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts index 3ab0f0347bc9..0494bfaf2ffa 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts @@ -122,9 +122,6 @@ status = "okay"; port { - #address-cells = <1>; - #size-cells = <0>; - csi_ep: endpoint { remote-endpoint = <&ov5640_ep>; bus-width = <8>; From 97a38c1c213b162aa577299de698f39c18ba696b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20P=C3=A9ron?= Date: Sun, 18 Oct 2020 19:24:09 +0200 Subject: [PATCH 027/710] arm64: dts: allwinner: beelink-gs1: Enable both RGMII RX/TX delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before the commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"), the software overwrite for RX/TX delays of the RTL8211e were not working properly and the Beelink GS1 had both RX/TX delay of RGMII interface set using pull-up on the TXDLY and RXDLY pins. Now that these delays are working properly they overwrite the HW config and set this to 'rgmii' meaning no delay on both RX/TX. This makes the ethernet of this board not working anymore. Set the phy-mode to 'rgmii-id' meaning RGMII with RX/TX delays in the device-tree to keep the correct configuration. Fixes: 089bee8dd119 ("arm64: dts: allwinner: h6: Introduce Beelink GS1 board") Signed-off-by: Clément Péron Signed-off-by: Maxime Ripard Acked-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20201018172409.1754775-1-peron.clem@gmail.com --- arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 3f7ceeb1a767..7c9dbde645b5 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -97,7 +97,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_aldo2>; status = "okay"; From 419c65f5000a6c25597ea52488528d75b287cbd0 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 19 Oct 2020 06:34:49 +0000 Subject: [PATCH 028/710] arm64: dts: allwinner: Pine H64: Enable both RGMII RX/TX delay Since commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"), the network is unusable on PineH64 model A. This is due to phy-mode incorrectly set to rgmii instead of rgmii-id. Fixes: 729e1ffcf47e ("arm64: allwinner: h6: add support for the Ethernet on Pine H64") Signed-off-by: Corentin Labbe Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201019063449.33316-1-clabbe@baylibre.com --- arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts index af85b2074867..961732c52aa0 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts @@ -100,7 +100,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_gmac_3v3>; allwinner,rx-delay-ps = <200>; From d7cdff444579e6659459b2fe04340ebb27628d5e Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Thu, 22 Oct 2020 20:58:39 +0200 Subject: [PATCH 029/710] arm64: dts: allwinner: a64: OrangePi Win: Fix ethernet node RX/TX delay on OrangePi Win board is set on PHY. Reflect that in ethernet node. Fixes: 93d6a27cfcc0 ("arm64: dts: allwinner: a64: Orange Pi Win: Add Ethernet node") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201022185839.2779245-1-jernej.skrabec@siol.net --- arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts index d894ec5fa8a1..70e31743f0ba 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts @@ -120,7 +120,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_gmac_3v3>; status = "okay"; From 927f42fcc1b4f7d04a2ac5cf02f25612aa8923a4 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Thu, 22 Oct 2020 23:13:01 +0200 Subject: [PATCH 030/710] arm64: dts: allwinner: a64: Pine64 Plus: Fix ethernet node According to board schematic, PHY provides both, RX and TX delays. However, according to "fix" Realtek provided for this board, only TX delay should be provided by PHY. Tests show that both variants work but TX only PHY delay works slightly better. Update ethernet node to reflect the fact that PHY provides TX delay. Fixes: 94f442886711 ("arm64: dts: allwinner: A64: Restore EMAC changes") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201022211301.3548422-1-jernej.skrabec@siol.net --- arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index b26181cf9095..b54099b654c8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -13,7 +13,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-txid"; phy-handle = <&ext_rgmii_phy>; status = "okay"; }; From b34bf9f6a623ddb82600a5ed5c644224122395e1 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Fri, 23 Oct 2020 20:48:58 +0200 Subject: [PATCH 031/710] arm64: dts: allwinner: h5: OrangePi PC2: Fix ethernet node RX and TX delay are provided by ethernet PHY. Reflect that in ethernet node. Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201023184858.3272918-1-jernej.skrabec@siol.net --- arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts index 7d7aad18f078..8bf2db9dcbda 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts @@ -123,7 +123,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From b3eec3212e66ece33f69be0de98d54e67834e798 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 25 Oct 2020 09:19:49 +0100 Subject: [PATCH 032/710] ARM: dts: sun8i: r40: bananapi-m2-ultra: Fix ethernet node Ethernet PHY on BananaPi M2 Ultra provides RX and TX delays. Fix ethernet node to reflect that fact. Fixes: c36fd5a48bd2 ("ARM: dts: sun8i: r40: bananapi-m2-ultra: Enable GMAC ethernet controller") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201025081949.783443-1-jernej.skrabec@siol.net --- arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts index 2fc62ef0cb3e..a6a1087a0c9b 100644 --- a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts +++ b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts @@ -129,7 +129,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_dc1sw>; status = "okay"; }; From 8d80e2f00a42ef10b54e1b2d9e97314f8fd046c0 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:06 +0800 Subject: [PATCH 033/710] Revert "arm: sun8i: orangepi-pc-plus: Set EMAC activity LEDs to active high" This reverts commit 75ee680cbd2e4d0156b94f9fec50076361ab12f2. Turns out the activity and link LEDs on the RJ45 port are active low, just like on the Orange Pi PC. Revert the commit that says otherwise. Fixes: 75ee680cbd2e ("arm: sun8i: orangepi-pc-plus: Set EMAC activity LEDs to active high") Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Tested-by: Jernej Skrabec Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-1-wens@kernel.org --- arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts index 71fb73208939..babf4cf1b2f6 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts @@ -53,11 +53,6 @@ }; }; -&emac { - /* LEDs changed to active high on the plus */ - /delete-property/ allwinner,leds-active-low; -}; - &mmc1 { vmmc-supply = <®_vcc3v3>; bus-width = <4>; From e76724153f5b4539802cc21b2c6131058668a1c6 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:07 +0800 Subject: [PATCH 034/710] ARM: dts: sun6i: a31-hummingbird: Enable RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the A31 Hummingbird has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: c220aec2bb79 ("ARM: dts: sun6i: Add Merrii A31 Hummingbird support") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-2-wens@kernel.org --- arch/arm/boot/dts/sun6i-a31-hummingbird.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts index 049e6ab3cf56..73de34ae37fd 100644 --- a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts +++ b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts @@ -154,7 +154,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 353c3de1303fc93032164402c0eb8550ecd6f154 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:08 +0800 Subject: [PATCH 035/710] ARM: dts: sun7i: cubietruck: Enable RGMII RX/TX delay on Ethernet PHY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Ethernet PHY on the Cubietruck has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 67073d97672d ("ARM: dts: sun7i: cubietruck: Enable the GMAC") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Tested-by: Emilio López Reviewed-by: Emilio López Link: https://lore.kernel.org/r/20201024162515.30032-3-wens@kernel.org --- arch/arm/boot/dts/sun7i-a20-cubietruck.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts index 8c8dee6ea461..9109ca0919ad 100644 --- a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts +++ b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts @@ -151,7 +151,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From f94f78bd93f567c022f594589dbeecdf59931365 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:09 +0800 Subject: [PATCH 036/710] ARM: dts: sun7i: bananapi-m1-plus: Enable RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Bananapi M1+ has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 04c85ecad32a ("ARM: dts: sun7i: Add dts file for Bananapi M1 Plus board") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-4-wens@kernel.org --- arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts index 32d5d45a35c0..8945dbb114a2 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts @@ -130,7 +130,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_gmac_3v3>; status = "okay"; }; From e080ab31a0aa126b0a7e4f67f2b01b371b852c88 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:10 +0800 Subject: [PATCH 037/710] ARM: dts: sun8i: h3: orangepi-plus2e: Enable RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Orange Pi Plus 2E has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)") Fixes: 7a78ef92cdc5 ("ARM: sun8i: h3: Enable EMAC with external PHY on Orange Pi Plus 2E") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Tested-by: Jernej Skrabec Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-5-wens@kernel.org --- arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts index 6dbf7b2e0c13..b6ca45d18e51 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts @@ -67,7 +67,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 57dbe558457bf4042169bc1f334e3b53a8480a1c Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:11 +0800 Subject: [PATCH 038/710] ARM: dts: sun8i: a83t: Enable both RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Bananapi M3 and Cubietruck Plus have the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 039359948a4b ("ARM: dts: sun8i: a83t: Enable Ethernet on two boards") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-6-wens@kernel.org --- arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts | 2 +- arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts index 9d34eabba121..431f70234d36 100644 --- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts +++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts @@ -131,7 +131,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_sw>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; allwinner,rx-delay-ps = <700>; allwinner,tx-delay-ps = <700>; status = "okay"; diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index d9be511f054f..d8326a5c681d 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -183,7 +183,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_dldo4>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From b1064037e8ecf09d587b7b4966eebe0c362908e5 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:12 +0800 Subject: [PATCH 039/710] ARM: dts: sun9i: Enable both RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Cubieboard 4 and A80 Optimus have the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 98048143b7f8 ("ARM: dts: sun9i: cubieboard4: Enable GMAC") Fixes: bc9bd03a44f9 ("ARM: dts: sun9i: a80-optimus: Enable GMAC") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-7-wens@kernel.org --- arch/arm/boot/dts/sun9i-a80-cubieboard4.dts | 2 +- arch/arm/boot/dts/sun9i-a80-optimus.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts index d3b337b043a1..484b93df20cb 100644 --- a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts +++ b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts @@ -129,7 +129,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_cldo1>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun9i-a80-optimus.dts b/arch/arm/boot/dts/sun9i-a80-optimus.dts index bbc6335e5631..5c3580d712e4 100644 --- a/arch/arm/boot/dts/sun9i-a80-optimus.dts +++ b/arch/arm/boot/dts/sun9i-a80-optimus.dts @@ -124,7 +124,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_cldo1>; status = "okay"; }; From 3914160ffc0bf762d6d605d4b27036b7b89367ea Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:13 +0800 Subject: [PATCH 040/710] ARM: dts: sunxi: bananapi-m2-plus: Enable RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Bananapi M2+ has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 8c7ba536e709 ("ARM: sun8i: bananapi-m2-plus: Enable dwmac-sun8i") Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)") Fixes: aa8fee415f46 ("ARM: dts: sun8i: h3: Split out non-SoC-specific parts of Bananapi M2 Plus") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Tested-by: Jernej Skrabec Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-8-wens@kernel.org --- arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi index 39263e74fbb5..8e5cb3b3fd68 100644 --- a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi +++ b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi @@ -126,7 +126,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 2bd8570d20c88909b8be3251727a26476b02652c Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:14 +0800 Subject: [PATCH 041/710] arm64: dts: allwinner: h5: libretech-all-h5-cc: Enable RGMII RX/TX delay on PHY The Ethernet PHY on the Libre Computer ALL-H5-CC has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 60d0426d7603 ("arm64: dts: allwinner: h5: Add Libre Computer ALL-H5-CC H5 board") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-9-wens@kernel.org --- arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts index df1b9263ad0e..6e30a564c87f 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts @@ -36,7 +36,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; /delete-property/ allwinner,leds-active-low; status = "okay"; }; From 1a9a8910b2153cd3c4f3f2f8defcb853ead3b1fd Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:15 +0800 Subject: [PATCH 042/710] arm64: dts: allwinner: a64: bananapi-m64: Enable RGMII RX/TX delay on PHY The Ethernet PHY on the Bananapi M64 has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: e7295499903d ("arm64: allwinner: bananapi-m64: Enable dwmac-sun8i") Fixes: 94f442886711 ("arm64: dts: allwinner: A64: Restore EMAC changes") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Tested-by: Corentin Labbe Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-10-wens@kernel.org --- arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 3ea5182ca489..e5e840b9fbb4 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -105,7 +105,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_dc1sw>; status = "okay"; From dd26209bc56886cacdbd828571e54a6bca251e55 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 14 Oct 2020 13:46:37 +0300 Subject: [PATCH 043/710] pinctrl: intel: Fix 2 kOhm bias which is 833 Ohm 2 kOhm bias was never an option in Intel GPIO hardware, the available matrix is: 000 none 001 1 kOhm (if available) 010 5 kOhm 100 20 kOhm As easy to get the 3 resistors are gated separately and according to parallel circuits calculations we may get combinations of the above where the result is always strictly less than minimal resistance. Hence, additional values can be: 011 ~833.3 Ohm 101 ~952.4 Ohm 110 ~4 kOhm 111 ~800 Ohm That said, convert TERM definitions to be the bit masks to reflect the above. While at it, enable the same setting for pull down case. Fixes: 7981c0015af2 ("pinctrl: intel: Add Intel Sunrisepoint pin controller and GPIO support") Cc: Jamie McClymont Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-intel.c | 32 ++++++++++++++++++--------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 154ce3f908cd..f97b049674b7 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -62,10 +62,10 @@ #define PADCFG1_TERM_UP BIT(13) #define PADCFG1_TERM_SHIFT 10 #define PADCFG1_TERM_MASK GENMASK(12, 10) -#define PADCFG1_TERM_20K 4 -#define PADCFG1_TERM_2K 3 -#define PADCFG1_TERM_5K 2 -#define PADCFG1_TERM_1K 1 +#define PADCFG1_TERM_20K BIT(2) +#define PADCFG1_TERM_5K BIT(1) +#define PADCFG1_TERM_1K BIT(0) +#define PADCFG1_TERM_833 (BIT(1) | BIT(0)) #define PADCFG2 0x008 #define PADCFG2_DEBEN BIT(0) @@ -549,12 +549,12 @@ static int intel_config_get_pull(struct intel_pinctrl *pctrl, unsigned int pin, return -EINVAL; switch (term) { + case PADCFG1_TERM_833: + *arg = 833; + break; case PADCFG1_TERM_1K: *arg = 1000; break; - case PADCFG1_TERM_2K: - *arg = 2000; - break; case PADCFG1_TERM_5K: *arg = 5000; break; @@ -570,6 +570,11 @@ static int intel_config_get_pull(struct intel_pinctrl *pctrl, unsigned int pin, return -EINVAL; switch (term) { + case PADCFG1_TERM_833: + if (!(community->features & PINCTRL_FEATURE_1K_PD)) + return -EINVAL; + *arg = 833; + break; case PADCFG1_TERM_1K: if (!(community->features & PINCTRL_FEATURE_1K_PD)) return -EINVAL; @@ -685,12 +690,12 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned int pin, case 5000: value |= PADCFG1_TERM_5K << PADCFG1_TERM_SHIFT; break; - case 2000: - value |= PADCFG1_TERM_2K << PADCFG1_TERM_SHIFT; - break; case 1000: value |= PADCFG1_TERM_1K << PADCFG1_TERM_SHIFT; break; + case 833: + value |= PADCFG1_TERM_833 << PADCFG1_TERM_SHIFT; + break; default: ret = -EINVAL; } @@ -714,6 +719,13 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned int pin, } value |= PADCFG1_TERM_1K << PADCFG1_TERM_SHIFT; break; + case 833: + if (!(community->features & PINCTRL_FEATURE_1K_PD)) { + ret = -EINVAL; + break; + } + value |= PADCFG1_TERM_833 << PADCFG1_TERM_SHIFT; + break; default: ret = -EINVAL; } From f3c75e7a9349d1d33eb53ddc1b31640994969f73 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 14 Oct 2020 13:46:38 +0300 Subject: [PATCH 044/710] pinctrl: intel: Set default bias in case no particular value given When GPIO library asks pin control to set the bias, it doesn't pass any value of it and argument is considered boolean (and this is true for ACPI GpioIo() / GpioInt() resources, by the way). Thus, individual drivers must behave well, when they got the resistance value of 1 Ohm, i.e. transforming it to sane default. In case of Intel pin control hardware the 5 kOhm sounds plausible because on one hand it's a minimum of resistors present in all hardware generations and at the same time it's high enough to minimize leakage current (will be only 200 uA with the above choice). Fixes: e57725eabf87 ("pinctrl: intel: Add support for hardware debouncer") Reported-by: Jamie McClymont Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-intel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index f97b049674b7..1c10ab184783 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -683,6 +683,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned int pin, value |= PADCFG1_TERM_UP; + /* Set default strength value in case none is given */ + if (arg == 1) + arg = 5000; + switch (arg) { case 20000: value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT; @@ -705,6 +709,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned int pin, case PIN_CONFIG_BIAS_PULL_DOWN: value &= ~(PADCFG1_TERM_UP | PADCFG1_TERM_MASK); + /* Set default strength value in case none is given */ + if (arg == 1) + arg = 5000; + switch (arg) { case 20000: value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT; From 3fe37204c9a233d1bd852b98bca43ec61854ba78 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 16 Oct 2020 23:35:44 +0800 Subject: [PATCH 045/710] gpio: dwapb: Fix missing conversion to GPIO-lib-based IRQ-chip Commit 0ea683931adb ("gpio: dwapb: Convert driver to using the GPIO-lib-based IRQ-chip") missed the case in dwapb_irq_set_wake(). Without this fix, probing the dwapb gpio driver will hit a error: "address between user and kernel address ranges" on a Ampere armv8a server and cause a panic. Fixes: 0ea683931adb ("gpio: dwapb: Convert driver to using the GPIO-lib-based IRQ-chip") Signed-off-by: Jia He Reviewed-by: Andy Shevchenko Acked-by: Serge Semin Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-dwapb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index a5b326754124..2a9046c0fb16 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -343,8 +343,8 @@ static int dwapb_irq_set_type(struct irq_data *d, u32 type) #ifdef CONFIG_PM_SLEEP static int dwapb_irq_set_wake(struct irq_data *d, unsigned int enable) { - struct irq_chip_generic *igc = irq_data_get_irq_chip_data(d); - struct dwapb_gpio *gpio = igc->private; + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct dwapb_gpio *gpio = to_dwapb_gpio(gc); struct dwapb_context *ctx = gpio->ports[0].ctx; irq_hw_number_t bit = irqd_to_hwirq(d); From 560b6ac37a87fcb78d580437e3e0bc2b6b5b0295 Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Mon, 19 Oct 2020 12:50:26 +0800 Subject: [PATCH 046/710] gpio: aspeed: fix ast2600 bank properties GPIO_T is mapped to the most significant byte of input/output mask, and the byte in "output" mask should be 0 because GPIO_T is input only. All the other bits need to be 1 because GPIO_Q/R/S support both input and output modes. Fixes: ab4a85534c3e ("gpio: aspeed: Add in ast2600 details to Aspeed driver") Signed-off-by: Billy Tsai Reviewed-by: Tao Ren Reviewed-by: Joel Stanley Reviewed-by: Andrew Jeffery Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aspeed.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index e44d5de2a120..b966f5e28ebf 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -1114,6 +1114,7 @@ static const struct aspeed_gpio_config ast2500_config = static const struct aspeed_bank_props ast2600_bank_props[] = { /* input output */ + {4, 0xffffffff, 0x00ffffff}, /* Q/R/S/T */ {5, 0xffffffff, 0xffffff00}, /* U/V/W/X */ {6, 0x0000ffff, 0x0000ffff}, /* Y/Z */ { }, From 402dab548d0da38b260f3843225cdfd37d91f512 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Oct 2020 10:08:24 +0300 Subject: [PATCH 047/710] hwmon: (pmbus/max20730) use scnprintf() instead of snprintf() The snprintf() function returns the number of characters which would have been printed if there were enough space, but the scnprintf() returns the number of characters which were actually printed. If the buffer is not large enough, then using snprintf() would result in a read overflow and an information leak. Fixes: 8910c0bd533d ("hwmon: (pmbus/max20730) add device monitoring via debugfs") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20201022070824.GC2817762@mwanda Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/max20730.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/pmbus/max20730.c b/drivers/hwmon/pmbus/max20730.c index 57923d72490c..be83b98411c7 100644 --- a/drivers/hwmon/pmbus/max20730.c +++ b/drivers/hwmon/pmbus/max20730.c @@ -122,8 +122,8 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, switch (idx) { case MAX20730_DEBUGFS_VOUT_MIN: ret = VOLT_FROM_REG(data->mfr_voutmin * 10000); - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d.%d\n", - ret / 10000, ret % 10000); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d.%d\n", + ret / 10000, ret % 10000); break; case MAX20730_DEBUGFS_FREQUENCY: val = (data->mfr_devset1 & MAX20730_MFR_DEVSET1_FSW_MASK) @@ -141,7 +141,7 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, ret = 800; else ret = 900; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_PG_DELAY: val = (data->mfr_devset1 & MAX20730_MFR_DEVSET1_TSTAT_MASK) @@ -223,7 +223,7 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, case MAX20730_DEBUGFS_OC_PROTECT_MODE: ret = (data->mfr_devset2 & MAX20730_MFR_DEVSET2_OCPM_MASK) >> MAX20730_MFR_DEVSET2_OCPM_BIT_POS; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_SS_TIMING: val = (data->mfr_devset2 & MAX20730_MFR_DEVSET2_SS_MASK) @@ -241,32 +241,32 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, case MAX20730_DEBUGFS_IMAX: ret = (data->mfr_devset2 & MAX20730_MFR_DEVSET2_IMAX_MASK) >> MAX20730_MFR_DEVSET2_IMAX_BIT_POS; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_OPERATION: ret = i2c_smbus_read_byte_data(psu->client, PMBUS_OPERATION); if (ret < 0) return ret; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_ON_OFF_CONFIG: ret = i2c_smbus_read_byte_data(psu->client, PMBUS_ON_OFF_CONFIG); if (ret < 0) return ret; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_SMBALERT_MASK: ret = i2c_smbus_read_word_data(psu->client, PMBUS_SMB_ALERT_MASK); if (ret < 0) return ret; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_VOUT_MODE: ret = i2c_smbus_read_byte_data(psu->client, PMBUS_VOUT_MODE); if (ret < 0) return ret; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_VOUT_COMMAND: ret = i2c_smbus_read_word_data(psu->client, PMBUS_VOUT_COMMAND); @@ -274,8 +274,8 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, return ret; ret = VOLT_FROM_REG(ret * 10000); - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, - "%d.%d\n", ret / 10000, ret % 10000); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, + "%d.%d\n", ret / 10000, ret % 10000); break; case MAX20730_DEBUGFS_VOUT_MAX: ret = i2c_smbus_read_word_data(psu->client, PMBUS_VOUT_MAX); @@ -283,8 +283,8 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, return ret; ret = VOLT_FROM_REG(ret * 10000); - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, - "%d.%d\n", ret / 10000, ret % 10000); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, + "%d.%d\n", ret / 10000, ret % 10000); break; default: len = strlcpy(tbuf, "Invalid\n", DEBUG_FS_DATA_MAX); From 7342ca34d931a357d408aaa25fadd031e46af137 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Thu, 15 Oct 2020 16:40:53 +0800 Subject: [PATCH 048/710] thunderbolt: Add the missed ida_simple_remove() in ring_request_msix() ring_request_msix() misses to call ida_simple_remove() in an error path. Add a label 'err_ida_remove' and jump to it. Fixes: 046bee1f9ab8 ("thunderbolt: Add MSI-X support") Cc: stable@vger.kernel.org Signed-off-by: Jing Xiangfeng Reviewed-by: Andy Shevchenko Signed-off-by: Mika Westerberg --- drivers/thunderbolt/nhi.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 3f79baa54829..e29ac3e3aa1e 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -406,12 +406,23 @@ static int ring_request_msix(struct tb_ring *ring, bool no_suspend) ring->vector = ret; - ring->irq = pci_irq_vector(ring->nhi->pdev, ring->vector); - if (ring->irq < 0) - return ring->irq; + ret = pci_irq_vector(ring->nhi->pdev, ring->vector); + if (ret < 0) + goto err_ida_remove; + + ring->irq = ret; irqflags = no_suspend ? IRQF_NO_SUSPEND : 0; - return request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring); + ret = request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring); + if (ret) + goto err_ida_remove; + + return 0; + +err_ida_remove: + ida_simple_remove(&nhi->msix_ida, ring->vector); + + return ret; } static void ring_release_msix(struct tb_ring *ring) From 472547778de24e2764ab325268dd5b77e6923939 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 22 Oct 2020 13:27:38 -0700 Subject: [PATCH 049/710] selftest/bpf: Fix profiler test using CO-RE relocation for enums Instead of hard-coding invalid pids_cgrp_id, use Kconfig to detect the presence of that enum value and CO-RE to capture its actual value in the hosts's kernel. Fixes: 03d4d13fab3f ("selftests/bpf: Add profiler test") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Tested-by: Song Liu Link: https://lore.kernel.org/bpf/20201022202739.3667367-1-andrii@kernel.org --- tools/testing/selftests/bpf/progs/profiler.inc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 00578311a423..30982a7e4d0f 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -243,7 +243,10 @@ static ino_t get_inode_from_kernfs(struct kernfs_node* node) } } -int pids_cgrp_id = 1; +extern bool CONFIG_CGROUP_PIDS __kconfig __weak; +enum cgroup_subsys_id___local { + pids_cgrp_id___local = 123, /* value doesn't matter */ +}; static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, struct task_struct* task, @@ -253,7 +256,9 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); - if (ENABLE_CGROUP_V1_RESOLVER) { + if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { + int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, + pids_cgrp_id___local); #ifdef UNROLL #pragma unroll #endif @@ -262,7 +267,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, cgroups, subsys[i]); if (subsys != NULL) { int subsys_id = BPF_CORE_READ(subsys, ss, id); - if (subsys_id == pids_cgrp_id) { + if (subsys_id == cgrp_id) { proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); break; From 29813a2297910d5c4be08c7b390054f23dd794a5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 16:53:48 +0100 Subject: [PATCH 050/710] asm-generic: percpu: avoid Wshadow warning Nesting macros that use the same local variable names causes warnings when building with "make W=2": include/asm-generic/percpu.h:117:14: warning: declaration of '__ret' shadows a previous local [-Wshadow] include/asm-generic/percpu.h:126:14: warning: declaration of '__ret' shadows a previous local [-Wshadow] These are fairly harmless, but since the warning comes from a global header, the warning happens every time the headers are included, which is fairly annoying. Rename the variables to avoid shadowing and shut up the warning. Signed-off-by: Arnd Bergmann Reviewed-by: Luc Van Oostenryck Signed-off-by: Dennis Zhou --- include/asm-generic/percpu.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 35e4a53b83e6..6432a7fade91 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -114,21 +114,21 @@ do { \ #define __this_cpu_generic_read_nopreempt(pcp) \ ({ \ - typeof(pcp) __ret; \ + typeof(pcp) ___ret; \ preempt_disable_notrace(); \ - __ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ + ___ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ preempt_enable_notrace(); \ - __ret; \ + ___ret; \ }) #define __this_cpu_generic_read_noirq(pcp) \ ({ \ - typeof(pcp) __ret; \ - unsigned long __flags; \ - raw_local_irq_save(__flags); \ - __ret = raw_cpu_generic_read(pcp); \ - raw_local_irq_restore(__flags); \ - __ret; \ + typeof(pcp) ___ret; \ + unsigned long ___flags; \ + raw_local_irq_save(___flags); \ + ___ret = raw_cpu_generic_read(pcp); \ + raw_local_irq_restore(___flags); \ + ___ret; \ }) #define this_cpu_generic_read(pcp) \ From 33b6c39e747c552fa770eecebd1776f1f4a222b1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 26 Oct 2020 17:10:09 -0700 Subject: [PATCH 051/710] Input: adxl34x - clean up a data type in adxl34x_probe() The "revid" is used to store negative error codes so it should be an int type. Fixes: e27c729219ad ("Input: add driver for ADXL345/346 Digital Accelerometers") Signed-off-by: Dan Carpenter Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20201026072824.GA1620546@mwanda Signed-off-by: Dmitry Torokhov --- drivers/input/misc/adxl34x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/adxl34x.c b/drivers/input/misc/adxl34x.c index 5fe92d4ba3f0..4cc4e8ff42b3 100644 --- a/drivers/input/misc/adxl34x.c +++ b/drivers/input/misc/adxl34x.c @@ -696,7 +696,7 @@ struct adxl34x *adxl34x_probe(struct device *dev, int irq, struct input_dev *input_dev; const struct adxl34x_platform_data *pdata; int err, range, i; - unsigned char revid; + int revid; if (!irq) { dev_err(dev, "no IRQ?\n"); From b1884583fcd17d6a1b1bba94bbb5826e6b5c6e17 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 26 Oct 2020 20:53:57 -0700 Subject: [PATCH 052/710] Input: i8042 - allow insmod to succeed on devices without an i8042 controller The i8042 module exports several symbols which may be used by other modules. Before this commit it would refuse to load (when built as a module itself) on systems without an i8042 controller. This is a problem specifically for the asus-nb-wmi module. Many Asus laptops support the Asus WMI interface. Some of them have an i8042 controller and need to use i8042_install_filter() to filter some kbd events. Other models do not have an i8042 controller (e.g. they use an USB attached kbd). Before this commit the asus-nb-wmi driver could not be loaded on Asus models without an i8042 controller, when the i8042 code was built as a module (as Arch Linux does) because the module_init function of the i8042 module would fail with -ENODEV and thus the i8042_install_filter symbol could not be loaded. This commit fixes this by exiting from module_init with a return code of 0 if no controller is found. It also adds a i8042_present bool to make the module_exit function a no-op in this case and also adds a check for i8042_present to the exported i8042_command function. The latter i8042_present check should not really be necessary because when builtin that function can already be used on systems without an i8042 controller, but better safe then sorry. Reported-and-tested-by: Marius Iacob Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20201008112628.3979-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index d3eda48032e3..944cbb519c6d 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -122,6 +122,7 @@ module_param_named(unmask_kbd_data, i8042_unmask_kbd_data, bool, 0600); MODULE_PARM_DESC(unmask_kbd_data, "Unconditional enable (may reveal sensitive data) of normally sanitize-filtered kbd data traffic debug log [pre-condition: i8042.debug=1 enabled]"); #endif +static bool i8042_present; static bool i8042_bypass_aux_irq_test; static char i8042_kbd_firmware_id[128]; static char i8042_aux_firmware_id[128]; @@ -343,6 +344,9 @@ int i8042_command(unsigned char *param, int command) unsigned long flags; int retval; + if (!i8042_present) + return -1; + spin_lock_irqsave(&i8042_lock, flags); retval = __i8042_command(param, command); spin_unlock_irqrestore(&i8042_lock, flags); @@ -1612,12 +1616,15 @@ static int __init i8042_init(void) err = i8042_platform_init(); if (err) - return err; + return (err == -ENODEV) ? 0 : err; err = i8042_controller_check(); if (err) goto err_platform_exit; + /* Set this before creating the dev to allow i8042_command to work right away */ + i8042_present = true; + pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0); if (IS_ERR(pdev)) { err = PTR_ERR(pdev); @@ -1636,6 +1643,9 @@ static int __init i8042_init(void) static void __exit i8042_exit(void) { + if (!i8042_present) + return; + platform_device_unregister(i8042_platform_device); platform_driver_unregister(&i8042_driver); i8042_platform_exit(); From 31b4d8e172f614adc53ddecb4b6b2f6411a49b84 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 23 Oct 2020 12:44:40 -0700 Subject: [PATCH 053/710] MIPS: export has_transparent_hugepage() for modules MIPS should export its local version of "has_transparent_hugepage" so that loadable modules (dax) can use it. Fixes this build error: ERROR: modpost: "has_transparent_hugepage" [drivers/dax/dax.ko] undefined! Fixes: fd8cfd300019 ("arch: fix has_transparent_hugepage()") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Dan Williams Cc: Vishal Verma Cc: Dave Jiang Cc: linux-nvdimm@lists.01.org Cc: Hugh Dickins Cc: Andrew Morton Signed-off-by: Thomas Bogendoerfer --- arch/mips/mm/tlb-r4k.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 38e2894d5fa3..1b939abbe4ca 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -438,6 +438,7 @@ int has_transparent_hugepage(void) } return mask == PM_HUGE_MASK; } +EXPORT_SYMBOL(has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ From 9fa2e7af3d53a4b769136eccc32c02e128a4ee51 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 22 Oct 2020 01:43:59 +0100 Subject: [PATCH 054/710] ARM: 9019/1: kprobes: Avoid fortify_panic() when copying optprobe template Setting both CONFIG_KPROBES=y and CONFIG_FORTIFY_SOURCE=y on ARM leads to a panic in memcpy() when injecting a kprobe despite the fixes found in commit e46daee53bb5 ("ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE") and commit 0ac569bf6a79 ("ARM: 8834/1: Fix: kprobes: optimized kprobes illegal instruction"). arch/arm/include/asm/kprobes.h effectively declares the target type of the optprobe_template_entry assembly label as a u32 which leads memcpy()'s __builtin_object_size() call to determine that the pointed-to object is of size four. However, the symbol is used as a handle for the optimised probe assembly template that is at least 96 bytes in size. The symbol's use despite its type blows up the memcpy() in ARM's arch_prepare_optimized_kprobe() with a false-positive fortify_panic() when it should instead copy the optimised probe template into place: ``` $ sudo perf probe -a aspeed_g6_pinctrl_probe [ 158.457252] detected buffer overflow in memcpy [ 158.458069] ------------[ cut here ]------------ [ 158.458283] kernel BUG at lib/string.c:1153! [ 158.458436] Internal error: Oops - BUG: 0 [#1] SMP ARM [ 158.458768] Modules linked in: [ 158.459043] CPU: 1 PID: 99 Comm: perf Not tainted 5.9.0-rc7-00038-gc53ebf8167e9 #158 [ 158.459296] Hardware name: Generic DT based system [ 158.459529] PC is at fortify_panic+0x18/0x20 [ 158.459658] LR is at __irq_work_queue_local+0x3c/0x74 [ 158.459831] pc : [<8047451c>] lr : [<8020ecd4>] psr: 60000013 [ 158.460032] sp : be2d1d50 ip : be2d1c58 fp : be2d1d5c [ 158.460174] r10: 00000006 r9 : 00000000 r8 : 00000060 [ 158.460348] r7 : 8011e434 r6 : b9e0b800 r5 : 7f000000 r4 : b9fe4f0c [ 158.460557] r3 : 80c04cc8 r2 : 00000000 r1 : be7c03cc r0 : 00000022 [ 158.460801] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 158.461037] Control: 10c5387d Table: b9cd806a DAC: 00000051 [ 158.461251] Process perf (pid: 99, stack limit = 0x81c71a69) [ 158.461472] Stack: (0xbe2d1d50 to 0xbe2d2000) [ 158.461757] 1d40: be2d1d84 be2d1d60 8011e724 80474510 [ 158.462104] 1d60: b9e0b800 b9fe4f0c 00000000 b9fe4f14 80c8ec80 be235000 be2d1d9c be2d1d88 [ 158.462436] 1d80: 801cee44 8011e57c b9fe4f0c 00000000 be2d1dc4 be2d1da0 801d0ad0 801cedec [ 158.462742] 1da0: 00000000 00000000 b9fe4f00 ffffffea 00000000 be235000 be2d1de4 be2d1dc8 [ 158.463087] 1dc0: 80204604 801d0738 00000000 00000000 b9fe4004 ffffffea be2d1e94 be2d1de8 [ 158.463428] 1de0: 80205434 80204570 00385c00 00000000 00000000 00000000 be2d1e14 be2d1e08 [ 158.463880] 1e00: 802ba014 b9fe4f00 b9e718c0 b9fe4f84 b9e71ec8 be2d1e24 00000000 00385c00 [ 158.464365] 1e20: 00000000 626f7270 00000065 802b905c be2d1e94 0000002e 00000000 802b9914 [ 158.464829] 1e40: be2d1e84 be2d1e50 802b9914 8028ff78 804629d0 b9e71ec0 0000002e b9e71ec0 [ 158.465141] 1e60: be2d1ea8 80c04cc8 00000cc0 b9e713c4 00000002 80205834 80205834 0000002e [ 158.465488] 1e80: be235000 be235000 be2d1ea4 be2d1e98 80205854 80204e94 be2d1ecc be2d1ea8 [ 158.465806] 1ea0: 801ee4a0 80205840 00000002 80c04cc8 00000000 0000002e 0000002e 00000000 [ 158.466110] 1ec0: be2d1f0c be2d1ed0 801ee5c8 801ee428 00000000 be2d0000 006b1fd0 00000051 [ 158.466398] 1ee0: 00000000 b9eedf00 0000002e 80204410 006b1fd0 be2d1f60 00000000 00000004 [ 158.466763] 1f00: be2d1f24 be2d1f10 8020442c 801ee4c4 80205834 802c613c be2d1f5c be2d1f28 [ 158.467102] 1f20: 802c60ac 8020441c be2d1fac be2d1f38 8010c764 802e9888 be2d1f5c b9eedf00 [ 158.467447] 1f40: b9eedf00 006b1fd0 0000002e 00000000 be2d1f94 be2d1f60 802c634c 802c5fec [ 158.467812] 1f60: 00000000 00000000 00000000 80c04cc8 006b1fd0 00000003 76f7a610 00000004 [ 158.468155] 1f80: 80100284 be2d0000 be2d1fa4 be2d1f98 802c63ec 802c62e8 00000000 be2d1fa8 [ 158.468508] 1fa0: 80100080 802c63e0 006b1fd0 00000003 00000003 006b1fd0 0000002e 00000000 [ 158.468858] 1fc0: 006b1fd0 00000003 76f7a610 00000004 006b1fb0 0026d348 00000017 7ef2738c [ 158.469202] 1fe0: 76f3431c 7ef272d8 0014ec50 76f34338 60000010 00000003 00000000 00000000 [ 158.469461] Backtrace: [ 158.469683] [<80474504>] (fortify_panic) from [<8011e724>] (arch_prepare_optimized_kprobe+0x1b4/0x1f8) [ 158.470021] [<8011e570>] (arch_prepare_optimized_kprobe) from [<801cee44>] (alloc_aggr_kprobe+0x64/0x70) [ 158.470287] r9:be235000 r8:80c8ec80 r7:b9fe4f14 r6:00000000 r5:b9fe4f0c r4:b9e0b800 [ 158.470478] [<801cede0>] (alloc_aggr_kprobe) from [<801d0ad0>] (register_kprobe+0x3a4/0x5a0) [ 158.470685] r5:00000000 r4:b9fe4f0c [ 158.470790] [<801d072c>] (register_kprobe) from [<80204604>] (__register_trace_kprobe+0xa0/0xa4) [ 158.471001] r9:be235000 r8:00000000 r7:ffffffea r6:b9fe4f00 r5:00000000 r4:00000000 [ 158.471188] [<80204564>] (__register_trace_kprobe) from [<80205434>] (trace_kprobe_create+0x5ac/0x9ac) [ 158.471408] r7:ffffffea r6:b9fe4004 r5:00000000 r4:00000000 [ 158.471553] [<80204e88>] (trace_kprobe_create) from [<80205854>] (create_or_delete_trace_kprobe+0x20/0x3c) [ 158.471766] r10:be235000 r9:be235000 r8:0000002e r7:80205834 r6:80205834 r5:00000002 [ 158.471949] r4:b9e713c4 [ 158.472027] [<80205834>] (create_or_delete_trace_kprobe) from [<801ee4a0>] (trace_run_command+0x84/0x9c) [ 158.472255] [<801ee41c>] (trace_run_command) from [<801ee5c8>] (trace_parse_run_command+0x110/0x1f8) [ 158.472471] r6:00000000 r5:0000002e r4:0000002e [ 158.472594] [<801ee4b8>] (trace_parse_run_command) from [<8020442c>] (probes_write+0x1c/0x28) [ 158.472800] r10:00000004 r9:00000000 r8:be2d1f60 r7:006b1fd0 r6:80204410 r5:0000002e [ 158.472968] r4:b9eedf00 [ 158.473046] [<80204410>] (probes_write) from [<802c60ac>] (vfs_write+0xcc/0x1e8) [ 158.473226] [<802c5fe0>] (vfs_write) from [<802c634c>] (ksys_write+0x70/0xf8) [ 158.473400] r8:00000000 r7:0000002e r6:006b1fd0 r5:b9eedf00 r4:b9eedf00 [ 158.473567] [<802c62dc>] (ksys_write) from [<802c63ec>] (sys_write+0x18/0x1c) [ 158.473745] r9:be2d0000 r8:80100284 r7:00000004 r6:76f7a610 r5:00000003 r4:006b1fd0 [ 158.473932] [<802c63d4>] (sys_write) from [<80100080>] (ret_fast_syscall+0x0/0x54) [ 158.474126] Exception stack(0xbe2d1fa8 to 0xbe2d1ff0) [ 158.474305] 1fa0: 006b1fd0 00000003 00000003 006b1fd0 0000002e 00000000 [ 158.474573] 1fc0: 006b1fd0 00000003 76f7a610 00000004 006b1fb0 0026d348 00000017 7ef2738c [ 158.474811] 1fe0: 76f3431c 7ef272d8 0014ec50 76f34338 [ 158.475171] Code: e24cb004 e1a01000 e59f0004 ebf40dd3 (e7f001f2) [ 158.475847] ---[ end trace 55a5b31c08a29f00 ]--- [ 158.476088] Kernel panic - not syncing: Fatal exception [ 158.476375] CPU0: stopping [ 158.476709] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G D 5.9.0-rc7-00038-gc53ebf8167e9 #158 [ 158.477176] Hardware name: Generic DT based system [ 158.477411] Backtrace: [ 158.477604] [<8010dd28>] (dump_backtrace) from [<8010dfd4>] (show_stack+0x20/0x24) [ 158.477990] r7:00000000 r6:60000193 r5:00000000 r4:80c2f634 [ 158.478323] [<8010dfb4>] (show_stack) from [<8046390c>] (dump_stack+0xcc/0xe8) [ 158.478686] [<80463840>] (dump_stack) from [<80110750>] (handle_IPI+0x334/0x3a0) [ 158.479063] r7:00000000 r6:00000004 r5:80b65cc8 r4:80c78278 [ 158.479352] [<8011041c>] (handle_IPI) from [<801013f8>] (gic_handle_irq+0x88/0x94) [ 158.479757] r10:10c5387d r9:80c01ed8 r8:00000000 r7:c0802000 r6:80c0537c r5:000003ff [ 158.480146] r4:c080200c r3:fffffff4 [ 158.480364] [<80101370>] (gic_handle_irq) from [<80100b6c>] (__irq_svc+0x6c/0x90) [ 158.480748] Exception stack(0x80c01ed8 to 0x80c01f20) [ 158.481031] 1ec0: 000128bc 00000000 [ 158.481499] 1ee0: be7b8174 8011d3a0 80c00000 00000000 80c04cec 80c04d28 80c5d7c2 80a026d4 [ 158.482091] 1f00: 10c5387d 80c01f34 80c01f38 80c01f28 80109554 80109558 60000013 ffffffff [ 158.482621] r9:80c00000 r8:80c5d7c2 r7:80c01f0c r6:ffffffff r5:60000013 r4:80109558 [ 158.482983] [<80109518>] (arch_cpu_idle) from [<80818780>] (default_idle_call+0x38/0x120) [ 158.483360] [<80818748>] (default_idle_call) from [<801585a8>] (do_idle+0xd4/0x158) [ 158.483945] r5:00000000 r4:80c00000 [ 158.484237] [<801584d4>] (do_idle) from [<801588f4>] (cpu_startup_entry+0x28/0x2c) [ 158.484784] r9:80c78000 r8:00000000 r7:80c78000 r6:80c78040 r5:80c04cc0 r4:000000d6 [ 158.485328] [<801588cc>] (cpu_startup_entry) from [<80810a78>] (rest_init+0x9c/0xbc) [ 158.485930] [<808109dc>] (rest_init) from [<80b00ae4>] (arch_call_rest_init+0x18/0x1c) [ 158.486503] r5:80c04cc0 r4:00000001 [ 158.486857] [<80b00acc>] (arch_call_rest_init) from [<80b00fcc>] (start_kernel+0x46c/0x548) [ 158.487589] [<80b00b60>] (start_kernel) from [<00000000>] (0x0) ``` Fixes: e46daee53bb5 ("ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE") Fixes: 0ac569bf6a79 ("ARM: 8834/1: Fix: kprobes: optimized kprobes illegal instruction") Suggested-by: Kees Cook Signed-off-by: Andrew Jeffery Tested-by: Luka Oreskovic Tested-by: Joel Stanley Reviewed-by: Joel Stanley Acked-by: Masami Hiramatsu Cc: Luka Oreskovic Cc: Juraj Vijtiuk Signed-off-by: Russell King --- arch/arm/include/asm/kprobes.h | 22 +++++++++++----------- arch/arm/probes/kprobes/opt-arm.c | 18 +++++++++--------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 213607a1f45c..e26a278d301a 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -44,20 +44,20 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); /* optinsn template addresses */ -extern __visible kprobe_opcode_t optprobe_template_entry; -extern __visible kprobe_opcode_t optprobe_template_val; -extern __visible kprobe_opcode_t optprobe_template_call; -extern __visible kprobe_opcode_t optprobe_template_end; -extern __visible kprobe_opcode_t optprobe_template_sub_sp; -extern __visible kprobe_opcode_t optprobe_template_add_sp; -extern __visible kprobe_opcode_t optprobe_template_restore_begin; -extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn; -extern __visible kprobe_opcode_t optprobe_template_restore_end; +extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_val[]; +extern __visible kprobe_opcode_t optprobe_template_call[]; +extern __visible kprobe_opcode_t optprobe_template_end[]; +extern __visible kprobe_opcode_t optprobe_template_sub_sp[]; +extern __visible kprobe_opcode_t optprobe_template_add_sp[]; +extern __visible kprobe_opcode_t optprobe_template_restore_begin[]; +extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[]; +extern __visible kprobe_opcode_t optprobe_template_restore_end[]; #define MAX_OPTIMIZED_LENGTH 4 #define MAX_OPTINSN_SIZE \ - ((unsigned long)&optprobe_template_end - \ - (unsigned long)&optprobe_template_entry) + ((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) #define RELATIVEJUMP_SIZE 4 struct arch_optimized_insn { diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 7a449df0b359..c78180172120 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -85,21 +85,21 @@ asm ( "optprobe_template_end:\n"); #define TMPL_VAL_IDX \ - ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry) #define TMPL_END_IDX \ - ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry) #define TMPL_ADD_SP \ - ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry) #define TMPL_SUB_SP \ - ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_BEGIN \ - ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_ORIGN_INSN \ - ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_END \ - ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry) /* * ARM can always optimize an instruction when using ARM ISA, except @@ -234,7 +234,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, (unsigned long *)&optprobe_template_entry, + memcpy(code, (unsigned long *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ From 343a3e8bc635bd4c58d45a4fe67f9c3a78fbd191 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:20:50 +0100 Subject: [PATCH 055/710] bpf: Fix -Wshadow warnings There are thousands of warnings about one macro in a W=2 build: include/linux/filter.h:561:6: warning: declaration of 'ret' shadows a previous local [-Wshadow] Prefix all the locals in that macro with __ to avoid most of these warnings. Fixes: 492ecee892c2 ("bpf: enable program stats") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201026162110.3710415-1-arnd@kernel.org --- include/linux/filter.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 72d62cbc1578..1b62397bd124 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -558,21 +558,21 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); #define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \ - u32 ret; \ + u32 __ret; \ cant_migrate(); \ if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ - struct bpf_prog_stats *stats; \ - u64 start = sched_clock(); \ - ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ - stats = this_cpu_ptr(prog->aux->stats); \ - u64_stats_update_begin(&stats->syncp); \ - stats->cnt++; \ - stats->nsecs += sched_clock() - start; \ - u64_stats_update_end(&stats->syncp); \ + struct bpf_prog_stats *__stats; \ + u64 __start = sched_clock(); \ + __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + __stats = this_cpu_ptr(prog->aux->stats); \ + u64_stats_update_begin(&__stats->syncp); \ + __stats->cnt++; \ + __stats->nsecs += sched_clock() - __start; \ + u64_stats_update_end(&__stats->syncp); \ } else { \ - ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ } \ - ret; }) + __ret; }) #define BPF_PROG_RUN(prog, ctx) \ __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) From c66dca98a24cb5f3493dd08d40bcfa94a220fa92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 27 Oct 2020 00:36:23 +0100 Subject: [PATCH 056/710] samples/bpf: Set rlimit for memlock to infinity in all samples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memlock rlimit is a notorious source of failure for BPF programs. Most of the samples just set it to infinity, but a few used a lower limit. The problem with unconditionally setting a lower limit is that this will also override the limit if the system-wide setting is *higher* than the limit being set, which can lead to failures on systems that lock a lot of memory, but set 'ulimit -l' to unlimited before running a sample. One fix for this is to only conditionally set the limit if the current limit is lower, but it is simpler to just unify all the samples and have them all set the limit to infinity. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20201026233623.91728-1-toke@redhat.com --- samples/bpf/task_fd_query_user.c | 2 +- samples/bpf/tracex2_user.c | 2 +- samples/bpf/tracex3_user.c | 2 +- samples/bpf/xdp_redirect_cpu_user.c | 2 +- samples/bpf/xdp_rxq_info_user.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index 4a74531dc403..b68bd2f8fdc9 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -290,7 +290,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) int main(int argc, char **argv) { - struct rlimit r = {1024*1024, RLIM_INFINITY}; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; extern char __executable_start; char filename[256], buf[256]; __u64 uprobe_file_offset; diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 3e36b3e4e3ef..3d6eab711d23 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -116,7 +116,7 @@ static void int_exit(int sig) int main(int ac, char **argv) { - struct rlimit r = {1024*1024, RLIM_INFINITY}; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; long key, next_key, value; struct bpf_link *links[2]; struct bpf_program *prog; diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index 70e987775c15..83e0fecbb01a 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -107,7 +107,7 @@ static void print_hist(int fd) int main(int ac, char **argv) { - struct rlimit r = {1024*1024, RLIM_INFINITY}; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *links[2]; struct bpf_program *prog; struct bpf_object *obj; diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 6fb8dbde62c5..f78cb18319aa 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -765,7 +765,7 @@ static int load_cpumap_prog(char *file_name, char *prog_name, int main(int argc, char **argv) { - struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; char *mprog_filename = "xdp_redirect_kern.o"; char *redir_interface = NULL, *redir_map = NULL; diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index caa4e7ffcfc7..93fa1bc54f13 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -450,7 +450,7 @@ static void stats_poll(int interval, int action, __u32 cfg_opt) int main(int argc, char **argv) { __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */ - struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; From 484f910e93b48c1d8890d8330a87e34ae61f4782 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 27 Oct 2020 14:34:09 -0700 Subject: [PATCH 057/710] dmaengine: idxd: fix wq config registers offset programming DSA spec v1.1 [1] updated to include a stride size register for WQ configuration that will specify how much space is reserved for the WQ configuration register set. This change is expected to be in the final gen1 DSA hardware. Fix the driver to use WQCFG_OFFSET() for all WQ offset calculation and fixup WQCFG_OFFSET() to use the new calculated wq size. [1]: https://software.intel.com/content/www/us/en/develop/download/intel-data-streaming-accelerator-preliminary-architecture-specification.html Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/160383444959.48058.14249265538404901781.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 29 ++++++++++++++--------------- drivers/dma/idxd/idxd.h | 3 ++- drivers/dma/idxd/init.c | 5 +++++ drivers/dma/idxd/registers.h | 23 ++++++++++++++++++++++- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 200b9109cacf..506ac85c4a7f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -295,7 +295,7 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) int i, wq_offset; lockdep_assert_held(&idxd->dev_lock); - memset(&wq->wqcfg, 0, sizeof(wq->wqcfg)); + memset(wq->wqcfg, 0, idxd->wqcfg_size); wq->type = IDXD_WQT_NONE; wq->size = 0; wq->group = NULL; @@ -304,8 +304,8 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) clear_bit(WQ_FLAG_DEDICATED, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); - for (i = 0; i < 8; i++) { - wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32); + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); iowrite32(0, idxd->reg_base + wq_offset); dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wq_offset, @@ -539,10 +539,10 @@ static int idxd_wq_config_write(struct idxd_wq *wq) if (!wq->group) return 0; - memset(&wq->wqcfg, 0, sizeof(union wqcfg)); + memset(wq->wqcfg, 0, idxd->wqcfg_size); /* byte 0-3 */ - wq->wqcfg.wq_size = wq->size; + wq->wqcfg->wq_size = wq->size; if (wq->size == 0) { dev_warn(dev, "Incorrect work queue size: 0\n"); @@ -550,22 +550,21 @@ static int idxd_wq_config_write(struct idxd_wq *wq) } /* bytes 4-7 */ - wq->wqcfg.wq_thresh = wq->threshold; + wq->wqcfg->wq_thresh = wq->threshold; /* byte 8-11 */ - wq->wqcfg.priv = !!(wq->type == IDXD_WQT_KERNEL); - wq->wqcfg.mode = 1; - - wq->wqcfg.priority = wq->priority; + wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL); + wq->wqcfg->mode = 1; + wq->wqcfg->priority = wq->priority; /* bytes 12-15 */ - wq->wqcfg.max_xfer_shift = ilog2(wq->max_xfer_bytes); - wq->wqcfg.max_batch_shift = ilog2(wq->max_batch_size); + wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); + wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size); dev_dbg(dev, "WQ %d CFGs\n", wq->id); - for (i = 0; i < 8; i++) { - wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32); - iowrite32(wq->wqcfg.bits[i], idxd->reg_base + wq_offset); + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); + iowrite32(wq->wqcfg->bits[i], idxd->reg_base + wq_offset); dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wq_offset, ioread32(idxd->reg_base + wq_offset)); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index c64df197e724..d48f193daacc 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -103,7 +103,7 @@ struct idxd_wq { u32 priority; enum idxd_wq_state state; unsigned long flags; - union wqcfg wqcfg; + union wqcfg *wqcfg; u32 vec_ptr; /* interrupt steering */ struct dsa_hw_desc **hw_descs; int num_descs; @@ -183,6 +183,7 @@ struct idxd_device { int max_wq_size; int token_limit; int nr_tokens; /* non-reserved tokens */ + unsigned int wqcfg_size; union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 11e5ce168177..0a4432b063b5 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -178,6 +178,9 @@ static int idxd_setup_internals(struct idxd_device *idxd) wq->idxd_cdev.minor = -1; wq->max_xfer_bytes = idxd->max_xfer_bytes; wq->max_batch_size = idxd->max_batch_size; + wq->wqcfg = devm_kzalloc(dev, idxd->wqcfg_size, GFP_KERNEL); + if (!wq->wqcfg) + return -ENOMEM; } for (i = 0; i < idxd->max_engines; i++) { @@ -251,6 +254,8 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size); idxd->max_wqs = idxd->hw.wq_cap.num_wqs; dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs); + idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN); + dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size); /* reading operation capabilities */ for (i = 0; i < 4; i++) { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index a39e7ae6b3d9..aef5a902829e 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -43,7 +43,8 @@ union wq_cap_reg { struct { u64 total_wq_size:16; u64 num_wqs:8; - u64 rsvd:24; + u64 wqcfg_size:4; + u64 rsvd:20; u64 shared_mode:1; u64 dedicated_mode:1; u64 rsvd2:1; @@ -55,6 +56,7 @@ union wq_cap_reg { u64 bits; } __packed; #define IDXD_WQCAP_OFFSET 0x20 +#define IDXD_WQCFG_MIN 5 union group_cap_reg { struct { @@ -333,4 +335,23 @@ union wqcfg { }; u32 bits[8]; } __packed; + +/* + * This macro calculates the offset into the WQCFG register + * idxd - struct idxd * + * n - wq id + * ofs - the index of the 32b dword for the config register + * + * The WQCFG register block is divided into groups per each wq. The n index + * allows us to move to the register group that's for that particular wq. + * Each register is 32bits. The ofs gives us the number of register to access. + */ +#define WQCFG_OFFSET(_idxd_dev, n, ofs) \ +({\ + typeof(_idxd_dev) __idxd_dev = (_idxd_dev); \ + (__idxd_dev)->wqcfg_offset + (n) * (__idxd_dev)->wqcfg_size + sizeof(u32) * (ofs); \ +}) + +#define WQCFG_STRIDES(_idxd_dev) ((_idxd_dev)->wqcfg_size / sizeof(u32)) + #endif From 768664114b1ac9184f1dc6217d9c930a08ffbfa8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:01:15 +0100 Subject: [PATCH 058/710] dmaengine: ti: k3-udma: fix -Wenum-conversion warning gcc warns about a mismatch argument type when passing 'false' into a function that expects an enum: drivers/dma/ti/k3-udma-private.c: In function 'xudma_tchan_get': drivers/dma/ti/k3-udma-private.c:86:34: warning: implicit conversion from 'enum ' to 'enum udma_tp_level' [-Wenum-conversion] 86 | return __udma_reserve_##res(ud, false, id); \ | ^~~~~ drivers/dma/ti/k3-udma-private.c:95:1: note: in expansion of macro 'XUDMA_GET_PUT_RESOURCE' 95 | XUDMA_GET_PUT_RESOURCE(tchan); | ^~~~~~~~~~~~~~~~~~~~~~ In this case, false has the same numerical value as UDMA_TP_NORMAL, so passing that is most likely the correct way to avoid the warning without changing the behavior. Fixes: d70241913413 ("dmaengine: ti: k3-udma: Add glue layer for non DMAengine users") Signed-off-by: Arnd Bergmann Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201026160123.3704531-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma-private.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c index aa24e554f7b4..8563a392f30b 100644 --- a/drivers/dma/ti/k3-udma-private.c +++ b/drivers/dma/ti/k3-udma-private.c @@ -83,7 +83,7 @@ EXPORT_SYMBOL(xudma_rflow_is_gp); #define XUDMA_GET_PUT_RESOURCE(res) \ struct udma_##res *xudma_##res##_get(struct udma_dev *ud, int id) \ { \ - return __udma_reserve_##res(ud, false, id); \ + return __udma_reserve_##res(ud, UDMA_TP_NORMAL, id); \ } \ EXPORT_SYMBOL(xudma_##res##_get); \ \ From 5760648e63e6c1006a3ed0bfc2167f623b8bcbcd Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 5 Oct 2020 15:03:25 +0800 Subject: [PATCH 059/710] gpio: uapi: fix kernel-doc warnings Fix kernel-doc warnings, specifically gpioline_info_changed.padding is not documented and 'GPIO event types' describes defines, which are not documented by kernel-doc. Signed-off-by: Kent Gibson Reviewed-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005070329.21055-2-warthog618@gmail.com Signed-off-by: Linus Walleij --- include/uapi/linux/gpio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 07865c601099..b0d5e7a1c693 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -346,6 +346,7 @@ enum { * @timestamp: estimate of time of status change occurrence, in nanoseconds * @event_type: one of GPIOLINE_CHANGED_REQUESTED, GPIOLINE_CHANGED_RELEASED * and GPIOLINE_CHANGED_CONFIG + * @padding: reserved for future use * * Note: struct gpioline_info embedded here has 32-bit alignment on its own, * but it works fine with 64-bit alignment too. With its 72 byte size, we can @@ -469,7 +470,7 @@ struct gpioevent_request { int fd; }; -/** +/* * GPIO event types */ #define GPIOEVENT_EVENT_RISING_EDGE 0x01 From f20160217537e9006ce4a625da62b358416fc4ed Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 5 Oct 2020 15:03:26 +0800 Subject: [PATCH 060/710] gpio: uapi: comment consistency Make debounce_period_us field documentation consistent with other fields in the union. Signed-off-by: Kent Gibson Reviewed-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005070329.21055-3-warthog618@gmail.com Signed-off-by: Linus Walleij --- include/uapi/linux/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index b0d5e7a1c693..1fdb0e851f83 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -98,7 +98,7 @@ struct gpio_v2_line_values { * identifying which field of the attribute union is in use. * @GPIO_V2_LINE_ATTR_ID_FLAGS: flags field is in use * @GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES: values field is in use - * @GPIO_V2_LINE_ATTR_ID_DEBOUNCE: debounce_period_us is in use + * @GPIO_V2_LINE_ATTR_ID_DEBOUNCE: debounce_period_us field is in use */ enum gpio_v2_line_attr_id { GPIO_V2_LINE_ATTR_ID_FLAGS = 1, From 2cc522d3931ba2aa744d09d41f874d61bf3a1851 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 5 Oct 2020 15:03:27 +0800 Subject: [PATCH 061/710] gpio: uapi: kernel-doc formatting improvements Add kernel-doc formatting to all references to structs, enums, fields and constants, and move deprecation warnings into the Note section of the deprecated struct. Replace 'OR:ed' with 'added', as the former looks odd. Signed-off-by: Kent Gibson Reviewed-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005070329.21055-4-warthog618@gmail.com Signed-off-by: Linus Walleij --- include/uapi/linux/gpio.h | 93 ++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 1fdb0e851f83..32dd18f238c3 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -110,17 +110,17 @@ enum gpio_v2_line_attr_id { * struct gpio_v2_line_attribute - a configurable attribute of a line * @id: attribute identifier with value from &enum gpio_v2_line_attr_id * @padding: reserved for future use and must be zero filled - * @flags: if id is GPIO_V2_LINE_ATTR_ID_FLAGS, the flags for the GPIO - * line, with values from enum gpio_v2_line_flag, such as - * GPIO_V2_LINE_FLAG_ACTIVE_LOW, GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed + * @flags: if id is %GPIO_V2_LINE_ATTR_ID_FLAGS, the flags for the GPIO + * line, with values from &enum gpio_v2_line_flag, such as + * %GPIO_V2_LINE_FLAG_ACTIVE_LOW, %GPIO_V2_LINE_FLAG_OUTPUT etc, added * together. This overrides the default flags contained in the &struct * gpio_v2_line_config for the associated line. - * @values: if id is GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES, a bitmap + * @values: if id is %GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES, a bitmap * containing the values to which the lines will be set, with each bit * number corresponding to the index into &struct * gpio_v2_line_request.offsets. - * @debounce_period_us: if id is GPIO_V2_LINE_ATTR_ID_DEBOUNCE, the desired - * debounce period, in microseconds + * @debounce_period_us: if id is %GPIO_V2_LINE_ATTR_ID_DEBOUNCE, the + * desired debounce period, in microseconds */ struct gpio_v2_line_attribute { __u32 id; @@ -147,12 +147,12 @@ struct gpio_v2_line_config_attribute { /** * struct gpio_v2_line_config - Configuration for GPIO lines - * @flags: flags for the GPIO lines, with values from enum - * gpio_v2_line_flag, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW, - * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together. This is the default for + * @flags: flags for the GPIO lines, with values from &enum + * gpio_v2_line_flag, such as %GPIO_V2_LINE_FLAG_ACTIVE_LOW, + * %GPIO_V2_LINE_FLAG_OUTPUT etc, added together. This is the default for * all requested lines but may be overridden for particular lines using - * attrs. - * @num_attrs: the number of attributes in attrs + * @attrs. + * @num_attrs: the number of attributes in @attrs * @padding: reserved for future use and must be zero filled * @attrs: the configuration attributes associated with the requested * lines. Any attribute should only be associated with a particular line @@ -175,17 +175,17 @@ struct gpio_v2_line_config { * "my-bitbanged-relay" * @config: requested configuration for the lines. * @num_lines: number of lines requested in this request, i.e. the number - * of valid fields in the GPIO_V2_LINES_MAX sized arrays, set to 1 to + * of valid fields in the %GPIO_V2_LINES_MAX sized arrays, set to 1 to * request a single line * @event_buffer_size: a suggested minimum number of line events that the * kernel should buffer. This is only relevant if edge detection is * enabled in the configuration. Note that this is only a suggested value * and the kernel may allocate a larger buffer or cap the size of the * buffer. If this field is zero then the buffer size defaults to a minimum - * of num_lines*16. + * of @num_lines * 16. * @padding: reserved for future use and must be zero filled * @fd: if successful this field will contain a valid anonymous file handle - * after a GPIO_GET_LINE_IOCTL operation, zero or negative value means + * after a %GPIO_GET_LINE_IOCTL operation, zero or negative value means * error */ struct gpio_v2_line_request { @@ -207,11 +207,12 @@ struct gpio_v2_line_request { * @consumer: a functional name for the consumer of this GPIO line as set * by whatever is using it, will be empty if there is no current user but * may also be empty if the consumer doesn't set this up - * @flags: flags for the GPIO line, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW, - * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together * @offset: the local offset on this GPIO chip, fill this in when * requesting the line information from the kernel - * @num_attrs: the number of attributes in attrs + * @num_attrs: the number of attributes in @attrs + * @flags: flags for the GPIO lines, with values from &enum + * gpio_v2_line_flag, such as %GPIO_V2_LINE_FLAG_ACTIVE_LOW, + * %GPIO_V2_LINE_FLAG_OUTPUT etc, added together. * @attrs: the configuration attributes associated with the line * @padding: reserved for future use */ @@ -244,7 +245,7 @@ enum gpio_v2_line_changed_type { * of a GPIO line * @info: updated line information * @timestamp_ns: estimate of time of status change occurrence, in nanoseconds - * @event_type: the type of change with a value from enum + * @event_type: the type of change with a value from &enum * gpio_v2_line_changed_type * @padding: reserved for future use */ @@ -269,10 +270,10 @@ enum gpio_v2_line_event_id { /** * struct gpio_v2_line_event - The actual event being pushed to userspace * @timestamp_ns: best estimate of time of event occurrence, in nanoseconds. - * The timestamp_ns is read from CLOCK_MONOTONIC and is intended to allow the - * accurate measurement of the time between events. It does not provide + * The @timestamp_ns is read from %CLOCK_MONOTONIC and is intended to allow + * the accurate measurement of the time between events. It does not provide * the wall-clock time. - * @id: event identifier with value from enum gpio_v2_line_event_id + * @id: event identifier with value from &enum gpio_v2_line_event_id * @offset: the offset of the line that triggered the event * @seqno: the sequence number for this event in the sequence of events for * all the lines in this line request @@ -319,8 +320,8 @@ struct gpio_v2_line_event { * whatever is using it, will be empty if there is no current user but may * also be empty if the consumer doesn't set this up * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_info instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_info instead. */ struct gpioline_info { __u32 line_offset; @@ -344,18 +345,18 @@ enum { * of a GPIO line * @info: updated line information * @timestamp: estimate of time of status change occurrence, in nanoseconds - * @event_type: one of GPIOLINE_CHANGED_REQUESTED, GPIOLINE_CHANGED_RELEASED - * and GPIOLINE_CHANGED_CONFIG + * @event_type: one of %GPIOLINE_CHANGED_REQUESTED, + * %GPIOLINE_CHANGED_RELEASED and %GPIOLINE_CHANGED_CONFIG * @padding: reserved for future use * - * Note: struct gpioline_info embedded here has 32-bit alignment on its own, + * The &struct gpioline_info embedded here has 32-bit alignment on its own, * but it works fine with 64-bit alignment too. With its 72 byte size, we can * guarantee there are no implicit holes between it and subsequent members. * The 20-byte padding at the end makes sure we don't add any implicit padding * at the end of the structure on 64-bit architectures. * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_info_changed instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_info_changed instead. */ struct gpioline_info_changed { struct gpioline_info info; @@ -379,13 +380,13 @@ struct gpioline_info_changed { * @lineoffsets: an array of desired lines, specified by offset index for the * associated GPIO device * @flags: desired flags for the desired GPIO lines, such as - * GPIOHANDLE_REQUEST_OUTPUT, GPIOHANDLE_REQUEST_ACTIVE_LOW etc, OR:ed + * %GPIOHANDLE_REQUEST_OUTPUT, %GPIOHANDLE_REQUEST_ACTIVE_LOW etc, added * together. Note that even if multiple lines are requested, the same flags * must be applicable to all of them, if you want lines with individual * flags set, request them one by one. It is possible to select * a batch of input or output lines, but they must all have the same * characteristics, i.e. all inputs or all outputs, all active low etc - * @default_values: if the GPIOHANDLE_REQUEST_OUTPUT is set for a requested + * @default_values: if the %GPIOHANDLE_REQUEST_OUTPUT is set for a requested * line, this specifies the default output value, should be 0 (low) or * 1 (high), anything else than 0 or 1 will be interpreted as 1 (high) * @consumer_label: a desired consumer label for the selected GPIO line(s) @@ -393,11 +394,11 @@ struct gpioline_info_changed { * @lines: number of lines requested in this request, i.e. the number of * valid fields in the above arrays, set to 1 to request a single line * @fd: if successful this field will contain a valid anonymous file handle - * after a GPIO_GET_LINEHANDLE_IOCTL operation, zero or negative value + * after a %GPIO_GET_LINEHANDLE_IOCTL operation, zero or negative value * means error * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_request instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_request instead. */ struct gpiohandle_request { __u32 lineoffsets[GPIOHANDLES_MAX]; @@ -411,15 +412,15 @@ struct gpiohandle_request { /** * struct gpiohandle_config - Configuration for a GPIO handle request * @flags: updated flags for the requested GPIO lines, such as - * GPIOHANDLE_REQUEST_OUTPUT, GPIOHANDLE_REQUEST_ACTIVE_LOW etc, OR:ed + * %GPIOHANDLE_REQUEST_OUTPUT, %GPIOHANDLE_REQUEST_ACTIVE_LOW etc, added * together - * @default_values: if the GPIOHANDLE_REQUEST_OUTPUT is set in flags, + * @default_values: if the %GPIOHANDLE_REQUEST_OUTPUT is set in flags, * this specifies the default output value, should be 0 (low) or * 1 (high), anything else than 0 or 1 will be interpreted as 1 (high) * @padding: reserved for future use and should be zero filled * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_config instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_config instead. */ struct gpiohandle_config { __u32 flags; @@ -433,8 +434,8 @@ struct gpiohandle_config { * state of a line, when setting the state of lines these should contain * the desired target state * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_values instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_values instead. */ struct gpiohandle_data { __u8 values[GPIOHANDLES_MAX]; @@ -450,17 +451,17 @@ struct gpiohandle_data { * @lineoffset: the desired line to subscribe to events from, specified by * offset index for the associated GPIO device * @handleflags: desired handle flags for the desired GPIO line, such as - * GPIOHANDLE_REQUEST_ACTIVE_LOW or GPIOHANDLE_REQUEST_OPEN_DRAIN + * %GPIOHANDLE_REQUEST_ACTIVE_LOW or %GPIOHANDLE_REQUEST_OPEN_DRAIN * @eventflags: desired flags for the desired GPIO event line, such as - * GPIOEVENT_REQUEST_RISING_EDGE or GPIOEVENT_REQUEST_FALLING_EDGE + * %GPIOEVENT_REQUEST_RISING_EDGE or %GPIOEVENT_REQUEST_FALLING_EDGE * @consumer_label: a desired consumer label for the selected GPIO line(s) * such as "my-listener" * @fd: if successful this field will contain a valid anonymous file handle - * after a GPIO_GET_LINEEVENT_IOCTL operation, zero or negative value + * after a %GPIO_GET_LINEEVENT_IOCTL operation, zero or negative value * means error * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_request instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_request instead. */ struct gpioevent_request { __u32 lineoffset; @@ -481,8 +482,8 @@ struct gpioevent_request { * @timestamp: best estimate of time of event occurrence, in nanoseconds * @id: event identifier * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_event instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_event instead. */ struct gpioevent_data { __u64 timestamp; From c303c51c87a61ace7330b5e0217468b1b8f98a75 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 5 Oct 2020 15:03:28 +0800 Subject: [PATCH 062/710] gpio: uapi: remove whitespace Remove leading whitespace in ABI v1 comment. Signed-off-by: Kent Gibson Reviewed-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005070329.21055-5-warthog618@gmail.com Signed-off-by: Linus Walleij --- include/uapi/linux/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 32dd18f238c3..ad3f56dd87ec 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -292,7 +292,7 @@ struct gpio_v2_line_event { }; /* - * ABI v1 + * ABI v1 * * This version of the ABI is deprecated. * Use the latest version of the ABI, defined above, instead. From 2f84a2de539cc4301a332c2c76473fc25baf21b7 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 5 Oct 2020 15:03:29 +0800 Subject: [PATCH 063/710] gpio: uapi: clarify the meaning of 'empty' char arrays Clarify that a char array containing a string is considered 'empty' if the first character is the null terminator. The remaining characters are not relevant to this determination. Signed-off-by: Kent Gibson Reviewed-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005070329.21055-6-warthog618@gmail.com Signed-off-by: Linus Walleij --- include/uapi/linux/gpio.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index ad3f56dd87ec..2072c260f5d0 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -26,7 +26,7 @@ * struct gpiochip_info - Information about a certain GPIO chip * @name: the Linux kernel name of this GPIO chip * @label: a functional name for this GPIO chip, such as a product - * number, may be empty + * number, may be empty (i.e. label[0] == '\0') * @lines: number of GPIO lines on this chip */ struct gpiochip_info { @@ -203,7 +203,7 @@ struct gpio_v2_line_request { * struct gpio_v2_line_info - Information about a certain GPIO line * @name: the name of this GPIO line, such as the output pin of the line on * the chip, a rail or a pin header name on a board, as specified by the - * GPIO chip, may be empty + * GPIO chip, may be empty (i.e. name[0] == '\0') * @consumer: a functional name for the consumer of this GPIO line as set * by whatever is using it, will be empty if there is no current user but * may also be empty if the consumer doesn't set this up @@ -315,7 +315,7 @@ struct gpio_v2_line_event { * @flags: various flags for this line * @name: the name of this GPIO line, such as the output pin of the line on the * chip, a rail or a pin header name on a board, as specified by the gpio - * chip, may be empty + * chip, may be empty (i.e. name[0] == '\0') * @consumer: a functional name for the consumer of this GPIO line as set by * whatever is using it, will be empty if there is no current user but may * also be empty if the consumer doesn't set this up From 7ffa08169849be898eed6f3694aab8c425497749 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 28 Oct 2020 08:05:56 +0200 Subject: [PATCH 064/710] Revert "Revert "gpio: omap: Fix lost edge wake-up interrupts"" This reverts commit 579ced8fdb00b8e94304a83e3cc419f6f8eab08e. Turns out I was overly optimistic about cpu_pm blocking idle being a solution for handling edge interrupts. While it helps in preventing entering idle states that potentially lose context, we can still get an edge interrupt triggering while entering idle. So we need to also add back the workaround for seeing if there are any pending edge interrupts when waking up. Signed-off-by: Tony Lindgren Cc: Aaro Koskinen Cc: Grygorii Strashko Cc: Keerthy Cc: Ladislav Michl Cc: Peter Ujfalusi Cc: Russell King Cc: Tero Kristo Link: https://lore.kernel.org/r/20201028060556.56038-1-tony@atomide.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-omap.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 6d59e3a43761..f7ceb2b11afc 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1114,13 +1114,23 @@ static void omap_gpio_idle(struct gpio_bank *bank, bool may_lose_context) { struct device *dev = bank->chip.parent; void __iomem *base = bank->base; - u32 nowake; + u32 mask, nowake; bank->saved_datain = readl_relaxed(base + bank->regs->datain); if (!bank->enabled_non_wakeup_gpios) goto update_gpio_context_count; + /* Check for pending EDGE_FALLING, ignore EDGE_BOTH */ + mask = bank->enabled_non_wakeup_gpios & bank->context.fallingdetect; + mask &= ~bank->context.risingdetect; + bank->saved_datain |= mask; + + /* Check for pending EDGE_RISING, ignore EDGE_BOTH */ + mask = bank->enabled_non_wakeup_gpios & bank->context.risingdetect; + mask &= ~bank->context.fallingdetect; + bank->saved_datain &= ~mask; + if (!may_lose_context) goto update_gpio_context_count; From f83c2609079cde0bb3ad4c1da60f9c69c0ec8920 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 10 Oct 2020 21:25:09 +0200 Subject: [PATCH 065/710] pinctrl: ingenic: Fix invalid SSI pins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The values for the SSI pins on GPIO chips D and E were off by 0x20. Fixes: d3ef8c6b2286 ("pinctrl: Ingenic: Add SSI pins support for JZ4770 and JZ4780.") Signed-off-by: Paul Cercueil Reported-by: Artur Rojek Link: https://lore.kernel.org/r/20201010192509.9098-1-paul@crapouillou.net Reviewed-by: 周琰杰 (Zhou Yanjie) Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-ingenic.c | 72 +++++++++++++++---------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index c8e50a58a5e5..621909b01deb 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -635,44 +635,44 @@ static int jz4770_uart3_data_pins[] = { 0x6c, 0x85, }; static int jz4770_uart3_hwflow_pins[] = { 0x88, 0x89, }; static int jz4770_ssi0_dt_a_pins[] = { 0x15, }; static int jz4770_ssi0_dt_b_pins[] = { 0x35, }; -static int jz4770_ssi0_dt_d_pins[] = { 0x55, }; -static int jz4770_ssi0_dt_e_pins[] = { 0x71, }; +static int jz4770_ssi0_dt_d_pins[] = { 0x75, }; +static int jz4770_ssi0_dt_e_pins[] = { 0x91, }; static int jz4770_ssi0_dr_a_pins[] = { 0x14, }; static int jz4770_ssi0_dr_b_pins[] = { 0x34, }; -static int jz4770_ssi0_dr_d_pins[] = { 0x54, }; -static int jz4770_ssi0_dr_e_pins[] = { 0x6e, }; +static int jz4770_ssi0_dr_d_pins[] = { 0x74, }; +static int jz4770_ssi0_dr_e_pins[] = { 0x8e, }; static int jz4770_ssi0_clk_a_pins[] = { 0x12, }; static int jz4770_ssi0_clk_b_pins[] = { 0x3c, }; -static int jz4770_ssi0_clk_d_pins[] = { 0x58, }; -static int jz4770_ssi0_clk_e_pins[] = { 0x6f, }; +static int jz4770_ssi0_clk_d_pins[] = { 0x78, }; +static int jz4770_ssi0_clk_e_pins[] = { 0x8f, }; static int jz4770_ssi0_gpc_b_pins[] = { 0x3e, }; -static int jz4770_ssi0_gpc_d_pins[] = { 0x56, }; -static int jz4770_ssi0_gpc_e_pins[] = { 0x73, }; +static int jz4770_ssi0_gpc_d_pins[] = { 0x76, }; +static int jz4770_ssi0_gpc_e_pins[] = { 0x93, }; static int jz4770_ssi0_ce0_a_pins[] = { 0x13, }; static int jz4770_ssi0_ce0_b_pins[] = { 0x3d, }; -static int jz4770_ssi0_ce0_d_pins[] = { 0x59, }; -static int jz4770_ssi0_ce0_e_pins[] = { 0x70, }; +static int jz4770_ssi0_ce0_d_pins[] = { 0x79, }; +static int jz4770_ssi0_ce0_e_pins[] = { 0x90, }; static int jz4770_ssi0_ce1_b_pins[] = { 0x3f, }; -static int jz4770_ssi0_ce1_d_pins[] = { 0x57, }; -static int jz4770_ssi0_ce1_e_pins[] = { 0x72, }; +static int jz4770_ssi0_ce1_d_pins[] = { 0x77, }; +static int jz4770_ssi0_ce1_e_pins[] = { 0x92, }; static int jz4770_ssi1_dt_b_pins[] = { 0x35, }; -static int jz4770_ssi1_dt_d_pins[] = { 0x55, }; -static int jz4770_ssi1_dt_e_pins[] = { 0x71, }; +static int jz4770_ssi1_dt_d_pins[] = { 0x75, }; +static int jz4770_ssi1_dt_e_pins[] = { 0x91, }; static int jz4770_ssi1_dr_b_pins[] = { 0x34, }; -static int jz4770_ssi1_dr_d_pins[] = { 0x54, }; -static int jz4770_ssi1_dr_e_pins[] = { 0x6e, }; +static int jz4770_ssi1_dr_d_pins[] = { 0x74, }; +static int jz4770_ssi1_dr_e_pins[] = { 0x8e, }; static int jz4770_ssi1_clk_b_pins[] = { 0x3c, }; -static int jz4770_ssi1_clk_d_pins[] = { 0x58, }; -static int jz4770_ssi1_clk_e_pins[] = { 0x6f, }; +static int jz4770_ssi1_clk_d_pins[] = { 0x78, }; +static int jz4770_ssi1_clk_e_pins[] = { 0x8f, }; static int jz4770_ssi1_gpc_b_pins[] = { 0x3e, }; -static int jz4770_ssi1_gpc_d_pins[] = { 0x56, }; -static int jz4770_ssi1_gpc_e_pins[] = { 0x73, }; +static int jz4770_ssi1_gpc_d_pins[] = { 0x76, }; +static int jz4770_ssi1_gpc_e_pins[] = { 0x93, }; static int jz4770_ssi1_ce0_b_pins[] = { 0x3d, }; -static int jz4770_ssi1_ce0_d_pins[] = { 0x59, }; -static int jz4770_ssi1_ce0_e_pins[] = { 0x70, }; +static int jz4770_ssi1_ce0_d_pins[] = { 0x79, }; +static int jz4770_ssi1_ce0_e_pins[] = { 0x90, }; static int jz4770_ssi1_ce1_b_pins[] = { 0x3f, }; -static int jz4770_ssi1_ce1_d_pins[] = { 0x57, }; -static int jz4770_ssi1_ce1_e_pins[] = { 0x72, }; +static int jz4770_ssi1_ce1_d_pins[] = { 0x77, }; +static int jz4770_ssi1_ce1_e_pins[] = { 0x92, }; static int jz4770_mmc0_1bit_a_pins[] = { 0x12, 0x13, 0x14, }; static int jz4770_mmc0_4bit_a_pins[] = { 0x15, 0x16, 0x17, }; static int jz4770_mmc0_1bit_e_pins[] = { 0x9c, 0x9d, 0x94, }; @@ -1050,35 +1050,35 @@ static int jz4780_ssi0_dt_a_19_pins[] = { 0x13, }; static int jz4780_ssi0_dt_a_21_pins[] = { 0x15, }; static int jz4780_ssi0_dt_a_28_pins[] = { 0x1c, }; static int jz4780_ssi0_dt_b_pins[] = { 0x3d, }; -static int jz4780_ssi0_dt_d_pins[] = { 0x59, }; +static int jz4780_ssi0_dt_d_pins[] = { 0x79, }; static int jz4780_ssi0_dr_a_20_pins[] = { 0x14, }; static int jz4780_ssi0_dr_a_27_pins[] = { 0x1b, }; static int jz4780_ssi0_dr_b_pins[] = { 0x34, }; -static int jz4780_ssi0_dr_d_pins[] = { 0x54, }; +static int jz4780_ssi0_dr_d_pins[] = { 0x74, }; static int jz4780_ssi0_clk_a_pins[] = { 0x12, }; static int jz4780_ssi0_clk_b_5_pins[] = { 0x25, }; static int jz4780_ssi0_clk_b_28_pins[] = { 0x3c, }; -static int jz4780_ssi0_clk_d_pins[] = { 0x58, }; +static int jz4780_ssi0_clk_d_pins[] = { 0x78, }; static int jz4780_ssi0_gpc_b_pins[] = { 0x3e, }; -static int jz4780_ssi0_gpc_d_pins[] = { 0x56, }; +static int jz4780_ssi0_gpc_d_pins[] = { 0x76, }; static int jz4780_ssi0_ce0_a_23_pins[] = { 0x17, }; static int jz4780_ssi0_ce0_a_25_pins[] = { 0x19, }; static int jz4780_ssi0_ce0_b_pins[] = { 0x3f, }; -static int jz4780_ssi0_ce0_d_pins[] = { 0x57, }; +static int jz4780_ssi0_ce0_d_pins[] = { 0x77, }; static int jz4780_ssi0_ce1_b_pins[] = { 0x35, }; -static int jz4780_ssi0_ce1_d_pins[] = { 0x55, }; +static int jz4780_ssi0_ce1_d_pins[] = { 0x75, }; static int jz4780_ssi1_dt_b_pins[] = { 0x3d, }; -static int jz4780_ssi1_dt_d_pins[] = { 0x59, }; +static int jz4780_ssi1_dt_d_pins[] = { 0x79, }; static int jz4780_ssi1_dr_b_pins[] = { 0x34, }; -static int jz4780_ssi1_dr_d_pins[] = { 0x54, }; +static int jz4780_ssi1_dr_d_pins[] = { 0x74, }; static int jz4780_ssi1_clk_b_pins[] = { 0x3c, }; -static int jz4780_ssi1_clk_d_pins[] = { 0x58, }; +static int jz4780_ssi1_clk_d_pins[] = { 0x78, }; static int jz4780_ssi1_gpc_b_pins[] = { 0x3e, }; -static int jz4780_ssi1_gpc_d_pins[] = { 0x56, }; +static int jz4780_ssi1_gpc_d_pins[] = { 0x76, }; static int jz4780_ssi1_ce0_b_pins[] = { 0x3f, }; -static int jz4780_ssi1_ce0_d_pins[] = { 0x57, }; +static int jz4780_ssi1_ce0_d_pins[] = { 0x77, }; static int jz4780_ssi1_ce1_b_pins[] = { 0x35, }; -static int jz4780_ssi1_ce1_d_pins[] = { 0x55, }; +static int jz4780_ssi1_ce1_d_pins[] = { 0x75, }; static int jz4780_mmc0_8bit_a_pins[] = { 0x04, 0x05, 0x06, 0x07, 0x18, }; static int jz4780_i2c3_pins[] = { 0x6a, 0x6b, }; static int jz4780_i2c4_e_pins[] = { 0x8c, 0x8d, }; From 77f6ab8b7768cf5e6bdd0e72499270a0671506ee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 28 Oct 2020 16:39:49 -0400 Subject: [PATCH 066/710] don't dump the threads that had been already exiting when zapped. Coredump logics needs to report not only the registers of the dumping thread, but (since 2.5.43) those of other threads getting killed. Doing that might require extra state saved on the stack in asm glue at kernel entry; signal delivery logics does that (we need to be able to save sigcontext there, at the very least) and so does seccomp. That covers all callers of do_coredump(). Secondary threads get hit with SIGKILL and caught as soon as they reach exit_mm(), which normally happens in signal delivery, so those are also fine most of the time. Unfortunately, it is possible to end up with secondary zapped when it has already entered exit(2) (or, worse yet, is oopsing). In those cases we reach exit_mm() when mm->core_state is already set, but the stack contents is not what we would have in signal delivery. At least on two architectures (alpha and m68k) it leads to infoleaks - we end up with a chunk of kernel stack written into coredump, with the contents consisting of normal C stack frames of the call chain leading to exit_mm() instead of the expected copy of userland registers. In case of alpha we leak 312 bytes of stack. Other architectures (including the regset-using ones) might have similar problems - the normal user of regsets is ptrace and the state of tracee at the time of such calls is special in the same way signal delivery is. Note that had the zapper gotten to the exiting thread slightly later, it wouldn't have been included into coredump anyway - we skip the threads that have already cleared their ->mm. So let's pretend that zapper always loses the race. IOW, have exit_mm() only insert into the dumper list if we'd gotten there from handling a fatal signal[*] As the result, the callers of do_exit() that have *not* gone through get_signal() are not seen by coredump logics as secondary threads. Which excludes voluntary exit()/oopsen/traps/etc. The dumper thread itself is unaffected by that, so seccomp is fine. [*] originally I intended to add a new flag in tsk->flags, but ebiederman pointed out that PF_SIGNALED is already doing just what we need. Cc: stable@vger.kernel.org Fixes: d89f3847def4 ("[PATCH] thread-aware coredumps, 2.5.43-C3") History-tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Acked-by: "Eric W. Biederman" Signed-off-by: Al Viro --- kernel/exit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/exit.c b/kernel/exit.c index 87a2d515de0d..1f236ed375f8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -454,7 +454,10 @@ static void exit_mm(void) mmap_read_unlock(mm); self.task = current; - self.next = xchg(&core_state->dumper.next, &self); + if (self.task->flags & PF_SIGNALED) + self.next = xchg(&core_state->dumper.next, &self); + else + self.task = NULL; /* * Implies mb(), the result of xchg() must be visible * to core_state->dumper. From 424f5ca7c8515a4b6e3b0812899ddda082f42fd5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Oct 2020 17:15:25 +0200 Subject: [PATCH 067/710] MAINTAINERS: move Kamil Debski to credits Kamil Debski has not been active on LKML since 2017: https://lore.kernel.org/lkml/?q=f%3A%22Kamil+Debski%22 Move Kamil Debski to the CREDITS file. Thank you for the effort you put in to the upstream Linux kernel work. Signed-off-by: Krzysztof Kozlowski Acked-by: Mauro Carvalho Chehab Cc: Kamil Debski Cc: Andrzej Hajda Cc: Bartlomiej Zolnierkiewicz Cc: Sylwester Nawrocki Link: https://lore.kernel.org/r/20201016151528.7553-1-krzk@kernel.org --- CREDITS | 6 ++++++ MAINTAINERS | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CREDITS b/CREDITS index cb02b9923a52..f95e46f7a428 100644 --- a/CREDITS +++ b/CREDITS @@ -849,6 +849,12 @@ D: trivial hack to add variable address length routing to Rose. D: AX25-HOWTO, HAM-HOWTO, IPX-HOWTO, NET-2-HOWTO D: ax25-utils maintainer. +N: Kamil Debski +E: kamil@wypas.org +D: Samsung S5P 2D graphics acceleration and Multi Format Codec drivers +D: Samsung USB2 phy drivers +D: PWM fan driver + N: Helge Deller E: deller@gmx.de W: http://www.parisc-linux.org/ diff --git a/MAINTAINERS b/MAINTAINERS index e73636b75f29..b8ce3f209144 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2412,7 +2412,6 @@ F: arch/arm/mach-s5pv210/ ARM/SAMSUNG S5P SERIES 2D GRAPHICS ACCELERATION (G2D) SUPPORT M: Kyungmin Park -M: Kamil Debski M: Andrzej Hajda L: linux-arm-kernel@lists.infradead.org L: linux-media@vger.kernel.org @@ -2438,7 +2437,6 @@ F: drivers/media/platform/s5p-jpeg/ ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT M: Kyungmin Park -M: Kamil Debski M: Jeongtae Park M: Andrzej Hajda L: linux-arm-kernel@lists.infradead.org @@ -14230,7 +14228,6 @@ F: drivers/media/usb/pwc/* F: include/trace/events/pwc.h PWM FAN DRIVER -M: Kamil Debski M: Bartlomiej Zolnierkiewicz L: linux-hwmon@vger.kernel.org S: Supported @@ -15515,7 +15512,6 @@ T: git https://github.com/lmajewski/linux-samsung-thermal.git F: drivers/thermal/samsung/ SAMSUNG USB2 PHY DRIVER -M: Kamil Debski M: Sylwester Nawrocki L: linux-kernel@vger.kernel.org S: Supported From 215f06d7efc263d4e8150e0b97c49dd9914b59d7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Oct 2020 17:16:17 +0200 Subject: [PATCH 068/710] MAINTAINERS: move Kyungmin Park to credits Kyungmin Park maintained and contributed to some of the upstreamed S5Pv210 and Exynos4210 machines - as described in commit 10ffa96407b2 ("MAINTAINERS: add maintainer of Samsung Mobile Machine support"). However the entry in maintainers got slightly twisted by commit 004bbd3c01d4 ("MAINTAINERS: remove non existent files") - the directory matching pattern was changed from specific machines to the entire S5Pv210. Anyway since long time, all S5Pv210 maintenance is covered by the Samsung ARM architectures maintainer entry and Krzysztof Kozlowski, so move Kyungmin Park to the CREDITS. There was also no activity on LKML regarding other maintained drivers: https://lore.kernel.org/lkml/?q=f%3A%22Kyungmin+Park%22 Dear Kyungmin Park, thank you for all the effort you put in to the upstream Samsung support. Signed-off-by: Krzysztof Kozlowski Cc: Kyungmin Park Cc: Andrzej Hajda Cc: Sylwester Nawrocki Cc: Arnd Bergmann Cc: Olof Johansson Link: https://lore.kernel.org/r/20201016151528.7553-1-krzk@kernel.org --- CREDITS | 4 ++++ MAINTAINERS | 13 +------------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/CREDITS b/CREDITS index f95e46f7a428..205ed442b8e6 100644 --- a/CREDITS +++ b/CREDITS @@ -2849,6 +2849,10 @@ D: IPX development and support N: Venkatesh Pallipadi (Venki) D: x86/HPET +N: Kyungmin Park +E: kyungmin.park@samsung.com +D: Samsung S5Pv210 and Exynos4210 mobile platforms + N: David Parsons E: orc@pell.chi.il.us D: improved memory detection code. diff --git a/MAINTAINERS b/MAINTAINERS index b8ce3f209144..f7824667ff9d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2374,7 +2374,7 @@ F: drivers/i2c/busses/i2c-rk3x.c F: sound/soc/rockchip/ N: rockchip -ARM/SAMSUNG EXYNOS ARM ARCHITECTURES +ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES M: Kukjin Kim M: Krzysztof Kozlowski L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -2404,14 +2404,7 @@ N: s3c2410 N: s3c64xx N: s5pv210 -ARM/SAMSUNG MOBILE MACHINE SUPPORT -M: Kyungmin Park -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained -F: arch/arm/mach-s5pv210/ - ARM/SAMSUNG S5P SERIES 2D GRAPHICS ACCELERATION (G2D) SUPPORT -M: Kyungmin Park M: Andrzej Hajda L: linux-arm-kernel@lists.infradead.org L: linux-media@vger.kernel.org @@ -2436,7 +2429,6 @@ S: Maintained F: drivers/media/platform/s5p-jpeg/ ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT -M: Kyungmin Park M: Jeongtae Park M: Andrzej Hajda L: linux-arm-kernel@lists.infradead.org @@ -15438,14 +15430,12 @@ F: Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml F: drivers/nfc/s3fwrn5 SAMSUNG S5C73M3 CAMERA DRIVER -M: Kyungmin Park M: Andrzej Hajda L: linux-media@vger.kernel.org S: Supported F: drivers/media/i2c/s5c73m3/* SAMSUNG S5K5BAF CAMERA DRIVER -M: Kyungmin Park M: Andrzej Hajda L: linux-media@vger.kernel.org S: Supported @@ -15463,7 +15453,6 @@ F: Documentation/devicetree/bindings/crypto/samsung-sss.yaml F: drivers/crypto/s5p-sss.c SAMSUNG S5P/EXYNOS4 SOC SERIES CAMERA SUBSYSTEM DRIVERS -M: Kyungmin Park M: Sylwester Nawrocki L: linux-media@vger.kernel.org S: Supported From d5a69b6ba186d01cb036074d337e5171d27e9c72 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Oct 2020 17:15:27 +0200 Subject: [PATCH 069/710] MAINTAINERS: remove Jeongtae Park from Samsung MFC entry Jeongtae Park has not been active on LKML: https://lore.kernel.org/lkml/?q=f%3A%22Jeongtae+Park%22 Remove him from the Samsung S5P MFC driver entry. Signed-off-by: Krzysztof Kozlowski Cc: Jeongtae Park Cc: Andrzej Hajda Link: https://lore.kernel.org/r/20201016151528.7553-3-krzk@kernel.org --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f7824667ff9d..40b4a582b4cc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2429,7 +2429,6 @@ S: Maintained F: drivers/media/platform/s5p-jpeg/ ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT -M: Jeongtae Park M: Andrzej Hajda L: linux-arm-kernel@lists.infradead.org L: linux-media@vger.kernel.org From cd12e4f14f7cca7d04fbcf0da2dc116070d7f26f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Oct 2020 17:15:28 +0200 Subject: [PATCH 070/710] CREDITS: remove trailing white spaces Remove trailing white spaces. No functional/substantive change. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20201016151528.7553-4-krzk@kernel.org --- CREDITS | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/CREDITS b/CREDITS index 205ed442b8e6..b11224ae7235 100644 --- a/CREDITS +++ b/CREDITS @@ -98,7 +98,7 @@ N: Erik Andersen E: andersen@codepoet.org W: https://www.codepoet.org/ P: 1024D/30D39057 1BC4 2742 E885 E4DE 9301 0C82 5F9B 643E 30D3 9057 -D: Maintainer of ide-cd and Uniform CD-ROM driver, +D: Maintainer of ide-cd and Uniform CD-ROM driver, D: ATAPI CD-Changer support, Major 2.1.x CD-ROM update. S: 352 North 525 East S: Springville, Utah 84663 @@ -263,7 +263,7 @@ N: Paul Barton-Davis E: pbd@op.net D: Driver for WaveFront soundcards (Turtle Beach Maui, Tropez, Tropez+) D: Various bugfixes and changes to sound drivers -S: USA +S: USA N: Carlos Henrique Bauer E: chbauer@acm.org @@ -1205,7 +1205,7 @@ N: Daniel J. Frasnelli E: dfrasnel@alphalinux.org W: http://www.alphalinux.org/ P: 1024/3EF87611 B9 F1 44 50 D3 E8 C2 80 DA E5 55 AA 56 7C 42 DA -D: DEC Alpha hacker +D: DEC Alpha hacker D: Miscellaneous bug squisher N: Jim Freeman @@ -1305,7 +1305,7 @@ S: P.O. Box 76, Epping S: New South Wales, 2121 S: Australia -N: Carlos E. Gorges +N: Carlos E. Gorges E: carlos@techlinux.com.br D: fix smp support on cmpci driver P: 2048G/EA3C4B19 FF31 33A6 0362 4915 B7EB E541 17D0 0379 EA3C 4B19 @@ -1346,7 +1346,7 @@ E: wgreathouse@smva.com E: wgreathouse@myfavoritei.com D: Current Belkin USB Serial Adapter F5U103 hacker D: Kernel hacker, embedded systems -S: 7802 Fitzwater Road +S: 7802 Fitzwater Road S: Brecksville, OH 44141-1334 S: USA @@ -1387,7 +1387,7 @@ N: Grant Guenther E: grant@torque.net W: http://www.torque.net/linux-pp.html D: original author of ppa driver for parallel port ZIP drive -D: original architect of the parallel-port sharing scheme +D: original architect of the parallel-port sharing scheme D: PARIDE subsystem: drivers for parallel port IDE & ATAPI devices S: 44 St. Joseph Street, Suite 506 S: Toronto, Ontario, M4Y 2W4 @@ -1529,7 +1529,7 @@ N: Benjamin Herrenschmidt E: benh@kernel.crashing.org D: Various parts of PPC/PPC64 & PowerMac S: 312/107 Canberra Avenue -S: Griffith, ACT 2603 +S: Griffith, ACT 2603 S: Australia N: Andreas Herrmann @@ -1831,7 +1831,7 @@ S: Hungary N: Bernhard Kaindl E: bkaindl@netway.at E: edv@bartelt.via.at -D: Author of a menu based configuration tool, kmenu, which +D: Author of a menu based configuration tool, kmenu, which D: is the predecessor of 'make menuconfig' and 'make xconfig'. D: digiboard driver update(modularisation work and 2.1.x upd) S: Tallak 95 @@ -2013,7 +2013,7 @@ W: http://www.xos.nl/ D: IP transparent proxy support S: X/OS Experts in Open Systems BV S: Kruislaan 419 -S: 1098 VA Amsterdam +S: 1098 VA Amsterdam S: The Netherlands N: Goran Koruga @@ -2085,7 +2085,7 @@ S: Germany N: Andrzej M. Krzysztofowicz E: ankry@mif.pg.gda.pl -D: Some 8-bit XT disk driver and devfs hacking +D: Some 8-bit XT disk driver and devfs hacking D: Aladdin 1533/1543(C) chipset IDE D: PIIX chipset IDE S: ul. Matemblewska 1B/10 @@ -2460,7 +2460,7 @@ E: mge@EZ-Darmstadt.Telekom.de D: Logical Volume Manager S: Bartningstr. 12 S: 64289 Darmstadt -S: Germany +S: Germany N: Mark W. McClelland E: mmcclell@bigfoot.com @@ -2544,7 +2544,7 @@ E: meskes@debian.org P: 1024/04B6E8F5 6C 77 33 CA CC D6 22 03 AB AB 15 A3 AE AD 39 7D D: Kernel hacker. PostgreSQL hacker. Software watchdog daemon. D: Maintainer of several Debian packages -S: Th.-Heuss-Str. 61 +S: Th.-Heuss-Str. 61 S: D-41812 Erkelenz S: Germany @@ -2782,7 +2782,7 @@ E: neuffer@goofy.zdv.uni-mainz.de W: http://www.i-Connect.Net/~mike/ D: Developer and maintainer of the EATA-DMA SCSI driver D: Co-developer EATA-PIO SCSI driver -D: /proc/scsi and assorted other snippets +D: /proc/scsi and assorted other snippets S: Zum Schiersteiner Grund 2 S: 55127 Mainz S: Germany @@ -3020,7 +3020,7 @@ D: Embedded PowerPC 4xx/6xx/7xx/74xx support S: Chandler, Arizona 85249 S: USA -N: Frederic Potter +N: Frederic Potter E: fpotter@cirpack.com D: Some PCI kernel support @@ -3453,21 +3453,21 @@ S: Klosterweg 28 / i309 S: 76131 Karlsruhe S: Germany -N: James Simmons +N: James Simmons E: jsimmons@infradead.org -E: jsimmons@users.sf.net +E: jsimmons@users.sf.net D: Frame buffer device maintainer D: input layer development D: tty/console layer -D: various mipsel devices -S: 115 Carmel Avenue +D: various mipsel devices +S: 115 Carmel Avenue S: El Cerrito CA 94530 -S: USA +S: USA N: Jaspreet Singh E: jaspreet@sangoma.com W: www.sangoma.com -D: WANPIPE drivers & API Support for Sangoma S508/FT1 cards +D: WANPIPE drivers & API Support for Sangoma S508/FT1 cards S: Sangoma Technologies Inc., S: 1001 Denison Street S: Suite 101 @@ -3491,7 +3491,7 @@ N: Craig Small E: csmall@triode.apana.org.au E: vk2xlz@gonzo.vk2xlz.ampr.org (packet radio) D: Gracilis PackeTwin device driver -D: RSPF daemon +D: RSPF daemon S: 10 Stockalls Place S: Minto, NSW, 2566 S: Australia @@ -3701,7 +3701,7 @@ N: Tsu-Sheng Tsao E: tsusheng@scf.usc.edu D: IGMP(Internet Group Management Protocol) version 2 S: 2F 14 ALY 31 LN 166 SEC 1 SHIH-PEI RD -S: Taipei +S: Taipei S: Taiwan 112 S: Republic of China S: 24335 Delta Drive @@ -3862,7 +3862,7 @@ D: Produced the Slackware distribution, updated the SVGAlib D: patches for ghostscript, worked on color 'ls', etc. S: 301 15th Street S. S: Moorhead, Minnesota 56560 -S: USA +S: USA N: Jos Vos E: jos@xos.nl @@ -3870,7 +3870,7 @@ W: http://www.xos.nl/ D: Various IP firewall updates, ipfwadm S: X/OS Experts in Open Systems BV S: Kruislaan 419 -S: 1098 VA Amsterdam +S: 1098 VA Amsterdam S: The Netherlands N: Jeroen Vreeken @@ -4108,7 +4108,7 @@ S: People's Repulic of China N: Victor Yodaiken E: yodaiken@fsmlabs.com D: RTLinux (RealTime Linux) -S: POB 1822 +S: POB 1822 S: Socorro NM, 87801 S: USA @@ -4206,7 +4206,7 @@ D: EISA/sysfs subsystem S: France # Don't add your name here, unless you really _are_ after Marc -# alphabetically. Leonard used to be very proud of being the +# alphabetically. Leonard used to be very proud of being the # last entry, and he'll get positively pissed if he can't even # be second-to-last. (and this file really _is_ supposed to be # in alphabetic order) From 8d8c3131248d7e9c6c8ab448e1c6cb6bd7755e9c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:13:57 +0100 Subject: [PATCH 071/710] clk: define to_clk_regmap() as inline function Nesting container_of() causes warnings with W=2, which is annoying if it happens in headers and fills the build log like: In file included from drivers/clk/qcom/clk-alpha-pll.c:6: drivers/clk/qcom/clk-alpha-pll.c: In function 'clk_alpha_pll_hwfsm_enable': include/linux/kernel.h:852:8: warning: declaration of '__mptr' shadows a previous local [-Wshadow] 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ drivers/clk/qcom/clk-alpha-pll.c:155:31: note: in expansion of macro 'container_of' 155 | #define to_clk_alpha_pll(_hw) container_of(to_clk_regmap(_hw), \ | ^~~~~~~~~~~~ drivers/clk/qcom/clk-regmap.h:27:28: note: in expansion of macro 'container_of' 27 | #define to_clk_regmap(_hw) container_of(_hw, struct clk_regmap, hw) | ^~~~~~~~~~~~ drivers/clk/qcom/clk-alpha-pll.c:155:44: note: in expansion of macro 'to_clk_regmap' 155 | #define to_clk_alpha_pll(_hw) container_of(to_clk_regmap(_hw), \ | ^~~~~~~~~~~~~ drivers/clk/qcom/clk-alpha-pll.c:254:30: note: in expansion of macro 'to_clk_alpha_pll' 254 | struct clk_alpha_pll *pll = to_clk_alpha_pll(hw); | ^~~~~~~~~~~~~~~~ include/linux/kernel.h:852:8: note: shadowed declaration is here 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ Redefine two copies of the to_clk_regmap() macro as inline functions to avoid a lot of these. Fixes: ea11dda9e091 ("clk: meson: add regmap clocks") Fixes: 085d7a455444 ("clk: qcom: Add a regmap type clock struct") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20201026161411.3708639-1-arnd@kernel.org Acked-by: Jerome Brunet Signed-off-by: Stephen Boyd --- drivers/clk/meson/clk-regmap.h | 5 ++++- drivers/clk/qcom/clk-regmap.h | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/clk/meson/clk-regmap.h b/drivers/clk/meson/clk-regmap.h index c4a39604cffd..e365312da54e 100644 --- a/drivers/clk/meson/clk-regmap.h +++ b/drivers/clk/meson/clk-regmap.h @@ -26,7 +26,10 @@ struct clk_regmap { void *data; }; -#define to_clk_regmap(_hw) container_of(_hw, struct clk_regmap, hw) +static inline struct clk_regmap *to_clk_regmap(struct clk_hw *hw) +{ + return container_of(hw, struct clk_regmap, hw); +} /** * struct clk_regmap_gate_data - regmap backed gate specific data diff --git a/drivers/clk/qcom/clk-regmap.h b/drivers/clk/qcom/clk-regmap.h index 6cfc1bccb255..14ec659a3a77 100644 --- a/drivers/clk/qcom/clk-regmap.h +++ b/drivers/clk/qcom/clk-regmap.h @@ -24,7 +24,11 @@ struct clk_regmap { unsigned int enable_mask; bool enable_is_inverted; }; -#define to_clk_regmap(_hw) container_of(_hw, struct clk_regmap, hw) + +static inline struct clk_regmap *to_clk_regmap(struct clk_hw *hw) +{ + return container_of(hw, struct clk_regmap, hw); +} int clk_is_enabled_regmap(struct clk_hw *hw); int clk_enable_regmap(struct clk_hw *hw); From 107954afc5df667da438644aa4982606663f9b17 Mon Sep 17 00:00:00 2001 From: Nenad Peric Date: Wed, 28 Oct 2020 12:58:17 +0100 Subject: [PATCH 072/710] arm64: dts: allwinner: h5: OrangePi Prime: Fix ethernet node RX and TX delay are provided by ethernet PHY. Reflect that in ethernet node. Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes") Signed-off-by: Nenad Peric Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201028115817.68113-1-nperic@gmail.com --- arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts index cb44bfa5981f..33ab44072e6d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts @@ -124,7 +124,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 5c7e02a896689407555b3a10d6ed87369c70916e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 26 Oct 2020 16:46:06 +0100 Subject: [PATCH 073/710] HID: i2c-hid: Put ACPI enumerated devices in D3 on shutdown The i2c-hid driver would quietly fail to probe the i2c-hid sensor-hub with an ACPI device-id of SMO91D0 every other boot. Specifically, the i2c_smbus_read_byte() "Make sure there is something at this address" check would fail every other boot. It seems that the BIOS does not properly reset/power-cycle the device leaving it in a confused state where it refuses to respond to i2c-xfers. On boots where probing the device failed, the driver-core puts the device in D3 after the probe-failure, which causes the probe to succeed the next boot. Putting the device in D3 from the shutdown-handler fixes the sensors not working every other boot. This has been tested on both a Lenovo Miix 2-10 and a Dell Venue 8 Pro 5830 both of which use an i2c-hid sensor-hub with an ACPI id of SMO91D0. Note that it is safe to call acpi_device_set_power() with a NULL pointer as first argument, so on none ACPI enumerated devices this change is a no-op. Cc: Kai-Heng Feng Signed-off-by: Hans de Goede Acked-by: Kai-Heng Feng Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 786e3e9af1c9..aeff1ffb0c8b 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -943,6 +943,11 @@ static void i2c_hid_acpi_enable_wakeup(struct device *dev) } } +static void i2c_hid_acpi_shutdown(struct device *dev) +{ + acpi_device_set_power(ACPI_COMPANION(dev), ACPI_STATE_D3_COLD); +} + static const struct acpi_device_id i2c_hid_acpi_match[] = { {"ACPI0C50", 0 }, {"PNP0C50", 0 }, @@ -959,6 +964,8 @@ static inline int i2c_hid_acpi_pdata(struct i2c_client *client, static inline void i2c_hid_acpi_fix_up_power(struct device *dev) {} static inline void i2c_hid_acpi_enable_wakeup(struct device *dev) {} + +static inline void i2c_hid_acpi_shutdown(struct device *dev) {} #endif #ifdef CONFIG_OF @@ -1175,6 +1182,8 @@ static void i2c_hid_shutdown(struct i2c_client *client) i2c_hid_set_power(client, I2C_HID_PWR_SLEEP); free_irq(client->irq, ihid); + + i2c_hid_acpi_shutdown(&client->dev); } #ifdef CONFIG_PM_SLEEP From 821f5c90130d15f8f725412d714d05df3b9e0fac Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 28 Oct 2020 11:12:04 -0700 Subject: [PATCH 074/710] bpf: Add struct bpf_redir_neigh forward declaration to BPF helper defs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forward-declare struct bpf_redir_neigh in bpf_helper_defs.h to avoid compiler warning about unknown structs. Fixes: ba452c9e996d ("bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20201028181204.111241-1-andrii@kernel.org --- scripts/bpf_helpers_doc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 6769caae142f..31484377b8b1 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -408,6 +408,7 @@ class PrinterHelpers(Printer): 'struct bpf_perf_event_data', 'struct bpf_perf_event_value', 'struct bpf_pidns_info', + 'struct bpf_redir_neigh', 'struct bpf_sock', 'struct bpf_sock_addr', 'struct bpf_sock_ops', From e5e1a4bc916d29958c3b587354293738fcb984d7 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 27 Oct 2020 13:32:01 +0100 Subject: [PATCH 075/710] xsk: Fix possible memory leak at socket close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a possible memory leak at xsk socket close that is caused by the refcounting of the umem object being wrong. The reference count of the umem was decremented only after the pool had been freed. Note that if the buffer pool is destroyed, it is important that the umem is destroyed after the pool, otherwise the umem would disappear while the driver is still running. And as the buffer pool needs to be destroyed in a work queue, the umem is also (if its refcount reaches zero) destroyed after the buffer pool in that same work queue. What was missing is that the refcount also needs to be decremented when the pool is not freed and when the pool has not even been created. The first case happens when the refcount of the pool is higher than 1, i.e. it is still being used by some other socket using the same device and queue id. In this case, it is safe to decrement the refcount of the umem outside of the work queue as the umem will never be freed because the refcount of the umem is always greater than or equal to the refcount of the buffer pool. The second case is if the buffer pool has not been created yet, i.e. the socket was closed before it was bound but after the umem was created. In this case, it is safe to destroy the umem outside of the work queue, since there is no pool that can use it by definition. Fixes: 1c1efc2af158 ("xsk: Create and free buffer pool independently from umem") Reported-by: syzbot+eb71df123dc2be2c1456@syzkaller.appspotmail.com Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/1603801921-2712-1-git-send-email-magnus.karlsson@gmail.com --- include/net/xsk_buff_pool.h | 2 +- net/xdp/xsk.c | 3 ++- net/xdp/xsk_buff_pool.c | 7 +++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 0140d086dc84..01755b838c74 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -86,7 +86,7 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, void xp_destroy(struct xsk_buff_pool *pool); void xp_release(struct xdp_buff_xsk *xskb); void xp_get_pool(struct xsk_buff_pool *pool); -void xp_put_pool(struct xsk_buff_pool *pool); +bool xp_put_pool(struct xsk_buff_pool *pool); void xp_clear_dev(struct xsk_buff_pool *pool); void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs); void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index b71a32eeae65..cfbec3989a76 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1146,7 +1146,8 @@ static void xsk_destruct(struct sock *sk) if (!sock_flag(sk, SOCK_DEAD)) return; - xp_put_pool(xs->pool); + if (!xp_put_pool(xs->pool)) + xdp_put_umem(xs->umem); sk_refcnt_debug_dec(sk); } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 64c9e55d4d4e..8a3bf4e1318e 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -251,15 +251,18 @@ void xp_get_pool(struct xsk_buff_pool *pool) refcount_inc(&pool->users); } -void xp_put_pool(struct xsk_buff_pool *pool) +bool xp_put_pool(struct xsk_buff_pool *pool) { if (!pool) - return; + return false; if (refcount_dec_and_test(&pool->users)) { INIT_WORK(&pool->work, xp_release_deferred); schedule_work(&pool->work); + return true; } + + return false; } static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool) From 1e6f5dcc1b9ec9068f5d38331cec38b35498edf5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Oct 2020 16:36:45 -0700 Subject: [PATCH 076/710] tools, bpftool: Avoid array index warnings. The bpf_caps array is shorter without CAP_BPF, avoid out of bounds reads if this isn't defined. Working around this avoids -Wno-array-bounds with clang. Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Reviewed-by: Tobias Klauser Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201027233646.3434896-1-irogers@google.com --- tools/bpf/bpftool/feature.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index a43a6f10b564..359960a8f1de 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -843,9 +843,14 @@ static int handle_perms(void) else p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'", capability_msg(bpf_caps, 0), +#ifdef CAP_BPF capability_msg(bpf_caps, 1), capability_msg(bpf_caps, 2), - capability_msg(bpf_caps, 3)); + capability_msg(bpf_caps, 3) +#else + "", "", "", "", "", "" +#endif /* CAP_BPF */ + ); goto exit_free; } From 0698ac66e01019528f0db4191ae3aaf9978e67da Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Oct 2020 16:36:46 -0700 Subject: [PATCH 077/710] tools, bpftool: Remove two unused variables. Avoid an unused variable warning. Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Reviewed-by: Tobias Klauser Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201027233646.3434896-2-irogers@google.com --- tools/bpf/bpftool/skeleton/profiler.bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index 4e3512f700c0..ce5b65e07ab1 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -70,7 +70,7 @@ int BPF_PROG(fentry_XXX) static inline void fexit_update_maps(u32 id, struct bpf_perf_event_value *after) { - struct bpf_perf_event_value *before, diff, *accum; + struct bpf_perf_event_value *before, diff; before = bpf_map_lookup_elem(&fentry_readings, &id); /* only account samples with a valid fentry_reading */ @@ -95,7 +95,7 @@ int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value readings[MAX_NUM_MATRICS]; u32 cpu = bpf_get_smp_processor_id(); - u32 i, one = 1, zero = 0; + u32 i, zero = 0; int err; u64 *count; From 00203737867c8b63ca247e71ada1b32bb0b0dd3d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 27 Sep 2020 18:59:42 +0200 Subject: [PATCH 078/710] arm64: dts: imx8mm-var-som: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. The actual problem in DTS was pointed out by Felix Radensky from Variscite. Reported-by: Felix Radensky Fixes: 5f67317bd967 ("arm64: dts: imx8mm: correct interrupt flags") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Robin Gong Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi index 4107fe914d08..49082529764f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi @@ -135,13 +135,10 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio2>; - /* - * The interrupt is not correct. It should be level low, - * however with internal pull up this causes IRQ storm. - */ - interrupts = <8 IRQ_TYPE_EDGE_RISING>; + interrupts = <8 IRQ_TYPE_LEVEL_LOW>; rohm,reset-snvs-powered; #clock-cells = <0>; @@ -398,7 +395,7 @@ pinctrl_pmic: pmicirqgrp { fsl,pins = < - MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8 0x41 + MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8 0x141 >; }; From 0710e4385c9c978952333393396061ed1672d145 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 27 Sep 2020 18:59:43 +0200 Subject: [PATCH 079/710] arm64: dts: imx8mm-beacon-som: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. Fixes: 5f67317bd967 ("arm64: dts: imx8mm: correct interrupt flags") Fixes: 593816fa2f35 ("arm64: dts: imx: Add Beacon i.MX8m-Mini development kit") Signed-off-by: Krzysztof Kozlowski Tested-by: Adam Ford Reviewed-by: Robin Gong Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index 6de86a4f0ec4..55b36bddd513 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -72,6 +72,7 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; From ce6fc31f388d45b9f7135169f911cd27f4d21126 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 27 Sep 2020 18:59:44 +0200 Subject: [PATCH 080/710] arm64: dts: imx8mm-evk: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. Fixes: 5f67317bd967 ("arm64: dts: imx8mm: correct interrupt flags") Fixes: aa71d0648318 ("arm64: dts: imx8mm: Split the imx8mm evk board dts to a common dtsi") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Robin Gong Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi index f305a530ff6f..521eb3a5a12e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi @@ -121,6 +121,7 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; From 34a1c5e39b670fd7a324b5620c9ad4ac80c2f018 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 27 Sep 2020 18:59:45 +0200 Subject: [PATCH 081/710] arm64: dts: imx8mn-var-som: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. The actual problem in DTS was pointed out by Felix Radensky from Variscite. Reported-by: Felix Radensky Fixes: ade0176dd8a0 ("arm64: dts: imx8mn-var-som: Add Variscite VAR-SOM-MX8MN System on Module") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Robin Gong Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index a2d0190921e4..7f356edf9f91 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -116,13 +116,10 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio2>; - /* - * The interrupt is not correct. It should be level low, - * however with internal pull up this causes IRQ storm. - */ - interrupts = <8 IRQ_TYPE_EDGE_RISING>; + interrupts = <8 IRQ_TYPE_LEVEL_LOW>; rohm,reset-snvs-powered; regulators { @@ -388,7 +385,7 @@ pinctrl_pmic: pmicirqgrp { fsl,pins = < - MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8 0x101 + MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8 0x141 >; }; From 4d20fa1dac2e3cf5aa0cd317b3436f4fda680b04 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 27 Sep 2020 18:59:46 +0200 Subject: [PATCH 082/710] arm64: dts: imx8mn-ddr4-evk: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. Fixes: 4153f7811a9b ("arm64: dts: imx8mn: correct interrupt flags") Fixes: 3e44dd09736d ("arm64: dts: imx8mn-ddr4-evk: Add rohm,bd71847 PMIC support") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Robin Gong Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index 46e76cf32b2f..7dfee715a2c4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -53,6 +53,7 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; From 6efb099a1da4e954409e241b47257a637120e5c2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 27 Sep 2020 18:59:47 +0200 Subject: [PATCH 083/710] arm64: dts: imx8mn-evk: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. Fixes: 4153f7811a9b ("arm64: dts: imx8mn: correct interrupt flags") Fixes: 6386156eb279 ("arm64: dts: imx8mn-evk: add pca9450 for i.mx8mn-evk board") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Robin Gong Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn-evk.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts index 707d8486b4d8..8311b95dee49 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts @@ -18,6 +18,7 @@ pmic: pmic@25 { compatible = "nxp,pca9450b"; reg = <0x25>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; From d92454287ee25d78f1caac3734a1864f8a5a5275 Mon Sep 17 00:00:00 2001 From: Biwen Li Date: Tue, 29 Sep 2020 09:30:21 +0800 Subject: [PATCH 084/710] arm64: dts: fsl: fix endianness issue of rcpm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add little-endian property to RCPM node (for ls1028a,ls1088a,ls208xa), otherwise RCPM driver will program hardware with incorrect setting, causing system (such as LS1028ARDB) failed to be waked by wakeup source. Fixes: 791c88ca5713 (“arm64: dts: ls1028a: Add ftm_alarm0 DT node”) Fixes: f4fe3a8665495 (“arm64: dts: layerscape: add ftm_alarm0 node”) Signed-off-by: Biwen Li Signed-off-by: Ran Wang Acked-by: Li Yang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 1 + arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi | 1 + arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 73e4f9466887..7a6fb7e1fb82 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -1012,6 +1012,7 @@ compatible = "fsl,ls1028a-rcpm", "fsl,qoriq-rcpm-2.1+"; reg = <0x0 0x1e34040 0x0 0x1c>; #fsl,rcpm-wakeup-cells = <7>; + little-endian; }; ftm_alarm0: timer@2800000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index ff5805206a28..692d8f4a206d 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -805,6 +805,7 @@ compatible = "fsl,ls1088a-rcpm", "fsl,qoriq-rcpm-2.1+"; reg = <0x0 0x1e34040 0x0 0x18>; #fsl,rcpm-wakeup-cells = <6>; + little-endian; }; ftm_alarm0: timer@2800000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index bf72918fe545..e7abb74bd816 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -892,6 +892,7 @@ compatible = "fsl,ls208xa-rcpm", "fsl,qoriq-rcpm-2.1+"; reg = <0x0 0x1e34040 0x0 0x18>; #fsl,rcpm-wakeup-cells = <6>; + little-endian; }; ftm_alarm0: timer@2800000 { From 080b6f40763565f65ebb9540219c71ce885cf568 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 28 Oct 2020 18:15:05 +0100 Subject: [PATCH 085/710] bpf: Don't rely on GCC __attribute__((optimize)) to disable GCSE Commit 3193c0836 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") introduced a __no_fgcse macro that expands to a function scope __attribute__((optimize("-fno-gcse"))), to disable a GCC specific optimization that was causing trouble on x86 builds, and was not expected to have any positive effect in the first place. However, as the GCC manual documents, __attribute__((optimize)) is not for production use, and results in all other optimization options to be forgotten for the function in question. This can cause all kinds of trouble, but in one particular reported case, it causes -fno-asynchronous-unwind-tables to be disregarded, resulting in .eh_frame info to be emitted for the function. This reverts commit 3193c0836, and instead, it disables the -fgcse optimization for the entire source file, but only when building for X86 using GCC with CONFIG_BPF_JIT_ALWAYS_ON disabled. Note that the original commit states that CONFIG_RETPOLINE=n triggers the issue, whereas CONFIG_RETPOLINE=y performs better without the optimization, so it is kept disabled in both cases. Fixes: 3193c0836f20 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") Signed-off-by: Ard Biesheuvel Signed-off-by: Alexei Starovoitov Tested-by: Geert Uytterhoeven Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/lkml/CAMuHMdUg0WJHEcq6to0-eODpXPOywLot6UD2=GFHpzoj_hCoBQ@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20201028171506.15682-2-ardb@kernel.org --- include/linux/compiler-gcc.h | 2 -- include/linux/compiler_types.h | 4 ---- kernel/bpf/Makefile | 6 +++++- kernel/bpf/core.c | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index d1e3c6896b71..5deb37024574 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -175,5 +175,3 @@ #else #define __diag_GCC_8(s) #endif - -#define __no_fgcse __attribute__((optimize("-fno-gcse"))) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6e390d58a9f8..ac3fa37a84f9 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -247,10 +247,6 @@ struct ftrace_likely_data { #define asm_inline asm #endif -#ifndef __no_fgcse -# define __no_fgcse -#endif - /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index bdc8cd1b6767..c1b9f71ee6aa 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 obj-y := core.o -CFLAGS_core.o += $(call cc-disable-warning, override-init) +ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y) +# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details +cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse +endif +CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9268d77898b7..55454d2278b1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1369,7 +1369,7 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * * Decode and execute eBPF instructions. */ -static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) +static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) { #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z From 054b5d97448714ae4a0bcd6f36b0515ac7aed21e Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 5 Oct 2020 15:46:39 +0300 Subject: [PATCH 086/710] arm64: dts: fsl: DPAA FMan DMA operations are coherent Although the DPAA 1 FMan operations are coherent, the device tree node for the FMan does not indicate that, resulting in a needless loss of performance. Adding the missing dma-coherent property. Fixes: 1ffbecdd8321 ("arm64: dts: add DPAA FMan nodes") Signed-off-by: Madalin Bucur Tested-by: Camelia Groza Acked-by: Li Yang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi index 8bc6caa9167d..4338db14c5da 100644 --- a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi +++ b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi @@ -19,6 +19,7 @@ fman0: fman@1a00000 { clock-names = "fmanclk"; fsl,qman-channel-range = <0x800 0x10>; ptimer-handle = <&ptp_timer0>; + dma-coherent; muram@0 { compatible = "fsl,fman-muram"; From 587258edd94c305077923ec458e04c032fca83e6 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Wed, 7 Oct 2020 08:02:37 -0500 Subject: [PATCH 087/710] arm64: dts: imx8mm-beacon-som: Fix Choppy BT audio When streaming bluetooth audio, the sound is choppy due to the fact that the default baud rate of the HCI interface is too slow to handle 16-bit stereo at 48KHz. The Bluetooth chip is capable of up to 4M baud on the serial port, so this patch sets the max-speed to 4000000 in order to properly stream audio over the Bluetooth. Fixes: 593816fa2f35 ("arm64: dts: imx: Add Beacon i.MX8m-Mini development kit") Signed-off-by: Adam Ford Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index 55b36bddd513..b88c3c99b007 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -211,6 +211,7 @@ host-wakeup-gpios = <&gpio2 8 GPIO_ACTIVE_HIGH>; device-wakeup-gpios = <&gpio2 7 GPIO_ACTIVE_HIGH>; clocks = <&osc_32k>; + max-speed = <4000000>; clock-names = "extclk"; }; }; From cf5abb0132193767c07c83e06f91b777d22ba495 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 8 Oct 2020 13:33:00 -0500 Subject: [PATCH 088/710] arm64: dts imx8mn: Remove non-existent USB OTG2 According to the i.MX8MN TRM, there is only one OTG port. The address for OTG2 is reserved on Nano. This patch removes the non-existent OTG2, usbphynop2, and the usbmisc2 nodes. Fixes: 6c3debcbae47 ("arm64: dts: freescale: Add i.MX8MN dtsi support") Signed-off-by: Adam Ford Reviewed-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 30 ----------------------- 1 file changed, 30 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 746faf1cf2fb..16c7202885d7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -790,28 +790,6 @@ #index-cells = <1>; reg = <0x32e40200 0x200>; }; - - usbotg2: usb@32e50000 { - compatible = "fsl,imx8mn-usb", "fsl,imx7d-usb"; - reg = <0x32e50000 0x200>; - interrupts = ; - clocks = <&clk IMX8MN_CLK_USB1_CTRL_ROOT>; - clock-names = "usb1_ctrl_root_clk"; - assigned-clocks = <&clk IMX8MN_CLK_USB_BUS>, - <&clk IMX8MN_CLK_USB_CORE_REF>; - assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_500M>, - <&clk IMX8MN_SYS_PLL1_100M>; - fsl,usbphy = <&usbphynop2>; - fsl,usbmisc = <&usbmisc2 0>; - status = "disabled"; - }; - - usbmisc2: usbmisc@32e50200 { - compatible = "fsl,imx8mn-usbmisc", "fsl,imx7d-usbmisc"; - #index-cells = <1>; - reg = <0x32e50200 0x200>; - }; - }; dma_apbh: dma-controller@33000000 { @@ -876,12 +854,4 @@ assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; clock-names = "main_clk"; }; - - usbphynop2: usbphynop2 { - compatible = "usb-nop-xceiv"; - clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; - assigned-clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; - assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; - clock-names = "main_clk"; - }; }; From 61cf93d3e14a29288e4d5522aecb6e58268eec62 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Fri, 30 Oct 2020 20:40:21 +0000 Subject: [PATCH 089/710] percpu: convert flexible array initializers to use struct_size() Use the safer macro as sparked by the long discussion in [1]. [1] https://lore.kernel.org/lkml/20200917204514.GA2880159@google.com/ Reviewed-by: Gustavo A. R. Silva Signed-off-by: Dennis Zhou --- mm/percpu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 66a93f096394..ad7a37ee74ef 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1315,8 +1315,8 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, region_size = ALIGN(start_offset + map_size, lcm_align); /* allocate chunk */ - alloc_size = sizeof(struct pcpu_chunk) + - BITS_TO_LONGS(region_size >> PAGE_SHIFT) * sizeof(unsigned long); + alloc_size = struct_size(chunk, populated, + BITS_TO_LONGS(region_size >> PAGE_SHIFT)); chunk = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!chunk) panic("%s: Failed to allocate %zu bytes\n", __func__, @@ -2521,8 +2521,8 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_atom_size = ai->atom_size; - pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + - BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); + pcpu_chunk_struct_size = struct_size(chunk, populated, + BITS_TO_LONGS(pcpu_unit_pages)); pcpu_stats_save_ai(ai); From 56e4f2dda23c6d39d327944faa89efaa4eb290d1 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 30 Jun 2020 08:37:30 -0700 Subject: [PATCH 090/710] iio: cros_ec: Use default frequencies when EC returns invalid information Minimal and maximal frequencies supported by a sensor is queried. On some older machines, these frequencies are not returned properly and the EC returns 0 instead. When returned maximal frequency is 0, ignore the information and use default frequencies instead. Fixes: ae7b02ad2f32 ("iio: common: cros_ec_sensors: Expose cros_ec_sensors frequency range via iio sysfs") Signed-off-by: Gwendal Grignou Reviewed-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20200630153730.3302889-1-gwendal@chromium.org CC: Signed-off-by: Jonathan Cameron --- .../cros_ec_sensors/cros_ec_sensors_core.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c index c62cacc04672..e3f507771f17 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c @@ -256,7 +256,7 @@ int cros_ec_sensors_core_init(struct platform_device *pdev, struct cros_ec_sensorhub *sensor_hub = dev_get_drvdata(dev->parent); struct cros_ec_dev *ec = sensor_hub->ec; struct cros_ec_sensor_platform *sensor_platform = dev_get_platdata(dev); - u32 ver_mask; + u32 ver_mask, temp; int frequencies[ARRAY_SIZE(state->frequencies) / 2] = { 0 }; int ret, i; @@ -311,10 +311,16 @@ int cros_ec_sensors_core_init(struct platform_device *pdev, &frequencies[2], &state->fifo_max_event_count); } else { - frequencies[1] = state->resp->info_3.min_frequency; - frequencies[2] = state->resp->info_3.max_frequency; - state->fifo_max_event_count = - state->resp->info_3.fifo_max_event_count; + if (state->resp->info_3.max_frequency == 0) { + get_default_min_max_freq(state->resp->info.type, + &frequencies[1], + &frequencies[2], + &temp); + } else { + frequencies[1] = state->resp->info_3.min_frequency; + frequencies[2] = state->resp->info_3.max_frequency; + } + state->fifo_max_event_count = state->resp->info_3.fifo_max_event_count; } for (i = 0; i < ARRAY_SIZE(frequencies); i++) { state->frequencies[2 * i] = frequencies[i] / 1000; From 15207a92e019803d62687455d8aa2ff9eb3dc82c Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Sun, 18 Oct 2020 21:46:44 +0200 Subject: [PATCH 091/710] iio: adc: mediatek: fix unset field dev_comp field is used in a couple of places but it is never set. This results in kernel oops when dereferencing a NULL pointer. Set the `dev_comp` field correctly in the probe function. Fixes: 6d97024dce23 ("iio: adc: mediatek: mt6577-auxadc, add mt6765 support") Signed-off-by: Fabien Parent Reviewed-by: Matthias Brugger Cc: Link: https://lore.kernel.org/r/20201018194644.3366846-1-fparent@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mt6577_auxadc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/mt6577_auxadc.c b/drivers/iio/adc/mt6577_auxadc.c index ac415cb089cd..79c1dd68b909 100644 --- a/drivers/iio/adc/mt6577_auxadc.c +++ b/drivers/iio/adc/mt6577_auxadc.c @@ -9,9 +9,9 @@ #include #include #include -#include -#include +#include #include +#include #include #include #include @@ -276,6 +276,8 @@ static int mt6577_auxadc_probe(struct platform_device *pdev) goto err_disable_clk; } + adc_dev->dev_comp = device_get_match_data(&pdev->dev); + mutex_init(&adc_dev->lock); mt6577_auxadc_mod_reg(adc_dev->reg_base + MT6577_AUXADC_MISC, From 695e2f5c289bb7f8b85351dcfa35fa236e0200a4 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 21 Oct 2020 10:53:13 +0200 Subject: [PATCH 092/710] iio: adc: stm32-adc: fix a regression when using dma and irq Since overrun interrupt support has been added, there's a regression when two ADCs are used at the same time, with: - an ADC configured to use IRQs. EOCIE bit is set. The handler is normally called in this case. - an ADC configured to use DMA. EOCIE bit isn't set. EOC triggers the DMA request. It's then automatically cleared by DMA read. But the handler gets called due to status bit is temporarily set (IRQ triggered by the other ADC). This is a regression as similar issue had been fixed earlier by commit dcb10920179a ("iio: adc: stm32-adc: fix a race when using several adcs with dma and irq"). Issue is that stm32_adc_eoc_enabled() returns non-zero value (always) since OVR bit has been added and enabled for both DMA and IRQ case. Remove OVR mask in IER register, and rely only on CSR status for overrun. To avoid subsequent calls to interrupt routine on overrun, CSR OVR bit has to be cleared. CSR OVR bit cannot be cleared directly by software. To do this ADC must be stopped first, and OVR bit in ADC ISR has to be cleared. Also add a check in ADC IRQ handler to report spurious IRQs. Fixes: cc06e67d8fa5 ("iio: adc: stm32-adc: Add check on overrun interrupt") Signed-off-by: Olivier Moysan Signed-off-by: Fabrice Gasnier Cc: Link: https://lore.kernel.org/r/20201021085313.5335-1-olivier.moysan@st.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc-core.c | 41 +++++++++++--------------- drivers/iio/adc/stm32-adc.c | 50 ++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 26 deletions(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index cd870c089182..a83199b212a4 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -41,18 +41,16 @@ * struct stm32_adc_common_regs - stm32 common registers * @csr: common status register offset * @ccr: common control register offset - * @eoc1_msk: adc1 end of conversion flag in @csr - * @eoc2_msk: adc2 end of conversion flag in @csr - * @eoc3_msk: adc3 end of conversion flag in @csr + * @eoc_msk: array of eoc (end of conversion flag) masks in csr for adc1..n + * @ovr_msk: array of ovr (overrun flag) masks in csr for adc1..n * @ier: interrupt enable register offset for each adc * @eocie_msk: end of conversion interrupt enable mask in @ier */ struct stm32_adc_common_regs { u32 csr; u32 ccr; - u32 eoc1_msk; - u32 eoc2_msk; - u32 eoc3_msk; + u32 eoc_msk[STM32_ADC_MAX_ADCS]; + u32 ovr_msk[STM32_ADC_MAX_ADCS]; u32 ier; u32 eocie_msk; }; @@ -282,21 +280,20 @@ out: static const struct stm32_adc_common_regs stm32f4_adc_common_regs = { .csr = STM32F4_ADC_CSR, .ccr = STM32F4_ADC_CCR, - .eoc1_msk = STM32F4_EOC1 | STM32F4_OVR1, - .eoc2_msk = STM32F4_EOC2 | STM32F4_OVR2, - .eoc3_msk = STM32F4_EOC3 | STM32F4_OVR3, + .eoc_msk = { STM32F4_EOC1, STM32F4_EOC2, STM32F4_EOC3}, + .ovr_msk = { STM32F4_OVR1, STM32F4_OVR2, STM32F4_OVR3}, .ier = STM32F4_ADC_CR1, - .eocie_msk = STM32F4_EOCIE | STM32F4_OVRIE, + .eocie_msk = STM32F4_EOCIE, }; /* STM32H7 common registers definitions */ static const struct stm32_adc_common_regs stm32h7_adc_common_regs = { .csr = STM32H7_ADC_CSR, .ccr = STM32H7_ADC_CCR, - .eoc1_msk = STM32H7_EOC_MST | STM32H7_OVR_MST, - .eoc2_msk = STM32H7_EOC_SLV | STM32H7_OVR_SLV, + .eoc_msk = { STM32H7_EOC_MST, STM32H7_EOC_SLV}, + .ovr_msk = { STM32H7_OVR_MST, STM32H7_OVR_SLV}, .ier = STM32H7_ADC_IER, - .eocie_msk = STM32H7_EOCIE | STM32H7_OVRIE, + .eocie_msk = STM32H7_EOCIE, }; static const unsigned int stm32_adc_offset[STM32_ADC_MAX_ADCS] = { @@ -318,6 +315,7 @@ static void stm32_adc_irq_handler(struct irq_desc *desc) { struct stm32_adc_priv *priv = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); + int i; u32 status; chained_irq_enter(chip, desc); @@ -335,17 +333,12 @@ static void stm32_adc_irq_handler(struct irq_desc *desc) * before invoking the interrupt handler (e.g. call ISR only for * IRQ-enabled ADCs). */ - if (status & priv->cfg->regs->eoc1_msk && - stm32_adc_eoc_enabled(priv, 0)) - generic_handle_irq(irq_find_mapping(priv->domain, 0)); - - if (status & priv->cfg->regs->eoc2_msk && - stm32_adc_eoc_enabled(priv, 1)) - generic_handle_irq(irq_find_mapping(priv->domain, 1)); - - if (status & priv->cfg->regs->eoc3_msk && - stm32_adc_eoc_enabled(priv, 2)) - generic_handle_irq(irq_find_mapping(priv->domain, 2)); + for (i = 0; i < priv->cfg->num_irqs; i++) { + if ((status & priv->cfg->regs->eoc_msk[i] && + stm32_adc_eoc_enabled(priv, i)) || + (status & priv->cfg->regs->ovr_msk[i])) + generic_handle_irq(irq_find_mapping(priv->domain, i)); + } chained_irq_exit(chip, desc); }; diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index b3f31f147347..16c02c30dec7 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -154,6 +154,7 @@ struct stm32_adc; * @start_conv: routine to start conversions * @stop_conv: routine to stop conversions * @unprepare: optional unprepare routine (disable, power-down) + * @irq_clear: routine to clear irqs * @smp_cycles: programmable sampling time (ADC clock cycles) */ struct stm32_adc_cfg { @@ -166,6 +167,7 @@ struct stm32_adc_cfg { void (*start_conv)(struct iio_dev *, bool dma); void (*stop_conv)(struct iio_dev *); void (*unprepare)(struct iio_dev *); + void (*irq_clear)(struct iio_dev *indio_dev, u32 msk); const unsigned int *smp_cycles; }; @@ -621,6 +623,13 @@ static void stm32f4_adc_stop_conv(struct iio_dev *indio_dev) STM32F4_ADON | STM32F4_DMA | STM32F4_DDS); } +static void stm32f4_adc_irq_clear(struct iio_dev *indio_dev, u32 msk) +{ + struct stm32_adc *adc = iio_priv(indio_dev); + + stm32_adc_clr_bits(adc, adc->cfg->regs->isr_eoc.reg, msk); +} + static void stm32h7_adc_start_conv(struct iio_dev *indio_dev, bool dma) { struct stm32_adc *adc = iio_priv(indio_dev); @@ -659,6 +668,13 @@ static void stm32h7_adc_stop_conv(struct iio_dev *indio_dev) stm32_adc_clr_bits(adc, STM32H7_ADC_CFGR, STM32H7_DMNGT_MASK); } +static void stm32h7_adc_irq_clear(struct iio_dev *indio_dev, u32 msk) +{ + struct stm32_adc *adc = iio_priv(indio_dev); + /* On STM32H7 IRQs are cleared by writing 1 into ISR register */ + stm32_adc_set_bits(adc, adc->cfg->regs->isr_eoc.reg, msk); +} + static int stm32h7_adc_exit_pwr_down(struct iio_dev *indio_dev) { struct stm32_adc *adc = iio_priv(indio_dev); @@ -1235,17 +1251,40 @@ static int stm32_adc_read_raw(struct iio_dev *indio_dev, } } +static void stm32_adc_irq_clear(struct iio_dev *indio_dev, u32 msk) +{ + struct stm32_adc *adc = iio_priv(indio_dev); + + adc->cfg->irq_clear(indio_dev, msk); +} + static irqreturn_t stm32_adc_threaded_isr(int irq, void *data) { struct iio_dev *indio_dev = data; struct stm32_adc *adc = iio_priv(indio_dev); const struct stm32_adc_regspec *regs = adc->cfg->regs; u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg); + u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg); - if (status & regs->isr_ovr.mask) + /* Check ovr status right now, as ovr mask should be already disabled */ + if (status & regs->isr_ovr.mask) { + /* + * Clear ovr bit to avoid subsequent calls to IRQ handler. + * This requires to stop ADC first. OVR bit state in ISR, + * is propaged to CSR register by hardware. + */ + adc->cfg->stop_conv(indio_dev); + stm32_adc_irq_clear(indio_dev, regs->isr_ovr.mask); dev_err(&indio_dev->dev, "Overrun, stopping: restart needed\n"); + return IRQ_HANDLED; + } - return IRQ_HANDLED; + if (!(status & mask)) + dev_err_ratelimited(&indio_dev->dev, + "Unexpected IRQ: IER=0x%08x, ISR=0x%08x\n", + mask, status); + + return IRQ_NONE; } static irqreturn_t stm32_adc_isr(int irq, void *data) @@ -1254,6 +1293,10 @@ static irqreturn_t stm32_adc_isr(int irq, void *data) struct stm32_adc *adc = iio_priv(indio_dev); const struct stm32_adc_regspec *regs = adc->cfg->regs; u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg); + u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg); + + if (!(status & mask)) + return IRQ_WAKE_THREAD; if (status & regs->isr_ovr.mask) { /* @@ -2046,6 +2089,7 @@ static const struct stm32_adc_cfg stm32f4_adc_cfg = { .start_conv = stm32f4_adc_start_conv, .stop_conv = stm32f4_adc_stop_conv, .smp_cycles = stm32f4_adc_smp_cycles, + .irq_clear = stm32f4_adc_irq_clear, }; static const struct stm32_adc_cfg stm32h7_adc_cfg = { @@ -2057,6 +2101,7 @@ static const struct stm32_adc_cfg stm32h7_adc_cfg = { .prepare = stm32h7_adc_prepare, .unprepare = stm32h7_adc_unprepare, .smp_cycles = stm32h7_adc_smp_cycles, + .irq_clear = stm32h7_adc_irq_clear, }; static const struct stm32_adc_cfg stm32mp1_adc_cfg = { @@ -2069,6 +2114,7 @@ static const struct stm32_adc_cfg stm32mp1_adc_cfg = { .prepare = stm32h7_adc_prepare, .unprepare = stm32h7_adc_unprepare, .smp_cycles = stm32h7_adc_smp_cycles, + .irq_clear = stm32h7_adc_irq_clear, }; static const struct of_device_id stm32_adc_of_match[] = { From 271b339236e1c0e6448bc1cafeaedcb529324bf0 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sun, 25 Oct 2020 11:51:22 -0500 Subject: [PATCH 093/710] counter/ti-eqep: Fix regmap max_register The values given were the offset of the register after the last register instead of the actual last register in each range. Fix by using the correct last register of each range. Fixes: f213729f6796 ("counter: new TI eQEP driver") Signed-off-by: David Lechner Acked-by: William Breathitt Gray Link: https://lore.kernel.org/r/20201025165122.607866-1-david@lechnology.com Signed-off-by: Jonathan Cameron --- drivers/counter/ti-eqep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c index e27771df8e23..a60aee1a1a29 100644 --- a/drivers/counter/ti-eqep.c +++ b/drivers/counter/ti-eqep.c @@ -368,7 +368,7 @@ static const struct regmap_config ti_eqep_regmap32_config = { .reg_bits = 32, .val_bits = 32, .reg_stride = 4, - .max_register = 0x24, + .max_register = QUPRD, }; static const struct regmap_config ti_eqep_regmap16_config = { @@ -376,7 +376,7 @@ static const struct regmap_config ti_eqep_regmap16_config = { .reg_bits = 16, .val_bits = 16, .reg_stride = 2, - .max_register = 0x1e, + .max_register = QCPRDLAT, }; static int ti_eqep_probe(struct platform_device *pdev) From fe0b980ffd1dd8b10c09f82385514819ba2a661d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 1 Nov 2020 17:21:18 +0100 Subject: [PATCH 094/710] iio: imu: st_lsm6dsx: set 10ms as min shub slave timeout Set 10ms as minimum i2c slave configuration timeout since st_lsm6dsx relies on accel ODR for i2c master clock and at high sample rates (e.g. 833Hz or 416Hz) the slave sensor occasionally may need more cycles than i2c master timeout (2s/833Hz + 1 ~ 3ms) to apply the configuration resulting in an uncomplete slave configuration and a constant reading from the i2c slave connected to st_lsm6dsx i2c master. Fixes: 8f9a5249e3d9 ("iio: imu: st_lsm6dsx: enable 833Hz sample frequency for tagged sensors") Fixes: c91c1c844ebd ("iio: imu: st_lsm6dsx: add i2c embedded controller support") Signed-off-by: Lorenzo Bianconi Cc: Link: https://lore.kernel.org/r/a69c8236bf16a1569966815ed71710af2722ed7d.1604247274.git.lorenzo@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c index 8c8d8870ca07..99562ba85ee4 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c @@ -156,11 +156,13 @@ static const struct st_lsm6dsx_ext_dev_settings st_lsm6dsx_ext_dev_table[] = { static void st_lsm6dsx_shub_wait_complete(struct st_lsm6dsx_hw *hw) { struct st_lsm6dsx_sensor *sensor; - u32 odr; + u32 odr, timeout; sensor = iio_priv(hw->iio_devs[ST_LSM6DSX_ID_ACC]); odr = (hw->enable_mask & BIT(ST_LSM6DSX_ID_ACC)) ? sensor->odr : 12500; - msleep((2000000U / odr) + 1); + /* set 10ms as minimum timeout for i2c slave configuration */ + timeout = max_t(u32, 2000000U / odr + 1, 10); + msleep(timeout); } /* From 7dd8f0ba88fce98e2953267a66af74c6f4792a56 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Sat, 24 Oct 2020 23:11:20 +0300 Subject: [PATCH 095/710] arm: dts: imx6qdl-udoo: fix rgmii phy-mode for ksz9031 phy Commit bcf3440c6dd7 ("net: phy: micrel: add phy-mode support for the KSZ9031 PHY") fixed micrel phy driver adding proper support for phy modes. Adapt imx6q-udoo board phy settings : explicitly set required delay configuration using "rgmii-id". Fixes: cbd54fe0b2bc ("ARM: dts: imx6dl-udoo: Add board support based off imx6q-udoo") Signed-off-by: Sergey Matyukevich Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-udoo.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index 828dd20cd27d..d07d8f83456d 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -98,7 +98,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From f8b5a33707c9a19ec905d2826be0acd151997a09 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 30 Oct 2020 01:55:28 +0100 Subject: [PATCH 096/710] ARM: dts: vf610-zii-dev-rev-b: Fix MDIO over clocking The ZII devel B board has two generations of Marvell Switches. The mv88e6352 supports an MDIO clock of 12MHz. However the older 88e6185 does not like 12MHz, and often fails to probe. Reduce the clock speed to 5MHz, which seems to work reliably. Cc: Chris Healy Fixes: b955387667ec ("ARM: dts: ZII: update MDIO speed and preamble") Signed-off-by: Andrew Lunn Reviewed-by: Chris Healy Signed-off-by: Shawn Guo --- arch/arm/boot/dts/vf610-zii-dev-rev-b.dts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts index e500911ce0a5..6f1e0f0d4f0a 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts @@ -406,6 +406,9 @@ }; }; +&mdio1 { + clock-frequency = <5000000>; +}; &iomuxc { pinctrl_gpio_e6185_eeprom_sel: pinctrl-gpio-e6185-eeprom-spi0 { From e402599e5e5e0b2758d7766fd9f6d7953d4ccd85 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 12 Oct 2020 09:18:16 +0200 Subject: [PATCH 097/710] ARM: dts: imx6q-prti6q: fix PHY address Due to bug in the bootloader, the PHY has floating address and may randomly change on each PHY reset. To avoid it, the updated bootloader with the following patch[0] should be used: | ARM: protonic: disable on-die termination to fix PHY bootstrapping | | If on-die termination is enabled, the RXC pin of iMX6 will be pulled | high. Since we already have an 10K pull-down on board, the RXC level on | PHY reset will be ~800mV, which is mostly interpreted as 1. On some | reboots we get 0 instead and kernel can't detect the PHY properly. | | Since the default 0x020e07ac value is 0, it is sufficient to remove this | entry from the affected imxcfg files. | | Since we get stable 0 on pin PHYADDR[2], the PHY address is changed from | 4 to 0. With latest bootloader update, the PHY address will be fixed to "0". [0] https://git.pengutronix.de/cgit/barebox/commit/?id=93f7dcf631edfcda19e7757b28d66017ea274b81 Fixes: 0d446a505592 ("ARM: dts: add Protonic PRTI6Q board") Signed-off-by: Oleksij Rempel Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q-prti6q.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts index d112b50f8c5d..b4605edfd2ab 100644 --- a/arch/arm/boot/dts/imx6q-prti6q.dts +++ b/arch/arm/boot/dts/imx6q-prti6q.dts @@ -213,8 +213,8 @@ #size-cells = <0>; /* Microchip KSZ9031RNX PHY */ - rgmii_phy: ethernet-phy@4 { - reg = <4>; + rgmii_phy: ethernet-phy@0 { + reg = <0>; interrupts-extended = <&gpio1 28 IRQ_TYPE_LEVEL_LOW>; reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; reset-assert-us = <10000>; From f51778db088b2407ec177f2f4da0f6290602aa3f Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 2 Nov 2020 12:43:27 +1100 Subject: [PATCH 098/710] swiotlb: using SIZE_MAX needs limits.h included After merging the drm-misc tree, linux-next build (arm multi_v7_defconfig) failed like this: In file included from drivers/gpu/drm/nouveau/nouveau_ttm.c:26: include/linux/swiotlb.h: In function 'swiotlb_max_mapping_size': include/linux/swiotlb.h:99:9: error: 'SIZE_MAX' undeclared (first use in this function) 99 | return SIZE_MAX; | ^~~~~~~~ include/linux/swiotlb.h:7:1: note: 'SIZE_MAX' is defined in header ''; did you forget to '#include '? 6 | #include +++ |+#include 7 | #include include/linux/swiotlb.h:99:9: note: each undeclared identifier is reported only once for each function it appears in 99 | return SIZE_MAX; | ^~~~~~~~ Caused by commit abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()") but only exposed by commit "drm/nouveu: fix swiotlb include" Fix it by including linux/limits.h as appropriate. Fixes: abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()") Signed-off-by: Stephen Rothwell Link: https://lore.kernel.org/r/20201102124327.2f82b2a7@canb.auug.org.au Signed-off-by: Michael S. Tsirkin --- include/linux/swiotlb.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 046bb94bd4d6..fa5122c6711e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -5,6 +5,7 @@ #include #include #include +#include struct device; struct page; From e9696d259d0fb5d239e8c28ca41089838ea76d13 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Mon, 26 Oct 2020 17:02:14 -0700 Subject: [PATCH 099/710] swiotlb: fix "x86: Don't panic if can not alloc buffer for swiotlb" kernel/dma/swiotlb.c:swiotlb_init gets called first and tries to allocate a buffer for the swiotlb. It does so by calling memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE); If the allocation must fail, no_iotlb_memory is set. Later during initialization swiotlb-xen comes in (drivers/xen/swiotlb-xen.c:xen_swiotlb_init) and given that io_tlb_start is != 0, it thinks the memory is ready to use when actually it is not. When the swiotlb is actually needed, swiotlb_tbl_map_single gets called and since no_iotlb_memory is set the kernel panics. Instead, if swiotlb-xen.c:xen_swiotlb_init knew the swiotlb hadn't been initialized, it would do the initialization itself, which might still succeed. Fix the panic by setting io_tlb_start to 0 on swiotlb initialization failure, and also by setting no_iotlb_memory to false on swiotlb initialization success. Fixes: ac2cbab21f31 ("x86: Don't panic if can not alloc buffer for swiotlb") Reported-by: Elliott Mitchell Tested-by: Elliott Mitchell Signed-off-by: Stefano Stabellini Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- kernel/dma/swiotlb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index b4eea0abc3f0..54078f0d4c87 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -229,6 +229,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } io_tlb_index = 0; + no_iotlb_memory = false; if (verbose) swiotlb_print_info(); @@ -260,9 +261,11 @@ swiotlb_init(int verbose) if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; - if (io_tlb_start) + if (io_tlb_start) { memblock_free_early(io_tlb_start, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + io_tlb_start = 0; + } pr_warn("Cannot allocate buffer"); no_iotlb_memory = true; } @@ -360,6 +363,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } io_tlb_index = 0; + no_iotlb_memory = false; swiotlb_print_info(); From fc0021aa340af65a0a37d77be39e22aa886a6132 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Oct 2020 08:33:09 +0200 Subject: [PATCH 100/710] swiotlb: remove the tbl_dma_addr argument to swiotlb_tbl_map_single The tbl_dma_addr argument is used to check the DMA boundary for the allocations, and thus needs to be a dma_addr_t. swiotlb-xen instead passed a physical address, which could lead to incorrect results for strange offsets. Fix this by removing the parameter entirely and hard code the DMA address for io_tlb_start instead. Fixes: 91ffe4ad534a ("swiotlb-xen: introduce phys_to_dma/dma_to_phys translations") Signed-off-by: Christoph Hellwig Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- drivers/iommu/intel/iommu.c | 5 ++--- drivers/xen/swiotlb-xen.c | 3 +-- include/linux/swiotlb.h | 10 +++------- kernel/dma/swiotlb.c | 16 ++++++---------- 4 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8651f6d4dfa0..6b560e6f1930 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3815,9 +3815,8 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, * page aligned, we don't need to use a bounce page. */ if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) { - tlb_addr = swiotlb_tbl_map_single(dev, - phys_to_dma_unencrypted(dev, io_tlb_start), - paddr, size, aligned_size, dir, attrs); + tlb_addr = swiotlb_tbl_map_single(dev, paddr, size, + aligned_size, dir, attrs); if (tlb_addr == DMA_MAPPING_ERROR) { goto swiotlb_error; } else { diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 71ce1b7a23d1..2b385c1b4a99 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -395,8 +395,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, */ trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); - map = swiotlb_tbl_map_single(dev, virt_to_phys(xen_io_tlb_start), - phys, size, size, dir, attrs); + map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs); if (map == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 513913ff7486..3bb72266a75a 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -45,13 +45,9 @@ enum dma_sync_target { SYNC_FOR_DEVICE = 1, }; -extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t phys, - size_t mapping_size, - size_t alloc_size, - enum dma_data_direction dir, - unsigned long attrs); +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, + size_t mapping_size, size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs); extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 54078f0d4c87..781b9dca197c 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -445,14 +445,11 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, } } -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t orig_addr, - size_t mapping_size, - size_t alloc_size, - enum dma_data_direction dir, - unsigned long attrs) +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr, + size_t mapping_size, size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs) { + dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start); unsigned long flags; phys_addr_t tlb_addr; unsigned int nslots, stride, index, wrap; @@ -671,9 +668,8 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, swiotlb_force); - swiotlb_addr = swiotlb_tbl_map_single(dev, - phys_to_dma_unencrypted(dev, io_tlb_start), - paddr, size, size, dir, attrs); + swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir, + attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; From 3fc2bfa365311c6ef3e4411437786a54a911d9a9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 16 Oct 2020 12:13:00 +0200 Subject: [PATCH 101/710] nfsroot: Default mount option should ask for built-in NFS version Change the nfsroot default mount option to ask for NFSv2 only *if* the kernel was built with NFSv2 support. If not, default to NFSv3 or as last choice to NFSv4, depending on actual kernel config. Signed-off-by: Helge Deller Signed-off-by: Anna Schumaker --- fs/nfs/nfsroot.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 8d3278805602..fa148308822c 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -88,7 +88,13 @@ #define NFS_ROOT "/tftpboot/%s" /* Default NFSROOT mount options. */ +#if defined(CONFIG_NFS_V2) #define NFS_DEF_OPTIONS "vers=2,tcp,rsize=4096,wsize=4096" +#elif defined(CONFIG_NFS_V3) +#define NFS_DEF_OPTIONS "vers=3,tcp,rsize=4096,wsize=4096" +#else +#define NFS_DEF_OPTIONS "vers=4,tcp,rsize=4096,wsize=4096" +#endif /* Parameters passed from the kernel command line */ static char nfs_root_parms[NFS_MAXPATHLEN + 1] __initdata = ""; From f126b6702e7354d6247a36f20b9172457af5c15a Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Sun, 1 Nov 2020 14:02:56 -0600 Subject: [PATCH 102/710] arm64: dts: agilex/stratix10: Fix qspi node compatible The QSPI flash node needs to have the required "jedec,spi-nor" in the compatible string. Fixes: 0cb140d07fc7 ("arm64: dts: stratix10: Add QSPI support for Stratix10") Cc: stable@vger.kernel.org Suggested-by: Vignesh Raghavendra Signed-off-by: Dinh Nguyen --- arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts | 2 +- arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts | 2 +- arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index feadd21bc0dc..46e558ab7729 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -159,7 +159,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00a"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts index c07966740e14..f9b4a39683cf 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts @@ -192,7 +192,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00a"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts index 96c50d48289d..a7a83f29f00b 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts +++ b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts @@ -110,7 +110,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mt25qu02g"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; From 7a078d2d18801bba7bde7337a823d7342299acf7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 29 Oct 2020 15:37:07 -0700 Subject: [PATCH 103/710] libbpf, hashmap: Fix undefined behavior in hash_bits If bits is 0, the case when the map is empty, then the >> is the size of the register which is undefined behavior - on x86 it is the same as a shift by 0. Fix by handling the 0 case explicitly and guarding calls to hash_bits for empty maps in hashmap__for_each_key_entry and hashmap__for_each_entry_safe. Fixes: e3b924224028 ("libbpf: add resizable non-thread safe internal hashmap") Suggested-by: Andrii Nakryiko , Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201029223707.494059-1-irogers@google.com --- tools/lib/bpf/hashmap.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index d9b385fe808c..10a4c4cd13cf 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -15,6 +15,9 @@ static inline size_t hash_bits(size_t h, int bits) { /* shuffle bits and return requested number of upper bits */ + if (bits == 0) + return 0; + #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__) /* LP64 case */ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits); @@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur; \ cur = cur->next) \ if (map->equal_fn(cur->key, (_key), map->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - cur = map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ if (map->equal_fn(cur->key, (_key), map->ctx)) From bcbc0b2e275f0a797de11a10eff495b4571863fc Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 29 Oct 2020 11:54:42 +0200 Subject: [PATCH 104/710] mei: protect mei_cl_mtu from null dereference A receive callback is queued while the client is still connected but can still be called after the client was disconnected. Upon disconnect cl->me_cl is set to NULL, hence we need to check that ME client is not-NULL in mei_cl_mtu to avoid null dereference. Cc: Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20201029095444.957924-2-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index 64143d4ec758..9e08a9843bba 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -182,11 +182,11 @@ static inline u8 mei_cl_me_id(const struct mei_cl *cl) * * @cl: host client * - * Return: mtu + * Return: mtu or 0 if client is not connected */ static inline size_t mei_cl_mtu(const struct mei_cl *cl) { - return cl->me_cl->props.max_msg_length; + return cl->me_cl ? cl->me_cl->props.max_msg_length : 0; } /** From d3938ee23e97bfcac2e0eb6b356875da73d700df Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sun, 1 Nov 2020 03:51:02 +0800 Subject: [PATCH 105/710] erofs: derive atime instead of leaving it empty EROFS has _only one_ ondisk timestamp (ctime is currently documented and recorded, we might also record mtime instead with a new compat feature if needed) for each extended inode since EROFS isn't mainly for archival purposes so no need to keep all timestamps on disk especially for Android scenarios due to security concerns. Also, romfs/cramfs don't have their own on-disk timestamp, and squashfs only records mtime instead. Let's also derive access time from ondisk timestamp rather than leaving it empty, and if mtime/atime for each file are really needed for specific scenarios as well, we can also use xattrs to record them then. Link: https://lore.kernel.org/r/20201031195102.21221-1-hsiangkao@aol.com [ Gao Xiang: It'd be better to backport for user-friendly concern. ] Fixes: 431339ba9042 ("staging: erofs: add inode operations") Cc: stable # 4.19+ Reported-by: nl6720 Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/inode.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 139d0bed42f8..3e21c0e8adae 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -107,11 +107,9 @@ static struct page *erofs_read_inode(struct inode *inode, i_gid_write(inode, le32_to_cpu(die->i_gid)); set_nlink(inode, le32_to_cpu(die->i_nlink)); - /* ns timestamp */ - inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = - le64_to_cpu(die->i_ctime); - inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = - le32_to_cpu(die->i_ctime_nsec); + /* extended inode has its own timestamp */ + inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime); + inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec); inode->i_size = le64_to_cpu(die->i_size); @@ -149,11 +147,9 @@ static struct page *erofs_read_inode(struct inode *inode, i_gid_write(inode, le16_to_cpu(dic->i_gid)); set_nlink(inode, le16_to_cpu(dic->i_nlink)); - /* use build time to derive all file time */ - inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = - sbi->build_time; - inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = - sbi->build_time_nsec; + /* use build time for compact inodes */ + inode->i_ctime.tv_sec = sbi->build_time; + inode->i_ctime.tv_nsec = sbi->build_time_nsec; inode->i_size = le32_to_cpu(dic->i_size); if (erofs_inode_is_data_compressed(vi->datalayout)) @@ -167,6 +163,11 @@ static struct page *erofs_read_inode(struct inode *inode, goto err_out; } + inode->i_mtime.tv_sec = inode->i_ctime.tv_sec; + inode->i_atime.tv_sec = inode->i_ctime.tv_sec; + inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec; + inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec; + if (!nblks) /* measure inode.i_blocks as generic filesystems */ inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9; From a30573b3cdc77b8533d004ece1ea7c0146b437a0 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 22 Oct 2020 22:57:21 +0800 Subject: [PATCH 106/710] erofs: fix setting up pcluster for temporary pages pcluster should be only set up for all managed pages instead of temporary pages. Since it currently uses page->mapping to identify, the impact is minor for now. [ Update: Vladimir reported the kernel log becomes polluted because PAGE_FLAGS_CHECK_AT_FREE flag(s) set if the page allocation debug option is enabled. ] Link: https://lore.kernel.org/r/20201022145724.27284-1-hsiangkao@aol.com Fixes: 5ddcee1f3a1c ("erofs: get rid of __stagingpage_alloc helper") Cc: # 5.5+ Tested-by: Vladimir Zapolskiy Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 50912a5420b4..86fd3bf62af6 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1078,8 +1078,11 @@ out_allocpage: cond_resched(); goto repeat; } - set_page_private(page, (unsigned long)pcl); - SetPagePrivate(page); + + if (tocache) { + set_page_private(page, (unsigned long)pcl); + SetPagePrivate(page); + } out: /* the only exit (for tracing and debugging) */ return page; } From 18e8db7f6526928858dfa99b49d831497f0f8df8 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 3 Nov 2020 13:33:15 -0600 Subject: [PATCH 107/710] hwmon: (pmbus) Add mutex locking for sysfs reads As part of commit a919ba06979a7 ("hwmon: (pmbus) Stop caching register values"), the update of the sensor value is now triggered directly by the sensor attribute value being read from sysfs. This created (or at least made much more likely) a locking issue, since nothing protected the device page selection from being unexpectedly modified by concurrent reads. If sensor values on different pages on the same device were being concurrently read by multiple threads, this could cause spurious read errors due to the page register not reading back the same value last written, or sensor values being read from the incorrect page. Add locking of the update_lock mutex in pmbus_show_sensor and pmbus_show_samples so that these cannot result in concurrent reads from the underlying device. Fixes: a919ba06979a7 ("hwmon: (pmbus) Stop caching register values") Signed-off-by: Robert Hancock Reviewed-by: Alex Qiu Link: https://lore.kernel.org/r/20201103193315.3011800-1-robert.hancock@calian.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 170a9f82ca61..b0e2820a2d57 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -941,12 +941,16 @@ static ssize_t pmbus_show_sensor(struct device *dev, struct i2c_client *client = to_i2c_client(dev->parent); struct pmbus_sensor *sensor = to_pmbus_sensor(devattr); struct pmbus_data *data = i2c_get_clientdata(client); + ssize_t ret; + mutex_lock(&data->update_lock); pmbus_update_sensor_data(client, sensor); if (sensor->data < 0) - return sensor->data; - - return snprintf(buf, PAGE_SIZE, "%lld\n", pmbus_reg2data(data, sensor)); + ret = sensor->data; + else + ret = snprintf(buf, PAGE_SIZE, "%lld\n", pmbus_reg2data(data, sensor)); + mutex_unlock(&data->update_lock); + return ret; } static ssize_t pmbus_set_sensor(struct device *dev, @@ -2012,8 +2016,11 @@ static ssize_t pmbus_show_samples(struct device *dev, int val; struct i2c_client *client = to_i2c_client(dev->parent); struct pmbus_samples_reg *reg = to_samples_reg(devattr); + struct pmbus_data *data = i2c_get_clientdata(client); + mutex_lock(&data->update_lock); val = _pmbus_read_word_data(client, reg->page, 0xff, reg->attr->reg); + mutex_unlock(&data->update_lock); if (val < 0) return val; From 82948e6e1d88d2383b82bd3f95c4241a674cd3d9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:08:06 +0100 Subject: [PATCH 108/710] habanalabs: fix kernel pointer type All throughout the driver, normal kernel pointers are stored as 'u64' struct members, which is kind of silly and requires casting through a uintptr_t to void* every time they are used. There is one line that missed the intermediate uintptr_t case, which leads to a compiler warning: drivers/misc/habanalabs/common/command_buffer.c: In function 'hl_cb_mmap': drivers/misc/habanalabs/common/command_buffer.c:512:44: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] 512 | rc = hdev->asic_funcs->cb_mmap(hdev, vma, (void *) cb->kernel_address, Rather than adding one more cast, just fix the type and remove all the other casts. Fixes: 0db575350cb1 ("habanalabs: make use of dma_mmap_coherent") Signed-off-by: Arnd Bergmann Acked-by: Christoph Hellwig Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_buffer.c | 9 +++-- drivers/misc/habanalabs/common/habanalabs.h | 14 ++++---- drivers/misc/habanalabs/common/hw_queue.c | 19 +++++------ drivers/misc/habanalabs/common/irq.c | 17 +++++----- drivers/misc/habanalabs/gaudi/gaudi.c | 33 ++++++++----------- drivers/misc/habanalabs/goya/goya.c | 26 +++++++-------- drivers/misc/habanalabs/goya/goyaP.h | 2 +- 7 files changed, 55 insertions(+), 65 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 901e213daf40..ada570f35a41 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -142,11 +142,10 @@ static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) { if (cb->is_internal) gen_pool_free(hdev->internal_cb_pool, - cb->kernel_address, cb->size); + (uintptr_t)cb->kernel_address, cb->size); else hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size, - (void *) (uintptr_t) cb->kernel_address, - cb->bus_address); + cb->kernel_address, cb->bus_address); kfree(cb); } @@ -230,7 +229,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, return NULL; } - cb->kernel_address = (u64) (uintptr_t) p; + cb->kernel_address = p; cb->size = cb_size; return cb; @@ -509,7 +508,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) vma->vm_private_data = cb; - rc = hdev->asic_funcs->cb_mmap(hdev, vma, (void *) cb->kernel_address, + rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address, cb->bus_address, cb->size); if (rc) { spin_lock(&cb->lock); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 80d4d7385ffe..6ed974d2def0 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -452,7 +452,7 @@ struct hl_cb { struct list_head pool_list; struct list_head va_block_list; u64 id; - u64 kernel_address; + void *kernel_address; dma_addr_t bus_address; u32 mmap_size; u32 size; @@ -515,7 +515,7 @@ struct hl_hw_queue { struct hl_hw_sob hw_sob[HL_RSVD_SOBS]; struct hl_cs_job **shadow_queue; enum hl_queue_type queue_type; - u64 kernel_address; + void *kernel_address; dma_addr_t bus_address; u32 pi; atomic_t ci; @@ -544,7 +544,7 @@ struct hl_hw_queue { */ struct hl_cq { struct hl_device *hdev; - u64 kernel_address; + void *kernel_address; dma_addr_t bus_address; u32 cq_idx; u32 hw_queue_id; @@ -562,7 +562,7 @@ struct hl_cq { */ struct hl_eq { struct hl_device *hdev; - u64 kernel_address; + void *kernel_address; dma_addr_t bus_address; u32 ci; }; @@ -757,7 +757,7 @@ struct hl_asic_funcs { u32 (*get_dma_desc_list_size)(struct hl_device *hdev, struct sg_table *sgt); void (*add_end_of_cb_packets)(struct hl_device *hdev, - u64 kernel_address, u32 len, + void *kernel_address, u32 len, u64 cq_addr, u32 cq_val, u32 msix_num, bool eb); void (*update_eq_ci)(struct hl_device *hdev, u32 val); @@ -1382,13 +1382,13 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); for (;;) { \ /* Verify we read updates done by other cores or by device */ \ mb(); \ - (val) = *((u32 *) (uintptr_t) (addr)); \ + (val) = *((u32 *)(addr)); \ if (mem_written_by_device) \ (val) = le32_to_cpu(*(__le32 *) &(val)); \ if (cond) \ break; \ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = *((u32 *) (uintptr_t) (addr)); \ + (val) = *((u32 *)(addr)); \ if (mem_written_by_device) \ (val) = le32_to_cpu(*(__le32 *) &(val)); \ break; \ diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 5e66c98fb0d3..250cf9cefc06 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -75,7 +75,7 @@ static void ext_and_hw_queue_submit_bd(struct hl_device *hdev, { struct hl_bd *bd; - bd = (struct hl_bd *) (uintptr_t) q->kernel_address; + bd = q->kernel_address; bd += hl_pi_2_offset(q->pi); bd->ctl = cpu_to_le32(ctl); bd->len = cpu_to_le32(len); @@ -335,8 +335,7 @@ static void int_queue_schedule_job(struct hl_cs_job *job) bd.len = cpu_to_le32(job->job_cb_size); bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); - pi = (__le64 *) (uintptr_t) (q->kernel_address + - ((q->pi & (q->int_queue_len - 1)) * sizeof(bd))); + pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd); q->pi++; q->pi &= ((q->int_queue_len << 1) - 1); @@ -630,7 +629,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, if (!p) return -ENOMEM; - q->kernel_address = (u64) (uintptr_t) p; + q->kernel_address = p; q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(*q->shadow_queue), @@ -653,11 +652,11 @@ free_queue: if (is_cpu_queue) hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address); + q->kernel_address); else hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, + q->kernel_address, q->bus_address); return rc; @@ -676,7 +675,7 @@ static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) return -EFAULT; } - q->kernel_address = (u64) (uintptr_t) p; + q->kernel_address = p; q->pi = 0; atomic_set(&q->ci, 0); @@ -704,7 +703,7 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) if (!p) return -ENOMEM; - q->kernel_address = (u64) (uintptr_t) p; + q->kernel_address = p; /* Make sure read/write pointers are initialized to start of queue */ atomic_set(&q->ci, 0); @@ -839,11 +838,11 @@ static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) if (q->queue_type == QUEUE_TYPE_CPU) hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address); + q->kernel_address); else hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, + q->kernel_address, q->bus_address); } diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index d20e40a53d70..de53fb5f978a 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -90,7 +90,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) return IRQ_HANDLED; } - cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address; + cq_base = cq->kernel_address; while (1) { bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) & @@ -152,7 +152,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg) struct hl_eq_entry *eq_base; struct hl_eqe_work *handle_eqe_work; - eq_base = (struct hl_eq_entry *) (uintptr_t) eq->kernel_address; + eq_base = eq->kernel_address; while (1) { bool entry_ready = @@ -221,7 +221,7 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id) return -ENOMEM; q->hdev = hdev; - q->kernel_address = (u64) (uintptr_t) p; + q->kernel_address = p; q->hw_queue_id = hw_queue_id; q->ci = 0; q->pi = 0; @@ -242,7 +242,8 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id) void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q) { hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, q->bus_address); + q->kernel_address, + q->bus_address); } void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q) @@ -259,7 +260,7 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q) * when the device is operational again */ - memset((void *) (uintptr_t) q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES); + memset(q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES); } /** @@ -282,7 +283,7 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q) return -ENOMEM; q->hdev = hdev; - q->kernel_address = (u64) (uintptr_t) p; + q->kernel_address = p; q->ci = 0; return 0; @@ -302,7 +303,7 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q) hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, HL_EQ_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address); + q->kernel_address); } void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) @@ -316,5 +317,5 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) * when the device is operational again */ - memset((void *) (uintptr_t) q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES); + memset(q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES); } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 5f65a1691551..b071965fa10a 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -680,8 +680,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev, if (!cb) return -EFAULT; - init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t) - cb->kernel_address; + init_tpc_mem_pkt = cb->kernel_address; cb_size = sizeof(*init_tpc_mem_pkt); memset(init_tpc_mem_pkt, 0, cb_size); @@ -3811,8 +3810,7 @@ static int gaudi_validate_cb(struct hl_device *hdev, u16 pkt_size; struct gaudi_packet *user_pkt; - user_pkt = (struct gaudi_packet *) (uintptr_t) - (parser->user_cb->kernel_address + cb_parsed_length); + user_pkt = parser->user_cb->kernel_address + cb_parsed_length; pkt_id = (enum packet_id) ( (le64_to_cpu(user_pkt->header) & @@ -4035,11 +4033,9 @@ static int gaudi_patch_cb(struct hl_device *hdev, u32 new_pkt_size = 0; struct gaudi_packet *user_pkt, *kernel_pkt; - user_pkt = (struct gaudi_packet *) (uintptr_t) - (parser->user_cb->kernel_address + cb_parsed_length); - kernel_pkt = (struct gaudi_packet *) (uintptr_t) - (parser->patched_cb->kernel_address + - cb_patched_cur_length); + user_pkt = parser->user_cb->kernel_address + cb_parsed_length; + kernel_pkt = parser->patched_cb->kernel_address + + cb_patched_cur_length; pkt_id = (enum packet_id) ( (le64_to_cpu(user_pkt->header) & @@ -4155,8 +4151,8 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev, * The check that parser->user_cb_size <= parser->user_cb->size was done * in validate_queue_index(). */ - memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address, - (void *) (uintptr_t) parser->user_cb->kernel_address, + memcpy(parser->patched_cb->kernel_address, + parser->user_cb->kernel_address, parser->user_cb_size); patched_cb_size = parser->patched_cb_size; @@ -4290,7 +4286,7 @@ static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) } static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, - u64 kernel_address, u32 len, + void *kernel_address, u32 len, u64 cq_addr, u32 cq_val, u32 msi_vec, bool eb) { @@ -4298,8 +4294,7 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, struct packet_msg_prot *cq_pkt; u32 tmp; - cq_pkt = (struct packet_msg_prot *) (uintptr_t) - (kernel_address + len - (sizeof(struct packet_msg_prot) * 2)); + cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); @@ -4342,7 +4337,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, if (!cb) return -EFAULT; - lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address; + lin_dma_pkt = cb->kernel_address; memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); cb_size = sizeof(*lin_dma_pkt); @@ -4954,8 +4949,8 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev, cb = job->patched_cb; - fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address + - job->job_cb_size - sizeof(struct packet_msg_prot)); + fence_pkt = cb->kernel_address + + job->job_cb_size - sizeof(struct packet_msg_prot); tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); @@ -6386,7 +6381,7 @@ static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id) struct packet_msg_short *pkt; u32 value, ctl; - pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address; + pkt = cb->kernel_address; memset(pkt, 0, sizeof(*pkt)); /* Inc by 1, Mode ADD */ @@ -6478,7 +6473,7 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id, u16 sob_val, u16 mon_id, u32 q_idx) { struct hl_cb *cb = (struct hl_cb *) data; - void *buf = (void *) (uintptr_t) cb->kernel_address; + void *buf = cb->kernel_address; u64 monitor_base, fence_addr = 0; u32 size = 0; u16 msg_addr_offset; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 5db52064ed9e..235d47b2420f 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2882,8 +2882,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) cb = job->patched_cb; - fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address + - job->job_cb_size - sizeof(struct packet_msg_prot)); + fence_pkt = cb->kernel_address + + job->job_cb_size - sizeof(struct packet_msg_prot); tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) | (1 << GOYA_PKT_CTL_EB_SHIFT) | @@ -3475,8 +3475,7 @@ static int goya_validate_cb(struct hl_device *hdev, u16 pkt_size; struct goya_packet *user_pkt; - user_pkt = (struct goya_packet *) (uintptr_t) - (parser->user_cb->kernel_address + cb_parsed_length); + user_pkt = parser->user_cb->kernel_address + cb_parsed_length; pkt_id = (enum packet_id) ( (le64_to_cpu(user_pkt->header) & @@ -3713,11 +3712,9 @@ static int goya_patch_cb(struct hl_device *hdev, u32 new_pkt_size = 0; struct goya_packet *user_pkt, *kernel_pkt; - user_pkt = (struct goya_packet *) (uintptr_t) - (parser->user_cb->kernel_address + cb_parsed_length); - kernel_pkt = (struct goya_packet *) (uintptr_t) - (parser->patched_cb->kernel_address + - cb_patched_cur_length); + user_pkt = parser->user_cb->kernel_address + cb_parsed_length; + kernel_pkt = parser->patched_cb->kernel_address + + cb_patched_cur_length; pkt_id = (enum packet_id) ( (le64_to_cpu(user_pkt->header) & @@ -3841,8 +3838,8 @@ static int goya_parse_cb_mmu(struct hl_device *hdev, * The check that parser->user_cb_size <= parser->user_cb->size was done * in validate_queue_index(). */ - memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address, - (void *) (uintptr_t) parser->user_cb->kernel_address, + memcpy(parser->patched_cb->kernel_address, + parser->user_cb->kernel_address, parser->user_cb_size); patched_cb_size = parser->patched_cb_size; @@ -3974,15 +3971,14 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) return goya_parse_cb_no_mmu(hdev, parser); } -void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address, +void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec, bool eb) { struct packet_msg_prot *cq_pkt; u32 tmp; - cq_pkt = (struct packet_msg_prot *) (uintptr_t) - (kernel_address + len - (sizeof(struct packet_msg_prot) * 2)); + cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) | (1 << GOYA_PKT_CTL_EB_SHIFT) | @@ -4746,7 +4742,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, if (!cb) return -ENOMEM; - lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address; + lin_dma_pkt = cb->kernel_address; do { memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 09b4006d4dc3..def86c75e035 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -217,7 +217,7 @@ int goya_resume(struct hl_device *hdev); void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry); void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size); -void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address, +void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec, bool eb); int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser); From 1137e1ead98c0c75f7c5a9a12f0285c5155f20e2 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 30 Sep 2020 18:43:52 +0300 Subject: [PATCH 109/710] habanalabs/gaudi: move coresight mmu config We must relocate the coresight mmu configuration to the coresight flow to make it work in case the first submission is to configure the profiler. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 5 +---- drivers/misc/habanalabs/gaudi/gaudiP.h | 1 + drivers/misc/habanalabs/gaudi/gaudi_coresight.c | 5 +++++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index b071965fa10a..2519a34e25b7 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -4742,7 +4742,7 @@ static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) (addr - gaudi->hbm_bar_cur_addr)); } -static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) +void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) { /* mask to zero the MMBP and ASID bits */ WREG32_AND(reg, ~0x7FF); @@ -4910,9 +4910,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); - gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); - gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); - hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 83ad2b0a3a61..8eb598db81b2 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -271,5 +271,6 @@ void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); int gaudi_debug_coresight(struct hl_device *hdev, void *data); void gaudi_halt_coresight(struct hl_device *hdev); int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); +void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid); #endif /* GAUDIP_H_ */ diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index 881531d4d9da..3d2b0f0f4650 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -623,6 +623,11 @@ static int gaudi_config_etr(struct hl_device *hdev, return -EINVAL; } + gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, + hdev->compute_ctx->asid); + gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, + hdev->compute_ctx->asid); + msb = upper_32_bits(input->buffer_address) >> 8; msb &= PSOC_GLOBAL_CONF_TRACE_ADDR_MSB_MASK; WREG32(mmPSOC_GLOBAL_CONF_TRACE_ADDR, msb); From f83f3a31b2972ddc907fbb286c6446dd9db6e198 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 2 Nov 2020 18:36:03 +0200 Subject: [PATCH 110/710] habanalabs/gaudi: mask WDT error in QMAN This interrupt cause is not relevant because of how the user use the QMAN arbitration mechanism. We must mask it as the log explodes with it. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/include/gaudi/gaudi_masks.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h index f395721060bd..46aed13f16b1 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h @@ -421,7 +421,6 @@ enum axi_id { #define QM_ARB_ERR_MSG_EN_MASK (\ QM_ARB_ERR_MSG_EN_CHOISE_OVF_MASK |\ - QM_ARB_ERR_MSG_EN_CHOISE_WDT_MASK |\ QM_ARB_ERR_MSG_EN_AXI_LBW_ERR_MASK) #define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK 0x1 From 63fbf8013b2f6430754526ef9594f229c7219b1f Mon Sep 17 00:00:00 2001 From: Jianqun Xu Date: Tue, 13 Oct 2020 14:37:30 +0800 Subject: [PATCH 111/710] pinctrl: rockchip: enable gpio pclk for rockchip_gpio_to_irq There need to enable pclk_gpio when do irq_create_mapping, since it will do access to gpio controller. Signed-off-by: Jianqun Xu Reviewed-by: Heiko Stuebner Reviewed-by: Kever Yang Link: https://lore.kernel.org/r/20201013063731.3618-3-jay.xu@rock-chips.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 0401c1da79dd..7b398ed2113e 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -3155,7 +3155,9 @@ static int rockchip_gpio_to_irq(struct gpio_chip *gc, unsigned offset) if (!bank->domain) return -ENXIO; + clk_enable(bank->clk); virq = irq_create_mapping(bank->domain, offset); + clk_disable(bank->clk); return (virq) ? : -ENXIO; } From 8045ec42d14c6f77b5e925d1421150c043dfb75d Mon Sep 17 00:00:00 2001 From: Jianqun Xu Date: Tue, 13 Oct 2020 14:37:31 +0800 Subject: [PATCH 112/710] pinctrl: rockchip: create irq mapping in gpio_to_irq Remove totally irq mappings create in probe, the gpio irq mapping will be created when do gpio_to_irq -> rockchip_gpio_to_irq -> irq_create_mapping This patch can speed up system boot on, also abandon many unused irq mappings' create. Signed-off-by: Jianqun Xu Reviewed-by: Heiko Stuebner Reviewed-by: Kever Yang Link: https://lore.kernel.org/r/20201013063731.3618-4-jay.xu@rock-chips.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 7b398ed2113e..aa1a1c850d05 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -3196,7 +3196,7 @@ static void rockchip_irq_demux(struct irq_desc *desc) irq = __ffs(pend); pend &= ~BIT(irq); - virq = irq_linear_revmap(bank->domain, irq); + virq = irq_find_mapping(bank->domain, irq); if (!virq) { dev_err(bank->drvdata->dev, "unmapped irq %d\n", irq); @@ -3375,7 +3375,7 @@ static int rockchip_interrupts_register(struct platform_device *pdev, unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; struct irq_chip_generic *gc; int ret; - int i, j; + int i; for (i = 0; i < ctrl->nr_banks; ++i, ++bank) { if (!bank->valid) { @@ -3402,7 +3402,7 @@ static int rockchip_interrupts_register(struct platform_device *pdev, ret = irq_alloc_domain_generic_chips(bank->domain, 32, 1, "rockchip_gpio_irq", handle_level_irq, - clr, 0, IRQ_GC_INIT_MASK_CACHE); + clr, 0, 0); if (ret) { dev_err(&pdev->dev, "could not alloc generic chips for bank %s\n", bank->name); @@ -3411,14 +3411,6 @@ static int rockchip_interrupts_register(struct platform_device *pdev, continue; } - /* - * Linux assumes that all interrupts start out disabled/masked. - * Our driver only uses the concept of masked and always keeps - * things enabled, so for us that's all masked and all enabled. - */ - writel_relaxed(0xffffffff, bank->reg_base + GPIO_INTMASK); - writel_relaxed(0xffffffff, bank->reg_base + GPIO_INTEN); - gc = irq_get_domain_generic_chip(bank->domain, 0); gc->reg_base = bank->reg_base; gc->private = bank; @@ -3435,13 +3427,17 @@ static int rockchip_interrupts_register(struct platform_device *pdev, gc->chip_types[0].chip.irq_set_type = rockchip_irq_set_type; gc->wake_enabled = IRQ_MSK(bank->nr_pins); + /* + * Linux assumes that all interrupts start out disabled/masked. + * Our driver only uses the concept of masked and always keeps + * things enabled, so for us that's all masked and all enabled. + */ + writel_relaxed(0xffffffff, bank->reg_base + GPIO_INTMASK); + writel_relaxed(0xffffffff, bank->reg_base + GPIO_INTEN); + gc->mask_cache = 0xffffffff; + irq_set_chained_handler_and_data(bank->irq, rockchip_irq_demux, bank); - - /* map the gpio irqs here, when the clock is still running */ - for (j = 0 ; j < 32 ; j++) - irq_create_mapping(bank->domain, j); - clk_disable(bank->clk); } From f78331f74cacb33d87cd60376dacc5bd397959e2 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 3 Nov 2020 10:41:29 +0100 Subject: [PATCH 113/710] libbpf: Fix null dereference in xsk_socket__delete Fix a possible null pointer dereference in xsk_socket__delete that will occur if a null pointer is fed into the function. Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices") Reported-by: Andrii Nakryiko Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1604396490-12129-2-git-send-email-magnus.karlsson@gmail.com --- tools/lib/bpf/xsk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e3c98c007825..504b7a85d445 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -891,13 +891,14 @@ int xsk_umem__delete(struct xsk_umem *umem) void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); - struct xsk_ctx *ctx = xsk->ctx; struct xdp_mmap_offsets off; + struct xsk_ctx *ctx; int err; if (!xsk) return; + ctx = xsk->ctx; if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); From 25cf73b9ff88fd4608699a0313f820758b4c252d Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 3 Nov 2020 10:41:30 +0100 Subject: [PATCH 114/710] libbpf: Fix possible use after free in xsk_socket__delete Fix a possible use after free in xsk_socket__delete that will happen if xsk_put_ctx() frees the ctx. To fix, save the umem reference taken from the context and just use that instead. Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices") Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1604396490-12129-3-git-send-email-magnus.karlsson@gmail.com --- tools/lib/bpf/xsk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 504b7a85d445..9bc537d0b92d 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -892,6 +892,7 @@ void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); struct xdp_mmap_offsets off; + struct xsk_umem *umem; struct xsk_ctx *ctx; int err; @@ -899,6 +900,7 @@ void xsk_socket__delete(struct xsk_socket *xsk) return; ctx = xsk->ctx; + umem = ctx->umem; if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); @@ -918,11 +920,11 @@ void xsk_socket__delete(struct xsk_socket *xsk) xsk_put_ctx(ctx); - ctx->umem->refcount--; + umem->refcount--; /* Do not close an fd that also has an associated umem connected * to it. */ - if (xsk->fd != ctx->umem->fd) + if (xsk->fd != umem->fd) close(xsk->fd); free(xsk); } From c277ca155d2f0028a5c79708426d3f79b54a5fc1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 1 Nov 2020 19:23:54 +0800 Subject: [PATCH 115/710] clk: imx8m: fix bus critical clk registration noc/axi/ahb are bus clk, not peripheral clk. Since peripheral clk has a limitation that for peripheral clock slice, IP clock slices must be stopped to change the clock source. However if the bus clk is marked as critical clk peripheral, the assigned clock parent operation will fail. So we added CLK_SET_PARENT_GATE flag to avoid glitch. And add imx8m_clk_hw_composite_bus_critical for bus critical clock usage Fixes: 936c383673b9e ("clk: imx: fix composite peripheral flags") Reviewed-by: Abel Vesa Reported-by: Abel Vesa Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/1604229834-25594-1-git-send-email-peng.fan@nxp.com Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-imx8mm.c | 10 +++++----- drivers/clk/imx/clk-imx8mn.c | 6 +++--- drivers/clk/imx/clk-imx8mp.c | 10 +++++----- drivers/clk/imx/clk-imx8mq.c | 8 ++++---- drivers/clk/imx/clk.h | 5 +++++ 5 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/clk/imx/clk-imx8mm.c b/drivers/clk/imx/clk-imx8mm.c index 0de0be0cf548..f358ad907299 100644 --- a/drivers/clk/imx/clk-imx8mm.c +++ b/drivers/clk/imx/clk-imx8mm.c @@ -443,9 +443,9 @@ static int imx8mm_clocks_probe(struct platform_device *pdev) hws[IMX8MM_CLK_A53_CORE] = imx_clk_hw_mux2("arm_a53_core", base + 0x9880, 24, 1, imx8mm_a53_core_sels, ARRAY_SIZE(imx8mm_a53_core_sels)); /* BUS */ - hws[IMX8MM_CLK_MAIN_AXI] = imx8m_clk_hw_composite_critical("main_axi", imx8mm_main_axi_sels, base + 0x8800); + hws[IMX8MM_CLK_MAIN_AXI] = imx8m_clk_hw_composite_bus_critical("main_axi", imx8mm_main_axi_sels, base + 0x8800); hws[IMX8MM_CLK_ENET_AXI] = imx8m_clk_hw_composite_bus("enet_axi", imx8mm_enet_axi_sels, base + 0x8880); - hws[IMX8MM_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_critical("nand_usdhc_bus", imx8mm_nand_usdhc_sels, base + 0x8900); + hws[IMX8MM_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_bus_critical("nand_usdhc_bus", imx8mm_nand_usdhc_sels, base + 0x8900); hws[IMX8MM_CLK_VPU_BUS] = imx8m_clk_hw_composite_bus("vpu_bus", imx8mm_vpu_bus_sels, base + 0x8980); hws[IMX8MM_CLK_DISP_AXI] = imx8m_clk_hw_composite_bus("disp_axi", imx8mm_disp_axi_sels, base + 0x8a00); hws[IMX8MM_CLK_DISP_APB] = imx8m_clk_hw_composite_bus("disp_apb", imx8mm_disp_apb_sels, base + 0x8a80); @@ -453,11 +453,11 @@ static int imx8mm_clocks_probe(struct platform_device *pdev) hws[IMX8MM_CLK_USB_BUS] = imx8m_clk_hw_composite_bus("usb_bus", imx8mm_usb_bus_sels, base + 0x8b80); hws[IMX8MM_CLK_GPU_AXI] = imx8m_clk_hw_composite_bus("gpu_axi", imx8mm_gpu_axi_sels, base + 0x8c00); hws[IMX8MM_CLK_GPU_AHB] = imx8m_clk_hw_composite_bus("gpu_ahb", imx8mm_gpu_ahb_sels, base + 0x8c80); - hws[IMX8MM_CLK_NOC] = imx8m_clk_hw_composite_critical("noc", imx8mm_noc_sels, base + 0x8d00); - hws[IMX8MM_CLK_NOC_APB] = imx8m_clk_hw_composite_critical("noc_apb", imx8mm_noc_apb_sels, base + 0x8d80); + hws[IMX8MM_CLK_NOC] = imx8m_clk_hw_composite_bus_critical("noc", imx8mm_noc_sels, base + 0x8d00); + hws[IMX8MM_CLK_NOC_APB] = imx8m_clk_hw_composite_bus_critical("noc_apb", imx8mm_noc_apb_sels, base + 0x8d80); /* AHB */ - hws[IMX8MM_CLK_AHB] = imx8m_clk_hw_composite_critical("ahb", imx8mm_ahb_sels, base + 0x9000); + hws[IMX8MM_CLK_AHB] = imx8m_clk_hw_composite_bus_critical("ahb", imx8mm_ahb_sels, base + 0x9000); hws[IMX8MM_CLK_AUDIO_AHB] = imx8m_clk_hw_composite_bus("audio_ahb", imx8mm_audio_ahb_sels, base + 0x9100); /* IPG */ diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c index e984de543f0b..f3c5e6cf55dd 100644 --- a/drivers/clk/imx/clk-imx8mn.c +++ b/drivers/clk/imx/clk-imx8mn.c @@ -431,7 +431,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) hws[IMX8MN_CLK_A53_CORE] = imx_clk_hw_mux2("arm_a53_core", base + 0x9880, 24, 1, imx8mn_a53_core_sels, ARRAY_SIZE(imx8mn_a53_core_sels)); /* BUS */ - hws[IMX8MN_CLK_MAIN_AXI] = imx8m_clk_hw_composite_critical("main_axi", imx8mn_main_axi_sels, base + 0x8800); + hws[IMX8MN_CLK_MAIN_AXI] = imx8m_clk_hw_composite_bus_critical("main_axi", imx8mn_main_axi_sels, base + 0x8800); hws[IMX8MN_CLK_ENET_AXI] = imx8m_clk_hw_composite_bus("enet_axi", imx8mn_enet_axi_sels, base + 0x8880); hws[IMX8MN_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_bus("nand_usdhc_bus", imx8mn_nand_usdhc_sels, base + 0x8900); hws[IMX8MN_CLK_DISP_AXI] = imx8m_clk_hw_composite_bus("disp_axi", imx8mn_disp_axi_sels, base + 0x8a00); @@ -439,9 +439,9 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) hws[IMX8MN_CLK_USB_BUS] = imx8m_clk_hw_composite_bus("usb_bus", imx8mn_usb_bus_sels, base + 0x8b80); hws[IMX8MN_CLK_GPU_AXI] = imx8m_clk_hw_composite_bus("gpu_axi", imx8mn_gpu_axi_sels, base + 0x8c00); hws[IMX8MN_CLK_GPU_AHB] = imx8m_clk_hw_composite_bus("gpu_ahb", imx8mn_gpu_ahb_sels, base + 0x8c80); - hws[IMX8MN_CLK_NOC] = imx8m_clk_hw_composite_critical("noc", imx8mn_noc_sels, base + 0x8d00); + hws[IMX8MN_CLK_NOC] = imx8m_clk_hw_composite_bus_critical("noc", imx8mn_noc_sels, base + 0x8d00); - hws[IMX8MN_CLK_AHB] = imx8m_clk_hw_composite_critical("ahb", imx8mn_ahb_sels, base + 0x9000); + hws[IMX8MN_CLK_AHB] = imx8m_clk_hw_composite_bus_critical("ahb", imx8mn_ahb_sels, base + 0x9000); hws[IMX8MN_CLK_AUDIO_AHB] = imx8m_clk_hw_composite_bus("audio_ahb", imx8mn_audio_ahb_sels, base + 0x9100); hws[IMX8MN_CLK_IPG_ROOT] = imx_clk_hw_divider2("ipg_root", "ahb", base + 0x9080, 0, 1); hws[IMX8MN_CLK_IPG_AUDIO_ROOT] = imx_clk_hw_divider2("ipg_audio_root", "audio_ahb", base + 0x9180, 0, 1); diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c index 12ce4770f702..48e212477f52 100644 --- a/drivers/clk/imx/clk-imx8mp.c +++ b/drivers/clk/imx/clk-imx8mp.c @@ -557,9 +557,9 @@ static int imx8mp_clocks_probe(struct platform_device *pdev) /* CORE SEL */ hws[IMX8MP_CLK_A53_CORE] = imx_clk_hw_mux2("arm_a53_core", ccm_base + 0x9880, 24, 1, imx8mp_a53_core_sels, ARRAY_SIZE(imx8mp_a53_core_sels)); - hws[IMX8MP_CLK_MAIN_AXI] = imx8m_clk_hw_composite_critical("main_axi", imx8mp_main_axi_sels, ccm_base + 0x8800); + hws[IMX8MP_CLK_MAIN_AXI] = imx8m_clk_hw_composite_bus_critical("main_axi", imx8mp_main_axi_sels, ccm_base + 0x8800); hws[IMX8MP_CLK_ENET_AXI] = imx8m_clk_hw_composite_bus("enet_axi", imx8mp_enet_axi_sels, ccm_base + 0x8880); - hws[IMX8MP_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_critical("nand_usdhc_bus", imx8mp_nand_usdhc_sels, ccm_base + 0x8900); + hws[IMX8MP_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_bus_critical("nand_usdhc_bus", imx8mp_nand_usdhc_sels, ccm_base + 0x8900); hws[IMX8MP_CLK_VPU_BUS] = imx8m_clk_hw_composite_bus("vpu_bus", imx8mp_vpu_bus_sels, ccm_base + 0x8980); hws[IMX8MP_CLK_MEDIA_AXI] = imx8m_clk_hw_composite_bus("media_axi", imx8mp_media_axi_sels, ccm_base + 0x8a00); hws[IMX8MP_CLK_MEDIA_APB] = imx8m_clk_hw_composite_bus("media_apb", imx8mp_media_apb_sels, ccm_base + 0x8a80); @@ -567,12 +567,12 @@ static int imx8mp_clocks_probe(struct platform_device *pdev) hws[IMX8MP_CLK_HDMI_AXI] = imx8m_clk_hw_composite_bus("hdmi_axi", imx8mp_media_axi_sels, ccm_base + 0x8b80); hws[IMX8MP_CLK_GPU_AXI] = imx8m_clk_hw_composite_bus("gpu_axi", imx8mp_gpu_axi_sels, ccm_base + 0x8c00); hws[IMX8MP_CLK_GPU_AHB] = imx8m_clk_hw_composite_bus("gpu_ahb", imx8mp_gpu_ahb_sels, ccm_base + 0x8c80); - hws[IMX8MP_CLK_NOC] = imx8m_clk_hw_composite_critical("noc", imx8mp_noc_sels, ccm_base + 0x8d00); - hws[IMX8MP_CLK_NOC_IO] = imx8m_clk_hw_composite_critical("noc_io", imx8mp_noc_io_sels, ccm_base + 0x8d80); + hws[IMX8MP_CLK_NOC] = imx8m_clk_hw_composite_bus_critical("noc", imx8mp_noc_sels, ccm_base + 0x8d00); + hws[IMX8MP_CLK_NOC_IO] = imx8m_clk_hw_composite_bus_critical("noc_io", imx8mp_noc_io_sels, ccm_base + 0x8d80); hws[IMX8MP_CLK_ML_AXI] = imx8m_clk_hw_composite_bus("ml_axi", imx8mp_ml_axi_sels, ccm_base + 0x8e00); hws[IMX8MP_CLK_ML_AHB] = imx8m_clk_hw_composite_bus("ml_ahb", imx8mp_ml_ahb_sels, ccm_base + 0x8e80); - hws[IMX8MP_CLK_AHB] = imx8m_clk_hw_composite_critical("ahb_root", imx8mp_ahb_sels, ccm_base + 0x9000); + hws[IMX8MP_CLK_AHB] = imx8m_clk_hw_composite_bus_critical("ahb_root", imx8mp_ahb_sels, ccm_base + 0x9000); hws[IMX8MP_CLK_AUDIO_AHB] = imx8m_clk_hw_composite_bus("audio_ahb", imx8mp_audio_ahb_sels, ccm_base + 0x9100); hws[IMX8MP_CLK_MIPI_DSI_ESC_RX] = imx8m_clk_hw_composite_bus("mipi_dsi_esc_rx", imx8mp_mipi_dsi_esc_rx_sels, ccm_base + 0x9200); diff --git a/drivers/clk/imx/clk-imx8mq.c b/drivers/clk/imx/clk-imx8mq.c index 8265d1d48af4..06292d4a98ff 100644 --- a/drivers/clk/imx/clk-imx8mq.c +++ b/drivers/clk/imx/clk-imx8mq.c @@ -431,7 +431,7 @@ static int imx8mq_clocks_probe(struct platform_device *pdev) hws[IMX8MQ_CLK_A53_CORE] = imx_clk_hw_mux2("arm_a53_core", base + 0x9880, 24, 1, imx8mq_a53_core_sels, ARRAY_SIZE(imx8mq_a53_core_sels)); /* BUS */ - hws[IMX8MQ_CLK_MAIN_AXI] = imx8m_clk_hw_composite_critical("main_axi", imx8mq_main_axi_sels, base + 0x8800); + hws[IMX8MQ_CLK_MAIN_AXI] = imx8m_clk_hw_composite_bus_critical("main_axi", imx8mq_main_axi_sels, base + 0x8800); hws[IMX8MQ_CLK_ENET_AXI] = imx8m_clk_hw_composite_bus("enet_axi", imx8mq_enet_axi_sels, base + 0x8880); hws[IMX8MQ_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_bus("nand_usdhc_bus", imx8mq_nand_usdhc_sels, base + 0x8900); hws[IMX8MQ_CLK_VPU_BUS] = imx8m_clk_hw_composite_bus("vpu_bus", imx8mq_vpu_bus_sels, base + 0x8980); @@ -441,12 +441,12 @@ static int imx8mq_clocks_probe(struct platform_device *pdev) hws[IMX8MQ_CLK_USB_BUS] = imx8m_clk_hw_composite_bus("usb_bus", imx8mq_usb_bus_sels, base + 0x8b80); hws[IMX8MQ_CLK_GPU_AXI] = imx8m_clk_hw_composite_bus("gpu_axi", imx8mq_gpu_axi_sels, base + 0x8c00); hws[IMX8MQ_CLK_GPU_AHB] = imx8m_clk_hw_composite_bus("gpu_ahb", imx8mq_gpu_ahb_sels, base + 0x8c80); - hws[IMX8MQ_CLK_NOC] = imx8m_clk_hw_composite_critical("noc", imx8mq_noc_sels, base + 0x8d00); - hws[IMX8MQ_CLK_NOC_APB] = imx8m_clk_hw_composite_critical("noc_apb", imx8mq_noc_apb_sels, base + 0x8d80); + hws[IMX8MQ_CLK_NOC] = imx8m_clk_hw_composite_bus_critical("noc", imx8mq_noc_sels, base + 0x8d00); + hws[IMX8MQ_CLK_NOC_APB] = imx8m_clk_hw_composite_bus_critical("noc_apb", imx8mq_noc_apb_sels, base + 0x8d80); /* AHB */ /* AHB clock is used by the AHB bus therefore marked as critical */ - hws[IMX8MQ_CLK_AHB] = imx8m_clk_hw_composite_critical("ahb", imx8mq_ahb_sels, base + 0x9000); + hws[IMX8MQ_CLK_AHB] = imx8m_clk_hw_composite_bus_critical("ahb", imx8mq_ahb_sels, base + 0x9000); hws[IMX8MQ_CLK_AUDIO_AHB] = imx8m_clk_hw_composite_bus("audio_ahb", imx8mq_audio_ahb_sels, base + 0x9100); /* IPG */ diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h index 3b796b3da249..1d7be0c86538 100644 --- a/drivers/clk/imx/clk.h +++ b/drivers/clk/imx/clk.h @@ -549,6 +549,11 @@ struct clk_hw *imx8m_clk_hw_composite_flags(const char *name, IMX_COMPOSITE_BUS, \ CLK_SET_RATE_NO_REPARENT | CLK_OPS_PARENT_ENABLE) +#define imx8m_clk_hw_composite_bus_critical(name, parent_names, reg) \ + imx8m_clk_hw_composite_flags(name, parent_names, ARRAY_SIZE(parent_names), reg, \ + IMX_COMPOSITE_BUS, \ + CLK_SET_RATE_NO_REPARENT | CLK_OPS_PARENT_ENABLE | CLK_IS_CRITICAL) + #define imx8m_clk_hw_composite_core(name, parent_names, reg) \ imx8m_clk_hw_composite_flags(name, parent_names, \ ARRAY_SIZE(parent_names), reg, \ From da3fecb0040324c08f1587e5bff1f15f36be1872 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Mon, 2 Nov 2020 22:24:39 -0800 Subject: [PATCH 116/710] scsi: ufs: Fix unbalanced scsi_block_reqs_cnt caused by ufshcd_hold() The scsi_block_reqs_cnt increased in ufshcd_hold() is supposed to be decreased back in ufshcd_ungate_work() in a paired way. However, if specific ufshcd_hold/release sequences are met, it is possible that scsi_block_reqs_cnt is increased twice but only one ungate work is queued. To make sure scsi_block_reqs_cnt is handled by ufshcd_hold() and ufshcd_ungate_work() in a paired way, increase it only if queue_work() returns true. Link: https://lore.kernel.org/r/1604384682-15837-2-git-send-email-cang@codeaurora.org Reviewed-by: Hongwu Su Reviewed-by: Stanley Chu Reviewed-by: Bean Huo Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index b8f573a02713..09deefe5904a 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1627,12 +1627,12 @@ start: */ fallthrough; case CLKS_OFF: - ufshcd_scsi_block_requests(hba); hba->clk_gating.state = REQ_CLKS_ON; trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); - queue_work(hba->clk_gating.clk_gating_workq, - &hba->clk_gating.ungate_work); + if (queue_work(hba->clk_gating.clk_gating_workq, + &hba->clk_gating.ungate_work)) + ufshcd_scsi_block_requests(hba); /* * fall through to check if we should wait for this * work to be done or not. From 0f52fcb99ea2738a0a0f28e12cf4dd427069dd2a Mon Sep 17 00:00:00 2001 From: Can Guo Date: Mon, 2 Nov 2020 22:24:40 -0800 Subject: [PATCH 117/710] scsi: ufs: Try to save power mode change and UIC cmd completion timeout Use the uic_cmd->cmd_active as a flag to track the lifecycle of an UIC cmd. The flag is set before sending the UIC cmd and cleared in IRQ handler. When a PMC or UIC cmd completion timeout happens, if the flag is not set, instead of returning timeout error, we still treat it as a successful operation. This is to deal with the scenario in which completion has been raised but the one waiting for the completion cannot be awaken in time due to kernel scheduling problem. Link: https://lore.kernel.org/r/1604384682-15837-3-git-send-email-cang@codeaurora.org Reviewed-by: Stanley Chu Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 26 ++++++++++++++++++++++++-- drivers/scsi/ufs/ufshcd.h | 2 ++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 09deefe5904a..5167c3e770a3 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2115,10 +2115,20 @@ ufshcd_wait_for_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) unsigned long flags; if (wait_for_completion_timeout(&uic_cmd->done, - msecs_to_jiffies(UIC_CMD_TIMEOUT))) + msecs_to_jiffies(UIC_CMD_TIMEOUT))) { ret = uic_cmd->argument2 & MASK_UIC_COMMAND_RESULT; - else + } else { ret = -ETIMEDOUT; + dev_err(hba->dev, + "uic cmd 0x%x with arg3 0x%x completion timeout\n", + uic_cmd->command, uic_cmd->argument3); + + if (!uic_cmd->cmd_active) { + dev_err(hba->dev, "%s: UIC cmd has been completed, return the result\n", + __func__); + ret = uic_cmd->argument2 & MASK_UIC_COMMAND_RESULT; + } + } spin_lock_irqsave(hba->host->host_lock, flags); hba->active_uic_cmd = NULL; @@ -2150,6 +2160,7 @@ __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd, if (completion) init_completion(&uic_cmd->done); + uic_cmd->cmd_active = 1; ufshcd_dispatch_uic_cmd(hba, uic_cmd); return 0; @@ -3807,10 +3818,18 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) dev_err(hba->dev, "pwr ctrl cmd 0x%x with mode 0x%x completion timeout\n", cmd->command, cmd->argument3); + + if (!cmd->cmd_active) { + dev_err(hba->dev, "%s: Power Mode Change operation has been completed, go check UPMCRS\n", + __func__); + goto check_upmcrs; + } + ret = -ETIMEDOUT; goto out; } +check_upmcrs: status = ufshcd_get_upmcrs(hba); if (status != PWR_LOCAL) { dev_err(hba->dev, @@ -4902,11 +4921,14 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status) ufshcd_get_uic_cmd_result(hba); hba->active_uic_cmd->argument3 = ufshcd_get_dme_attr_val(hba); + if (!hba->uic_async_done) + hba->active_uic_cmd->cmd_active = 0; complete(&hba->active_uic_cmd->done); retval = IRQ_HANDLED; } if ((intr_status & UFSHCD_UIC_PWR_MASK) && hba->uic_async_done) { + hba->active_uic_cmd->cmd_active = 0; complete(hba->uic_async_done); retval = IRQ_HANDLED; } diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 47eb1430274c..e0f00a42371c 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -64,6 +64,7 @@ enum dev_cmd_type { * @argument1: UIC command argument 1 * @argument2: UIC command argument 2 * @argument3: UIC command argument 3 + * @cmd_active: Indicate if UIC command is outstanding * @done: UIC command completion */ struct uic_command { @@ -71,6 +72,7 @@ struct uic_command { u32 argument1; u32 argument2; u32 argument3; + int cmd_active; struct completion done; }; From 34a9fa2025d9d3177c99351c7aaf256c5f50691f Mon Sep 17 00:00:00 2001 From: Pablo Ceballos Date: Mon, 2 Nov 2020 19:29:39 -0500 Subject: [PATCH 118/710] HID: hid-sensor-hub: Fix issue with devices with no report ID Some HID devices don't use a report ID because they only have a single report. In those cases, the report ID in struct hid_report will be zero and the data for the report will start at the first byte, so don't skip over the first byte. Signed-off-by: Pablo Ceballos Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 94c7398b5c27..3dd7d3246737 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -483,7 +483,8 @@ static int sensor_hub_raw_event(struct hid_device *hdev, return 1; ptr = raw_data; - ptr++; /* Skip report id */ + if (report->id) + ptr++; /* Skip report id */ spin_lock_irqsave(&pdata->lock, flags); From 567b8e9fed8add9e20885be38ecd73bb0e07406b Mon Sep 17 00:00:00 2001 From: Lars Povlsen Date: Wed, 4 Nov 2020 23:02:23 +0100 Subject: [PATCH 119/710] HID: mcp2221: Fix GPIO output handling The mcp2221 driver GPIO output handling has has several issues. * A wrong value is used for the GPIO direction. * Wrong offsets are calculated for some GPIO set value/set direction operations, when offset is larger than 0. This has been fixed by introducing proper manifest constants for the direction encoding, and using 'offsetof' when calculating GPIO register offsets. The updated driver has been tested with the Sparx5 pcb134/pcb135 board, which has the mcp2221 device with several (output) GPIO's. Fixes: 328de1c519c5c092 ("HID: mcp2221: add GPIO functionality support") Reviewed-by: Rishi Gupta Signed-off-by: Lars Povlsen Signed-off-by: Jiri Kosina --- drivers/hid/hid-mcp2221.c | 48 +++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index 0d27ccb55dd9..4211b9839209 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -49,6 +49,36 @@ enum { MCP2221_ALT_F_NOT_GPIOD = 0xEF, }; +/* MCP GPIO direction encoding */ +enum { + MCP2221_DIR_OUT = 0x00, + MCP2221_DIR_IN = 0x01, +}; + +#define MCP_NGPIO 4 + +/* MCP GPIO set command layout */ +struct mcp_set_gpio { + u8 cmd; + u8 dummy; + struct { + u8 change_value; + u8 value; + u8 change_direction; + u8 direction; + } gpio[MCP_NGPIO]; +} __packed; + +/* MCP GPIO get command layout */ +struct mcp_get_gpio { + u8 cmd; + u8 dummy; + struct { + u8 direction; + u8 value; + } gpio[MCP_NGPIO]; +} __packed; + /* * There is no way to distinguish responses. Therefore next command * is sent only after response to previous has been received. Mutex @@ -542,7 +572,7 @@ static int mcp_gpio_get(struct gpio_chip *gc, mcp->txbuf[0] = MCP2221_GPIO_GET; - mcp->gp_idx = (offset + 1) * 2; + mcp->gp_idx = offsetof(struct mcp_get_gpio, gpio[offset].value); mutex_lock(&mcp->lock); ret = mcp_send_data_req_status(mcp, mcp->txbuf, 1); @@ -559,7 +589,7 @@ static void mcp_gpio_set(struct gpio_chip *gc, memset(mcp->txbuf, 0, 18); mcp->txbuf[0] = MCP2221_GPIO_SET; - mcp->gp_idx = ((offset + 1) * 4) - 1; + mcp->gp_idx = offsetof(struct mcp_set_gpio, gpio[offset].value); mcp->txbuf[mcp->gp_idx - 1] = 1; mcp->txbuf[mcp->gp_idx] = !!value; @@ -575,7 +605,7 @@ static int mcp_gpio_dir_set(struct mcp2221 *mcp, memset(mcp->txbuf, 0, 18); mcp->txbuf[0] = MCP2221_GPIO_SET; - mcp->gp_idx = (offset + 1) * 5; + mcp->gp_idx = offsetof(struct mcp_set_gpio, gpio[offset].direction); mcp->txbuf[mcp->gp_idx - 1] = 1; mcp->txbuf[mcp->gp_idx] = val; @@ -590,7 +620,7 @@ static int mcp_gpio_direction_input(struct gpio_chip *gc, struct mcp2221 *mcp = gpiochip_get_data(gc); mutex_lock(&mcp->lock); - ret = mcp_gpio_dir_set(mcp, offset, 0); + ret = mcp_gpio_dir_set(mcp, offset, MCP2221_DIR_IN); mutex_unlock(&mcp->lock); return ret; @@ -603,7 +633,7 @@ static int mcp_gpio_direction_output(struct gpio_chip *gc, struct mcp2221 *mcp = gpiochip_get_data(gc); mutex_lock(&mcp->lock); - ret = mcp_gpio_dir_set(mcp, offset, 1); + ret = mcp_gpio_dir_set(mcp, offset, MCP2221_DIR_OUT); mutex_unlock(&mcp->lock); /* Can't configure as output, bailout early */ @@ -623,7 +653,7 @@ static int mcp_gpio_get_direction(struct gpio_chip *gc, mcp->txbuf[0] = MCP2221_GPIO_GET; - mcp->gp_idx = (offset + 1) * 2; + mcp->gp_idx = offsetof(struct mcp_get_gpio, gpio[offset].direction); mutex_lock(&mcp->lock); ret = mcp_send_data_req_status(mcp, mcp->txbuf, 1); @@ -632,7 +662,7 @@ static int mcp_gpio_get_direction(struct gpio_chip *gc, if (ret) return ret; - if (mcp->gpio_dir) + if (mcp->gpio_dir == MCP2221_DIR_IN) return GPIO_LINE_DIRECTION_IN; return GPIO_LINE_DIRECTION_OUT; @@ -758,7 +788,7 @@ static int mcp2221_raw_event(struct hid_device *hdev, mcp->status = -ENOENT; } else { mcp->status = !!data[mcp->gp_idx]; - mcp->gpio_dir = !!data[mcp->gp_idx + 1]; + mcp->gpio_dir = data[mcp->gp_idx + 1]; } break; default: @@ -860,7 +890,7 @@ static int mcp2221_probe(struct hid_device *hdev, mcp->gc->get_direction = mcp_gpio_get_direction; mcp->gc->set = mcp_gpio_set; mcp->gc->get = mcp_gpio_get; - mcp->gc->ngpio = 4; + mcp->gc->ngpio = MCP_NGPIO; mcp->gc->base = -1; mcp->gc->can_sleep = 1; mcp->gc->parent = &hdev->dev; From 2b12c13637134897ba320bd8906a8d918ee7069b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 9 Oct 2020 21:08:55 +0300 Subject: [PATCH 120/710] pinctrl: mcp23s08: Use full chunk of memory for regmap configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It appears that simplification of mcp23s08_spi_regmap_init() made a regression due to wrong size calculation for dev_kmemdup() call. It misses the fact that config variable is already a pointer, thus the sizeof() calculation is wrong and only 4 or 8 bytes were copied. Fix the parameters to devm_kmemdup() to copy a full chunk of memory. Fixes: 0874758ecb2b ("pinctrl: mcp23s08: Refactor mcp23s08_spi_regmap_init()") Reported-by: Martin Hundebøll Signed-off-by: Andy Shevchenko Tested-by: Martin Hundebøll Link: https://lore.kernel.org/r/20201009180856.4738-1-andriy.shevchenko@linux.intel.com Tested-by: Jan Kundrát Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mcp23s08_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-mcp23s08_spi.c b/drivers/pinctrl/pinctrl-mcp23s08_spi.c index 1f47a661b0a7..7c72cffe1412 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08_spi.c +++ b/drivers/pinctrl/pinctrl-mcp23s08_spi.c @@ -119,7 +119,7 @@ static int mcp23s08_spi_regmap_init(struct mcp23s08 *mcp, struct device *dev, return -EINVAL; } - copy = devm_kmemdup(dev, &config, sizeof(config), GFP_KERNEL); + copy = devm_kmemdup(dev, config, sizeof(*config), GFP_KERNEL); if (!copy) return -ENOMEM; From a835d3a114ab0dc2f0d8c6963c3f53734b1c5965 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 9 Oct 2020 21:08:56 +0300 Subject: [PATCH 121/710] pinctrl: mcp23s08: Print error message when regmap init fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is useful for debugging to have the error message printed when regmap initialisation fails. Add it to the driver. Signed-off-by: Andy Shevchenko Cc: Martin Hundebøll Link: https://lore.kernel.org/r/20201009180856.4738-2-andriy.shevchenko@linux.intel.com Tested-by: Jan Kundrát Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mcp23s08_spi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-mcp23s08_spi.c b/drivers/pinctrl/pinctrl-mcp23s08_spi.c index 7c72cffe1412..9ae10318f6f3 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08_spi.c +++ b/drivers/pinctrl/pinctrl-mcp23s08_spi.c @@ -126,6 +126,8 @@ static int mcp23s08_spi_regmap_init(struct mcp23s08 *mcp, struct device *dev, copy->name = name; mcp->regmap = devm_regmap_init(dev, &mcp23sxx_spi_regmap, mcp, copy); + if (IS_ERR(mcp->regmap)) + dev_err(dev, "regmap init failed for %s\n", mcp->chip.label); return PTR_ERR_OR_ZERO(mcp->regmap); } From a663e0df4a374b8537562a44d1cecafb472cd65b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 7 Oct 2020 17:06:17 +0300 Subject: [PATCH 122/710] thunderbolt: Fix memory leak if ida_simple_get() fails in enumerate_services() The svc->key field is not released as it should be if ida_simple_get() fails so fix that. Fixes: 9aabb68568b4 ("thunderbolt: Fix to check return value of ida_simple_get") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/xdomain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index 48907853732a..c00ad817042e 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -881,6 +881,7 @@ static void enumerate_services(struct tb_xdomain *xd) id = ida_simple_get(&xd->service_ids, 0, 0, GFP_KERNEL); if (id < 0) { + kfree(svc->key); kfree(svc); break; } From 77455129fb5b2a8749330b2b40d0c8750b6bf076 Mon Sep 17 00:00:00 2001 From: Casey Bowman Date: Wed, 7 Oct 2020 16:13:07 -0700 Subject: [PATCH 123/710] thunderbolt: Add uaccess dependency to debugfs interface Some calls in the debugfs interface are made to the linux/uaccess.h header, but the header is not referenced. So, for x86_64 architectures, this dependency seems to be pulled in elsewhere, which leads to a successful compilation. However, on arm/arm64 architectures, it was found to error out on implicit declarations. This change fixes the implicit declaration error by adding the linux/uaccess.h header. Fixes: 54e418106c76 ("thunderbolt: Add debugfs interface") Signed-off-by: Casey Bowman Signed-off-by: Mika Westerberg --- drivers/thunderbolt/debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 3680b2784ea1..ed65d2b13964 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -9,6 +9,7 @@ #include #include +#include #include "tb.h" From f8fa2c2e63c76e5d73526f38bdde59fdcfbea166 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 5 Oct 2020 18:55:24 +0300 Subject: [PATCH 124/710] thunderbolt: Only configure USB4 wake for lane 0 adapters Only USB4 lane 0 adapter has the USB4 port capability for wakes so only program wakes on such adapters. Fixes: b2911a593a70 ("thunderbolt: Enable wakes from system suspend") Signed-off-by: Mika Westerberg --- drivers/thunderbolt/usb4.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 40f13579a3fe..f2583b4053e4 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -421,8 +421,12 @@ int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags) * upstream USB4 port. */ tb_switch_for_each_port(sw, port) { + if (!tb_port_is_null(port)) + continue; if (!route && tb_is_upstream_port(port)) continue; + if (!port->cap_usb4) + continue; ret = tb_port_read(port, &val, TB_CFG_PORT, port->cap_usb4 + PORT_CS_19, 1); From a1fbc6750e212c5675a4e48d7f51d44607eb8756 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 4 Oct 2020 19:04:26 +0100 Subject: [PATCH 125/710] btrfs: fix potential overflow in cluster_pages_for_defrag on 32bit arch On 32-bit systems, this shift will overflow for files larger than 4GB as start_index is unsigned long while the calls to btrfs_delalloc_*_space expect u64. CC: stable@vger.kernel.org # 4.4+ Fixes: df480633b891 ("btrfs: extent-tree: Switch to new delalloc space reserve and release") Reviewed-by: Josef Bacik Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Sterba [ define the variable instead of repeating the shift ] Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ab408a23ba32..69a384145dc6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1274,6 +1274,7 @@ static int cluster_pages_for_defrag(struct inode *inode, u64 page_start; u64 page_end; u64 page_cnt; + u64 start = (u64)start_index << PAGE_SHIFT; int ret; int i; int i_done; @@ -1290,8 +1291,7 @@ static int cluster_pages_for_defrag(struct inode *inode, page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, - start_index << PAGE_SHIFT, - page_cnt << PAGE_SHIFT); + start, page_cnt << PAGE_SHIFT); if (ret) return ret; i_done = 0; @@ -1380,8 +1380,7 @@ again: btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, - start_index << PAGE_SHIFT, - (page_cnt - i_done) << PAGE_SHIFT, true); + start, (page_cnt - i_done) << PAGE_SHIFT, true); } @@ -1408,8 +1407,7 @@ out: put_page(pages[i]); } btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, - start_index << PAGE_SHIFT, - page_cnt << PAGE_SHIFT, true); + start, page_cnt << PAGE_SHIFT, true); btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return ret; From e38fdb716702879a942017c85e84c0a3a9e4af96 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Oct 2020 16:57:26 -0400 Subject: [PATCH 126/710] btrfs: print the block rsv type when we fail our reservation To help with debugging, print the type of the block rsv when we fail to use our target block rsv in btrfs_use_block_rsv. This now produces: [ 544.672035] BTRFS: block rsv 1 returned -28 which is still cryptic without consulting the enum in block-rsv.h but I guess it's better than nothing. Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ add note from Nikolay ] Signed-off-by: David Sterba --- fs/btrfs/block-rsv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 7e1549a84fcc..bc920afe23bf 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -511,7 +511,8 @@ again: /*DEFAULT_RATELIMIT_BURST*/ 1); if (__ratelimit(&_rs)) WARN(1, KERN_DEBUG - "BTRFS: block rsv returned %d\n", ret); + "BTRFS: block rsv %d returned %d\n", + block_rsv->type, ret); } try_reserve: ret = btrfs_reserve_metadata_bytes(root, block_rsv, blocksize, From fca3a45d08782a2bb85e048fb8e3128b1388d7b7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Oct 2020 16:57:27 -0400 Subject: [PATCH 127/710] btrfs: fix min reserved size calculation in merge_reloc_root The minimum reserve size was adjusted to take into account the height of the tree we are merging, however we can have a root with a level == 0. What we want is root_level + 1 to get the number of nodes we may have to cow. This fixes the enospc_debug warning pops with btrfs/101. Nikolay: this fixes failures on btrfs/060 btrfs/062 btrfs/063 and btrfs/195 That I was seeing, the call trace was: [ 3680.515564] ------------[ cut here ]------------ [ 3680.515566] BTRFS: block rsv returned -28 [ 3680.515585] WARNING: CPU: 2 PID: 8339 at fs/btrfs/block-rsv.c:521 btrfs_use_block_rsv+0x162/0x180 [ 3680.515587] Modules linked in: [ 3680.515591] CPU: 2 PID: 8339 Comm: btrfs Tainted: G W 5.9.0-rc8-default #95 [ 3680.515593] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014 [ 3680.515595] RIP: 0010:btrfs_use_block_rsv+0x162/0x180 [ 3680.515600] RSP: 0018:ffffa01ac9753910 EFLAGS: 00010282 [ 3680.515602] RAX: 0000000000000000 RBX: ffff984b34200000 RCX: 0000000000000027 [ 3680.515604] RDX: 0000000000000027 RSI: 0000000000000000 RDI: ffff984b3bd19e28 [ 3680.515606] RBP: 0000000000004000 R08: ffff984b3bd19e20 R09: 0000000000000001 [ 3680.515608] R10: 0000000000000004 R11: 0000000000000046 R12: ffff984b264fdc00 [ 3680.515609] R13: ffff984b13149000 R14: 00000000ffffffe4 R15: ffff984b34200000 [ 3680.515613] FS: 00007f4e2912b8c0(0000) GS:ffff984b3bd00000(0000) knlGS:0000000000000000 [ 3680.515615] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3680.515617] CR2: 00007fab87122150 CR3: 0000000118e42000 CR4: 00000000000006e0 [ 3680.515620] Call Trace: [ 3680.515627] btrfs_alloc_tree_block+0x8b/0x340 [ 3680.515633] ? __lock_acquire+0x51a/0xac0 [ 3680.515646] alloc_tree_block_no_bg_flush+0x4f/0x60 [ 3680.515651] __btrfs_cow_block+0x14e/0x7e0 [ 3680.515662] btrfs_cow_block+0x144/0x2c0 [ 3680.515670] merge_reloc_root+0x4d4/0x610 [ 3680.515675] ? btrfs_lookup_fs_root+0x78/0x90 [ 3680.515686] merge_reloc_roots+0xee/0x280 [ 3680.515695] relocate_block_group+0x2ce/0x5e0 [ 3680.515704] btrfs_relocate_block_group+0x16e/0x310 [ 3680.515711] btrfs_relocate_chunk+0x38/0xf0 [ 3680.515716] btrfs_shrink_device+0x200/0x560 [ 3680.515728] btrfs_rm_device+0x1ae/0x6a6 [ 3680.515744] ? _copy_from_user+0x6e/0xb0 [ 3680.515750] btrfs_ioctl+0x1afe/0x28c0 [ 3680.515755] ? find_held_lock+0x2b/0x80 [ 3680.515760] ? do_user_addr_fault+0x1f8/0x418 [ 3680.515773] ? __x64_sys_ioctl+0x77/0xb0 [ 3680.515775] __x64_sys_ioctl+0x77/0xb0 [ 3680.515781] do_syscall_64+0x31/0x70 [ 3680.515785] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported-by: Nikolay Borisov Fixes: 44d354abf33e ("btrfs: relocation: review the call sites which can be interrupted by signal") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Nikolay Borisov Tested-by: Nikolay Borisov Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 3602806d71bd..9ba92d86da0b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1648,6 +1648,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, struct btrfs_root_item *root_item; struct btrfs_path *path; struct extent_buffer *leaf; + int reserve_level; int level; int max_level; int replaced = 0; @@ -1696,7 +1697,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, * Thus the needed metadata size is at most root_level * nodesize, * and * 2 since we have two trees to COW. */ - min_reserved = fs_info->nodesize * btrfs_root_level(root_item) * 2; + reserve_level = max_t(int, 1, btrfs_root_level(root_item)); + min_reserved = fs_info->nodesize * reserve_level * 2; memset(&next_key, 0, sizeof(next_key)); while (1) { From f07728d541ebefcf3d2ec7bc99a3bffd052d9f90 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 23 Oct 2020 14:26:33 +0300 Subject: [PATCH 128/710] btrfs: clean up NULL checks in qgroup_unreserve_range() Smatch complains that this code dereferences "entry" before checking whether it's NULL on the next line. Fortunately, rb_entry() will never return NULL so it doesn't cause a problem. We can clean up the NULL checking a bit to silence the warning and make the code more clear. Reviewed-by: Qu Wenruo Signed-off-by: Dan Carpenter Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index c54ea6586632..77c54749f432 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3435,24 +3435,20 @@ static int qgroup_unreserve_range(struct btrfs_inode *inode, { struct rb_node *node; struct rb_node *next; - struct ulist_node *entry = NULL; + struct ulist_node *entry; int ret = 0; node = reserved->range_changed.root.rb_node; + if (!node) + return 0; while (node) { entry = rb_entry(node, struct ulist_node, rb_node); if (entry->val < start) node = node->rb_right; - else if (entry) - node = node->rb_left; else - break; + node = node->rb_left; } - /* Empty changeset */ - if (!entry) - return 0; - if (entry->val > start && rb_prev(&entry->rb_node)) entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node, rb_node); From a4852cf268b5ae487ba18f2b24e44094afce0675 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 9 Jul 2020 11:25:40 +0200 Subject: [PATCH 129/710] btrfs: scrub: update message regarding read-only status Based on user feedback update the message printed when scrub fails to start due to write requirements. To make a distinction add a device id to the messages. Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index cf63f1e27a27..e71e7586e9eb 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3866,8 +3866,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, if (!is_dev_replace && !readonly && !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); - btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable", - rcu_str_deref(dev->name)); + btrfs_err_in_rcu(fs_info, + "scrub on devid %llu: filesystem on %s is not writable", + devid, rcu_str_deref(dev->name)); ret = -EROFS; goto out; } From cf89af146b7e62af55470cf5f3ec3c56ec144a5e Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 30 Oct 2020 06:53:56 +0800 Subject: [PATCH 130/710] btrfs: dev-replace: fail mount if we don't have replace item with target device If there is a device BTRFS_DEV_REPLACE_DEVID without the device replace item, then it means the filesystem is inconsistent state. This is either corruption or a crafted image. Fail the mount as this needs a closer look what is actually wrong. As of now if BTRFS_DEV_REPLACE_DEVID is present without the replace item, in __btrfs_free_extra_devids() we determine that there is an extra device, and free those extra devices but continue to mount the device. However, we were wrong in keeping tack of the rw_devices so the syzbot testcase failed: WARNING: CPU: 1 PID: 3612 at fs/btrfs/volumes.c:1166 close_fs_devices.part.0+0x607/0x800 fs/btrfs/volumes.c:1166 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 3612 Comm: syz-executor.2 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 panic+0x347/0x7c0 kernel/panic.c:231 __warn.cold+0x20/0x46 kernel/panic.c:600 report_bug+0x1bd/0x210 lib/bug.c:198 handle_bug+0x38/0x90 arch/x86/kernel/traps.c:234 exc_invalid_op+0x14/0x40 arch/x86/kernel/traps.c:254 asm_exc_invalid_op+0x12/0x20 arch/x86/include/asm/idtentry.h:536 RIP: 0010:close_fs_devices.part.0+0x607/0x800 fs/btrfs/volumes.c:1166 RSP: 0018:ffffc900091777e0 EFLAGS: 00010246 RAX: 0000000000040000 RBX: ffffffffffffffff RCX: ffffc9000c8b7000 RDX: 0000000000040000 RSI: ffffffff83097f47 RDI: 0000000000000007 RBP: dffffc0000000000 R08: 0000000000000001 R09: ffff8880988a187f R10: 0000000000000000 R11: 0000000000000001 R12: ffff88809593a130 R13: ffff88809593a1ec R14: ffff8880988a1908 R15: ffff88809593a050 close_fs_devices fs/btrfs/volumes.c:1193 [inline] btrfs_close_devices+0x95/0x1f0 fs/btrfs/volumes.c:1179 open_ctree+0x4984/0x4a2d fs/btrfs/disk-io.c:3434 btrfs_fill_super fs/btrfs/super.c:1316 [inline] btrfs_mount_root.cold+0x14/0x165 fs/btrfs/super.c:1672 The fix here is, when we determine that there isn't a replace item then fail the mount if there is a replace target device (devid 0). CC: stable@vger.kernel.org # 4.19+ Reported-by: syzbot+4cfe71a4da060be47502@syzkaller.appspotmail.com Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 26 ++++++++++++++++++++++++-- fs/btrfs/volumes.c | 26 +++++++------------------- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5b9e3f3ace22..10638537b9ef 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -91,6 +91,17 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { no_valid_dev_replace_entry_found: + /* + * We don't have a replace item or it's corrupted. If there is + * a replace target, fail the mount. + */ + if (btrfs_find_device(fs_info->fs_devices, + BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) { + btrfs_err(fs_info, + "found replace target device without a valid replace item"); + ret = -EUCLEAN; + goto out; + } ret = 0; dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED; @@ -143,8 +154,19 @@ no_valid_dev_replace_entry_found: case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: - dev_replace->srcdev = NULL; - dev_replace->tgtdev = NULL; + /* + * We don't have an active replace item but if there is a + * replace target, fail the mount. + */ + if (btrfs_find_device(fs_info->fs_devices, + BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) { + btrfs_err(fs_info, + "replace devid present without an active replace item"); + ret = -EUCLEAN; + } else { + dev_replace->srcdev = NULL; + dev_replace->tgtdev = NULL; + } break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b1e48078c318..a6406b3b8c2b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1056,22 +1056,13 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, continue; } - if (device->devid == BTRFS_DEV_REPLACE_DEVID) { - /* - * In the first step, keep the device which has - * the correct fsid and the devid that is used - * for the dev_replace procedure. - * In the second step, the dev_replace state is - * read from the device tree and it is known - * whether the procedure is really active or - * not, which means whether this device is - * used or whether it should be removed. - */ - if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT, - &device->dev_state)) { - continue; - } - } + /* + * We have already validated the presence of BTRFS_DEV_REPLACE_DEVID, + * in btrfs_init_dev_replace() so just continue. + */ + if (device->devid == BTRFS_DEV_REPLACE_DEVID) + continue; + if (device->bdev) { blkdev_put(device->bdev, device->mode); device->bdev = NULL; @@ -1080,9 +1071,6 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { list_del_init(&device->dev_alloc_list); clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); - if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, - &device->dev_state)) - fs_devices->rw_devices--; } list_del_init(&device->dev_list); fs_devices->num_devices--; From 468600c6ec28613b756193c5f780aac062f1acdf Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Wed, 21 Oct 2020 13:36:55 +0800 Subject: [PATCH 131/710] btrfs: ref-verify: fix memory leak in btrfs_ref_tree_mod There is one error handling path that does not free ref, which may cause a minor memory leak. CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Josef Bacik Signed-off-by: Dinghao Liu Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ref-verify.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 7f03dbe5b609..78693d3dd15b 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -860,6 +860,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, "dropping a ref for a root that doesn't have a ref on the block"); dump_block_entry(fs_info, be); dump_ref_action(fs_info, ra); + kfree(ref); kfree(ra); goto out_unlock; } From 9b92f5c51e9a41352d665f6f956bd95085a56a83 Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Fri, 30 Oct 2020 13:54:50 +0800 Subject: [PATCH 132/710] pinctrl: aspeed: Fix GPI only function problem. Some gpio pin at aspeed soc is input only and the prefix name of these pin is "GPI" only. This patch fine-tune the condition of GPIO check from "GPIO" to "GPI" and it will fix the usage error of banks D and E in the AST2400/AST2500 and banks T and U in the AST2600. Fixes: 4d3d0e4272d8 ("pinctrl: Add core support for Aspeed SoCs") Signed-off-by: Billy Tsai Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20201030055450.29613-1-billy_tsai@aspeedtech.com Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index 6a94eaecf638..d6b849552a1e 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -286,13 +286,14 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function, static bool aspeed_expr_is_gpio(const struct aspeed_sig_expr *expr) { /* - * The signal type is GPIO if the signal name has "GPIO" as a prefix. + * The signal type is GPIO if the signal name has "GPI" as a prefix. * strncmp (rather than strcmp) is used to implement the prefix * requirement. * - * expr->signal might look like "GPIOT3" in the GPIO case. + * expr->signal might look like "GPIOB1" in the GPIO case. + * expr->signal might look like "GPIT0" in the GPI case. */ - return strncmp(expr->signal, "GPIO", 4) == 0; + return strncmp(expr->signal, "GPI", 3) == 0; } static bool aspeed_gpio_in_exprs(const struct aspeed_sig_expr **exprs) From 1f5eb8b17f02d216703ee56e4c3115f592b060fb Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Thu, 5 Nov 2020 18:40:49 +0800 Subject: [PATCH 133/710] gpiolib: fix sysfs when cdev is not selected In gpiochip_setup_dev() the call to gpiolib_cdev_register() indirectly calls device_add(). This is still required for the sysfs even when CONFIG_GPIO_CDEV is not selected in the build. Replace the stubbed functions in gpiolib-cdev.h with macros in gpiolib.c that perform the required device_add() and device_del() when CONFIG_GPIO_CDEV is not selected. Fixes: d143493c01b7 (gpiolib: make cdev a build option) Reported-by: Nicolas Schichan Signed-off-by: Kent Gibson Tested-by: Nicolas Schichan Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-cdev.h | 15 --------------- drivers/gpio/gpiolib.c | 18 +++++++++++++++--- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/gpio/gpiolib-cdev.h b/drivers/gpio/gpiolib-cdev.h index cb41dd757338..b42644cbffb8 100644 --- a/drivers/gpio/gpiolib-cdev.h +++ b/drivers/gpio/gpiolib-cdev.h @@ -7,22 +7,7 @@ struct gpio_device; -#ifdef CONFIG_GPIO_CDEV - int gpiolib_cdev_register(struct gpio_device *gdev, dev_t devt); void gpiolib_cdev_unregister(struct gpio_device *gdev); -#else - -static inline int gpiolib_cdev_register(struct gpio_device *gdev, dev_t devt) -{ - return 0; -} - -static inline void gpiolib_cdev_unregister(struct gpio_device *gdev) -{ -} - -#endif /* CONFIG_GPIO_CDEV */ - #endif /* GPIOLIB_CDEV_H */ diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 3cdf9effc13a..089ddcaa9bc6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -480,11 +480,23 @@ static void gpiodevice_release(struct device *dev) kfree(gdev); } +#ifdef CONFIG_GPIO_CDEV +#define gcdev_register(gdev, devt) gpiolib_cdev_register((gdev), (devt)) +#define gcdev_unregister(gdev) gpiolib_cdev_unregister((gdev)) +#else +/* + * gpiolib_cdev_register() indirectly calls device_add(), which is still + * required even when cdev is not selected. + */ +#define gcdev_register(gdev, devt) device_add(&(gdev)->dev) +#define gcdev_unregister(gdev) device_del(&(gdev)->dev) +#endif + static int gpiochip_setup_dev(struct gpio_device *gdev) { int ret; - ret = gpiolib_cdev_register(gdev, gpio_devt); + ret = gcdev_register(gdev, gpio_devt); if (ret) return ret; @@ -500,7 +512,7 @@ static int gpiochip_setup_dev(struct gpio_device *gdev) return 0; err_remove_device: - gpiolib_cdev_unregister(gdev); + gcdev_unregister(gdev); return ret; } @@ -825,7 +837,7 @@ void gpiochip_remove(struct gpio_chip *gc) * be removed, else it will be dangling until the last user is * gone. */ - gpiolib_cdev_unregister(gdev); + gcdev_unregister(gdev); put_device(&gdev->dev); } EXPORT_SYMBOL_GPL(gpiochip_remove); From 93bd813c17763177cf87e96c2313bd4dd747d234 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Thu, 5 Nov 2020 11:08:04 +0800 Subject: [PATCH 134/710] ASoC: rt1015: add delay to fix pop noise from speaker Add delay to fix pop noise from speaker. Signed-off-by: Jack Yu Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20201105030804.31115-1-jack.yu@realtek.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/rt1015.txt | 6 ++++++ include/sound/rt1015.h | 15 ++++++++++++++ sound/soc/codecs/rt1015.c | 20 +++++++++++++++++++ sound/soc/codecs/rt1015.h | 2 ++ 4 files changed, 43 insertions(+) create mode 100644 include/sound/rt1015.h diff --git a/Documentation/devicetree/bindings/sound/rt1015.txt b/Documentation/devicetree/bindings/sound/rt1015.txt index fcfd02d8d32f..e498966d436f 100644 --- a/Documentation/devicetree/bindings/sound/rt1015.txt +++ b/Documentation/devicetree/bindings/sound/rt1015.txt @@ -8,10 +8,16 @@ Required properties: - reg : The I2C address of the device. +Optional properties: + +- realtek,power-up-delay-ms + Set a delay time for flush work to be completed, + this value is adjustable depending on platform. Example: rt1015: codec@28 { compatible = "realtek,rt1015"; reg = <0x28>; + realtek,power-up-delay-ms = <50>; }; diff --git a/include/sound/rt1015.h b/include/sound/rt1015.h new file mode 100644 index 000000000000..70a7538d4c89 --- /dev/null +++ b/include/sound/rt1015.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/sound/rt1015.h -- Platform data for RT1015 + * + * Copyright 2020 Realtek Microelectronics + */ + +#ifndef __LINUX_SND_RT1015_H +#define __LINUX_SND_RT1015_H + +struct rt1015_platform_data { + unsigned int power_up_delay_ms; +}; + +#endif diff --git a/sound/soc/codecs/rt1015.c b/sound/soc/codecs/rt1015.c index 25fe2ddedd54..967193518349 100644 --- a/sound/soc/codecs/rt1015.c +++ b/sound/soc/codecs/rt1015.c @@ -27,10 +27,15 @@ #include #include #include +#include #include "rl6231.h" #include "rt1015.h" +static const struct rt1015_platform_data i2s_default_platform_data = { + .power_up_delay_ms = 50, +}; + static const struct reg_default rt1015_reg[] = { { 0x0000, 0x0000 }, { 0x0004, 0xa000 }, @@ -650,6 +655,7 @@ static int rt1015_amp_drv_event(struct snd_soc_dapm_widget *w, case SND_SOC_DAPM_POST_PMU: if (rt1015->hw_config == RT1015_HW_28) schedule_delayed_work(&rt1015->flush_work, msecs_to_jiffies(10)); + msleep(rt1015->pdata.power_up_delay_ms); break; default: break; @@ -1067,9 +1073,16 @@ static struct acpi_device_id rt1015_acpi_match[] = { MODULE_DEVICE_TABLE(acpi, rt1015_acpi_match); #endif +static void rt1015_parse_dt(struct rt1015_priv *rt1015, struct device *dev) +{ + device_property_read_u32(dev, "realtek,power-up-delay-ms", + &rt1015->pdata.power_up_delay_ms); +} + static int rt1015_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { + struct rt1015_platform_data *pdata = dev_get_platdata(&i2c->dev); struct rt1015_priv *rt1015; int ret; unsigned int val; @@ -1081,6 +1094,13 @@ static int rt1015_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, rt1015); + rt1015->pdata = i2s_default_platform_data; + + if (pdata) + rt1015->pdata = *pdata; + else + rt1015_parse_dt(rt1015, &i2c->dev); + rt1015->regmap = devm_regmap_init_i2c(i2c, &rt1015_regmap); if (IS_ERR(rt1015->regmap)) { ret = PTR_ERR(rt1015->regmap); diff --git a/sound/soc/codecs/rt1015.h b/sound/soc/codecs/rt1015.h index d3fdd30aca6d..15cadb361ec3 100644 --- a/sound/soc/codecs/rt1015.h +++ b/sound/soc/codecs/rt1015.h @@ -12,6 +12,7 @@ #ifndef __RT1015_H__ #define __RT1015_H__ +#include #define RT1015_DEVICE_ID_VAL 0x1011 #define RT1015_DEVICE_ID_VAL2 0x1015 @@ -380,6 +381,7 @@ enum { struct rt1015_priv { struct snd_soc_component *component; + struct rt1015_platform_data pdata; struct regmap *regmap; int sysclk; int sysclk_src; From e68e28b4a9d71261e3f8fd05a72d6cf0b443a493 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Wed, 30 Sep 2020 16:31:11 +0300 Subject: [PATCH 135/710] net/mlx5e: Fix modify header actions memory leak Modify header actions are allocated during parse tc actions and only freed during the flow creation, however, on error flow the allocated memory is wrongly unfreed. Fix this by calling dealloc_mod_hdr_actions in __mlx5e_add_fdb_flow and mlx5e_add_nic_flow error flow. Fixes: d7e75a325cb2 ("net/mlx5e: Add offloading of E-Switch TC pedit (header re-write) actions") Fixes: 2f4fe4cab073 ("net/mlx5e: Add offloading of NIC TC pedit (header re-write) actions") Signed-off-by: Maor Dickman Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e3a968e9e2a0..2e2fa0440032 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4658,6 +4658,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, return flow; err_free: + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); mlx5e_flow_put(priv, flow); out: return ERR_PTR(err); @@ -4802,6 +4803,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, return 0; err_free: + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); mlx5e_flow_put(priv, flow); out: return err; From 78c906e430b13d30a8cfbdef4ccbbe1686841a9e Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 31 Aug 2020 16:17:29 +0300 Subject: [PATCH 136/710] net/mlx5e: Protect encap route dev from concurrent release In functions mlx5e_route_lookup_ipv{4|6}() route_dev can be arbitrary net device and not necessary mlx5 eswitch port representor. As such, in order to ensure that route_dev is not destroyed concurrent the code needs either explicitly take reference to the device before releasing reference to rtable instance or ensure that caller holds rtnl lock. First approach is chosen as a fix since rtnl lock dependency was intentionally removed from mlx5 TC layer. To prevent unprotected usage of route_dev in encap code take a reference to the device before releasing rt. Don't save direct pointer to the device in mlx5_encap_entry structure and use ifindex instead. Modify users of route_dev pointer to properly obtain the net device instance from its ifindex. Fixes: 61086f391044 ("net/mlx5e: Protect encap hash table with mutex") Fixes: 6707f74be862 ("net/mlx5e: Update hw flows when encap source mac changed") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/rep/tc.c | 6 +- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 72 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/en_rep.h | 2 +- 3 files changed, 52 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index e36e505d38ad..d29af7b9c695 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -107,12 +107,16 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv, mlx5e_tc_encap_flows_del(priv, e, &flow_list); if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { + struct net_device *route_dev; + ether_addr_copy(e->h_dest, ha); ether_addr_copy(eth->h_dest, ha); /* Update the encap source mac, in case that we delete * the flows when encap source mac changed. */ - ether_addr_copy(eth->h_source, e->route_dev->dev_addr); + route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex); + if (route_dev) + ether_addr_copy(eth->h_source, route_dev->dev_addr); mlx5e_tc_encap_flows_add(priv, e, &flow_list); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 7cce85faa16f..90930e54b6f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -77,13 +77,13 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, return 0; } -static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device **out_dev, - struct net_device **route_dev, - struct flowi4 *fl4, - struct neighbour **out_n, - u8 *out_ttl) +static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct net_device **route_dev, + struct flowi4 *fl4, + struct neighbour **out_n, + u8 *out_ttl) { struct neighbour *n; struct rtable *rt; @@ -117,18 +117,28 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, ip_rt_put(rt); return ret; } + dev_hold(*route_dev); if (!(*out_ttl)) *out_ttl = ip4_dst_hoplimit(&rt->dst); n = dst_neigh_lookup(&rt->dst, &fl4->daddr); ip_rt_put(rt); - if (!n) + if (!n) { + dev_put(*route_dev); return -ENOMEM; + } *out_n = n; return 0; } +static void mlx5e_route_lookup_ipv4_put(struct net_device *route_dev, + struct neighbour *n) +{ + neigh_release(n); + dev_put(route_dev); +} + static const char *mlx5e_netdev_kind(struct net_device *dev) { if (dev->rtnl_link_ops) @@ -193,8 +203,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, fl4.saddr = tun_key->u.ipv4.src; ttl = tun_key->ttl; - err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev, - &fl4, &n, &ttl); + err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &out_dev, &route_dev, + &fl4, &n, &ttl); if (err) return err; @@ -223,7 +233,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, e->m_neigh.family = n->ops->family; memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); e->out_dev = out_dev; - e->route_dev = route_dev; + e->route_dev_ifindex = route_dev->ifindex; /* It's important to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the @@ -278,7 +288,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - neigh_release(n); + mlx5e_route_lookup_ipv4_put(route_dev, n); return err; destroy_neigh_entry: @@ -286,18 +296,18 @@ destroy_neigh_entry: free_encap: kfree(encap_header); release_neigh: - neigh_release(n); + mlx5e_route_lookup_ipv4_put(route_dev, n); return err; } #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) -static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device **out_dev, - struct net_device **route_dev, - struct flowi6 *fl6, - struct neighbour **out_n, - u8 *out_ttl) +static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct net_device **route_dev, + struct flowi6 *fl6, + struct neighbour **out_n, + u8 *out_ttl) { struct dst_entry *dst; struct neighbour *n; @@ -318,15 +328,25 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return ret; } + dev_hold(*route_dev); n = dst_neigh_lookup(dst, &fl6->daddr); dst_release(dst); - if (!n) + if (!n) { + dev_put(*route_dev); return -ENOMEM; + } *out_n = n; return 0; } +static void mlx5e_route_lookup_ipv6_put(struct net_device *route_dev, + struct neighbour *n) +{ + neigh_release(n); + dev_put(route_dev); +} + int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e) @@ -348,8 +368,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, fl6.daddr = tun_key->u.ipv6.dst; fl6.saddr = tun_key->u.ipv6.src; - err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev, - &fl6, &n, &ttl); + err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &out_dev, &route_dev, + &fl6, &n, &ttl); if (err) return err; @@ -378,7 +398,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, e->m_neigh.family = n->ops->family; memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); e->out_dev = out_dev; - e->route_dev = route_dev; + e->route_dev_ifindex = route_dev->ifindex; /* It's importent to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the @@ -433,7 +453,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - neigh_release(n); + mlx5e_route_lookup_ipv6_put(route_dev, n); return err; destroy_neigh_entry: @@ -441,7 +461,7 @@ destroy_neigh_entry: free_encap: kfree(encap_header); release_neigh: - neigh_release(n); + mlx5e_route_lookup_ipv6_put(route_dev, n); return err; } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 9020d1419bcf..8932c387d46a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -186,7 +186,7 @@ struct mlx5e_encap_entry { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; - struct net_device *route_dev; + int route_dev_ifindex; struct mlx5e_tc_tunnel *tunnel; int reformat_type; u8 flags; From f42139ba49791ab6b12443c60044872705b74a1e Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 8 Oct 2020 11:34:03 +0300 Subject: [PATCH 137/710] net/mlx5e: Use spin_lock_bh for async_icosq_lock async_icosq_lock may be taken from softirq and non-softirq contexts. It requires protection with spin_lock_bh, otherwise a softirq may be triggered in the middle of the critical section, and it may deadlock if it tries to take the same lock. This patch fixes such a scenario by using spin_lock_bh to disable softirqs on that CPU while inside the critical section. Fixes: 8d94b590f1e4 ("net/mlx5e: Turn XSK ICOSQ into a general asynchronous one") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 4 ++-- .../net/ethernet/mellanox/mlx5/core/en/xsk/tx.c | 4 ++-- .../ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 14 +++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 4e574ac73019..be3465ba38ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -122,9 +122,9 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c) set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* TX queue is created active. */ - spin_lock(&c->async_icosq_lock); + spin_lock_bh(&c->async_icosq_lock); mlx5e_trigger_irq(&c->async_icosq); - spin_unlock(&c->async_icosq_lock); + spin_unlock_bh(&c->async_icosq_lock); } void mlx5e_deactivate_xsk(struct mlx5e_channel *c) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index fb671a457129..8e96260fce1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -36,9 +36,9 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state)) return 0; - spin_lock(&c->async_icosq_lock); + spin_lock_bh(&c->async_icosq_lock); mlx5e_trigger_irq(&c->async_icosq); - spin_unlock(&c->async_icosq_lock); + spin_unlock_bh(&c->async_icosq_lock); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index ccaccb9fc2f7..7f6221b8b1f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -188,7 +188,7 @@ static int post_rx_param_wqes(struct mlx5e_channel *c, err = 0; sq = &c->async_icosq; - spin_lock(&c->async_icosq_lock); + spin_lock_bh(&c->async_icosq_lock); cseg = post_static_params(sq, priv_rx); if (IS_ERR(cseg)) @@ -199,7 +199,7 @@ static int post_rx_param_wqes(struct mlx5e_channel *c, mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); unlock: - spin_unlock(&c->async_icosq_lock); + spin_unlock_bh(&c->async_icosq_lock); return err; @@ -265,10 +265,10 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1); - spin_lock(&sq->channel->async_icosq_lock); + spin_lock_bh(&sq->channel->async_icosq_lock); if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { - spin_unlock(&sq->channel->async_icosq_lock); + spin_unlock_bh(&sq->channel->async_icosq_lock); err = -ENOSPC; goto err_dma_unmap; } @@ -299,7 +299,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, icosq_fill_wi(sq, pi, &wi); sq->pc++; mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); - spin_unlock(&sq->channel->async_icosq_lock); + spin_unlock_bh(&sq->channel->async_icosq_lock); return 0; @@ -360,7 +360,7 @@ static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx err = 0; sq = &c->async_icosq; - spin_lock(&c->async_icosq_lock); + spin_lock_bh(&c->async_icosq_lock); cseg = post_static_params(sq, priv_rx); if (IS_ERR(cseg)) { @@ -372,7 +372,7 @@ static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); priv_rx->stats->tls_resync_res_ok++; unlock: - spin_unlock(&c->async_icosq_lock); + spin_unlock_bh(&c->async_icosq_lock); return err; } From 465e7baab6d93b399344f5868f84c177ab5cd16f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 21 Oct 2020 08:42:49 +0300 Subject: [PATCH 138/710] net/mlx5: Fix deletion of duplicate rules When a rule is duplicated, the refcount of the rule is increased so only the second deletion of the rule should cause destruction of the FTE. Currently, the FTE will be destroyed in the first deletion of rule since the modify_mask will be 0. Fix it and call to destroy FTE only if all the rules (FTE's children) have been removed. Fixes: 718ce4d601db ("net/mlx5: Consolidate update FTE for all removal changes") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 16091838bfcf..325a5b0d6829 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2010,10 +2010,11 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) down_write_ref_node(&fte->node, false); for (i = handle->num_rules - 1; i >= 0; i--) tree_remove_node(&handle->rule[i]->node, true); - if (fte->modify_mask && fte->dests_size) { - modify_fte(fte); + if (fte->dests_size) { + if (fte->modify_mask) + modify_fte(fte); up_write_ref_node(&fte->node, false); - } else { + } else if (list_empty(&fte->node.children)) { del_hw_fte(&fte->node); /* Avoid double call to del_hw_fte */ fte->node.del_hw_func = NULL; From ae35859445607f7f18dd4f332749219cd636ed59 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 2 Nov 2020 12:41:28 +0200 Subject: [PATCH 139/710] net/mlx5: E-switch, Avoid extack error log for disabled vport When E-switch vport is disabled, querying its hardware address is unsupported. Avoid setting extack error log message in such case. Fixes: f099fde16db3 ("net/mlx5: E-switch, Support querying port function mac address") Signed-off-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 6e6a9a563992..e8e6294c7cca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1902,8 +1902,6 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, ether_addr_copy(hw_addr, vport->info.mac); *hw_addr_len = ETH_ALEN; err = 0; - } else { - NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); } mutex_unlock(&esw->state_lock); return err; From c5eb51adf06b2644fa28d4af886bfdcc53e288da Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 23 Sep 2020 12:58:44 +0300 Subject: [PATCH 140/710] net/mlx5e: Fix VXLAN synchronization after function reload During driver reload, perform firmware tear-down which results in firmware losing the configured VXLAN ports. These ports are still available in the driver's database. Fix this by cleaning up driver's VXLAN database in the nic unload flow, before firmware tear-down. With that, minimize mlx5_vxlan_destroy() to remove only what was added in mlx5_vxlan_create() and warn on leftover UDP ports. Fixes: 18a2b7f969c9 ("net/mlx5: convert to new udp_tunnel infrastructure") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 1 + .../ethernet/mellanox/mlx5/core/lib/vxlan.c | 23 ++++++++++++++----- .../ethernet/mellanox/mlx5/core/lib/vxlan.h | 2 ++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b3f02aac7f26..ebce97921e03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5253,6 +5253,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5e_disable_async_events(priv); mlx5_lag_remove(mdev); + mlx5_vxlan_reset_to_default(mdev->vxlan); } int mlx5e_update_nic_rx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c index 3315afe2f8dc..38084400ee8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c @@ -167,6 +167,17 @@ struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev) } void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) +{ + if (!mlx5_vxlan_allowed(vxlan)) + return; + + mlx5_vxlan_del_port(vxlan, IANA_VXLAN_UDP_PORT); + WARN_ON(!hash_empty(vxlan->htable)); + + kfree(vxlan); +} + +void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) { struct mlx5_vxlan_port *vxlanp; struct hlist_node *tmp; @@ -175,12 +186,12 @@ void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) if (!mlx5_vxlan_allowed(vxlan)) return; - /* Lockless since we are the only hash table consumers*/ hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) { - hash_del(&vxlanp->hlist); - mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port); - kfree(vxlanp); + /* Don't delete default UDP port added by the HW. + * Remove only user configured ports + */ + if (vxlanp->udp_port == IANA_VXLAN_UDP_PORT) + continue; + mlx5_vxlan_del_port(vxlan, vxlanp->udp_port); } - - kfree(vxlan); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h index ec766529f49b..34ef662da35e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h @@ -56,6 +56,7 @@ void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan); int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port); int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port); bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port); +void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan); #else static inline struct mlx5_vxlan* mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); } @@ -63,6 +64,7 @@ static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; } static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; } static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; } static inline bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return false; } +static inline void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) { return; } #endif #endif /* __MLX5_VXLAN_H__ */ From 1a50cf9a67ff2241c2949d30bc11c8dd4280eef8 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 22 Oct 2020 12:49:51 +0300 Subject: [PATCH 141/710] net/mlx5e: Fix incorrect access of RCU-protected xdp_prog rq->xdp_prog is RCU-protected and should be accessed only with rcu_access_pointer for the NULL check in mlx5e_poll_rx_cq. rq->xdp_prog may change on the fly only from one non-NULL value to another non-NULL value, so the checks in mlx5e_xdp_handle and mlx5e_poll_rx_cq will have the same result during one NAPI cycle, meaning that no additional synchronization is needed. Fixes: fe45386a2082 ("net/mlx5e: Use RCU to protect rq->xdp_prog") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 599f5b5ebc97..6628a0197b4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1584,7 +1584,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); out: - if (rq->xdp_prog) + if (rcu_access_pointer(rq->xdp_prog)) mlx5e_xdp_rx_poll_complete(rq); mlx5_cqwq_update_db_record(cqwq); From 1905cac9d621a10358bc2750f8b25b64df439a21 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 10:41:01 -0400 Subject: [PATCH 142/710] NFSD: NFSv3 PATHCONF Reply is improperly formed Commit cc028a10a48c ("NFSD: Hoist status code encoding into XDR encoder functions") missed a spot. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3xdr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 9c23b6acf234..2277f83da250 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1114,6 +1114,7 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_pathconfres *resp = rqstp->rq_resp; + *p++ = resp->status; *p++ = xdr_zero; /* no post_op_attr */ if (resp->status == 0) { From d321ff589c16d8c2207485a6d7fbdb14e873d46e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 10:41:07 -0400 Subject: [PATCH 143/710] SUNRPC: Fix general protection fault in trace_rpc_xdr_overflow() The TP_fast_assign() section is careful enough not to dereference xdr->rqst if it's NULL. The TP_STRUCT__entry section is not. Fixes: 5582863f450c ("SUNRPC: Add XDR overflow trace event") Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f45b3c01370c..2477014e3fa6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -655,10 +655,10 @@ TRACE_EVENT(rpc_xdr_overflow, __field(size_t, tail_len) __field(unsigned int, page_len) __field(unsigned int, len) - __string(progname, - xdr->rqst->rq_task->tk_client->cl_program->name) - __string(procedure, - xdr->rqst->rq_task->tk_msg.rpc_proc->p_name) + __string(progname, xdr->rqst ? + xdr->rqst->rq_task->tk_client->cl_program->name : "unknown") + __string(procedure, xdr->rqst ? + xdr->rqst->rq_task->tk_msg.rpc_proc->p_name : "unknown") ), TP_fast_assign( From 66d60e3ad1e44d42d940767f62bf265f107fb628 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 10:41:12 -0400 Subject: [PATCH 144/710] NFSD: MKNOD should return NFSERR_BADTYPE instead of NFSERR_INVAL A late paragraph of RFC 1813 Section 3.3.11 states: | ... if the server does not support the target type or the | target type is illegal, the error, NFS3ERR_BADTYPE, should | be returned. Note that NF3REG, NF3DIR, and NF3LNK are | illegal types for MKNOD. The Linux NFS server incorrectly returns NFSERR_INVAL in these cases. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 14468613d150..a633044b0dc1 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -316,10 +316,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); - if (argp->ftype == 0 || argp->ftype >= NF3BAD) { - resp->status = nfserr_inval; - goto out; - } if (argp->ftype == NF3CHR || argp->ftype == NF3BLK) { rdev = MKDEV(argp->major, argp->minor); if (MAJOR(rdev) != argp->major || @@ -328,7 +324,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) goto out; } } else if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO) { - resp->status = nfserr_inval; + resp->status = nfserr_badtype; goto out; } From 36e1e5ba90fb3fba6888fae26e4dfc28bf70aaf1 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Thu, 29 Oct 2020 15:07:15 -0400 Subject: [PATCH 145/710] NFSD: Fix use-after-free warning when doing inter-server copy The source file nfsd_file is not constructed the same as other nfsd_file's via nfsd_file_alloc. nfsd_file_put should not be called to free the object; nfsd_file_put is not the inverse of kzalloc, instead kfree is called by nfsd4_do_async_copy when done. Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Dai Ngo Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ad2fa1a8e7ad..9c43cad7e408 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1299,7 +1299,7 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, struct nfsd_file *dst) { nfs42_ssc_close(src->nf_file); - nfsd_file_put(src); + /* 'src' is freed by nfsd4_do_async_copy */ nfsd_file_put(dst); mntput(ss_mnt); } From 49a361327332c9221438397059067f9b205f690d Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Thu, 29 Oct 2020 15:07:16 -0400 Subject: [PATCH 146/710] NFSD: fix missing refcount in nfsd4_copy by nfsd4_do_async_copy Need to initialize nfsd4_copy's refcount to 1 to avoid use-after-free warning when nfs4_put_copy is called from nfsd4_cb_offload_release. Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Dai Ngo Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9c43cad7e408..e83b21778816 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1486,6 +1486,7 @@ do_callback: cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!cb_copy) goto out; + refcount_set(&cb_copy->refcount, 1); memcpy(&cb_copy->cp_res, ©->cp_res, sizeof(copy->cp_res)); cb_copy->cp_clp = copy->cp_clp; cb_copy->nfserr = copy->nfserr; From a422490a595600659664901b609aacccdbba4a5f Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 16 Oct 2020 14:57:23 -0400 Subject: [PATCH 147/710] drm/amd/display: Add missing pflip irq If we have more than 4 displays we will run into dummy irq calls or flip timout issues. Signed-off-by: Bhawanpreet Lakha Reviewed-by: Charlene Liu Acked-by: Qingqing Zhuo Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.9.x --- drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c index 49689f71f4f1..0effbb2bd74a 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c @@ -306,8 +306,8 @@ irq_source_info_dcn30[DAL_IRQ_SOURCES_NUMBER] = { pflip_int_entry(1), pflip_int_entry(2), pflip_int_entry(3), - [DC_IRQ_SOURCE_PFLIP5] = dummy_irq_entry(), - [DC_IRQ_SOURCE_PFLIP6] = dummy_irq_entry(), + pflip_int_entry(4), + pflip_int_entry(5), [DC_IRQ_SOURCE_PFLIP_UNDERLAY0] = dummy_irq_entry(), gpio_pad_int_entry(0), gpio_pad_int_entry(1), From f9b7ff0d7f7a466a920424246e7ddc2b84c87e52 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 5 Nov 2020 11:52:30 +0000 Subject: [PATCH 148/710] tools/bpftool: Fix attaching flow dissector My earlier patch to reject non-zero arguments to flow dissector attach broke attaching via bpftool. Instead of 0 it uses -1 for target_fd. Fix this by passing a zero argument when attaching the flow dissector. Fixes: 1b514239e859 ("bpf: flow_dissector: Check value of unused flags to BPF_PROG_ATTACH") Reported-by: Jiri Benc Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201105115230.296657-1-lmb@cloudflare.com --- tools/bpf/bpftool/prog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index d942c1e3372c..acdb2c245f0a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -940,7 +940,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd, } if (*attach_type == BPF_FLOW_DISSECTOR) { - *mapfd = -1; + *mapfd = 0; return 0; } From 7c0afcad7507636529e6a5a2a5eef5482619a449 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 5 Nov 2020 11:51:09 -0800 Subject: [PATCH 149/710] bpf: BPF_PRELOAD depends on BPF_SYSCALL Fix build error when BPF_SYSCALL is not set/enabled but BPF_PRELOAD is by making BPF_PRELOAD depend on BPF_SYSCALL. ERROR: modpost: "bpf_preload_ops" [kernel/bpf/preload/bpf_preload.ko] undefined! Reported-by: kernel test robot Reported-by: Randy Dunlap Signed-off-by: Randy Dunlap Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201105195109.26232-1-rdunlap@infradead.org --- kernel/bpf/preload/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/preload/Kconfig b/kernel/bpf/preload/Kconfig index ace49111d3a3..26bced262473 100644 --- a/kernel/bpf/preload/Kconfig +++ b/kernel/bpf/preload/Kconfig @@ -6,6 +6,7 @@ config USERMODE_DRIVER menuconfig BPF_PRELOAD bool "Preload BPF file system with kernel specific program and map iterators" depends on BPF + depends on BPF_SYSCALL # The dependency on !COMPILE_TEST prevents it from being enabled # in allmodconfig or allyesconfig configurations depends on !COMPILE_TEST From d3bec0138bfbe58606fc1d6f57a4cdc1a20218db Mon Sep 17 00:00:00 2001 From: David Verbeiren Date: Wed, 4 Nov 2020 12:23:32 +0100 Subject: [PATCH 150/710] bpf: Zero-fill re-used per-cpu map element Zero-fill element values for all other cpus than current, just as when not using prealloc. This is the only way the bpf program can ensure known initial values for all cpus ('onallcpus' cannot be set when coming from the bpf program). The scenario is: bpf program inserts some elements in a per-cpu map, then deletes some (or userspace does). When later adding new elements using bpf_map_update_elem(), the bpf program can only set the value of the new elements for the current cpu. When prealloc is enabled, previously deleted elements are re-used. Without the fix, values for other cpus remain whatever they were when the re-used entry was previously freed. A selftest is added to validate correct operation in above scenario as well as in case of LRU per-cpu map element re-use. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: David Verbeiren Signed-off-by: Alexei Starovoitov Acked-by: Matthieu Baerts Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201104112332.15191-1-david.verbeiren@tessares.net --- kernel/bpf/hashtab.c | 30 ++- .../selftests/bpf/prog_tests/map_init.c | 214 ++++++++++++++++++ .../selftests/bpf/progs/test_map_init.c | 33 +++ 3 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/map_init.c create mode 100644 tools/testing/selftests/bpf/progs/test_map_init.c diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 1815e97d4c9c..1fccba6e88c4 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -821,6 +821,32 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, } } +static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, + void *value, bool onallcpus) +{ + /* When using prealloc and not setting the initial value on all cpus, + * zero-fill element values for other cpus (just as what happens when + * not using prealloc). Otherwise, bpf program has no way to ensure + * known initial values for cpus other than current one + * (onallcpus=false always when coming from bpf prog). + */ + if (htab_is_prealloc(htab) && !onallcpus) { + u32 size = round_up(htab->map.value_size, 8); + int current_cpu = raw_smp_processor_id(); + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu == current_cpu) + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value, + size); + else + memset(per_cpu_ptr(pptr, cpu), 0, size); + } + } else { + pcpu_copy_value(htab, pptr, value, onallcpus); + } +} + static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) { return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS && @@ -891,7 +917,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, } } - pcpu_copy_value(htab, pptr, value, onallcpus); + pcpu_init_value(htab, pptr, value, onallcpus); if (!prealloc) htab_elem_set_ptr(l_new, key_size, pptr); @@ -1183,7 +1209,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), value, onallcpus); } else { - pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size), + pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), value, onallcpus); hlist_nulls_add_head_rcu(&l_new->hash_node, head); l_new = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c new file mode 100644 index 000000000000..14a31109dd0e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Tessares SA */ + +#include +#include "test_map_init.skel.h" + +#define TEST_VALUE 0x1234 +#define FILL_VALUE 0xdeadbeef + +static int nr_cpus; +static int duration; + +typedef unsigned long long map_key_t; +typedef unsigned long long map_value_t; +typedef struct { + map_value_t v; /* padding */ +} __bpf_percpu_val_align pcpu_map_value_t; + + +static int map_populate(int map_fd, int num) +{ + pcpu_map_value_t value[nr_cpus]; + int i, err; + map_key_t key; + + for (i = 0; i < nr_cpus; i++) + bpf_percpu(value, i) = FILL_VALUE; + + for (key = 1; key <= num; key++) { + err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + return -1; + } + + return 0; +} + +static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz, + int *map_fd, int populate) +{ + struct test_map_init *skel; + int err; + + skel = test_map_init__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + err = bpf_map__set_type(skel->maps.hashmap1, map_type); + if (!ASSERT_OK(err, "bpf_map__set_type")) + goto error; + + err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto error; + + err = test_map_init__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto error; + + *map_fd = bpf_map__fd(skel->maps.hashmap1); + if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n")) + goto error; + + err = map_populate(*map_fd, populate); + if (!ASSERT_OK(err, "map_populate")) + goto error_map; + + return skel; + +error_map: + close(*map_fd); +error: + test_map_init__destroy(skel); + return NULL; +} + +/* executes bpf program that updates map with key, value */ +static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key, + map_value_t value) +{ + struct test_map_init__bss *bss; + + bss = skel->bss; + + bss->inKey = key; + bss->inValue = value; + bss->inPid = getpid(); + + if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach")) + return -1; + + /* Let tracepoint trigger */ + syscall(__NR_getpgid); + + test_map_init__detach(skel); + + return 0; +} + +static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected) +{ + int i, nzCnt = 0; + map_value_t val; + + for (i = 0; i < nr_cpus; i++) { + val = bpf_percpu(value, i); + if (val) { + if (CHECK(val != expected, "map value", + "unexpected for cpu %d: 0x%llx\n", i, val)) + return -1; + nzCnt++; + } + } + + if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n", + nzCnt)) + return -1; + + return 0; +} + +/* Add key=1 elem with values set for all CPUs + * Delete elem key=1 + * Run bpf prog that inserts new key=1 elem with value=0x1234 + * (bpf prog can only set value for current CPU) + * Lookup Key=1 and check value is as expected for all CPUs: + * value set by bpf prog for one CPU, 0 for all others + */ +static void test_pcpu_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* max 1 elem in map so insertion is forced to reuse freed entry */ + skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* delete element so the entry can be re-used*/ + key = 1; + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "bpf_map_delete_elem")) + goto cleanup; + + /* run bpf prog that inserts new elem, re-using the slot just freed */ + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=1 was re-created by bpf prog */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +/* Add key=1 and key=2 elems with values set for all CPUs + * Run bpf prog that inserts new key=3 elem + * (only for current cpu; other cpus should have initial value = 0) + * Lookup Key=1 and check value is as expected for all CPUs + */ +static void test_pcpu_lru_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* Set up LRU map with 2 elements, values filled for all CPUs. + * With these 2 elements, the LRU map is full + */ + skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* run bpf prog that inserts new key=3 element, re-using LRU slot */ + key = 3; + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=3 replaced one of earlier elements */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +void test_map_init(void) +{ + nr_cpus = bpf_num_possible_cpus(); + if (nr_cpus <= 1) { + printf("%s:SKIP: >1 cpu needed for this test\n", __func__); + test__skip(); + return; + } + + if (test__start_subtest("pcpu_map_init")) + test_pcpu_map_init(); + if (test__start_subtest("pcpu_lru_map_init")) + test_pcpu_lru_map_init(); +} diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c new file mode 100644 index 000000000000..c89d28ead673 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_init.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Tessares SA */ + +#include "vmlinux.h" +#include + +__u64 inKey = 0; +__u64 inValue = 0; +__u32 inPid = 0; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 2); + __type(key, __u64); + __type(value, __u64); +} hashmap1 SEC(".maps"); + + +SEC("tp/syscalls/sys_enter_getpgid") +int sysenter_getpgid(const void *ctx) +{ + /* Just do it for once, when called from our own test prog. This + * ensures the map value is only updated for a single CPU. + */ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + if (cur_pid == inPid) + bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST); + + return 0; +} + +char _license[] SEC("license") = "GPL"; From e2b2e4386cb7a5e935dff388cf8961317daf39ce Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Mon, 2 Nov 2020 21:25:15 +0100 Subject: [PATCH 151/710] staging: mt7621-pci: avoid to request pci bus resources After upgrading kernel to version 5.9.x the driver was not working anymore showing the following kernel trace: ... mt7621-pci 1e140000.pcie: resource collision: [mem 0x60000000-0x6fffffff] conflicts with pcie@1e140000 [mem 0x60000000-0x6fffffff] ------------[ cut here ]------------ WARNING: CPU: 2 PID: 73 at kernel/resource.c:1400 devm_request_resource+0xfc/0x10c Modules linked in: CPU: 2 PID: 73 Comm: kworker/2:1 Not tainted 5.9.2 #0 Workqueue: events deferred_probe_work_func Stack : 00000000 81590000 807d0a1c 808a0000 8fd49080 807d0000 00000009 808ac820 00000001 808338d0 7fff0001 800839dc 00000049 00000001 8fe51b00 367204ab 00000000 00000000 807d0a1c 807c0000 00000001 80082358 8fe50000 00559000 00000000 8fe519f1 ffffffff 00000005 00000000 00000001 00000000 807d0000 00000009 808ac820 00000001 808338d0 00000001 803bf1b0 00000008 81390008 Call Trace: [<8000d018>] show_stack+0x30/0x100 [<8032e66c>] dump_stack+0xa4/0xd4 [<8002db1c>] __warn+0xc0/0x134 [<8002dbec>] warn_slowpath_fmt+0x5c/0xac [<80033b34>] devm_request_resource+0xfc/0x10c [<80365ff8>] devm_request_pci_bus_resources+0x58/0xdc [<8048e13c>] mt7621_pci_probe+0x8dc/0xe48 [<803d2140>] platform_drv_probe+0x40/0x94 [<803cfd94>] really_probe+0x108/0x4ec [<803cd958>] bus_for_each_drv+0x70/0xb0 [<803d0388>] __device_attach+0xec/0x164 [<803cec8c>] bus_probe_device+0xa4/0xc0 [<803cf1c4>] deferred_probe_work_func+0x80/0xc4 [<80048444>] process_one_work+0x260/0x510 [<80048a4c>] worker_thread+0x358/0x5cc [<8004f7d0>] kthread+0x134/0x13c [<80007478>] ret_from_kernel_thread+0x14/0x1c ---[ end trace a9dd2e37537510d3 ]--- mt7621-pci 1e140000.pcie: Error requesting resources mt7621-pci: probe of 1e140000.pcie failed with error -16 ... With commit 669cbc708122 ("PCI: Move DT resource setup into devm_pci_alloc_host_bridge()"), the DT 'ranges' is parsed and populated into resources when the host bridge is allocated. The resources are requested as well, but that happens a 2nd time for this driver in mt7621_pcie_request_resources(). Hence we should avoid this second request. Also, the bus ranges was also populated by default, so we can remove it from mt7621_pcie_request_resources() to avoid the following trace if we don't avoid it: pci_bus 0000:00: busn_res: can not insert [bus 00-ff] under domain [bus 00-ff] (conflicts with (null) [bus 00-ff]) Function 'mt7621_pcie_request_resources' has been renamed into 'mt7621_pcie_add_resources' which now is a more accurate name for this function. Cc: stable@vger.kernel.org #5.9.x- Signed-off-by: Sergio Paracuellos Link: https://lore.kernel.org/r/20201102202515.19073-1-sergio.paracuellos@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/mt7621-pci/pci-mt7621.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/staging/mt7621-pci/pci-mt7621.c b/drivers/staging/mt7621-pci/pci-mt7621.c index f961b353c22e..8831db383fad 100644 --- a/drivers/staging/mt7621-pci/pci-mt7621.c +++ b/drivers/staging/mt7621-pci/pci-mt7621.c @@ -653,16 +653,11 @@ static int mt7621_pcie_init_virtual_bridges(struct mt7621_pcie *pcie) return 0; } -static int mt7621_pcie_request_resources(struct mt7621_pcie *pcie, - struct list_head *res) +static void mt7621_pcie_add_resources(struct mt7621_pcie *pcie, + struct list_head *res) { - struct device *dev = pcie->dev; - pci_add_resource_offset(res, &pcie->io, pcie->offset.io); pci_add_resource_offset(res, &pcie->mem, pcie->offset.mem); - pci_add_resource(res, &pcie->busn); - - return devm_request_pci_bus_resources(dev, res); } static int mt7621_pcie_register_host(struct pci_host_bridge *host, @@ -738,11 +733,7 @@ static int mt7621_pci_probe(struct platform_device *pdev) setup_cm_memory_region(pcie); - err = mt7621_pcie_request_resources(pcie, &res); - if (err) { - dev_err(dev, "Error requesting resources\n"); - return err; - } + mt7621_pcie_add_resources(pcie, &res); err = mt7621_pcie_register_host(bridge, &res); if (err) { From 06ea594051707c6b8834ef5b24e9b0730edd391b Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Wed, 4 Nov 2020 21:15:23 +0300 Subject: [PATCH 152/710] staging: ralink-gdma: fix kconfig dependency bug for DMA_RALINK When DMA_RALINK is enabled and DMADEVICES is disabled, it results in the following Kbuild warnings: WARNING: unmet direct dependencies detected for DMA_ENGINE Depends on [n]: DMADEVICES [=n] Selected by [y]: - DMA_RALINK [=y] && STAGING [=y] && RALINK [=y] && !SOC_RT288X [=n] WARNING: unmet direct dependencies detected for DMA_VIRTUAL_CHANNELS Depends on [n]: DMADEVICES [=n] Selected by [y]: - DMA_RALINK [=y] && STAGING [=y] && RALINK [=y] && !SOC_RT288X [=n] The reason is that DMA_RALINK selects DMA_ENGINE and DMA_VIRTUAL_CHANNELS without depending on or selecting DMADEVICES while DMA_ENGINE and DMA_VIRTUAL_CHANNELS are subordinate to DMADEVICES. This can also fail building the kernel as demonstrated in a bug report. Honor the kconfig dependency to remove unmet direct dependency warnings and avoid any potential build failures. Link: https://bugzilla.kernel.org/show_bug.cgi?id=210055 Signed-off-by: Necip Fazil Yildiran Link: https://lore.kernel.org/r/20201104181522.43567-1-fazilyildiran@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ralink-gdma/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/ralink-gdma/Kconfig b/drivers/staging/ralink-gdma/Kconfig index 54e8029e6b1a..0017376234e2 100644 --- a/drivers/staging/ralink-gdma/Kconfig +++ b/drivers/staging/ralink-gdma/Kconfig @@ -2,6 +2,7 @@ config DMA_RALINK tristate "RALINK DMA support" depends on RALINK && !SOC_RT288X + depends on DMADEVICES select DMA_ENGINE select DMA_VIRTUAL_CHANNELS From f6439c531d52193f890807958aaec52905bc0f2e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sat, 28 Mar 2020 11:59:11 +0200 Subject: [PATCH 153/710] thunderbolt: Add support for Intel Tiger Lake-H Intel Tiger Lake-H has the same Thunderbolt/USB4 controller as Tiger Lake-LP. Add the Tiger Lake-H PCI IDs to the driver list of supported devices. Signed-off-by: Mika Westerberg --- drivers/thunderbolt/icm.c | 2 ++ drivers/thunderbolt/nhi.c | 4 ++++ drivers/thunderbolt/nhi.h | 2 ++ drivers/thunderbolt/tb.h | 2 ++ 4 files changed, 10 insertions(+) diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index b51fc3f62b1f..977ba91f4d0e 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -2284,6 +2284,8 @@ struct tb *icm_probe(struct tb_nhi *nhi) case PCI_DEVICE_ID_INTEL_TGL_NHI0: case PCI_DEVICE_ID_INTEL_TGL_NHI1: + case PCI_DEVICE_ID_INTEL_TGL_H_NHI0: + case PCI_DEVICE_ID_INTEL_TGL_H_NHI1: icm->is_supported = icm_tgl_is_supported; icm->driver_ready = icm_icl_driver_ready; icm->set_uuid = icm_icl_set_uuid; diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index e29ac3e3aa1e..db80dc5dfeba 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -1345,6 +1345,10 @@ static struct pci_device_id nhi_ids[] = { .driver_data = (kernel_ulong_t)&icl_nhi_ops }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL_NHI1), .driver_data = (kernel_ulong_t)&icl_nhi_ops }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL_H_NHI0), + .driver_data = (kernel_ulong_t)&icl_nhi_ops }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL_H_NHI1), + .driver_data = (kernel_ulong_t)&icl_nhi_ops }, /* Any USB4 compliant host */ { PCI_DEVICE_CLASS(PCI_CLASS_SERIAL_USB_USB4, ~0) }, diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h index 80162e4b013f..4e0861d75072 100644 --- a/drivers/thunderbolt/nhi.h +++ b/drivers/thunderbolt/nhi.h @@ -75,6 +75,8 @@ extern const struct tb_nhi_ops icl_nhi_ops; #define PCI_DEVICE_ID_INTEL_ICL_NHI0 0x8a17 #define PCI_DEVICE_ID_INTEL_TGL_NHI0 0x9a1b #define PCI_DEVICE_ID_INTEL_TGL_NHI1 0x9a1d +#define PCI_DEVICE_ID_INTEL_TGL_H_NHI0 0x9a1f +#define PCI_DEVICE_ID_INTEL_TGL_H_NHI1 0x9a21 #define PCI_CLASS_SERIAL_USB_USB4 0x0c0340 diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index a9995e21b916..8ea360b0ff77 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -784,6 +784,8 @@ static inline bool tb_switch_is_tiger_lake(const struct tb_switch *sw) switch (sw->config.device_id) { case PCI_DEVICE_ID_INTEL_TGL_NHI0: case PCI_DEVICE_ID_INTEL_TGL_NHI1: + case PCI_DEVICE_ID_INTEL_TGL_H_NHI0: + case PCI_DEVICE_ID_INTEL_TGL_H_NHI1: return true; } } From d8f270efeac850c569c305dc0baa42ac3d607988 Mon Sep 17 00:00:00 2001 From: Arnaud de Turckheim Date: Wed, 4 Nov 2020 16:24:53 +0100 Subject: [PATCH 154/710] gpio: pcie-idio-24: Fix irq mask when masking Fix the bitwise operation to remove only the corresponding bit from the mask. Fixes: 585562046628 ("gpio: Add GPIO support for the ACCES PCIe-IDIO-24 family") Cc: stable@vger.kernel.org Signed-off-by: Arnaud de Turckheim Reviewed-by: William Breathitt Gray Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pcie-idio-24.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index a68941d19ac6..5ea517416366 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -339,7 +339,7 @@ static void idio_24_irq_mask(struct irq_data *data) raw_spin_lock_irqsave(&idio24gpio->lock, flags); - idio24gpio->irq_mask &= BIT(bit_offset); + idio24gpio->irq_mask &= ~BIT(bit_offset); new_irq_mask = idio24gpio->irq_mask >> bank_offset; if (!new_irq_mask) { From 23a7fdc06ebcc334fa667f0550676b035510b70b Mon Sep 17 00:00:00 2001 From: Arnaud de Turckheim Date: Wed, 4 Nov 2020 16:24:54 +0100 Subject: [PATCH 155/710] gpio: pcie-idio-24: Fix IRQ Enable Register value This fixes the COS Enable Register value for enabling/disabling the corresponding IRQs bank. Fixes: 585562046628 ("gpio: Add GPIO support for the ACCES PCIe-IDIO-24 family") Cc: stable@vger.kernel.org Signed-off-by: Arnaud de Turckheim Reviewed-by: William Breathitt Gray Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pcie-idio-24.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index 5ea517416366..a61de14d09b6 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -334,13 +334,13 @@ static void idio_24_irq_mask(struct irq_data *data) unsigned long flags; const unsigned long bit_offset = irqd_to_hwirq(data) - 24; unsigned char new_irq_mask; - const unsigned long bank_offset = bit_offset/8 * 8; + const unsigned long bank_offset = bit_offset / 8; unsigned char cos_enable_state; raw_spin_lock_irqsave(&idio24gpio->lock, flags); idio24gpio->irq_mask &= ~BIT(bit_offset); - new_irq_mask = idio24gpio->irq_mask >> bank_offset; + new_irq_mask = idio24gpio->irq_mask >> bank_offset * 8; if (!new_irq_mask) { cos_enable_state = ioread8(&idio24gpio->reg->cos_enable); @@ -363,12 +363,12 @@ static void idio_24_irq_unmask(struct irq_data *data) unsigned long flags; unsigned char prev_irq_mask; const unsigned long bit_offset = irqd_to_hwirq(data) - 24; - const unsigned long bank_offset = bit_offset/8 * 8; + const unsigned long bank_offset = bit_offset / 8; unsigned char cos_enable_state; raw_spin_lock_irqsave(&idio24gpio->lock, flags); - prev_irq_mask = idio24gpio->irq_mask >> bank_offset; + prev_irq_mask = idio24gpio->irq_mask >> bank_offset * 8; idio24gpio->irq_mask |= BIT(bit_offset); if (!prev_irq_mask) { From 10a2f11d3c9e48363c729419e0f0530dea76e4fe Mon Sep 17 00:00:00 2001 From: Arnaud de Turckheim Date: Wed, 4 Nov 2020 16:24:55 +0100 Subject: [PATCH 156/710] gpio: pcie-idio-24: Enable PEX8311 interrupts This enables the PEX8311 internal PCI wire interrupt and the PEX8311 local interrupt input so the local interrupts are forwarded to the PCI. Fixes: 585562046628 ("gpio: Add GPIO support for the ACCES PCIe-IDIO-24 family") Cc: stable@vger.kernel.org Signed-off-by: Arnaud de Turckheim Reviewed-by: William Breathitt Gray Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pcie-idio-24.c | 52 +++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index a61de14d09b6..2a07fd96707e 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -28,6 +28,47 @@ #include #include +/* + * PLX PEX8311 PCI LCS_INTCSR Interrupt Control/Status + * + * Bit: Description + * 0: Enable Interrupt Sources (Bit 0) + * 1: Enable Interrupt Sources (Bit 1) + * 2: Generate Internal PCI Bus Internal SERR# Interrupt + * 3: Mailbox Interrupt Enable + * 4: Power Management Interrupt Enable + * 5: Power Management Interrupt + * 6: Slave Read Local Data Parity Check Error Enable + * 7: Slave Read Local Data Parity Check Error Status + * 8: Internal PCI Wire Interrupt Enable + * 9: PCI Express Doorbell Interrupt Enable + * 10: PCI Abort Interrupt Enable + * 11: Local Interrupt Input Enable + * 12: Retry Abort Enable + * 13: PCI Express Doorbell Interrupt Active + * 14: PCI Abort Interrupt Active + * 15: Local Interrupt Input Active + * 16: Local Interrupt Output Enable + * 17: Local Doorbell Interrupt Enable + * 18: DMA Channel 0 Interrupt Enable + * 19: DMA Channel 1 Interrupt Enable + * 20: Local Doorbell Interrupt Active + * 21: DMA Channel 0 Interrupt Active + * 22: DMA Channel 1 Interrupt Active + * 23: Built-In Self-Test (BIST) Interrupt Active + * 24: Direct Master was the Bus Master during a Master or Target Abort + * 25: DMA Channel 0 was the Bus Master during a Master or Target Abort + * 26: DMA Channel 1 was the Bus Master during a Master or Target Abort + * 27: Target Abort after internal 256 consecutive Master Retrys + * 28: PCI Bus wrote data to LCS_MBOX0 + * 29: PCI Bus wrote data to LCS_MBOX1 + * 30: PCI Bus wrote data to LCS_MBOX2 + * 31: PCI Bus wrote data to LCS_MBOX3 + */ +#define PLX_PEX8311_PCI_LCS_INTCSR 0x68 +#define INTCSR_INTERNAL_PCI_WIRE BIT(8) +#define INTCSR_LOCAL_INPUT BIT(11) + /** * struct idio_24_gpio_reg - GPIO device registers structure * @out0_7: Read: FET Outputs 0-7 @@ -92,6 +133,7 @@ struct idio_24_gpio_reg { struct idio_24_gpio { struct gpio_chip chip; raw_spinlock_t lock; + __u8 __iomem *plx; struct idio_24_gpio_reg __iomem *reg; unsigned long irq_mask; }; @@ -455,6 +497,7 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct device *const dev = &pdev->dev; struct idio_24_gpio *idio24gpio; int err; + const size_t pci_plx_bar_index = 1; const size_t pci_bar_index = 2; const char *const name = pci_name(pdev); struct gpio_irq_chip *girq; @@ -469,12 +512,13 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - err = pcim_iomap_regions(pdev, BIT(pci_bar_index), name); + err = pcim_iomap_regions(pdev, BIT(pci_plx_bar_index) | BIT(pci_bar_index), name); if (err) { dev_err(dev, "Unable to map PCI I/O addresses (%d)\n", err); return err; } + idio24gpio->plx = pcim_iomap_table(pdev)[pci_plx_bar_index]; idio24gpio->reg = pcim_iomap_table(pdev)[pci_bar_index]; idio24gpio->chip.label = name; @@ -504,6 +548,12 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Software board reset */ iowrite8(0, &idio24gpio->reg->soft_reset); + /* + * enable PLX PEX8311 internal PCI wire interrupt and local interrupt + * input + */ + iowrite8((INTCSR_INTERNAL_PCI_WIRE | INTCSR_LOCAL_INPUT) >> 8, + idio24gpio->plx + PLX_PEX8311_PCI_LCS_INTCSR + 1); err = devm_gpiochip_add_data(dev, &idio24gpio->chip, idio24gpio); if (err) { From faf000397e7f103df9953a312e1df21df1dc797f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 Nov 2020 11:30:09 +1100 Subject: [PATCH 157/710] KVM: arm64: Fix build error in user_mem_abort() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PUD and PMD are folded into PGD when the following options are enabled. In that case, PUD_SHIFT is equal to PMD_SHIFT and we fail to build with the indicated errors: CONFIG_ARM64_VA_BITS_42=y CONFIG_ARM64_PAGE_SHIFT=16 CONFIG_PGTABLE_LEVELS=3 arch/arm64/kvm/mmu.c: In function ‘user_mem_abort’: arch/arm64/kvm/mmu.c:798:2: error: duplicate case value case PMD_SHIFT: ^~~~ arch/arm64/kvm/mmu.c:791:2: note: previously used here case PUD_SHIFT: ^~~~ This fixes the issue by skipping the check on PUD huge page when PUD and PMD are folded into PGD. Fixes: 2f40c46021bbb ("KVM: arm64: Use fallback mapping sizes for contiguous huge page sizes") Reported-by: Eric Auger Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201103003009.32955-1-gshan@redhat.com --- arch/arm64/kvm/mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c7c6df6309d5..a109c5001827 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -788,10 +788,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } switch (vma_shift) { +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SHIFT: if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) break; fallthrough; +#endif case CONT_PMD_SHIFT: vma_shift = PMD_SHIFT; fallthrough; From f81cb2c3ad41ac6d8cb2650e3d72d5f67db1aa28 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 5 Nov 2020 10:10:19 +0100 Subject: [PATCH 158/710] KVM: arm64: Don't hide ID registers from userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ID registers are RAZ until they've been allocated a purpose, but that doesn't mean they should be removed from the KVM_GET_REG_LIST list. So far we only have one register, SYS_ID_AA64ZFR0_EL1, that is hidden from userspace when its function, SVE, is not present. Expose SYS_ID_AA64ZFR0_EL1 to userspace as RAZ when SVE is not implemented. Removing the userspace visibility checks is enough to reexpose it, as it will already return zero to userspace when SVE is not present. The register already behaves as RAZ for the guest when SVE is not present. Fixes: 73433762fcae ("KVM: arm64/sve: System register context switch and access support") Reported-by: 张东旭 Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org#v5.2+ Link: https://lore.kernel.org/r/20201105091022.15373-2-drjones@redhat.com --- arch/arm64/kvm/sys_regs.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 983994f01a63..3af306e6b9cd 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1193,16 +1193,6 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN_USER | REG_HIDDEN_GUEST; } -/* Visibility overrides for SVE-specific ID registers */ -static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) -{ - if (vcpu_has_sve(vcpu)) - return 0; - - return REG_HIDDEN_USER; -} - /* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */ static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu) { @@ -1229,9 +1219,6 @@ static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, { u64 val; - if (WARN_ON(!vcpu_has_sve(vcpu))) - return -ENOENT; - val = guest_id_aa64zfr0_el1(vcpu); return reg_to_user(uaddr, &val, reg->id); } @@ -1244,9 +1231,6 @@ static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, int err; u64 val; - if (WARN_ON(!vcpu_has_sve(vcpu))) - return -ENOENT; - err = reg_from_user(&val, uaddr, id); if (err) return err; @@ -1509,7 +1493,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(ID_AA64PFR1_EL1), ID_UNALLOCATED(4,2), ID_UNALLOCATED(4,3), - { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility }, + { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, }, ID_UNALLOCATED(4,5), ID_UNALLOCATED(4,6), ID_UNALLOCATED(4,7), From 01fe5ace92ddb8732e3331355e7ba9cb6f2ef787 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 5 Nov 2020 10:10:20 +0100 Subject: [PATCH 159/710] KVM: arm64: Consolidate REG_HIDDEN_GUEST/USER REG_HIDDEN_GUEST and REG_HIDDEN_USER are always used together. Consolidate them into a single REG_HIDDEN flag. We can always add another flag later if some register needs to expose itself differently to the guest than it does to userspace. No functional change intended. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201105091022.15373-3-drjones@redhat.com --- arch/arm64/kvm/sys_regs.c | 12 ++++++------ arch/arm64/kvm/sys_regs.h | 18 ++++-------------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3af306e6b9cd..26a285127620 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1069,7 +1069,7 @@ static bool trap_ptrauth(struct kvm_vcpu *vcpu, static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST; + return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN; } #define __PTRAUTH_KEY(k) \ @@ -1190,7 +1190,7 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, if (vcpu_has_sve(vcpu)) return 0; - return REG_HIDDEN_USER | REG_HIDDEN_GUEST; + return REG_HIDDEN; } /* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */ @@ -2153,7 +2153,7 @@ static void perform_access(struct kvm_vcpu *vcpu, trace_kvm_sys_access(*vcpu_pc(vcpu), params, r); /* Check for regs disabled by runtime config */ - if (sysreg_hidden_from_guest(vcpu, r)) { + if (sysreg_hidden(vcpu, r)) { kvm_inject_undefined(vcpu); return; } @@ -2652,7 +2652,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg return get_invariant_sys_reg(reg->id, uaddr); /* Check for regs disabled by runtime config */ - if (sysreg_hidden_from_user(vcpu, r)) + if (sysreg_hidden(vcpu, r)) return -ENOENT; if (r->get_user) @@ -2677,7 +2677,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg return set_invariant_sys_reg(reg->id, uaddr); /* Check for regs disabled by runtime config */ - if (sysreg_hidden_from_user(vcpu, r)) + if (sysreg_hidden(vcpu, r)) return -ENOENT; if (r->set_user) @@ -2748,7 +2748,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, if (!(rd->reg || rd->get_user)) return 0; - if (sysreg_hidden_from_user(vcpu, rd)) + if (sysreg_hidden(vcpu, rd)) return 0; if (!copy_reg_to_user(rd, uind)) diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 5a6fc30f5989..2641b2ee6a91 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -59,8 +59,7 @@ struct sys_reg_desc { const struct sys_reg_desc *rd); }; -#define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */ -#define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */ +#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */ static __printf(2, 3) inline void print_sys_reg_msg(const struct sys_reg_params *p, @@ -111,22 +110,13 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r __vcpu_sys_reg(vcpu, r->reg) = r->val; } -static inline bool sysreg_hidden_from_guest(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *r) +static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) { if (likely(!r->visibility)) return false; - return r->visibility(vcpu, r) & REG_HIDDEN_GUEST; -} - -static inline bool sysreg_hidden_from_user(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *r) -{ - if (likely(!r->visibility)) - return false; - - return r->visibility(vcpu, r) & REG_HIDDEN_USER; + return r->visibility(vcpu, r) & REG_HIDDEN; } static inline int cmp_sys_reg(const struct sys_reg_desc *i1, From 912dee572691ffb2b387dd8b4f183d549a6b24d1 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 5 Nov 2020 10:10:21 +0100 Subject: [PATCH 160/710] KVM: arm64: Check RAZ visibility in ID register accessors The instruction encodings of ID registers are preallocated. Until an encoding is assigned a purpose the register is RAZ. KVM's general ID register accessor functions already support both paths, RAZ or not. If for each ID register we can determine if it's RAZ or not, then all ID registers can build on the general functions. The register visibility function allows us to check whether a register should be completely hidden or not, extending it to also report when the register should be RAZ or not allows us to use it for ID registers as well. Check for RAZ visibility in the ID register accessor functions, allowing the RAZ case to be handled in a generic way for all system registers. The new REG_RAZ flag will be used in a later patch. This patch has no intended functional change. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201105091022.15373-4-drjones@redhat.com --- arch/arm64/kvm/sys_regs.c | 19 ++++++++++++++++--- arch/arm64/kvm/sys_regs.h | 10 ++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 26a285127620..4ee098abd87c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1151,6 +1151,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, return val; } +static unsigned int id_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + return 0; +} + /* cpufeature ID register access trap handlers */ static bool __access_id_reg(struct kvm_vcpu *vcpu, @@ -1169,7 +1175,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - return __access_id_reg(vcpu, p, r, false); + bool raz = sysreg_visible_as_raz(vcpu, r); + + return __access_id_reg(vcpu, p, r, raz); } static bool access_raz_id_reg(struct kvm_vcpu *vcpu, @@ -1281,13 +1289,17 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu, static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - return __get_id_reg(vcpu, rd, uaddr, false); + bool raz = sysreg_visible_as_raz(vcpu, rd); + + return __get_id_reg(vcpu, rd, uaddr, raz); } static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - return __set_id_reg(vcpu, rd, uaddr, false); + bool raz = sysreg_visible_as_raz(vcpu, rd); + + return __set_id_reg(vcpu, rd, uaddr, raz); } static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, @@ -1372,6 +1384,7 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, .access = access_id_reg, \ .get_user = get_id_reg, \ .set_user = set_id_reg, \ + .visibility = id_visibility, \ } /* diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 2641b2ee6a91..0f95964339b1 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -60,6 +60,7 @@ struct sys_reg_desc { }; #define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */ +#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */ static __printf(2, 3) inline void print_sys_reg_msg(const struct sys_reg_params *p, @@ -119,6 +120,15 @@ static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, return r->visibility(vcpu, r) & REG_HIDDEN; } +static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + if (likely(!r->visibility)) + return false; + + return r->visibility(vcpu, r) & REG_RAZ; +} + static inline int cmp_sys_reg(const struct sys_reg_desc *i1, const struct sys_reg_desc *i2) { From c512298eed0360923d0cbc4a1f30bc0509af0d50 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 5 Nov 2020 10:10:22 +0100 Subject: [PATCH 161/710] KVM: arm64: Remove AA64ZFR0_EL1 accessors The AA64ZFR0_EL1 accessors are just the general accessors with its visibility function open-coded. It also skips the if-else chain in read_id_reg, but there's no reason not to go there. Indeed consolidating ID register accessors and removing lines of code make it worthwhile. Remove the AA64ZFR0_EL1 accessors, replacing them with the general accessors for sanitized ID registers. No functional change intended. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201105091022.15373-5-drjones@redhat.com --- arch/arm64/kvm/sys_regs.c | 61 +++++++-------------------------------- 1 file changed, 11 insertions(+), 50 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4ee098abd87c..436043cd327e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1154,6 +1154,16 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, static unsigned int id_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { + u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, + (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + + switch (id) { + case SYS_ID_AA64ZFR0_EL1: + if (!vcpu_has_sve(vcpu)) + return REG_RAZ; + break; + } + return 0; } @@ -1201,55 +1211,6 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } -/* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */ -static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu) -{ - if (!vcpu_has_sve(vcpu)) - return 0; - - return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1); -} - -static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - if (p->is_write) - return write_to_read_only(vcpu, p, rd); - - p->regval = guest_id_aa64zfr0_el1(vcpu); - return true; -} - -static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) -{ - u64 val; - - val = guest_id_aa64zfr0_el1(vcpu); - return reg_to_user(uaddr, &val, reg->id); -} - -static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) -{ - const u64 id = sys_reg_to_index(rd); - int err; - u64 val; - - err = reg_from_user(&val, uaddr, id); - if (err) - return err; - - /* This is what we mean by invariant: you can't change it. */ - if (val != guest_id_aa64zfr0_el1(vcpu)) - return -EINVAL; - - return 0; -} - /* * cpufeature ID register user accessors * @@ -1506,7 +1467,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(ID_AA64PFR1_EL1), ID_UNALLOCATED(4,2), ID_UNALLOCATED(4,3), - { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, }, + ID_SANITISED(ID_AA64ZFR0_EL1), ID_UNALLOCATED(4,5), ID_UNALLOCATED(4,6), ID_UNALLOCATED(4,7), From aee9dccc5b64e878cf1b18207436e73f66d74157 Mon Sep 17 00:00:00 2001 From: Brian O'Keefe Date: Fri, 6 Nov 2020 10:10:34 -0500 Subject: [PATCH 162/710] staging: rtl8723bs: Add 024c:0627 to the list of SDIO device-ids Add 024c:0627 to the list of SDIO device-ids, based on hardware found in the wild. This hardware exists on at least some Acer SW1-011 tablets. Signed-off-by: Brian O'Keefe Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/b9e1523f-2ba7-fb82-646a-37f095b4440e@alum.wpi.edu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/sdio_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c index 79b55ec827a4..b2208e5f190a 100644 --- a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c +++ b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c @@ -20,6 +20,7 @@ static const struct sdio_device_id sdio_ids[] = { { SDIO_DEVICE(0x024c, 0x0525), }, { SDIO_DEVICE(0x024c, 0x0623), }, { SDIO_DEVICE(0x024c, 0x0626), }, + { SDIO_DEVICE(0x024c, 0x0627), }, { SDIO_DEVICE(0x024c, 0xb723), }, { /* end: all zeroes */ }, }; From 92cfcd030e4b1de11a6b1edb0840e55c26332d31 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 30 Oct 2020 17:45:56 -0700 Subject: [PATCH 163/710] fscrypt: remove reachable WARN in fscrypt_setup_iv_ino_lblk_32_key() I_CREATING isn't actually set until the inode has been assigned an inode number and inserted into the inode hash table. So the WARN_ON() in fscrypt_setup_iv_ino_lblk_32_key() is wrong, and it can trigger when creating an encrypted file on ext4. Remove it. This was sometimes causing xfstest generic/602 to fail on ext4. I didn't notice it before because due to a separate oversight, new inodes that haven't been assigned an inode number yet don't necessarily have i_ino == 0 as I had thought, so by chance I never saw the test fail. Fixes: a992b20cd4ee ("fscrypt: add fscrypt_prepare_new_inode() and fscrypt_set_context()") Reported-by: Theodore Y. Ts'o Link: https://lore.kernel.org/r/20201031004556.87862-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keysetup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index d3c3e5d9b41f..d595abb8ef90 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -269,9 +269,7 @@ unlock: * New inodes may not have an inode number assigned yet. * Hashing their inode number is delayed until later. */ - if (ci->ci_inode->i_ino == 0) - WARN_ON(!(ci->ci_inode->i_state & I_CREATING)); - else + if (ci->ci_inode->i_ino) fscrypt_hash_inode_number(ci, mk); return 0; } From d435c05ab0197ee302290e1cee3f2d9c9024a64f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Nov 2020 15:39:50 -0500 Subject: [PATCH 164/710] net/sunrpc: return 0 on attempt to write to "transports" You can't write to this file because the permissions are 0444. But it sort of looked like you could do a write and it would result in a read. Then it looked like proc_sys_call_handler() just ignored it. Which is confusing. It's more clear if the "write" just returns zero. Also, the "lenp" pointer is never NULL so that check can be removed. Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields --- net/sunrpc/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index a18b36b5422d..5c9f5bca4d99 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -65,7 +65,7 @@ static int proc_do_xprt(struct ctl_table *table, int write, char tmpbuf[256]; size_t len; - if ((*ppos && !write) || !*lenp) { + if (write || *ppos) { *lenp = 0; return 0; } From 6f64e477830000746c1f992050fbd45c03c89429 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 5 Nov 2020 23:06:51 +0000 Subject: [PATCH 165/710] bpf: Update verification logic for LSM programs The current logic checks if the name of the BTF type passed in attach_btf_id starts with "bpf_lsm_", this is not sufficient as it also allows attachment to non-LSM hooks like the very function that performs this check, i.e. bpf_lsm_verify_prog. In order to ensure that this verification logic allows attachment to only LSM hooks, the LSM_HOOK definitions in lsm_hook_defs.h are used to generate a BTF_ID set. Upon verification, the attach_btf_id of the program being attached is checked for presence in this set. Fixes: 9e4e01dfd325 ("bpf: lsm: Implement attach, detach and execution") Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201105230651.2621917-1-kpsingh@chromium.org --- kernel/bpf/bpf_lsm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 78ea8a7bd27f..56cc5a915f67 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -13,6 +13,7 @@ #include #include #include +#include /* For every LSM hook that allows attachment of BPF programs, declare a nop * function where a BPF program can be attached. @@ -26,7 +27,11 @@ noinline RET bpf_lsm_##NAME(__VA_ARGS__) \ #include #undef LSM_HOOK -#define BPF_LSM_SYM_PREFX "bpf_lsm_" +#define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME) +BTF_SET_START(bpf_lsm_hooks) +#include +#undef LSM_HOOK +BTF_SET_END(bpf_lsm_hooks) int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) @@ -37,8 +42,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, return -EINVAL; } - if (strncmp(BPF_LSM_SYM_PREFX, prog->aux->attach_func_name, - sizeof(BPF_LSM_SYM_PREFX) - 1)) { + if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) { bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n", prog->aux->attach_btf_id, prog->aux->attach_func_name); return -EINVAL; From 174fe5ba2d1ea0d6c5ab2a7d4aa058d6d497ae4d Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 29 Oct 2020 23:46:36 +0800 Subject: [PATCH 166/710] ext4: correctly report "not supported" for {usr,grp}jquota when !CONFIG_QUOTA The macro MOPT_Q is used to indicates the mount option is related to quota stuff and is defined to be MOPT_NOSUPPORT when CONFIG_QUOTA is disabled. Normally the quota options are handled explicitly, so it didn't matter that the MOPT_STRING flag was missing, even though the usrjquota and grpjquota mount options take a string argument. It's important that's present in the !CONFIG_QUOTA case, since without MOPT_STRING, the mount option matcher will match usrjquota= followed by an integer, and will otherwise skip the table entry, and so "mount option not supported" error message is never reported. [ Fixed up the commit description to better explain why the fix works. --TYT ] Fixes: 26092bf52478 ("ext4: use a table-driven handler for mount options") Signed-off-by: Kaixu Xia Link: https://lore.kernel.org/r/1603986396-28917-1-git-send-email-kaixuxia@tencent.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ef4734b40e2a..5dbbd48923c1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2027,8 +2027,8 @@ static const struct mount_opts { {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA), MOPT_CLEAR | MOPT_Q}, - {Opt_usrjquota, 0, MOPT_Q}, - {Opt_grpjquota, 0, MOPT_Q}, + {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING}, + {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING}, {Opt_offusrjquota, 0, MOPT_Q}, {Opt_offgrpjquota, 0, MOPT_Q}, {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, From a0650046d31d3ca92e7fb41ae5c667ed9250a2fc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 30 Oct 2020 10:24:35 +0800 Subject: [PATCH 167/710] MAINTAINERS: add missing file in ext4 entry include/trace/events/ext4.h belongs to ext4 module, add the file path into ext4 entry in MAINTAINERS. Signed-off-by: Chao Yu Link: https://lore.kernel.org/r/20201030022435.1136-1-yuchao0@huawei.com Signed-off-by: Theodore Ts'o --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b516bb34a8d5..025d16ae0767 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6614,6 +6614,7 @@ Q: http://patchwork.ozlabs.org/project/linux-ext4/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git F: Documentation/filesystems/ext4/ F: fs/ext4/ +F: include/trace/events/ext4.h Extended Verification Module (EVM) M: Mimi Zohar From e121bd48b9eb8e3b9104d3d5d08fdf88e9ca0f97 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 30 Oct 2020 14:46:20 +0300 Subject: [PATCH 168/710] ext4: silence an uninitialized variable warning Smatch complains that "i" can be uninitialized if we don't enter the loop. I don't know if it's possible but we may as well silence this warning. [ Initialize i to sb->s_blocksize instead of 0. The only way the for loop could be skipped entirely is the in-memory data structures, in particular the bh->b_data for the on-disk superblock has gotten corrupted enough that calculated value of group is >= to ext4_get_groups_count(sb). In that case, we want to exit immediately without allocating a block. -- TYT ] Fixes: 8016e29f4362 ("ext4: fast commit recovery path") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20201030114620.GB3251003@mwanda Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 85abbfb98cbe..f067e83f149e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5167,7 +5167,7 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, struct super_block *sb = ar->inode->i_sb; ext4_group_t group; ext4_grpblk_t blkoff; - int i; + int i = sb->s_blocksize; ext4_fsblk_t goal, block; struct ext4_super_block *es = EXT4_SB(sb)->s_es; From 7067b2619017d51e71686ca9756b454de0e5826a Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 3 Nov 2020 10:29:02 +0800 Subject: [PATCH 169/710] ext4: unlock xattr_sem properly in ext4_inline_data_truncate() It takes xattr_sem to check inline data again but without unlock it in case not have. So unlock it before return. Fixes: aef1c8513c1f ("ext4: let ext4_truncate handle inline data correctly") Reported-by: Dan Carpenter Cc: Tao Ma Signed-off-by: Joseph Qi Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/1604370542-124630-1-git-send-email-joseph.qi@linux.alibaba.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index caa51473207d..b41512d1badc 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1880,6 +1880,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline) ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { + ext4_write_unlock_xattr(inode, &no_expand); *has_inline = 0; ext4_journal_stop(handle); return 0; From a44ad6835da52fdf4df2e482f45a167336555121 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:50 -0800 Subject: [PATCH 170/710] ext4: describe fast_commit feature flags Fast commit feature has flags in the file system as well in JBD2. The meaning of fast commit feature flags can get confusing. Update docs and code to add more documentation about it. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20201106035911.1942128-2-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- Documentation/filesystems/ext4/journal.rst | 6 ++++++ Documentation/filesystems/ext4/super.rst | 7 +++++++ fs/ext4/ext4.h | 7 +++++++ 3 files changed, 20 insertions(+) diff --git a/Documentation/filesystems/ext4/journal.rst b/Documentation/filesystems/ext4/journal.rst index 805a1e9ea3a5..849d5b119eb8 100644 --- a/Documentation/filesystems/ext4/journal.rst +++ b/Documentation/filesystems/ext4/journal.rst @@ -256,6 +256,10 @@ which is 1024 bytes long: - s\_padding2 - * - 0x54 + - \_\_be32 + - s\_num\_fc\_blocks + - Number of fast commit blocks in the journal. + * - 0x58 - \_\_u32 - s\_padding[42] - @@ -310,6 +314,8 @@ The journal incompat features are any combination of the following: - This journal uses v3 of the checksum on-disk format. This is the same as v2, but the journal block tag size is fixed regardless of the size of block numbers. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3) + * - 0x20 + - Journal has fast commit blocks. (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) .. _jbd2_checksum_type: diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst index 93e55d7c1d40..2eb1ab20498d 100644 --- a/Documentation/filesystems/ext4/super.rst +++ b/Documentation/filesystems/ext4/super.rst @@ -596,6 +596,13 @@ following: - Sparse Super Block, v2. If this flag is set, the SB field s\_backup\_bgs points to the two block groups that contain backup superblocks (COMPAT\_SPARSE\_SUPER2). + * - 0x400 + - Fast commits supported. Although fast commits blocks are + backward incompatible, fast commit blocks are not always + present in the journal. If fast commit blocks are present in + the journal, JBD2 incompat feature + (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) gets + set (COMPAT\_FAST\_COMMIT). .. _super_incompat: diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 45fcdbf538d1..d513553ceafa 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1863,6 +1863,13 @@ static inline bool ext4_verity_in_progress(struct inode *inode) #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200 +/* + * The reason why "FAST_COMMIT" is a compat feature is that, FS becomes + * incompatible only if fast commit blocks are present in the FS. Since we + * clear the journal (and thus the fast commit blocks), we don't mark FS as + * incompatible. We also have a JBD2 incompat feature, which gets set when + * there are fast commit blocks present in the journal. + */ #define EXT4_FEATURE_COMPAT_FAST_COMMIT 0x0400 #define EXT4_FEATURE_COMPAT_STABLE_INODES 0x0800 From b21ebf143af219207214c79bc217beb39c43212a Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:51 -0800 Subject: [PATCH 171/710] ext4: mark fc ineligible if inode gets evictied due to mem pressure If inode gets evicted due to memory pressure, we have to remove it from the fast commit list. However, that inode may have uncommitted changes that fast commits will lose. So, just fall back to full commits in this case. Also, rename the fast commit ineligiblity reason from "EXT4_FC_REASON_MEM" to "EXT4_FC_REASON_MEM_NOMEM" for better expression. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-3-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 4 ++-- fs/ext4/fast_commit.h | 2 +- fs/ext4/inode.c | 2 ++ include/trace/events/ext4.h | 6 +++--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 8d43058386c3..48b7bc129392 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -384,7 +384,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) mutex_unlock(&ei->i_fc_lock); node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); if (!node) { - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -397,7 +397,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) if (!node->fcd_name.name) { kmem_cache_free(ext4_fc_dentry_cachep, node); ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_MEM); + EXT4_FC_REASON_NOMEM); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 06907d485989..140fbb6af19e 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -100,7 +100,7 @@ enum { EXT4_FC_REASON_XATTR = 0, EXT4_FC_REASON_CROSS_RENAME, EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, - EXT4_FC_REASON_MEM, + EXT4_FC_REASON_NOMEM, EXT4_FC_REASON_SWAP_BOOT, EXT4_FC_REASON_RESIZE, EXT4_FC_REASON_RENAME_DIR, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b96a18679a27..081b6a86b5db 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -327,6 +327,8 @@ stop_handle: ext4_xattr_inode_array_free(ea_inode_array); return; no_delete: + if (!list_empty(&EXT4_I(inode)->i_fc_list)) + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ } diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index b14314fcf732..0f899d3b09d3 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -100,7 +100,7 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); { EXT4_FC_REASON_XATTR, "XATTR"}, \ { EXT4_FC_REASON_CROSS_RENAME, "CROSS_RENAME"}, \ { EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, "JOURNAL_FLAG_CHANGE"}, \ - { EXT4_FC_REASON_MEM, "NO_MEM"}, \ + { EXT4_FC_REASON_NOMEM, "NO_MEM"}, \ { EXT4_FC_REASON_SWAP_BOOT, "SWAP_BOOT"}, \ { EXT4_FC_REASON_RESIZE, "RESIZE"}, \ { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \ @@ -2917,13 +2917,13 @@ TRACE_EVENT(ext4_fc_stats, ), TP_printk("dev %d:%d fc ineligible reasons:\n" - "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s,%d; " + "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " "num_commits:%ld, ineligible: %ld, numblks: %ld", MAJOR(__entry->dev), MINOR(__entry->dev), FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_MEM), + FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), From 5b552ad70c6197e764ffe6070089c5b355fe2d26 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:52 -0800 Subject: [PATCH 172/710] ext4: drop redundant calls ext4_fc_track_range ext4_fc_track_range() should only be called when blocks are added or removed from an inode. So, the only places from where we need to call this function are ext4_map_blocks(), punch hole, collapse / zero range, truncate. Remove all the other redundant calls to ths function. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-4-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 5 ----- fs/ext4/super.c | 4 ---- 2 files changed, 9 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 57cfa28919a9..94ba44785d28 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3724,7 +3724,6 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, err = ext4_ext_dirty(handle, inode, path + path->p_depth); out: ext4_ext_show_leaf(inode, path); - ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1); return err; } @@ -3796,7 +3795,6 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, if (*allocated > map->m_len) *allocated = map->m_len; map->m_len = *allocated; - ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1); return 0; } @@ -4329,7 +4327,6 @@ got_allocated_blocks: map->m_len = ar.len; allocated = map->m_len; ext4_ext_show_leaf(inode, path); - ext4_fc_track_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1); out: ext4_ext_drop_refs(path); kfree(path); @@ -4651,8 +4648,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; - ext4_fc_track_range(inode, offset >> blkbits, - (offset + len - 1) >> blkbits); ext4_fc_start_update(inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5dbbd48923c1..738a6dd4957d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6560,10 +6560,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, brelse(bh); out: if (inode->i_size < off + len) { - ext4_fc_track_range(inode, - (inode->i_size > 0 ? inode->i_size - 1 : 0) - >> inode->i_sb->s_blocksize_bits, - (off + len) >> inode->i_sb->s_blocksize_bits); i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; err2 = ext4_mark_inode_dirty(handle, inode); From a80f7fcf18672ae4971a6b713b58c0d389aa99fe Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:53 -0800 Subject: [PATCH 173/710] ext4: fixup ext4_fc_track_* functions' signature Firstly, pass handle to all ext4_fc_track_* functions and use transaction id found in handle->h_transaction->h_tid for tracking fast commit updates. Secondly, don't pass inode to ext4_fc_track_link/create/unlink functions. inode can be found inside these functions as d_inode(dentry). However, rename path is an exeception. That's because in that case, we need inode that's not same as d_inode(dentry). To handle that, add a couple of low-level wrapper functions that take inode and dentry as arguments. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-5-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 16 +++++++----- fs/ext4/extents.c | 2 +- fs/ext4/fast_commit.c | 48 +++++++++++++++++++++------------- fs/ext4/inode.c | 10 +++---- fs/ext4/namei.c | 61 ++++++++++++++++++++----------------------- 5 files changed, 74 insertions(+), 63 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d513553ceafa..c6339bd75a93 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2738,12 +2738,16 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); int ext4_fc_info_show(struct seq_file *seq, void *v); void ext4_fc_init(struct super_block *sb, journal_t *journal); void ext4_fc_init_inode(struct inode *inode); -void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start, +void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, ext4_lblk_t end); -void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry); -void ext4_fc_track_link(struct inode *inode, struct dentry *dentry); -void ext4_fc_track_create(struct inode *inode, struct dentry *dentry); -void ext4_fc_track_inode(struct inode *inode); +void __ext4_fc_track_unlink(handle_t *handle, struct inode *inode, + struct dentry *dentry); +void __ext4_fc_track_link(handle_t *handle, struct inode *inode, + struct dentry *dentry); +void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry); +void ext4_fc_track_link(handle_t *handle, struct dentry *dentry); +void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); +void ext4_fc_track_inode(handle_t *handle, struct inode *inode); void ext4_fc_mark_ineligible(struct super_block *sb, int reason); void ext4_fc_start_ineligible(struct super_block *sb, int reason); void ext4_fc_stop_ineligible(struct super_block *sb); @@ -3459,7 +3463,7 @@ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode, extern int ext4_ci_compare(const struct inode *parent, const struct qstr *fname, const struct qstr *entry, bool quick); -extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name, +extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name, struct inode *inode); extern int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 94ba44785d28..17d7096b3212 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4599,7 +4599,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, ret = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret)) goto out_handle; - ext4_fc_track_range(inode, offset >> inode->i_sb->s_blocksize_bits, + ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits, (offset + len - 1) >> inode->i_sb->s_blocksize_bits); /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 48b7bc129392..9399e9cccb7e 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -323,13 +323,14 @@ static inline int ext4_fc_is_ineligible(struct super_block *sb) * If enqueue is set, this function enqueues the inode in fast commit list. */ static int ext4_fc_track_template( - struct inode *inode, int (*__fc_track_fn)(struct inode *, void *, bool), + handle_t *handle, struct inode *inode, + int (*__fc_track_fn)(struct inode *, void *, bool), void *args, int enqueue) { - tid_t running_txn_tid; bool update = false; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + tid_t tid = 0; int ret; if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || @@ -339,15 +340,13 @@ static int ext4_fc_track_template( if (ext4_fc_is_ineligible(inode->i_sb)) return -EINVAL; - running_txn_tid = sbi->s_journal ? - sbi->s_journal->j_commit_sequence + 1 : 0; - + tid = handle->h_transaction->t_tid; mutex_lock(&ei->i_fc_lock); - if (running_txn_tid == ei->i_sync_tid) { + if (tid == ei->i_sync_tid) { update = true; } else { ext4_fc_reset_inode(inode); - ei->i_sync_tid = running_txn_tid; + ei->i_sync_tid = tid; } ret = __fc_track_fn(inode, args, update); mutex_unlock(&ei->i_fc_lock); @@ -422,7 +421,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) return 0; } -void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry) +void __ext4_fc_track_unlink(handle_t *handle, + struct inode *inode, struct dentry *dentry) { struct __track_dentry_update_args args; int ret; @@ -430,12 +430,18 @@ void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry) args.dentry = dentry; args.op = EXT4_FC_TAG_UNLINK; - ret = ext4_fc_track_template(inode, __track_dentry_update, + ret = ext4_fc_track_template(handle, inode, __track_dentry_update, (void *)&args, 0); trace_ext4_fc_track_unlink(inode, dentry, ret); } -void ext4_fc_track_link(struct inode *inode, struct dentry *dentry) +void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) +{ + __ext4_fc_track_unlink(handle, d_inode(dentry), dentry); +} + +void __ext4_fc_track_link(handle_t *handle, + struct inode *inode, struct dentry *dentry) { struct __track_dentry_update_args args; int ret; @@ -443,20 +449,26 @@ void ext4_fc_track_link(struct inode *inode, struct dentry *dentry) args.dentry = dentry; args.op = EXT4_FC_TAG_LINK; - ret = ext4_fc_track_template(inode, __track_dentry_update, + ret = ext4_fc_track_template(handle, inode, __track_dentry_update, (void *)&args, 0); trace_ext4_fc_track_link(inode, dentry, ret); } -void ext4_fc_track_create(struct inode *inode, struct dentry *dentry) +void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) +{ + __ext4_fc_track_link(handle, d_inode(dentry), dentry); +} + +void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) { struct __track_dentry_update_args args; + struct inode *inode = d_inode(dentry); int ret; args.dentry = dentry; args.op = EXT4_FC_TAG_CREAT; - ret = ext4_fc_track_template(inode, __track_dentry_update, + ret = ext4_fc_track_template(handle, inode, __track_dentry_update, (void *)&args, 0); trace_ext4_fc_track_create(inode, dentry, ret); } @@ -472,14 +484,14 @@ static int __track_inode(struct inode *inode, void *arg, bool update) return 0; } -void ext4_fc_track_inode(struct inode *inode) +void ext4_fc_track_inode(handle_t *handle, struct inode *inode) { int ret; if (S_ISDIR(inode->i_mode)) return; - ret = ext4_fc_track_template(inode, __track_inode, NULL, 1); + ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); trace_ext4_fc_track_inode(inode, ret); } @@ -515,7 +527,7 @@ static int __track_range(struct inode *inode, void *arg, bool update) return 0; } -void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start, +void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, ext4_lblk_t end) { struct __track_range_args args; @@ -527,7 +539,7 @@ void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start, args.start = start; args.end = end; - ret = ext4_fc_track_template(inode, __track_range, &args, 1); + ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1); trace_ext4_fc_track_range(inode, start, end, ret); } @@ -1263,7 +1275,7 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl) return 0; } - ret = __ext4_unlink(old_parent, &entry, inode); + ret = __ext4_unlink(NULL, old_parent, &entry, inode); /* -ENOENT ok coz it might not exist anymore. */ if (ret == -ENOENT) ret = 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 081b6a86b5db..87120c4c44f3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -732,7 +732,7 @@ out_sem: if (ret) return ret; } - ext4_fc_track_range(inode, map->m_lblk, + ext4_fc_track_range(handle, inode, map->m_lblk, map->m_lblk + map->m_len - 1); } @@ -4111,7 +4111,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) up_write(&EXT4_I(inode)->i_data_sem); } - ext4_fc_track_range(inode, first_block, stop_block); + ext4_fc_track_range(handle, inode, first_block, stop_block); if (IS_SYNC(inode)) ext4_handle_sync(handle); @@ -5444,14 +5444,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } if (shrink) - ext4_fc_track_range(inode, + ext4_fc_track_range(handle, inode, (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >> inode->i_sb->s_blocksize_bits, (oldsize > 0 ? oldsize - 1 : 0) >> inode->i_sb->s_blocksize_bits); else ext4_fc_track_range( - inode, + handle, inode, (oldsize > 0 ? oldsize - 1 : oldsize) >> inode->i_sb->s_blocksize_bits, (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >> @@ -5701,7 +5701,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, put_bh(iloc->bh); return -EIO; } - ext4_fc_track_inode(inode); + ext4_fc_track_inode(handle, inode); if (IS_I_VERSION(inode)) inode_inc_iversion(inode); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f458d1d81d96..33509266f5a0 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2606,7 +2606,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { handle_t *handle; - struct inode *inode, *inode_save; + struct inode *inode; int err, credits, retries = 0; err = dquot_initialize(dir); @@ -2624,11 +2624,9 @@ retry: inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); - inode_save = inode; - ihold(inode_save); err = ext4_add_nondir(handle, dentry, &inode); - ext4_fc_track_create(inode_save, dentry); - iput(inode_save); + if (!err) + ext4_fc_track_create(handle, dentry); } if (handle) ext4_journal_stop(handle); @@ -2643,7 +2641,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { handle_t *handle; - struct inode *inode, *inode_save; + struct inode *inode; int err, credits, retries = 0; err = dquot_initialize(dir); @@ -2660,12 +2658,9 @@ retry: if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &ext4_special_inode_operations; - inode_save = inode; - ihold(inode_save); err = ext4_add_nondir(handle, dentry, &inode); if (!err) - ext4_fc_track_create(inode_save, dentry); - iput(inode_save); + ext4_fc_track_create(handle, dentry); } if (handle) ext4_journal_stop(handle); @@ -2829,7 +2824,6 @@ out_clear_inode: iput(inode); goto out_retry; } - ext4_fc_track_create(inode, dentry); ext4_inc_count(dir); ext4_update_dx_flag(dir); @@ -2837,6 +2831,7 @@ out_clear_inode: if (err) goto out_clear_inode; d_instantiate_new(dentry, inode); + ext4_fc_track_create(handle, dentry); if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); @@ -3171,7 +3166,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) goto end_rmdir; ext4_dec_count(dir); ext4_update_dx_flag(dir); - ext4_fc_track_unlink(inode, dentry); + ext4_fc_track_unlink(handle, dentry); retval = ext4_mark_inode_dirty(handle, dir); #ifdef CONFIG_UNICODE @@ -3192,13 +3187,12 @@ end_rmdir: return retval; } -int __ext4_unlink(struct inode *dir, const struct qstr *d_name, +int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name, struct inode *inode) { int retval = -ENOENT; struct buffer_head *bh; struct ext4_dir_entry_2 *de; - handle_t *handle = NULL; int skip_remove_dentry = 0; bh = ext4_find_entry(dir, d_name, &de, NULL); @@ -3217,14 +3211,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name, if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) skip_remove_dentry = 1; else - goto out_bh; - } - - handle = ext4_journal_start(dir, EXT4_HT_DIR, - EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) { - retval = PTR_ERR(handle); - goto out_bh; + goto out; } if (IS_DIRSYNC(dir)) @@ -3233,12 +3220,12 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name, if (!skip_remove_dentry) { retval = ext4_delete_entry(handle, dir, de, bh); if (retval) - goto out_handle; + goto out; dir->i_ctime = dir->i_mtime = current_time(dir); ext4_update_dx_flag(dir); retval = ext4_mark_inode_dirty(handle, dir); if (retval) - goto out_handle; + goto out; } else { retval = 0; } @@ -3252,15 +3239,14 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name, inode->i_ctime = current_time(inode); retval = ext4_mark_inode_dirty(handle, inode); -out_handle: - ext4_journal_stop(handle); -out_bh: +out: brelse(bh); return retval; } static int ext4_unlink(struct inode *dir, struct dentry *dentry) { + handle_t *handle; int retval; if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) @@ -3278,9 +3264,16 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (retval) goto out_trace; - retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry)); + handle = ext4_journal_start(dir, EXT4_HT_DIR, + EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) { + retval = PTR_ERR(handle); + goto out_trace; + } + + retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry)); if (!retval) - ext4_fc_track_unlink(d_inode(dentry), dentry); + ext4_fc_track_unlink(handle, dentry); #ifdef CONFIG_UNICODE /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid @@ -3291,6 +3284,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (IS_CASEFOLDED(dir)) d_invalidate(dentry); #endif + if (handle) + ext4_journal_stop(handle); out_trace: trace_ext4_unlink_exit(dentry, retval); @@ -3447,7 +3442,6 @@ retry: err = ext4_add_entry(handle, dentry, inode); if (!err) { - ext4_fc_track_link(inode, dentry); err = ext4_mark_inode_dirty(handle, inode); /* this can happen only for tmpfile being * linked the first time @@ -3455,6 +3449,7 @@ retry: if (inode->i_nlink == 1) ext4_orphan_del(handle, inode); d_instantiate(dentry, inode); + ext4_fc_track_link(handle, dentry); } else { drop_nlink(inode); iput(inode); @@ -3915,9 +3910,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, EXT4_FC_REASON_RENAME_DIR); } else { if (new.inode) - ext4_fc_track_unlink(new.inode, new.dentry); - ext4_fc_track_link(old.inode, new.dentry); - ext4_fc_track_unlink(old.inode, old.dentry); + ext4_fc_track_unlink(handle, new.dentry); + __ext4_fc_track_link(handle, old.inode, new.dentry); + __ext4_fc_track_unlink(handle, old.inode, old.dentry); } if (new.inode) { From ede7dc7fa0af619afc08995776eadb9ff3b0a711 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:54 -0800 Subject: [PATCH 174/710] jbd2: rename j_maxlen to j_total_len and add jbd2_journal_max_txn_bufs The on-disk superblock field sb->s_maxlen represents the total size of the journal including the fast commit area and is no more the max number of blocks available for a transaction. The maximum number of blocks available to a transaction is reduced by the number of fast commit blocks. So, this patch renames j_maxlen to j_total_len to better represent its intent. Also, it adds a function to calculate max number of bufs available for a transaction. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-6-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fsmap.c | 2 +- fs/ext4/super.c | 2 +- fs/jbd2/commit.c | 2 +- fs/jbd2/journal.c | 12 ++++++------ fs/jbd2/recovery.c | 6 +++--- fs/ocfs2/journal.c | 2 +- include/linux/jbd2.h | 9 +++++++-- 7 files changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index b232c2767534..4c2a9fe30067 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -280,7 +280,7 @@ static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys, /* Fabricate an rmap entry for the external log device. */ irec.fmr_physical = journal->j_blk_offset; - irec.fmr_length = journal->j_maxlen; + irec.fmr_length = journal->j_total_len; irec.fmr_owner = EXT4_FMR_OWN_LOG; irec.fmr_flags = 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 738a6dd4957d..8a6dd433bb70 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3976,7 +3976,7 @@ int ext4_calculate_overhead(struct super_block *sb) * loaded or not */ if (sbi->s_journal && !sbi->s_journal_bdev) - overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); + overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len); else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { /* j_inum for internal journal is non-zero */ j_inode = ext4_get_journal_inode(sb, j_inum); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index fa688e163a80..ec516490cb35 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -801,7 +801,7 @@ start_journal_io: if (first_block < journal->j_tail) freed += journal->j_last - journal->j_first; /* Update tail only if we free significant amount of space */ - if (freed < journal->j_maxlen / 4) + if (freed < jbd2_journal_get_max_txn_bufs(journal)) update_tail = 0; } J_ASSERT(commit_transaction->t_state == T_COMMIT); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0c7c42bd530f..c3c768248527 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1348,7 +1348,7 @@ static journal_t *journal_init_common(struct block_device *bdev, journal->j_dev = bdev; journal->j_fs_dev = fs_dev; journal->j_blk_offset = start; - journal->j_maxlen = len; + journal->j_total_len = len; /* We need enough buffers to write out full descriptor block. */ n = journal->j_blocksize / jbd2_min_tag_size(); journal->j_wbufsize = n; @@ -1531,7 +1531,7 @@ static int journal_reset(journal_t *journal) journal->j_commit_sequence = journal->j_transaction_sequence - 1; journal->j_commit_request = journal->j_commit_sequence; - journal->j_max_transaction_buffers = journal->j_maxlen / 4; + journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal); /* * As a special case, if the on-disk copy is already marked as needing @@ -1792,15 +1792,15 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) - journal->j_maxlen = be32_to_cpu(sb->s_maxlen); - else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { + if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len) + journal->j_total_len = be32_to_cpu(sb->s_maxlen); + else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) { printk(KERN_WARNING "JBD2: journal file too short\n"); goto out; } if (be32_to_cpu(sb->s_first) == 0 || - be32_to_cpu(sb->s_first) >= journal->j_maxlen) { + be32_to_cpu(sb->s_first) >= journal->j_total_len) { printk(KERN_WARNING "JBD2: Invalid start block of journal: %u\n", be32_to_cpu(sb->s_first)); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index eb2606133cd8..dc0694fcfcd1 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -74,8 +74,8 @@ static int do_readahead(journal_t *journal, unsigned int start) /* Do up to 128K of readahead */ max = start + (128 * 1024 / journal->j_blocksize); - if (max > journal->j_maxlen) - max = journal->j_maxlen; + if (max > journal->j_total_len) + max = journal->j_total_len; /* Do the readahead itself. We'll submit MAXBUF buffer_heads at * a time to the block device IO layer. */ @@ -134,7 +134,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, *bhp = NULL; - if (offset >= journal->j_maxlen) { + if (offset >= journal->j_total_len) { printk(KERN_ERR "JBD2: corrupted journal superblock\n"); return -EFSCORRUPTED; } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index b9a9d69dde7e..db52e843002a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -877,7 +877,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) goto done; } - trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen); + trace_ocfs2_journal_init_maxlen(j_journal->j_total_len); *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & OCFS2_JOURNAL_DIRTY_FL); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1d5566af48ac..e0b6b53eae64 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -988,9 +988,9 @@ struct journal_s struct block_device *j_fs_dev; /** - * @j_maxlen: Total maximum capacity of the journal region on disk. + * @j_total_len: Total maximum capacity of the journal region on disk. */ - unsigned int j_maxlen; + unsigned int j_total_len; /** * @j_reserved_credits: @@ -1624,6 +1624,11 @@ int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); int jbd2_fc_release_bufs(journal_t *journal); +static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal) +{ + return (journal->j_total_len - journal->j_fc_wbufsize) / 4; +} + /* * is_journal_abort * From a1e5e465b31d6015fccb359d99053b39e5180466 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:55 -0800 Subject: [PATCH 175/710] ext4: clean up the JBD2 API that initializes fast commits This patch removes jbd2_fc_init() API and its related functions to simplify enabling fast commits. With this change, the number of fast commit blocks to use is solely determined by the JBD2 layer. So, we move the default value for minimum number of fast commit blocks from ext4/fast_commit.h to include/linux/jbd2.h. However, whether or not to use fast commits is determined by the file system. The file system just sets the fast commit feature using jbd2_journal_set_features(). JBD2 layer then determines how many blocks to use for fast commits (based on the value found in the JBD2 superblock). Note that the JBD2 feature flag of fast commits is just an indication that there are fast commit blocks present on disk. It doesn't tell JBD2 layer about the intent of the file system of whether to it wants to use fast commit or not. That's why, we blindly clear the fast commit flag in journal_reset() after the recovery is done. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-7-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 14 ------- fs/ext4/fast_commit.h | 3 -- fs/ext4/super.c | 8 ++++ fs/jbd2/journal.c | 96 +++++++++++++++++++++++++------------------ include/linux/jbd2.h | 2 +- 5 files changed, 65 insertions(+), 58 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 9399e9cccb7e..bab60c5d5095 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -2091,8 +2091,6 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, void ext4_fc_init(struct super_block *sb, journal_t *journal) { - int num_fc_blocks; - /* * We set replay callback even if fast commit disabled because we may * could still have fast commit blocks that need to be replayed even if @@ -2102,18 +2100,6 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal) if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) return; journal->j_fc_cleanup_callback = ext4_fc_cleanup; - if (!buffer_uptodate(journal->j_sb_buffer) - && ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, - true)) { - ext4_msg(sb, KERN_ERR, "I/O error on journal"); - return; - } - num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks); - if (jbd2_fc_init(journal, num_fc_blocks ? num_fc_blocks : - EXT4_NUM_FC_BLKS)) { - pr_warn("Error while enabling fast commits, turning off."); - ext4_clear_feature_fast_commit(sb); - } } const char *fc_ineligible_reasons[] = { diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 140fbb6af19e..1d96e0ac8138 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -3,9 +3,6 @@ #ifndef __FAST_COMMIT_H__ #define __FAST_COMMIT_H__ -/* Number of blocks in journal area to allocate for fast commits */ -#define EXT4_NUM_FC_BLKS 256 - /* Fast commit tags */ #define EXT4_FC_TAG_ADD_RANGE 0x0001 #define EXT4_FC_TAG_DEL_RANGE 0x0002 diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8a6dd433bb70..ba02d7c86fb3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4857,6 +4857,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount_wq; } + if (test_opt2(sb, JOURNAL_FAST_COMMIT) && + !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) { + ext4_msg(sb, KERN_ERR, + "Failed to set fast commit journal feature"); + goto failed_mount_wq; + } + /* We have now updated the journal if required, so we can * validate the data journaling mode. */ switch (test_opt(sb, DATA_FLAGS)) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c3c768248527..500152f0421a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1352,19 +1352,12 @@ static journal_t *journal_init_common(struct block_device *bdev, /* We need enough buffers to write out full descriptor block. */ n = journal->j_blocksize / jbd2_min_tag_size(); journal->j_wbufsize = n; + journal->j_fc_wbuf = NULL; journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *), GFP_KERNEL); if (!journal->j_wbuf) goto err_cleanup; - if (journal->j_fc_wbufsize > 0) { - journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize, - sizeof(struct buffer_head *), - GFP_KERNEL); - if (!journal->j_fc_wbuf) - goto err_cleanup; - } - bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize); if (!bh) { pr_err("%s: Cannot get buffer for journal superblock\n", @@ -1378,23 +1371,11 @@ static journal_t *journal_init_common(struct block_device *bdev, err_cleanup: kfree(journal->j_wbuf); - kfree(journal->j_fc_wbuf); jbd2_journal_destroy_revoke(journal); kfree(journal); return NULL; } -int jbd2_fc_init(journal_t *journal, int num_fc_blks) -{ - journal->j_fc_wbufsize = num_fc_blks; - journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize, - sizeof(struct buffer_head *), GFP_KERNEL); - if (!journal->j_fc_wbuf) - return -ENOMEM; - return 0; -} -EXPORT_SYMBOL(jbd2_fc_init); - /* jbd2_journal_init_dev and jbd2_journal_init_inode: * * Create a journal structure assigned some fixed set of disk blocks to @@ -1512,16 +1493,7 @@ static int journal_reset(journal_t *journal) } journal->j_first = first; - - if (jbd2_has_feature_fast_commit(journal) && - journal->j_fc_wbufsize > 0) { - journal->j_fc_last = last; - journal->j_last = last - journal->j_fc_wbufsize; - journal->j_fc_first = journal->j_last + 1; - journal->j_fc_off = 0; - } else { - journal->j_last = last; - } + journal->j_last = last; journal->j_head = journal->j_first; journal->j_tail = journal->j_first; @@ -1533,6 +1505,13 @@ static int journal_reset(journal_t *journal) journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal); + /* + * Now that journal recovery is done, turn fast commits off here. This + * way, if fast commit was enabled before the crash but if now FS has + * disabled it, we don't enable fast commits. + */ + jbd2_clear_feature_fast_commit(journal); + /* * As a special case, if the on-disk copy is already marked as needing * no recovery (s_start == 0), then we can safely defer the superblock @@ -1872,6 +1851,7 @@ static int load_superblock(journal_t *journal) { int err; journal_superblock_t *sb; + int num_fc_blocks; err = journal_get_superblock(journal); if (err) @@ -1883,15 +1863,17 @@ static int load_superblock(journal_t *journal) journal->j_tail = be32_to_cpu(sb->s_start); journal->j_first = be32_to_cpu(sb->s_first); journal->j_errno = be32_to_cpu(sb->s_errno); + journal->j_last = be32_to_cpu(sb->s_maxlen); - if (jbd2_has_feature_fast_commit(journal) && - journal->j_fc_wbufsize > 0) { + if (jbd2_has_feature_fast_commit(journal)) { journal->j_fc_last = be32_to_cpu(sb->s_maxlen); - journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize; + num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks); + if (!num_fc_blocks) + num_fc_blocks = JBD2_MIN_FC_BLOCKS; + if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS) + journal->j_last = journal->j_fc_last - num_fc_blocks; journal->j_fc_first = journal->j_last + 1; journal->j_fc_off = 0; - } else { - journal->j_last = be32_to_cpu(sb->s_maxlen); } return 0; @@ -1954,9 +1936,6 @@ int jbd2_journal_load(journal_t *journal) */ journal->j_flags &= ~JBD2_ABORT; - if (journal->j_fc_wbufsize > 0) - jbd2_journal_set_features(journal, 0, 0, - JBD2_FEATURE_INCOMPAT_FAST_COMMIT); /* OK, we've finished with the dynamic journal bits: * reinitialise the dynamic contents of the superblock in memory * and reset them on disk. */ @@ -2040,8 +2019,7 @@ int jbd2_journal_destroy(journal_t *journal) jbd2_journal_destroy_revoke(journal); if (journal->j_chksum_driver) crypto_free_shash(journal->j_chksum_driver); - if (journal->j_fc_wbufsize > 0) - kfree(journal->j_fc_wbuf); + kfree(journal->j_fc_wbuf); kfree(journal->j_wbuf); kfree(journal); @@ -2116,6 +2094,37 @@ int jbd2_journal_check_available_features(journal_t *journal, unsigned long comp return 0; } +static int +jbd2_journal_initialize_fast_commit(journal_t *journal) +{ + journal_superblock_t *sb = journal->j_superblock; + unsigned long long num_fc_blks; + + num_fc_blks = be32_to_cpu(sb->s_num_fc_blks); + if (num_fc_blks == 0) + num_fc_blks = JBD2_MIN_FC_BLOCKS; + if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS) + return -ENOSPC; + + /* Are we called twice? */ + WARN_ON(journal->j_fc_wbuf != NULL); + journal->j_fc_wbuf = kmalloc_array(num_fc_blks, + sizeof(struct buffer_head *), GFP_KERNEL); + if (!journal->j_fc_wbuf) + return -ENOMEM; + + journal->j_fc_wbufsize = num_fc_blks; + journal->j_fc_last = journal->j_last; + journal->j_last = journal->j_fc_last - num_fc_blks; + journal->j_fc_first = journal->j_last + 1; + journal->j_fc_off = 0; + journal->j_free = journal->j_last - journal->j_first; + journal->j_max_transaction_buffers = + jbd2_journal_get_max_txn_bufs(journal); + + return 0; +} + /** * int jbd2_journal_set_features() - Mark a given journal feature in the superblock * @journal: Journal to act on. @@ -2159,6 +2168,13 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat, sb = journal->j_superblock; + if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) { + if (jbd2_journal_initialize_fast_commit(journal)) { + pr_err("JBD2: Cannot enable fast commits.\n"); + return 0; + } + } + /* Load the checksum driver if necessary */ if ((journal->j_chksum_driver == NULL) && INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e0b6b53eae64..b2caf7bbd8e5 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -68,6 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags); extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 +#define JBD2_MIN_FC_BLOCKS 256 #ifdef __KERNEL__ @@ -1614,7 +1615,6 @@ extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ -int jbd2_fc_init(journal_t *journal, int num_fc_blks); int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); From 37e0a30e94f1aa25f16b403dfabb64e0b806de0b Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:56 -0800 Subject: [PATCH 176/710] jbd2: drop jbd2_fc_init documentation Now that jbd2_fc_init is dropped, drop its docs too. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-8-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- Documentation/filesystems/journalling.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/journalling.rst b/Documentation/filesystems/journalling.rst index 5a5f70b4063e..e18f90ffc6fd 100644 --- a/Documentation/filesystems/journalling.rst +++ b/Documentation/filesystems/journalling.rst @@ -136,10 +136,8 @@ Fast commits ~~~~~~~~~~~~ JBD2 to also allows you to perform file-system specific delta commits known as -fast commits. In order to use fast commits, you first need to call -:c:func:`jbd2_fc_init` and tell how many blocks at the end of journal -area should be reserved for fast commits. Along with that, you will also need -to set following callbacks that perform correspodning work: +fast commits. In order to use fast commits, you will need to set following +callbacks that perform correspodning work: `journal->j_fc_cleanup_cb`: Cleanup function called after every full commit and fast commit. From c460e5edc85a063ec9cb60addff93d00ed378701 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:57 -0800 Subject: [PATCH 177/710] jbd2: don't use state lock during commit path Variables journal->j_fc_off, journal->j_fc_wbuf are accessed during commit path. Since today we allow only one process to perform a fast commit, there is no need take state lock before accessing these variables. This patch removes these locks and adds comments to describe this. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-9-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 6 ------ include/linux/jbd2.h | 10 ++++++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 500152f0421a..778ea50fc8d1 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -865,7 +865,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out) int fc_off; *bh_out = NULL; - write_lock(&journal->j_state_lock); if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) { fc_off = journal->j_fc_off; @@ -874,7 +873,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out) } else { ret = -EINVAL; } - write_unlock(&journal->j_state_lock); if (ret) return ret; @@ -909,9 +907,7 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks) struct buffer_head *bh; int i, j_fc_off; - read_lock(&journal->j_state_lock); j_fc_off = journal->j_fc_off; - read_unlock(&journal->j_state_lock); /* * Wait in reverse order to minimize chances of us being woken up before @@ -939,9 +935,7 @@ int jbd2_fc_release_bufs(journal_t *journal) struct buffer_head *bh; int i, j_fc_off; - read_lock(&journal->j_state_lock); j_fc_off = journal->j_fc_off; - read_unlock(&journal->j_state_lock); /* * Wait in reverse order to minimize chances of us being woken up before diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b2caf7bbd8e5..5f0ef6380b0c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -945,8 +945,9 @@ struct journal_s /** * @j_fc_off: * - * Number of fast commit blocks currently allocated. - * [j_state_lock]. + * Number of fast commit blocks currently allocated. Accessed only + * during fast commit. Currently only process can do fast commit, so + * this field is not protected by any lock. */ unsigned long j_fc_off; @@ -1109,8 +1110,9 @@ struct journal_s struct buffer_head **j_wbuf; /** - * @j_fc_wbuf: Array of fast commit bhs for - * jbd2_journal_commit_transaction. + * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only + * during a fast commit. Currently only process can do fast commit, so + * this field is not protected by any lock. */ struct buffer_head **j_fc_wbuf; From 0bce577bf9cae13ae32d391432d0030e3f67fc1d Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:58 -0800 Subject: [PATCH 178/710] jbd2: don't pass tid to jbd2_fc_end_commit_fallback() In jbd2_fc_end_commit_fallback(), we know which tid to commit. There's no need for caller to pass it. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-10-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 2 +- fs/jbd2/journal.c | 12 +++++++++--- include/linux/jbd2.h | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index bab60c5d5095..e69c580fa91e 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1143,7 +1143,7 @@ out: "Fast commit ended with blks = %d, reason = %d, subtid - %d", nblks, reason, subtid); if (reason == EXT4_FC_REASON_FC_FAILED) - return jbd2_fc_end_commit_fallback(journal, commit_tid); + return jbd2_fc_end_commit_fallback(journal); if (reason == EXT4_FC_REASON_FC_START_FAILED || reason == EXT4_FC_REASON_INELIGIBLE) return jbd2_complete_transaction(journal, commit_tid); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 778ea50fc8d1..59166e299cde 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -777,13 +777,19 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) int jbd2_fc_end_commit(journal_t *journal) { - return __jbd2_fc_end_commit(journal, 0, 0); + return __jbd2_fc_end_commit(journal, 0, false); } EXPORT_SYMBOL(jbd2_fc_end_commit); -int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid) +int jbd2_fc_end_commit_fallback(journal_t *journal) { - return __jbd2_fc_end_commit(journal, tid, 1); + tid_t tid; + + read_lock(&journal->j_state_lock); + tid = journal->j_running_transaction ? + journal->j_running_transaction->t_tid : 0; + read_unlock(&journal->j_state_lock); + return __jbd2_fc_end_commit(journal, tid, true); } EXPORT_SYMBOL(jbd2_fc_end_commit_fallback); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5f0ef6380b0c..1c49fd62ff2e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1619,7 +1619,7 @@ extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); -int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); +int jbd2_fc_end_commit_fallback(journal_t *journal); int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); int jbd2_submit_inode_data(struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); From cc80586a57f704f806b9a1b99a21cd07e37dbedc Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:59 -0800 Subject: [PATCH 179/710] jbd2: add todo for a fast commit performance optimization Fast commit performance can be optimized if commit thread doesn't wait for ongoing fast commits to complete until the transaction enters T_FLUSH state. Document this optimization. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-11-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index ec516490cb35..b121d7d434c6 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -450,6 +450,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) schedule(); write_lock(&journal->j_state_lock); finish_wait(&journal->j_fc_wait, &wait); + /* + * TODO: by blocking fast commits here, we are increasing + * fsync() latency slightly. Strictly speaking, we don't need + * to block fast commits until the transaction enters T_FLUSH + * state. So an optimization is possible where we block new fast + * commits here and wait for existing ones to complete + * just before we enter T_FLUSH. That way, the existing fast + * commits and this full commit can proceed parallely. + */ } write_unlock(&journal->j_state_lock); From 0ee66ddcf3c1503a9bdb3e49a7a96c6e429ddfad Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:00 -0800 Subject: [PATCH 180/710] jbd2: don't touch buffer state until it is filled Fast commit buffers should be filled in before toucing their state. Remove code that sets buffer state as dirty before the buffer is passed to the file system. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-12-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 59166e299cde..b5fbcd1b444c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -891,11 +891,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out) if (!bh) return -ENOMEM; - lock_buffer(bh); - clear_buffer_uptodate(bh); - set_buffer_dirty(bh); - unlock_buffer(bh); journal->j_fc_wbuf[fc_off] = bh; *bh_out = bh; From 480f89d553260e7823920e687846877bebc8dca0 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:01 -0800 Subject: [PATCH 181/710] jbd2: don't read journal->j_commit_sequence without taking a lock Take journal state lock before reading journal->j_commit_sequence. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-13-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b5fbcd1b444c..f7ebf6ef69af 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -734,10 +734,12 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid) if (!journal->j_stats.ts_tid) return -EINVAL; - if (tid <= journal->j_commit_sequence) - return -EALREADY; - write_lock(&journal->j_state_lock); + if (tid <= journal->j_commit_sequence) { + write_unlock(&journal->j_state_lock); + return -EALREADY; + } + if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING || (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) { DEFINE_WAIT(wait); From f6634e2609d13d7aa8852734e16300845db915d5 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:02 -0800 Subject: [PATCH 182/710] ext4: dedpulicate the code to wait on inode that's being committed This patch removes the deduplicates the code that implements waiting on inode that's being committed. That code is moved into a new function. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-14-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 61 +++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index e69c580fa91e..fc5a5e6a581d 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -155,6 +155,30 @@ void ext4_fc_init_inode(struct inode *inode) ei->i_fc_committed_subtid = 0; } +/* This function must be called with sbi->s_fc_lock held. */ +static void ext4_fc_wait_committing_inode(struct inode *inode) +{ + wait_queue_head_t *wq; + struct ext4_inode_info *ei = EXT4_I(inode); + +#if (BITS_PER_LONG < 64) + DEFINE_WAIT_BIT(wait, &ei->i_state_flags, + EXT4_STATE_FC_COMMITTING); + wq = bit_waitqueue(&ei->i_state_flags, + EXT4_STATE_FC_COMMITTING); +#else + DEFINE_WAIT_BIT(wait, &ei->i_flags, + EXT4_STATE_FC_COMMITTING); + wq = bit_waitqueue(&ei->i_flags, + EXT4_STATE_FC_COMMITTING); +#endif + lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); + schedule(); + finish_wait(wq, &wait.wq_entry); +} + /* * Inform Ext4's fast about start of an inode update * @@ -176,22 +200,7 @@ restart: goto out; if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { - wait_queue_head_t *wq; -#if (BITS_PER_LONG < 64) - DEFINE_WAIT_BIT(wait, &ei->i_state_flags, - EXT4_STATE_FC_COMMITTING); - wq = bit_waitqueue(&ei->i_state_flags, - EXT4_STATE_FC_COMMITTING); -#else - DEFINE_WAIT_BIT(wait, &ei->i_flags, - EXT4_STATE_FC_COMMITTING); - wq = bit_waitqueue(&ei->i_flags, - EXT4_STATE_FC_COMMITTING); -#endif - prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); - spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); - schedule(); - finish_wait(wq, &wait.wq_entry); + ext4_fc_wait_committing_inode(inode); goto restart; } out: @@ -234,26 +243,10 @@ restart: } if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { - wait_queue_head_t *wq; -#if (BITS_PER_LONG < 64) - DEFINE_WAIT_BIT(wait, &ei->i_state_flags, - EXT4_STATE_FC_COMMITTING); - wq = bit_waitqueue(&ei->i_state_flags, - EXT4_STATE_FC_COMMITTING); -#else - DEFINE_WAIT_BIT(wait, &ei->i_flags, - EXT4_STATE_FC_COMMITTING); - wq = bit_waitqueue(&ei->i_flags, - EXT4_STATE_FC_COMMITTING); -#endif - prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); - spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); - schedule(); - finish_wait(wq, &wait.wq_entry); + ext4_fc_wait_committing_inode(inode); goto restart; } - if (!list_empty(&ei->i_fc_list)) - list_del_init(&ei->i_fc_list); + list_del_init(&ei->i_fc_list); spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); } From a740762fb3b36dbdddb63ebe65b71cea3014f1c3 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:03 -0800 Subject: [PATCH 183/710] ext4: fix code documentatioon Add a TODO to remember fixing REQ_FUA | REQ_PREFLUSH for fast commit buffers. Also, fix a typo in top level comment in fast_commit.c Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-15-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index fc5a5e6a581d..639b2a308c7b 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -83,7 +83,7 @@ * * Atomicity of commits * -------------------- - * In order to gaurantee atomicity during the commit operation, fast commit + * In order to guarantee atomicity during the commit operation, fast commit * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail * tag contains CRC of the contents and TID of the transaction after which * this fast commit should be applied. Recovery code replays fast commit @@ -542,6 +542,7 @@ static void ext4_fc_submit_bh(struct super_block *sb) int write_flags = REQ_SYNC; struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; + /* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */ if (test_opt(sb, BARRIER)) write_flags |= REQ_FUA | REQ_PREFLUSH; lock_buffer(bh); From 764b3fd31d131c4b8b5fa064aa94382091923aec Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:04 -0800 Subject: [PATCH 184/710] ext4: mark buf dirty before submitting fast commit buffer Mark the fast commit buffer as dirty before submission. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-16-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 639b2a308c7b..05e6e76a7663 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -546,7 +546,7 @@ static void ext4_fc_submit_bh(struct super_block *sb) if (test_opt(sb, BARRIER)) write_flags |= REQ_FUA | REQ_PREFLUSH; lock_buffer(bh); - clear_buffer_dirty(bh); + set_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = ext4_end_buffer_io_sync; submit_bh(REQ_OP_WRITE, write_flags, bh); From a3114fe747be42351ac1368bd3ad30f695e473a7 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:05 -0800 Subject: [PATCH 185/710] ext4: remove unnecessary fast commit calls from ext4_file_mmap Remove unnecessary calls to ext4_fc_start_update() and ext4_fc_stop_update() from ext4_file_mmap(). Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-17-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d85412d12e3a..80ad5ccc0288 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -761,7 +761,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) if (!daxdev_mapping_supported(vma, dax_dev)) return -EOPNOTSUPP; - ext4_fc_start_update(inode); file_accessed(file); if (IS_DAX(file_inode(file))) { vma->vm_ops = &ext4_dax_vm_ops; @@ -769,7 +768,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) } else { vma->vm_ops = &ext4_file_vm_ops; } - ext4_fc_stop_update(inode); return 0; } From 1ceecb537f72734e4315638e7a1bb62e56c86fbf Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:06 -0800 Subject: [PATCH 186/710] ext4: fix inode dirty check in case of fast commits In case of fast commits, determine if the inode is dirty by checking if the inode is on fast commit list. This also helps us get rid of ext4_inode_info.i_fc_committed_subtid field. Reported-by: Andrea Righi Tested-by: Andrea Righi Reviewed-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-18-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 --- fs/ext4/fast_commit.c | 3 --- fs/ext4/inode.c | 3 +-- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c6339bd75a93..2a89c5bbb61b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1028,9 +1028,6 @@ struct ext4_inode_info { * protected by sbi->s_fc_lock. */ - /* Fast commit subtid when this inode was committed */ - unsigned int i_fc_committed_subtid; - /* Start of lblk range that needs to be committed in this fast commit */ ext4_lblk_t i_fc_lblk_start; diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 05e6e76a7663..6b963e09af2c 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -152,7 +152,6 @@ void ext4_fc_init_inode(struct inode *inode) INIT_LIST_HEAD(&ei->i_fc_list); init_waitqueue_head(&ei->i_fc_wait); atomic_set(&ei->i_fc_updates, 0); - ei->i_fc_committed_subtid = 0; } /* This function must be called with sbi->s_fc_lock held. */ @@ -1037,8 +1036,6 @@ static int ext4_fc_perform_commit(journal_t *journal) if (ret) goto out; spin_lock(&sbi->s_fc_lock); - EXT4_I(inode)->i_fc_committed_subtid = - atomic_read(&sbi->s_fc_subtid); } spin_unlock(&sbi->s_fc_lock); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 87120c4c44f3..000bf70e88ed 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3312,8 +3312,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode) EXT4_I(inode)->i_datasync_tid)) return false; if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT)) - return atomic_read(&EXT4_SB(inode->i_sb)->s_fc_subtid) < - EXT4_I(inode)->i_fc_committed_subtid; + return !list_empty(&EXT4_I(inode)->i_fc_list); return true; } From 556e0319fbb8eee3fa19fdcc27c8bcb4af1c7211 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:07 -0800 Subject: [PATCH 187/710] ext4: disable fast commit with data journalling Fast commits don't work with data journalling. This patch disables the fast commit support when data journalling is turned on. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20201106035911.1942128-19-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 7 +++++++ fs/ext4/fast_commit.h | 1 + fs/ext4/super.c | 3 ++- include/trace/events/ext4.h | 6 ++++-- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 6b963e09af2c..fb9b4e9d82b2 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -483,6 +483,12 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode) if (S_ISDIR(inode->i_mode)) return; + if (ext4_should_journal_data(inode)) { + ext4_fc_mark_ineligible(inode->i_sb, + EXT4_FC_REASON_INODE_JOURNAL_DATA); + return; + } + ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); trace_ext4_fc_track_inode(inode, ret); } @@ -2102,6 +2108,7 @@ const char *fc_ineligible_reasons[] = { "Resize", "Dir renamed", "Falloc range op", + "Data journalling", "FC Commit Failed" }; diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 1d96e0ac8138..3a6e5a1fa1b8 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -102,6 +102,7 @@ enum { EXT4_FC_REASON_RESIZE, EXT4_FC_REASON_RENAME_DIR, EXT4_FC_REASON_FALLOC_RANGE, + EXT4_FC_REASON_INODE_JOURNAL_DATA, EXT4_FC_COMMIT_FAILED, EXT4_FC_REASON_MAX }; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ba02d7c86fb3..9e2e06ed2c45 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4340,9 +4340,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #endif if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { - printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n"); + printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n"); /* can't mount with both data=journal and dioread_nolock. */ clear_opt(sb, DIOREAD_NOLOCK); + clear_opt2(sb, JOURNAL_FAST_COMMIT); if (test_opt2(sb, EXPLICIT_DELALLOC)) { ext4_msg(sb, KERN_ERR, "can't mount with " "both data=journal and delalloc"); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 0f899d3b09d3..70ae5497b73a 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -104,7 +104,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); { EXT4_FC_REASON_SWAP_BOOT, "SWAP_BOOT"}, \ { EXT4_FC_REASON_RESIZE, "RESIZE"}, \ { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \ - { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}) + { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}, \ + { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}) TRACE_EVENT(ext4_other_inode_update_time, TP_PROTO(struct inode *inode, ino_t orig_ino), @@ -2917,7 +2918,7 @@ TRACE_EVENT(ext4_fc_stats, ), TP_printk("dev %d:%d fc ineligible reasons:\n" - "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " + "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " "num_commits:%ld, ineligible: %ld, numblks: %ld", MAJOR(__entry->dev), MINOR(__entry->dev), FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), @@ -2928,6 +2929,7 @@ TRACE_EVENT(ext4_fc_stats, FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), __entry->sbi->s_fc_stats.fc_num_commits, __entry->sbi->s_fc_stats.fc_ineligible_commits, __entry->sbi->s_fc_stats.fc_numblks) From da0c5d2695265962f20099737348fcb3ff524d0f Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:08 -0800 Subject: [PATCH 188/710] ext4: issue fsdev cache flush before starting fast commit If the journal dev is different from fsdev, issue a cache flush before committing fast commit blocks to disk. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20201106035911.1942128-20-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index fb9b4e9d82b2..ebe5f423f8f2 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1007,6 +1007,13 @@ static int ext4_fc_perform_commit(journal_t *journal) if (ret) return ret; + /* + * If file system device is different from journal device, issue a cache + * flush before we start writing fast commit blocks. + */ + if (journal->j_fs_dev != journal->j_dev) + blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS); + blk_start_plug(&plug); if (sbi->s_fc_bytes == 0) { /* From 9b5f6c9b83d912c63ef9fb486a052be79b06f8b0 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:09 -0800 Subject: [PATCH 189/710] ext4: make s_mount_flags modifications atomic Fast commit file system states are recorded in sbi->s_mount_flags. Fast commit expects these bit manipulations to be atomic. This patch adds helpers to make those modifications atomic. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-21-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 40 +++++++++++++++++++++++++++++----------- fs/ext4/fast_commit.c | 18 +++++++++--------- fs/ext4/file.c | 4 ++-- fs/ext4/fsync.c | 2 +- fs/ext4/inode.c | 4 ++-- fs/ext4/mballoc.c | 4 ++-- fs/ext4/super.c | 14 +++++++------- 7 files changed, 52 insertions(+), 34 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2a89c5bbb61b..1b399cafb15a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1419,16 +1419,6 @@ struct ext4_super_block { #ifdef __KERNEL__ -/* - * run-time mount flags - */ -#define EXT4_MF_MNTDIR_SAMPLED 0x0001 -#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ -#define EXT4_MF_FC_INELIGIBLE 0x0004 /* Fast commit ineligible */ -#define EXT4_MF_FC_COMMITTING 0x0008 /* File system underoing a fast - * commit. - */ - #ifdef CONFIG_FS_ENCRYPTION #define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL) #else @@ -1463,7 +1453,7 @@ struct ext4_sb_info { struct buffer_head * __rcu *s_group_desc; unsigned int s_mount_opt; unsigned int s_mount_opt2; - unsigned int s_mount_flags; + unsigned long s_mount_flags; unsigned int s_def_mount_opt; ext4_fsblk_t s_sb_block; atomic64_t s_resv_clusters; @@ -1691,6 +1681,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) _v; \ }) +/* + * run-time mount flags + */ +enum { + EXT4_MF_MNTDIR_SAMPLED, + EXT4_MF_FS_ABORTED, /* Fatal error detected */ + EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ + EXT4_MF_FC_COMMITTING /* File system underoing a fast + * commit. + */ +}; + +static inline void ext4_set_mount_flag(struct super_block *sb, int bit) +{ + set_bit(bit, &EXT4_SB(sb)->s_mount_flags); +} + +static inline void ext4_clear_mount_flag(struct super_block *sb, int bit) +{ + clear_bit(bit, &EXT4_SB(sb)->s_mount_flags); +} + +static inline int ext4_test_mount_flag(struct super_block *sb, int bit) +{ + return test_bit(bit, &EXT4_SB(sb)->s_mount_flags); +} + + /* * Simulate_fail codes */ diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index ebe5f423f8f2..5cd6630ab1b9 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -261,7 +261,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason) (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) return; - sbi->s_mount_flags |= EXT4_MF_FC_INELIGIBLE; + ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); WARN_ON(reason >= EXT4_FC_REASON_MAX); sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; } @@ -294,14 +294,14 @@ void ext4_fc_stop_ineligible(struct super_block *sb) (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) return; - EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FC_INELIGIBLE; + ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates); } static inline int ext4_fc_is_ineligible(struct super_block *sb) { - return (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FC_INELIGIBLE) || - atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates); + return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) || + atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates)); } /* @@ -349,7 +349,7 @@ static int ext4_fc_track_template( spin_lock(&sbi->s_fc_lock); if (list_empty(&EXT4_I(inode)->i_fc_list)) list_add_tail(&EXT4_I(inode)->i_fc_list, - (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING) ? + (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ? &sbi->s_fc_q[FC_Q_STAGING] : &sbi->s_fc_q[FC_Q_MAIN]); spin_unlock(&sbi->s_fc_lock); @@ -402,7 +402,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) node->fcd_name.len = dentry->d_name.len; spin_lock(&sbi->s_fc_lock); - if (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING) + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_STAGING]); else @@ -857,7 +857,7 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal) int ret = 0; spin_lock(&sbi->s_fc_lock); - sbi->s_mount_flags |= EXT4_MF_FC_COMMITTING; + ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING); list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) { ei = list_entry(pos, struct ext4_inode_info, i_fc_list); ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); @@ -1206,8 +1206,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], &sbi->s_fc_q[FC_Q_STAGING]); - sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING; - sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE; + ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); if (full) sbi->s_fc_bytes = 0; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 80ad5ccc0288..3ed8c048fb12 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -780,13 +780,13 @@ static int ext4_sample_last_mounted(struct super_block *sb, handle_t *handle; int err; - if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED)) + if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED))) return 0; if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb)) return 0; - sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED; + ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED); /* * Sample where the filesystem has been mounted and * store it in the superblock for sysadmin convenience diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 81a545fd14a3..a42ca95840f2 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -143,7 +143,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (sb_rdonly(inode->i_sb)) { /* Make sure that we read updated s_mount_flags value */ smp_rmb(); - if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED)) ret = -EROFS; goto out; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 000bf70e88ed..0d8385aea898 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2442,7 +2442,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, struct super_block *sb = inode->i_sb; if (ext4_forced_shutdown(EXT4_SB(sb)) || - EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) + ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) goto invalidate_dirty_pages; /* * Let the uper layers retry transient errors. @@ -2676,7 +2676,7 @@ static int ext4_writepages(struct address_space *mapping, * the stack trace. */ if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) || - sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { + ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) { ret = -EROFS; goto out_writepages; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f067e83f149e..24af9ed5c3e5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4477,7 +4477,7 @@ static inline void ext4_mb_show_pa(struct super_block *sb) { ext4_group_t i, ngroups; - if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) + if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) return; ngroups = ext4_get_groups_count(sb); @@ -4508,7 +4508,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; - if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) + if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) return; mb_debug(sb, "Can't allocate:" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9e2e06ed2c45..7bd02c252f7f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -686,7 +686,7 @@ static void ext4_handle_error(struct super_block *sb) if (!test_opt(sb, ERRORS_CONT)) { journal_t *journal = EXT4_SB(sb)->s_journal; - EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); if (journal) jbd2_journal_abort(journal, -EIO); } @@ -904,7 +904,7 @@ void __ext4_abort(struct super_block *sb, const char *function, va_end(args); if (sb_rdonly(sb) == 0) { - EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); if (EXT4_SB(sb)->s_journal) jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); @@ -2153,7 +2153,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt); return 1; case Opt_abort: - sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); return 1; case Opt_i_version: sb->s_flags |= SB_I_VERSION; @@ -4778,8 +4778,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]); INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); sbi->s_fc_bytes = 0; - sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE; - sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING; + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); spin_lock_init(&sbi->s_fc_lock); memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); sbi->s_fc_replay_state.fc_regions = NULL; @@ -5881,7 +5881,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) + if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | @@ -5895,7 +5895,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { - if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { + if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) { err = -EROFS; goto restore_opts; } From 87a144f09380152d28352ecbcc4c65874e7eb892 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:10 -0800 Subject: [PATCH 190/710] jbd2: don't start fast commit on aborted journal Fast commit should not be started if the journal is aborted. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-22-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f7ebf6ef69af..0c3d5e3b24b2 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -727,6 +727,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) */ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid) { + if (unlikely(is_journal_aborted(journal))) + return -EIO; /* * Fast commits only allowed if at least one full commit has * been processed. From 99c880decf27858b5b0a57d8d811bb50226c3c12 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:11 -0800 Subject: [PATCH 191/710] ext4: cleanup fast commit mount options Drop no_fc mount option that disable fast commit even if it was enabled at mkfs time. Move fc_debug_force mount option under ifdef EXT4_DEBUG to annotate that this is strictly for debugging and testing purposes and should not be used in production. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-23-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7bd02c252f7f..c3b864588a0b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1716,11 +1716,10 @@ enum { Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, - Opt_prefetch_block_bitmaps, Opt_no_fc, + Opt_prefetch_block_bitmaps, #ifdef CONFIG_EXT4_DEBUG - Opt_fc_debug_max_replay, + Opt_fc_debug_max_replay, Opt_fc_debug_force #endif - Opt_fc_debug_force }; static const match_table_t tokens = { @@ -1807,9 +1806,8 @@ static const match_table_t tokens = { {Opt_init_itable, "init_itable=%u"}, {Opt_init_itable, "init_itable"}, {Opt_noinit_itable, "noinit_itable"}, - {Opt_no_fc, "no_fc"}, - {Opt_fc_debug_force, "fc_debug_force"}, #ifdef CONFIG_EXT4_DEBUG + {Opt_fc_debug_force, "fc_debug_force"}, {Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"}, #endif {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, @@ -2039,11 +2037,9 @@ static const struct mount_opts { {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS, MOPT_SET}, - {Opt_no_fc, EXT4_MOUNT2_JOURNAL_FAST_COMMIT, - MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY}, +#ifdef CONFIG_EXT4_DEBUG {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT, MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY}, -#ifdef CONFIG_EXT4_DEBUG {Opt_fc_debug_max_replay, 0, MOPT_GTE0}, #endif {Opt_err, 0, 0} From fa329e27317f7f0762001b9fb1e76c387a9db25d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 6 Nov 2020 23:59:42 -0500 Subject: [PATCH 192/710] ext4: fix sparse warnings in fast_commit code Add missing __acquire() and __releases() annotations, and make fc_ineligible_reasons[] static, as it is not used outside of fs/ext4/fast_commit.c. Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 5cd6630ab1b9..f2033e13a273 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -156,6 +156,7 @@ void ext4_fc_init_inode(struct inode *inode) /* This function must be called with sbi->s_fc_lock held. */ static void ext4_fc_wait_committing_inode(struct inode *inode) +__releases(&EXT4_SB(inode->i_sb)->s_fc_lock) { wait_queue_head_t *wq; struct ext4_inode_info *ei = EXT4_I(inode); @@ -911,6 +912,8 @@ static int ext4_fc_wait_inode_data_all(journal_t *journal) /* Commit all the directory entry updates */ static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc) +__acquires(&sbi->s_fc_lock) +__releases(&sbi->s_fc_lock) { struct super_block *sb = (struct super_block *)(journal->j_private); struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -2106,7 +2109,7 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal) journal->j_fc_cleanup_callback = ext4_fc_cleanup; } -const char *fc_ineligible_reasons[] = { +static const char *fc_ineligible_reasons[] = { "Extended attributes changed", "Cross rename", "Journal flag changed", From 05d5233df85e9621597c5838e95235107eb624a2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 7 Nov 2020 00:00:49 -0500 Subject: [PATCH 193/710] jbd2: fix up sparse warnings in checkpoint code Add missing __acquires() and __releases() annotations. Also, in an "this should never happen" WARN_ON check, if it *does* actually happen, we need to release j_state_lock since this function is always supposed to release that lock. Otherwise, things will quickly grind to a halt after the WARN_ON trips. Fixes: 96f1e0974575 ("jbd2: avoid long hold times of j_state_lock...") Cc: stable@kernel.org Signed-off-by: Theodore Ts'o --- fs/jbd2/checkpoint.c | 2 ++ fs/jbd2/transaction.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 263f02ad8ebf..472932b9e6bc 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -106,6 +106,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) * for a checkpoint to free up some space in the log. */ void __jbd2_log_wait_for_space(journal_t *journal) +__acquires(&journal->j_state_lock) +__releases(&journal->j_state_lock) { int nblocks, space_left; /* assert_spin_locked(&journal->j_state_lock); */ diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 43985738aa86..d54f04674e8e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -195,8 +195,10 @@ static void wait_transaction_switching(journal_t *journal) DEFINE_WAIT(wait); if (WARN_ON(!journal->j_running_transaction || - journal->j_running_transaction->t_state != T_SWITCH)) + journal->j_running_transaction->t_state != T_SWITCH)) { + read_unlock(&journal->j_state_lock); return; + } prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); read_unlock(&journal->j_state_lock); From 446b8185f0c39ac3faadbcd8ac156c50f2fd4ffe Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 2 Nov 2020 15:00:12 +0800 Subject: [PATCH 194/710] ALSA: hda/realtek - Add supported for Lenovo ThinkPad Headset Button Add supported for Lenovo ThinkPad Headset Button. Thinkpad P1 Gen 3 (0x22c1) Thinkpad X1 Extreme Gen 3 (0x22c2) Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/f39b11d00340408ca2ed2df9b4fc2a09@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6899089d132e..c3a02738ead2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6301,6 +6301,7 @@ enum { ALC274_FIXUP_HP_MIC, ALC274_FIXUP_HP_HEADSET_MIC, ALC256_FIXUP_ASUS_HPE, + ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }; static const struct hda_fixup alc269_fixups[] = { @@ -7705,6 +7706,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC }, + [ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_headset_jack, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7966,6 +7973,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), + SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), + SND_PCI_QUIRK(0x17aa, 0x22c2, "Thinkpad X1 Extreme Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), From cc6528bc9a0c901c83b8220a2e2617f3354d6dd9 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 5 Nov 2020 15:28:42 +0100 Subject: [PATCH 195/710] r8169: fix potential skb double free in an error path The caller of rtl8169_tso_csum_v2() frees the skb if false is returned. eth_skb_pad() internally frees the skb on error what would result in a double free. Therefore use __skb_put_padto() directly and instruct it to not free the skb on error. Fixes: b423e9ae49d7 ("r8169: fix offloaded tx checksum for small packets.") Reported-by: Jakub Kicinski Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/f7e68191-acff-9ded-4263-c016428a8762@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 7766d73823eb..4cb43a980ce9 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4163,7 +4163,8 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, opts[1] |= transport_offset << TCPHO_SHIFT; } else { if (unlikely(skb->len < ETH_ZLEN && rtl_test_hw_pad_bug(tp))) - return !eth_skb_pad(skb); + /* eth_skb_pad would free the skb on error */ + return !__skb_put_padto(skb, ETH_ZLEN, false); } return true; From 847f0a2bfd2fe16d6afa537816b313b71f32e139 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 5 Nov 2020 18:14:47 +0100 Subject: [PATCH 196/710] r8169: disable hw csum for short packets on all chip versions RTL8125B has same or similar short packet hw padding bug as RTL8168evl. The main workaround has been extended accordingly, however we have to disable also hw checksumming for short packets on affected new chip versions. Instead of checking for an affected chip version let's simply disable hw checksumming for short packets in general. v2: - remove the version checks and disable short packet hw csum in general - reflect this in commit title and message Fixes: 0439297be951 ("r8169: add support for RTL8125B") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/7fbb35f0-e244-ef65-aa55-3872d7d38698@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 4cb43a980ce9..85d9c3e30c69 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4343,18 +4343,9 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb, rtl_chip_supports_csum_v2(tp)) features &= ~NETIF_F_ALL_TSO; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (skb->len < ETH_ZLEN) { - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_11: - case RTL_GIGA_MAC_VER_12: - case RTL_GIGA_MAC_VER_17: - case RTL_GIGA_MAC_VER_34: - features &= ~NETIF_F_CSUM_MASK; - break; - default: - break; - } - } + /* work around hw bug on some chip versions */ + if (skb->len < ETH_ZLEN) + features &= ~NETIF_F_CSUM_MASK; if (transport_offset > TCPHO_MAX && rtl_chip_supports_csum_v2(tp)) From 4e0396c59559264442963b349ab71f66e471f84d Mon Sep 17 00:00:00 2001 From: Vadym Kochan Date: Fri, 6 Nov 2020 18:11:25 +0200 Subject: [PATCH 197/710] net: marvell: prestera: fix compilation with CONFIG_BRIDGE=m With CONFIG_BRIDGE=m the compilation fails: ld: drivers/net/ethernet/marvell/prestera/prestera_switchdev.o: in function `prestera_bridge_port_event': prestera_switchdev.c:(.text+0x2ebd): undefined reference to `br_vlan_enabled' in case the driver is statically enabled. Fix it by adding 'BRIDGE || BRIDGE=n' dependency. Fixes: e1189d9a5fbe ("net: marvell: prestera: Add Switchdev driver implementation") Reported-by: Randy Dunlap Signed-off-by: Vadym Kochan Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20201106161128.24069-1-vadym.kochan@plvision.eu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/prestera/Kconfig b/drivers/net/ethernet/marvell/prestera/Kconfig index b1fcc44f566a..b6f20e2034c6 100644 --- a/drivers/net/ethernet/marvell/prestera/Kconfig +++ b/drivers/net/ethernet/marvell/prestera/Kconfig @@ -6,6 +6,7 @@ config PRESTERA tristate "Marvell Prestera Switch ASICs support" depends on NET_SWITCHDEV && VLAN_8021Q + depends on BRIDGE || BRIDGE=n select NET_DEVLINK help This driver supports Marvell Prestera Switch ASICs family. From c6c4f961cb879aed67b1343bdef2087c899fdaa9 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sun, 27 Sep 2020 16:44:57 +0800 Subject: [PATCH 198/710] KVM: x86/mmu: fix counting of rmap entries in pte_list_add Fix an off-by-one style bug in pte_list_add() where it failed to account the last full set of SPTEs, i.e. when desc->sptes is full and desc->more is NULL. Merge the two "PTE_LIST_EXT-1" checks as part of the fix to avoid an extra comparison. Signed-off-by: Li RongQing Reviewed-by: Sean Christopherson Message-Id: <1601196297-24104-1-git-send-email-lirongqing@baidu.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1f96adff8dc4..5bb1939b65d8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -856,12 +856,14 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, } else { rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { - desc = desc->more; + while (desc->sptes[PTE_LIST_EXT-1]) { count += PTE_LIST_EXT; - } - if (desc->sptes[PTE_LIST_EXT-1]) { - desc->more = mmu_alloc_pte_list_desc(vcpu); + + if (!desc->more) { + desc->more = mmu_alloc_pte_list_desc(vcpu); + desc = desc->more; + break; + } desc = desc->more; } for (i = 0; desc->sptes[i]; ++i) From 3d20267abc789e6753fce60019bb5945fe8a74f3 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:20:31 -0400 Subject: [PATCH 199/710] KVM: Documentation: Update entry for KVM_X86_SET_MSR_FILTER It should be an accident when rebase, since we've already have section 8.25 (which is KVM_CAP_S390_DIAG318). Fix the number. Signed-off-by: Peter Xu Message-Id: <20201001012044.5151-2-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 36d5f1f3c6dd..e53e3bd99b2c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6367,7 +6367,7 @@ accesses that would usually trigger a #GP by KVM into the guest will instead get bounced to user space through the KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications. -8.25 KVM_X86_SET_MSR_FILTER +8.27 KVM_X86_SET_MSR_FILTER --------------------------- :Architectures: x86 From 177158e5b1a558a28b9ce6b27a14bea588a6f2fb Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 23 Oct 2020 14:33:46 -0400 Subject: [PATCH 200/710] KVM: Documentation: Update entry for KVM_CAP_ENFORCE_PV_CPUID Should be squashed into 66570e966dd9cb4f. Signed-off-by: Peter Xu Message-Id: <20201023183358.50607-3-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e53e3bd99b2c..e00a66d72372 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6381,8 +6381,7 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to trap and emulate MSRs that are outside of the scope of KVM as well as limit the attack surface on KVM's MSR emulation code. - -8.26 KVM_CAP_ENFORCE_PV_CPUID +8.28 KVM_CAP_ENFORCE_PV_CPUID ----------------------------- Architectures: x86 From cc4cb017678aa66d3fb4501b2f7424ed28fc7f4d Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Sun, 1 Nov 2020 13:55:23 +0200 Subject: [PATCH 201/710] KVM: x86: use positive error values for msr emulation that causes #GP Recent introduction of the userspace msr filtering added code that uses negative error codes for cases that result in either #GP delivery to the guest, or handled by the userspace msr filtering. This breaks an assumption that a negative error code returned from the msr emulation code is a semi-fatal error which should be returned to userspace via KVM_RUN ioctl and usually kill the guest. Fix this by reusing the already existing KVM_MSR_RET_INVALID error code, and by adding a new KVM_MSR_RET_FILTERED error code for the userspace filtered msrs. Fixes: 291f35fb2c1d1 ("KVM: x86: report negative values from wrmsr emulation to userspace") Reported-by: Qian Cai Signed-off-by: Maxim Levitsky Message-Id: <20201101115523.115780-1-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 28 +++++++++++++++------------- arch/x86/kvm/x86.h | 8 +++++++- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f5ede41bf9e6..a4c59be8d566 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -255,11 +255,10 @@ static struct kmem_cache *x86_emulator_cache; /* * When called, it means the previous get/set msr reached an invalid msr. - * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want - * to fail the caller. + * Return true if we want to ignore/silent this failed msr access. */ -static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, - u64 data, bool write) +static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, + u64 data, bool write) { const char *op = write ? "wrmsr" : "rdmsr"; @@ -268,11 +267,11 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", op, msr, data); /* Mask the error */ - return 0; + return true; } else { kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", op, msr, data); - return -ENOENT; + return false; } } @@ -1416,7 +1415,8 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) if (r == KVM_MSR_RET_INVALID) { /* Unconditionally clear the output for simplicity */ *data = 0; - r = kvm_msr_ignored_check(vcpu, index, 0, false); + if (kvm_msr_ignored_check(vcpu, index, 0, false)) + r = 0; } if (r) @@ -1540,7 +1540,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, struct msr_data msr; if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) - return -EPERM; + return KVM_MSR_RET_FILTERED; switch (index) { case MSR_FS_BASE: @@ -1581,7 +1581,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, int ret = __kvm_set_msr(vcpu, index, data, host_initiated); if (ret == KVM_MSR_RET_INVALID) - ret = kvm_msr_ignored_check(vcpu, index, data, true); + if (kvm_msr_ignored_check(vcpu, index, data, true)) + ret = 0; return ret; } @@ -1599,7 +1600,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, int ret; if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) - return -EPERM; + return KVM_MSR_RET_FILTERED; msr.index = index; msr.host_initiated = host_initiated; @@ -1618,7 +1619,8 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, if (ret == KVM_MSR_RET_INVALID) { /* Unconditionally clear *data for simplicity */ *data = 0; - ret = kvm_msr_ignored_check(vcpu, index, 0, false); + if (kvm_msr_ignored_check(vcpu, index, 0, false)) + ret = 0; } return ret; @@ -1662,9 +1664,9 @@ static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu) static u64 kvm_msr_reason(int r) { switch (r) { - case -ENOENT: + case KVM_MSR_RET_INVALID: return KVM_MSR_EXIT_REASON_UNKNOWN; - case -EPERM: + case KVM_MSR_RET_FILTERED: return KVM_MSR_EXIT_REASON_FILTER; default: return KVM_MSR_EXIT_REASON_INVAL; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 3900ab0c6004..e7ca622a468f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -376,7 +376,13 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); -#define KVM_MSR_RET_INVALID 2 +/* + * Internal error codes that are used to indicate that MSR emulation encountered + * an error that should result in #GP in the guest, unless userspace + * handles it. + */ +#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */ +#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */ #define __cr4_reserved_bits(__cpu_has, __c) \ ({ \ From 1930e5ddcead2c23567131e62c86b15efce054be Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 27 Oct 2020 16:10:41 -0700 Subject: [PATCH 202/710] kvm: x86: reads of restricted pv msrs should also result in #GP commit 66570e966dd9 ("kvm: x86: only provide PV features if enabled in guest's CPUID") only protects against disallowed guest writes to KVM paravirtual msrs, leaving msr reads unchecked. Fix this by enforcing KVM_CPUID_FEATURES for msr reads as well. Fixes: 66570e966dd9 ("kvm: x86: only provide PV features if enabled in guest's CPUID") Signed-off-by: Oliver Upton Reviewed-by: Peter Shier Message-Id: <20201027231044.655110-4-oupton@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a4c59be8d566..d12820acc548 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3465,29 +3465,63 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.efer; break; case MSR_KVM_WALL_CLOCK: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE)) + return 1; + + msr_info->data = vcpu->kvm->arch.wall_clock; + break; case MSR_KVM_WALL_CLOCK_NEW: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2)) + return 1; + msr_info->data = vcpu->kvm->arch.wall_clock; break; case MSR_KVM_SYSTEM_TIME: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE)) + return 1; + + msr_info->data = vcpu->arch.time; + break; case MSR_KVM_SYSTEM_TIME_NEW: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2)) + return 1; + msr_info->data = vcpu->arch.time; break; case MSR_KVM_ASYNC_PF_EN: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + return 1; + msr_info->data = vcpu->arch.apf.msr_en_val; break; case MSR_KVM_ASYNC_PF_INT: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) + return 1; + msr_info->data = vcpu->arch.apf.msr_int_val; break; case MSR_KVM_ASYNC_PF_ACK: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + return 1; + msr_info->data = 0; break; case MSR_KVM_STEAL_TIME: + if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME)) + return 1; + msr_info->data = vcpu->arch.st.msr_val; break; case MSR_KVM_PV_EOI_EN: + if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI)) + return 1; + msr_info->data = vcpu->arch.pv_eoi.msr_val; break; case MSR_KVM_POLL_CONTROL: + if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) + return 1; + msr_info->data = vcpu->arch.msr_kvm_poll_control; break; case MSR_IA32_P5_MC_ADDR: From 01b4f510b9f467abfc781e198e810e1ecffb782e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 27 Oct 2020 16:10:42 -0700 Subject: [PATCH 203/710] kvm: x86: ensure pv_cpuid.features is initialized when enabling cap Make the paravirtual cpuid enforcement mechanism idempotent to ioctl() ordering by updating pv_cpuid.features whenever userspace requests the capability. Extract this update out of kvm_update_cpuid_runtime() into a new helper function and move its other call site into kvm_vcpu_after_set_cpuid() where it more likely belongs. Fixes: 66570e966dd9 ("kvm: x86: only provide PV features if enabled in guest's CPUID") Signed-off-by: Oliver Upton Reviewed-by: Peter Shier Message-Id: <20201027231044.655110-5-oupton@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 23 ++++++++++++++++------- arch/x86/kvm/cpuid.h | 1 + arch/x86/kvm/x86.c | 2 ++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 06a278b3701d..d50041f570e8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -90,6 +90,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent) return 0; } +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); + + /* + * save the feature bitmap to avoid cpuid lookup for every PV + * operation + */ + if (best) + vcpu->arch.pv_cpuid.features = best->eax; +} + void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -124,13 +138,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); - /* - * save the feature bitmap to avoid cpuid lookup for every PV - * operation - */ - if (best) - vcpu->arch.pv_cpuid.features = best->eax; - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { best = kvm_find_cpuid_entry(vcpu, 0x1, 0); if (best) @@ -162,6 +169,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = (best->eax | ((u64)best->edx << 32)) & supported_xcr0; + kvm_update_pv_runtime(vcpu); + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); kvm_mmu_reset_context(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index bf8577947ed2..f7a6e8f83783 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -11,6 +11,7 @@ extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly; void kvm_set_cpu_caps(void); void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d12820acc548..948561183fd1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4611,6 +4611,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: vcpu->arch.pv_cpuid.enforce = cap->args[0]; + if (vcpu->arch.pv_cpuid.enforce) + kvm_update_pv_runtime(vcpu); return 0; From 1e293d1ae88cd0e2a0ad4c275f5dc2d8ae7b4387 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 27 Oct 2020 16:10:43 -0700 Subject: [PATCH 204/710] kvm: x86: request masterclock update any time guest uses different msr Commit 5b9bb0ebbcdc ("kvm: x86: encapsulate wrmsr(MSR_KVM_SYSTEM_TIME) emulation in helper fn", 2020-10-21) subtly changed the behavior of guest writes to MSR_KVM_SYSTEM_TIME(_NEW). Restore the previous behavior; update the masterclock any time the guest uses a different msr than before. Fixes: 5b9bb0ebbcdc ("kvm: x86: encapsulate wrmsr(MSR_KVM_SYSTEM_TIME) emulation in helper fn", 2020-10-21) Signed-off-by: Oliver Upton Reviewed-by: Peter Shier Message-Id: <20201027231044.655110-6-oupton@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 948561183fd1..d9651faabe68 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1967,7 +1967,7 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time, struct kvm_arch *ka = &vcpu->kvm->arch; if (vcpu->vcpu_id == 0 && !host_initiated) { - if (ka->boot_vcpu_runs_old_kvmclock && old_msr) + if (ka->boot_vcpu_runs_old_kvmclock != old_msr) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ka->boot_vcpu_runs_old_kvmclock = old_msr; From 2cdef91cf882abc74dd2f6bfae16db782b44c6ce Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 5 Nov 2020 16:39:32 +0100 Subject: [PATCH 205/710] KVM: x86: handle MSR_IA32_DEBUGCTLMSR with report_ignored_msrs Windows2016 guest tries to enable LBR by setting the corresponding bits in MSR_IA32_DEBUGCTLMSR. KVM does not emulate MSR_IA32_DEBUGCTLMSR and spams the host kernel logs with error messages like: kvm [...]: vcpu1, guest rIP: 0xfffff800a8b687d3 kvm_set_msr_common: MSR_IA32_DEBUGCTLMSR 0x1, nop" This patch fixes this by enabling error logging only with 'report_ignored_msrs=1'. Signed-off-by: Pankaj Gupta Message-Id: <20201105153932.24316-1-pankaj.gupta.linux@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d9651faabe68..447edc0d1d5a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3065,9 +3065,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Values other than LBR and BTF are vendor-specific, thus reserved and should throw a #GP */ return 1; - } - vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", - __func__, data); + } else if (report_ignored_msrs) + vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", + __func__, data); break; case 0x200 ... 0x2ff: return kvm_mtrr_set_msr(vcpu, msr, data); From 8519873d19120c5046e4124d18a9c09eec20eab9 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 26 Oct 2020 11:54:41 -0500 Subject: [PATCH 206/710] drm: bridge: cdns: Kconfig: Switch over dependency to ARCH_K3 With the integration of chip-id detection scheme in kernel[1], there is no specific need to maintain multitudes of SoC specific config options, discussed as per [2], we have deprecated the usage in other places for v5.10-rc1. Fix the missing user so that we can clean up the configs in v5.11. [1] drivers/soc/ti/k3-socinfo.c commit 907a2b7e2fc7 ("soc: ti: add k3 platforms chipid module driver") [2] https://lore.kernel.org/linux-arm-kernel/20200908112534.t5bgrjf7y3a6l2ss@akan/ Fixes: afba7e6c5fc1 ("drm: bridge: cdns-mhdp8546: Add TI J721E wrapper") Cc: Swapnil Jakhade Cc: Tomi Valkeinen Cc: Laurent Pinchart Cc: Yuti Amonkar Cc: Jyri Sarha Signed-off-by: Nishanth Menon Reviewed-by: Tomi Valkeinen Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20201026165441.22894-1-nm@ti.com --- drivers/gpu/drm/bridge/cadence/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index 511d67b16d14..ef8c230e0f62 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig +++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -13,7 +13,7 @@ config DRM_CDNS_MHDP8546 if DRM_CDNS_MHDP8546 config DRM_CDNS_MHDP8546_J721E - depends on ARCH_K3_J721E_SOC || COMPILE_TEST + depends on ARCH_K3 || COMPILE_TEST bool "J721E Cadence DPI/DP wrapper support" default y help From df11f7dd5834146defa448acba097e8d7703cc42 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:13 -0700 Subject: [PATCH 207/710] selftests: kvm: Fix the segment descriptor layout to match the actual layout Fix the layout of 'struct desc64' to match the layout described in the SDM Vol 3, Chapter 3 "Protected-Mode Memory Management", section 3.4.5 "Segment Descriptors", Figure 3-8 "Segment Descriptor". The test added later in this series relies on this and crashes if this layout is not correct. Signed-off-by: Aaron Lewis Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-2-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +- tools/testing/selftests/kvm/lib/x86_64/processor.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 82b7fe16a824..0a65e7bb5249 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -59,7 +59,7 @@ struct gpr64_regs { struct desc64 { uint16_t limit0; uint16_t base0; - unsigned base1:8, s:1, type:4, dpl:2, p:1; + unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; uint32_t base3; uint32_t zero1; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index f6eb34eaa0d2..1ccf6c9b3476 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -392,11 +392,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->limit0 = segp->limit & 0xFFFF; desc->base0 = segp->base & 0xFFFF; desc->base1 = segp->base >> 16; - desc->s = segp->s; desc->type = segp->type; + desc->s = segp->s; desc->dpl = segp->dpl; desc->p = segp->present; desc->limit1 = segp->limit >> 16; + desc->avl = segp->avl; desc->l = segp->l; desc->db = segp->db; desc->g = segp->g; From 85f2a4320ef27ce74b9da0631460561028c48756 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:14 -0700 Subject: [PATCH 208/710] selftests: kvm: Clear uc so UCALL_NONE is being properly reported Ensure the out value 'uc' in get_ucall() is properly reporting UCALL_NONE if the call fails. The return value will be correctly reported, however, the out parameter 'uc' will not be. Clear the struct to ensure the correct value is being reported in the out parameter. Signed-off-by: Aaron Lewis Reviewed-by: Andrew Jones Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-3-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/aarch64/ucall.c | 3 +++ tools/testing/selftests/kvm/lib/s390x/ucall.c | 3 +++ tools/testing/selftests/kvm/lib/x86_64/ucall.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index c8e0ec20d3bf..2f37b90ee1a9 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { vm_vaddr_t gva; diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index fd589dc9bfab..9d3b0f15249a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_S390_SIEIC && run->s390_sieic.icptcode == 4 && (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index da4d89ad5419..a3489973e290 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; From 29faeb9632012d6c3fa4aa33c3d589b9ff18b206 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:15 -0700 Subject: [PATCH 209/710] selftests: kvm: Add exception handling to selftests Add the infrastructure needed to enable exception handling in selftests. This allows any of the exception and interrupt vectors to be overridden in the guest. Signed-off-by: Aaron Lewis Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-4-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 19 ++- .../testing/selftests/kvm/include/kvm_util.h | 2 + .../selftests/kvm/include/x86_64/processor.h | 24 ++++ .../selftests/kvm/lib/aarch64/processor.c | 4 + tools/testing/selftests/kvm/lib/kvm_util.c | 3 + .../selftests/kvm/lib/kvm_util_internal.h | 2 + .../selftests/kvm/lib/s390x/processor.c | 4 + .../selftests/kvm/lib/x86_64/handlers.S | 81 +++++++++++++ .../selftests/kvm/lib/x86_64/processor.c | 114 +++++++++++++++++- 9 files changed, 244 insertions(+), 9 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/x86_64/handlers.S diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 30afbad36cd5..b0f1fcab79f5 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -34,7 +34,7 @@ ifeq ($(ARCH),s390) endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c -LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c +LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c @@ -111,14 +111,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option) include ../lib.mk STATIC_LIBS := $(OUTPUT)/libkvm.a -LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) -EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.* +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.* -x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) -$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c +x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) +$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS) $(AR) crs $@ $^ x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 919e161dd289..356930690bea 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -294,6 +294,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); memcpy(&(g), _p, sizeof(g)); \ }) +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid); + /* Common ucalls */ enum { UCALL_NONE, diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 0a65e7bb5249..02530dc6339b 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -36,6 +36,8 @@ #define X86_CR4_SMAP (1ul << 21) #define X86_CR4_PKE (1ul << 22) +#define UNEXPECTED_VECTOR_PORT 0xfff0u + /* General Registers in 64-Bit Mode */ struct gpr64_regs { u64 rax; @@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void) return idt; } +static inline void outl(uint16_t port, uint32_t value) +{ + __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); +} + #define SET_XMM(__var, __xmm) \ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) @@ -338,6 +345,23 @@ uint32_t kvm_get_cpuid_max_basic(void); uint32_t kvm_get_cpuid_max_extended(void); void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); +struct ex_regs { + uint64_t rax, rcx, rdx, rbx; + uint64_t rbp, rsi, rdi; + uint64_t r8, r9, r10, r11; + uint64_t r12, r13, r14, r15; + uint64_t vector; + uint64_t error_code; + uint64_t rip; + uint64_t cs; + uint64_t rflags; +}; + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 2afa6618b396..d6c32c328e9a 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) va_end(ap); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 3327cebc1095..be230f3728d5 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1204,6 +1204,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) do { rc = ioctl(vcpu->fd, KVM_RUN, NULL); } while (rc == -1 && errno == EINTR); + + assert_on_unhandled_exception(vm, vcpuid); + return rc; } diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 2ef446520748..f07d383d03a1 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -50,6 +50,8 @@ struct kvm_vm { vm_paddr_t pgd; vm_vaddr_t gdt; vm_vaddr_t tss; + vm_vaddr_t idt; + vm_vaddr_t handlers; }; struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index a88c5d665725..7349bb2e1a24 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S new file mode 100644 index 000000000000..aaf7bc7d2ce1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -0,0 +1,81 @@ +handle_exception: + push %r15 + push %r14 + push %r13 + push %r12 + push %r11 + push %r10 + push %r9 + push %r8 + + push %rdi + push %rsi + push %rbp + push %rbx + push %rdx + push %rcx + push %rax + mov %rsp, %rdi + + call route_exception + + pop %rax + pop %rcx + pop %rdx + pop %rbx + pop %rbp + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %r12 + pop %r13 + pop %r14 + pop %r15 + + /* Discard vector and error code. */ + add $16, %rsp + iretq + +/* + * Build the handle_exception wrappers which push the vector/error code on the + * stack and an array of pointers to those wrappers. + */ +.pushsection .rodata +.globl idt_handlers +idt_handlers: +.popsection + +.macro HANDLERS has_error from to + vector = \from + .rept \to - \from + 1 + .align 8 + + /* Fetch current address and append it to idt_handlers. */ + current_handler = . +.pushsection .rodata +.quad current_handler +.popsection + + .if ! \has_error + pushq $0 + .endif + pushq $vector + jmp handle_exception + vector = vector + 1 + .endr +.endm + +.global idt_handler_code +idt_handler_code: + HANDLERS has_error=0 from=0 to=7 + HANDLERS has_error=1 from=8 to=8 + HANDLERS has_error=0 from=9 to=9 + HANDLERS has_error=1 from=10 to=14 + HANDLERS has_error=0 from=15 to=16 + HANDLERS has_error=1 from=17 to=17 + HANDLERS has_error=0 from=18 to=255 + +.section .note.GNU-stack, "", %progbits diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 1ccf6c9b3476..66228297ac03 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -12,9 +12,18 @@ #include "../kvm_util_internal.h" #include "processor.h" +#ifndef NUM_INTERRUPTS +#define NUM_INTERRUPTS 256 +#endif + +#define DEFAULT_CODE_SELECTOR 0x8 +#define DEFAULT_DATA_SELECTOR 0x10 + /* Minimum physical address used for virtual translation tables. */ #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 +vm_vaddr_t exception_handlers; + /* Virtual translation table structure declarations */ struct pageMapL4Entry { uint64_t present:1; @@ -557,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es); + kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); break; @@ -1119,3 +1128,102 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) *va_bits = (entry->eax >> 8) & 0xff; } } + +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +void kvm_exit_unexpected_vector(uint32_t value) +{ + outl(UNEXPECTED_VECTOR_PORT, value); +} + +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; + + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; + } + + kvm_exit_unexpected_vector(regs->vector); +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + int i; + + vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0); + vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0); + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, + DEFAULT_CODE_SELECTOR); +} + +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_sregs sregs; + + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs.idt.base = vm->idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->gdt; + sregs.gdt.limit = getpagesize() - 1; + kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); + vcpu_sregs_set(vm, vcpuid, &sregs); + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; +} + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ + if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO + && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT + && vcpu_state(vm, vcpuid)->io.size == 4) { + /* Grab pointer to io data */ + uint32_t *data = (void *)vcpu_state(vm, vcpuid) + + vcpu_state(vm, vcpuid)->io.data_offset; + + TEST_ASSERT(false, + "Unexpected vectored event in guest (vector:0x%x)", + *data); + } +} From ac4a4d6de22e674cd6e3fe57199a15383496aad2 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 27 Oct 2020 16:10:44 -0700 Subject: [PATCH 210/710] selftests: kvm: test enforcement of paravirtual cpuid features Add a set of tests that ensure the guest cannot access paravirtual msrs and hypercalls that have been disabled in the KVM_CPUID_FEATURES leaf. Expect a #GP in the case of msr accesses and -KVM_ENOSYS from hypercalls. Cc: Jim Mattson Signed-off-by: Oliver Upton Reviewed-by: Peter Shier Reviewed-by: Aaron Lewis Message-Id: <20201027231044.655110-7-oupton@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../testing/selftests/kvm/include/kvm_util.h | 3 + .../selftests/kvm/include/x86_64/processor.h | 12 + tools/testing/selftests/kvm/lib/kvm_util.c | 28 +++ .../selftests/kvm/lib/x86_64/processor.c | 29 +++ .../selftests/kvm/x86_64/kvm_pv_test.c | 234 ++++++++++++++++++ 7 files changed, 308 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/kvm_pv_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index d2c2d6205008..22973aa75eda 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,6 +5,7 @@ /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test +/x86_64/kvm_pv_test /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index b0f1fcab79f5..44ca6badb544 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -41,6 +41,7 @@ LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 356930690bea..77f53dbc34ff 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -63,6 +63,9 @@ enum vm_mem_backing_src_type { int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap); +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 02530dc6339b..8e61340b3911 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -362,6 +362,18 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); void vm_handle_exception(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); +/* + * set_cpuid() - overwrites a matching cpuid entry with the provided value. + * matches based on ent->function && ent->index. returns true + * if a match was found and successfully overwritten. + * @cpuid: the kvm cpuid list to modify. + * @ent: cpuid entry to insert + */ +bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent); + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index be230f3728d5..91e547031d8c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -86,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } +/* VCPU Enable Capability + * + * Input Args: + * vm - Virtual Machine + * vcpu_id - VCPU + * cap - Capability + * + * Output Args: None + * + * Return: On success, 0. On failure a TEST_ASSERT failure is produced. + * + * Enables a capability (KVM_CAP_*) on the VCPU. + */ +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpu_id); + int r; + + TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id); + + r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap); + TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n" + " rc: %i, errno: %i", r, errno); + + return r; +} + static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 66228297ac03..d10c5c05bdf0 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1227,3 +1227,32 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) *data); } } + +bool set_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 *ent) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; + + if (cur->function != ent->function || cur->index != ent->index) + continue; + + memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2)); + return true; + } + + return false; +} + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3) +{ + uint64_t r; + + asm volatile("vmcall" + : "=a"(r) + : "b"(a0), "c"(a1), "d"(a2), "S"(a3)); + return r; +} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c new file mode 100644 index 000000000000..b10a27485bad --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for KVM paravirtual feature disablement + */ +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +extern unsigned char rdmsr_start; +extern unsigned char rdmsr_end; + +static u64 do_rdmsr(u32 idx) +{ + u32 lo, hi; + + asm volatile("rdmsr_start: rdmsr;" + "rdmsr_end:" + : "=a"(lo), "=c"(hi) + : "c"(idx)); + + return (((u64) hi) << 32) | lo; +} + +extern unsigned char wrmsr_start; +extern unsigned char wrmsr_end; + +static void do_wrmsr(u32 idx, u64 val) +{ + u32 lo, hi; + + lo = val; + hi = val >> 32; + + asm volatile("wrmsr_start: wrmsr;" + "wrmsr_end:" + : : "a"(lo), "c"(idx), "d"(hi)); +} + +static int nr_gp; + +static void guest_gp_handler(struct ex_regs *regs) +{ + unsigned char *rip = (unsigned char *)regs->rip; + bool r, w; + + r = rip == &rdmsr_start; + w = rip == &wrmsr_start; + GUEST_ASSERT(r || w); + + nr_gp++; + + if (r) + regs->rip = (uint64_t)&rdmsr_end; + else + regs->rip = (uint64_t)&wrmsr_end; +} + +struct msr_data { + uint32_t idx; + const char *name; +}; + +#define TEST_MSR(msr) { .idx = msr, .name = #msr } +#define UCALL_PR_MSR 0xdeadbeef +#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr) + +/* + * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or + * written, as the KVM_CPUID_FEATURES leaf is cleared. + */ +static struct msr_data msrs_to_test[] = { + TEST_MSR(MSR_KVM_SYSTEM_TIME), + TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW), + TEST_MSR(MSR_KVM_WALL_CLOCK), + TEST_MSR(MSR_KVM_WALL_CLOCK_NEW), + TEST_MSR(MSR_KVM_ASYNC_PF_EN), + TEST_MSR(MSR_KVM_STEAL_TIME), + TEST_MSR(MSR_KVM_PV_EOI_EN), + TEST_MSR(MSR_KVM_POLL_CONTROL), + TEST_MSR(MSR_KVM_ASYNC_PF_INT), + TEST_MSR(MSR_KVM_ASYNC_PF_ACK), +}; + +static void test_msr(struct msr_data *msr) +{ + PR_MSR(msr); + do_rdmsr(msr->idx); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + + nr_gp = 0; + do_wrmsr(msr->idx, 0); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + nr_gp = 0; +} + +struct hcall_data { + uint64_t nr; + const char *name; +}; + +#define TEST_HCALL(hc) { .nr = hc, .name = #hc } +#define UCALL_PR_HCALL 0xdeadc0de +#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc) + +/* + * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding + * features have been cleared in KVM_CPUID_FEATURES. + */ +static struct hcall_data hcalls_to_test[] = { + TEST_HCALL(KVM_HC_KICK_CPU), + TEST_HCALL(KVM_HC_SEND_IPI), + TEST_HCALL(KVM_HC_SCHED_YIELD), +}; + +static void test_hcall(struct hcall_data *hc) +{ + uint64_t r; + + PR_HCALL(hc); + r = kvm_hypercall(hc->nr, 0, 0, 0, 0); + GUEST_ASSERT(r == -KVM_ENOSYS); +} + +static void guest_main(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) { + test_msr(&msrs_to_test[i]); + } + + for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) { + test_hcall(&hcalls_to_test[i]); + } + + GUEST_DONE(); +} + +static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid) +{ + struct kvm_cpuid_entry2 ent = {0}; + + ent.function = KVM_CPUID_FEATURES; + TEST_ASSERT(set_cpuid(cpuid, &ent), + "failed to clear KVM_CPUID_FEATURES leaf"); +} + +static void pr_msr(struct ucall *uc) +{ + struct msr_data *msr = (struct msr_data *)uc->args[0]; + + pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx); +} + +static void pr_hcall(struct ucall *uc) +{ + struct hcall_data *hc = (struct hcall_data *)uc->args[0]; + + pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr); +} + +static void handle_abort(struct ucall *uc) +{ + TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], + __FILE__, uc->args[1]); +} + +#define VCPU_ID 0 + +static void enter_guest(struct kvm_vm *vm) +{ + struct kvm_run *run; + struct ucall uc; + int r; + + run = vcpu_state(vm, VCPU_ID); + + while (true) { + r = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_PR_MSR: + pr_msr(&uc); + break; + case UCALL_PR_HCALL: + pr_hcall(&uc); + break; + case UCALL_ABORT: + handle_abort(&uc); + return; + case UCALL_DONE: + return; + } + } +} + +int main(void) +{ + struct kvm_enable_cap cap = {0}; + struct kvm_cpuid2 *best; + struct kvm_vm *vm; + + if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) { + pr_info("will skip kvm paravirt restriction tests.\n"); + return 0; + } + + vm = vm_create_default(VCPU_ID, 0, guest_main); + + cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID; + cap.args[0] = 1; + vcpu_enable_cap(vm, VCPU_ID, &cap); + + best = kvm_get_supported_cpuid(); + clear_kvm_cpuid_features(best); + vcpu_set_cpuid(vm, VCPU_ID, best); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + + enter_guest(vm); + kvm_vm_free(vm); +} From fd02029a9e019e941835e110651486e2d77d3f84 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 29 Oct 2020 21:17:01 +0100 Subject: [PATCH 211/710] KVM: selftests: Add aarch64 get-reg-list test Check for KVM_GET_REG_LIST regressions. The blessed list was created by running on v4.15 with the --core-reg-fixup option. The following script was also used in order to annotate system registers with their names when possible. When new system registers are added the names can just be added manually using the same grep. while read reg; do if [[ ! $reg =~ ARM64_SYS_REG ]]; then printf "\t$reg\n" continue fi encoding=$(echo "$reg" | sed "s/ARM64_SYS_REG(//;s/),//") if ! name=$(grep "$encoding" ../../../../arch/arm64/include/asm/sysreg.h); then printf "\t$reg\n" continue fi name=$(echo "$name" | sed "s/.*SYS_//;s/[\t ]*sys_reg($encoding)$//") printf "\t$reg\t/* $name */\n" done < <(aarch64/get-reg-list --core-reg-fixup --list) Signed-off-by: Andrew Jones Message-Id: <20201029201703.102716-3-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/aarch64/get-reg-list.c | 671 ++++++++++++++++++ .../testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 29 + 5 files changed, 703 insertions(+) create mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 22973aa75eda..2034765862a2 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +/aarch64/get-reg-list /s390x/memop /s390x/resets /s390x/sync_regs_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 44ca6badb544..771455ae4a1b 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -66,6 +66,7 @@ TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c new file mode 100644 index 000000000000..3ff097f6886e --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -0,0 +1,671 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (C) 2020, Red Hat, Inc. + * + * When attempting to migrate from a host with an older kernel to a host + * with a newer kernel we allow the newer kernel on the destination to + * list new registers with get-reg-list. We assume they'll be unused, at + * least until the guest reboots, and so they're relatively harmless. + * However, if the destination host with the newer kernel is missing + * registers which the source host with the older kernel has, then that's + * a regression in get-reg-list. This test checks for that regression by + * checking the current list against a blessed list. We should never have + * missing registers, but if new ones appear then they can probably be + * added to the blessed list. A completely new blessed list can be created + * by running the test with the --list command line argument. + * + * Note, the blessed list should be created from the oldest possible + * kernel. We can't go older than v4.15, though, because that's the first + * release to expose the ID system registers in KVM_GET_REG_LIST, see + * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features + * from guests"). Also, one must use the --core-reg-fixup command line + * option when running on an older kernel that doesn't include df205b5c6328 + * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST") + */ +#include +#include +#include +#include "kvm_util.h" +#include "test_util.h" + +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) + +#define for_each_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) + +#define for_each_missing_reg(i) \ + for ((i) = 0; (i) < blessed_n; ++(i)) \ + if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) + +#define for_each_new_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) \ + if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + + +static struct kvm_reg_list *reg_list; + +static __u64 blessed_reg[]; +static __u64 blessed_n; + +static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) +{ + int i; + + for (i = 0; i < nr_regs; ++i) + if (reg == regs[i]) + return true; + return false; +} + +static const char *str_with_index(const char *template, __u64 index) +{ + char *str, *p; + int n; + + str = strdup(template); + p = strstr(str, "##"); + n = sprintf(p, "%lld", index); + strcat(p + n, strstr(template, "##") + 2); + + return (const char *)str; +} + +#define CORE_REGS_XX_NR_WORDS 2 +#define CORE_SPSR_XX_NR_WORDS 2 +#define CORE_FPREGS_XX_NR_WORDS 4 + +static const char *core_id_to_str(__u64 id) +{ + __u64 core_off = id & ~REG_MASK, idx; + + /* + * core_off is the offset into struct kvm_regs + */ + switch (core_off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; + TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); + case KVM_REG_ARM_CORE_REG(regs.sp): + return "KVM_REG_ARM_CORE_REG(regs.sp)"; + case KVM_REG_ARM_CORE_REG(regs.pc): + return "KVM_REG_ARM_CORE_REG(regs.pc)"; + case KVM_REG_ARM_CORE_REG(regs.pstate): + return "KVM_REG_ARM_CORE_REG(regs.pstate)"; + case KVM_REG_ARM_CORE_REG(sp_el1): + return "KVM_REG_ARM_CORE_REG(sp_el1)"; + case KVM_REG_ARM_CORE_REG(elr_el1): + return "KVM_REG_ARM_CORE_REG(elr_el1)"; + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; + TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; + TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; + } + + TEST_FAIL("Unknown core reg id: 0x%llx", id); + return NULL; +} + +static void print_reg(__u64 id) +{ + unsigned op0, op1, crn, crm, op2; + const char *reg_size = NULL; + + TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, + "KVM_REG_ARM64 missing in reg id: 0x%llx", id); + + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U8: + reg_size = "KVM_REG_SIZE_U8"; + break; + case KVM_REG_SIZE_U16: + reg_size = "KVM_REG_SIZE_U16"; + break; + case KVM_REG_SIZE_U32: + reg_size = "KVM_REG_SIZE_U32"; + break; + case KVM_REG_SIZE_U64: + reg_size = "KVM_REG_SIZE_U64"; + break; + case KVM_REG_SIZE_U128: + reg_size = "KVM_REG_SIZE_U128"; + break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; + case KVM_REG_SIZE_U512: + reg_size = "KVM_REG_SIZE_U512"; + break; + case KVM_REG_SIZE_U1024: + reg_size = "KVM_REG_SIZE_U1024"; + break; + case KVM_REG_SIZE_U2048: + reg_size = "KVM_REG_SIZE_U2048"; + break; + default: + TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + } + + switch (id & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id)); + break; + case KVM_REG_ARM_DEMUX: + TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), + "Unexpected bits set in DEMUX reg id: 0x%llx", id); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", + reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); + break; + case KVM_REG_ARM64_SYSREG: + op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT; + op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT; + crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT; + crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; + op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; + TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), + "Unexpected bits set in SYSREG reg id: 0x%llx", id); + printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); + break; + case KVM_REG_ARM_FW: + TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), + "Unexpected bits set in FW reg id: 0x%llx", id); + printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); + break; + case KVM_REG_ARM64_SVE: + TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + break; + default: + TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); + } +} + +/* + * Older kernels listed each 32-bit word of CORE registers separately. + * For 64 and 128-bit registers we need to ignore the extra words. We + * also need to fixup the sizes, because the older kernels stated all + * registers were 64-bit, even when they weren't. + */ +static void core_reg_fixup(void) +{ + struct kvm_reg_list *tmp; + __u64 id, core_off; + int i; + + tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64)); + + for (i = 0; i < reg_list->n; ++i) { + id = reg_list->reg[i]; + + if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) { + tmp->reg[tmp->n++] = id; + continue; + } + + core_off = id & ~REG_MASK; + + switch (core_off) { + case 0x52: case 0xd2: case 0xd6: + /* + * These offsets are pointing at padding. + * We need to ignore them too. + */ + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + if (core_off & 3) + continue; + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U128; + tmp->reg[tmp->n++] = id; + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U32; + tmp->reg[tmp->n++] = id; + continue; + default: + if (core_off & 1) + continue; + tmp->reg[tmp->n++] = id; + break; + } + } + + free(reg_list); + reg_list = tmp; +} + +int main(int ac, char **av) +{ + int new_regs = 0, missing_regs = 0, i; + int failed_get = 0, failed_set = 0; + bool print_list = false, fixup_core_regs = false; + struct kvm_vm *vm; + + for (i = 1; i < ac; ++i) { + if (strcmp(av[i], "--core-reg-fixup") == 0) + fixup_core_regs = true; + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else + fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); + } + + vm = vm_create_default(0, 0, NULL); + reg_list = vcpu_get_reg_list(vm, 0); + + if (fixup_core_regs) + core_reg_fixup(); + + if (print_list) { + putchar('\n'); + for_each_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + return 0; + } + + /* + * We only test that we can get the register and then write back the + * same value. Some registers may allow other values to be written + * back, but others only allow some bits to be changed, and at least + * for ID registers set will fail if the value does not exactly match + * what was returned by get. If registers that allow other values to + * be written need to have the other values tested, then we should + * create a new set of tests for those in a new independent test + * executable. + */ + for_each_reg(i) { + uint8_t addr[2048 / 8]; + struct kvm_one_reg reg = { + .id = reg_list->reg[i], + .addr = (__u64)&addr, + }; + int ret; + + ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, ®); + if (ret) { + puts("Failed to get "); + print_reg(reg.id); + putchar('\n'); + ++failed_get; + } + + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret) { + puts("Failed to set "); + print_reg(reg.id); + putchar('\n'); + ++failed_set; + } + } + + for_each_new_reg(i) + ++new_regs; + + for_each_missing_reg(i) + ++missing_regs; + + if (new_regs || missing_regs) { + printf("Number blessed registers: %5lld\n", blessed_n); + printf("Number registers: %5lld\n", reg_list->n); + } + + if (new_regs) { + printf("\nThere are %d new registers.\n" + "Consider adding them to the blessed reg " + "list with the following lines:\n\n", new_regs); + for_each_new_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + } + + if (missing_regs) { + printf("\nThere are %d missing registers.\n" + "The following lines are missing registers:\n\n", missing_regs); + for_each_missing_reg(i) + print_reg(blessed_reg[i]); + putchar('\n'); + } + + TEST_ASSERT(!missing_regs && !failed_get && !failed_set, + "There are %d missing registers; %d registers failed get; %d registers failed set", + missing_regs, failed_get, failed_set); + + return 0; +} + +/* + * The current blessed list was primed with the output of kernel version + * v4.15 with --core-reg-fixup and then later updated with new registers. + */ +static __u64 blessed_reg[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), + KVM_REG_ARM_FW_REG(0), + KVM_REG_ARM_FW_REG(1), + KVM_REG_ARM_FW_REG(2), + ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 0, 2), + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */ + ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */ + ARM64_SYS_REG(2, 0, 0, 0, 4), + ARM64_SYS_REG(2, 0, 0, 0, 5), + ARM64_SYS_REG(2, 0, 0, 0, 6), + ARM64_SYS_REG(2, 0, 0, 0, 7), + ARM64_SYS_REG(2, 0, 0, 1, 4), + ARM64_SYS_REG(2, 0, 0, 1, 5), + ARM64_SYS_REG(2, 0, 0, 1, 6), + ARM64_SYS_REG(2, 0, 0, 1, 7), + ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 4), + ARM64_SYS_REG(2, 0, 0, 2, 5), + ARM64_SYS_REG(2, 0, 0, 2, 6), + ARM64_SYS_REG(2, 0, 0, 2, 7), + ARM64_SYS_REG(2, 0, 0, 3, 4), + ARM64_SYS_REG(2, 0, 0, 3, 5), + ARM64_SYS_REG(2, 0, 0, 3, 6), + ARM64_SYS_REG(2, 0, 0, 3, 7), + ARM64_SYS_REG(2, 0, 0, 4, 4), + ARM64_SYS_REG(2, 0, 0, 4, 5), + ARM64_SYS_REG(2, 0, 0, 4, 6), + ARM64_SYS_REG(2, 0, 0, 4, 7), + ARM64_SYS_REG(2, 0, 0, 5, 4), + ARM64_SYS_REG(2, 0, 0, 5, 5), + ARM64_SYS_REG(2, 0, 0, 5, 6), + ARM64_SYS_REG(2, 0, 0, 5, 7), + ARM64_SYS_REG(2, 0, 0, 6, 4), + ARM64_SYS_REG(2, 0, 0, 6, 5), + ARM64_SYS_REG(2, 0, 0, 6, 6), + ARM64_SYS_REG(2, 0, 0, 6, 7), + ARM64_SYS_REG(2, 0, 0, 7, 4), + ARM64_SYS_REG(2, 0, 0, 7, 5), + ARM64_SYS_REG(2, 0, 0, 7, 6), + ARM64_SYS_REG(2, 0, 0, 7, 7), + ARM64_SYS_REG(2, 0, 0, 8, 4), + ARM64_SYS_REG(2, 0, 0, 8, 5), + ARM64_SYS_REG(2, 0, 0, 8, 6), + ARM64_SYS_REG(2, 0, 0, 8, 7), + ARM64_SYS_REG(2, 0, 0, 9, 4), + ARM64_SYS_REG(2, 0, 0, 9, 5), + ARM64_SYS_REG(2, 0, 0, 9, 6), + ARM64_SYS_REG(2, 0, 0, 9, 7), + ARM64_SYS_REG(2, 0, 0, 10, 4), + ARM64_SYS_REG(2, 0, 0, 10, 5), + ARM64_SYS_REG(2, 0, 0, 10, 6), + ARM64_SYS_REG(2, 0, 0, 10, 7), + ARM64_SYS_REG(2, 0, 0, 11, 4), + ARM64_SYS_REG(2, 0, 0, 11, 5), + ARM64_SYS_REG(2, 0, 0, 11, 6), + ARM64_SYS_REG(2, 0, 0, 11, 7), + ARM64_SYS_REG(2, 0, 0, 12, 4), + ARM64_SYS_REG(2, 0, 0, 12, 5), + ARM64_SYS_REG(2, 0, 0, 12, 6), + ARM64_SYS_REG(2, 0, 0, 12, 7), + ARM64_SYS_REG(2, 0, 0, 13, 4), + ARM64_SYS_REG(2, 0, 0, 13, 5), + ARM64_SYS_REG(2, 0, 0, 13, 6), + ARM64_SYS_REG(2, 0, 0, 13, 7), + ARM64_SYS_REG(2, 0, 0, 14, 4), + ARM64_SYS_REG(2, 0, 0, 14, 5), + ARM64_SYS_REG(2, 0, 0, 14, 6), + ARM64_SYS_REG(2, 0, 0, 14, 7), + ARM64_SYS_REG(2, 0, 0, 15, 4), + ARM64_SYS_REG(2, 0, 0, 15, 5), + ARM64_SYS_REG(2, 0, 0, 15, 6), + ARM64_SYS_REG(2, 0, 0, 15, 7), + ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */ + ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 3), + ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 7), + ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 2), + ARM64_SYS_REG(3, 0, 0, 4, 3), + ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 5), + ARM64_SYS_REG(3, 0, 0, 4, 6), + ARM64_SYS_REG(3, 0, 0, 4, 7), + ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 2), + ARM64_SYS_REG(3, 0, 0, 5, 3), + ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 6), + ARM64_SYS_REG(3, 0, 0, 5, 7), + ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 2), + ARM64_SYS_REG(3, 0, 0, 6, 3), + ARM64_SYS_REG(3, 0, 0, 6, 4), + ARM64_SYS_REG(3, 0, 0, 6, 5), + ARM64_SYS_REG(3, 0, 0, 6, 6), + ARM64_SYS_REG(3, 0, 0, 6, 7), + ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), + ARM64_SYS_REG(3, 0, 0, 7, 4), + ARM64_SYS_REG(3, 0, 0, 7, 5), + ARM64_SYS_REG(3, 0, 0, 7, 6), + ARM64_SYS_REG(3, 0, 0, 7, 7), + ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */ + ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ + ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ + ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ + ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ + ARM64_SYS_REG(3, 3, 14, 8, 0), + ARM64_SYS_REG(3, 3, 14, 8, 1), + ARM64_SYS_REG(3, 3, 14, 8, 2), + ARM64_SYS_REG(3, 3, 14, 8, 3), + ARM64_SYS_REG(3, 3, 14, 8, 4), + ARM64_SYS_REG(3, 3, 14, 8, 5), + ARM64_SYS_REG(3, 3, 14, 8, 6), + ARM64_SYS_REG(3, 3, 14, 8, 7), + ARM64_SYS_REG(3, 3, 14, 9, 0), + ARM64_SYS_REG(3, 3, 14, 9, 1), + ARM64_SYS_REG(3, 3, 14, 9, 2), + ARM64_SYS_REG(3, 3, 14, 9, 3), + ARM64_SYS_REG(3, 3, 14, 9, 4), + ARM64_SYS_REG(3, 3, 14, 9, 5), + ARM64_SYS_REG(3, 3, 14, 9, 6), + ARM64_SYS_REG(3, 3, 14, 9, 7), + ARM64_SYS_REG(3, 3, 14, 10, 0), + ARM64_SYS_REG(3, 3, 14, 10, 1), + ARM64_SYS_REG(3, 3, 14, 10, 2), + ARM64_SYS_REG(3, 3, 14, 10, 3), + ARM64_SYS_REG(3, 3, 14, 10, 4), + ARM64_SYS_REG(3, 3, 14, 10, 5), + ARM64_SYS_REG(3, 3, 14, 10, 6), + ARM64_SYS_REG(3, 3, 14, 10, 7), + ARM64_SYS_REG(3, 3, 14, 11, 0), + ARM64_SYS_REG(3, 3, 14, 11, 1), + ARM64_SYS_REG(3, 3, 14, 11, 2), + ARM64_SYS_REG(3, 3, 14, 11, 3), + ARM64_SYS_REG(3, 3, 14, 11, 4), + ARM64_SYS_REG(3, 3, 14, 11, 5), + ARM64_SYS_REG(3, 3, 14, 11, 6), + ARM64_SYS_REG(3, 3, 14, 12, 0), + ARM64_SYS_REG(3, 3, 14, 12, 1), + ARM64_SYS_REG(3, 3, 14, 12, 2), + ARM64_SYS_REG(3, 3, 14, 12, 3), + ARM64_SYS_REG(3, 3, 14, 12, 4), + ARM64_SYS_REG(3, 3, 14, 12, 5), + ARM64_SYS_REG(3, 3, 14, 12, 6), + ARM64_SYS_REG(3, 3, 14, 12, 7), + ARM64_SYS_REG(3, 3, 14, 13, 0), + ARM64_SYS_REG(3, 3, 14, 13, 1), + ARM64_SYS_REG(3, 3, 14, 13, 2), + ARM64_SYS_REG(3, 3, 14, 13, 3), + ARM64_SYS_REG(3, 3, 14, 13, 4), + ARM64_SYS_REG(3, 3, 14, 13, 5), + ARM64_SYS_REG(3, 3, 14, 13, 6), + ARM64_SYS_REG(3, 3, 14, 13, 7), + ARM64_SYS_REG(3, 3, 14, 14, 0), + ARM64_SYS_REG(3, 3, 14, 14, 1), + ARM64_SYS_REG(3, 3, 14, 14, 2), + ARM64_SYS_REG(3, 3, 14, 14, 3), + ARM64_SYS_REG(3, 3, 14, 14, 4), + ARM64_SYS_REG(3, 3, 14, 14, 5), + ARM64_SYS_REG(3, 3, 14, 14, 6), + ARM64_SYS_REG(3, 3, 14, 14, 7), + ARM64_SYS_REG(3, 3, 14, 15, 0), + ARM64_SYS_REG(3, 3, 14, 15, 1), + ARM64_SYS_REG(3, 3, 14, 15, 2), + ARM64_SYS_REG(3, 3, 14, 15, 3), + ARM64_SYS_REG(3, 3, 14, 15, 4), + ARM64_SYS_REG(3, 3, 14, 15, 5), + ARM64_SYS_REG(3, 3, 14, 15, 6), + ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */ + ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, +}; +static __u64 blessed_n = ARRAY_SIZE(blessed_reg); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 77f53dbc34ff..feb949a92055 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -152,6 +152,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_guest_debug *debug); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 91e547031d8c..7669cb7c86e3 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1291,6 +1291,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, "rc: %i errno: %i", ret, errno); } +/* + * VM VCPU Get Reg List + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: + * None + * + * Return: + * A pointer to an allocated struct kvm_reg_list + * + * Get the list of guest registers which are supported for + * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls + */ +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, ®_list_n); + TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); + reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); + reg_list->n = reg_list_n.n; + vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list); + return reg_list; +} + /* * VM VCPU Regs Get * From 31d212959179015bc07f3af4e890cadd26e01ee0 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 29 Oct 2020 21:17:03 +0100 Subject: [PATCH 212/710] KVM: selftests: Add blessed SVE registers to get-reg-list Add support for the SVE registers to get-reg-list and create a new test, get-reg-list-sve, which tests them when running on a machine with SVE support. Signed-off-by: Andrew Jones Message-Id: <20201029201703.102716-5-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/aarch64/get-reg-list-sve.c | 3 + .../selftests/kvm/aarch64/get-reg-list.c | 254 +++++++++++++++--- 4 files changed, 217 insertions(+), 42 deletions(-) create mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2034765862a2..ea1692d43411 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only /aarch64/get-reg-list +/aarch64/get-reg-list-sve /s390x/memop /s390x/resets /s390x/sync_regs_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 771455ae4a1b..2b0d7d325e2a 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -67,6 +67,7 @@ TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c new file mode 100644 index 000000000000..efba76682b4b --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define REG_LIST_SVE +#include "get-reg-list.c" diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 3ff097f6886e..33218a395d9f 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -29,6 +29,13 @@ #include #include "kvm_util.h" #include "test_util.h" +#include "processor.h" + +#ifdef REG_LIST_SVE +#define reg_list_sve() (true) +#else +#define reg_list_sve() (false) +#endif #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) @@ -46,8 +53,9 @@ static struct kvm_reg_list *reg_list; -static __u64 blessed_reg[]; -static __u64 blessed_n; +static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; +static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; +static __u64 *blessed_reg, blessed_n; static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) { @@ -119,6 +127,40 @@ static const char *core_id_to_str(__u64 id) return NULL; } +static const char *sve_id_to_str(__u64 id) +{ + __u64 sve_off, n, i; + + if (id == KVM_REG_ARM64_SVE_VLS) + return "KVM_REG_ARM64_SVE_VLS"; + + sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); + i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); + + TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id); + + switch (sve_off) { + case KVM_REG_ARM64_SVE_ZREG_BASE ... + KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), + "Unexpected bits set in SVE ZREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); + case KVM_REG_ARM64_SVE_PREG_BASE ... + KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), + "Unexpected bits set in SVE PREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); + case KVM_REG_ARM64_SVE_FFR_BASE: + TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), + "Unexpected bits set in SVE FFR id: 0x%llx", id); + return "KVM_REG_ARM64_SVE_FFR(0)"; + } + + return NULL; +} + static void print_reg(__u64 id) { unsigned op0, op1, crn, crm, op2; @@ -186,7 +228,10 @@ static void print_reg(__u64 id) printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM64_SVE: - TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + if (reg_list_sve()) + printf("\t%s,\n", sve_id_to_str(id)); + else + TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); break; default: TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", @@ -251,12 +296,40 @@ static void core_reg_fixup(void) reg_list = tmp; } +static void prepare_vcpu_init(struct kvm_vcpu_init *init) +{ + if (reg_list_sve()) + init->features[0] |= 1 << KVM_ARM_VCPU_SVE; +} + +static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + int feature; + + if (reg_list_sve()) { + feature = KVM_ARM_VCPU_SVE; + vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + } +} + +static void check_supported(void) +{ + if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) { + fprintf(stderr, "SVE not available, skipping tests\n"); + exit(KSFT_SKIP); + } +} + int main(int ac, char **av) { + struct kvm_vcpu_init init = { .target = -1, }; int new_regs = 0, missing_regs = 0, i; - int failed_get = 0, failed_set = 0; + int failed_get = 0, failed_set = 0, failed_reject = 0; bool print_list = false, fixup_core_regs = false; struct kvm_vm *vm; + __u64 *vec_regs; + + check_supported(); for (i = 1; i < ac; ++i) { if (strcmp(av[i], "--core-reg-fixup") == 0) @@ -267,7 +340,11 @@ int main(int ac, char **av) fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); } - vm = vm_create_default(0, 0, NULL); + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + prepare_vcpu_init(&init); + aarch64_vcpu_add_default(vm, 0, &init, NULL); + finalize_vcpu(vm, 0); + reg_list = vcpu_get_reg_list(vm, 0); if (fixup_core_regs) @@ -307,6 +384,18 @@ int main(int ac, char **av) ++failed_get; } + /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ + if (find_reg(rejects_set, rejects_set_n, reg.id)) { + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret != -1 || errno != EPERM) { + printf("Failed to reject (ret=%d, errno=%d) ", ret, errno); + print_reg(reg.id); + putchar('\n'); + ++failed_reject; + } + continue; + } + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); if (ret) { puts("Failed to set "); @@ -316,6 +405,20 @@ int main(int ac, char **av) } } + if (reg_list_sve()) { + blessed_n = base_regs_n + sve_regs_n; + vec_regs = sve_regs; + } else { + blessed_n = base_regs_n + vregs_n; + vec_regs = vregs; + } + + blessed_reg = calloc(blessed_n, sizeof(__u64)); + for (i = 0; i < base_regs_n; ++i) + blessed_reg[i] = base_regs[i]; + for (i = 0; i < blessed_n - base_regs_n; ++i) + blessed_reg[base_regs_n + i] = vec_regs[i]; + for_each_new_reg(i) ++new_regs; @@ -344,9 +447,10 @@ int main(int ac, char **av) putchar('\n'); } - TEST_ASSERT(!missing_regs && !failed_get && !failed_set, - "There are %d missing registers; %d registers failed get; %d registers failed set", - missing_regs, failed_get, failed_set); + TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, + "There are %d missing registers; " + "%d registers failed get; %d registers failed set; %d registers failed reject", + missing_regs, failed_get, failed_set, failed_reject); return 0; } @@ -355,7 +459,7 @@ int main(int ac, char **av) * The current blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. */ -static __u64 blessed_reg[] = { +static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), @@ -397,38 +501,6 @@ static __u64 blessed_reg[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), - KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), KVM_REG_ARM_FW_REG(0), @@ -668,4 +740,102 @@ static __u64 blessed_reg[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, }; -static __u64 blessed_n = ARRAY_SIZE(blessed_reg); +static __u64 base_regs_n = ARRAY_SIZE(base_regs); + +static __u64 vregs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), +}; +static __u64 vregs_n = ARRAY_SIZE(vregs); + +static __u64 sve_regs[] = { + KVM_REG_ARM64_SVE_VLS, + KVM_REG_ARM64_SVE_ZREG(0, 0), + KVM_REG_ARM64_SVE_ZREG(1, 0), + KVM_REG_ARM64_SVE_ZREG(2, 0), + KVM_REG_ARM64_SVE_ZREG(3, 0), + KVM_REG_ARM64_SVE_ZREG(4, 0), + KVM_REG_ARM64_SVE_ZREG(5, 0), + KVM_REG_ARM64_SVE_ZREG(6, 0), + KVM_REG_ARM64_SVE_ZREG(7, 0), + KVM_REG_ARM64_SVE_ZREG(8, 0), + KVM_REG_ARM64_SVE_ZREG(9, 0), + KVM_REG_ARM64_SVE_ZREG(10, 0), + KVM_REG_ARM64_SVE_ZREG(11, 0), + KVM_REG_ARM64_SVE_ZREG(12, 0), + KVM_REG_ARM64_SVE_ZREG(13, 0), + KVM_REG_ARM64_SVE_ZREG(14, 0), + KVM_REG_ARM64_SVE_ZREG(15, 0), + KVM_REG_ARM64_SVE_ZREG(16, 0), + KVM_REG_ARM64_SVE_ZREG(17, 0), + KVM_REG_ARM64_SVE_ZREG(18, 0), + KVM_REG_ARM64_SVE_ZREG(19, 0), + KVM_REG_ARM64_SVE_ZREG(20, 0), + KVM_REG_ARM64_SVE_ZREG(21, 0), + KVM_REG_ARM64_SVE_ZREG(22, 0), + KVM_REG_ARM64_SVE_ZREG(23, 0), + KVM_REG_ARM64_SVE_ZREG(24, 0), + KVM_REG_ARM64_SVE_ZREG(25, 0), + KVM_REG_ARM64_SVE_ZREG(26, 0), + KVM_REG_ARM64_SVE_ZREG(27, 0), + KVM_REG_ARM64_SVE_ZREG(28, 0), + KVM_REG_ARM64_SVE_ZREG(29, 0), + KVM_REG_ARM64_SVE_ZREG(30, 0), + KVM_REG_ARM64_SVE_ZREG(31, 0), + KVM_REG_ARM64_SVE_PREG(0, 0), + KVM_REG_ARM64_SVE_PREG(1, 0), + KVM_REG_ARM64_SVE_PREG(2, 0), + KVM_REG_ARM64_SVE_PREG(3, 0), + KVM_REG_ARM64_SVE_PREG(4, 0), + KVM_REG_ARM64_SVE_PREG(5, 0), + KVM_REG_ARM64_SVE_PREG(6, 0), + KVM_REG_ARM64_SVE_PREG(7, 0), + KVM_REG_ARM64_SVE_PREG(8, 0), + KVM_REG_ARM64_SVE_PREG(9, 0), + KVM_REG_ARM64_SVE_PREG(10, 0), + KVM_REG_ARM64_SVE_PREG(11, 0), + KVM_REG_ARM64_SVE_PREG(12, 0), + KVM_REG_ARM64_SVE_PREG(13, 0), + KVM_REG_ARM64_SVE_PREG(14, 0), + KVM_REG_ARM64_SVE_PREG(15, 0), + KVM_REG_ARM64_SVE_FFR(0), + ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */ +}; +static __u64 sve_regs_n = ARRAY_SIZE(sve_regs); + +static __u64 rejects_set[] = { +#ifdef REG_LIST_SVE + KVM_REG_ARM64_SVE_VLS, +#endif +}; +static __u64 rejects_set_n = ARRAY_SIZE(rejects_set); From 3031e0288e60f09533339e61117b83099a6e126e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:28 -0400 Subject: [PATCH 213/710] KVM: selftests: Always clear dirty bitmap after iteration We used not to clear the dirty bitmap before because KVM_GET_DIRTY_LOG would overwrite it the next time it copies the dirty log onto it. In the upcoming dirty ring tests we'll start to fetch dirty pages from a ring buffer, so no one is going to clear the dirty bitmap for us. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012228.5916-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 752ec158ac59..6a8275a22861 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -195,7 +195,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) page); } - if (test_bit_le(page, bmap)) { + if (test_and_clear_bit_le(page, bmap)) { host_dirty_count++; /* * If the bit is set, the value written onto From afdb1960071935cfd5c1908691a34cc6e36931f7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:33 -0400 Subject: [PATCH 214/710] KVM: selftests: Use a single binary for dirty/clear log test Remove the clear_dirty_log test, instead merge it into the existing dirty_log_test. It should be cleaner to use this single binary to do both tests, also it's a preparation for the upcoming dirty ring test. The default behavior will run all the modes in sequence. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Message-Id: <20201001012233.6013-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 - .../selftests/kvm/clear_dirty_log_test.c | 6 - tools/testing/selftests/kvm/dirty_log_test.c | 187 +++++++++++++++--- 3 files changed, 156 insertions(+), 39 deletions(-) delete mode 100644 tools/testing/selftests/kvm/clear_dirty_log_test.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 2b0d7d325e2a..c8af04dccf7f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -59,7 +59,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test -TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus @@ -68,7 +67,6 @@ TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve -TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c deleted file mode 100644 index 11672ec6f74e..000000000000 --- a/tools/testing/selftests/kvm/clear_dirty_log_test.c +++ /dev/null @@ -1,6 +0,0 @@ -#define USE_CLEAR_DIRTY_LOG -#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) -#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) -#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ - KVM_DIRTY_LOG_INITIALLY_SET) -#include "dirty_log_test.c" diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 6a8275a22861..139ccb550618 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -128,6 +128,78 @@ static uint64_t host_dirty_count; static uint64_t host_clear_count; static uint64_t host_track_next_count; +enum log_mode_t { + /* Only use KVM_GET_DIRTY_LOG for logging */ + LOG_MODE_DIRTY_LOG = 0, + + /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */ + LOG_MODE_CLEAR_LOG = 1, + + LOG_MODE_NUM, + + /* Run all supported modes */ + LOG_MODE_ALL = LOG_MODE_NUM, +}; + +/* Mode of logging to test. Default is to run all supported modes */ +static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; +/* Logging mode for current run */ +static enum log_mode_t host_log_mode; + +static bool clear_log_supported(void) +{ + return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); +} + +static void clear_log_create_vm_done(struct kvm_vm *vm) +{ + struct kvm_enable_cap cap = {}; + u64 manual_caps; + + manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!"); + manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = manual_caps; + vm_enable_cap(vm, &cap); +} + +static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); +} + +static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); + kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); +} + +struct log_mode { + const char *name; + /* Return true if this mode is supported, otherwise false */ + bool (*supported)(void); + /* Hook when the vm creation is done (before vcpu creation) */ + void (*create_vm_done)(struct kvm_vm *vm); + /* Hook to collect the dirty pages into the bitmap provided */ + void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages); +} log_modes[LOG_MODE_NUM] = { + { + .name = "dirty-log", + .collect_dirty_pages = dirty_log_collect_dirty_pages, + }, + { + .name = "clear-log", + .supported = clear_log_supported, + .create_vm_done = clear_log_create_vm_done, + .collect_dirty_pages = clear_log_collect_dirty_pages, + }, +}; + /* * We use this bitmap to track some pages that should have its dirty * bit set in the _next_ iteration. For example, if we detected the @@ -137,6 +209,44 @@ static uint64_t host_track_next_count; */ static unsigned long *host_bmap_track; +static void log_modes_dump(void) +{ + int i; + + printf("all"); + for (i = 0; i < LOG_MODE_NUM; i++) + printf(", %s", log_modes[i].name); + printf("\n"); +} + +static bool log_mode_supported(void) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->supported) + return mode->supported(); + + return true; +} + +static void log_mode_create_vm_done(struct kvm_vm *vm) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->create_vm_done) + mode->create_vm_done(vm); +} + +static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + TEST_ASSERT(mode->collect_dirty_pages != NULL, + "collect_dirty_pages() is required for any log mode!"); + mode->collect_dirty_pages(vm, slot, bitmap, num_pages); +} + static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -257,6 +367,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #ifdef __x86_64__ vm_create_irqchip(vm); #endif + log_mode_create_vm_done(vm); vm_vcpu_add_default(vm, vcpuid, guest_code); return vm; } @@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -#ifdef USE_CLEAR_DIRTY_LOG -static u64 dirty_log_manual_caps; -#endif - static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { @@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct kvm_vm *vm; unsigned long *bmap; + if (!log_mode_supported()) { + print_skip("Log mode '%s' not supported", + log_modes[host_log_mode].name); + return; + } + /* * We reserve page table for 2 times of extra dirty mem which * will definitely cover the original (1G+) test range. Here @@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); -#ifdef USE_CLEAR_DIRTY_LOG - struct kvm_enable_cap cap = {}; - - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = dirty_log_manual_caps; - vm_enable_cap(vm, &cap); -#endif - /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, guest_test_phys_mem, @@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, while (iteration < iterations) { /* Give the vcpu thread some time to dirty some pages */ usleep(interval * 1000); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); -#ifdef USE_CLEAR_DIRTY_LOG - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - host_num_pages); -#endif + log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, + bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); iteration++; sync_global_to_guest(vm, iteration); @@ -410,6 +512,9 @@ static void help(char *name) TEST_HOST_LOOP_INTERVAL); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -M: specify the host logging mode " + "(default: run all log modes). Supported modes: \n\t"); + log_modes_dump(); printf(" -m: specify the guest mode ID to test " "(default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -429,18 +534,7 @@ int main(int argc, char *argv[]) bool mode_selected = false; uint64_t phys_offset = 0; unsigned int mode; - int opt, i; - -#ifdef USE_CLEAR_DIRTY_LOG - dirty_log_manual_caps = - kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - if (!dirty_log_manual_caps) { - print_skip("KVM_CLEAR_DIRTY_LOG not available"); - exit(KSFT_SKIP); - } - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); -#endif + int opt, i, j; #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); @@ -464,7 +558,7 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { + while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) { switch (opt) { case 'i': iterations = strtol(optarg, NULL, 10); @@ -486,6 +580,26 @@ int main(int argc, char *argv[]) "Guest mode ID %d too big", mode); guest_modes[mode].enabled = true; break; + case 'M': + if (!strcmp(optarg, "all")) { + host_log_mode_option = LOG_MODE_ALL; + break; + } + for (i = 0; i < LOG_MODE_NUM; i++) { + if (!strcmp(optarg, log_modes[i].name)) { + pr_info("Setting log mode to: '%s'\n", + optarg); + host_log_mode_option = i; + break; + } + } + if (i == LOG_MODE_NUM) { + printf("Log mode '%s' invalid. Please choose " + "from: ", optarg); + log_modes_dump(); + exit(1); + } + break; case 'h': default: help(argv[0]); @@ -507,7 +621,18 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (j = 0; j < LOG_MODE_NUM; j++) { + pr_info("Testing Log Mode '%s'\n", + log_modes[j].name); + host_log_mode = j; + run_test(i, iterations, interval, phys_offset); + } + } else { + host_log_mode = host_log_mode_option; + run_test(i, iterations, interval, phys_offset); + } } return 0; From 4b5d12b0e21cc9f9f00201819844fcafb020ffad Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:29 -0700 Subject: [PATCH 215/710] KVM: selftests: Factor code out of demand_paging_test Much of the code in demand_paging_test can be reused by other, similar multi-vCPU-memory-touching-perfromance-tests. Factor that common code out for reuse. No functional change expected. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-2-bgardon@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/demand_paging_test.c | 204 ++---------------- .../selftests/kvm/include/perf_test_util.h | 187 ++++++++++++++++ 2 files changed, 210 insertions(+), 181 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/perf_test_util.h diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 360cd3ea4cd6..4251e98ceb69 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -21,18 +21,12 @@ #include #include -#include "test_util.h" -#include "kvm_util.h" +#include "perf_test_util.h" #include "processor.h" +#include "test_util.h" #ifdef __NR_userfaultfd -/* The memory slot index demand page */ -#define TEST_MEM_SLOT_INDEX 1 - -/* Default guest test virtual memory offset */ -#define DEFAULT_GUEST_TEST_MEM 0xc0000000 - #define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ #ifdef PRINT_PER_PAGE_UPDATES @@ -47,75 +41,14 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif -#define MAX_VCPUS 512 - -/* - * Guest/Host shared variables. Ensure addr_gva2hva() and/or - * sync_global_to/from_guest() are used when accessing from - * the host. READ/WRITE_ONCE() should also be used with anything - * that may change. - */ -static uint64_t host_page_size; -static uint64_t guest_page_size; - static char *guest_data_prototype; -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; - -struct vcpu_args { - uint64_t gva; - uint64_t pages; - - /* Only used by the host userspace part of the vCPU thread */ - int vcpu_id; - struct kvm_vm *vm; -}; - -static struct vcpu_args vcpu_args[MAX_VCPUS]; - -/* - * Continuously write to the first 8 bytes of each page in the demand paging - * memory region. - */ -static void guest_code(uint32_t vcpu_id) -{ - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id); - - gva = vcpu_args[vcpu_id].gva; - pages = vcpu_args[vcpu_id].pages; - - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * guest_page_size); - - addr &= ~(host_page_size - 1); - *(uint64_t *)addr = 0x0123456789ABCDEF; - } - - GUEST_SYNC(1); -} - static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *args = (struct vcpu_args *)data; - struct kvm_vm *vm = args->vm; - int vcpu_id = args->vcpu_id; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; struct timespec start, end, ts_diff; @@ -141,39 +74,6 @@ static void *vcpu_worker(void *data) return NULL; } -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; - - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - vm = _vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - return vm; -} - static int handle_uffd_page_request(int uffd, uint64_t addr) { pid_t tid; @@ -186,7 +86,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) copy.src = (uint64_t)guest_data_prototype; copy.dst = addr; - copy.len = host_page_size; + copy.len = perf_test_args.host_page_size; copy.mode = 0; clock_gettime(CLOCK_MONOTONIC, &start); @@ -203,7 +103,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, timespec_to_ns(timespec_sub(end, start))); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", - host_page_size, addr, tid); + perf_test_args.host_page_size, addr, tid); return 0; } @@ -360,64 +260,21 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, struct timespec start, end, ts_diff; int *pipefds = NULL; struct kvm_vm *vm; - uint64_t guest_num_pages; int vcpu_id; int r; vm = create_vm(mode, vcpus, vcpu_memory_bytes); - guest_page_size = vm_get_page_size(vm); - - TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - host_page_size = getpagesize(); - TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0, - "Guest memory size is not host page size aligned."); - - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - guest_page_size; - guest_test_phys_mem &= ~(host_page_size - 1); - -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing demand paging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - guest_data_prototype = malloc(host_page_size); + guest_data_prototype = malloc(perf_test_args.host_page_size); TEST_ASSERT(guest_data_prototype, "Failed to allocate buffer for guest data pattern"); - memset(guest_data_prototype, 0xAB, host_page_size); + memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + add_vcpus(vm, vcpus, vcpu_memory_bytes); + if (use_uffd) { uffd_handler_threads = malloc(vcpus * sizeof(*uffd_handler_threads)); @@ -428,22 +285,18 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pipefds = malloc(sizeof(int) * vcpus * 2); TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); - } - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vm_paddr_t vcpu_gpa; - void *vcpu_hva; + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vm_paddr_t vcpu_gpa; + void *vcpu_hva; - vm_vcpu_add_default(vm, vcpu_id, guest_code); + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + /* Cache the HVA pointer of the region */ + vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); - /* Cache the HVA pointer of the region */ - vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); - - if (use_uffd) { /* * Set up user fault fd to handle demand paging * requests. @@ -460,22 +313,10 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, if (r < 0) exit(-r); } - -#ifdef __x86_64__ - vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); -#endif - - vcpu_args[vcpu_id].vm = vm; - vcpu_args[vcpu_id].vcpu_id = vcpu_id; - vcpu_args[vcpu_id].gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size; } /* Export the shared variables to the guest */ - sync_global_to_guest(vm, host_page_size); - sync_global_to_guest(vm, guest_page_size); - sync_global_to_guest(vm, vcpu_args); + sync_global_to_guest(vm, perf_test_args); pr_info("Finished creating vCPUs and starting uffd threads\n"); @@ -483,7 +324,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &vcpu_args[vcpu_id]); + &perf_test_args.vcpu_args[vcpu_id]); } pr_info("Started all vCPUs\n"); @@ -514,7 +355,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + perf_test_args.vcpu_args[0].pages * vcpus / + ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); ucall_uninit(vm); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h new file mode 100644 index 000000000000..f71f0858a1f2 --- /dev/null +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tools/testing/selftests/kvm/include/perf_test_util.h + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H +#define SELFTEST_KVM_PERF_TEST_UTIL_H + +#include "kvm_util.h" +#include "processor.h" + +#define MAX_VCPUS 512 + +#define PAGE_SHIFT_4K 12 +#define PTES_PER_4K_PT 512 + +#define TEST_MEM_SLOT_INDEX 1 + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +/* + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. + */ +static uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +struct vcpu_args { + uint64_t gva; + uint64_t pages; + + /* Only used by the host userspace part of the vCPU thread */ + int vcpu_id; +}; + +struct perf_test_args { + struct kvm_vm *vm; + uint64_t host_page_size; + uint64_t guest_page_size; + + struct vcpu_args vcpu_args[MAX_VCPUS]; +}; + +static struct perf_test_args perf_test_args; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + addr &= ~(perf_test_args.host_page_size - 1); + *(uint64_t *)addr = 0x0123456789ABCDEF; + } + + GUEST_SYNC(1); +} + +static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) +{ + struct kvm_vm *vm; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES; + uint64_t guest_num_pages; + + /* Account for a few pages per-vCPU for stacks */ + pages += DEFAULT_STACK_PGS * vcpus; + + /* + * Reserve twice the ammount of memory needed to map the test region and + * the page table / stacks region, at 4k, for page tables. Do the + * calculation with 4K page size: the smallest of all archs. (e.g., 64K + * page size guest will need even less memory for page tables). + */ + pages += (2 * pages) / PTES_PER_4K_PT; + pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / + PTES_PER_4K_PT; + pages = vm_adjust_num_guest_pages(mode, pages); + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + vm = _vm_create(mode, pages, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + + perf_test_args.vm = vm; + perf_test_args.guest_page_size = vm_get_page_size(vm); + perf_test_args.host_page_size = getpagesize(); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + guest_num_pages = (vcpus * vcpu_memory_bytes) / + perf_test_args.guest_page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); + +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) +{ + vm_paddr_t vcpu_gpa; + struct vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vm_vcpu_add_default(vm, vcpu_id, guest_code); + +#ifdef __x86_64__ + vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); +#endif + + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + } +} + +#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ From 2fe5149bdfbf3c2cdfafd2b5b496252d45ca1f78 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:30 -0700 Subject: [PATCH 216/710] KVM: selftests: Remove address rounding in guest code Rounding the address the guest writes to a host page boundary will only have an effect if the host page size is larger than the guest page size, but in that case the guest write would still go to the same host page. There's no reason to round the address down, so remove the rounding to simplify the demand paging test. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-3-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/perf_test_util.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index f71f0858a1f2..838f946700f0 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -72,7 +72,6 @@ static void guest_code(uint32_t vcpu_id) for (i = 0; i < pages; i++) { uint64_t addr = gva + (i * perf_test_args.guest_page_size); - addr &= ~(perf_test_args.host_page_size - 1); *(uint64_t *)addr = 0x0123456789ABCDEF; } From 1eafbd27edb5098ed6b6bc404c35d56c78beb0fd Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:31 -0700 Subject: [PATCH 217/710] KVM: selftests: Simplify demand_paging_test with timespec_diff_now Add a helper function to get the current time and return the time since a given start time. Use that function to simplify the timekeeping in the demand paging test. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-4-bgardon@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/demand_paging_test.c | 26 +++++++++---------- .../testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 15 +++++++++-- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 4251e98ceb69..7de6feb00076 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -50,7 +50,8 @@ static void *vcpu_worker(void *data) int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); run = vcpu_state(vm, vcpu_id); @@ -66,8 +67,7 @@ static void *vcpu_worker(void *data) exit_reason_str(run->exit_reason)); } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, ts_diff.tv_sec, ts_diff.tv_nsec); @@ -78,7 +78,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) { pid_t tid; struct timespec start; - struct timespec end; + struct timespec ts_diff; struct uffdio_copy copy; int r; @@ -98,10 +98,10 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) return r; } - clock_gettime(CLOCK_MONOTONIC, &end); + ts_diff = timespec_diff_now(start); PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, - timespec_to_ns(timespec_sub(end, start))); + timespec_to_ns(ts_diff)); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", perf_test_args.host_page_size, addr, tid); @@ -123,7 +123,8 @@ static void *uffd_handler_thread_fn(void *arg) int pipefd = uffd_args->pipefd; useconds_t delay = uffd_args->delay; int64_t pages = 0; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; clock_gettime(CLOCK_MONOTONIC, &start); while (!quit_uffd_thread) { @@ -192,8 +193,7 @@ static void *uffd_handler_thread_fn(void *arg) pages++; } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", pages, ts_diff.tv_sec, ts_diff.tv_nsec, pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); @@ -257,7 +257,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; int *pipefds = NULL; struct kvm_vm *vm; int vcpu_id; @@ -335,9 +336,9 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } - pr_info("All vCPU threads joined\n"); + ts_diff = timespec_diff_now(start); - clock_gettime(CLOCK_MONOTONIC, &end); + pr_info("All vCPU threads joined\n"); if (use_uffd) { char c; @@ -351,7 +352,6 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, } } - ts_diff = timespec_sub(end, start); pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 5eb01bf51b86..1cc036ddb0c5 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -64,5 +64,6 @@ int64_t timespec_to_ns(struct timespec ts); struct timespec timespec_add_ns(struct timespec ts, int64_t ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); +struct timespec timespec_diff_now(struct timespec start); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 689e97c27ee2..1a46c2c48c7c 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -4,10 +4,13 @@ * * Copyright (C) 2020, Google LLC. */ -#include + +#include #include #include -#include +#include +#include + #include "test_util.h" /* @@ -81,6 +84,14 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) return timespec_add_ns((struct timespec){0}, ns1 - ns2); } +struct timespec timespec_diff_now(struct timespec start) +{ + struct timespec end; + + clock_gettime(CLOCK_MONOTONIC, &end); + return timespec_sub(end, start); +} + void print_skip(const char *fmt, ...) { va_list ap; From 92ab4b9a22cfea9b0d353e86024208040c10e807 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:32 -0700 Subject: [PATCH 218/710] KVM: selftests: Add wrfract to common guest code Wrfract will be used by the dirty logging perf test introduced later in this series to dirty memory sparsely. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-5-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 2 ++ tools/testing/selftests/kvm/include/perf_test_util.h | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 7de6feb00076..47defc65aeda 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -266,6 +266,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, vm = create_vm(mode, vcpus, vcpu_memory_bytes); + perf_test_args.wr_fract = 1; + guest_data_prototype = malloc(perf_test_args.host_page_size); TEST_ASSERT(guest_data_prototype, "Failed to allocate buffer for guest data pattern"); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 838f946700f0..1716300469c0 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -46,6 +46,7 @@ struct perf_test_args { struct kvm_vm *vm; uint64_t host_page_size; uint64_t guest_page_size; + int wr_fract; struct vcpu_args vcpu_args[MAX_VCPUS]; }; @@ -72,7 +73,10 @@ static void guest_code(uint32_t vcpu_id) for (i = 0; i < pages; i++) { uint64_t addr = gva + (i * perf_test_args.guest_page_size); - *(uint64_t *)addr = 0x0123456789ABCDEF; + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); } GUEST_SYNC(1); From f663132d1e09166db419afb9832d463e0a79f3d5 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 4 Nov 2020 22:23:48 +0100 Subject: [PATCH 219/710] KVM: selftests: Drop pointless vm_create wrapper Signed-off-by: Andrew Jones Message-Id: <20201104212357.171559-3-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- tools/testing/selftests/kvm/include/kvm_util.h | 1 - tools/testing/selftests/kvm/include/perf_test_util.h | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 7 +------ 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 139ccb550618..54da9cc20db4 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -362,7 +362,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index feb949a92055..7d29aa786959 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -68,7 +68,6 @@ int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 1716300469c0..8e053e9dc7ea 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -105,7 +105,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = _vm_create(mode, pages, O_RDWR); + vm = vm_create(mode, pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7669cb7c86e3..126c6727a6b0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -180,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). */ -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; @@ -271,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } -struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) -{ - return _vm_create(mode, phy_pages, perm); -} - /* * VM Restart * From 6769155fece2100506e22161945712afae61769f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 4 Nov 2020 22:23:52 +0100 Subject: [PATCH 220/710] KVM: selftests: Make the per vcpu memory size global Rename vcpu_memory_bytes to something with "percpu" in it in order to be less ambiguous. Also make it global to simplify things. Signed-off-by: Andrew Jones Message-Id: <20201104212357.171559-7-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/demand_paging_test.c | 20 ++++++++----------- .../selftests/kvm/include/perf_test_util.h | 3 +++ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 47defc65aeda..33acd954a298 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -27,8 +27,6 @@ #ifdef __NR_userfaultfd -#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ - #ifdef PRINT_PER_PAGE_UPDATES #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) #else @@ -251,8 +249,7 @@ static int setup_demand_paging(struct kvm_vm *vm, } static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay, int vcpus, - uint64_t vcpu_memory_bytes) + useconds_t uffd_delay, int vcpus) { pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; @@ -264,7 +261,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, int vcpu_id; int r; - vm = create_vm(mode, vcpus, vcpu_memory_bytes); + vm = create_vm(mode, vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = 1; @@ -276,7 +273,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, vcpus, vcpu_memory_bytes); + add_vcpus(vm, vcpus, guest_percpu_mem_size); if (use_uffd) { uffd_handler_threads = @@ -293,9 +290,9 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, vm_paddr_t vcpu_gpa; void *vcpu_hva; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size); PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size); /* Cache the HVA pointer of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); @@ -312,7 +309,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], uffd_delay, &uffd_args[vcpu_id], - vcpu_hva, vcpu_memory_bytes); + vcpu_hva, guest_percpu_mem_size); if (r < 0) exit(-r); } @@ -413,7 +410,6 @@ static void help(char *name) int main(int argc, char *argv[]) { bool mode_selected = false; - uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE; int vcpus = 1; unsigned int mode; int opt, i; @@ -463,7 +459,7 @@ int main(int argc, char *argv[]) "A negative UFFD delay is not supported."); break; case 'b': - vcpu_memory_bytes = parse_size(optarg); + guest_percpu_mem_size = parse_size(optarg); break; case 'v': vcpus = atoi(optarg); @@ -486,7 +482,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes); + run_test(i, use_uffd, uffd_delay, vcpus); } return 0; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 8e053e9dc7ea..840dc2a19ce1 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -21,6 +21,8 @@ /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 +#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ + /* * Guest physical memory offset of the testing memory slot. * This will be set to the topmost valid physical address minus @@ -33,6 +35,7 @@ static uint64_t guest_test_phys_mem; * Must not conflict with identity mapped test code. */ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; struct vcpu_args { uint64_t gva; From 3be18630954672b889186e7be9b631f00134e954 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 4 Nov 2020 22:23:53 +0100 Subject: [PATCH 221/710] KVM: selftests: Make the number of vcpus global We also check the input number of vcpus against the maximum supported. Signed-off-by: Andrew Jones Message-Id: <20201104212357.171559-8-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/demand_paging_test.c | 37 +++++++++---------- .../selftests/kvm/include/perf_test_util.h | 3 ++ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 33acd954a298..3d96a7bfaff3 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -249,7 +249,7 @@ static int setup_demand_paging(struct kvm_vm *vm, } static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay, int vcpus) + useconds_t uffd_delay) { pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; @@ -261,7 +261,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, int vcpu_id; int r; - vm = create_vm(mode, vcpus, guest_percpu_mem_size); + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = 1; @@ -270,23 +270,23 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, "Failed to allocate buffer for guest data pattern"); memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); - vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, vcpus, guest_percpu_mem_size); + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); if (use_uffd) { uffd_handler_threads = - malloc(vcpus * sizeof(*uffd_handler_threads)); + malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - uffd_args = malloc(vcpus * sizeof(*uffd_args)); + uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); TEST_ASSERT(uffd_args, "Memory allocation failed"); - pipefds = malloc(sizeof(int) * vcpus * 2); + pipefds = malloc(sizeof(int) * nr_vcpus * 2); TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vm_paddr_t vcpu_gpa; void *vcpu_hva; @@ -322,7 +322,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, &perf_test_args.vcpu_args[vcpu_id]); } @@ -330,7 +330,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("Started all vCPUs\n"); /* Wait for the vcpu threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_join(vcpu_threads[vcpu_id], NULL); PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } @@ -343,7 +343,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, char c; /* Tell the user fault fd handler threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { r = write(pipefds[vcpu_id * 2 + 1], &c, 1); TEST_ASSERT(r == 1, "Unable to write to pipefd"); @@ -354,7 +354,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - perf_test_args.vcpu_args[0].pages * vcpus / + perf_test_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); ucall_uninit(vm); @@ -409,8 +409,8 @@ static void help(char *name) int main(int argc, char *argv[]) { + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); bool mode_selected = false; - int vcpus = 1; unsigned int mode; int opt, i; bool use_uffd = false; @@ -462,12 +462,9 @@ int main(int argc, char *argv[]) guest_percpu_mem_size = parse_size(optarg); break; case 'v': - vcpus = atoi(optarg); - TEST_ASSERT(vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'h': default: @@ -482,7 +479,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay, vcpus); + run_test(i, use_uffd, uffd_delay); } return 0; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 840dc2a19ce1..05eeb5d2bfc4 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -37,6 +37,9 @@ static uint64_t guest_test_phys_mem; static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +/* Number of VCPUs for the test */ +static int nr_vcpus = 1; + struct vcpu_args { uint64_t gva; uint64_t pages; From 4fd94ec7d566ee2f0b52111cc6d26dd311f8a7c3 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 27 Oct 2020 16:37:33 -0700 Subject: [PATCH 222/710] KVM: selftests: Introduce the dirty log perf test The dirty log perf test will time verious dirty logging operations (enabling dirty logging, dirtying memory, getting the dirty log, clearing the dirty log, and disabling dirty logging) in order to quantify dirty logging performance. This test can be used to inform future performance improvements to KVM's dirty logging infrastructure. This series was tested by running the following invocations on an Intel Skylake machine: dirty_log_perf_test -b 20m -i 100 -v 64 dirty_log_perf_test -b 20g -i 5 -v 4 dirty_log_perf_test -b 4g -i 5 -v 32 demand_paging_test -b 20m -v 64 demand_paging_test -b 20g -v 4 demand_paging_test -b 4g -v 32 All behaved as expected. Signed-off-by: Ben Gardon Message-Id: <20201027233733.1484855-6-bgardon@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/dirty_log_perf_test.c | 376 ++++++++++++++++++ .../selftests/kvm/include/perf_test_util.h | 18 +- .../testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 7 + 6 files changed, 396 insertions(+), 8 deletions(-) create mode 100644 tools/testing/selftests/kvm/dirty_log_perf_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index ea1692d43411..7a2c242b7152 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -27,6 +27,7 @@ /clear_dirty_log_test /demand_paging_test /dirty_log_test +/dirty_log_perf_test /kvm_create_max_vcpus /set_memory_region_test /steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c8af04dccf7f..3d14ef77755e 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -61,6 +61,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += dirty_log_perf_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c new file mode 100644 index 000000000000..cda7b000b1bc --- /dev/null +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging performance test + * + * Based on dirty_log_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2020, Google, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" +#include "test_util.h" + +/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ +#define TEST_HOST_LOOP_N 2UL + +/* Host variables */ +static bool host_quit; +static uint64_t iteration; +static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; + +static void *vcpu_worker(void *data) +{ + int ret; + struct kvm_vm *vm = perf_test_args.vm; + uint64_t pages_count = 0; + struct kvm_run *run; + struct timespec start; + struct timespec ts_diff; + struct timespec total = (struct timespec){0}; + struct timespec avg; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + run = vcpu_state(vm, vcpu_id); + + while (!READ_ONCE(host_quit)) { + uint64_t current_iteration = READ_ONCE(iteration); + + clock_gettime(CLOCK_MONOTONIC, &start); + ret = _vcpu_run(vm, vcpu_id); + ts_diff = timespec_diff_now(start); + + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + + pr_debug("Got sync event from vCPU %d\n", vcpu_id); + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + pr_debug("vCPU %d updated last completed iteration to %lu\n", + vcpu_id, vcpu_last_completed_iteration[vcpu_id]); + + if (current_iteration) { + pages_count += vcpu_args->pages; + total = timespec_add(total, ts_diff); + pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } else { + pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } + + while (current_iteration == READ_ONCE(iteration) && + !READ_ONCE(host_quit)) {} + } + + avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]); + pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], + total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); + + return NULL; +} + +#ifdef USE_CLEAR_DIRTY_LOG +static u64 dirty_log_manual_caps; +#endif + +static void run_test(enum vm_guest_mode mode, unsigned long iterations, + uint64_t phys_offset, int wr_fract) +{ + pthread_t *vcpu_threads; + struct kvm_vm *vm; + unsigned long *bmap; + uint64_t guest_num_pages; + uint64_t host_num_pages; + int vcpu_id; + struct timespec start; + struct timespec ts_diff; + struct timespec get_dirty_log_total = (struct timespec){0}; + struct timespec vcpu_dirty_total = (struct timespec){0}; + struct timespec avg; +#ifdef USE_CLEAR_DIRTY_LOG + struct kvm_enable_cap cap = {}; + struct timespec clear_dirty_log_total = (struct timespec){0}; +#endif + + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + + perf_test_args.wr_fract = wr_fract; + + guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + host_num_pages = vm_num_host_pages(mode, guest_num_pages); + bmap = bitmap_alloc(host_num_pages); + +#ifdef USE_CLEAR_DIRTY_LOG + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = dirty_log_manual_caps; + vm_enable_cap(vm, &cap); +#endif + + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + + sync_global_to_guest(vm, perf_test_args); + + /* Start the iterations */ + iteration = 0; + host_quit = false; + + clock_gettime(CLOCK_MONOTONIC, &start); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &perf_test_args.vcpu_args[vcpu_id]); + } + + /* Allow the vCPU to populate memory */ + pr_debug("Starting iteration %lu - Populating\n", iteration); + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n", + iteration); + + ts_diff = timespec_diff_now(start); + pr_info("Populate memory time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Enable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + KVM_MEM_LOG_DIRTY_PAGES); + ts_diff = timespec_diff_now(start); + pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + while (iteration < iterations) { + /* + * Incrementing the iteration number will start the vCPUs + * dirtying memory again. + */ + clock_gettime(CLOCK_MONOTONIC, &start); + iteration++; + + pr_debug("Starting iteration %lu\n", iteration); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n", + vcpu_id, iteration); + } + + ts_diff = timespec_diff_now(start); + vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff); + pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + + ts_diff = timespec_diff_now(start); + get_dirty_log_total = timespec_add(get_dirty_log_total, + ts_diff); + pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + host_num_pages); + + ts_diff = timespec_diff_now(start); + clear_dirty_log_total = timespec_add(clear_dirty_log_total, + ts_diff); + pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); +#endif + } + + /* Tell the vcpu thread to quit */ + host_quit = true; + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + + /* Disable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + ts_diff = timespec_diff_now(start); + pr_info("Disabling dirty logging time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + avg = timespec_div(get_dirty_log_total, iterations); + pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, get_dirty_log_total.tv_sec, + get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + avg = timespec_div(clear_dirty_log_total, iterations); + pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, clear_dirty_log_total.tv_sec, + clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); +#endif + + free(bmap); + free(vcpu_threads); + ucall_uninit(vm); + kvm_vm_free(vm); +} + +struct guest_mode { + bool supported; + bool enabled; +}; +static struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_init(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +static void help(char *name) +{ + int i; + + puts(""); + printf("usage: %s [-h] [-i iterations] [-p offset] " + "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); + puts(""); + printf(" -i: specify iteration counts (default: %"PRIu64")\n", + TEST_HOST_LOOP_N); + printf(" -p: specify guest physical test memory offset\n" + " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -m: specify the guest mode ID to test " + "(default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + printf(" -f: specify the fraction of pages which should be written to\n" + " as opposed to simply read, in the form\n" + " 1/.\n" + " (default: 1 i.e. all pages are written to.)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + unsigned long iterations = TEST_HOST_LOOP_N; + bool mode_selected = false; + uint64_t phys_offset = 0; + unsigned int mode; + int opt, i; + int wr_fract = 1; + +#ifdef USE_CLEAR_DIRTY_LOG + dirty_log_manual_caps = + kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + if (!dirty_log_manual_caps) { + print_skip("KVM_CLEAR_DIRTY_LOG not available"); + exit(KSFT_SKIP); + } + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); +#endif + +#ifdef __x86_64__ + guest_mode_init(VM_MODE_PXXV48_4K, true, true); +#endif +#ifdef __aarch64__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); + guest_mode_init(VM_MODE_P40V48_64K, true, true); + + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + + if (limit >= 52) + guest_mode_init(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_init(VM_MODE_P48V48_4K, true, true); + guest_mode_init(VM_MODE_P48V48_64K, true, true); + } + } +#endif +#ifdef __s390x__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); +#endif + + while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { + switch (opt) { + case 'i': + iterations = strtol(optarg, NULL, 10); + break; + case 'p': + phys_offset = strtoull(optarg, NULL, 0); + break; + case 'm': + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, + "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; + break; + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'f': + wr_fract = atoi(optarg); + TEST_ASSERT(wr_fract >= 1, + "Write fraction cannot be less than one"); + break; + case 'v': + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0, + "Must have a positive number of vCPUs"); + TEST_ASSERT(nr_vcpus <= MAX_VCPUS, + "This test does not currently support\n" + "more than %d vCPUs.", MAX_VCPUS); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); + + pr_info("Test iterations: %"PRIu64"\n", iterations); + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + run_test(i, iterations, phys_offset, wr_fract); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 05eeb5d2bfc4..2618052057b1 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -76,16 +76,18 @@ static void guest_code(uint32_t vcpu_id) gva = vcpu_args->gva; pages = vcpu_args->pages; - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); - if (i % perf_test_args.wr_fract == 0) - *(uint64_t *)addr = 0x0123456789ABCDEF; - else - READ_ONCE(*(uint64_t *)addr); + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } + + GUEST_SYNC(1); } - - GUEST_SYNC(1); } static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 1cc036ddb0c5..ffffa560436b 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -65,5 +65,6 @@ struct timespec timespec_add_ns(struct timespec ts, int64_t ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); struct timespec timespec_diff_now(struct timespec start); +struct timespec timespec_div(struct timespec ts, int divisor); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 1a46c2c48c7c..8e04c0b1608e 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -92,6 +92,13 @@ struct timespec timespec_diff_now(struct timespec start) return timespec_sub(end, start); } +struct timespec timespec_div(struct timespec ts, int divisor) +{ + int64_t ns = timespec_to_ns(ts) / divisor; + + return timespec_add_ns((struct timespec){0}, ns); +} + void print_skip(const char *fmt, ...) { va_list ap; From c91ebcc578e09783cfa4d85c1b437790f140f29a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 4 Nov 2020 19:28:43 +0000 Subject: [PATCH 223/710] iio/adc: ingenic: Fix battery VREF for JZ4770 SoC The reference voltage for the battery is clearly marked as 1.2V in the programming manual. With this fixed, the battery channel now returns correct values. Fixes: a515d6488505 ("IIO: Ingenic JZ47xx: Add support for JZ4770 SoC ADC.") Signed-off-by: Paul Cercueil Acked-by: Artur Rojek Cc: Link: https://lore.kernel.org/r/20201104192843.67187-1-paul@crapouillou.net Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ingenic-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ingenic-adc.c b/drivers/iio/adc/ingenic-adc.c index 92b25083e23f..973e84deebea 100644 --- a/drivers/iio/adc/ingenic-adc.c +++ b/drivers/iio/adc/ingenic-adc.c @@ -71,7 +71,7 @@ #define JZ4725B_ADC_BATTERY_HIGH_VREF_BITS 10 #define JZ4740_ADC_BATTERY_HIGH_VREF (7500 * 0.986) #define JZ4740_ADC_BATTERY_HIGH_VREF_BITS 12 -#define JZ4770_ADC_BATTERY_VREF 6600 +#define JZ4770_ADC_BATTERY_VREF 1200 #define JZ4770_ADC_BATTERY_VREF_BITS 12 #define JZ_ADC_IRQ_AUX BIT(0) From 6d6aa2907d59ddd3c0ebb2b93e1ddc84e474485b Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 3 Nov 2020 20:12:38 +0000 Subject: [PATCH 224/710] iio/adc: ingenic: Fix AUX/VBAT readings when touchscreen is used When the command feature of the ADC is used, it is possible to program the ADC, and specify at each step what input should be processed, and in comparison to what reference. This broke the AUX and battery readings when the touchscreen was enabled, most likely because the CMD feature would change the VREF all the time. Now, when AUX or battery are read, we temporarily disable the CMD feature, which means that we won't get touchscreen readings in that time frame. But it now gives correct values for AUX / battery, and the touchscreen isn't disabled for long enough to be an actual issue. Fixes: b96952f498db ("IIO: Ingenic JZ47xx: Add touchscreen mode.") Signed-off-by: Paul Cercueil Acked-by: Artur Rojek Cc: Link: https://lore.kernel.org/r/20201103201238.161083-1-paul@crapouillou.net Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ingenic-adc.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ingenic-adc.c b/drivers/iio/adc/ingenic-adc.c index 973e84deebea..1aafbe2cfe67 100644 --- a/drivers/iio/adc/ingenic-adc.c +++ b/drivers/iio/adc/ingenic-adc.c @@ -177,13 +177,12 @@ static void ingenic_adc_set_config(struct ingenic_adc *adc, mutex_unlock(&adc->lock); } -static void ingenic_adc_enable(struct ingenic_adc *adc, - int engine, - bool enabled) +static void ingenic_adc_enable_unlocked(struct ingenic_adc *adc, + int engine, + bool enabled) { u8 val; - mutex_lock(&adc->lock); val = readb(adc->base + JZ_ADC_REG_ENABLE); if (enabled) @@ -192,20 +191,41 @@ static void ingenic_adc_enable(struct ingenic_adc *adc, val &= ~BIT(engine); writeb(val, adc->base + JZ_ADC_REG_ENABLE); +} + +static void ingenic_adc_enable(struct ingenic_adc *adc, + int engine, + bool enabled) +{ + mutex_lock(&adc->lock); + ingenic_adc_enable_unlocked(adc, engine, enabled); mutex_unlock(&adc->lock); } static int ingenic_adc_capture(struct ingenic_adc *adc, int engine) { + u32 cfg; u8 val; int ret; - ingenic_adc_enable(adc, engine, true); + /* + * Disable CMD_SEL temporarily, because it causes wrong VBAT readings, + * probably due to the switch of VREF. We must keep the lock here to + * avoid races with the buffer enable/disable functions. + */ + mutex_lock(&adc->lock); + cfg = readl(adc->base + JZ_ADC_REG_CFG); + writel(cfg & ~JZ_ADC_REG_CFG_CMD_SEL, adc->base + JZ_ADC_REG_CFG); + + ingenic_adc_enable_unlocked(adc, engine, true); ret = readb_poll_timeout(adc->base + JZ_ADC_REG_ENABLE, val, !(val & BIT(engine)), 250, 1000); if (ret) - ingenic_adc_enable(adc, engine, false); + ingenic_adc_enable_unlocked(adc, engine, false); + + writel(cfg, adc->base + JZ_ADC_REG_CFG); + mutex_unlock(&adc->lock); return ret; } From 44a146a44f656fc03d368c1b9248d29a128cd053 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Tue, 3 Nov 2020 01:35:24 +0300 Subject: [PATCH 225/710] iio: light: fix kconfig dependency bug for VCNL4035 When VCNL4035 is enabled and IIO_BUFFER is disabled, it results in the following Kbuild warning: WARNING: unmet direct dependencies detected for IIO_TRIGGERED_BUFFER Depends on [n]: IIO [=y] && IIO_BUFFER [=n] Selected by [y]: - VCNL4035 [=y] && IIO [=y] && I2C [=y] The reason is that VCNL4035 selects IIO_TRIGGERED_BUFFER without depending on or selecting IIO_BUFFER while IIO_TRIGGERED_BUFFER depends on IIO_BUFFER. This can also fail building the kernel. Honor the kconfig dependency to remove unmet direct dependency warnings and avoid any potential build failures. Fixes: 55707294c4eb ("iio: light: Add support for vishay vcnl4035") Signed-off-by: Necip Fazil Yildiran Link: https://bugzilla.kernel.org/show_bug.cgi?id=209883 Link: https://lore.kernel.org/r/20201102223523.572461-1-fazilyildiran@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index cade6dc0305b..33ad4dd0b5c7 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -544,6 +544,7 @@ config VCNL4000 config VCNL4035 tristate "VCNL4035 combined ALS and proximity sensor" + select IIO_BUFFER select IIO_TRIGGERED_BUFFER select REGMAP_I2C depends on I2C From ae2975046dbc65855c217fe6fbd5b33140c5ff18 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Nov 2020 15:50:39 -0500 Subject: [PATCH 226/710] net/sunrpc: fix useless comparison in proc_do_xprt() In the original code, the "if (*lenp < 0)" check didn't work because "*lenp" is unsigned. Fortunately, the memory_read_from_buffer() call will never fail in this context so it doesn't affect runtime. Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields --- net/sunrpc/sysctl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 5c9f5bca4d99..3aad6ef18504 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -63,19 +63,20 @@ static int proc_do_xprt(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; - size_t len; + ssize_t len; if (write || *ppos) { *lenp = 0; return 0; } len = svc_print_xprts(tmpbuf, sizeof(tmpbuf)); - *lenp = memory_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len); + len = memory_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len); - if (*lenp < 0) { + if (len < 0) { *lenp = 0; return -EINVAL; } + *lenp = len; return 0; } From 77e70d351db7de07a46ac49b87a6c3c7a60fca7e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 26 Oct 2020 13:36:17 -0700 Subject: [PATCH 227/710] Input: sunkbd - avoid use-after-free in teardown paths We need to make sure we cancel the reinit work before we tear down the driver structures. Reported-by: Bodong Zhao Tested-by: Bodong Zhao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/sunkbd.c | 41 ++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c index 27126e621eb6..d450f11b98a7 100644 --- a/drivers/input/keyboard/sunkbd.c +++ b/drivers/input/keyboard/sunkbd.c @@ -99,7 +99,8 @@ static irqreturn_t sunkbd_interrupt(struct serio *serio, switch (data) { case SUNKBD_RET_RESET: - schedule_work(&sunkbd->tq); + if (sunkbd->enabled) + schedule_work(&sunkbd->tq); sunkbd->reset = -1; break; @@ -200,16 +201,12 @@ static int sunkbd_initialize(struct sunkbd *sunkbd) } /* - * sunkbd_reinit() sets leds and beeps to a state the computer remembers they - * were in. + * sunkbd_set_leds_beeps() sets leds and beeps to a state the computer remembers + * they were in. */ -static void sunkbd_reinit(struct work_struct *work) +static void sunkbd_set_leds_beeps(struct sunkbd *sunkbd) { - struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq); - - wait_event_interruptible_timeout(sunkbd->wait, sunkbd->reset >= 0, HZ); - serio_write(sunkbd->serio, SUNKBD_CMD_SETLED); serio_write(sunkbd->serio, (!!test_bit(LED_CAPSL, sunkbd->dev->led) << 3) | @@ -222,11 +219,39 @@ static void sunkbd_reinit(struct work_struct *work) SUNKBD_CMD_BELLOFF - !!test_bit(SND_BELL, sunkbd->dev->snd)); } + +/* + * sunkbd_reinit() wait for the keyboard reset to complete and restores state + * of leds and beeps. + */ + +static void sunkbd_reinit(struct work_struct *work) +{ + struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq); + + /* + * It is OK that we check sunkbd->enabled without pausing serio, + * as we only want to catch true->false transition that will + * happen once and we will be woken up for it. + */ + wait_event_interruptible_timeout(sunkbd->wait, + sunkbd->reset >= 0 || !sunkbd->enabled, + HZ); + + if (sunkbd->reset >= 0 && sunkbd->enabled) + sunkbd_set_leds_beeps(sunkbd); +} + static void sunkbd_enable(struct sunkbd *sunkbd, bool enable) { serio_pause_rx(sunkbd->serio); sunkbd->enabled = enable; serio_continue_rx(sunkbd->serio); + + if (!enable) { + wake_up_interruptible(&sunkbd->wait); + cancel_work_sync(&sunkbd->tq); + } } /* From 34a280831384d7e58327ff0e82e18db8e788107c Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Fri, 6 Nov 2020 19:39:41 +0100 Subject: [PATCH 228/710] video: hyperv_fb: include vmalloc.h hvfb_getmem uses vzalloc, therefore vmalloc.h should be included. Fixes commit d21987d709e807ba7bbf47044deb56a3c02e8be4 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver") Signed-off-by: Olaf Hering Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20201106183941.9751-1-olaf@aepfle.de --- drivers/video/fbdev/hyperv_fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index e36fb1a0ecdb..5bc86f481a78 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -47,6 +47,7 @@ #include #include +#include #include #include #include From ff1f855804cdbbb6db7b9b6df6cab783d1a40d66 Mon Sep 17 00:00:00 2001 From: Rijo Thomas Date: Wed, 4 Nov 2020 11:56:09 +0530 Subject: [PATCH 229/710] tee: amdtee: fix memory leak due to reset of global shm list The driver maintains a list of shared memory buffers along with their mapped buffer id's in a global linked list. These buffers need to be unmapped after use by the user-space client. The global shared memory list is initialized to zero entries in the function amdtee_open(). This clearing of list entries can be a source for memory leak on secure side if the global linked list previously held some mapped buffer entries allocated from another TEE context. Fix potential memory leak issue by moving global shared memory list to AMD-TEE driver context data structure. Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Reviewed-by: Devaraj Rangasamy Signed-off-by: Rijo Thomas Signed-off-by: Jens Wiklander --- drivers/tee/amdtee/amdtee_private.h | 7 +++---- drivers/tee/amdtee/core.c | 18 +++++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/tee/amdtee/amdtee_private.h b/drivers/tee/amdtee/amdtee_private.h index d7f798c3394b..97df16a17285 100644 --- a/drivers/tee/amdtee/amdtee_private.h +++ b/drivers/tee/amdtee/amdtee_private.h @@ -64,9 +64,12 @@ struct amdtee_session { /** * struct amdtee_context_data - AMD-TEE driver context data * @sess_list: Keeps track of sessions opened in current TEE context + * @shm_list: Keeps track of buffers allocated and mapped in current TEE + * context */ struct amdtee_context_data { struct list_head sess_list; + struct list_head shm_list; }; struct amdtee_driver_data { @@ -89,10 +92,6 @@ struct amdtee_shm_data { u32 buf_id; }; -struct amdtee_shm_context { - struct list_head shmdata_list; -}; - #define LOWER_TWO_BYTE_MASK 0x0000FFFF /** diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index 27b4cd77d0db..ce61c68ec58c 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -20,7 +20,6 @@ static struct amdtee_driver_data *drv_data; static DEFINE_MUTEX(session_list_mutex); -static struct amdtee_shm_context shmctx; static void amdtee_get_version(struct tee_device *teedev, struct tee_ioctl_version_data *vers) @@ -42,7 +41,7 @@ static int amdtee_open(struct tee_context *ctx) return -ENOMEM; INIT_LIST_HEAD(&ctxdata->sess_list); - INIT_LIST_HEAD(&shmctx.shmdata_list); + INIT_LIST_HEAD(&ctxdata->shm_list); ctx->data = ctxdata; return 0; @@ -152,10 +151,11 @@ static struct amdtee_session *find_session(struct amdtee_context_data *ctxdata, u32 get_buffer_id(struct tee_shm *shm) { - u32 buf_id = 0; + struct amdtee_context_data *ctxdata = shm->ctx->data; struct amdtee_shm_data *shmdata; + u32 buf_id = 0; - list_for_each_entry(shmdata, &shmctx.shmdata_list, shm_node) + list_for_each_entry(shmdata, &ctxdata->shm_list, shm_node) if (shmdata->kaddr == shm->kaddr) { buf_id = shmdata->buf_id; break; @@ -333,8 +333,9 @@ int amdtee_close_session(struct tee_context *ctx, u32 session) int amdtee_map_shmem(struct tee_shm *shm) { - struct shmem_desc shmem; + struct amdtee_context_data *ctxdata; struct amdtee_shm_data *shmnode; + struct shmem_desc shmem; int rc, count; u32 buf_id; @@ -362,7 +363,8 @@ int amdtee_map_shmem(struct tee_shm *shm) shmnode->kaddr = shm->kaddr; shmnode->buf_id = buf_id; - list_add(&shmnode->shm_node, &shmctx.shmdata_list); + ctxdata = shm->ctx->data; + list_add(&shmnode->shm_node, &ctxdata->shm_list); pr_debug("buf_id :[%x] kaddr[%p]\n", shmnode->buf_id, shmnode->kaddr); @@ -371,6 +373,7 @@ int amdtee_map_shmem(struct tee_shm *shm) void amdtee_unmap_shmem(struct tee_shm *shm) { + struct amdtee_context_data *ctxdata; struct amdtee_shm_data *shmnode; u32 buf_id; @@ -381,7 +384,8 @@ void amdtee_unmap_shmem(struct tee_shm *shm) /* Unmap the shared memory from TEE */ handle_unmap_shmem(buf_id); - list_for_each_entry(shmnode, &shmctx.shmdata_list, shm_node) + ctxdata = shm->ctx->data; + list_for_each_entry(shmnode, &ctxdata->shm_list, shm_node) if (buf_id == shmnode->buf_id) { list_del(&shmnode->shm_node); kfree(shmnode); From be353be27874f40837327d9a39e3ad2149ab66d3 Mon Sep 17 00:00:00 2001 From: Rijo Thomas Date: Wed, 4 Nov 2020 11:56:10 +0530 Subject: [PATCH 230/710] tee: amdtee: synchronize access to shm list Synchronize access to shm or shared memory buffer list to prevent race conditions due to concurrent updates to shared shm list by multiple threads. Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Reviewed-by: Devaraj Rangasamy Signed-off-by: Rijo Thomas Signed-off-by: Jens Wiklander --- drivers/tee/amdtee/amdtee_private.h | 1 + drivers/tee/amdtee/core.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/tee/amdtee/amdtee_private.h b/drivers/tee/amdtee/amdtee_private.h index 97df16a17285..337c8d82f74e 100644 --- a/drivers/tee/amdtee/amdtee_private.h +++ b/drivers/tee/amdtee/amdtee_private.h @@ -70,6 +70,7 @@ struct amdtee_session { struct amdtee_context_data { struct list_head sess_list; struct list_head shm_list; + struct mutex shm_mutex; /* synchronizes access to @shm_list */ }; struct amdtee_driver_data { diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index ce61c68ec58c..8a6a8f30bb42 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -42,6 +42,7 @@ static int amdtee_open(struct tee_context *ctx) INIT_LIST_HEAD(&ctxdata->sess_list); INIT_LIST_HEAD(&ctxdata->shm_list); + mutex_init(&ctxdata->shm_mutex); ctx->data = ctxdata; return 0; @@ -85,6 +86,7 @@ static void amdtee_release(struct tee_context *ctx) list_del(&sess->list_node); release_session(sess); } + mutex_destroy(&ctxdata->shm_mutex); kfree(ctxdata); ctx->data = NULL; @@ -155,11 +157,13 @@ u32 get_buffer_id(struct tee_shm *shm) struct amdtee_shm_data *shmdata; u32 buf_id = 0; + mutex_lock(&ctxdata->shm_mutex); list_for_each_entry(shmdata, &ctxdata->shm_list, shm_node) if (shmdata->kaddr == shm->kaddr) { buf_id = shmdata->buf_id; break; } + mutex_unlock(&ctxdata->shm_mutex); return buf_id; } @@ -364,7 +368,9 @@ int amdtee_map_shmem(struct tee_shm *shm) shmnode->kaddr = shm->kaddr; shmnode->buf_id = buf_id; ctxdata = shm->ctx->data; + mutex_lock(&ctxdata->shm_mutex); list_add(&shmnode->shm_node, &ctxdata->shm_list); + mutex_unlock(&ctxdata->shm_mutex); pr_debug("buf_id :[%x] kaddr[%p]\n", shmnode->buf_id, shmnode->kaddr); @@ -385,12 +391,14 @@ void amdtee_unmap_shmem(struct tee_shm *shm) handle_unmap_shmem(buf_id); ctxdata = shm->ctx->data; + mutex_lock(&ctxdata->shm_mutex); list_for_each_entry(shmnode, &ctxdata->shm_list, shm_node) if (buf_id == shmnode->buf_id) { list_del(&shmnode->shm_node); kfree(shmnode); break; } + mutex_unlock(&ctxdata->shm_mutex); } int amdtee_invoke_func(struct tee_context *ctx, From ce9dfafe29bed86fe3cda330ac6072ce84e1ff81 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 3 Nov 2020 16:55:43 +0100 Subject: [PATCH 231/710] s390: fix system call exit path The system call exit path is running with interrupts enabled while checking for TIF/PIF/CIF bits which require special handling. If all bits have been checked interrupts are disabled and the kernel exits to user space. The problem is that after checking all bits and before interrupts are disabled bits can be set already again, due to interrupt handling. This means that the kernel can exit to user space with some TIF/PIF/CIF bits set, which should never happen. E.g. TIF_NEED_RESCHED might be set, which might lead to additional latencies, since that bit will only be recognized with next exit to user space. Fix this by checking the corresponding bits only when interrupts are disabled. Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Cc: # 5.8 Acked-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 86235919c2d1..5346545b9860 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -422,6 +422,7 @@ ENTRY(system_call) #endif LOCKDEP_SYS_EXIT .Lsysc_tif: + DISABLE_INTS TSTMSK __PT_FLAGS(%r11),_PIF_WORK jnz .Lsysc_work TSTMSK __TI_flags(%r12),_TIF_WORK @@ -444,6 +445,7 @@ ENTRY(system_call) # One of the work bits is on. Find out which one. # .Lsysc_work: + ENABLE_INTS TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lsysc_reschedule TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART From 7de8bfaa095fcbc2db2952d4b561be102a41c2a6 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 4 Nov 2020 10:55:08 +0000 Subject: [PATCH 232/710] arm64: dts: renesas: r8a774e1: Add missing audio_clk_b Add audio_clk_b configured as 0 Hz, this will be overridden by the boards providing the audio clock. Fixes: 8183a7938cfec ("arm64: dts: renesas: r8a774e1: Add audio support") Reported-by: Nobuhiro Iwamatsu Signed-off-by: Lad Prabhakar Link: https://lore.kernel.org/r/20201104105508.21197-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/r8a774e1.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi index 9cbf963aa068..c29643442e91 100644 --- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi @@ -28,6 +28,12 @@ clock-frequency = <0>; }; + audio_clk_b: audio_clk_b { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <0>; + }; + audio_clk_c: audio_clk_c { compatible = "fixed-clock"; #clock-cells = <0>; From f59ee399de4a8ca4d7d19cdcabb4b63e94867f09 Mon Sep 17 00:00:00 2001 From: Chris Ye Date: Sun, 1 Nov 2020 11:34:52 -0800 Subject: [PATCH 233/710] HID: add HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE for Gamevice devices Kernel 5.4 introduces HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE, devices need to be set explicitly with this flag. Signed-off-by: Chris Ye Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 4 ++++ drivers/hid/hid-quirks.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 28ecd635edc1..af361cd04bf0 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -445,6 +445,10 @@ #define USB_VENDOR_ID_FRUCTEL 0x25B6 #define USB_DEVICE_ID_GAMETEL_MT_MODE 0x0002 +#define USB_VENDOR_ID_GAMEVICE 0x27F8 +#define USB_DEVICE_ID_GAMEVICE_GV186 0x0BBE +#define USB_DEVICE_ID_GAMEVICE_KISHI 0x0BBF + #define USB_VENDOR_ID_GAMERON 0x0810 #define USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR 0x0001 #define USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR 0x0002 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index b154e11d43bf..bf7ecab5d9e5 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -85,6 +85,10 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD), HID_QUIRK_MULTI_INPUT }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_GV186), + HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, + { HID_USB_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_KISHI), + HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FLYING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, From 0ba2df09f1500d3f27398a3382b86d39c3e6abe2 Mon Sep 17 00:00:00 2001 From: Marc Ferland Date: Wed, 4 Nov 2020 12:30:04 +0530 Subject: [PATCH 234/710] dmaengine: xilinx_dma: use readl_poll_timeout_atomic variant The xilinx_dma_poll_timeout macro is sometimes called while holding a spinlock (see xilinx_dma_issue_pending() for an example) this means we shouldn't sleep when polling the dma channel registers. To address it in xilinx poll timeout macro use readl_poll_timeout_atomic instead of readl_poll_timeout variant. Signed-off-by: Marc Ferland Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1604473206-32573-2-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index ecff35402860..d858d540b88a 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -517,8 +517,8 @@ struct xilinx_dma_device { #define to_dma_tx_descriptor(tx) \ container_of(tx, struct xilinx_dma_tx_descriptor, async_tx) #define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \ - readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \ - cond, delay_us, timeout_us) + readl_poll_timeout_atomic(chan->xdev->regs + chan->ctrl_offset + reg, \ + val, cond, delay_us, timeout_us) /* IO accessors */ static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg) From c8ae7932997d0cc92d016829138074c7520248e5 Mon Sep 17 00:00:00 2001 From: Matthew Murrian Date: Wed, 4 Nov 2020 12:30:05 +0530 Subject: [PATCH 235/710] dmaengine: xilinx_dma: Fix usage of xilinx_aximcdma_tx_segment Several code sections incorrectly use struct xilinx_axidma_tx_segment instead of struct xilinx_aximcdma_tx_segment when operating as Multichannel DMA. As their structures are similar, this just works. Fixes: 6ccd692bfb7f ("dmaengine: xilinx_dma: Add Xilinx AXI MCDMA Engine driver support") Signed-off-by: Matthew Murrian Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1604473206-32573-3-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index d858d540b88a..9fd924cf2337 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -948,8 +948,10 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, { struct xilinx_cdma_tx_segment *cdma_seg; struct xilinx_axidma_tx_segment *axidma_seg; + struct xilinx_aximcdma_tx_segment *aximcdma_seg; struct xilinx_cdma_desc_hw *cdma_hw; struct xilinx_axidma_desc_hw *axidma_hw; + struct xilinx_aximcdma_desc_hw *aximcdma_hw; struct list_head *entry; u32 residue = 0; @@ -961,13 +963,23 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, cdma_hw = &cdma_seg->hw; residue += (cdma_hw->control - cdma_hw->status) & chan->xdev->max_buffer_len; - } else { + } else if (chan->xdev->dma_config->dmatype == + XDMA_TYPE_AXIDMA) { axidma_seg = list_entry(entry, struct xilinx_axidma_tx_segment, node); axidma_hw = &axidma_seg->hw; residue += (axidma_hw->control - axidma_hw->status) & chan->xdev->max_buffer_len; + } else { + aximcdma_seg = + list_entry(entry, + struct xilinx_aximcdma_tx_segment, + node); + aximcdma_hw = &aximcdma_seg->hw; + residue += + (aximcdma_hw->control - aximcdma_hw->status) & + chan->xdev->max_buffer_len; } } @@ -1135,7 +1147,7 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan) upper_32_bits(chan->seg_p + sizeof(*chan->seg_mv) * ((i + 1) % XILINX_DMA_NUM_DESCS)); chan->seg_mv[i].phys = chan->seg_p + - sizeof(*chan->seg_v) * i; + sizeof(*chan->seg_mv) * i; list_add_tail(&chan->seg_mv[i].node, &chan->free_seg_list); } @@ -1560,7 +1572,7 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan) { struct xilinx_dma_tx_descriptor *head_desc, *tail_desc; - struct xilinx_axidma_tx_segment *tail_segment; + struct xilinx_aximcdma_tx_segment *tail_segment; u32 reg; /* @@ -1582,7 +1594,7 @@ static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan) tail_desc = list_last_entry(&chan->pending_list, struct xilinx_dma_tx_descriptor, node); tail_segment = list_last_entry(&tail_desc->segments, - struct xilinx_axidma_tx_segment, node); + struct xilinx_aximcdma_tx_segment, node); reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest)); @@ -1864,6 +1876,7 @@ static void append_desc_queue(struct xilinx_dma_chan *chan, struct xilinx_vdma_tx_segment *tail_segment; struct xilinx_dma_tx_descriptor *tail_desc; struct xilinx_axidma_tx_segment *axidma_tail_segment; + struct xilinx_aximcdma_tx_segment *aximcdma_tail_segment; struct xilinx_cdma_tx_segment *cdma_tail_segment; if (list_empty(&chan->pending_list)) @@ -1885,11 +1898,17 @@ static void append_desc_queue(struct xilinx_dma_chan *chan, struct xilinx_cdma_tx_segment, node); cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; - } else { + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { axidma_tail_segment = list_last_entry(&tail_desc->segments, struct xilinx_axidma_tx_segment, node); axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } else { + aximcdma_tail_segment = + list_last_entry(&tail_desc->segments, + struct xilinx_aximcdma_tx_segment, + node); + aximcdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; } /* From 96d5d884f78306206d745d856aad322becd100c3 Mon Sep 17 00:00:00 2001 From: Matthew Murrian Date: Wed, 4 Nov 2020 12:30:06 +0530 Subject: [PATCH 236/710] dmaengine: xilinx_dma: Fix SG capability check for MCDMA The SG capability is inherently present with Multichannel DMA operation. The register used to check for this capability with other DMA driver types is not defined for MCDMA. Fixes: 6ccd692bfb7f ("dmaengine: xilinx_dma: Add Xilinx AXI MCDMA Engine driver support") Signed-off-by: Matthew Murrian Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1604473206-32573-4-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 9fd924cf2337..22faea653ea8 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2855,10 +2855,11 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, chan->stop_transfer = xilinx_dma_stop_transfer; } - /* check if SG is enabled (only for AXIDMA and CDMA) */ + /* check if SG is enabled (only for AXIDMA, AXIMCDMA, and CDMA) */ if (xdev->dma_config->dmatype != XDMA_TYPE_VDMA) { - if (dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & - XILINX_DMA_DMASR_SG_MASK) + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA || + dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & + XILINX_DMA_DMASR_SG_MASK) chan->has_sg = true; dev_dbg(chan->dev, "ch %d: SG %s\n", chan->id, chan->has_sg ? "enabled" : "disabled"); From 52d9edbe6efc5042cf57fae6a25d07572ddf398b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 8 Oct 2020 21:35:38 +0200 Subject: [PATCH 237/710] ARM: dts: stm32: Fix TA3-GPIO-C key on STM32MP1 DHCOM PDK2 On the prototype DHCOM, the TA3-GPIO-C button was connected to pin PI11 of the STM32MP15xx, however on the production SoM this was changed to pin PG0 to free up the IRQ line 11 for LAN8710i PHY IRQ. Update the connection in the DT. Since the IRQ line 0 is used for PMIC as well and cannot be shared with the button, make the button polled. Fixes: 87cabf9405cb ("ARM: dts: stm32: Add GPIO keys for STM32MP1 DHCOM PDK2") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Maxime Coquelin Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index 5dff24e39af8..9a0a59678097 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -46,6 +46,16 @@ linux,code = ; gpios = <&gpiof 3 GPIO_ACTIVE_LOW>; }; + + /* + * The EXTi IRQ line 0 is shared with PMIC, + * so mark this as polled GPIO key. + */ + button-2 { + label = "TA3-GPIO-C"; + linux,code = ; + gpios = <&gpiog 0 GPIO_ACTIVE_LOW>; + }; }; gpio-keys { @@ -59,13 +69,6 @@ wakeup-source; }; - button-2 { - label = "TA3-GPIO-C"; - linux,code = ; - gpios = <&gpioi 11 GPIO_ACTIVE_LOW>; - wakeup-source; - }; - button-3 { label = "TA4-GPIO-D"; linux,code = ; From 7e5f3155dcbb4d724386b30cc232002d9b9d81f5 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 28 Oct 2020 21:46:17 +0100 Subject: [PATCH 238/710] ARM: dts: stm32: Fix LED5 on STM32MP1 DHCOM PDK2 On the prototype DHCOM, the LED5 was connected to pin PG2 of the STM32MP15xx, however on the production SoM this was changed to pin PC6. Update the connection in the DT. Fixes: 81d5fc719798 ("ARM: dts: stm32: Add GPIO LEDs for STM32MP1 DHCOM PDK2") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Maxime Coquelin Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index 9a0a59678097..8456f172d4b1 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -82,7 +82,7 @@ led-0 { label = "green:led5"; - gpios = <&gpiog 2 GPIO_ACTIVE_HIGH>; + gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>; default-state = "off"; }; From 1f3d7fc279b1a299bb8b1b225d80309a2062ab8a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 29 Oct 2020 20:46:17 +0100 Subject: [PATCH 239/710] ARM: dts: stm32: Define VIO regulator supply on DHCOM The VIO regulator is supplied by PMIC Buck3, describe this in the DT. Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Maxime Coquelin Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index b4b52cf634af..c89ebfaa115d 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -68,6 +68,7 @@ gpio = <&gpiog 3 GPIO_ACTIVE_LOW>; regulator-always-on; regulator-boot-on; + vin-supply = <&vdd>; }; }; From 1e106aa3509b86738769775969822ffc1ec21bf4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Nov 2020 11:52:05 +0300 Subject: [PATCH 240/710] futex: Don't enable IRQs unconditionally in put_pi_state() The exit_pi_state_list() function calls put_pi_state() with IRQs disabled and is not expecting that IRQs will be enabled inside the function. Use the _irqsave() variant so that IRQs are restored to the original state instead of being enabled unconditionally. Fixes: 153fbd1226fb ("futex: Fix more put_pi_state() vs. exit_pi_state_list() races") Signed-off-by: Dan Carpenter Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201106085205.GA1159983@mwanda --- kernel/futex.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index ac328874f6e5..00259c7e288e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -788,8 +788,9 @@ static void put_pi_state(struct futex_pi_state *pi_state) */ if (pi_state->owner) { struct task_struct *owner; + unsigned long flags; - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); owner = pi_state->owner; if (owner) { raw_spin_lock(&owner->pi_lock); @@ -797,7 +798,7 @@ static void put_pi_state(struct futex_pi_state *pi_state) raw_spin_unlock(&owner->pi_lock); } rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } if (current->pi_state_cache) { From e5ace7f62695656ef8a66ad5a4c3edd055894876 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 24 Sep 2020 01:25:35 +0200 Subject: [PATCH 241/710] ARM: dts: stm32: Enable thermal sensor support on stm32mp15xx-dhcor Enable STM32 Digital Thermal Sensor driver for stm32mp15xx-dhcor SoMs. Fixes: 94cafe1b6482 ("ARM: dts: stm32: Add Avenger96 devicetree support based on STM32MP157A") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Manivannan Sadhasivam Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Reviewed-by: Manivannan Sadhasivam Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi index 04fbb324a541..803eb8bc9c85 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi @@ -21,6 +21,10 @@ }; }; +&dts { + status = "okay"; +}; + &i2c4 { pinctrl-names = "default"; pinctrl-0 = <&i2c4_pins_a>; From f4c7fa39415da6db1fa0bc26162ac23a0fbae8bb Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 29 Oct 2020 20:46:52 +0100 Subject: [PATCH 242/710] ARM: dts: stm32: Keep VDDA LDO1 always on on DHCOM The VDDA LDO1 PMIC output supplies the analog VDDA input of the STM32MP1 on DHCOM, keep it always on, otherwise there could be leakage through the SoC. Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Maxime Coquelin Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index c89ebfaa115d..f796a6150313 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -203,6 +203,7 @@ vdda: ldo1 { regulator-name = "vdda"; + regulator-always-on; regulator-min-microvolt = <2900000>; regulator-max-microvolt = <2900000>; interrupts = ; From 65cae18882f943215d0505ddc7e70495877308e6 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Fri, 6 Nov 2020 20:11:19 -0500 Subject: [PATCH 243/710] x86/xen: don't unbind uninitialized lock_kicker_irq When booting a hyperthreaded system with the kernel parameter 'mitigations=auto,nosmt', the following warning occurs: WARNING: CPU: 0 PID: 1 at drivers/xen/events/events_base.c:1112 unbind_from_irqhandler+0x4e/0x60 ... Hardware name: Xen HVM domU, BIOS 4.2.amazon 08/24/2006 ... Call Trace: xen_uninit_lock_cpu+0x28/0x62 xen_hvm_cpu_die+0x21/0x30 takedown_cpu+0x9c/0xe0 ? trace_suspend_resume+0x60/0x60 cpuhp_invoke_callback+0x9a/0x530 _cpu_up+0x11a/0x130 cpu_up+0x7e/0xc0 bringup_nonboot_cpus+0x48/0x50 smp_init+0x26/0x79 kernel_init_freeable+0xea/0x229 ? rest_init+0xaa/0xaa kernel_init+0xa/0x106 ret_from_fork+0x35/0x40 The secondary CPUs are not activated with the nosmt mitigations and only the primary thread on each CPU core is used. In this situation, xen_hvm_smp_prepare_cpus(), and more importantly xen_init_lock_cpu(), is not called, so the lock_kicker_irq is not initialized for the secondary CPUs. Let's fix this by exiting early in xen_uninit_lock_cpu() if the irq is not set to avoid the warning from above for each secondary CPU. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20201107011119.631442-1-bmasney@redhat.com Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- arch/x86/xen/spinlock.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 799f4eba0a62..043c73dfd2c9 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -93,10 +93,20 @@ void xen_init_lock_cpu(int cpu) void xen_uninit_lock_cpu(int cpu) { + int irq; + if (!xen_pvspin) return; - unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); + /* + * When booting the kernel with 'mitigations=auto,nosmt', the secondary + * CPUs are not activated, and lock_kicker_irq is not initialized. + */ + irq = per_cpu(lock_kicker_irq, cpu); + if (irq == -1) + return; + + unbind_from_irqhandler(irq, NULL); per_cpu(lock_kicker_irq, cpu) = -1; kfree(per_cpu(irq_name, cpu)); per_cpu(irq_name, cpu) = NULL; From 7372e79c9eb9d7034e498721eb2861ae4fdbc618 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 8 Nov 2020 12:59:06 +0200 Subject: [PATCH 244/710] fanotify: fix logic of reporting name info with watched parent The victim inode's parent and name info is required when an event needs to be delivered to a group interested in filename info OR when the inode's parent is interested in an event on its children. Let us call the first condition 'parent_needed' and the second condition 'parent_interested'. In fsnotify_parent(), the condition where the inode's parent is interested in some events on its children, but not necessarily interested the specific event is called 'parent_watched'. fsnotify_parent() tests the condition (!parent_watched && !parent_needed) for sending the event without parent and name info, which is correct. It then wrongly assumes that parent_watched implies !parent_needed and tests the condition (parent_watched && !parent_interested) for sending the event without parent and name info, which is wrong, because parent may still be needed by some group. For example, after initializing a group with FAN_REPORT_DFID_NAME and adding a FAN_MARK_MOUNT with FAN_OPEN mask, open events on non-directory children of "testdir" are delivered with file name info. After adding another mark to the same group on the parent "testdir" with FAN_CLOSE|FAN_EVENT_ON_CHILD mask, open events on non-directory children of "testdir" are no longer delivered with file name info. Fix the logic and use auxiliary variables to clarify the conditions. Fixes: 9b93f33105f5 ("fsnotify: send event with parent/name info to sb/mount/non-dir marks") Cc: stable@vger.kernel.org#v5.9 Link: https://lore.kernel.org/r/20201108105906.8493-1-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index a960ec3a569a..8d3ad5ef2925 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -178,6 +178,7 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, struct inode *inode = d_inode(dentry); struct dentry *parent; bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; + bool parent_needed, parent_interested; __u32 p_mask; struct inode *p_inode = NULL; struct name_snapshot name; @@ -193,7 +194,8 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, return 0; parent = NULL; - if (!parent_watched && !fsnotify_event_needs_parent(inode, mnt, mask)) + parent_needed = fsnotify_event_needs_parent(inode, mnt, mask); + if (!parent_watched && !parent_needed) goto notify; /* Does parent inode care about events on children? */ @@ -205,17 +207,17 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, /* * Include parent/name in notification either if some notification - * groups require parent info (!parent_watched case) or the parent is - * interested in this event. + * groups require parent info or the parent is interested in this event. */ - if (!parent_watched || (mask & p_mask & ALL_FSNOTIFY_EVENTS)) { + parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; + if (parent_needed || parent_interested) { /* When notifying parent, child should be passed as data */ WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); /* Notify both parent and child with child name info */ take_dentry_name_snapshot(&name, dentry); file_name = &name.name; - if (parent_watched) + if (parent_interested) mask |= FS_EVENT_ON_CHILD; } From 06ad8d339524bf94b89859047822c31df6ace239 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 5 Nov 2020 20:02:56 +0100 Subject: [PATCH 245/710] drm/gma500: Fix out-of-bounds access to struct drm_device.vblank[] The gma500 driver expects 3 pipelines in several it's IRQ functions. Accessing struct drm_device.vblank[], this fails with devices that only have 2 pipelines. An example KASAN report is shown below. [ 62.267688] ================================================================== [ 62.268856] BUG: KASAN: slab-out-of-bounds in psb_irq_postinstall+0x250/0x3c0 [gma500_gfx] [ 62.269450] Read of size 1 at addr ffff8880012bc6d0 by task systemd-udevd/285 [ 62.269949] [ 62.270192] CPU: 0 PID: 285 Comm: systemd-udevd Tainted: G E 5.10.0-rc1-1-default+ #572 [ 62.270807] Hardware name: /DN2800MT, BIOS MTCDT10N.86A.0164.2012.1213.1024 12/13/2012 [ 62.271366] Call Trace: [ 62.271705] dump_stack+0xae/0xe5 [ 62.272180] print_address_description.constprop.0+0x17/0xf0 [ 62.272987] ? psb_irq_postinstall+0x250/0x3c0 [gma500_gfx] [ 62.273474] __kasan_report.cold+0x20/0x38 [ 62.273989] ? psb_irq_postinstall+0x250/0x3c0 [gma500_gfx] [ 62.274460] kasan_report+0x3a/0x50 [ 62.274891] psb_irq_postinstall+0x250/0x3c0 [gma500_gfx] [ 62.275380] drm_irq_install+0x131/0x1f0 <...> [ 62.300751] Allocated by task 285: [ 62.301223] kasan_save_stack+0x1b/0x40 [ 62.301731] __kasan_kmalloc.constprop.0+0xbf/0xd0 [ 62.302293] drmm_kmalloc+0x55/0x100 [ 62.302773] drm_vblank_init+0x77/0x210 Resolve the issue by only handling vblank entries up to the number of CRTCs. I'm adding a Fixes tag for reference, although the bug has been present since the driver's initial commit. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Fixes: 5c49fd3aa0ab ("gma500: Add the core DRM files and headers") Cc: Alan Cox Cc: Dave Airlie Cc: Patrik Jakobsson Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org#v3.3+ Link: https://patchwork.freedesktop.org/patch/msgid/20201105190256.3893-1-tzimmermann@suse.de --- drivers/gpu/drm/gma500/psb_irq.c | 34 +++++++++++--------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c index 15eb3770d817..361e3a0c5ab6 100644 --- a/drivers/gpu/drm/gma500/psb_irq.c +++ b/drivers/gpu/drm/gma500/psb_irq.c @@ -347,6 +347,7 @@ int psb_irq_postinstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; unsigned long irqflags; + unsigned int i; spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); @@ -359,20 +360,12 @@ int psb_irq_postinstall(struct drm_device *dev) PSB_WVDC32(dev_priv->vdc_irq_mask, PSB_INT_ENABLE_R); PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM); - if (dev->vblank[0].enabled) - psb_enable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[1].enabled) - psb_enable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[2].enabled) - psb_enable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); + for (i = 0; i < dev->num_crtcs; ++i) { + if (dev->vblank[i].enabled) + psb_enable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + else + psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + } if (dev_priv->ops->hotplug_enable) dev_priv->ops->hotplug_enable(dev, true); @@ -385,6 +378,7 @@ void psb_irq_uninstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; unsigned long irqflags; + unsigned int i; spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); @@ -393,14 +387,10 @@ void psb_irq_uninstall(struct drm_device *dev) PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM); - if (dev->vblank[0].enabled) - psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[1].enabled) - psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[2].enabled) - psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); + for (i = 0; i < dev->num_crtcs; ++i) { + if (dev->vblank[i].enabled) + psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + } dev_priv->vdc_irq_mask &= _PSB_IRQ_SGX_FLAG | _PSB_IRQ_MSVDX_FLAG | From 6d6a18fdde8b86b919b740ad629153de432d12a8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Nov 2020 09:45:17 -0500 Subject: [PATCH 246/710] KVM: selftests: allow two iterations of dirty_log_perf_test Even though one iteration is not enough for the dirty log performance test (due to the cost of building page tables, zeroing memory etc.) two is okay and it is the default. Without this patch, "./dirty_log_perf_test" without any further arguments fails. Cc: Ben Gardon Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index cda7b000b1bc..85c9b8f73142 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -359,7 +359,7 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); pr_info("Test iterations: %"PRIu64"\n", iterations); From 65c5a055b0d567b7e7639d942c0605da9cc54c5e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 9 Nov 2020 02:57:34 -0800 Subject: [PATCH 247/710] nvme: fix incorrect behavior when BLKROSET is called by the user The offending commit breaks BLKROSET ioctl because a device revalidation will blindly override BLKROSET setting. Hence, we remove the disk rw setting in case NVME_NS_ATTR_RO is cleared from by the controller. Fixes: 1293477f4f32 ("nvme: set gendisk read only based on nsattr") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 40ca71b29bb9..9b01afcb7777 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2060,8 +2060,6 @@ static void nvme_update_disk_info(struct gendisk *disk, if (id->nsattr & NVME_NS_ATTR_RO) set_disk_ro(disk, true); - else - set_disk_ro(disk, false); } static inline bool nvme_first_scan(struct gendisk *disk) From 9d516aa82b7d4fbe7f6303348697960ba03a530b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 4 Nov 2020 15:31:36 +0000 Subject: [PATCH 248/710] virtio: virtio_console: fix DMA memory allocation for rproc serial Since commit 086d08725d34 ("remoteproc: create vdev subdevice with specific dma memory pool"), every remoteproc has a DMA subdevice ("remoteprocX#vdevYbuffer") for each virtio device, which inherits DMA capabilities from the corresponding platform device. This allowed to associate different DMA pools with each vdev, and required from virtio drivers to perform DMA operations with the parent device (vdev->dev.parent) instead of grandparent (vdev->dev.parent->parent). virtio_rpmsg_bus was already changed in the same merge cycle with commit d999b622fcfb ("rpmsg: virtio: allocate buffer from parent"), but virtio_console did not. In fact, operations using the grandparent worked fine while the grandparent was the platform device, but since commit c774ad010873 ("remoteproc: Fix and restore the parenting hierarchy for vdev") this was changed, and now the grandparent device is the remoteproc device without any DMA capabilities. So, starting v5.8-rc1 the following warning is observed: [ 2.483925] ------------[ cut here ]------------ [ 2.489148] WARNING: CPU: 3 PID: 101 at kernel/dma/mapping.c:427 0x80e7eee8 [ 2.489152] Modules linked in: virtio_console(+) [ 2.503737] virtio_rpmsg_bus rpmsg_core [ 2.508903] [ 2.528898] [ 2.913043] [ 2.914907] ---[ end trace 93ac8746beab612c ]--- [ 2.920102] virtio-ports vport1p0: Error allocating inbufs kernel/dma/mapping.c:427 is: WARN_ON_ONCE(!dev->coherent_dma_mask); obviously because the grandparent now is remoteproc dev without any DMA caps: [ 3.104943] Parent: remoteproc0#vdev1buffer, grandparent: remoteproc0 Fix this the same way as it was for virtio_rpmsg_bus, using just the parent device (vdev->dev.parent, "remoteprocX#vdevYbuffer") for DMA operations. This also allows now to reserve DMA pools/buffers for rproc serial via Device Tree. Fixes: c774ad010873 ("remoteproc: Fix and restore the parenting hierarchy for vdev") Cc: stable@vger.kernel.org # 5.1+ Reviewed-by: Mathieu Poirier Acked-by: Jason Wang Signed-off-by: Alexander Lobakin Date: Thu, 5 Nov 2020 11:10:24 +0800 Link: https://lore.kernel.org/r/AOKowLclCbOCKxyiJ71WeNyuAAj2q8EUtxrXbyky5E@cp7-web-042.plabs.ch Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index a2da8f768b94..1836cc56e357 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -435,12 +435,12 @@ static struct port_buffer *alloc_buf(struct virtio_device *vdev, size_t buf_size /* * Allocate DMA memory from ancestor. When a virtio * device is created by remoteproc, the DMA memory is - * associated with the grandparent device: - * vdev => rproc => platform-dev. + * associated with the parent device: + * virtioY => remoteprocX#vdevYbuffer. */ - if (!vdev->dev.parent || !vdev->dev.parent->parent) + buf->dev = vdev->dev.parent; + if (!buf->dev) goto free_buf; - buf->dev = vdev->dev.parent->parent; /* Increase device refcnt to avoid freeing it */ get_device(buf->dev); From 267fb27352b6fc9fdbad753127a239f75618ecbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 15:50:32 +0100 Subject: [PATCH 249/710] perf: Reduce stack usage of perf_output_begin() __perf_output_begin() has an on-stack struct perf_sample_data in the unlikely case it needs to generate a LOST record. However, every call to perf_output_begin() must already have a perf_sample_data on-stack. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151954.985416146@infradead.org --- arch/powerpc/perf/imc-pmu.c | 2 +- arch/s390/kernel/perf_cpum_sf.c | 2 +- arch/x86/events/intel/ds.c | 4 ++-- include/linux/perf_event.h | 7 +++++-- kernel/events/core.c | 32 +++++++++++++++++--------------- kernel/events/ring_buffer.c | 20 +++++++++++--------- 6 files changed, 37 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9ed4fcccf8a9..7b25548ec42b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1336,7 +1336,7 @@ static void dump_trace_imc_data(struct perf_event *event) /* If this is a valid record, create the sample */ struct perf_output_handle handle; - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, &data, event, header.size)) return; perf_output_sample(&handle, &header, &data, event); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 4f9e4626df55..00255ae3979d 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -672,7 +672,7 @@ static void cpumsf_output_event_pid(struct perf_event *event, rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, data, event, header.size)) goto out; /* Update the process ID (see also kernel/events/core.c) */ diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 404315df1e16..cd2ae14a0a98 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -642,8 +642,8 @@ int intel_pmu_drain_bts_buffer(void) rcu_read_lock(); perf_prepare_sample(&header, &data, event, ®s); - if (perf_output_begin(&handle, event, header.size * - (top - base - skip))) + if (perf_output_begin(&handle, &data, event, + header.size * (top - base - skip))) goto unlock; for (at = base; at < top; at++) { diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0c19d279b97f..b775ae0a8c87 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1400,11 +1400,14 @@ perf_event_addr_filters(struct perf_event *event) extern void perf_event_addr_filters_sync(struct perf_event *event); extern int perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); extern int perf_output_begin_forward(struct perf_output_handle *handle, - struct perf_event *event, - unsigned int size); + struct perf_sample_data *data, + struct perf_event *event, + unsigned int size); extern int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); diff --git a/kernel/events/core.c b/kernel/events/core.c index 5a29ab09e72d..fc681c7c1e03 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7186,6 +7186,7 @@ __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, int (*output_begin)(struct perf_output_handle *, + struct perf_sample_data *, struct perf_event *, unsigned int)) { @@ -7198,7 +7199,7 @@ __perf_event_output(struct perf_event *event, perf_prepare_sample(&header, data, event, regs); - err = output_begin(&handle, event, header.size); + err = output_begin(&handle, data, event, header.size); if (err) goto exit; @@ -7264,7 +7265,7 @@ perf_event_read_event(struct perf_event *event, int ret; perf_event_header__init_id(&read_event.header, &sample, event); - ret = perf_output_begin(&handle, event, read_event.header.size); + ret = perf_output_begin(&handle, &sample, event, read_event.header.size); if (ret) return; @@ -7533,7 +7534,7 @@ static void perf_event_task_output(struct perf_event *event, perf_event_header__init_id(&task_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, task_event->event_id.header.size); if (ret) goto out; @@ -7636,7 +7637,7 @@ static void perf_event_comm_output(struct perf_event *event, return; perf_event_header__init_id(&comm_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, comm_event->event_id.header.size); if (ret) @@ -7736,7 +7737,7 @@ static void perf_event_namespaces_output(struct perf_event *event, perf_event_header__init_id(&namespaces_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, namespaces_event->event_id.header.size); if (ret) goto out; @@ -7863,7 +7864,7 @@ static void perf_event_cgroup_output(struct perf_event *event, void *data) perf_event_header__init_id(&cgroup_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, cgroup_event->event_id.header.size); if (ret) goto out; @@ -7989,7 +7990,7 @@ static void perf_event_mmap_output(struct perf_event *event, } perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, mmap_event->event_id.header.size); if (ret) goto out; @@ -8299,7 +8300,7 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head, int ret; perf_event_header__init_id(&rec.header, &sample, event); - ret = perf_output_begin(&handle, event, rec.header.size); + ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; @@ -8333,7 +8334,7 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost) perf_event_header__init_id(&lost_samples_event.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, lost_samples_event.header.size); if (ret) return; @@ -8388,7 +8389,7 @@ static void perf_event_switch_output(struct perf_event *event, void *data) perf_event_header__init_id(&se->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, se->event_id.header.size); + ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size); if (ret) return; @@ -8463,7 +8464,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_event_header__init_id(&throttle_event.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, throttle_event.header.size); if (ret) return; @@ -8506,7 +8507,7 @@ static void perf_event_ksymbol_output(struct perf_event *event, void *data) perf_event_header__init_id(&ksymbol_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, ksymbol_event->event_id.header.size); if (ret) return; @@ -8596,7 +8597,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data) perf_event_header__init_id(&bpf_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, data, event, bpf_event->event_id.header.size); if (ret) return; @@ -8705,7 +8706,8 @@ static void perf_event_text_poke_output(struct perf_event *event, void *data) perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size); + ret = perf_output_begin(&handle, &sample, event, + text_poke_event->event_id.header.size); if (ret) return; @@ -8786,7 +8788,7 @@ static void perf_log_itrace_start(struct perf_event *event) rec.tid = perf_event_tid(event, current); perf_event_header__init_id(&rec.header, &sample, event); - ret = perf_output_begin(&handle, event, rec.header.size); + ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 192b8abc6330..ef91ae75ca56 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -147,6 +147,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail, static __always_inline int __perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size, bool backward) { @@ -237,18 +238,16 @@ __perf_output_begin(struct perf_output_handle *handle, handle->size = (1UL << page_shift) - offset; if (unlikely(have_lost)) { - struct perf_sample_data sample_data; - lost_event.header.size = sizeof(lost_event); lost_event.header.type = PERF_RECORD_LOST; lost_event.header.misc = 0; lost_event.id = event->id; lost_event.lost = local_xchg(&rb->lost, 0); - perf_event_header__init_id(&lost_event.header, - &sample_data, event); + /* XXX mostly redundant; @data is already fully initializes */ + perf_event_header__init_id(&lost_event.header, data, event); perf_output_put(handle, lost_event); - perf_event__output_id_sample(event, handle, &sample_data); + perf_event__output_id_sample(event, handle, data); } return 0; @@ -263,22 +262,25 @@ out: } int perf_output_begin_forward(struct perf_output_handle *handle, - struct perf_event *event, unsigned int size) + struct perf_sample_data *data, + struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, false); + return __perf_output_begin(handle, data, event, size, false); } int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, true); + return __perf_output_begin(handle, data, event, size, true); } int perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, + return __perf_output_begin(handle, data, event, size, unlikely(is_write_backward(event))); } From 9dfa9a5c9bae3417b87824e7ac73b00c10b6a874 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 14:58:48 +0100 Subject: [PATCH 250/710] perf/x86: Reduce stack usage for x86_pmu::drain_pebs() intel_pmu_drain_pebs_*() is typically called from handle_pmi_common(), both have an on-stack struct perf_sample_data, which is *big*. Rewire things so that drain_pebs() can use the one handle_pmi_common() has. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151955.054099690@infradead.org --- arch/x86/events/intel/core.c | 2 +- arch/x86/events/intel/ds.c | 47 +++++++++++++++++++----------------- arch/x86/events/perf_event.h | 2 +- 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f1926e9f2143..c37387c8212a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2630,7 +2630,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) u64 pebs_enabled = cpuc->pebs_enabled; handled++; - x86_pmu.drain_pebs(regs); + x86_pmu.drain_pebs(regs, &data); status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; /* diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index cd2ae14a0a98..276b29da732f 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -670,7 +670,9 @@ unlock: static inline void intel_pmu_drain_pebs_buffer(void) { - x86_pmu.drain_pebs(NULL); + struct perf_sample_data data; + + x86_pmu.drain_pebs(NULL, &data); } /* @@ -1719,19 +1721,20 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) return 0; } -static void __intel_pmu_pebs_event(struct perf_event *event, - struct pt_regs *iregs, - void *base, void *top, - int bit, int count, - void (*setup_sample)(struct perf_event *, - struct pt_regs *, - void *, - struct perf_sample_data *, - struct pt_regs *)) +static __always_inline void +__intel_pmu_pebs_event(struct perf_event *event, + struct pt_regs *iregs, + struct perf_sample_data *data, + void *base, void *top, + int bit, int count, + void (*setup_sample)(struct perf_event *, + struct pt_regs *, + void *, + struct perf_sample_data *, + struct pt_regs *)) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - struct perf_sample_data data; struct x86_perf_regs perf_regs; struct pt_regs *regs = &perf_regs.regs; void *at = get_next_pebs_record_by_bit(base, top, bit); @@ -1752,14 +1755,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event, iregs = &dummy_iregs; while (count > 1) { - setup_sample(event, iregs, at, &data, regs); - perf_event_output(event, &data, regs); + setup_sample(event, iregs, at, data, regs); + perf_event_output(event, data, regs); at += cpuc->pebs_record_size; at = get_next_pebs_record_by_bit(at, top, bit); count--; } - setup_sample(event, iregs, at, &data, regs); + setup_sample(event, iregs, at, data, regs); if (iregs == &dummy_iregs) { /* * The PEBS records may be drained in the non-overflow context, @@ -1767,18 +1770,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * last record the same as other PEBS records, and doesn't * invoke the generic overflow handler. */ - perf_event_output(event, &data, regs); + perf_event_output(event, data, regs); } else { /* * All but the last records are processed. * The last one is left to be able to call the overflow handler. */ - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, data, regs)) x86_pmu_stop(event, 0); } } -static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1812,7 +1815,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) return; } - __intel_pmu_pebs_event(event, iregs, at, top, 0, n, + __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n, setup_pebs_fixed_sample_data); } @@ -1835,7 +1838,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int } } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1942,14 +1945,14 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) } if (counts[bit]) { - __intel_pmu_pebs_event(event, iregs, base, + __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit], setup_pebs_fixed_sample_data); } } } -static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) { short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1997,7 +2000,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) if (WARN_ON_ONCE(!event->attr.precise_ip)) continue; - __intel_pmu_pebs_event(event, iregs, base, + __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit], setup_pebs_adaptive_sample_data); } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ee2b9b9fc2a5..1d1fe46552ba 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -727,7 +727,7 @@ struct x86_pmu { int pebs_record_size; int pebs_buffer_size; int max_pebs_events; - void (*drain_pebs)(struct pt_regs *regs); + void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); unsigned long large_pebs_flags; From ce0f17fc93f63ee91428af10b7b2ddef38cd19e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 12:49:45 +0100 Subject: [PATCH 251/710] perf: Fix get_recursion_context() One should use in_serving_softirq() to detect SoftIRQ context. Fixes: 96f6d4444302 ("perf_counter: avoid recursion") Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151955.120572175@infradead.org --- kernel/events/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index fcbf5616a441..402054e755f2 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -211,7 +211,7 @@ static inline int get_recursion_context(int *recursion) rctx = 3; else if (in_irq()) rctx = 2; - else if (in_softirq()) + else if (in_serving_softirq()) rctx = 1; else rctx = 0; From 09da9c81253dd8e43e0d2d7cea02de6f9f19499d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 13:43:16 +0100 Subject: [PATCH 252/710] perf: Optimize get_recursion_context() "Look ma, no branches!" Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jesper Dangaard Brouer Link: https://lkml.kernel.org/r/20201030151955.187580298@infradead.org --- kernel/events/internal.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 402054e755f2..228801e20788 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -205,16 +205,12 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) static inline int get_recursion_context(int *recursion) { - int rctx; + unsigned int pc = preempt_count(); + unsigned char rctx = 0; - if (unlikely(in_nmi())) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_serving_softirq()) - rctx = 1; - else - rctx = 0; + rctx += !!(pc & (NMI_MASK)); + rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK)); + rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); if (recursion[rctx]) return -1; From 76a4efa80900fc40e0fdf243b42aec9fb8c35d24 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 12:14:21 +0100 Subject: [PATCH 253/710] perf/arch: Remove perf_sample_data::regs_user_copy struct perf_sample_data lives on-stack, we should be careful about it's size. Furthermore, the pt_regs copy in there is only because x86_64 is a trainwreck, solve it differently. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Steven Rostedt Link: https://lkml.kernel.org/r/20201030151955.258178461@infradead.org --- arch/arm/kernel/perf_regs.c | 3 +-- arch/arm64/kernel/perf_regs.c | 3 +-- arch/csky/kernel/perf_regs.c | 3 +-- arch/powerpc/perf/perf_regs.c | 3 +-- arch/riscv/kernel/perf_regs.c | 3 +-- arch/s390/kernel/perf_regs.c | 3 +-- arch/x86/kernel/perf_regs.c | 15 +++++++++++---- include/linux/perf_event.h | 6 ------ include/linux/perf_regs.h | 6 ++---- kernel/events/core.c | 8 +++----- 10 files changed, 22 insertions(+), 31 deletions(-) diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 05fe92aa7d98..0529f90395c9 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 94e8718e7229..f6f58e6265df 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -73,8 +73,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c index eb32838b8210..09b7f88a2d6a 100644 --- a/arch/csky/kernel/perf_regs.c +++ b/arch/csky/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 8e53f2fc3fe0..6f681b105eec 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -144,8 +144,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) : diff --git a/arch/riscv/kernel/perf_regs.c b/arch/riscv/kernel/perf_regs.c index 04a38fbeb9c7..fd304a248de6 100644 --- a/arch/riscv/kernel/perf_regs.c +++ b/arch/riscv/kernel/perf_regs.c @@ -36,8 +36,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 4352a504f235..6e9e5d5e927e 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -53,8 +53,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { /* * Use the regs from the first interruption and let diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index bb7e1132290b..f9e5352b3bef 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -101,8 +101,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); @@ -129,12 +128,20 @@ u64 perf_reg_abi(struct task_struct *task) return PERF_SAMPLE_REGS_ABI_64; } +static DEFINE_PER_CPU(struct pt_regs, nmi_user_regs); + void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { + struct pt_regs *regs_user_copy = this_cpu_ptr(&nmi_user_regs); struct pt_regs *user_regs = task_pt_regs(current); + if (!in_nmi()) { + regs_user->regs = user_regs; + regs_user->abi = perf_reg_abi(current); + return; + } + /* * If we're in an NMI that interrupted task_pt_regs setup, then * we can't sample user regs at all. This check isn't really diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b775ae0a8c87..96450f6fb1de 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1022,13 +1022,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; u64 aux_size; - /* - * regs_user may point to task_pt_regs or to regs_user_copy, depending - * on arch details. - */ struct perf_regs regs_user; - struct pt_regs regs_user_copy; - struct perf_regs regs_intr; u64 stack_user_size; diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index 2d12e97d5e7b..f632c5725f16 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -20,8 +20,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); u64 perf_reg_abi(struct task_struct *task); void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy); + struct pt_regs *regs); #else #define PERF_REG_EXTENDED_MASK 0 @@ -42,8 +41,7 @@ static inline u64 perf_reg_abi(struct task_struct *task) } static inline void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/kernel/events/core.c b/kernel/events/core.c index fc681c7c1e03..d67c9cbb0f6a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6374,14 +6374,13 @@ perf_output_sample_regs(struct perf_output_handle *handle, } static void perf_sample_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { if (user_mode(regs)) { regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; } else if (!(current->flags & PF_KTHREAD)) { - perf_get_regs_user(regs_user, regs, regs_user_copy); + perf_get_regs_user(regs_user, regs); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; regs_user->regs = NULL; @@ -7083,8 +7082,7 @@ void perf_prepare_sample(struct perf_event_header *header, } if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) - perf_sample_regs_user(&data->regs_user, regs, - &data->regs_user_copy); + perf_sample_regs_user(&data->regs_user, regs); if (sample_type & PERF_SAMPLE_REGS_USER) { /* regs dump ABI info */ From e506d1dac0edb2df82f2aa0582e814f9cd9aa07d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 12:15:06 +0100 Subject: [PATCH 254/710] perf/x86: Make dummy_iregs static Having pt_regs on-stack is unfortunate, it's 168 bytes. Since it isn't actually used, make it a static variable. This both gets if off the stack and ensures it gets 0 initialized, just in case someone does look at it. Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151955.324273677@infradead.org --- arch/x86/events/intel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 276b29da732f..b47cc4226934 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1738,7 +1738,7 @@ __intel_pmu_pebs_event(struct perf_event *event, struct x86_perf_regs perf_regs; struct pt_regs *regs = &perf_regs.regs; void *at = get_next_pebs_record_by_bit(base, top, bit); - struct pt_regs dummy_iregs; + static struct pt_regs dummy_iregs; if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { /* From 8c7855d82933bab7fa5e96f0e568fc125c2e1ab4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Oct 2020 16:28:25 +0100 Subject: [PATCH 255/710] perf: Simplify group_sched_out() Since event_sched_out() clears cpuctx->exclusive upon removal of an exclusive event (and only group leaders can be exclusive), there is no point in group_sched_out() trying to do it too. It is impossible for cpuctx->exclusive to still be set here. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201029162901.904060564@infradead.org --- kernel/events/core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index d67c9cbb0f6a..9a5736617a82 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2312,9 +2312,6 @@ group_sched_out(struct perf_event *group_event, event_sched_out(event, cpuctx, ctx); perf_pmu_enable(ctx->pmu); - - if (group_event->attr.exclusive) - cpuctx->exclusive = 0; } #define DETACH_GROUP 0x01UL From 251ff2d49347793d348babcff745289b11910e96 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Oct 2020 16:29:15 +0100 Subject: [PATCH 256/710] perf: Simplify group_sched_in() Collate the error paths. Code duplication only leads to divergence and extra bugs. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201029162901.972161394@infradead.org --- kernel/events/core.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 9a5736617a82..f0e526866a1c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2580,11 +2580,8 @@ group_sched_in(struct perf_event *group_event, pmu->start_txn(pmu, PERF_PMU_TXN_ADD); - if (event_sched_in(group_event, cpuctx, ctx)) { - pmu->cancel_txn(pmu); - perf_mux_hrtimer_restart(cpuctx); - return -EAGAIN; - } + if (event_sched_in(group_event, cpuctx, ctx)) + goto error; /* * Schedule in siblings as one group (if any): @@ -2613,10 +2610,9 @@ group_error: } event_sched_out(group_event, cpuctx, ctx); +error: pmu->cancel_txn(pmu); - perf_mux_hrtimer_restart(cpuctx); - return -EAGAIN; } From 2714c3962f304d031d5016c963c4b459337b0749 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Oct 2020 16:29:53 +0100 Subject: [PATCH 257/710] perf: Fix event multiplexing for exclusive groups Commit 9e6302056f80 ("perf: Use hrtimers for event multiplexing") placed the hrtimer (re)start call in the wrong place. Instead of capturing all scheduling failures, it only considered the PMU failure. The result is that groups using perf_event_attr::exclusive are no longer rotated. Fixes: 9e6302056f80 ("perf: Use hrtimers for event multiplexing") Reported-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201029162902.038667689@infradead.org --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f0e526866a1c..00be48acdc36 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2612,7 +2612,6 @@ group_error: error: pmu->cancel_txn(pmu); - perf_mux_hrtimer_restart(cpuctx); return -EAGAIN; } @@ -3672,6 +3671,7 @@ static int merge_sched_in(struct perf_event *event, void *data) *can_add_hw = 0; ctx->rotate_necessary = 1; + perf_mux_hrtimer_restart(cpuctx); } return 0; From 1908dc911792067287458fdb0800f036f4f4e0f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Oct 2020 16:32:22 +0100 Subject: [PATCH 258/710] perf: Tweak perf_event_attr::exclusive semantics Currently perf_event_attr::exclusive can be used to ensure an event(group) is the sole group scheduled on the PMU. One consequence is that when you have a pinned event (say the watchdog) you can no longer have regular exclusive event(group)s. Inspired by the fact that !pinned events are considered less strict, allow !pinned,exclusive events to share the PMU with pinned,!exclusive events. Pinned,exclusive is still fully exclusive. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201029162902.105962225@infradead.org --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 00be48acdc36..dc568ca295bd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2637,7 +2637,7 @@ static int group_can_go_on(struct perf_event *event, * If this group is exclusive and there are already * events on the CPU, it can't go on. */ - if (event->attr.exclusive && cpuctx->active_oncpu) + if (event->attr.exclusive && !list_empty(get_event_list(event))) return 0; /* * Otherwise, try to add it if all previous groups were able From cadbaa039b99a6d5c26ce1c7f2fc0325943e605a Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 28 Oct 2020 12:42:47 -0700 Subject: [PATCH 259/710] perf/x86/intel: Make anythread filter support conditional Starting with Arch Perfmon v5, the anythread filter on generic counters may be deprecated. The current kernel was exporting the any filter without checking. On Icelake, it means you could do cpu/event=0x3c,any/ even though the filter does not exist. This patch corrects the problem by relying on the CPUID 0xa leaf function to determine if anythread is supported or not as described in the Intel SDM Vol3b 18.2.5.1 AnyThread Deprecation section. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201028194247.3160610-1-eranian@google.com --- arch/x86/events/intel/core.c | 10 ++++++++++ arch/x86/events/perf_event.h | 1 + arch/x86/include/asm/perf_event.h | 4 +++- arch/x86/kvm/cpuid.c | 4 +++- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index c37387c8212a..af457f8cb29d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4987,6 +4987,12 @@ __init int intel_pmu_init(void) x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ + if (version >= 5) { + x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated; + if (x86_pmu.intel_cap.anythread_deprecated) + pr_cont(" AnyThread deprecated, "); + } + /* * Install the hw-cache-events table: */ @@ -5512,6 +5518,10 @@ __init int intel_pmu_init(void) x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + /* AnyThread may be deprecated on arch perfmon v5 or later */ + if (x86_pmu.intel_cap.anythread_deprecated) + x86_pmu.format_attrs = intel_arch_formats_attr; + if (x86_pmu.event_constraints) { /* * event on fixed counter2 (REF_CYCLES) only works on this diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 1d1fe46552ba..6a8edfe59b09 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -585,6 +585,7 @@ union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; + u64 anythread_deprecated:1; }; u64 capabilities; }; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6960cd6d1f23..b9a7fd0a27e2 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -137,7 +137,9 @@ union cpuid10_edx { struct { unsigned int num_counters_fixed:5; unsigned int bit_width_fixed:8; - unsigned int reserved:19; + unsigned int reserved1:2; + unsigned int anythread_deprecated:1; + unsigned int reserved2:16; } split; unsigned int full; }; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 06a278b3701d..0752dec66e29 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -672,7 +672,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS); edx.split.bit_width_fixed = cap.bit_width_fixed; - edx.split.reserved = 0; + edx.split.anythread_deprecated = 1; + edx.split.reserved1 = 0; + edx.split.reserved2 = 0; entry->eax = eax.full; entry->ebx = cap.events_mask; From d7012df3c9aecdcfb50f7a2ebad766952fd1410e Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Mon, 12 Oct 2020 18:06:46 +0200 Subject: [PATCH 260/710] speakup: Fix var_id_t values and thus keymap commit d97a9d7aea04 ("staging/speakup: Add inflection synth parameter") introduced a new "inflection" speakup parameter next to "pitch", but the values of the var_id_t enum are actually used by the keymap tables so we must not renumber them. The effect was that notably the volume control shortcut (speakup-1 or 2) was actually changing the inflection. This moves the INFLECTION value at the end of the var_id_t enum to fix back the enum values. This also adds a warning about it. Fixes: d97a9d7aea04 ("staging/speakup: Add inflection synth parameter") Cc: stable@vger.kernel.org Reported-by: Kirk Reiser Reported-by: Gregory Nowak Tested-by: Gregory Nowak Signed-off-by: Samuel Thibault Link: https://lore.kernel.org/r/20201012160646.qmdo4eqtj24hpch4@function Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/spk_types.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/accessibility/speakup/spk_types.h b/drivers/accessibility/speakup/spk_types.h index 7398f1196e10..91fca3033a45 100644 --- a/drivers/accessibility/speakup/spk_types.h +++ b/drivers/accessibility/speakup/spk_types.h @@ -32,6 +32,10 @@ enum { E_NEW_DEFAULT, }; +/* + * Note: add new members at the end, speakupmap.h depends on the values of the + * enum starting from SPELL_DELAY (see inc_dec_var) + */ enum var_id_t { VERSION = 0, SYNTH, SILENT, SYNTH_DIRECT, KEYMAP, CHARS, @@ -42,9 +46,9 @@ enum var_id_t { SAY_CONTROL, SAY_WORD_CTL, NO_INTERRUPT, KEY_ECHO, SPELL_DELAY, PUNC_LEVEL, READING_PUNC, ATTRIB_BLEEP, BLEEPS, - RATE, PITCH, INFLECTION, VOL, TONE, PUNCT, VOICE, FREQUENCY, LANG, + RATE, PITCH, VOL, TONE, PUNCT, VOICE, FREQUENCY, LANG, DIRECT, PAUSE, - CAPS_START, CAPS_STOP, CHARTAB, + CAPS_START, CAPS_STOP, CHARTAB, INFLECTION, MAXVARS }; From 640969a69ca4dd2ac025fe873c6bf25eba8f11b3 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 8 Nov 2020 00:33:10 +0100 Subject: [PATCH 261/710] speakup: Fix clearing selection in safe context speakup_cut() calls speakup_clear_selection() which calls console_lock. Problem is: speakup_cut() is called from a keyboard interrupt context. This would hang if speakup_cut is pressed while the console lock is unfortunately already held. We can however as well just defer calling clear_selection() until the already-deferred set_selection_kernel() call. This was spotted by the lock hardener: Possible unsafe locking scenario:\x0a CPU0 ---- lock(console_lock); lock(console_lock); \x0a *** DEADLOCK ***\x0a [...] Call Trace: dump_stack+0xc2/0x11a print_usage_bug.cold+0x3e0/0x4b1 mark_lock+0xd95/0x1390 ? print_irq_inversion_bug+0xa0/0xa0 __lock_acquire+0x21eb/0x5730 ? __kasan_check_read+0x11/0x20 ? check_chain_key+0x215/0x5e0 ? register_lock_class+0x1580/0x1580 ? lock_downgrade+0x7a0/0x7a0 ? __rwlock_init+0x140/0x140 lock_acquire+0x13f/0x370 ? speakup_clear_selection+0xe/0x20 [speakup] console_lock+0x33/0x50 ? speakup_clear_selection+0xe/0x20 [speakup] speakup_clear_selection+0xe/0x20 [speakup] speakup_cut+0x19e/0x4b0 [speakup] keyboard_notifier_call+0x1f04/0x4a40 [speakup] ? read_all_doc+0x240/0x240 [speakup] notifier_call_chain+0xbf/0x130 __atomic_notifier_call_chain+0x80/0x130 atomic_notifier_call_chain+0x16/0x20 kbd_event+0x7d7/0x3b20 ? k_pad+0x850/0x850 ? sysrq_filter+0x450/0xd40 input_to_handler+0x362/0x4b0 ? rcu_read_lock_sched_held+0xe0/0xe0 input_pass_values+0x408/0x5a0 ? __rwlock_init+0x140/0x140 ? lock_acquire+0x13f/0x370 input_handle_event+0x70e/0x1380 input_event+0x67/0x90 atkbd_interrupt+0xe62/0x1d4e [atkbd] ? __kasan_check_write+0x14/0x20 ? atkbd_event_work+0x130/0x130 [atkbd] ? _raw_spin_lock_irqsave+0x26/0x70 serio_interrupt+0x93/0x120 [serio] i8042_interrupt+0x232/0x510 [i8042] ? rcu_read_lock_bh_held+0xd0/0xd0 ? handle_irq_event+0xa5/0x13a ? i8042_remove+0x1f0/0x1f0 [i8042] __handle_irq_event_percpu+0xe6/0x6c0 handle_irq_event_percpu+0x71/0x150 ? __handle_irq_event_percpu+0x6c0/0x6c0 ? __kasan_check_read+0x11/0x20 ? do_raw_spin_unlock+0x5c/0x240 handle_irq_event+0xad/0x13a handle_edge_irq+0x233/0xa90 do_IRQ+0x10b/0x310 common_interrupt+0xf/0xf Cc: stable@vger.kernel.org Reported-by: Jookia Signed-off-by: Samuel Thibault Link: https://lore.kernel.org/r/20201107233310.7iisvaozpiqj3yvy@function Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/main.c | 1 - drivers/accessibility/speakup/selection.c | 11 ++++------- drivers/accessibility/speakup/speakup.h | 1 - 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index be79b2135fac..48019660a096 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -357,7 +357,6 @@ static void speakup_cut(struct vc_data *vc) mark_cut_flag = 0; synth_printf("%s\n", spk_msg_get(MSG_CUT)); - speakup_clear_selection(); ret = speakup_set_selection(tty); switch (ret) { diff --git a/drivers/accessibility/speakup/selection.c b/drivers/accessibility/speakup/selection.c index 032f3264fba1..7df7afad5ab4 100644 --- a/drivers/accessibility/speakup/selection.c +++ b/drivers/accessibility/speakup/selection.c @@ -22,13 +22,6 @@ struct speakup_selection_work { struct tty_struct *tty; }; -void speakup_clear_selection(void) -{ - console_lock(); - clear_selection(); - console_unlock(); -} - static void __speakup_set_selection(struct work_struct *work) { struct speakup_selection_work *ssw = @@ -51,6 +44,10 @@ static void __speakup_set_selection(struct work_struct *work) goto unref; } + console_lock(); + clear_selection(); + console_unlock(); + set_selection_kernel(&sel, tty); unref: diff --git a/drivers/accessibility/speakup/speakup.h b/drivers/accessibility/speakup/speakup.h index 74fe49c2c511..33594f5a7983 100644 --- a/drivers/accessibility/speakup/speakup.h +++ b/drivers/accessibility/speakup/speakup.h @@ -70,7 +70,6 @@ void spk_do_flush(void); void speakup_start_ttys(void); void synth_buffer_add(u16 ch); void synth_buffer_clear(void); -void speakup_clear_selection(void); int speakup_set_selection(struct tty_struct *tty); void speakup_cancel_selection(void); int speakup_paste_selection(struct tty_struct *tty); From 3ed1cfb2cee4355ddef49489897bfe474daeeaec Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 8 Nov 2020 14:12:33 +0100 Subject: [PATCH 262/710] speakup ttyio: Do not schedule() in ttyio_in_nowait With the ltlk and spkout drivers, the index read function, i.e. in_nowait, is getting called from the read_all_doc mechanism, from the timer softirq: Call Trace: dump_stack+0x71/0x98 dequeue_task_idle+0x1f/0x28 __schedule+0x167/0x5d6 ? trace_hardirqs_on+0x2e/0x3a ? usleep_range+0x7f/0x7f schedule+0x8a/0xae schedule_timeout+0xb1/0xea ? del_timer_sync+0x31/0x31 do_wait_for_common+0xba/0x12b ? wake_up_q+0x45/0x45 wait_for_common+0x37/0x50 ttyio_in+0x2a/0x6b spk_ttyio_in_nowait+0xc/0x13 spk_get_index_count+0x20/0x93 cursor_done+0x1c6/0x4c6 ? read_all_doc+0xb1/0xb1 call_timer_fn+0x89/0x140 run_timer_softirq+0x164/0x1a5 ? read_all_doc+0xb1/0xb1 ? hrtimer_forward+0x7b/0x87 ? timerqueue_add+0x62/0x68 ? enqueue_hrtimer+0x95/0x9f __do_softirq+0x181/0x31f irq_exit+0x6a/0x86 smp_apic_timer_interrupt+0x15e/0x183 apic_timer_interrupt+0xf/0x20 We thus should not schedule() at all, even with timeout == 0, this crashes the kernel. We can however use try_wait_for_completion() instead of wait_for_completion_timeout(0). Cc: stable@vger.kernel.org Reported-by: John Covici Tested-by: John Covici Signed-off-by: Samuel Thibault Link: https://lore.kernel.org/r/20201108131233.tadycr73sxlvodgo@function Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/spk_ttyio.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c index a831ff64f8ba..ecc39983e946 100644 --- a/drivers/accessibility/speakup/spk_ttyio.c +++ b/drivers/accessibility/speakup/spk_ttyio.c @@ -298,11 +298,13 @@ static unsigned char ttyio_in(int timeout) struct spk_ldisc_data *ldisc_data = speakup_tty->disc_data; char rv; - if (wait_for_completion_timeout(&ldisc_data->completion, + if (!timeout) { + if (!try_wait_for_completion(&ldisc_data->completion)) + return 0xff; + } else if (wait_for_completion_timeout(&ldisc_data->completion, usecs_to_jiffies(timeout)) == 0) { - if (timeout) - pr_warn("spk_ttyio: timeout (%d) while waiting for input\n", - timeout); + pr_warn("spk_ttyio: timeout (%d) while waiting for input\n", + timeout); return 0xff; } From d9109fe0f30a1fba66b8623837fc3d3c1a031090 Mon Sep 17 00:00:00 2001 From: Andra Paraschiv Date: Mon, 2 Nov 2020 19:36:22 +0200 Subject: [PATCH 263/710] nitro_enclaves: Fixup type and simplify logic of the poll mask setup Update the assigned value of the poll result to be EPOLLHUP instead of POLLHUP to match the __poll_t type. While at it, simplify the logic of setting the mask result of the poll function. Reported-by: kernel test robot Reviewed-by: Alexander Graf Signed-off-by: Andra Paraschiv Link: https://lore.kernel.org/r/20201102173622.32169-1-andraprs@amazon.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/nitro_enclaves/ne_misc_dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/virt/nitro_enclaves/ne_misc_dev.c b/drivers/virt/nitro_enclaves/ne_misc_dev.c index f06622b48d69..f1964ea4b826 100644 --- a/drivers/virt/nitro_enclaves/ne_misc_dev.c +++ b/drivers/virt/nitro_enclaves/ne_misc_dev.c @@ -1505,10 +1505,8 @@ static __poll_t ne_enclave_poll(struct file *file, poll_table *wait) poll_wait(file, &ne_enclave->eventq, wait); - if (!ne_enclave->has_event) - return mask; - - mask = POLLHUP; + if (ne_enclave->has_event) + mask |= EPOLLHUP; return mask; } From f3217d6f2f7a76b36a3326ad58c8897f4d5fbe31 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 16:54:36 +0100 Subject: [PATCH 264/710] firmware: xilinx: fix out-of-bounds access The zynqmp_pm_set_suspend_mode() and zynqmp_pm_get_trustzone_version() functions pass values as api_id into zynqmp_pm_invoke_fn that are beyond PM_API_MAX, resulting in an out-of-bounds access: drivers/firmware/xilinx/zynqmp.c: In function 'zynqmp_pm_set_suspend_mode': drivers/firmware/xilinx/zynqmp.c:150:24: warning: array subscript 2562 is above array bounds of 'u32[64]' {aka 'unsigned int[64]'} [-Warray-bounds] 150 | if (zynqmp_pm_features[api_id] != PM_FEATURE_UNCHECKED) | ~~~~~~~~~~~~~~~~~~^~~~~~~~ drivers/firmware/xilinx/zynqmp.c:28:12: note: while referencing 'zynqmp_pm_features' 28 | static u32 zynqmp_pm_features[PM_API_MAX]; | ^~~~~~~~~~~~~~~~~~ Replace the resulting undefined behavior with an error return. This may break some things that happen to work at the moment but seems better than randomly overwriting kernel data. I assume we need additional fixes for the two functions that now return an error. Fixes: 76582671eb5d ("firmware: xilinx: Add Zynqmp firmware driver") Fixes: e178df31cf41 ("firmware: xilinx: Implement ZynqMP power management APIs") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20201026155449.3703142-1-arnd@kernel.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/xilinx/zynqmp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index 8d1ff2454e2e..efb8a66efc68 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -147,6 +147,9 @@ static int zynqmp_pm_feature(u32 api_id) return 0; /* Return value if feature is already checked */ + if (api_id > ARRAY_SIZE(zynqmp_pm_features)) + return PM_FEATURE_INVALID; + if (zynqmp_pm_features[api_id] != PM_FEATURE_UNCHECKED) return zynqmp_pm_features[api_id]; From 092561f06702dd4fdd7fb74dd3a838f1818529b7 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 2 Nov 2020 21:28:19 +0900 Subject: [PATCH 265/710] uio: Fix use-after-free in uio_unregister_device() Commit 8fd0e2a6df26 ("uio: free uio id after uio file node is freed") triggered KASAN use-after-free failure at deletion of TCM-user backstores [1]. In uio_unregister_device(), struct uio_device *idev is passed to uio_free_minor() to refer idev->minor. However, before uio_free_minor() call, idev is already freed by uio_device_release() during call to device_unregister(). To avoid reference to idev->minor after idev free, keep idev->minor value in a local variable. Also modify uio_free_minor() argument to receive the value. [1] BUG: KASAN: use-after-free in uio_unregister_device+0x166/0x190 Read of size 4 at addr ffff888105196508 by task targetcli/49158 CPU: 3 PID: 49158 Comm: targetcli Not tainted 5.10.0-rc1 #1 Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 Call Trace: dump_stack+0xae/0xe5 ? uio_unregister_device+0x166/0x190 print_address_description.constprop.0+0x1c/0x210 ? uio_unregister_device+0x166/0x190 ? uio_unregister_device+0x166/0x190 kasan_report.cold+0x37/0x7c ? kobject_put+0x80/0x410 ? uio_unregister_device+0x166/0x190 uio_unregister_device+0x166/0x190 tcmu_destroy_device+0x1c4/0x280 [target_core_user] ? tcmu_release+0x90/0x90 [target_core_user] ? __mutex_unlock_slowpath+0xd6/0x5d0 target_free_device+0xf3/0x2e0 [target_core_mod] config_item_cleanup+0xea/0x210 configfs_rmdir+0x651/0x860 ? detach_groups.isra.0+0x380/0x380 vfs_rmdir.part.0+0xec/0x3a0 ? __lookup_hash+0x20/0x150 do_rmdir+0x252/0x320 ? do_file_open_root+0x420/0x420 ? strncpy_from_user+0xbc/0x2f0 ? getname_flags.part.0+0x8e/0x450 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f9e2bfc91fb Code: 73 01 c3 48 8b 0d 9d ec 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 54 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 6d ec 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffdd2baafe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000054 RAX: ffffffffffffffda RBX: 00007f9e2beb44a0 RCX: 00007f9e2bfc91fb RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007f9e1c20be90 RBP: 00007ffdd2bab000 R08: 0000000000000000 R09: 00007f9e2bdf2440 R10: 00007ffdd2baaf37 R11: 0000000000000246 R12: 00000000ffffff9c R13: 000055f9abb7e390 R14: 000055f9abcf9558 R15: 00007f9e2be7a780 Allocated by task 34735: kasan_save_stack+0x1b/0x40 __kasan_kmalloc.constprop.0+0xc2/0xd0 __uio_register_device+0xeb/0xd40 tcmu_configure_device+0x5a0/0xbc0 [target_core_user] target_configure_device+0x12f/0x760 [target_core_mod] target_dev_enable_store+0x32/0x50 [target_core_mod] configfs_write_file+0x2bb/0x450 vfs_write+0x1ce/0x610 ksys_write+0xe9/0x1b0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 49158: kasan_save_stack+0x1b/0x40 kasan_set_track+0x1c/0x30 kasan_set_free_info+0x1b/0x30 __kasan_slab_free+0x110/0x150 slab_free_freelist_hook+0x5a/0x170 kfree+0xc6/0x560 device_release+0x9b/0x210 kobject_put+0x13e/0x410 uio_unregister_device+0xf9/0x190 tcmu_destroy_device+0x1c4/0x280 [target_core_user] target_free_device+0xf3/0x2e0 [target_core_mod] config_item_cleanup+0xea/0x210 configfs_rmdir+0x651/0x860 vfs_rmdir.part.0+0xec/0x3a0 do_rmdir+0x252/0x320 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff888105196000 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 1288 bytes inside of 2048-byte region [ffff888105196000, ffff888105196800) The buggy address belongs to the page: page:0000000098e6ca81 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x105190 head:0000000098e6ca81 order:3 compound_mapcount:0 compound_pincount:0 flags: 0x17ffffc0010200(slab|head) raw: 0017ffffc0010200 dead000000000100 dead000000000122 ffff888100043040 raw: 0000000000000000 0000000000080008 00000001ffffffff ffff88810eb55c01 page dumped because: kasan: bad access detected page->mem_cgroup:ffff88810eb55c01 Memory state around the buggy address: ffff888105196400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888105196480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888105196500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888105196580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888105196600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 8fd0e2a6df26 ("uio: free uio id after uio file node is freed") Cc: stable Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20201102122819.2346270-1-shinichiro.kawasaki@wdc.com Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 6dca744e39e9..be06f1a961c2 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -413,10 +413,10 @@ static int uio_get_minor(struct uio_device *idev) return retval; } -static void uio_free_minor(struct uio_device *idev) +static void uio_free_minor(unsigned long minor) { mutex_lock(&minor_lock); - idr_remove(&uio_idr, idev->minor); + idr_remove(&uio_idr, minor); mutex_unlock(&minor_lock); } @@ -990,7 +990,7 @@ err_request_irq: err_uio_dev_add_attributes: device_del(&idev->dev); err_device_create: - uio_free_minor(idev); + uio_free_minor(idev->minor); put_device(&idev->dev); return ret; } @@ -1042,11 +1042,13 @@ EXPORT_SYMBOL_GPL(__devm_uio_register_device); void uio_unregister_device(struct uio_info *info) { struct uio_device *idev; + unsigned long minor; if (!info || !info->uio_dev) return; idev = info->uio_dev; + minor = idev->minor; mutex_lock(&idev->info_lock); uio_dev_del_attributes(idev); @@ -1062,7 +1064,7 @@ void uio_unregister_device(struct uio_info *info) device_unregister(&idev->dev); - uio_free_minor(idev); + uio_free_minor(minor); return; } From 1bd3387979bff49cb3115c497895d78ffd5092e3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 29 Oct 2020 21:32:41 +0200 Subject: [PATCH 266/710] Documentation: firmware-guide: gpio-properties: Fix factual mistakes Fix factual mistakes and style issues in GPIO properties document. This converts IoRestriction from InputOnly to OutputOnly as pins in the example are used as outputs. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- .../firmware-guide/acpi/gpio-properties.rst | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/Documentation/firmware-guide/acpi/gpio-properties.rst b/Documentation/firmware-guide/acpi/gpio-properties.rst index bb6d74f23ee0..e6e65ceb2ca1 100644 --- a/Documentation/firmware-guide/acpi/gpio-properties.rst +++ b/Documentation/firmware-guide/acpi/gpio-properties.rst @@ -20,9 +20,9 @@ index, like the ASL example below shows:: Name (_CRS, ResourceTemplate () { - GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionInputOnly, + GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly, "\\_SB.GPO0", 0, ResourceConsumer) {15} - GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionInputOnly, + GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly, "\\_SB.GPO0", 0, ResourceConsumer) {27, 31} }) @@ -49,7 +49,7 @@ index pin Pin in the GpioIo()/GpioInt() resource. Typically this is zero. active_low - If 1 the GPIO is marked as active_low. + If 1, the GPIO is marked as active_low. Since ACPI GpioIo() resource does not have a field saying whether it is active low or high, the "active_low" argument can be used here. Setting @@ -112,8 +112,8 @@ Example:: Package () { "gpio-line-names", Package () { - "SPI0_CS_N", "EXP2_INT", "MUX6_IO", "UART0_RXD", "MUX7_IO", - "LVL_C_A1", "MUX0_IO", "SPI1_MISO" + "SPI0_CS_N", "EXP2_INT", "MUX6_IO", "UART0_RXD", + "MUX7_IO", "LVL_C_A1", "MUX0_IO", "SPI1_MISO", } } @@ -137,7 +137,7 @@ to the GPIO lines it is going to use and provide the GPIO subsystem with a mapping between those names and the ACPI GPIO resources corresponding to them. To do that, the driver needs to define a mapping table as a NULL-terminated -array of struct acpi_gpio_mapping objects that each contain a name, a pointer +array of struct acpi_gpio_mapping objects that each contains a name, a pointer to an array of line data (struct acpi_gpio_params) objects and the size of that array. Each struct acpi_gpio_params object consists of three fields, crs_entry_index, line_index, active_low, representing the index of the target @@ -154,13 +154,14 @@ question would look like this:: static const struct acpi_gpio_mapping bluetooth_acpi_gpios[] = { { "reset-gpios", &reset_gpio, 1 }, { "shutdown-gpios", &shutdown_gpio, 1 }, - { }, + { } }; Next, the mapping table needs to be passed as the second argument to -acpi_dev_add_driver_gpios() that will register it with the ACPI device object -pointed to by its first argument. That should be done in the driver's .probe() -routine. On removal, the driver should unregister its GPIO mapping table by +acpi_dev_add_driver_gpios() or its managed analogue that will +register it with the ACPI device object pointed to by its first +argument. That should be done in the driver's .probe() routine. +On removal, the driver should unregister its GPIO mapping table by calling acpi_dev_remove_driver_gpios() on the ACPI device object where that table was previously registered. @@ -191,12 +192,12 @@ The driver might expect to get the right GPIO when it does:: but since there is no way to know the mapping between "reset" and the GpioIo() in _CRS desc will hold ERR_PTR(-ENOENT). -The driver author can solve this by passing the mapping explictly -(the recommended way and documented in the above chapter). +The driver author can solve this by passing the mapping explicitly +(this is the recommended way and it's documented in the above chapter). The ACPI GPIO mapping tables should not contaminate drivers that are not knowing about which exact device they are servicing on. It implies that -the ACPI GPIO mapping tables are hardly linked to ACPI ID and certain +the ACPI GPIO mapping tables are hardly linked to an ACPI ID and certain objects, as listed in the above chapter, of the device in question. Getting GPIO descriptor @@ -229,5 +230,5 @@ Case 2 explicitly tells GPIO core to look for resources in _CRS. Be aware that gpiod_get_index() in cases 1 and 2, assuming that there are two versions of ACPI device description provided and no mapping is present in the driver, will return different resources. That's why a -certain driver has to handle them carefully as explained in previous +certain driver has to handle them carefully as explained in the previous chapter. From 0d6c41cf801fd56b92f4359374667061d27a6472 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 29 Oct 2020 21:32:42 +0200 Subject: [PATCH 267/710] Documentation: firmware-guide: gpio-properties: active_low only for GpioIo() It appears that people may misinterpret active_low field in _DSD for GpioInt() resource. Add a paragraph to clarify this. Reported-by: Ricardo Ribalda Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg Reviewed-by: Ricardo Ribalda Signed-off-by: Rafael J. Wysocki --- Documentation/firmware-guide/acpi/gpio-properties.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/firmware-guide/acpi/gpio-properties.rst b/Documentation/firmware-guide/acpi/gpio-properties.rst index e6e65ceb2ca1..370fe46c6af9 100644 --- a/Documentation/firmware-guide/acpi/gpio-properties.rst +++ b/Documentation/firmware-guide/acpi/gpio-properties.rst @@ -55,6 +55,9 @@ Since ACPI GpioIo() resource does not have a field saying whether it is active low or high, the "active_low" argument can be used here. Setting it to 1 marks the GPIO as active low. +Note, active_low in _DSD does not make sense for GpioInt() resource and +must be 0. GpioInt() resource has its own means of defining it. + In our Bluetooth example the "reset-gpios" refers to the second GpioIo() resource, second pin in that resource with the GPIO number of 31. From 8b31e972f9872e5a6a3348506b5b84353fecef58 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 29 Oct 2020 21:32:43 +0200 Subject: [PATCH 268/710] Documentation: firmware-guide: gpio-properties: Clarify initial output state GpioIo() doesn't provide an explicit state for an output pin. Linux tries to be smart and uses a common sense based on other parameters. Document how it looks like in the code. Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- .../firmware-guide/acpi/gpio-properties.rst | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Documentation/firmware-guide/acpi/gpio-properties.rst b/Documentation/firmware-guide/acpi/gpio-properties.rst index 370fe46c6af9..59aad6138b6e 100644 --- a/Documentation/firmware-guide/acpi/gpio-properties.rst +++ b/Documentation/firmware-guide/acpi/gpio-properties.rst @@ -61,6 +61,29 @@ must be 0. GpioInt() resource has its own means of defining it. In our Bluetooth example the "reset-gpios" refers to the second GpioIo() resource, second pin in that resource with the GPIO number of 31. +The GpioIo() resource unfortunately doesn't explicitly provide an initial +state of the output pin which driver should use during its initialization. + +Linux tries to use common sense here and derives the state from the bias +and polarity settings. The table below shows the expectations: + +========= ============= ============== +Pull Bias Polarity Requested... +========= ============= ============== +Implicit x AS IS (assumed firmware configured for us) +Explicit x (no _DSD) as Pull Bias (Up == High, Down == Low), + assuming non-active (Polarity = !Pull Bias) +Down Low as low, assuming active +Down High as low, assuming non-active +Up Low as high, assuming non-active +Up High as high, assuming active +========= ============= ============== + +That said, for our above example the both GPIOs, since the bias setting +is explicit and _DSD is present, will be treated as active with a high +polarity and Linux will configure the pins in this state until a driver +reprograms them differently. + It is possible to leave holes in the array of GPIOs. This is useful in cases like with SPI host controllers where some chip selects may be implemented as GPIOs and some as native signals. For example a SPI host From c1e9735975c05d36ca97e9d39e9b06c3e0b3b0d7 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 2 Nov 2020 19:19:31 +0800 Subject: [PATCH 269/710] ACPI: scan: Fix acpi_dma_configure_id() kerneldoc name For some reason building with W=1 doesn't pick up on this, but the kerneldoc name for acpi_dma_configure_id() is not right, so fix it up. Signed-off-by: John Garry Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index a896e5e87c93..bc6a79e33220 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1453,7 +1453,7 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, } /** - * acpi_dma_configure - Set-up DMA configuration for the device. + * acpi_dma_configure_id - Set-up DMA configuration for the device. * @dev: The pointer to the device * @attr: device dma attributes * @input_id: input device id const value pointer From c6237b210ddc4f026a368172e957cbd3d5b5c78a Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 5 Nov 2020 03:06:00 +0100 Subject: [PATCH 270/710] ACPI: Fix whitespace inconsistencies Replaces spaces with tabs where spaces have been (inconsistently) used for indentation and removes trailing whitespaces. Signed-off-by: Maximilian Luz Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 6 +++--- drivers/acpi/battery.c | 2 +- drivers/acpi/event.c | 2 +- drivers/acpi/internal.h | 2 +- drivers/acpi/nfit/core.c | 10 +++++----- drivers/acpi/pci_irq.c | 2 +- drivers/acpi/pci_link.c | 12 ++++++------ drivers/acpi/pci_mcfg.c | 2 +- drivers/acpi/power.c | 6 +++--- drivers/acpi/processor_perflib.c | 6 +++--- drivers/acpi/sbs.c | 2 +- drivers/acpi/sbshc.c | 2 +- drivers/acpi/sbshc.h | 6 +++--- drivers/acpi/video_detect.c | 16 ++++++++-------- drivers/acpi/wakeup.c | 4 ++-- 15 files changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index bc96457c9e25..a322a7bd286b 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -578,7 +578,7 @@ acpi_video_bqc_value_to_level(struct acpi_video_device *device, ACPI_VIDEO_FIRST_LEVEL - 1 - bqc_value; level = device->brightness->levels[bqc_value + - ACPI_VIDEO_FIRST_LEVEL]; + ACPI_VIDEO_FIRST_LEVEL]; } else { level = bqc_value; } @@ -990,8 +990,8 @@ set_level: goto out_free_levels; ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "found %d brightness levels\n", - br->count - ACPI_VIDEO_FIRST_LEVEL)); + "found %d brightness levels\n", + br->count - ACPI_VIDEO_FIRST_LEVEL)); return 0; out_free_levels: diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index cab4af532f36..08ee1c7b12e0 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -987,7 +987,7 @@ static int acpi_battery_update(struct acpi_battery *battery, bool resume) */ if ((battery->state & ACPI_BATTERY_STATE_CRITICAL) || (test_bit(ACPI_BATTERY_ALARM_PRESENT, &battery->flags) && - (battery->capacity_now <= battery->alarm))) + (battery->capacity_now <= battery->alarm))) acpi_pm_wakeup_event(&battery->device->dev); return result; diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index 170643927044..92e59f45329b 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -31,7 +31,7 @@ int acpi_notifier_call_chain(struct acpi_device *dev, u32 type, u32 data) event.type = type; event.data = data; return (blocking_notifier_call_chain(&acpi_chain_head, 0, (void *)&event) - == NOTIFY_BAD) ? -EINVAL : 0; + == NOTIFY_BAD) ? -EINVAL : 0; } EXPORT_SYMBOL(acpi_notifier_call_chain); diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 43411a7457cd..e3638bafb941 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -134,7 +134,7 @@ int acpi_add_power_resource(acpi_handle handle); void acpi_power_add_remove_device(struct acpi_device *adev, bool add); int acpi_power_wakeup_list_init(struct list_head *list, int *system_level); int acpi_device_sleep_wake(struct acpi_device *dev, - int enable, int sleep_state, int dev_state); + int enable, int sleep_state, int dev_state); int acpi_power_get_inferred_state(struct acpi_device *device, int *state); int acpi_power_on_resources(struct acpi_device *device, int state); int acpi_power_transition(struct acpi_device *device, int state); diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 3a3c209ed3d3..442608220b5c 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2175,10 +2175,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) * these commands. */ enum nfit_aux_cmds { - NFIT_CMD_TRANSLATE_SPA = 5, - NFIT_CMD_ARS_INJECT_SET = 7, - NFIT_CMD_ARS_INJECT_CLEAR = 8, - NFIT_CMD_ARS_INJECT_GET = 9, + NFIT_CMD_TRANSLATE_SPA = 5, + NFIT_CMD_ARS_INJECT_SET = 7, + NFIT_CMD_ARS_INJECT_CLEAR = 8, + NFIT_CMD_ARS_INJECT_GET = 9, }; static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) @@ -2632,7 +2632,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, nfit_blk->bdw_offset = nfit_mem->bdw->offset; mmio = &nfit_blk->mmio[BDW]; mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address, - nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr)); + nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr)); if (!mmio->addr.base) { dev_dbg(dev, "%s failed to map bdw\n", nvdimm_name(nvdimm)); diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index dea8a60e18a4..14ee631cb7cf 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -175,7 +175,7 @@ static int acpi_pci_irq_check_entry(acpi_handle handle, struct pci_dev *dev, * configure the IRQ assigned to this slot|dev|pin. The 'source_index' * indicates which resource descriptor in the resource template (of * the link device) this interrupt is allocated from. - * + * * NOTE: Don't query the Link Device for IRQ information at this time * because Link Device enumeration may not have occurred yet * (e.g. exists somewhere 'below' this _PRT entry in the ACPI diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 606da5d77ad3..fb4c5632a232 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -6,8 +6,8 @@ * Copyright (C) 2001, 2002 Paul Diefenbaugh * Copyright (C) 2002 Dominik Brodowski * - * TBD: - * 1. Support more than one IRQ resource entry per link device (index). + * TBD: + * 1. Support more than one IRQ resource entry per link device (index). * 2. Implement start/stop mechanism and use ACPI Bus Driver facilities * for IRQ management (e.g. start()->_SRS). */ @@ -249,8 +249,8 @@ static int acpi_pci_link_get_current(struct acpi_pci_link *link) } } - /* - * Query and parse _CRS to get the current IRQ assignment. + /* + * Query and parse _CRS to get the current IRQ assignment. */ status = acpi_walk_resources(link->device->handle, METHOD_NAME__CRS, @@ -396,7 +396,7 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, int irq) /* * "acpi_irq_balance" (default in APIC mode) enables ACPI to use PIC Interrupt * Link Devices to move the PIRQs around to minimize sharing. - * + * * "acpi_irq_nobalance" (default in PIC mode) tells ACPI not to move any PIC IRQs * that the BIOS has already set to active. This is necessary because * ACPI has no automatic means of knowing what ISA IRQs are used. Note that @@ -414,7 +414,7 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, int irq) * * Note that PCI IRQ routers have a list of possible IRQs, * which may not include the IRQs this table says are available. - * + * * Since this heuristic can't tell the difference between a link * that no device will attach to, vs. a link which may be shared * by multiple active devices -- it is not optimal. diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 7ddd57abadd1..95f23acd5b80 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -173,7 +173,7 @@ static int pci_mcfg_quirk_matches(struct mcfg_fixup *f, u16 segment, { if (!memcmp(f->oem_id, mcfg_oem_id, ACPI_OEM_ID_SIZE) && !memcmp(f->oem_table_id, mcfg_oem_table_id, - ACPI_OEM_TABLE_ID_SIZE) && + ACPI_OEM_TABLE_ID_SIZE) && f->oem_revision == mcfg_oem_revision && f->segment == segment && resource_contains(&f->bus_range, bus_range)) diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 837b875d075e..8048da85b7e0 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -13,7 +13,7 @@ * 1. via "Device Specific (D-State) Control" * 2. via "Power Resource Control". * The code below deals with ACPI Power Resources control. - * + * * An ACPI "power resource object" represents a software controllable power * plane, clock plane, or other resource depended on by a device. * @@ -645,7 +645,7 @@ int acpi_power_wakeup_list_init(struct list_head *list, int *system_level_p) * -ENODEV if the execution of either _DSW or _PSW has failed */ int acpi_device_sleep_wake(struct acpi_device *dev, - int enable, int sleep_state, int dev_state) + int enable, int sleep_state, int dev_state) { union acpi_object in_arg[3]; struct acpi_object_list arg_list = { 3, in_arg }; @@ -690,7 +690,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev, /* * Prepare a wakeup device, two steps (Ref ACPI 2.0:P229): - * 1. Power on the power resources required for the wakeup device + * 1. Power on the power resources required for the wakeup device * 2. Execute _DSW (Device Sleep Wake) or (deprecated in ACPI 3.0) _PSW (Power * State Wake) for the device, if present */ diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 5909e8fa4013..b04a68950ff1 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -354,7 +354,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) (u32) px->control, (u32) px->status)); /* - * Check that ACPI's u64 MHz will be valid as u32 KHz in cpufreq + * Check that ACPI's u64 MHz will be valid as u32 KHz in cpufreq */ if (!px->core_frequency || ((u32)(px->core_frequency * 1000) != @@ -627,7 +627,7 @@ int acpi_processor_preregister_performance( goto err_ret; /* - * Now that we have _PSD data from all CPUs, lets setup P-state + * Now that we have _PSD data from all CPUs, lets setup P-state * domain info. */ for_each_possible_cpu(i) { @@ -693,7 +693,7 @@ int acpi_processor_preregister_performance( if (match_pdomain->domain != pdomain->domain) continue; - match_pr->performance->shared_type = + match_pr->performance->shared_type = pr->performance->shared_type; cpumask_copy(match_pr->performance->shared_cpu_map, pr->performance->shared_cpu_map); diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index f158b8c30113..e6d9f4de2800 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -366,7 +366,7 @@ static int acpi_battery_get_state(struct acpi_battery *battery) state_readers[i].mode, ACPI_SBS_BATTERY, state_readers[i].command, - (u8 *)battery + + (u8 *)battery + state_readers[i].offset); if (result) goto end; diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 87b74e9015e5..53c2862c4c75 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -176,7 +176,7 @@ int acpi_smbus_write(struct acpi_smb_hc *hc, u8 protocol, u8 address, EXPORT_SYMBOL_GPL(acpi_smbus_write); int acpi_smbus_register_callback(struct acpi_smb_hc *hc, - smbus_alarm_callback callback, void *context) + smbus_alarm_callback callback, void *context) { mutex_lock(&hc->lock); hc->callback = callback; diff --git a/drivers/acpi/sbshc.h b/drivers/acpi/sbshc.h index c3522bb82792..695c390e2884 100644 --- a/drivers/acpi/sbshc.h +++ b/drivers/acpi/sbshc.h @@ -24,9 +24,9 @@ enum acpi_sbs_device_addr { typedef void (*smbus_alarm_callback)(void *context); extern int acpi_smbus_read(struct acpi_smb_hc *hc, u8 protocol, u8 address, - u8 command, u8 * data); + u8 command, u8 *data); extern int acpi_smbus_write(struct acpi_smb_hc *hc, u8 protocol, u8 slave_address, - u8 command, u8 * data, u8 length); + u8 command, u8 *data, u8 length); extern int acpi_smbus_register_callback(struct acpi_smb_hc *hc, - smbus_alarm_callback callback, void *context); + smbus_alarm_callback callback, void *context); extern int acpi_smbus_unregister_callback(struct acpi_smb_hc *hc); diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 3a032afd9d05..4f5463b2a217 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -178,14 +178,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X201s"), }, }, - { - .callback = video_detect_force_video, - .ident = "ThinkPad X201T", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X201T"), - }, - }, + { + .callback = video_detect_force_video, + .ident = "ThinkPad X201T", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X201T"), + }, + }, /* The native backlight controls do not work on some older machines */ { diff --git a/drivers/acpi/wakeup.c b/drivers/acpi/wakeup.c index f89dd9a99e6e..b02bf770aead 100644 --- a/drivers/acpi/wakeup.c +++ b/drivers/acpi/wakeup.c @@ -44,7 +44,7 @@ void acpi_enable_wakeup_devices(u8 sleep_state) if (!dev->wakeup.flags.valid || sleep_state > (u32) dev->wakeup.sleep_state || !(device_may_wakeup(&dev->dev) - || dev->wakeup.prepare_count)) + || dev->wakeup.prepare_count)) continue; if (device_may_wakeup(&dev->dev)) @@ -69,7 +69,7 @@ void acpi_disable_wakeup_devices(u8 sleep_state) if (!dev->wakeup.flags.valid || sleep_state > (u32) dev->wakeup.sleep_state || !(device_may_wakeup(&dev->dev) - || dev->wakeup.prepare_count)) + || dev->wakeup.prepare_count)) continue; acpi_set_gpe_wake_mask(dev->wakeup.gpe_device, dev->wakeup.gpe_number, From 9debfb81e7654fe7388a49f45bc4d789b94c1103 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 7 Nov 2020 00:49:39 -0800 Subject: [PATCH 271/710] ACPI: GED: fix -Wformat Clang is more aggressive about -Wformat warnings when the format flag specifies a type smaller than the parameter. It turns out that gsi is an int. Fixes: drivers/acpi/evged.c:105:48: warning: format specifies type 'unsigned char' but the argument has type 'unsigned int' [-Wformat] trigger == ACPI_EDGE_SENSITIVE ? 'E' : 'L', gsi); ^~~ Link: https://github.com/ClangBuiltLinux/linux/issues/378 Fixes: ea6f3af4c5e6 ("ACPI: GED: add support for _Exx / _Lxx handler methods") Acked-by: Ard Biesheuvel Signed-off-by: Nick Desaulniers Signed-off-by: Rafael J. Wysocki --- drivers/acpi/evged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/evged.c b/drivers/acpi/evged.c index b1a7f8d6965e..fe6b6792c8bb 100644 --- a/drivers/acpi/evged.c +++ b/drivers/acpi/evged.c @@ -101,7 +101,7 @@ static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares, switch (gsi) { case 0 ... 255: - sprintf(ev_name, "_%c%02hhX", + sprintf(ev_name, "_%c%02X", trigger == ACPI_EDGE_SENSITIVE ? 'E' : 'L', gsi); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) From 7daaa06357bf7f1874b62bb1ea9d66a51d4e567e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 7 Nov 2020 14:32:54 +0100 Subject: [PATCH 272/710] ACPI: button: Add DMI quirk for Medion Akoya E2228T The Medion Akoya E2228T's ACPI _LID implementation is quite broken, it has the same issues as the one from the Medion Akoya E2215T: 1. For notifications it uses an ActiveLow Edge GpioInt, rather then an ActiveBoth one, meaning that the device is only notified when the lid is closed, not when it is opened. 2. Matching with this its _LID method simply always returns 0 (closed) In order for the Linux LID code to work properly with this implementation, the lid_init_state selection needs to be set to ACPI_BUTTON_LID_INIT_OPEN, add a DMI quirk for this. While working on this I also found out that the MD60### part of the model number differs per country/batch while all of the E2215T and E2228T models have this issue, so also remove the " MD60198" part from the E2215T quirk. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/button.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 0761529cac05..0d93a5ef4d07 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -89,7 +89,18 @@ static const struct dmi_system_id dmi_lid_quirks[] = { */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), - DMI_MATCH(DMI_PRODUCT_NAME, "E2215T MD60198"), + DMI_MATCH(DMI_PRODUCT_NAME, "E2215T"), + }, + .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN, + }, + { + /* + * Medion Akoya E2228T, notification of the LID device only + * happens on close, not on open and _LID always returns closed. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), + DMI_MATCH(DMI_PRODUCT_NAME, "E2228T"), }, .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN, }, From 197afc631413d96dc60acfc7970bdd4125d38cd3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 6 Nov 2020 16:02:51 -0800 Subject: [PATCH 273/710] libbpf: Don't attempt to load unused subprog as an entry-point BPF program If BPF code contains unused BPF subprogram and there are no other subprogram calls (which can realistically happen in real-world applications given sufficiently smart Clang code optimizations), libbpf will erroneously assume that subprograms are entry-point programs and will attempt to load them with UNSPEC program type. Fix by not relying on subcall instructions and rather detect it based on the structure of BPF object's sections. Fixes: 9a94f277c4fb ("tools: libbpf: restore the ability to load programs from .text section") Reported-by: Dmitrii Banshchikov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201107000251.256821-1-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 23 +++++++++++-------- .../selftests/bpf/prog_tests/subprogs.c | 6 +++++ .../bpf/progs/test_subprogs_unused.c | 21 +++++++++++++++++ 3 files changed, 40 insertions(+), 10 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_subprogs_unused.c diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 313034117070..28baee7ba1ca 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -560,8 +560,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, const char *name, size_t sec_idx, const char *sec_name, size_t sec_off, void *insn_data, size_t insn_data_sz) { - int i; - if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) { pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n", sec_name, name, sec_off, insn_data_sz); @@ -600,13 +598,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, goto errout; memcpy(prog->insns, insn_data, insn_data_sz); - for (i = 0; i < prog->insns_cnt; i++) { - if (insn_is_subprog_call(&prog->insns[i])) { - obj->has_subcalls = true; - break; - } - } - return 0; errout: pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name); @@ -3280,7 +3271,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj, static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) { - return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls; + /* For legacy reasons, libbpf supports an entry-point BPF programs + * without SEC() attribute, i.e., those in the .text section. But if + * there are 2 or more such programs in the .text section, they all + * must be subprograms called from entry-point BPF programs in + * designated SEC()'tions, otherwise there is no way to distinguish + * which of those programs should be loaded vs which are a subprogram. + * Similarly, if there is a function/program in .text and at least one + * other BPF program with custom SEC() attribute, then we just assume + * .text programs are subprograms (even if they are not called from + * other programs), because libbpf never explicitly supported mixing + * SEC()-designated BPF programs and .text entry-point BPF programs. + */ + return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; } struct bpf_program * diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c index a00abf58c037..3f3d2ac4dd57 100644 --- a/tools/testing/selftests/bpf/prog_tests/subprogs.c +++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c @@ -3,12 +3,14 @@ #include #include #include "test_subprogs.skel.h" +#include "test_subprogs_unused.skel.h" static int duration; void test_subprogs(void) { struct test_subprogs *skel; + struct test_subprogs_unused *skel2; int err; skel = test_subprogs__open_and_load(); @@ -26,6 +28,10 @@ void test_subprogs(void) CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19); CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36); + skel2 = test_subprogs_unused__open_and_load(); + ASSERT_OK_PTR(skel2, "unused_progs_skel"); + test_subprogs_unused__destroy(skel2); + cleanup: test_subprogs__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c new file mode 100644 index 000000000000..75d975f8cf90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c @@ -0,0 +1,21 @@ +#include "vmlinux.h" +#include +#include + +const char LICENSE[] SEC("license") = "GPL"; + +__attribute__((maybe_unused)) __noinline int unused1(int x) +{ + return x + 1; +} + +static __attribute__((maybe_unused)) __noinline int unused2(int x) +{ + return x + 2; +} + +SEC("raw_tp/sys_enter") +int main_prog(void *ctx) +{ + return 0; +} From abbaa433de07076fb8ef524b77ce55d94bad5fc5 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Sat, 7 Nov 2020 15:45:44 +0800 Subject: [PATCH 274/710] bpf: Fix passing zero to PTR_ERR() in bpf_btf_printf_prepare There is a bug when passing zero to PTR_ERR() and return. Fix the smatch error. Fixes: c4d0bfb45068 ("bpf: Add bpf_snprintf_btf helper") Signed-off-by: Wang Qing Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/1604735144-686-1-git-send-email-wangqing@vivo.com --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4517c8b66518..5113fd423cdf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1198,7 +1198,7 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, *btf = bpf_get_btf_vmlinux(); if (IS_ERR_OR_NULL(*btf)) - return PTR_ERR(*btf); + return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL; if (ptr->type_id > 0) *btf_id = ptr->type_id; From 8ef9ba4d666614497a057d09b0a6eafc1e34eadf Mon Sep 17 00:00:00 2001 From: Oliver Herms Date: Tue, 3 Nov 2020 11:41:33 +0100 Subject: [PATCH 275/710] IPv6: Set SIT tunnel hard_header_len to zero Due to the legacy usage of hard_header_len for SIT tunnels while already using infrastructure from net/ipv4/ip_tunnel.c the calculation of the path MTU in tnl_update_pmtu is incorrect. This leads to unnecessary creation of MTU exceptions for any flow going over a SIT tunnel. As SIT tunnels do not have a header themsevles other than their transport (L3, L2) headers we're leaving hard_header_len set to zero as tnl_update_pmtu is already taking care of the transport headers sizes. This will also help avoiding unnecessary IPv6 GC runs and spinlock contention seen when using SIT tunnels and for more than net.ipv6.route.gc_thresh flows. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: Oliver Herms Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20201103104133.GA1573211@tws Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5e2c34c0ac97..5e7983cb6154 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1128,7 +1128,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); - dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); dev->mtu = tdev->mtu - t_hlen; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1426,7 +1425,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->priv_destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; - dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - t_hlen; From 77a2d673d5c9d1d359b5652ff75043273c5dea28 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 6 Nov 2020 17:59:52 +0100 Subject: [PATCH 276/710] tunnels: Fix off-by-one in lower MTU bounds for ICMP/ICMPv6 replies Jianlin reports that a bridged IPv6 VXLAN endpoint, carrying IPv6 packets over a link with a PMTU estimation of exactly 1350 bytes, won't trigger ICMPv6 Packet Too Big replies when the encapsulated datagrams exceed said PMTU value. VXLAN over IPv6 adds 70 bytes of overhead, so an ICMPv6 reply indicating 1280 bytes as inner MTU would be legitimate and expected. This comes from an off-by-one error I introduced in checks added as part of commit 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets"), whose purpose was to prevent sending ICMPv6 Packet Too Big messages with an MTU lower than the smallest permissible IPv6 link MTU, i.e. 1280 bytes. In iptunnel_pmtud_check_icmpv6(), avoid triggering a reply only if the advertised MTU would be less than, and not equal to, 1280 bytes. Also fix the analogous comparison for IPv4, that is, skip the ICMP reply only if the resulting MTU is strictly less than 576 bytes. This becomes apparent while running the net/pmtu.sh bridged VXLAN or GENEVE selftests with adjusted lower-link MTU values. Using e.g. GENEVE, setting ll_mtu to the values reported below, in the test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() test function, we can see failures on the following tests: test | ll_mtu -------------------------------|-------- pmtu_ipv4_br_geneve4_exception | 626 pmtu_ipv6_br_geneve4_exception | 1330 pmtu_ipv6_br_geneve6_exception | 1350 owing to the different tunneling overheads implied by the corresponding configurations. Reported-by: Jianlin Shi Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Signed-off-by: Stefano Brivio Link: https://lore.kernel.org/r/4f5fc2f33bfdf8409549fafd4f952b008bf04d63.1604681709.git.sbrivio@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 25f1caf5abf9..e25be2d01a7a 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -263,7 +263,7 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) const struct icmphdr *icmph = icmp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); - if (mtu <= 576 || iph->frag_off != htons(IP_DF)) + if (mtu < 576 || iph->frag_off != htons(IP_DF)) return 0; if (ipv4_is_lbcast(iph->daddr) || ipv4_is_multicast(iph->daddr) || @@ -359,7 +359,7 @@ static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu) __be16 frag_off; int offset; - if (mtu <= IPV6_MIN_MTU) + if (mtu < IPV6_MIN_MTU) return 0; if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST || From 413691384a37fe27f43460226c4160e33140e638 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 8 Nov 2020 00:46:15 +0000 Subject: [PATCH 277/710] ethtool: netlink: add missing netdev_features_change() call After updating userspace Ethtool from 5.7 to 5.9, I noticed that NETDEV_FEAT_CHANGE is no more raised when changing netdev features through Ethtool. That's because the old Ethtool ioctl interface always calls netdev_features_change() at the end of user request processing to inform the kernel that our netdevice has some features changed, but the new Netlink interface does not. Instead, it just notifies itself with ETHTOOL_MSG_FEATURES_NTF. Replace this ethtool_notify() call with netdev_features_change(), so the kernel will be aware of any features changes, just like in case with the ioctl interface. This does not omit Ethtool notifications, as Ethtool itself listens to NETDEV_FEAT_CHANGE and drops ETHTOOL_MSG_FEATURES_NTF on it (net/ethtool/netlink.c:ethnl_netdev_event()). From v1 [1]: - dropped extra new line as advised by Jakub; - no functional changes. [1] https://lore.kernel.org/netdev/AlZXQ2o5uuTVHCfNGOiGgJ8vJ3KgO5YIWAnQjH0cDE@cp3-web-009.plabs.ch Fixes: 0980bfcd6954 ("ethtool: set netdev features with FEATURES_SET request") Signed-off-by: Alexander Lobakin Reviewed-by: Michal Kubecek Link: https://lore.kernel.org/r/ahA2YWXYICz5rbUSQqNG4roJ8OlJzzYQX7PTiG80@cp4-web-028.plabs.ch Signed-off-by: Jakub Kicinski --- net/ethtool/features.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/features.c b/net/ethtool/features.c index 8ee4cdbd6b82..1c9f4df273bd 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -280,7 +280,7 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) active_diff_mask, compact); } if (mod) - ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); + netdev_features_change(dev); out_rtnl: rtnl_unlock(); From 16eb0eb835c77c5e8824b8aa90b11b00ddc5c122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sat, 7 Nov 2020 23:08:21 +0100 Subject: [PATCH 278/710] docs: networking: phy: s/2.5 times faster/2.5 times as fast/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2.5 times faster would be 3.5 Gbps (4.375 Gbaud after 8b/10b encoding). Signed-off-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20201107220822.1291215-1-j.neuschaefer@gmx.net Signed-off-by: Jakub Kicinski --- Documentation/networking/phy.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst index 256106054c8c..b2f7ec794bc8 100644 --- a/Documentation/networking/phy.rst +++ b/Documentation/networking/phy.rst @@ -247,8 +247,8 @@ Some of the interface modes are described below: speeds (see below.) ``PHY_INTERFACE_MODE_2500BASEX`` - This defines a variant of 1000BASE-X which is clocked 2.5 times faster, - than the 802.3 standard giving a fixed bit rate of 3.125Gbaud. + This defines a variant of 1000BASE-X which is clocked 2.5 times as fast + as the 802.3 standard, giving a fixed bit rate of 3.125Gbaud. ``PHY_INTERFACE_MODE_SGMII`` This is used for Cisco SGMII, which is a modification of 1000BASE-X From 989ef49bdf100cc772b3a8737089df36b1ab1e30 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sun, 8 Nov 2020 19:49:59 +0100 Subject: [PATCH 279/710] mptcp: provide rmem[0] limit The mptcp proto struct currently does not provide the required limit for forward memory scheduling. Under pressure sk_rmem_schedule() will unconditionally try to use such field and will oops. Address the issue inheriting the tcp limit, as we already do for the wmem one. Fixes: 9c3f94e1681b ("mptcp: add missing memory scheduling in the rx path") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Link: https://lore.kernel.org/r/37af798bd46f402fb7c79f57ebbdd00614f5d7fa.1604861097.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e7419fd15d84..88f2a7a0ccb8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2467,6 +2467,7 @@ static struct proto mptcp_prot = { .memory_pressure = &tcp_memory_pressure, .stream_memory_free = mptcp_memory_free, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .sysctl_mem = sysctl_tcp_mem, .obj_size = sizeof(struct mptcp_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, From d19d2152ca055baf20339cfacbf039c2cfb8d936 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 5 Nov 2020 18:06:12 +0100 Subject: [PATCH 280/710] arm64: dts: imx8mm: fix voltage for 1.6GHz CPU operating point The datasheet for both the industrial and consumer variant of the SoC lists a typical voltage of 0.95V for the 1.6GHz CPU operating point. Fixes: e85c9d0faa75 (arm64: dts: imx8mm: Add cpufreq properties) Signed-off-by: Lucas Stach Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index b83f400def8b..05ee062548e4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -129,7 +129,7 @@ opp-1600000000 { opp-hz = /bits/ 64 <1600000000>; - opp-microvolt = <900000>; + opp-microvolt = <950000>; opp-supported-hw = <0xc>, <0x7>; clock-latency-ns = <150000>; opp-suspend; From 33d0d843872c5ddbe28457a92fc6f2487315fb9f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 5 Nov 2020 18:13:20 -0300 Subject: [PATCH 281/710] ARM: dts: imx50-evk: Fix the chip select 1 IOMUX The SPI chip selects are represented as: cs-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>, <&gpio4 13 GPIO_ACTIVE_LOW>; , which means that they are used in GPIO function instead of native SPI mode. Fix the IOMUX for the chip select 1 to use GPIO4_13 instead of the native CSPI_SSI function. Fixes: c605cbf5e135 ("ARM: dts: imx: add device tree support for Freescale imx50evk board") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx50-evk.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx50-evk.dts b/arch/arm/boot/dts/imx50-evk.dts index 878e89c20190..4ea5c23f181b 100644 --- a/arch/arm/boot/dts/imx50-evk.dts +++ b/arch/arm/boot/dts/imx50-evk.dts @@ -59,7 +59,7 @@ MX50_PAD_CSPI_MISO__CSPI_MISO 0x00 MX50_PAD_CSPI_MOSI__CSPI_MOSI 0x00 MX50_PAD_CSPI_SS0__GPIO4_11 0xc4 - MX50_PAD_ECSPI1_MOSI__CSPI_SS1 0xf4 + MX50_PAD_ECSPI1_MOSI__GPIO4_13 0x84 >; }; From 642403e3599e80370f71ba7a8a8c8fa82e5f6706 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Mon, 9 Nov 2020 15:39:39 +0800 Subject: [PATCH 282/710] drm/i915/gvt: Temporarily disable vfio_edid for BXT/APL Some disply regs are not setup correctly during HPD for BXT/APL thus vfio_edid still not working. Temporarily disable the vfio_edid dynamic update until issue fixed. Acked-by: Zhenyu Wang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20201109073939.758302-1-colin.xu@intel.com --- drivers/gpu/drm/i915/gvt/vgpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index f6d7e33c7099..399582aeeefb 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -439,7 +439,8 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (IS_BROADWELL(dev_priv)) ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B); - else + /* FixMe: Re-enable APL/BXT once vfio_edid enabled */ + else if (!IS_BROXTON(dev_priv)) ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); if (ret) goto out_clean_sched_policy; From e8973201d9b281375b5a8c66093de5679423021a Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 6 Nov 2020 18:25:30 +0900 Subject: [PATCH 283/710] mmc: renesas_sdhi_core: Add missing tmio_mmc_host_free() at remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 94b110aff867 ("mmc: tmio: add tmio_mmc_host_alloc/free()") added tmio_mmc_host_free(), but missed the function calling in the sh_mobile_sdhi_remove() at that time. So, fix it. Otherwise, we cannot rebind the sdhi/mmc devices when we use aliases of mmc. Fixes: 94b110aff867 ("mmc: tmio: add tmio_mmc_host_alloc/free()") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Reviewed-by: Niklas Söderlund Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1604654730-29914-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 414314151d0a..03c905a781a7 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -1160,6 +1160,7 @@ int renesas_sdhi_remove(struct platform_device *pdev) tmio_mmc_host_remove(host); renesas_sdhi_clk_disable(host); + tmio_mmc_host_free(host); return 0; } From 71b053276a87ddfa40c8f236315d81543219bfb9 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 10 Nov 2020 15:13:14 +0800 Subject: [PATCH 284/710] mmc: sdhci-of-esdhc: Handle pulse width detection erratum for more SoCs Apply erratum workaround of unreliable pulse width detection to more affected platforms (LX2160A Rev2.0 and LS1028A Rev1.0). Signed-off-by: Yangbo Lu Fixes: 48e304cc1970 ("mmc: sdhci-of-esdhc: workaround for unreliable pulse width detection") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201110071314.3868-1-yangbo.lu@nxp.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-esdhc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index bb094459196a..ab5ab969f711 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -1324,6 +1324,8 @@ static struct soc_device_attribute soc_fixup_sdhc_clkdivs[] = { static struct soc_device_attribute soc_unreliable_pulse_detection[] = { { .family = "QorIQ LX2160A", .revision = "1.0", }, + { .family = "QorIQ LX2160A", .revision = "2.0", }, + { .family = "QorIQ LS1028A", .revision = "1.0", }, { }, }; From 29a25b9246f7f24203d30d59424cbe22bd905dfc Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 9 Nov 2020 17:40:13 +0200 Subject: [PATCH 285/710] dmaengine: ti: omap-dma: Block PM if SDMA is busy to fix audio We now use cpu_pm for saving and restoring device context for deeper SoC idle states. But for omap3, we must also block idle if SDMA is busy. If we don't block idle when SDMA is busy, we eventually end up saving and restoring SDMA register state on PER domain idle while SDMA is active and that causes at least audio playback to fail. Fixes: 4c74ecf79227 ("dmaengine: ti: omap-dma: Add device tree match data and use it for cpu_pm") Reported-by: Peter Ujfalusi Signed-off-by: Tony Lindgren Tested-by: Peter Ujfalusi Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201109154013.11950-1-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/dma/ti/omap-dma.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c index c9fe5e3a6b55..268a08058714 100644 --- a/drivers/dma/ti/omap-dma.c +++ b/drivers/dma/ti/omap-dma.c @@ -1522,29 +1522,38 @@ static void omap_dma_free(struct omap_dmadev *od) } } +/* Currently used by omap2 & 3 to block deeper SoC idle states */ +static bool omap_dma_busy(struct omap_dmadev *od) +{ + struct omap_chan *c; + int lch = -1; + + while (1) { + lch = find_next_bit(od->lch_bitmap, od->lch_count, lch + 1); + if (lch >= od->lch_count) + break; + c = od->lch_map[lch]; + if (!c) + continue; + if (omap_dma_chan_read(c, CCR) & CCR_ENABLE) + return true; + } + + return false; +} + /* Currently only used for omap2. For omap1, also a check for lcd_dma is needed */ static int omap_dma_busy_notifier(struct notifier_block *nb, unsigned long cmd, void *v) { struct omap_dmadev *od; - struct omap_chan *c; - int lch = -1; od = container_of(nb, struct omap_dmadev, nb); switch (cmd) { case CPU_CLUSTER_PM_ENTER: - while (1) { - lch = find_next_bit(od->lch_bitmap, od->lch_count, - lch + 1); - if (lch >= od->lch_count) - break; - c = od->lch_map[lch]; - if (!c) - continue; - if (omap_dma_chan_read(c, CCR) & CCR_ENABLE) - return NOTIFY_BAD; - } + if (omap_dma_busy(od)) + return NOTIFY_BAD; break; case CPU_CLUSTER_PM_ENTER_FAILED: case CPU_CLUSTER_PM_EXIT: @@ -1595,6 +1604,8 @@ static int omap_dma_context_notifier(struct notifier_block *nb, switch (cmd) { case CPU_CLUSTER_PM_ENTER: + if (omap_dma_busy(od)) + return NOTIFY_BAD; omap_dma_context_save(od); break; case CPU_CLUSTER_PM_ENTER_FAILED: From 1023e290ba567af0640f9a5bd878207a5dff6ed2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 6 Nov 2020 08:25:47 +0100 Subject: [PATCH 286/710] mmc: tmio: when resetting, reset DMA controller, too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When applying a revert, the assumption that DMA only needs to be cleared in specific cases was wrong. We want to reset the DMA controller every time the rest of the HW gets reset, too. Fixes: 34e3211e5492 ("Revert "mmc: tmio: fix reset operation"") Reported-by: Yoshihiro Shimoda Signed-off-by: Wolfram Sang Tested-by: Niklas Söderlund Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20201106072549.1495-2-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 2fce0518632d..cfb53d7c63d7 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -175,6 +175,8 @@ static void tmio_mmc_reset(struct tmio_mmc_host *host) if (host->reset) host->reset(host); + tmio_mmc_abort_dma(host); + if (host->pdata->flags & TMIO_MMC_SDIO_IRQ) { sd_ctrl_write16(host, CTL_SDIO_IRQ_MASK, host->sdio_irq_mask); sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0001); @@ -223,8 +225,6 @@ static void tmio_mmc_reset_work(struct work_struct *work) /* Ready for new calls */ host->mrq = NULL; - - tmio_mmc_abort_dma(host); mmc_request_done(host->mmc, mrq); } From 24ce2d7b8beaede6a467640bfa7636e73d9b491e Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 6 Nov 2020 08:25:48 +0100 Subject: [PATCH 287/710] mmc: tmio: bring tuning HW to a sane state with MMC_POWER_OFF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When powering off a card, we need to disable the tuning HW (like SCC for the Renesas SDHI) to get to a sane state and allow for re-tuning new cards. This was hidden before because we wrongly did that in hw_reset() before which was an unintended use of hw_reset(). Now that we corrected the use of hw_reset() meanwhile, we revealed this shortcoming and need to fix it properly by explicitly calling the downgrade callback. Fixes: 6e7d4de10890 ("mmc: renesas_sdhi: move wrong 'hw_reset' to 'reset'") Suggested-by: Takeshi Saito Reviewed-by: Takeshi Saito Signed-off-by: Wolfram Sang Tested-by: Niklas Söderlund Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20201106072549.1495-3-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index cfb53d7c63d7..cb4149fd12e0 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -927,6 +927,9 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) switch (ios->power_mode) { case MMC_POWER_OFF: tmio_mmc_power_off(host); + /* Downgrade ensures a sane state for tuning HW (e.g. SCC) */ + if (host->mmc->ops->hs400_downgrade) + host->mmc->ops->hs400_downgrade(host->mmc); host->set_clock(host, 0); break; case MMC_POWER_UP: From 03d80e042a8e3248163a38f74b43809f8079d652 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 6 Nov 2020 08:25:49 +0100 Subject: [PATCH 288/710] Revert "mmc: renesas_sdhi: workaround a regression when reinserting SD cards" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit db1af1e9712920f47b5dc6a995fca3eec05ea85e. It was only a workaround to hide a regression. We now have proper fixes. Signed-off-by: Wolfram Sang Tested-by: Niklas Söderlund Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20201106072549.1495-4-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 03c905a781a7..acb9c81a4e45 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -572,17 +572,6 @@ static void renesas_sdhi_reset(struct tmio_mmc_host *host) TMIO_MASK_INIT_RCAR2); } -/* - * This is a temporary workaround! This driver used 'hw_reset' wrongly and the - * fix for that showed a regression. So, we mimic the old behaviour until the - * proper solution is found. - */ -static void renesas_sdhi_hw_reset(struct mmc_host *mmc) -{ - struct tmio_mmc_host *host = mmc_priv(mmc); - renesas_sdhi_reset(host); -} - #define SH_MOBILE_SDHI_MIN_TAP_ROW 3 static int renesas_sdhi_select_tuning(struct tmio_mmc_host *host) @@ -1020,8 +1009,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (of_data && of_data->scc_offset) { priv->scc_ctl = host->ctl + of_data->scc_offset; host->reset = renesas_sdhi_reset; - host->ops.hw_reset = renesas_sdhi_hw_reset; - host->mmc->caps |= MMC_CAP_HW_RESET; } } From f969f03888b9438fdb227b6460d99ede5737326d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Nov 2020 11:14:26 +0000 Subject: [PATCH 289/710] arm64: errata: Fix handling of 1418040 with late CPU onlining In a surprising turn of events, it transpires that CPU capabilities configured as ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE are never set as the result of late-onlining. Therefore our handling of erratum 1418040 does not get activated if it is not required by any of the boot CPUs, even though we allow late-onlining of an affected CPU. In order to get things working again, replace the cpus_have_const_cap() invocation with an explicit check for the current CPU using this_cpu_has_cap(). Cc: Sai Prakash Ranjan Cc: Stephen Boyd Cc: Catalin Marinas Cc: Mark Rutland Reviewed-by: Suzuki K Poulose Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20201106114952.10032-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 ++ arch/arm64/kernel/process.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f7e7144af174..c59c16a6ea8b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -268,6 +268,8 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on feature of one or more CPUs. * All possible conflicts for a late CPU are ignored. + * NOTE: this means that a late CPU with the feature will *not* cause the + * capability to be advertised by cpus_have_*cap()! */ #define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4784011cecac..a47a40ec6ad9 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -522,14 +522,13 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, bool prev32, next32; u64 val; - if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) && - cpus_have_const_cap(ARM64_WORKAROUND_1418040))) + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040)) return; prev32 = is_compat_thread(task_thread_info(prev)); next32 = is_compat_thread(task_thread_info(next)); - if (prev32 == next32) + if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040)) return; val = read_sysreg(cntkctl_el1); From 85f0b2fc917f8de4bca02d169ef7d23dbfc29155 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 9 Nov 2020 10:49:23 +0000 Subject: [PATCH 290/710] arm64: kexec_file: Fix sparse warning Sparse gets cross about us returning 0 from image_load(), which has a return type of 'void *': >> arch/arm64/kernel/kexec_image.c:130:16: sparse: sparse: Using plain integer as NULL pointer Return NULL instead, as we don't use the return value for anything if it does not indicate an error. Cc: Benjamin Gwin Reported-by: kernel test robot Fixes: 108aa503657e ("arm64: kexec_file: try more regions if loading segments fails") Link: https://lore.kernel.org/r/202011091736.T0zH8kaC-lkp@intel.com Signed-off-by: Will Deacon --- arch/arm64/kernel/kexec_image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 66adee8b5fc8..9ec34690e255 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -127,7 +127,7 @@ static void *image_load(struct kimage *image, kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz); - return 0; + return NULL; } #ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG From 891deb87585017d526b67b59c15d38755b900fea Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Nov 2020 09:57:55 +0000 Subject: [PATCH 291/710] arm64: psci: Avoid printing in cpu_psci_cpu_die() cpu_psci_cpu_die() is called in the context of the dying CPU, which will no longer be online or tracked by RCU. It is therefore not generally safe to call printk() if the PSCI "cpu off" request fails, so remove the pr_crit() invocation. Cc: Qian Cai Cc: "Paul E. McKenney" Cc: Catalin Marinas Link: https://lore.kernel.org/r/20201106103602.9849-2-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/psci.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 43ae4e0c968f..62d2bda7adb8 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -66,7 +66,6 @@ static int cpu_psci_cpu_disable(unsigned int cpu) static void cpu_psci_cpu_die(unsigned int cpu) { - int ret; /* * There are no known implementations of PSCI actually using the * power state field, pass a sensible default for now. @@ -74,9 +73,7 @@ static void cpu_psci_cpu_die(unsigned int cpu) u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT; - ret = psci_ops.cpu_off(state); - - pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); + psci_ops.cpu_off(state); } static int cpu_psci_cpu_kill(unsigned int cpu) From 04e613ded8c26489b3e0f9101b44462f780d1a35 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Nov 2020 10:25:49 +0000 Subject: [PATCH 292/710] arm64: smp: Tell RCU about CPUs that fail to come online Commit ce3d31ad3cac ("arm64/smp: Move rcu_cpu_starting() earlier") ensured that RCU is informed early about incoming CPUs that might end up calling into printk() before they are online. However, if such a CPU fails the early CPU feature compatibility checks in check_local_cpu_capabilities(), then it will be powered off or parked without informing RCU, leading to an endless stream of stalls: | rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: | rcu: 2-O...: (0 ticks this GP) idle=002/1/0x4000000000000000 softirq=0/0 fqs=2593 | (detected by 0, t=5252 jiffies, g=9317, q=136) | Task dump for CPU 2: | task:swapper/2 state:R running task stack: 0 pid: 0 ppid: 1 flags:0x00000028 | Call trace: | ret_from_fork+0x0/0x30 Ensure that the dying CPU invokes rcu_report_dead() prior to being powered off or parked. Cc: Qian Cai Cc: "Paul E. McKenney" Reviewed-by: Paul E. McKenney Suggested-by: Qian Cai Link: https://lore.kernel.org/r/20201105222242.GA8842@willie-the-truck Link: https://lore.kernel.org/r/20201106103602.9849-3-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 1 + kernel/rcu/tree.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 09c96f57818c..18e9727d3f64 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -413,6 +413,7 @@ void cpu_die_early(void) /* Mark this CPU absent */ set_cpu_present(cpu, 0); + rcu_report_dead(cpu); if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { update_cpu_boot_status(CPU_KILL_ME); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 06895ef85d69..946e7c0c4cf7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4077,7 +4077,6 @@ void rcu_cpu_starting(unsigned int cpu) smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ } -#ifdef CONFIG_HOTPLUG_CPU /* * The outgoing function has no further need of RCU, so remove it from * the rcu_node tree's ->qsmaskinitnext bit masks. @@ -4117,6 +4116,7 @@ void rcu_report_dead(unsigned int cpu) rdp->cpu_started = false; } +#ifdef CONFIG_HOTPLUG_CPU /* * The outgoing CPU has just passed through the dying-idle state, and we * are being invoked from the CPU that was IPIed to continue the offline From 06abe8291bc31839950f7d0362d9979edc88a666 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 6 Nov 2020 07:19:09 +0800 Subject: [PATCH 293/710] pinctrl: amd: fix incorrect way to disable debounce filter The correct way to disable debounce filter is to clear bit 5 and 6 of the register. Cc: stable@vger.kerne.org Signed-off-by: Coiby Xu Reviewed-by: Hans de Goede Cc: Hans de Goede Link: https://lore.kernel.org/linux-gpio/df2c008b-e7b5-4fdd-42ea-4d1c62b52139@redhat.com/ Link: https://lore.kernel.org/r/20201105231912.69527-2-coiby.xu@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 9a760f5cd7ed..d6b2b4bd337c 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -166,14 +166,14 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); pin_reg |= BIT(DB_TMR_LARGE_OFF); } else { - pin_reg &= ~DB_CNTRl_MASK; + pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); ret = -EINVAL; } } else { pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF); pin_reg &= ~BIT(DB_TMR_LARGE_OFF); pin_reg &= ~DB_TMR_OUT_MASK; - pin_reg &= ~DB_CNTRl_MASK; + pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); } writel(pin_reg, gpio_dev->base + offset * 4); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); From c64a6a0d4a928c63e5bc3b485552a8903a506c36 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 6 Nov 2020 07:19:10 +0800 Subject: [PATCH 294/710] pinctrl: amd: use higher precision for 512 RtcClk RTC is 32.768kHz thus 512 RtcClk equals 15625 usec. The documentation likely has dropped precision and that's why the driver mistakenly took the slightly deviated value. Cc: stable@vger.kernel.org Reported-by: Andy Shevchenko Suggested-by: Andy Shevchenko Suggested-by: Hans de Goede Signed-off-by: Coiby Xu Reviewed-by: Andy Shevchenko Reviewed-by: Hans de Goede Link: https://lore.kernel.org/linux-gpio/2f4706a1-502f-75f0-9596-cc25b4933b6c@redhat.com/ Link: https://lore.kernel.org/r/20201105231912.69527-3-coiby.xu@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index d6b2b4bd337c..4aea3e05e8c6 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -156,7 +156,7 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); pin_reg &= ~BIT(DB_TMR_LARGE_OFF); } else if (debounce < 250000) { - time = debounce / 15600; + time = debounce / 15625; pin_reg |= time & DB_TMR_OUT_MASK; pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF); pin_reg |= BIT(DB_TMR_LARGE_OFF); From 71266d9d39366c9b24b866d811b3facaf837f13f Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Thu, 5 Nov 2020 13:08:04 +0530 Subject: [PATCH 295/710] pinctrl: qcom: Move clearing pending IRQ to .irq_request_resources callback When GPIOs that are routed to PDC are used as output they can still latch the IRQ pending at GIC. As a result the spurious IRQ was handled when the client driver change the direction to input to starts using it as IRQ. Currently such erroneous latched IRQ are cleared with .irq_enable callback however if the driver continue to use GPIO as interrupt and invokes disable_irq() followed by enable_irq() then everytime during enable_irq() previously latched interrupt gets cleared. This can make edge IRQs not seen after enable_irq() if they had arrived after the driver has invoked disable_irq() and were pending at GIC. Move clearing erroneous IRQ to .irq_request_resources callback as this is the place where GPIO direction is changed as input and its locked as IRQ. While at this add a missing check to invoke msm_gpio_irq_clear_unmask() from .irq_enable callback only when GPIO is not routed to PDC. Fixes: e35a6ae0eb3a ("pinctrl/msm: Setup GPIO chip in hierarchy") Signed-off-by: Maulik Shah Link: https://lore.kernel.org/r/1604561884-10166-1-git-send-email-mkshah@codeaurora.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 34 ++++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index c4bcda90aac4..77a25bdf0da7 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -815,21 +815,14 @@ static void msm_gpio_irq_clear_unmask(struct irq_data *d, bool status_clear) static void msm_gpio_irq_enable(struct irq_data *d) { - /* - * Clear the interrupt that may be pending before we enable - * the line. - * This is especially a problem with the GPIOs routed to the - * PDC. These GPIOs are direct-connect interrupts to the GIC. - * Disabling the interrupt line at the PDC does not prevent - * the interrupt from being latched at the GIC. The state at - * GIC needs to be cleared before enabling. - */ - if (d->parent_data) { - irq_chip_set_parent_state(d, IRQCHIP_STATE_PENDING, 0); - irq_chip_enable_parent(d); - } + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct msm_pinctrl *pctrl = gpiochip_get_data(gc); - msm_gpio_irq_clear_unmask(d, true); + if (d->parent_data) + irq_chip_enable_parent(d); + + if (!test_bit(d->hwirq, pctrl->skip_wake_irqs)) + msm_gpio_irq_clear_unmask(d, true); } static void msm_gpio_irq_disable(struct irq_data *d) @@ -1104,6 +1097,19 @@ static int msm_gpio_irq_reqres(struct irq_data *d) ret = -EINVAL; goto out; } + + /* + * Clear the interrupt that may be pending before we enable + * the line. + * This is especially a problem with the GPIOs routed to the + * PDC. These GPIOs are direct-connect interrupts to the GIC. + * Disabling the interrupt line at the PDC does not prevent + * the interrupt from being latched at the GIC. The state at + * GIC needs to be cleared before enabling. + */ + if (d->parent_data && test_bit(d->hwirq, pctrl->skip_wake_irqs)) + irq_chip_set_parent_state(d, IRQCHIP_STATE_PENDING, 0); + return 0; out: module_put(gc->owner); From b41efeed507addecb92e83dd444d86c1fbe38ae0 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 27 Oct 2020 21:36:42 -0700 Subject: [PATCH 296/710] pinctrl: qcom: sm8250: Specify PDC map Specify the PDC mapping for SM8250, so that gpio interrupts are propertly mapped to the wakeup IRQs of the PDC. Fixes: 4e3ec9e407ad ("pinctrl: qcom: Add sm8250 pinctrl driver.") Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201028043642.1141723-1-bjorn.andersson@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-sm8250.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8250.c b/drivers/pinctrl/qcom/pinctrl-sm8250.c index 826df0d637ea..af144e724bd9 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8250.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8250.c @@ -1313,6 +1313,22 @@ static const struct msm_pingroup sm8250_groups[] = { [183] = SDC_PINGROUP(sdc2_data, 0xb7000, 9, 0), }; +static const struct msm_gpio_wakeirq_map sm8250_pdc_map[] = { + { 0, 79 }, { 1, 84 }, { 2, 80 }, { 3, 82 }, { 4, 107 }, { 7, 43 }, + { 11, 42 }, { 14, 44 }, { 15, 52 }, { 19, 67 }, { 23, 68 }, { 24, 105 }, + { 27, 92 }, { 28, 106 }, { 31, 69 }, { 35, 70 }, { 39, 37 }, + { 40, 108 }, { 43, 71 }, { 45, 72 }, { 47, 83 }, { 51, 74 }, { 55, 77 }, + { 59, 78 }, { 63, 75 }, { 64, 81 }, { 65, 87 }, { 66, 88 }, { 67, 89 }, + { 68, 54 }, { 70, 85 }, { 77, 46 }, { 80, 90 }, { 81, 91 }, { 83, 97 }, + { 84, 98 }, { 86, 99 }, { 87, 100 }, { 88, 101 }, { 89, 102 }, + { 92, 103 }, { 93, 104 }, { 100, 53 }, { 103, 47 }, { 104, 48 }, + { 108, 49 }, { 109, 94 }, { 110, 95 }, { 111, 96 }, { 112, 55 }, + { 113, 56 }, { 118, 50 }, { 121, 51 }, { 122, 57 }, { 123, 58 }, + { 124, 45 }, { 126, 59 }, { 128, 76 }, { 129, 86 }, { 132, 93 }, + { 133, 65 }, { 134, 66 }, { 136, 62 }, { 137, 63 }, { 138, 64 }, + { 142, 60 }, { 143, 61 } +}; + static const struct msm_pinctrl_soc_data sm8250_pinctrl = { .pins = sm8250_pins, .npins = ARRAY_SIZE(sm8250_pins), @@ -1323,6 +1339,8 @@ static const struct msm_pinctrl_soc_data sm8250_pinctrl = { .ngpios = 181, .tiles = sm8250_tiles, .ntiles = ARRAY_SIZE(sm8250_tiles), + .wakeirq_map = sm8250_pdc_map, + .nwakeirq_map = ARRAY_SIZE(sm8250_pdc_map), }; static int sm8250_pinctrl_probe(struct platform_device *pdev) From 2bd645b2d3f0bacadaa6037f067538e1cd4e42ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Nov 2020 18:30:59 +0100 Subject: [PATCH 297/710] nbd: fix a block_device refcount leak in nbd_release bdget_disk needs to be paired with bdput to not leak a reference on the block device inode. Fixes: 08ba91ee6e2c ("nbd: Add the nbd NBD_DISCONNECT_ON_CLOSE config flag.") Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c4f9ccf5cc2a..aaae9220f3a0 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1518,6 +1518,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode) if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && bdev->bd_openers == 0) nbd_disconnect_and_put(nbd); + bdput(bdev); nbd_config_put(nbd); nbd_put(nbd); From 949dd0104c496fa7c14991a23c03c62e44637e71 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 10 Nov 2020 13:00:00 -0800 Subject: [PATCH 298/710] powercap: restrict energy meter to root access Remove non-privileged user access to power data contained in /sys/class/powercap/intel-rapl*/*/energy_uj Non-privileged users currently have read access to power data and can use this data to form a security attack. Some privileged drivers/applications need read access to this data, but don't expose it to non-privileged users. For example, thermald uses this data to ensure that power management works correctly. Thus removing non-privileged access is preferred over completely disabling this power reporting capability with CONFIG_INTEL_RAPL=n. Fixes: 95677a9a3847 ("PowerCap: Fix mode for energy counter") Signed-off-by: Len Brown Cc: stable@vger.kernel.org --- drivers/powercap/powercap_sys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index f808c5fa9838..3f0b8e2ef3d4 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -367,9 +367,9 @@ static void create_power_zone_common_attributes( &dev_attr_max_energy_range_uj.attr; if (power_zone->ops->get_energy_uj) { if (power_zone->ops->reset_energy_uj) - dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUGO; + dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUSR; else - dev_attr_energy_uj.attr.mode = S_IRUGO; + dev_attr_energy_uj.attr.mode = S_IRUSR; power_zone->zone_dev_attrs[count++] = &dev_attr_energy_uj.attr; } From 3e9fa9983b9297407c2448114d6d27782d5e2ef2 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 30 Sep 2020 21:04:05 -0400 Subject: [PATCH 299/710] tools/power turbostat: update version number goodbye summer... Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 74d2fc6fc78a..f3a1746f7f45 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5712,7 +5712,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 20.03.20" + fprintf(outf, "turbostat version 20.09.30" " - Len Brown \n"); } From c088a4985e5f6f6c2cbe5a6953357dfc30b7c57e Mon Sep 17 00:00:00 2001 From: Pi-Hsun Shih Date: Fri, 6 Nov 2020 14:48:17 +0800 Subject: [PATCH 300/710] regulator: core: don't disable regulator if is_enabled return error. In regulator_late_cleanup when is_enabled failed, don't try to disable the regulator since it would likely to fail too and causing confusing error messages. Signed-off-by: Pi-Hsun Shih Link: https://lore.kernel.org/r/20201106064817.3290927-1-pihsun@chromium.org Signed-off-by: Mark Brown --- drivers/regulator/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a5ad553da8cd..70168f2a53ed 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5843,13 +5843,14 @@ static int regulator_late_cleanup(struct device *dev, void *data) if (rdev->use_count) goto unlock; - /* If we can't read the status assume it's on. */ + /* If we can't read the status assume it's always on. */ if (ops->is_enabled) enabled = ops->is_enabled(rdev); else enabled = 1; - if (!enabled) + /* But if reading the status failed, assume that it's off. */ + if (enabled <= 0) goto unlock; if (have_full_constraints()) { From 9a2a9ebc0a758d887ee06e067e9f7f0b36ff7574 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:25:57 +0100 Subject: [PATCH 301/710] cpufreq: Introduce governor flags A new cpufreq governor flag will be added subsequently, so replace the bool dynamic_switching fleid in struct cpufreq_governor with a flags field and introduce CPUFREQ_GOV_DYNAMIC_SWITCHING to set for the "dynamic switching" governors instead of it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 2 +- drivers/cpufreq/cpufreq_governor.h | 2 +- include/linux/cpufreq.h | 9 +++++++-- kernel/sched/cpufreq_schedutil.c | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 336b5e94cbc8..0252903f1b43 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2254,7 +2254,7 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy) return -EINVAL; /* Platform doesn't want dynamic frequency switching ? */ - if (policy->governor->dynamic_switching && + if (policy->governor->flags & CPUFREQ_GOV_DYNAMIC_SWITCHING && cpufreq_driver->flags & CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING) { struct cpufreq_governor *gov = cpufreq_fallback_governor(); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index c56773c25757..bab8e6140377 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -156,7 +156,7 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy); #define CPUFREQ_DBS_GOVERNOR_INITIALIZER(_name_) \ { \ .name = _name_, \ - .dynamic_switching = true, \ + .flags = CPUFREQ_GOV_DYNAMIC_SWITCHING, \ .owner = THIS_MODULE, \ .init = cpufreq_dbs_governor_init, \ .exit = cpufreq_dbs_governor_exit, \ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1eaa04f1bae6..9bdfcf3c4748 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -570,12 +570,17 @@ struct cpufreq_governor { char *buf); int (*store_setspeed) (struct cpufreq_policy *policy, unsigned int freq); - /* For governors which change frequency dynamically by themselves */ - bool dynamic_switching; struct list_head governor_list; struct module *owner; + u8 flags; }; +/* Governor flags */ + +/* For governors which change frequency dynamically by themselves */ +#define CPUFREQ_GOV_DYNAMIC_SWITCHING BIT(0) + + /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index d73bccde2720..97d318b0cd0c 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -881,7 +881,7 @@ static void sugov_limits(struct cpufreq_policy *policy) struct cpufreq_governor schedutil_gov = { .name = "schedutil", .owner = THIS_MODULE, - .dynamic_switching = true, + .flags = CPUFREQ_GOV_DYNAMIC_SWITCHING, .init = sugov_init, .exit = sugov_exit, .start = sugov_start, From 218f66870181bec7aaa6e3c72f346039c590c3c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:26:10 +0100 Subject: [PATCH 302/710] cpufreq: Introduce CPUFREQ_GOV_STRICT_TARGET Introduce a new governor flag, CPUFREQ_GOV_STRICT_TARGET, for the governors that want the target frequency to be set exactly to the given value without leaving any room for adjustments on the hardware side and set this flag for the powersave and performance governors. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_performance.c | 1 + drivers/cpufreq/cpufreq_powersave.c | 1 + include/linux/cpufreq.h | 3 +++ 3 files changed, 5 insertions(+) diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index 71c1d9aba772..addd93f2a420 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -20,6 +20,7 @@ static void cpufreq_gov_performance_limits(struct cpufreq_policy *policy) static struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", .owner = THIS_MODULE, + .flags = CPUFREQ_GOV_STRICT_TARGET, .limits = cpufreq_gov_performance_limits, }; diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 7749522355b5..8d830d860e91 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -21,6 +21,7 @@ static struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .limits = cpufreq_gov_powersave_limits, .owner = THIS_MODULE, + .flags = CPUFREQ_GOV_STRICT_TARGET, }; MODULE_AUTHOR("Dominik Brodowski "); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9bdfcf3c4748..6eb9a3b8ec7b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -580,6 +580,9 @@ struct cpufreq_governor { /* For governors which change frequency dynamically by themselves */ #define CPUFREQ_GOV_DYNAMIC_SWITCHING BIT(0) +/* For governors wanting the target frequency to be set exactly */ +#define CPUFREQ_GOV_STRICT_TARGET BIT(1) + /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, From ea9364bbadf11f0c55802cf11387d74f524cee84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:26:37 +0100 Subject: [PATCH 303/710] cpufreq: Add strict_target to struct cpufreq_policy Add a new field to be set when the CPUFREQ_GOV_STRICT_TARGET flag is set for the current governor to struct cpufreq_policy, so that the drivers needing to check CPUFREQ_GOV_STRICT_TARGET do not have to access the governor object during every frequency transition. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 2 ++ include/linux/cpufreq.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0252903f1b43..1e7e3f2ff09f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2280,6 +2280,8 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy) } } + policy->strict_target = !!(policy->governor->flags & CPUFREQ_GOV_STRICT_TARGET); + return 0; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6eb9a3b8ec7b..acbad3b36322 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -109,6 +109,12 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* + * Set if the CPUFREQ_GOV_STRICT_TARGET flag is set for the current + * governor. + */ + bool strict_target; + /* * Preferred average time interval between consecutive invocations of * the driver to set the frequency for this policy. To be set by the From fcb3a1ab79904d54499db77017793ccca665eb7e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:27:40 +0100 Subject: [PATCH 304/710] cpufreq: intel_pstate: Take CPUFREQ_GOV_STRICT_TARGET into account Make intel_pstate take the new CPUFREQ_GOV_STRICT_TARGET governor flag into account when it operates in the passive mode with HWP enabled, so as to fix the "powersave" governor behavior in that case (currently, HWP is allowed to scale the performance all the way up to the policy max limit when the "powersave" governor is used, but it should be constrained to the policy min limit then). Fixes: f6ebbcf08f37 ("cpufreq: intel_pstate: Implement passive mode with HWP enabled") Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Cc: 5.9+ # 5.9+: 9a2a9ebc0a75 cpufreq: Introduce governor flags Cc: 5.9+ # 5.9+: 218f66870181 cpufreq: Introduce CPUFREQ_GOV_STRICT_TARGET Cc: 5.9+ # 5.9+: ea9364bbadf1 cpufreq: Add strict_target to struct cpufreq_policy --- drivers/cpufreq/intel_pstate.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b7a9779250aa..36a3ccfe6d3d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2527,7 +2527,7 @@ static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, in } static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 target_pstate, - bool fast_switch) + bool strict, bool fast_switch) { u64 prev = READ_ONCE(cpu->hwp_req_cached), value = prev; @@ -2539,7 +2539,7 @@ static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 target_pstate, * field in it, so opportunistically update the max too if needed. */ value &= ~HWP_MAX_PERF(~0L); - value |= HWP_MAX_PERF(cpu->max_perf_ratio); + value |= HWP_MAX_PERF(strict ? target_pstate : cpu->max_perf_ratio); if (value == prev) return; @@ -2562,14 +2562,16 @@ static void intel_cpufreq_adjust_perf_ctl(struct cpudata *cpu, pstate_funcs.get_val(cpu, target_pstate)); } -static int intel_cpufreq_update_pstate(struct cpudata *cpu, int target_pstate, - bool fast_switch) +static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy, + int target_pstate, bool fast_switch) { + struct cpudata *cpu = all_cpu_data[policy->cpu]; int old_pstate = cpu->pstate.current_pstate; target_pstate = intel_pstate_prepare_request(cpu, target_pstate); if (hwp_active) { - intel_cpufreq_adjust_hwp(cpu, target_pstate, fast_switch); + intel_cpufreq_adjust_hwp(cpu, target_pstate, + policy->strict_target, fast_switch); cpu->pstate.current_pstate = target_pstate; } else if (target_pstate != old_pstate) { intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, fast_switch); @@ -2609,7 +2611,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, break; } - target_pstate = intel_cpufreq_update_pstate(cpu, target_pstate, false); + target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false); freqs.new = target_pstate * cpu->pstate.scaling; @@ -2628,7 +2630,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); - target_pstate = intel_cpufreq_update_pstate(cpu, target_pstate, true); + target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); return target_pstate * cpu->pstate.scaling; } From d61fc96a37603384cd531622c1e89de1096b5123 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Mon, 2 Nov 2020 13:37:41 +0800 Subject: [PATCH 305/710] lockdep: Avoid to modify chain keys in validate_chain() Chris Wilson reported a problem spotted by check_chain_key(): a chain key got changed in validate_chain() because we modify the ->read in validate_chain() to skip checks for dependency adding, and ->read is taken into calculation for chain key since commit f611e8cf98ec ("lockdep: Take read/write status in consideration when generate chainkey"). Fix this by avoiding to modify ->read in validate_chain() based on two facts: a) since we now support recursive read lock detection, there is no need to skip checks for dependency adding for recursive readers, b) since we have a), there is only one case left (nest_lock) where we want to skip checks in validate_chain(), we simply remove the modification for ->read and rely on the return value of check_deadlock() to skip the dependency adding. Reported-by: Chris Wilson Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201102053743.450459-1-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index b71ad8d9f1c9..d9fb9e19d2ed 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2765,7 +2765,9 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, * (Note that this has to be done separately, because the graph cannot * detect such classes of deadlocks.) * - * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read + * Returns: 0 on deadlock detected, 1 on OK, 2 if another lock with the same + * lock class is held but nest_lock is also held, i.e. we rely on the + * nest_lock to avoid the deadlock. */ static int check_deadlock(struct task_struct *curr, struct held_lock *next) @@ -2788,7 +2790,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next) * lock class (i.e. read_lock(lock)+read_lock(lock)): */ if ((next->read == 2) && prev->read) - return 2; + continue; /* * We're holding the nest_lock, which serializes this lock's @@ -3592,16 +3594,13 @@ static int validate_chain(struct task_struct *curr, if (!ret) return 0; - /* - * Mark recursive read, as we jump over it when - * building dependencies (just like we jump over - * trylock entries): - */ - if (ret == 2) - hlock->read = 2; /* * Add dependency only if this lock is not the head - * of the chain, and if it's not a secondary read-lock: + * of the chain, and if the new lock introduces no more + * lock dependency (because we already hold a lock with the + * same lock class) nor deadlock (because the nest_lock + * serializes nesting locks), see the comments for + * check_deadlock(). */ if (!chain_head && ret != 2) { if (!check_prevs_add(curr, hlock)) From 1a8cfa24e21c2f154791f0cdd85fc28496918722 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 22:51:57 +0100 Subject: [PATCH 306/710] perf/x86/intel/uncore: Fix Add BW copypasta gcc -Wextra points out a duplicate initialization of one array member: arch/x86/events/intel/uncore_snb.c:478:37: warning: initialized field overwritten [-Woverride-init] 478 | [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, The only sensible explanation is that a duplicate 'READS' was used instead of the correct 'WRITES', so change it back. Fixes: 24633d901ea4 ("perf/x86/intel/uncore: Add BW counters for GT, IA and IO breakdown") Signed-off-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201026215203.3893972-1-arnd@kernel.org --- arch/x86/events/intel/uncore_snb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 39e632ed6ca9..bbd1120ae161 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -475,7 +475,7 @@ enum perf_snb_uncore_imc_freerunning_types { static struct freerunning_counters snb_uncore_imc_freerunning[] = { [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x0, 0x0, 1, 32 }, - [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, + [SNB_PCI_UNCORE_IMC_DATA_WRITES] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, 0x0, 0x0, 1, 32 }, [SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE, 0x0, 0x0, 1, 32 }, From 16b0a7a1a0af9db6e008fecd195fe4d6cb366d83 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 2 Nov 2020 11:24:57 +0100 Subject: [PATCH 307/710] sched/fair: Ensure tasks spreading in LLC during LB schbench shows latency increase for 95 percentile above since: commit 0b0695f2b34a ("sched/fair: Rework load_balance()") Align the behavior of the load balancer with the wake up path, which tries to select an idle CPU which belongs to the LLC for a waking task. calculate_imbalance() will use nr_running instead of the spare capacity when CPUs share resources (ie cache) at the domain level. This will ensure a better spread of tasks on idle CPUs. Running schbench on a hikey (8cores arm64) shows the problem: tip/sched/core : schbench -m 2 -t 4 -s 10000 -c 1000000 -r 10 Latency percentiles (usec) 50.0th: 33 75.0th: 45 90.0th: 51 95.0th: 4152 *99.0th: 14288 99.5th: 14288 99.9th: 14288 min=0, max=14276 tip/sched/core + patch : schbench -m 2 -t 4 -s 10000 -c 1000000 -r 10 Latency percentiles (usec) 50.0th: 34 75.0th: 47 90.0th: 52 95.0th: 78 *99.0th: 94 99.5th: 94 99.9th: 94 min=0, max=94 Fixes: 0b0695f2b34a ("sched/fair: Rework load_balance()") Reported-by: Chris Mason Suggested-by: Rik van Riel Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Rik van Riel Tested-by: Rik van Riel Link: https://lkml.kernel.org/r/20201102102457.28808-1-vincent.guittot@linaro.org --- kernel/sched/fair.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index aa4c6227cd6d..210b15f068a6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9031,7 +9031,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * emptying busiest. */ if (local->group_type == group_has_spare) { - if (busiest->group_type > group_fully_busy) { + if ((busiest->group_type > group_fully_busy) && + !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) { /* * If busiest is overloaded, try to fill spare * capacity. This might end up creating spare capacity From b4c9c9f15649c98a5b45408919d1ff4fd7f5531c Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 29 Oct 2020 17:18:24 +0100 Subject: [PATCH 308/710] sched/fair: Prefer prev cpu in asymmetric wakeup path During fast wakeup path, scheduler always check whether local or prev cpus are good candidates for the task before looking for other cpus in the domain. With commit b7a331615d25 ("sched/fair: Add asymmetric CPU capacity wakeup scan") the heterogenous system gains a dedicated path but doesn't try to reuse prev cpu whenever possible. If the previous cpu is idle and belong to the LLC domain, we should check it 1st before looking for another cpu because it stays one of the best candidate and this also stabilizes task placement on the system. This change aligns asymmetric path behavior with symmetric one and reduces cases where the task migrates across all cpus of the sd_asym_cpucapacity domains at wakeup. This change does not impact normal EAS mode but only the overloaded case or when EAS is not used. - On hikey960 with performance governor (EAS disable) ./perf bench sched pipe -T -l 50000 mainline w/ patch # migrations 999364 0 ops/sec 149313(+/-0.28%) 182587(+/- 0.40) +22% - On hikey with performance governor ./perf bench sched pipe -T -l 50000 mainline w/ patch # migrations 0 0 ops/sec 47721(+/-0.76%) 47899(+/- 0.56) +0.4% According to test on hikey, the patch doesn't impact symmetric system compared to current implementation (only tested on arm64) Also read the uclamped value of task's utilization at most twice instead instead each time we compare task's utilization with cpu's capacity. Fixes: b7a331615d25 ("sched/fair: Add asymmetric CPU capacity wakeup scan") Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Tested-by: Dietmar Eggemann Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20201029161824.26389-1-vincent.guittot@linaro.org --- kernel/sched/fair.c | 67 +++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 210b15f068a6..8e563cf2b5e7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6172,21 +6172,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t static int select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) { - unsigned long best_cap = 0; + unsigned long task_util, best_cap = 0; int cpu, best_cpu = -1; struct cpumask *cpus; - sync_entity_load_avg(&p->se); - cpus = this_cpu_cpumask_var_ptr(select_idle_mask); cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); + task_util = uclamp_task_util(p); + for_each_cpu_wrap(cpu, cpus, target) { unsigned long cpu_cap = capacity_of(cpu); if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu)) continue; - if (task_fits_capacity(p, cpu_cap)) + if (fits_capacity(task_util, cpu_cap)) return cpu; if (cpu_cap > best_cap) { @@ -6198,44 +6198,42 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) return best_cpu; } +static inline bool asym_fits_capacity(int task_util, int cpu) +{ + if (static_branch_unlikely(&sched_asym_cpucapacity)) + return fits_capacity(task_util, capacity_of(cpu)); + + return true; +} + /* * Try and locate an idle core/thread in the LLC cache domain. */ static int select_idle_sibling(struct task_struct *p, int prev, int target) { struct sched_domain *sd; + unsigned long task_util; int i, recent_used_cpu; /* - * For asymmetric CPU capacity systems, our domain of interest is - * sd_asym_cpucapacity rather than sd_llc. + * On asymmetric system, update task utilization because we will check + * that the task fits with cpu's capacity. */ if (static_branch_unlikely(&sched_asym_cpucapacity)) { - sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target)); - /* - * On an asymmetric CPU capacity system where an exclusive - * cpuset defines a symmetric island (i.e. one unique - * capacity_orig value through the cpuset), the key will be set - * but the CPUs within that cpuset will not have a domain with - * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric - * capacity path. - */ - if (!sd) - goto symmetric; - - i = select_idle_capacity(p, sd, target); - return ((unsigned)i < nr_cpumask_bits) ? i : target; + sync_entity_load_avg(&p->se); + task_util = uclamp_task_util(p); } -symmetric: - if (available_idle_cpu(target) || sched_idle_cpu(target)) + if ((available_idle_cpu(target) || sched_idle_cpu(target)) && + asym_fits_capacity(task_util, target)) return target; /* * If the previous CPU is cache affine and idle, don't be stupid: */ if (prev != target && cpus_share_cache(prev, target) && - (available_idle_cpu(prev) || sched_idle_cpu(prev))) + (available_idle_cpu(prev) || sched_idle_cpu(prev)) && + asym_fits_capacity(task_util, prev)) return prev; /* @@ -6258,7 +6256,8 @@ symmetric: recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && - cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) { + cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) && + asym_fits_capacity(task_util, recent_used_cpu)) { /* * Replace recent_used_cpu with prev as it is a potential * candidate for the next wake: @@ -6267,6 +6266,26 @@ symmetric: return recent_used_cpu; } + /* + * For asymmetric CPU capacity systems, our domain of interest is + * sd_asym_cpucapacity rather than sd_llc. + */ + if (static_branch_unlikely(&sched_asym_cpucapacity)) { + sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target)); + /* + * On an asymmetric CPU capacity system where an exclusive + * cpuset defines a symmetric island (i.e. one unique + * capacity_orig value through the cpuset), the key will be set + * but the CPUs within that cpuset will not have a domain with + * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric + * capacity path. + */ + if (sd) { + i = select_idle_capacity(p, sd, target); + return ((unsigned)i < nr_cpumask_bits) ? i : target; + } + } + sd = rcu_dereference(per_cpu(sd_llc, target)); if (!sd) return target; From 8d4d9c7b4333abccb3bf310d76ef7ea2edb9828f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 29 Oct 2020 15:11:03 +0000 Subject: [PATCH 309/710] sched/debug: Fix memory corruption caused by multiple small reads of flags Reading /proc/sys/kernel/sched_domain/cpu*/domain0/flags mutliple times with small reads causes oopses with slub corruption issues because the kfree is free'ing an offset from a previous allocation. Fix this by adding in a new pointer 'buf' for the allocation and kfree and use the temporary pointer tmp to handle memory copies of the buf offsets. Fixes: 5b9f8ff7b320 ("sched/debug: Output SD flag names rather than their values") Reported-by: Jeff Bastian Signed-off-by: Colin Ian King Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20201029151103.373410-1-colin.king@canonical.com --- kernel/sched/debug.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 0655524700d2..2357921580f9 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -251,7 +251,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write, unsigned long flags = *(unsigned long *)table->data; size_t data_size = 0; size_t len = 0; - char *tmp; + char *tmp, *buf; int idx; if (write) @@ -269,17 +269,17 @@ static int sd_ctl_doflags(struct ctl_table *table, int write, return 0; } - tmp = kcalloc(data_size + 1, sizeof(*tmp), GFP_KERNEL); - if (!tmp) + buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL); + if (!buf) return -ENOMEM; for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { char *name = sd_flag_debug[idx].name; - len += snprintf(tmp + len, strlen(name) + 2, "%s ", name); + len += snprintf(buf + len, strlen(name) + 2, "%s ", name); } - tmp += *ppos; + tmp = buf + *ppos; len -= *ppos; if (len > *lenp) @@ -294,7 +294,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write, *lenp = len; *ppos += len; - kfree(tmp); + kfree(buf); return 0; } From 8d936bb13ce788c616084ab1a5754da3490a9f0c Mon Sep 17 00:00:00 2001 From: Flavio Suligoi Date: Tue, 10 Nov 2020 14:03:38 +0100 Subject: [PATCH 310/710] Documentation: ACPI: fix spelling mistakes Signed-off-by: Flavio Suligoi Signed-off-by: Rafael J. Wysocki --- Documentation/firmware-guide/acpi/acpi-lid.rst | 8 ++++---- Documentation/firmware-guide/acpi/method-tracing.rst | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/firmware-guide/acpi/acpi-lid.rst b/Documentation/firmware-guide/acpi/acpi-lid.rst index 874ce0ed340d..71b9af13a048 100644 --- a/Documentation/firmware-guide/acpi/acpi-lid.rst +++ b/Documentation/firmware-guide/acpi/acpi-lid.rst @@ -19,9 +19,9 @@ report the "current" state of the lid as either "opened" or "closed". For most platforms, both the _LID method and the lid notifications are reliable. However, there are exceptions. In order to work with these -exceptional buggy platforms, special restrictions and expections should be +exceptional buggy platforms, special restrictions and exceptions should be taken into account. This document describes the restrictions and the -expections of the Linux ACPI lid device driver. +exceptions of the Linux ACPI lid device driver. Restrictions of the returning value of the _LID control method @@ -46,7 +46,7 @@ state is changed to "closed". The "closed" notification is normally used to trigger some system power saving operations on Windows. Since it is fully tested, it is reliable from all AML tables. -Expections for the userspace users of the ACPI lid device driver +Exceptions for the userspace users of the ACPI lid device driver ================================================================ The ACPI button driver exports the lid state to the userspace via the @@ -100,7 +100,7 @@ use the following kernel parameter: C. button.lid_init_state=ignore: When this option is specified, the ACPI button driver never reports the initial lid state and there is a compensation mechanism implemented to - ensure that the reliable "closed" notifications can always be delievered + ensure that the reliable "closed" notifications can always be delivered to the userspace by always pairing "closed" input events with complement "opened" input events. But there is still no guarantee that the "opened" notifications can be delivered to the userspace when the lid is actually diff --git a/Documentation/firmware-guide/acpi/method-tracing.rst b/Documentation/firmware-guide/acpi/method-tracing.rst index 0aa7e2c5d32a..6ab6c0964042 100644 --- a/Documentation/firmware-guide/acpi/method-tracing.rst +++ b/Documentation/firmware-guide/acpi/method-tracing.rst @@ -98,7 +98,7 @@ subject to change:: [ 0.188903] exdebug-0398 ex_trace_point : Method End [0xf58394d8:\_SB.PCI0.LPCB.ECOK] execution. Developers can utilize these special log entries to track the AML -interpretion, thus can aid issue debugging and performance tuning. Note +interpretation, thus can aid issue debugging and performance tuning. Note that, as the "AML tracer" logs are implemented via ACPI_DEBUG_PRINT() macro, CONFIG_ACPI_DEBUG is also required to be enabled for enabling "AML tracer" logs. From 38748bcb940e8b52beee19b0e5cfd740475a99e1 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 10 Nov 2020 09:50:58 -0800 Subject: [PATCH 311/710] ACPI: DPTF: Support Alder Lake Add Alder Lake ACPI IDs for DPTF devices. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/acpi/dptf/dptf_pch_fivr.c | 1 + drivers/acpi/dptf/dptf_power.c | 2 ++ drivers/acpi/dptf/int340x_thermal.c | 6 ++++++ drivers/acpi/fan.c | 1 + 4 files changed, 10 insertions(+) diff --git a/drivers/acpi/dptf/dptf_pch_fivr.c b/drivers/acpi/dptf/dptf_pch_fivr.c index 4c1992fce150..5fca18296bf6 100644 --- a/drivers/acpi/dptf/dptf_pch_fivr.c +++ b/drivers/acpi/dptf/dptf_pch_fivr.c @@ -106,6 +106,7 @@ static int pch_fivr_remove(struct platform_device *pdev) static const struct acpi_device_id pch_fivr_device_ids[] = { {"INTC1045", 0}, + {"INTC1049", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, pch_fivr_device_ids); diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c index 06741305fc77..a24d5d7aa117 100644 --- a/drivers/acpi/dptf/dptf_power.c +++ b/drivers/acpi/dptf/dptf_power.c @@ -229,6 +229,8 @@ static const struct acpi_device_id int3407_device_ids[] = { {"INT3532", 0}, {"INTC1047", 0}, {"INTC1050", 0}, + {"INTC1060", 0}, + {"INTC1061", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, int3407_device_ids); diff --git a/drivers/acpi/dptf/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c index 8d420c7e7178..d14025a85ce8 100644 --- a/drivers/acpi/dptf/int340x_thermal.c +++ b/drivers/acpi/dptf/int340x_thermal.c @@ -25,10 +25,16 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = { {"INT340A"}, {"INT340B"}, {"INTC1040"}, + {"INTC1041"}, {"INTC1043"}, {"INTC1044"}, {"INTC1045"}, + {"INTC1046"}, {"INTC1047"}, + {"INTC1048"}, + {"INTC1049"}, + {"INTC1060"}, + {"INTC1061"}, {""}, }; diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 62873388b24f..48354f82fba6 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -27,6 +27,7 @@ static const struct acpi_device_id fan_device_ids[] = { {"PNP0C0B", 0}, {"INT3404", 0}, {"INTC1044", 0}, + {"INTC1048", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, fan_device_ids); From c2fe61d8be491ff8188edaf22e838f819999146b Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 10 Nov 2020 11:39:19 -0500 Subject: [PATCH 312/710] efi/x86: Free efi_pgd with free_pages() Commit d9e9a6418065 ("x86/mm/pti: Allocate a separate user PGD") changed the PGD allocation to allocate PGD_ALLOCATION_ORDER pages, so in the error path it should be freed using free_pages() rather than free_page(). Commit 06ace26f4e6f ("x86/efi: Free efi_pgd with free_pages()") fixed one instance of this, but missed another. Move the freeing out-of-line to avoid code duplication and fix this bug. Fixes: d9e9a6418065 ("x86/mm/pti: Allocate a separate user PGD") Link: https://lore.kernel.org/r/20201110163919.1134431-1-nivedita@alum.mit.edu Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel --- arch/x86/platform/efi/efi_64.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 8f5759df7776..e1e8d4e3a213 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -78,28 +78,30 @@ int __init efi_alloc_page_tables(void) gfp_mask = GFP_KERNEL | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) - return -ENOMEM; + goto fail; pgd = efi_pgd + pgd_index(EFI_VA_END); p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END); - if (!p4d) { - free_page((unsigned long)efi_pgd); - return -ENOMEM; - } + if (!p4d) + goto free_pgd; pud = pud_alloc(&init_mm, p4d, EFI_VA_END); - if (!pud) { - if (pgtable_l5_enabled()) - free_page((unsigned long) pgd_page_vaddr(*pgd)); - free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); - return -ENOMEM; - } + if (!pud) + goto free_p4d; efi_mm.pgd = efi_pgd; mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); return 0; + +free_p4d: + if (pgtable_l5_enabled()) + free_page((unsigned long)pgd_page_vaddr(*pgd)); +free_pgd: + free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); +fail: + return -ENOMEM; } /* From c335b4f1f65012713832d988ec06512c7bda5c04 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Thu, 5 Nov 2020 15:24:40 -0800 Subject: [PATCH 313/710] kunit: tool: unmark test_data as binary blobs The tools/testing/kunit/test_data/ directory was marked as binary because some of the test_data files cause checkpatch warnings. Fix this by dropping the .gitattributes file. Fixes: afc63da64f1e ("kunit: kunit_parser: make parser more robust") Signed-off-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/.gitattributes | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tools/testing/kunit/.gitattributes diff --git a/tools/testing/kunit/.gitattributes b/tools/testing/kunit/.gitattributes deleted file mode 100644 index 5b7da1fc3b8f..000000000000 --- a/tools/testing/kunit/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -test_data/* binary From 3959d0a63b3202ea2aa12b3f6effd5400d773d31 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 21 Oct 2020 00:16:03 -0700 Subject: [PATCH 314/710] kunit: Fix kunit.py parse subcommand (use null build_dir) When JSON support was added in [1], the KunitParseRequest tuple was updated to contain a 'build_dir' field, but kunit.py parse doesn't accept --build_dir as an option. The code nevertheless tried to access it, resulting in this error: AttributeError: 'Namespace' object has no attribute 'build_dir' Given that the parser only uses the build_dir variable to set the 'build_environment' json field, we set it to None (which gives the JSON 'null') for now. Ultimately, we probably do want to be able to set this, but since it's new functionality which (for the parse subcommand) never worked, this is the quickest way of getting it back up and running. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/commit/?h=kunit-fixes&id=21a6d1780d5bbfca0ce9b8104ca6233502fcbf86 Fixes: 21a6d1780d5b ("kunit: tool: allow generating test results in JSON") Signed-off-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index ebf5f5763dee..a6d5f219f714 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -337,7 +337,7 @@ def main(argv, linux=None): kunit_output = f.read().splitlines() request = KunitParseRequest(cli_args.raw_output, kunit_output, - cli_args.build_dir, + None, cli_args.json) result = parse_tests(request) if result.status != KunitStatus.SUCCESS: From b7e0b983ff13714d261883e89910b0755eb12169 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 21 Oct 2020 15:07:52 -0700 Subject: [PATCH 315/710] kunit: tool: fix pre-existing python type annotation errors The code uses annotations, but they aren't accurate. Note that type checking in python is a separate process, running `kunit.py run` will not check and complain about invalid types at runtime. Fix pre-existing issues found by running a type checker $ mypy *.py All but one of these were returning `None` without denoting this properly (via `Optional[Type]`). Signed-off-by: Daniel Latypov Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 84a1af2581f5..db0347baa428 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -12,7 +12,7 @@ from collections import namedtuple from datetime import datetime from enum import Enum, auto from functools import reduce -from typing import List +from typing import List, Optional, Tuple TestResult = namedtuple('TestResult', ['status','suites','log']) @@ -151,7 +151,7 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: else: return False -def parse_test_case(lines: List[str]) -> TestCase: +def parse_test_case(lines: List[str]) -> Optional[TestCase]: test_case = TestCase() save_non_diagnositic(lines, test_case) while parse_diagnostic(lines, test_case): @@ -163,7 +163,7 @@ def parse_test_case(lines: List[str]) -> TestCase: SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$') -def parse_subtest_header(lines: List[str]) -> str: +def parse_subtest_header(lines: List[str]) -> Optional[str]: consume_non_diagnositic(lines) if not lines: return None @@ -176,7 +176,7 @@ def parse_subtest_header(lines: List[str]) -> str: SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)') -def parse_subtest_plan(lines: List[str]) -> int: +def parse_subtest_plan(lines: List[str]) -> Optional[int]: consume_non_diagnositic(lines) match = SUBTEST_PLAN.match(lines[0]) if match: @@ -230,7 +230,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases) return max_status(max_test_case_status, test_suite.status) -def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite: +def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]: if not lines: return None consume_non_diagnositic(lines) @@ -271,7 +271,7 @@ def parse_tap_header(lines: List[str]) -> bool: TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)') -def parse_test_plan(lines: List[str]) -> int: +def parse_test_plan(lines: List[str]) -> Optional[int]: consume_non_diagnositic(lines) match = TEST_PLAN.match(lines[0]) if match: @@ -310,7 +310,7 @@ def parse_test_result(lines: List[str]) -> TestResult: else: return TestResult(TestStatus.NO_TESTS, [], lines) -def print_and_count_results(test_result: TestResult) -> None: +def print_and_count_results(test_result: TestResult) -> Tuple[int, int, int]: total_tests = 0 failed_tests = 0 crashed_tests = 0 From fcdb0bc08ced274078f371e1e0fe6421a97fa9f2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 26 Oct 2020 18:59:25 +0200 Subject: [PATCH 316/710] kunit: Do not pollute source directory with generated files (.kunitconfig) When --build_dir is provided use it and do not pollute source directory which even can be mounted over network or read-only. Signed-off-by: Andy Shevchenko Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 25 ++++++++++++------------- tools/testing/kunit/kunit_kernel.py | 24 +++++++++++++++++++----- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index a6d5f219f714..d4f7846d0745 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -11,7 +11,6 @@ import argparse import sys import os import time -import shutil from collections import namedtuple from enum import Enum, auto @@ -44,11 +43,6 @@ class KunitStatus(Enum): BUILD_FAILURE = auto() TEST_FAILURE = auto() -def create_default_kunitconfig(): - if not os.path.exists(kunit_kernel.kunitconfig_path): - shutil.copyfile('arch/um/configs/kunit_defconfig', - kunit_kernel.kunitconfig_path) - def get_kernel_root_path(): parts = sys.argv[0] if not __file__ else __file__ parts = os.path.realpath(parts).split('tools/testing/kunit') @@ -61,7 +55,6 @@ def config_tests(linux: kunit_kernel.LinuxSourceTree, kunit_parser.print_with_timestamp('Configuring KUnit Kernel ...') config_start = time.time() - create_default_kunitconfig() success = linux.build_reconfig(request.build_dir, request.make_options) config_end = time.time() if not success: @@ -262,12 +255,12 @@ def main(argv, linux=None): if not os.path.exists(cli_args.build_dir): os.mkdir(cli_args.build_dir) - if not os.path.exists(kunit_kernel.kunitconfig_path): - create_default_kunitconfig() - if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + request = KunitRequest(cli_args.raw_output, cli_args.timeout, cli_args.jobs, @@ -283,12 +276,12 @@ def main(argv, linux=None): not os.path.exists(cli_args.build_dir)): os.mkdir(cli_args.build_dir) - if not os.path.exists(kunit_kernel.kunitconfig_path): - create_default_kunitconfig() - if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + request = KunitConfigRequest(cli_args.build_dir, cli_args.make_options) result = config_tests(linux, request) @@ -301,6 +294,9 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + request = KunitBuildRequest(cli_args.jobs, cli_args.build_dir, cli_args.alltests, @@ -315,6 +311,9 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree() + linux.create_kunitconfig(cli_args.build_dir) + linux.read_kunitconfig(cli_args.build_dir) + exec_request = KunitExecRequest(cli_args.timeout, cli_args.build_dir, cli_args.alltests) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index b557b1e93f98..633a2efcfdbd 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -6,10 +6,10 @@ # Author: Felix Guo # Author: Brendan Higgins - import logging import subprocess import os +import shutil import signal from contextlib import ExitStack @@ -18,7 +18,8 @@ import kunit_config import kunit_parser KCONFIG_PATH = '.config' -kunitconfig_path = '.kunitconfig' +KUNITCONFIG_PATH = '.kunitconfig' +DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' class ConfigError(Exception): @@ -99,19 +100,22 @@ class LinuxSourceTreeOperations(object): stderr=subprocess.STDOUT) process.wait(timeout) - def get_kconfig_path(build_dir): kconfig_path = KCONFIG_PATH if build_dir: kconfig_path = os.path.join(build_dir, KCONFIG_PATH) return kconfig_path +def get_kunitconfig_path(build_dir): + kunitconfig_path = KUNITCONFIG_PATH + if build_dir: + kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH) + return kunitconfig_path + class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" def __init__(self): - self._kconfig = kunit_config.Kconfig() - self._kconfig.read_from_file(kunitconfig_path) self._ops = LinuxSourceTreeOperations() signal.signal(signal.SIGINT, self.signal_handler) @@ -123,6 +127,16 @@ class LinuxSourceTree(object): return False return True + def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH): + kunitconfig_path = get_kunitconfig_path(build_dir) + if not os.path.exists(kunitconfig_path): + shutil.copyfile(defconfig, kunitconfig_path) + + def read_kunitconfig(self, build_dir): + kunitconfig_path = get_kunitconfig_path(build_dir) + self._kconfig = kunit_config.Kconfig() + self._kconfig.read_from_file(kunitconfig_path) + def validate_config(self, build_dir): kconfig_path = get_kconfig_path(build_dir) validated_kconfig = kunit_config.Kconfig() From 128dc4bcc8c0c7c3bab4a3818a1ec608cccb017a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 26 Oct 2020 18:59:26 +0200 Subject: [PATCH 317/710] kunit: Do not pollute source directory with generated files (test.log) When --build_dir is provided use it and do not pollute source directory which even can be mounted over network or read-only. Signed-off-by: Andy Shevchenko Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 633a2efcfdbd..b4768fa03ce0 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -21,6 +21,7 @@ KCONFIG_PATH = '.config' KUNITCONFIG_PATH = '.kunitconfig' DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' +OUTFILE_PATH = 'test.log' class ConfigError(Exception): """Represents an error trying to configure the Linux kernel.""" @@ -89,11 +90,12 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise BuildError(e.output.decode()) - def linux_bin(self, params, timeout, build_dir, outfile): + def linux_bin(self, params, timeout, build_dir): """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = './linux' if build_dir: linux_bin = os.path.join(build_dir, 'linux') + outfile = get_outfile_path(build_dir) with open(outfile, 'w') as output: process = subprocess.Popen([linux_bin] + params, stdout=output, @@ -112,6 +114,12 @@ def get_kunitconfig_path(build_dir): kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH) return kunitconfig_path +def get_outfile_path(build_dir): + outfile_path = OUTFILE_PATH + if build_dir: + outfile_path = os.path.join(build_dir, OUTFILE_PATH) + return outfile_path + class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" @@ -192,8 +200,8 @@ class LinuxSourceTree(object): def run_kernel(self, args=[], build_dir='', timeout=None): args.extend(['mem=1G']) - outfile = 'test.log' - self._ops.linux_bin(args, timeout, build_dir, outfile) + self._ops.linux_bin(args, timeout, build_dir) + outfile = get_outfile_path(build_dir) subprocess.call(['stty', 'sane']) with open(outfile, 'r') as file: for line in file: From f7766424cf15fd6e03e8230fb17d5612c5b76dbe Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 27 Oct 2020 11:48:53 -0700 Subject: [PATCH 318/710] KUnit: Docs: fix a wording typo Fix a wording typo (keyboard glitch). Signed-off-by: Randy Dunlap Cc: David Gow Cc: linux-kselftest@vger.kernel.org Cc: kunit-dev@googlegroups.com Cc: Shuah Khan Cc: Shuah Khan Cc: Brendan Higgins Reviewed-by: David Gow Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/faq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/dev-tools/kunit/faq.rst b/Documentation/dev-tools/kunit/faq.rst index 1628862e7024..8d5029ad210a 100644 --- a/Documentation/dev-tools/kunit/faq.rst +++ b/Documentation/dev-tools/kunit/faq.rst @@ -90,7 +90,7 @@ things to try. re-run kunit_tool. 5. Try to run ``make ARCH=um defconfig`` before running ``kunit.py run``. This may help clean up any residual config items which could be causing problems. -6. Finally, try running KUnit outside UML. KUnit and KUnit tests can run be +6. Finally, try running KUnit outside UML. KUnit and KUnit tests can be built into any kernel, or can be built as a module and loaded at runtime. Doing so should allow you to determine if UML is causing the issue you're seeing. When tests are built-in, they will execute when the kernel boots, and From 1f4dde57125b3d91b900e82ac33a196312be5c8e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 28 Oct 2020 10:42:18 -0700 Subject: [PATCH 319/710] KUnit: Docs: style: fix some Kconfig example issues Fix the Kconfig example to be closer to Kconfig coding style. Also add punctuation and a trailing slash ('/') to a sub-directory name -- this is how the text mostly appears in other Kconfig files. Signed-off-by: Randy Dunlap Cc: David Gow Cc: linux-kselftest@vger.kernel.org Cc: kunit-dev@googlegroups.com Cc: Shuah Khan Cc: Shuah Khan Cc: Brendan Higgins Reviewed-by: David Gow Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/style.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/dev-tools/kunit/style.rst b/Documentation/dev-tools/kunit/style.rst index da1d6f0ed6bc..8dbcdc552606 100644 --- a/Documentation/dev-tools/kunit/style.rst +++ b/Documentation/dev-tools/kunit/style.rst @@ -175,17 +175,17 @@ An example Kconfig entry: .. code-block:: none - config FOO_KUNIT_TEST - tristate "KUnit test for foo" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - help - This builds unit tests for foo. + config FOO_KUNIT_TEST + tristate "KUnit test for foo" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds unit tests for foo. - For more information on KUnit and unit tests in general, please refer - to the KUnit documentation in Documentation/dev-tools/kunit + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. - If unsure, say N + If unsure, say N. Test File and Module Names From 873ddeb881e055fb0c4e371cc3a006bfd9388f00 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 28 Oct 2020 10:43:19 -0700 Subject: [PATCH 320/710] KUnit: Docs: usage: wording fixes Fix minor grammar and punctutation glitches. Hyphenate "architecture-specific" instances. Signed-off-by: Randy Dunlap Cc: David Gow Cc: linux-kselftest@vger.kernel.org Cc: kunit-dev@googlegroups.com Cc: Shuah Khan Cc: Shuah Khan Cc: Brendan Higgins Reviewed-by: David Gow Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/usage.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst index 62142a47488c..9c28c518e6a3 100644 --- a/Documentation/dev-tools/kunit/usage.rst +++ b/Documentation/dev-tools/kunit/usage.rst @@ -92,7 +92,7 @@ behavior of a function called ``add``; the first parameter is always of type the second parameter, in this case, is what the value is expected to be; the last value is what the value actually is. If ``add`` passes all of these expectations, the test case, ``add_test_basic`` will pass; if any one of these -expectations fail, the test case will fail. +expectations fails, the test case will fail. It is important to understand that a test case *fails* when any expectation is violated; however, the test will continue running, potentially trying other @@ -202,7 +202,7 @@ Example: kunit_test_suite(example_test_suite); In the above example the test suite, ``example_test_suite``, would run the test -cases ``example_test_foo``, ``example_test_bar``, and ``example_test_baz``, +cases ``example_test_foo``, ``example_test_bar``, and ``example_test_baz``; each would have ``example_test_init`` called immediately before it and would have ``example_test_exit`` called immediately after it. ``kunit_test_suite(example_test_suite)`` registers the test suite with the @@ -229,7 +229,7 @@ through some sort of indirection where a function is exposed as part of an API such that the definition of that function can be changed without affecting the rest of the code base. In the kernel this primarily comes from two constructs, classes, structs that contain function pointers that are provided by the -implementer, and architecture specific functions which have definitions selected +implementer, and architecture-specific functions which have definitions selected at compile time. Classes @@ -459,7 +459,7 @@ KUnit on non-UML architectures By default KUnit uses UML as a way to provide dependencies for code under test. Under most circumstances KUnit's usage of UML should be treated as an implementation detail of how KUnit works under the hood. Nevertheless, there -are instances where being able to run architecture specific code or test +are instances where being able to run architecture-specific code or test against real hardware is desirable. For these reasons KUnit supports running on other architectures. @@ -599,7 +599,7 @@ writing normal KUnit tests. One special caveat is that you have to reset hardware state in between test cases; if this is not possible, you may only be able to run one test case per invocation. -.. TODO(brendanhiggins@google.com): Add an actual example of an architecture +.. TODO(brendanhiggins@google.com): Add an actual example of an architecture- dependent KUnit test. KUnit debugfs representation From 390881448b1ff1e9d82896abbbda7cdb8e0be27c Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 29 Oct 2020 15:09:29 -0700 Subject: [PATCH 321/710] kunit: tool: print out stderr from make (like build warnings) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the tool redirects make stdout + stderr, and only shows them if the make command fails. This means build warnings aren't shown to the user. This change prints the contents of stderr even if make succeeds, under the assumption these are only build warnings or other messages the user likely wants to see. We drop stdout from the raised exception since we can no longer easily collate stdout and stderr and just showing the stderr seems fine. Example with a warning: [14:56:35] Building KUnit Kernel ... ../lib/kunit/kunit-test.c: In function ‘kunit_test_successful_try’: ../lib/kunit/kunit-test.c:19:6: warning: unused variable ‘unused’ [-Wunused-variable] 19 | int unused; | ^~~~~~ [14:56:40] Starting KUnit Kernel ... Note the stderr has a trailing \n, and since we use print, we add another, but it helps separate make and kunit.py output. Example with a build error: [15:02:45] Building KUnit Kernel ... ERROR:root:../lib/kunit/kunit-test.c: In function ‘kunit_test_successful_try’: ../lib/kunit/kunit-test.c:19:2: error: unknown type name ‘invalid_type’ 19 | invalid_type *test = data; | ^~~~~~~~~~~~ ... Signed-off-by: Daniel Latypov Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index b4768fa03ce0..2e3cc0fac726 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -84,11 +84,16 @@ class LinuxSourceTreeOperations(object): if build_dir: command += ['O=' + build_dir] try: - subprocess.check_output(command, stderr=subprocess.STDOUT) + proc = subprocess.Popen(command, + stderr=subprocess.PIPE, + stdout=subprocess.DEVNULL) except OSError as e: - raise BuildError('Could not call execute make: ' + str(e)) - except subprocess.CalledProcessError as e: - raise BuildError(e.output.decode()) + raise BuildError('Could not call make command: ' + str(e)) + _, stderr = proc.communicate() + if proc.returncode != 0: + raise BuildError(stderr.decode()) + if stderr: # likely only due to build warnings + print(stderr.decode()) def linux_bin(self, params, timeout, build_dir): """Runs the Linux UML binary. Must be named 'linux'.""" From 060352e141e4c71ce147a2737f6d30a97f2ec317 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Fri, 30 Oct 2020 15:38:53 -0700 Subject: [PATCH 322/710] kunit: tool: fix extra trailing \n in raw + parsed test output For simplcity, strip all trailing whitespace from parsed output. I imagine no one is printing out meaningful trailing whitespace via KUNIT_FAIL() or similar, and that if they are, they really shouldn't. `isolate_kunit_output()` yielded liens with trailing \n, which results in artifacty output like this: $ ./tools/testing/kunit/kunit.py run [16:16:46] [FAILED] example_simple_test [16:16:46] # example_simple_test: EXPECTATION FAILED at lib/kunit/kunit-example-test.c:29 [16:16:46] Expected 1 + 1 == 3, but [16:16:46] 1 + 1 == 2 [16:16:46] 3 == 3 [16:16:46] not ok 1 - example_simple_test [16:16:46] After this change: [16:16:46] # example_simple_test: EXPECTATION FAILED at lib/kunit/kunit-example-test.c:29 [16:16:46] Expected 1 + 1 == 3, but [16:16:46] 1 + 1 == 2 [16:16:46] 3 == 3 [16:16:46] not ok 1 - example_simple_test [16:16:46] We should *not* be expecting lines to end with \n in kunit_tool_test.py for this reason. Do the same for `raw_output()` as well which suffers from the same issue. This is a followup to [1], but rebased onto kunit-fixes to pick up the other raw_output() fix and fixes for kunit_tool_test.py. [1] https://lore.kernel.org/linux-kselftest/20201020233219.4146059-1-dlatypov@google.com/ Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 3 ++- tools/testing/kunit/kunit_tool_test.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index db0347baa428..bbfe1b4e4c1c 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -54,6 +54,7 @@ kunit_end_re = re.compile('(List of all partitions:|' def isolate_kunit_output(kernel_output): started = False for line in kernel_output: + line = line.rstrip() # line always has a trailing \n if kunit_start_re.search(line): prefix_len = len(line.split('TAP version')[0]) started = True @@ -65,7 +66,7 @@ def isolate_kunit_output(kernel_output): def raw_output(kernel_output): for line in kernel_output: - print(line) + print(line.rstrip()) DIVIDER = '=' * 60 diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 0b60855fb819..497ab51bc170 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -102,7 +102,7 @@ class KUnitParserTest(unittest.TestCase): 'test_data/test_output_isolated_correctly.log') file = open(log_path) result = kunit_parser.isolate_kunit_output(file.readlines()) - self.assertContains('TAP version 14\n', result) + self.assertContains('TAP version 14', result) self.assertContains(' # Subtest: example', result) self.assertContains(' 1..2', result) self.assertContains(' ok 1 - example_simple_test', result) @@ -115,7 +115,7 @@ class KUnitParserTest(unittest.TestCase): 'test_data/test_pound_sign.log') with open(log_path) as file: result = kunit_parser.isolate_kunit_output(file.readlines()) - self.assertContains('TAP version 14\n', result) + self.assertContains('TAP version 14', result) self.assertContains(' # Subtest: kunit-resource-test', result) self.assertContains(' 1..5', result) self.assertContains(' ok 1 - kunit_resource_test_init_resources', result) From 3084db0e0d5076cd48408274ab0911cd3ccdae88 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Mon, 2 Nov 2020 15:23:04 -0800 Subject: [PATCH 323/710] kunit: fix display of failed expectations for strings Currently the following expectation KUNIT_EXPECT_STREQ(test, "hi", "bye"); will produce: Expected "hi" == "bye", but "hi" == 1625079497 "bye" == 1625079500 After this patch: Expected "hi" == "bye", but "hi" == hi "bye" == bye KUNIT_INIT_BINARY_STR_ASSERT_STRUCT() was written but just mistakenly not actually used by KUNIT_EXPECT_STREQ() and friends. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index db1b0ae666c4..df60be7e22ca 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -1105,7 +1105,7 @@ do { \ KUNIT_ASSERTION(test, \ strcmp(__left, __right) op 0, \ kunit_binary_str_assert, \ - KUNIT_INIT_BINARY_ASSERT_STRUCT(test, \ + KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(test, \ assert_type, \ #op, \ #left, \ From 9a5085b3fad5d5d6019a3d160cdd70357d35c8b1 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 19 Oct 2020 23:10:49 +0200 Subject: [PATCH 324/710] um: Call pgtable_pmd_page_dtor() in __pmd_free_tlb() Commit b2b29d6d0119 ("mm: account PMD tables like PTE tables") uncovered a bug in uml, we forgot to call the destructor. While we are here, give x a sane name. Reported-by: Anton Ivanov Co-developed-by: Matthew Wilcox (Oracle) Signed-off-by: Richard Weinberger Tested-by: Christopher Obbard --- arch/um/include/asm/pgalloc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 5393e13e07e0..2bbf28cf3aa9 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -33,7 +33,13 @@ do { \ } while (0) #ifdef CONFIG_3_LEVEL_PGTABLES -#define __pmd_free_tlb(tlb,x, address) tlb_remove_page((tlb),virt_to_page(x)) + +#define __pmd_free_tlb(tlb, pmd, address) \ +do { \ + pgtable_pmd_page_dtor(virt_to_page(pmd)); \ + tlb_remove_page((tlb),virt_to_page(pmd)); \ +} while (0) \ + #endif #endif From 97adb13dc9ba08ecd4758bc59efc0205f5cbf377 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Sat, 7 Nov 2020 13:19:28 +0200 Subject: [PATCH 325/710] selftest: fix flower terse dump tests Iproute2 tc classifier terse dump has been accepted with modified syntax. Update the tests accordingly. Signed-off-by: Vlad Buslov Fixes: e7534fd42a99 ("selftests: implement flower classifier terse dump tests") Link: https://lore.kernel.org/r/20201107111928.453534-1-vlad@buslov.dev Signed-off-by: Jakub Kicinski --- .../testing/selftests/tc-testing/tc-tests/filters/tests.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index bb543bf69d69..361235ad574b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -100,7 +100,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower.*handle", "matchCount": "1", "teardown": [ @@ -119,7 +119,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": " dst_mac e4:11:22:11:4a:51", "matchCount": "0", "teardown": [ From 3a7001788fed0311d6fb77ed0dabe7bed3567bc0 Mon Sep 17 00:00:00 2001 From: Slawomir Laba Date: Wed, 14 Oct 2020 08:54:09 +0000 Subject: [PATCH 326/710] i40e: Fix MAC address setting for a VF via Host/VM Fix MAC setting flow for the PF driver. Update the unicast VF's MAC address in VF structure if it is a new setting in i40e_vc_add_mac_addr_msg. When unicast MAC address gets deleted, record that and set the new unicast MAC address that is already waiting in the filter list. This logic is based on the order of messages arriving to the PF driver. Without this change the MAC address setting was interpreted incorrectly in the following use cases: 1) Print incorrect VF MAC or zero MAC ip link show dev $pf 2) Don't preserve MAC between driver reload rmmod iavf; modprobe iavf 3) Update VF MAC when macvlan was set ip link add link $vf address $mac $vf.1 type macvlan 4) Failed to update mac address when VF was trusted ip link set dev $vf address $mac This includes all other configurations including above commands. Fixes: f657a6e1313b ("i40e: Fix VF driver MAC address configuration") Signed-off-by: Slawomir Laba Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index c96e2f2d4cba..4919d22d7b6b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2713,6 +2713,10 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; } + if (is_valid_ether_addr(al->list[i].addr) && + is_zero_ether_addr(vf->default_lan_addr.addr)) + ether_addr_copy(vf->default_lan_addr.addr, + al->list[i].addr); } } spin_unlock_bh(&vsi->mac_filter_hash_lock); @@ -2740,6 +2744,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_ether_addr_list *al = (struct virtchnl_ether_addr_list *)msg; + bool was_unimac_deleted = false; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; i40e_status ret = 0; @@ -2759,6 +2764,8 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } + if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr)) + was_unimac_deleted = true; } vsi = pf->vsi[vf->lan_vsi_idx]; @@ -2779,10 +2786,25 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n", vf->vf_id, ret); + if (vf->trusted && was_unimac_deleted) { + struct i40e_mac_filter *f; + struct hlist_node *h; + u8 *macaddr = NULL; + int bkt; + + /* set last unicast mac address as default */ + spin_lock_bh(&vsi->mac_filter_hash_lock); + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + if (is_valid_ether_addr(f->macaddr)) + macaddr = f->macaddr; + } + if (macaddr) + ether_addr_copy(vf->default_lan_addr.addr, macaddr); + spin_unlock_bh(&vsi->mac_filter_hash_lock); + } error_param: /* send the response to the VF */ - return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR, - ret); + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR, ret); } /** From 1773482fd8cecd5b060d409853f8145be3064a41 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Sep 2020 17:32:28 +0300 Subject: [PATCH 327/710] i40e, xsk: uninitialized variable in i40e_clean_rx_irq_zc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "failure" variable is used without being initialized. It should be set to false. Fixes: 8cbf74149903 ("i40e, xsk: move buffer allocation out of the Rx processing loop") Signed-off-by: Dan Carpenter Acked-by: Björn Töpel Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 6acede0acdca..567fd67e900e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -281,8 +281,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); unsigned int xdp_res, xdp_xmit = 0; + bool failure = false; struct sk_buff *skb; - bool failure; while (likely(total_rx_packets < (unsigned int)budget)) { union i40e_rx_desc *rx_desc; From 6b7ed22ae4c96a415001f0c3116ebee15bb8491a Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 25 Sep 2020 11:35:37 -0700 Subject: [PATCH 328/710] igc: Fix returning wrong statistics 'igc_update_stats()' was not updating 'netdev->stats', so the returned statistics, for example, requested by: $ ip -s link show dev enp3s0 were not being updated and were always zero. Fix by returning a set of statistics that are actually being updated (adapter->stats64). Fixes: c9a11c23ceb6 ("igc: Add netdev") Signed-off-by: Vinicius Costa Gomes Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 9112dff075cf..b673ac1199bb 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -3891,21 +3891,23 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) } /** - * igc_get_stats - Get System Network Statistics + * igc_get_stats64 - Get System Network Statistics * @netdev: network interface device structure + * @stats: rtnl_link_stats64 pointer * * Returns the address of the device statistics structure. * The statistics are updated here and also from the timer callback. */ -static struct net_device_stats *igc_get_stats(struct net_device *netdev) +static void igc_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) { struct igc_adapter *adapter = netdev_priv(netdev); + spin_lock(&adapter->stats64_lock); if (!test_bit(__IGC_RESETTING, &adapter->state)) igc_update_stats(adapter); - - /* only return the current stats */ - return &netdev->stats; + memcpy(stats, &adapter->stats64, sizeof(*stats)); + spin_unlock(&adapter->stats64_lock); } static netdev_features_t igc_fix_features(struct net_device *netdev, @@ -4855,7 +4857,7 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_set_rx_mode = igc_set_rx_mode, .ndo_set_mac_address = igc_set_mac, .ndo_change_mtu = igc_change_mtu, - .ndo_get_stats = igc_get_stats, + .ndo_get_stats64 = igc_get_stats64, .ndo_fix_features = igc_fix_features, .ndo_set_features = igc_set_features, .ndo_features_check = igc_features_check, From 5fb7f75bc138c868df2df40d386c7244122cca77 Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Mon, 9 Nov 2020 16:07:35 -0800 Subject: [PATCH 329/710] MAINTAINERS: Update repositories for Intel Ethernet Drivers Update Intel Ethernet Drivers repositories to new locations. Signed-off-by: Tony Nguyen --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cd123d0a6a2d..9e826b55fcd9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8829,8 +8829,8 @@ S: Supported W: http://www.intel.com/support/feedback.htm W: http://e1000.sourceforge.net/ Q: http://patchwork.ozlabs.org/project/intel-wired-lan/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-queue.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue.git F: Documentation/networking/device_drivers/ethernet/intel/ F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ From a6c40b8032b845f132abfcbcbed6bddebbcc3b4a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 8 Nov 2020 12:35:35 +0100 Subject: [PATCH 330/710] drm/mcde: Fix unbalanced regulator Since we now turn off the EPOD regulator to reset the hardware, we need to balance the regulators after that point. If registering the master fails we only need to disable one regulator. Fix this by open-coding this leg of the error path. Fixes: c4842d4d0f74 ("drm/mcde: Fix display pipeline restart") Signed-off-by: Linus Walleij Reviewed-by: Sam Ravnborg Cc: Stephan Gerhold Link: https://patchwork.freedesktop.org/patch/msgid/20201108113535.1819952-1-linus.walleij@linaro.org --- drivers/gpu/drm/mcde/mcde_drv.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mcde/mcde_drv.c b/drivers/gpu/drm/mcde/mcde_drv.c index c592957ed07f..92f8bd907193 100644 --- a/drivers/gpu/drm/mcde/mcde_drv.c +++ b/drivers/gpu/drm/mcde/mcde_drv.c @@ -413,7 +413,13 @@ static int mcde_probe(struct platform_device *pdev) match); if (ret) { dev_err(dev, "failed to add component master\n"); - goto clk_disable; + /* + * The EPOD regulator is already disabled at this point so some + * special errorpath code is needed + */ + clk_disable_unprepare(mcde->mcde_clk); + regulator_disable(mcde->vana); + return ret; } return 0; From 866358ec331f8faa394995fb4b511af1db0247c8 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Sun, 8 Nov 2020 09:08:26 -0500 Subject: [PATCH 331/710] netlabel: fix our progress tracking in netlbl_unlabel_staticlist() The current NetLabel code doesn't correctly keep track of the netlink dump state in some cases, in particular when multiple interfaces with large configurations are loaded. The problem manifests itself by not reporting the full configuration to userspace, even though it is loaded and active in the kernel. This patch fixes this by ensuring that the dump state is properly reset when necessary inside the netlbl_unlabel_staticlist() function. Fixes: 8cc44579d1bd ("NetLabel: Introduce static network labels for unlabeled connections") Signed-off-by: Paul Moore Link: https://lore.kernel.org/r/160484450633.3752.16512718263560813473.stgit@sifl Signed-off-by: Jakub Kicinski --- net/netlabel/netlabel_unlabeled.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 2e8e3f7b2111..fc55c9116da0 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1166,12 +1166,13 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlbl_unlhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; - u32 iter_bkt; - u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; + u32 skip_addr4 = cb->args[2]; + u32 iter_bkt, iter_chain, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) + u32 skip_addr6 = cb->args[3]; struct netlbl_af6list *addr6; #endif @@ -1182,7 +1183,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, rcu_read_lock(); for (iter_bkt = skip_bkt; iter_bkt < rcu_dereference(netlbl_unlhsh)->size; - iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) { + iter_bkt++) { iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt]; list_for_each_entry_rcu(iface, iter_list, list) { if (!iface->valid || @@ -1190,7 +1191,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, continue; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { - if (iter_addr4++ < cb->args[2]) + if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1203,10 +1204,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, goto unlabel_staticlist_return; } } + iter_addr4 = 0; + skip_addr4 = 0; #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { - if (iter_addr6++ < cb->args[3]) + if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1219,8 +1222,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, goto unlabel_staticlist_return; } } + iter_addr6 = 0; + skip_addr6 = 0; #endif /* IPv6 */ } + iter_chain = 0; + skip_chain = 0; } unlabel_staticlist_return: From 902a66e08ceaadb9a7a1ab3a4f3af611cd1d8cba Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Sun, 8 Nov 2020 12:12:24 -0500 Subject: [PATCH 332/710] lan743x: correctly handle chips with internal PHY Commit 6f197fb63850 ("lan743x: Added fixed link and RGMII support") assumes that chips with an internal PHY will never have a devicetree entry. This is incorrect: even for these chips, a devicetree entry can be useful e.g. to pass the mac address from bootloader to chip: &pcie { status = "okay"; host@0 { reg = <0 0 0 0 0>; #address-cells = <3>; #size-cells = <2>; lan7430: ethernet@0 { /* LAN7430 with internal PHY */ compatible = "microchip,lan743x"; status = "okay"; reg = <0 0 0 0 0>; /* filled in by bootloader */ local-mac-address = [00 00 00 00 00 00]; }; }; }; If a devicetree entry is present, the driver will not attach the chip to its internal phy, and the chip will be non-operational. Fix by tweaking the phy connection algorithm: - first try to connect to a phy specified in the devicetree (could be 'real' phy, or just a 'fixed-link') - if that doesn't succeed, try to connect to an internal phy, even if the chip has a devnode Tested on a LAN7430 with internal PHY. I cannot test a device using fixed-link, as I do not have access to one. Fixes: 6f197fb63850 ("lan743x: Added fixed link and RGMII support") Tested-by: Sven Van Asbroeck # lan7430 Reviewed-by: Andrew Lunn Signed-off-by: Sven Van Asbroeck Link: https://lore.kernel.org/r/20201108171224.23829-1-TheSven73@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index a1938842f828..bd77877cf1cc 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1026,9 +1026,9 @@ static int lan743x_phy_open(struct lan743x_adapter *adapter) netdev = adapter->netdev; phynode = of_node_get(adapter->pdev->dev.of_node); - adapter->phy_mode = PHY_INTERFACE_MODE_GMII; if (phynode) { + /* try devicetree phy, or fixed link */ of_get_phy_mode(phynode, &adapter->phy_mode); if (of_phy_is_fixed_link(phynode)) { @@ -1044,13 +1044,15 @@ static int lan743x_phy_open(struct lan743x_adapter *adapter) lan743x_phy_link_status_change, 0, adapter->phy_mode); of_node_put(phynode); - if (!phydev) - goto return_error; - } else { + } + + if (!phydev) { + /* try internal phy */ phydev = phy_find_first(adapter->mdiobus); if (!phydev) goto return_error; + adapter->phy_mode = PHY_INTERFACE_MODE_GMII; ret = phy_connect_direct(netdev, phydev, lan743x_phy_link_status_change, adapter->phy_mode); From f3037c5a31b58a73b32a36e938ad0560085acadd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 8 Nov 2020 22:44:02 +0100 Subject: [PATCH 333/710] net: phy: realtek: support paged operations on RTL8201CP The RTL8401-internal PHY identifies as RTL8201CP, and the init sequence in r8169, copied from vendor driver r8168, uses paged operations. Therefore set the same paged operation callbacks as for the other Realtek PHY's. Fixes: cdafdc29ef75 ("r8169: sync support for RTL8401 with vendor driver") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/69882f7a-ca2f-e0c7-ae83-c9b6937282cd@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index fb1db713b7fb..575580d3ffe0 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -551,6 +551,8 @@ static struct phy_driver realtek_drvs[] = { { PHY_ID_MATCH_EXACT(0x00008201), .name = "RTL8201CP Ethernet", + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, { PHY_ID_MATCH_EXACT(0x001cc816), .name = "RTL8201F Fast Ethernet", From ea8439899c0b15a176664df62aff928010fad276 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:43 -0800 Subject: [PATCH 334/710] xfs: fix flags argument to rmap lookup when converting shared file rmaps Pass the same oldext argument (which contains the existing rmapping's unwritten state) to xfs_rmap_lookup_le_range at the start of xfs_rmap_convert_shared. At this point in the code, flags is zero, which means that we perform lookups using the wrong key. Fixes: 3f165b334e51 ("xfs: convert unwritten status of reverse mappings for shared files") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_rmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 340c83f76c80..2668ebe02865 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -1514,7 +1514,7 @@ xfs_rmap_convert_shared( * record for our insertion point. This will also give us the record for * start block contiguity tests. */ - error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags, + error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext, &PREV, &i); if (error) goto done; From 5dda3897fd90783358c4c6115ef86047d8c8f503 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:43 -0800 Subject: [PATCH 335/710] xfs: set the unwritten bit in rmap lookup flags in xchk_bmap_get_rmapextents When the bmbt scrubber is looking up rmap extents, we need to set the extent flags from the bmbt record fully. This will matter once we fix the rmap btree comparison functions to check those flags correctly. Fixes: d852657ccfc0 ("xfs: cross-reference reverse-mapping btree") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/bmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 955302e7cdde..412e2ec55e38 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -113,6 +113,8 @@ xchk_bmap_get_rmap( if (info->whichfork == XFS_ATTR_FORK) rflags |= XFS_RMAP_ATTR_FORK; + if (irec->br_state == XFS_EXT_UNWRITTEN) + rflags |= XFS_RMAP_UNWRITTEN; /* * CoW staging extents are owned (on disk) by the refcountbt, so From 6ff646b2ceb0eec916101877f38da0b73e3a5b7f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:44 -0800 Subject: [PATCH 336/710] xfs: fix rmap key and record comparison functions Keys for extent interval records in the reverse mapping btree are supposed to be computed as follows: (physical block, owner, fork, is_btree, is_unwritten, offset) This provides users the ability to look up a reverse mapping from a bmbt record -- start with the physical block; then if there are multiple records for the same block, move on to the owner; then the inode fork type; and so on to the file offset. However, the key comparison functions incorrectly remove the fork/btree/unwritten information that's encoded in the on-disk offset. This means that lookup comparisons are only done with: (physical block, owner, offset) This means that queries can return incorrect results. On consistent filesystems this hasn't been an issue because blocks are never shared between forks or with bmbt blocks; and are never unwritten. However, this bug means that online repair cannot always detect corruption in the key information in internal rmapbt nodes. Found by fuzzing keys[1].attrfork = ones on xfs/371. Fixes: 4b8ed67794fe ("xfs: add rmap btree operations") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_rmap_btree.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index beb81c84a937..577a66381327 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -243,8 +243,8 @@ xfs_rmapbt_key_diff( else if (y > x) return -1; - x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset)); - y = rec->rm_offset; + x = be64_to_cpu(kp->rm_offset); + y = xfs_rmap_irec_offset_pack(rec); if (x > y) return 1; else if (y > x) @@ -275,8 +275,8 @@ xfs_rmapbt_diff_two_keys( else if (y > x) return -1; - x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset)); - y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset)); + x = be64_to_cpu(kp1->rm_offset); + y = be64_to_cpu(kp2->rm_offset); if (x > y) return 1; else if (y > x) @@ -390,8 +390,8 @@ xfs_rmapbt_keys_inorder( return 1; else if (a > b) return 0; - a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset)); - b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset)); + a = be64_to_cpu(k1->rmap.rm_offset); + b = be64_to_cpu(k2->rmap.rm_offset); if (a <= b) return 1; return 0; @@ -420,8 +420,8 @@ xfs_rmapbt_recs_inorder( return 1; else if (a > b) return 0; - a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset)); - b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset)); + a = be64_to_cpu(r1->rmap.rm_offset); + b = be64_to_cpu(r2->rmap.rm_offset); if (a <= b) return 1; return 0; From 54e9b09e153842ab5adb8a460b891e11b39e9c3d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:42 -0800 Subject: [PATCH 337/710] xfs: fix brainos in the refcount scrubber's rmap fragment processor Fix some serious WTF in the reference count scrubber's rmap fragment processing. The code comment says that this loop is supposed to move all fragment records starting at or before bno onto the worklist, but there's no obvious reason why nr (the number of items added) should increment starting from 1, and breaking the loop when we've added the target number seems dubious since we could have more rmap fragments that should have been added to the worklist. This seems to manifest in xfs/411 when adding one to the refcount field. Fixes: dbde19da9637 ("xfs: cross-reference the rmapbt data with the refcountbt") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/refcount.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index beaeb6fa3119..dd672e6bbc75 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -170,7 +170,6 @@ xchk_refcountbt_process_rmap_fragments( */ INIT_LIST_HEAD(&worklist); rbno = NULLAGBLOCK; - nr = 1; /* Make sure the fragments actually /are/ in agbno order. */ bno = 0; @@ -184,15 +183,14 @@ xchk_refcountbt_process_rmap_fragments( * Find all the rmaps that start at or before the refc extent, * and put them on the worklist. */ + nr = 0; list_for_each_entry_safe(frag, n, &refchk->fragments, list) { - if (frag->rm.rm_startblock > refchk->bno) - goto done; + if (frag->rm.rm_startblock > refchk->bno || nr > target_nr) + break; bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; if (bno < rbno) rbno = bno; list_move_tail(&frag->list, &worklist); - if (nr == target_nr) - break; nr++; } From 22843291efc986ce7722610073fcf85a39b4cb13 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:49:29 -0800 Subject: [PATCH 338/710] vfs: remove lockdep bogosity in __sb_start_write __sb_start_write has some weird looking lockdep code that claims to exist to handle nested freeze locking requests from xfs. The code as written seems broken -- if we think we hold a read lock on any of the higher freeze levels (e.g. we hold SB_FREEZE_WRITE and are trying to lock SB_FREEZE_PAGEFAULT), it converts a blocking lock attempt into a trylock. However, it's not correct to downgrade a blocking lock attempt to a trylock unless the downgrading code or the callers are prepared to deal with that situation. Neither __sb_start_write nor its callers handle this at all. For example: sb_start_pagefault ignores the return value completely, with the result that if xfs_filemap_fault loses a race with a different thread trying to fsfreeze, it will proceed without pagefault freeze protection (thereby breaking locking rules) and then unlocks the pagefault freeze lock that it doesn't own on its way out (thereby corrupting the lock state), which leads to a system hang shortly afterwards. Normally, this won't happen because our ownership of a read lock on a higher freeze protection level blocks fsfreeze from grabbing a write lock on that higher level. *However*, if lockdep is offline, lock_is_held_type unconditionally returns 1, which means that percpu_rwsem_is_held returns 1, which means that __sb_start_write unconditionally converts blocking freeze lock attempts into trylocks, even when we *don't* hold anything that would block a fsfreeze. Apparently this all held together until 5.10-rc1, when bugs in lockdep caused lockdep to shut itself off early in an fstests run, and once fstests gets to the "race writes with freezer" tests, kaboom. This might explain the long trail of vanishingly infrequent livelocks in fstests after lockdep goes offline that I've never been able to diagnose. We could fix it by spinning on the trylock if wait==true, but AFAICT the locking works fine if lockdep is not built at all (and I didn't see any complaints running fstests overnight), so remove this snippet entirely. NOTE: Commit f4b554af9931 in 2015 created the current weird logic (which used to exist in a different form in commit 5accdf82ba25c from 2012) in __sb_start_write. XFS solved this whole problem in the late 2.6 era by creating a variant of transactions (XFS_TRANS_NO_WRITECOUNT) that don't grab intwrite freeze protection, thus making lockdep's solution unnecessary. The commit claims that Dave Chinner explained that the trylock hack + comment could be removed, but nobody ever did. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara --- fs/super.c | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/fs/super.c b/fs/super.c index a51c2083cd6b..e1fd667454d4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1647,36 +1647,11 @@ EXPORT_SYMBOL(__sb_end_write); */ int __sb_start_write(struct super_block *sb, int level, bool wait) { - bool force_trylock = false; - int ret = 1; + if (!wait) + return percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); -#ifdef CONFIG_LOCKDEP - /* - * We want lockdep to tell us about possible deadlocks with freezing - * but it's it bit tricky to properly instrument it. Getting a freeze - * protection works as getting a read lock but there are subtle - * problems. XFS for example gets freeze protection on internal level - * twice in some cases, which is OK only because we already hold a - * freeze protection also on higher level. Due to these cases we have - * to use wait == F (trylock mode) which must not fail. - */ - if (wait) { - int i; - - for (i = 0; i < level - 1; i++) - if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) { - force_trylock = true; - break; - } - } -#endif - if (wait && !force_trylock) - percpu_down_read(sb->s_writers.rw_sem + level-1); - else - ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); - - WARN_ON(force_trylock && !ret); - return ret; + percpu_down_read(sb->s_writers.rw_sem + level-1); + return 1; } EXPORT_SYMBOL(__sb_start_write); From 8a3c84b649b033024d2349f96234b26cbd6083a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:50:21 -0800 Subject: [PATCH 339/710] vfs: separate __sb_start_write into blocking and non-blocking helpers Break this function into two helpers so that it's obvious that the trylock versions return a value that must be checked, and the blocking versions don't require that. While we're at it, clean up the return type mismatch. Signed-off-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig --- fs/aio.c | 2 +- fs/io_uring.c | 3 +-- fs/super.c | 18 ++++++++++++------ include/linux/fs.h | 21 +++++++++++---------- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index c45c20d87538..6a21d8919409 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1572,7 +1572,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, * we return to userspace. */ if (S_ISREG(file_inode(file)->i_mode)) { - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); + sb_start_write(file_inode(file)->i_sb); __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); } req->ki_flags |= IOCB_WRITE; diff --git a/fs/io_uring.c b/fs/io_uring.c index b42dfa0243bf..4cbaddfe3d80 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3532,8 +3532,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, * we return to userspace. */ if (req->flags & REQ_F_ISREG) { - __sb_start_write(file_inode(req->file)->i_sb, - SB_FREEZE_WRITE, true); + sb_start_write(file_inode(req->file)->i_sb); __sb_writers_release(file_inode(req->file)->i_sb, SB_FREEZE_WRITE); } diff --git a/fs/super.c b/fs/super.c index e1fd667454d4..59aa59279133 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1645,16 +1645,22 @@ EXPORT_SYMBOL(__sb_end_write); * This is an internal function, please use sb_start_{write,pagefault,intwrite} * instead. */ -int __sb_start_write(struct super_block *sb, int level, bool wait) +void __sb_start_write(struct super_block *sb, int level) { - if (!wait) - return percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); - - percpu_down_read(sb->s_writers.rw_sem + level-1); - return 1; + percpu_down_read(sb->s_writers.rw_sem + level - 1); } EXPORT_SYMBOL(__sb_start_write); +/* + * This is an internal function, please use sb_start_{write,pagefault,intwrite} + * instead. + */ +bool __sb_start_write_trylock(struct super_block *sb, int level) +{ + return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); +} +EXPORT_SYMBOL_GPL(__sb_start_write_trylock); + /** * sb_wait_write - wait until all writers to given file system finish * @sb: the super for which we wait diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bd126418bb6..305989afd49c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1581,7 +1581,8 @@ extern struct timespec64 current_time(struct inode *inode); */ void __sb_end_write(struct super_block *sb, int level); -int __sb_start_write(struct super_block *sb, int level, bool wait); +void __sb_start_write(struct super_block *sb, int level); +bool __sb_start_write_trylock(struct super_block *sb, int level); #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) @@ -1645,12 +1646,12 @@ static inline void sb_end_intwrite(struct super_block *sb) */ static inline void sb_start_write(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_WRITE, true); + __sb_start_write(sb, SB_FREEZE_WRITE); } -static inline int sb_start_write_trylock(struct super_block *sb) +static inline bool sb_start_write_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_WRITE, false); + return __sb_start_write_trylock(sb, SB_FREEZE_WRITE); } /** @@ -1674,7 +1675,7 @@ static inline int sb_start_write_trylock(struct super_block *sb) */ static inline void sb_start_pagefault(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); + __sb_start_write(sb, SB_FREEZE_PAGEFAULT); } /* @@ -1692,12 +1693,12 @@ static inline void sb_start_pagefault(struct super_block *sb) */ static inline void sb_start_intwrite(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_FS, true); + __sb_start_write(sb, SB_FREEZE_FS); } -static inline int sb_start_intwrite_trylock(struct super_block *sb) +static inline bool sb_start_intwrite_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_FS, false); + return __sb_start_write_trylock(sb, SB_FREEZE_FS); } @@ -2756,14 +2757,14 @@ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); + sb_start_write(file_inode(file)->i_sb); } static inline bool file_start_write_trylock(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; - return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false); + return sb_start_write_trylock(file_inode(file)->i_sb); } static inline void file_end_write(struct file *file) From 9b8523423b23ee3dfd88e32f5b7207be56a4e782 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:50:21 -0800 Subject: [PATCH 340/710] vfs: move __sb_{start,end}_write* to fs.h Now that we've straightened out the callers, move these three functions to fs.h since they're fairly trivial. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara --- fs/super.c | 30 ------------------------------ include/linux/fs.h | 21 ++++++++++++++++++--- 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/fs/super.c b/fs/super.c index 59aa59279133..98bb0629ee10 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1631,36 +1631,6 @@ int super_setup_bdi(struct super_block *sb) } EXPORT_SYMBOL(super_setup_bdi); -/* - * This is an internal function, please use sb_end_{write,pagefault,intwrite} - * instead. - */ -void __sb_end_write(struct super_block *sb, int level) -{ - percpu_up_read(sb->s_writers.rw_sem + level-1); -} -EXPORT_SYMBOL(__sb_end_write); - -/* - * This is an internal function, please use sb_start_{write,pagefault,intwrite} - * instead. - */ -void __sb_start_write(struct super_block *sb, int level) -{ - percpu_down_read(sb->s_writers.rw_sem + level - 1); -} -EXPORT_SYMBOL(__sb_start_write); - -/* - * This is an internal function, please use sb_start_{write,pagefault,intwrite} - * instead. - */ -bool __sb_start_write_trylock(struct super_block *sb, int level) -{ - return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); -} -EXPORT_SYMBOL_GPL(__sb_start_write_trylock); - /** * sb_wait_write - wait until all writers to given file system finish * @sb: the super for which we wait diff --git a/include/linux/fs.h b/include/linux/fs.h index 305989afd49c..6dabd019cab0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1580,9 +1580,24 @@ extern struct timespec64 current_time(struct inode *inode); * Snapshotting support. */ -void __sb_end_write(struct super_block *sb, int level); -void __sb_start_write(struct super_block *sb, int level); -bool __sb_start_write_trylock(struct super_block *sb, int level); +/* + * These are internal functions, please use sb_start_{write,pagefault,intwrite} + * instead. + */ +static inline void __sb_end_write(struct super_block *sb, int level) +{ + percpu_up_read(sb->s_writers.rw_sem + level-1); +} + +static inline void __sb_start_write(struct super_block *sb, int level) +{ + percpu_down_read(sb->s_writers.rw_sem + level - 1); +} + +static inline bool __sb_start_write_trylock(struct super_block *sb, int level) +{ + return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); +} #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) From c583bcb8f5edd48c1798798e341f78afb9bf4f6f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 24 Sep 2020 15:11:55 -0700 Subject: [PATCH 341/710] rcu: Don't invoke try_invoke_on_locked_down_task() with irqs disabled The try_invoke_on_locked_down_task() function requires that interrupts be enabled, but it is called with interrupts disabled from rcu_print_task_stall(), resulting in an "IRQs not enabled as expected" diagnostic. This commit therefore updates rcu_print_task_stall() to accumulate a list of the first few tasks while holding the current leaf rcu_node structure's ->lock, then releases that lock and only then uses try_invoke_on_locked_down_task() to attempt to obtain per-task detailed information. Of course, as soon as ->lock is released, the task might exit, so the get_task_struct() function is used to prevent the task structure from going away in the meantime. Link: https://lore.kernel.org/lkml/000000000000903d5805ab908fc4@google.com/ Fixes: 5bef8da66a9c ("rcu: Add per-task state to RCU CPU stall warnings") Reported-by: syzbot+cb3b69ae80afd6535b0e@syzkaller.appspotmail.com Reported-by: syzbot+f04854e1c5c9e913cc27@syzkaller.appspotmail.com Tested-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_stall.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 0fde39b8daab..ca21d28a0f98 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -249,13 +249,16 @@ static bool check_slow_task(struct task_struct *t, void *arg) /* * Scan the current list of tasks blocked within RCU read-side critical - * sections, printing out the tid of each. + * sections, printing out the tid of each of the first few of them. */ -static int rcu_print_task_stall(struct rcu_node *rnp) +static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) + __releases(rnp->lock) { + int i = 0; int ndetected = 0; struct rcu_stall_chk_rdr rscr; struct task_struct *t; + struct task_struct *ts[8]; if (!rcu_preempt_blocked_readers_cgp(rnp)) return 0; @@ -264,6 +267,14 @@ static int rcu_print_task_stall(struct rcu_node *rnp) t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { + get_task_struct(t); + ts[i++] = t; + if (i >= ARRAY_SIZE(ts)) + break; + } + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + for (i--; i; i--) { + t = ts[i]; if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr)) pr_cont(" P%d", t->pid); else @@ -273,6 +284,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp) ".q"[rscr.rs.b.need_qs], ".e"[rscr.rs.b.exp_hint], ".l"[rscr.on_blkd_list]); + put_task_struct(t); ndetected++; } pr_cont("\n"); @@ -293,8 +305,9 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) * Because preemptible RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections. */ -static int rcu_print_task_stall(struct rcu_node *rnp) +static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) { + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return 0; } #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ @@ -472,7 +485,6 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name); rcu_for_each_leaf_node(rnp) { raw_spin_lock_irqsave_rcu_node(rnp, flags); - ndetected += rcu_print_task_stall(rnp); if (rnp->qsmask != 0) { for_each_leaf_node_possible_cpu(rnp, cpu) if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { @@ -480,7 +492,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) ndetected++; } } - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock. } for_each_possible_cpu(cpu) From 909172a149749242990a6e64cb55d55460d4e417 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Tue, 10 Nov 2020 08:16:31 +0800 Subject: [PATCH 342/710] net: Update window_clamp if SOCK_RCVBUF is set When net.ipv4.tcp_syncookies=1 and syn flood is happened, cookie_v4_check or cookie_v6_check tries to redo what tcp_v4_send_synack or tcp_v6_send_synack did, rsk_window_clamp will be changed if SOCK_RCVBUF is set, which will make rcv_wscale is different, the client still operates with initial window scale and can overshot granted window, the client use the initial scale but local server use new scale to advertise window value, and session work abnormally. Fixes: e88c64f0a425 ("tcp: allow effective reduction of TCP's rcv-buffer via setsockopt") Signed-off-by: Mao Wenan Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/1604967391-123737-1-git-send-email-wenan.mao@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/ipv4/syncookies.c | 9 +++++++-- net/ipv6/syncookies.c | 10 ++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 6ac473b47f30..00dc3f943c80 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -331,7 +331,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; - int mss; + int full_space, mss; struct rtable *rt; __u8 rcv_wscale; struct flowi4 fl4; @@ -427,8 +427,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); + /* limit the window selection if the user enforce a smaller rx buffer */ + full_space = tcp_full_space(sk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; - tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index e796a64be308..9b6cae1e49d9 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -136,7 +136,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; - int mss; + int full_space, mss; struct dst_entry *dst; __u8 rcv_wscale; u32 tsoff = 0; @@ -241,7 +241,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) } req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); - tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, + /* limit the window selection if the user enforce a smaller rx buffer */ + full_space = tcp_full_space(sk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; + + tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); From 2bae900b9419db3f3e43bbda3194657235fee096 Mon Sep 17 00:00:00 2001 From: zhangxiaoxu Date: Mon, 9 Nov 2020 09:44:16 -0500 Subject: [PATCH 343/710] net: dsa: mv88e6xxx: Fix memleak in mv88e6xxx_region_atu_snapshot When mv88e6xxx_fid_map return error, we lost free the table. Fix it. Fixes: bfb255428966 ("net: dsa: mv88e6xxx: Add devlink regions") Reported-by: Hulk Robot Signed-off-by: zhangxiaoxu Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20201109144416.1540867-1-zhangxiaoxu5@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/devlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 10cd1bfd81a0..ade04c036fd9 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -393,8 +393,10 @@ static int mv88e6xxx_region_atu_snapshot(struct devlink *dl, mv88e6xxx_reg_lock(chip); err = mv88e6xxx_fid_map(chip, fid_bitmap); - if (err) + if (err) { + kfree(table); goto out; + } while (1) { fid = find_next_bit(fid_bitmap, MV88E6XXX_N_FID, fid + 1); From 2b52a4b65bc8f14520fe6e996ea7fb3f7e400761 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Mon, 9 Nov 2020 15:38:28 -0500 Subject: [PATCH 344/710] lan743x: fix "BUG: invalid wait context" when setting rx mode In the net core, the struct net_device_ops -> ndo_set_rx_mode() callback is called with the dev->addr_list_lock spinlock held. However, this driver's ndo_set_rx_mode callback eventually calls lan743x_dp_write(), which acquires a mutex. Mutex acquisition may sleep, and this is not allowed when holding a spinlock. Fix by removing the dp_lock mutex entirely. Its purpose is to prevent concurrent accesses to the data port. No concurrent accesses are possible, because the dev->addr_list_lock spinlock in the core only lets through one thread at a time. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Sven Van Asbroeck Link: https://lore.kernel.org/r/20201109203828.5115-1-TheSven73@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 12 +++--------- drivers/net/ethernet/microchip/lan743x_main.h | 3 --- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index bd77877cf1cc..173158656559 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -674,14 +674,12 @@ clean_up: static int lan743x_dp_write(struct lan743x_adapter *adapter, u32 select, u32 addr, u32 length, u32 *buf) { - int ret = -EIO; u32 dp_sel; int i; - mutex_lock(&adapter->dp_lock); if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, 1, 40, 100, 100)) - goto unlock; + return -EIO; dp_sel = lan743x_csr_read(adapter, DP_SEL); dp_sel &= ~DP_SEL_MASK_; dp_sel |= select; @@ -693,13 +691,10 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_); if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, 1, 40, 100, 100)) - goto unlock; + return -EIO; } - ret = 0; -unlock: - mutex_unlock(&adapter->dp_lock); - return ret; + return 0; } static u32 lan743x_mac_mii_access(u16 id, u16 index, int read) @@ -2735,7 +2730,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, adapter->intr.irq = adapter->pdev->irq; lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF); - mutex_init(&adapter->dp_lock); ret = lan743x_gpio_init(adapter); if (ret) diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index c61a40411317..a536f4a4994d 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -712,9 +712,6 @@ struct lan743x_adapter { struct lan743x_csr csr; struct lan743x_intr intr; - /* lock, used to prevent concurrent access to data port */ - struct mutex dp_lock; - struct lan743x_gpio gpio; struct lan743x_ptp ptp; From 4031eeafa71eaf22ae40a15606a134ae86345daf Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 9 Nov 2020 08:57:05 +0100 Subject: [PATCH 345/710] net/af_iucv: fix null pointer dereference on shutdown syzbot reported the following KASAN finding: BUG: KASAN: nullptr-dereference in iucv_send_ctrl+0x390/0x3f0 net/iucv/af_iucv.c:385 Read of size 2 at addr 000000000000021e by task syz-executor907/519 CPU: 0 PID: 519 Comm: syz-executor907 Not tainted 5.9.0-syzkaller-07043-gbcf9877ad213 #0 Hardware name: IBM 3906 M04 701 (KVM/Linux) Call Trace: [<00000000c576af60>] unwind_start arch/s390/include/asm/unwind.h:65 [inline] [<00000000c576af60>] show_stack+0x180/0x228 arch/s390/kernel/dumpstack.c:135 [<00000000c9dcd1f8>] __dump_stack lib/dump_stack.c:77 [inline] [<00000000c9dcd1f8>] dump_stack+0x268/0x2f0 lib/dump_stack.c:118 [<00000000c5fed016>] print_address_description.constprop.0+0x5e/0x218 mm/kasan/report.c:383 [<00000000c5fec82a>] __kasan_report mm/kasan/report.c:517 [inline] [<00000000c5fec82a>] kasan_report+0x11a/0x168 mm/kasan/report.c:534 [<00000000c98b5b60>] iucv_send_ctrl+0x390/0x3f0 net/iucv/af_iucv.c:385 [<00000000c98b6262>] iucv_sock_shutdown+0x44a/0x4c0 net/iucv/af_iucv.c:1457 [<00000000c89d3a54>] __sys_shutdown+0x12c/0x1c8 net/socket.c:2204 [<00000000c89d3b70>] __do_sys_shutdown net/socket.c:2212 [inline] [<00000000c89d3b70>] __s390x_sys_shutdown+0x38/0x48 net/socket.c:2210 [<00000000c9e36eac>] system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 There is nothing to shutdown if a connection has never been established. Besides that iucv->hs_dev is not yet initialized if a socket is in IUCV_OPEN state and iucv->path is not yet initialized if socket is in IUCV_BOUND state. So, just skip the shutdown calls for a socket in these states. Fixes: eac3731bd04c ("[S390]: Add AF_IUCV socket support") Fixes: 82492a355fac ("af_iucv: add shutdown for HS transport") Reviewed-by: Vasily Gorbik Signed-off-by: Ursula Braun [jwi: correct one Fixes tag] Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- net/iucv/af_iucv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index d80572074667..047238f01ba6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1434,7 +1434,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how) break; } - if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) { + if ((how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) && + sk->sk_state == IUCV_CONNECTED) { if (iucv->transport == AF_IUCV_TRANS_IUCV) { txmsg.class = 0; txmsg.tag = 0; From 4711497ae85d90de903671989daf5145054c123e Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 9 Nov 2020 08:57:06 +0100 Subject: [PATCH 346/710] MAINTAINERS: remove Ursula Braun as s390 network maintainer I am retiring soon. Thus this patch removes myself from the MAINTAINERS file (s390 network). Signed-off-by: Ursula Braun [jwi: fix up the subject] Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 --- 1 file changed, 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cd123d0a6a2d..fa11fe773df5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15246,7 +15246,6 @@ F: drivers/iommu/s390-iommu.c S390 IUCV NETWORK LAYER M: Julian Wiedmann M: Karsten Graul -M: Ursula Braun L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ @@ -15257,7 +15256,6 @@ F: net/iucv/ S390 NETWORK DRIVERS M: Julian Wiedmann M: Karsten Graul -M: Ursula Braun L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ @@ -15828,7 +15826,6 @@ S: Maintained F: drivers/misc/sgi-xp/ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS -M: Ursula Braun M: Karsten Graul L: linux-s390@vger.kernel.org S: Supported From 2e6f11a797a24d1e2141a214a6dd6dfbe709f55d Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Tue, 10 Nov 2020 15:42:23 +0800 Subject: [PATCH 347/710] scsi: ufshcd: Fix missing destroy_workqueue() Add the missing destroy_workqueue() before return from ufshcd_init in the error handling case as well as in ufshcd_remove. Link: https://lore.kernel.org/r/20201110074223.41280-1-miaoqinglang@huawei.com Fixes: 4db7a2360597 ("scsi: ufs: Fix concurrency of error handler and other error recovery paths") Suggested-by: Avri Altman Reviewed-by: Asutosh Das Reviewed-by: Avri Altman Signed-off-by: Qinglang Miao Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 5167c3e770a3..7a160b86adc6 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8928,6 +8928,7 @@ void ufshcd_remove(struct ufs_hba *hba) blk_mq_free_tag_set(&hba->tmf_tag_set); blk_cleanup_queue(hba->cmd_queue); scsi_remove_host(hba->host); + destroy_workqueue(hba->eh_wq); /* disable interrupts */ ufshcd_disable_intr(hba, hba->intr_mask); ufshcd_hba_stop(hba); @@ -9228,6 +9229,7 @@ out_remove_scsi_host: exit_gating: ufshcd_exit_clk_scaling(hba); ufshcd_exit_clk_gating(hba); + destroy_workqueue(hba->eh_wq); out_disable: hba->is_irq_enabled = false; ufshcd_hba_exit(hba); From b5acfe152abaa2721c9ca8aa67f941d7de55d24e Mon Sep 17 00:00:00 2001 From: PeiSen Hou Date: Wed, 11 Nov 2020 08:58:59 +0100 Subject: [PATCH 348/710] ALSA: hda/realtek: Add some Clove SSID in the ALC293(ALC1220) Fix "use as headset mic, without its own jack detect" problem. [ Minor coding style fixes by tiwai ] Signed-off-by: PeiSen Hou Cc: Link: https://lore.kernel.org/r/481963e4a5694ff19f27ae1e283d79ad@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 52 +++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c3a02738ead2..69a952c9e011 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2522,13 +2522,23 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x9506, "Clevo P955HQ", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x950A, "Clevo P955H[PR]", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e3, "Clevo P955[ER]T", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e4, "Clevo P955ER", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e5, "Clevo P955EE6", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e6, "Clevo P950R[CDF]", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950), - SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), - SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x97e2, "Clevo P970RC-M", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), @@ -7931,11 +7941,49 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x1401, "Clevo L140[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x1403, "Clevo N140CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x1404, "Clevo N150CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x40a1, "Clevo NL40GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x40c1, "Clevo NL40[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x40d1, "Clevo NL41DU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50a3, "Clevo NJ51GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50b3, "Clevo NK50S[BEZ]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50b6, "Clevo NK50S5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50b8, "Clevo NK50SZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50d5, "Clevo NP50D5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8535, "Clevo NH50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8536, "Clevo NH79D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8550, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8551, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x8561, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x951d, "Clevo N950T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x961d, "Clevo N960S[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL53RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), From b72de3ff19fdc4bbe4d4bb3f4483c7e46e00bac3 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 7 Nov 2020 17:13:57 +0900 Subject: [PATCH 349/710] gpio: sifive: Fix SiFive gpio probe Fix the check on the number of IRQs to allow up to the maximum (32) instead of only the maximum minus one. Fixes: 96868dce644d ("gpio/sifive: Add GPIO driver for SiFive SoCs") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20201107081420.60325-10-damien.lemoal@wdc.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-sifive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c index c54dd08f2cbf..d5eb9ca11901 100644 --- a/drivers/gpio/gpio-sifive.c +++ b/drivers/gpio/gpio-sifive.c @@ -183,7 +183,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) return PTR_ERR(chip->regs); ngpio = of_irq_count(node); - if (ngpio >= SIFIVE_GPIO_MAX) { + if (ngpio > SIFIVE_GPIO_MAX) { dev_err(dev, "Too many GPIO interrupts (max=%d)\n", SIFIVE_GPIO_MAX); return -ENXIO; From f16e631333a8f12ae8128826e695db4b2a528407 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Wed, 11 Nov 2020 13:03:46 +0800 Subject: [PATCH 350/710] bpf: Fix unsigned 'datasec_id' compared with zero in check_pseudo_btf_id The unsigned variable datasec_id is assigned a return value from the call to check_pseudo_btf_id(), which may return negative error code. This fixes the following coccicheck warning: ./kernel/bpf/verifier.c:9616:5-15: WARNING: Unsigned expression compared with zero: datasec_id > 0 Fixes: eaa6bcb71ef6 ("bpf: Introduce bpf_per_cpu_ptr()") Reported-by: Tosk Robot Signed-off-by: Kaixu Xia Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: John Fastabend Cc: Hao Luo Link: https://lore.kernel.org/bpf/1605071026-25906-1-git-send-email-kaixuxia@tencent.com --- kernel/bpf/verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6200519582a6..6204ec705d80 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9572,12 +9572,13 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn_aux_data *aux) { - u32 datasec_id, type, id = insn->imm; const struct btf_var_secinfo *vsi; const struct btf_type *datasec; const struct btf_type *t; const char *sym_name; bool percpu = false; + u32 type, id = insn->imm; + s32 datasec_id; u64 addr; int i; From 92e4dc8b05663d6539b1b8375f3b1cf7b204cfe9 Mon Sep 17 00:00:00 2001 From: Chris Co Date: Tue, 10 Nov 2020 19:01:18 +0000 Subject: [PATCH 351/710] Drivers: hv: vmbus: Allow cleanup of VMBUS_CONNECT_CPU if disconnected When invoking kexec() on a Linux guest running on a Hyper-V host, the kernel panics. RIP: 0010:cpuhp_issue_call+0x137/0x140 Call Trace: __cpuhp_remove_state_cpuslocked+0x99/0x100 __cpuhp_remove_state+0x1c/0x30 hv_kexec_handler+0x23/0x30 [hv_vmbus] hv_machine_shutdown+0x1e/0x30 machine_shutdown+0x10/0x20 kernel_kexec+0x6d/0x96 __do_sys_reboot+0x1ef/0x230 __x64_sys_reboot+0x1d/0x20 do_syscall_64+0x6b/0x3d8 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This was due to hv_synic_cleanup() callback returning -EBUSY to cpuhp_issue_call() when tearing down the VMBUS_CONNECT_CPU, even if the vmbus_connection.conn_state = DISCONNECTED. hv_synic_cleanup() should succeed in the case where vmbus_connection.conn_state is DISCONNECTED. Fix is to add an extra condition to test for vmbus_connection.conn_state == CONNECTED on the VMBUS_CONNECT_CPU and only return early if true. This way the kexec() path can still shut everything down while preserving the initial behavior of preventing CPU offlining on the VMBUS_CONNECT_CPU while the VM is running. Fixes: 8a857c55420f29 ("Drivers: hv: vmbus: Always handle the VMBus messages on CPU0") Signed-off-by: Chris Co Reviewed-by: Andrea Parri (Microsoft) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201110190118.15596-1-chrco@linux.microsoft.com Signed-off-by: Wei Liu --- drivers/hv/hv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index da69338f92f5..75a8638ff68b 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -244,9 +244,13 @@ int hv_synic_cleanup(unsigned int cpu) /* * Hyper-V does not provide a way to change the connect CPU once - * it is set; we must prevent the connect CPU from going offline. + * it is set; we must prevent the connect CPU from going offline + * while the VM is running normally. But in the panic or kexec() + * path where the vmbus is already disconnected, the CPU must be + * allowed to shut down. */ - if (cpu == VMBUS_CONNECT_CPU) + if (cpu == VMBUS_CONNECT_CPU && + vmbus_connection.conn_state == CONNECTED) return -EBUSY; /* From b2896458b850ec7cb69b054b195b4b399f7e1f22 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 9 Nov 2020 10:36:53 +0100 Subject: [PATCH 352/710] x86/platform/uv: Drop last traces of uv_flush_tlb_others Commit 39297dde7390 ("x86/platform/uv: Remove UV BAU TLB Shootdown Handler") removed uv_flush_tlb_others. Its declaration was removed also from asm/uv/uv.h. But only for the CONFIG_X86_UV=y case. The inline definition (!X86_UV case) is still in place. So remove this implementation with everything what was added to support uv_flush_tlb_others: * include of asm/tlbflush.h * forward declarations of struct cpumask, mm_struct, and flush_tlb_info Signed-off-by: Jiri Slaby Signed-off-by: Thomas Gleixner Acked-by: Mike Travis Acked-by: Steve Wahl Link: https://lore.kernel.org/r/20201109093653.2042-1-jslaby@suse.cz --- arch/x86/include/asm/uv/uv.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 172d3e4a9e4b..648eb23fe7f0 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -2,14 +2,8 @@ #ifndef _ASM_X86_UV_UV_H #define _ASM_X86_UV_UV_H -#include - enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC}; -struct cpumask; -struct mm_struct; -struct flush_tlb_info; - #ifdef CONFIG_X86_UV #include @@ -44,10 +38,6 @@ static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubbed(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } -static inline const struct cpumask * -uv_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) -{ return cpumask; } #endif /* X86_UV */ From 365ec8b61689bd64d6a61e129e0319bf71336407 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Tue, 10 Nov 2020 18:41:13 +0100 Subject: [PATCH 353/710] regulator: pfuze100: limit pfuze-support-disable-sw to pfuze{100,200} Limit the fsl,pfuze-support-disable-sw to the pfuze100 and pfuze200 variants. When enabling fsl,pfuze-support-disable-sw and using a pfuze3000 or pfuze3001, the driver would choose pfuze100_sw_disable_regulator_ops instead of the newly introduced and correct pfuze3000_sw_regulator_ops. Signed-off-by: Sean Nyekjaer Fixes: 6f1cf5257acc ("regualtor: pfuze100: correct sw1a/sw2 on pfuze3000") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201110174113.2066534-1-sean@geanix.com Signed-off-by: Mark Brown --- drivers/regulator/pfuze100-regulator.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 7e8ba9246167..01a12cfcea7c 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -836,11 +836,14 @@ static int pfuze100_regulator_probe(struct i2c_client *client, * the switched regulator till yet. */ if (pfuze_chip->flags & PFUZE_FLAG_DISABLE_SW) { - if (pfuze_chip->regulator_descs[i].sw_reg) { - desc->ops = &pfuze100_sw_disable_regulator_ops; - desc->enable_val = 0x8; - desc->disable_val = 0x0; - desc->enable_time = 500; + if (pfuze_chip->chip_id == PFUZE100 || + pfuze_chip->chip_id == PFUZE200) { + if (pfuze_chip->regulator_descs[i].sw_reg) { + desc->ops = &pfuze100_sw_disable_regulator_ops; + desc->enable_val = 0x8; + desc->disable_val = 0x0; + desc->enable_time = 500; + } } } From 766c6b63aa044e84b045803b40b14754d69a2a1d Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Fri, 6 Nov 2020 10:07:06 -0500 Subject: [PATCH 354/710] spi: fix client driver breakages when using GPIO descriptors Commit f3186dd87669 ("spi: Optionally use GPIO descriptors for CS GPIOs") introduced the optional use of GPIO descriptors for chip selects. A side-effect of this change: when a SPI bus uses GPIO descriptors, all its client devices have SPI_CS_HIGH set in spi->mode. This flag is required for the SPI bus to operate correctly. This unfortunately breaks many client drivers, which use the following pattern to configure their underlying SPI bus: static int client_device_probe(struct spi_device *spi) { ... spi->mode = SPI_MODE_0; spi->bits_per_word = 8; err = spi_setup(spi); .. } In short, many client drivers overwrite the SPI_CS_HIGH bit in spi->mode, and break the underlying SPI bus driver. This is especially true for Freescale/NXP imx ecspi, where large numbers of spi client drivers now no longer work. Proposed fix: ------------- When using gpio descriptors, depend on gpiolib to handle CS polarity. Existing quirks in gpiolib ensure that this is handled correctly. Existing gpiolib behaviour will force the polarity of any chip-select gpiod to active-high (if 'spi-active-high' devicetree prop present) or active-low (if 'spi-active-high' absent). Irrespective of whether the gpio is marked GPIO_ACTIVE_[HIGH|LOW] in the devicetree. Loose ends: ----------- If this fix is applied: - is commit 138c9c32f090 ("spi: spidev: Fix CS polarity if GPIO descriptors are used") still necessary / correct ? Fixes: f3186dd87669 ("spi: Optionally use GPIO descriptors for CS GPIOs") Signed-off-by: Sven Van Asbroeck Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20201106150706.29089-1-TheSven73@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 0cab239d8e7f..7566482c052c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -812,18 +812,16 @@ static void spi_set_cs(struct spi_device *spi, bool enable) enable = !enable; if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) { - /* - * Honour the SPI_NO_CS flag and invert the enable line, as - * active low is default for SPI. Execution paths that handle - * polarity inversion in gpiolib (such as device tree) will - * enforce active high using the SPI_CS_HIGH resulting in a - * double inversion through the code above. - */ if (!(spi->mode & SPI_NO_CS)) { if (spi->cs_gpiod) + /* polarity handled by gpiolib */ gpiod_set_value_cansleep(spi->cs_gpiod, - !enable); + enable1); else + /* + * invert the enable line, as active low is + * default for SPI. + */ gpio_set_value_cansleep(spi->cs_gpio, !enable); } /* Some SPI masters need both GPIO CS & slave_select */ @@ -1992,15 +1990,6 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, } spi->chip_select = value; - /* - * For descriptors associated with the device, polarity inversion is - * handled in the gpiolib, so all gpio chip selects are "active high" - * in the logical sense, the gpiolib will invert the line if need be. - */ - if ((ctlr->use_gpio_descriptors) && ctlr->cs_gpiods && - ctlr->cs_gpiods[spi->chip_select]) - spi->mode |= SPI_CS_HIGH; - /* Device speed */ if (!of_property_read_u32(nc, "spi-max-frequency", &value)) spi->max_speed_hz = value; From ee4ad5d06509b3aea79b6a77bebd09ef891bed8d Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 10 Nov 2020 15:47:36 -0600 Subject: [PATCH 355/710] spi: fsi: Fix transfer returning without finalizing message In the case that the SPI mux isn't set, the transfer_one_message function returns without finalizing the message. This means that the transfer never completes, resulting in hung tasks and an eventual kernel panic. Fix it by finalizing the transfer in this case. Fixes: 9211a441e606 ("spi: fsi: Check mux status before transfers") Signed-off-by: Eddie James Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20201110214736.25718-1-eajames@linux.ibm.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c index 8a440c7078ef..3920cd3286d8 100644 --- a/drivers/spi/spi-fsi.c +++ b/drivers/spi/spi-fsi.c @@ -477,7 +477,7 @@ static int fsi_spi_transfer_one_message(struct spi_controller *ctlr, rc = fsi_spi_check_mux(ctx->fsi, ctx->dev); if (rc) - return rc; + goto error; list_for_each_entry(transfer, &mesg->transfers, transfer_list) { struct fsi_spi_sequence seq; From 2bd3fa793aaa7e98b74e3653fdcc72fa753913b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Nov 2020 08:07:37 -0800 Subject: [PATCH 356/710] xfs: fix a missing unlock on error in xfs_fs_map_blocks We also need to drop the iolock when invalidate_inode_pages2 fails, not only on all other error or successful cases. Fixes: 527851124d10 ("xfs: implement pNFS export operations") Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index b101feb2aab4..f3082a957d5e 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -134,7 +134,7 @@ xfs_fs_map_blocks( goto out_unlock; error = invalidate_inode_pages2(inode->i_mapping); if (WARN_ON_ONCE(error)) - return error; + goto out_unlock; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length); offset_fsb = XFS_B_TO_FSBT(mp, offset); From 88ec3211e46344a7d10cf6cb5045f839f7785f8e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Nov 2020 10:38:53 -0700 Subject: [PATCH 357/710] io_uring: round-up cq size before comparing with rounded sq size If an application specifies IORING_SETUP_CQSIZE to set the CQ ring size to a specific size, we ensure that the CQ size is at least that of the SQ ring size. But in doing so, we compare the already rounded up to power of two SQ size to the as-of yet unrounded CQ size. This means that if an application passes in non power of two sizes, we can return -EINVAL when the final value would've been fine. As an example, an application passing in 100/100 for sq/cq size should end up with 128 for both. But since we round the SQ size first, we compare the CQ size of 100 to 128, and return -EINVAL as that is too small. Cc: stable@vger.kernel.org Fixes: 33a107f0a1b8 ("io_uring: allow application controlled CQ ring size") Reported-by: Dan Melnic Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8018c7076b25..c77584de68d7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9226,6 +9226,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, * to a power-of-two, if it isn't already. We do NOT impose * any cq vs sq ring sizing. */ + p->cq_entries = roundup_pow_of_two(p->cq_entries); if (p->cq_entries < p->sq_entries) return -EINVAL; if (p->cq_entries > IORING_MAX_CQ_ENTRIES) { @@ -9233,7 +9234,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, return -EINVAL; p->cq_entries = IORING_MAX_CQ_ENTRIES; } - p->cq_entries = roundup_pow_of_two(p->cq_entries); } else { p->cq_entries = 2 * p->sq_entries; } From a72b38eebea4661d4d67b194353124e63ce48f66 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Wed, 11 Nov 2020 10:32:09 -0800 Subject: [PATCH 358/710] ext4: handle dax mount option collision Mount options dax=inode and dax=never collided with fast_commit and journal checksum. Redefine the mount flags to remove the collision. Reported-by: Murphy Zhou Fixes: 9cb20f94afcd2 ("fs/ext4: Make DAX mount option a tri-state") Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201111183209.447175-1-harshads@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1b399cafb15a..bf9429484462 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1231,13 +1231,13 @@ struct ext4_inode_info { blocks */ #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated file systems */ -#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */ -#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */ - #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly specified journal checksum */ #define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */ +#define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */ +#define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */ + #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ ~EXT4_MOUNT_##opt From d196e229a80c39254f4adbc312f55f5198e98941 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 11 Nov 2020 14:24:18 -0500 Subject: [PATCH 359/710] Revert "ext4: fix superblock checksum calculation race" This reverts commit acaa532687cdc3a03757defafece9c27aa667546 which can result in a ext4_superblock_csum_set() trying to sleep while a spinlock is being held. For more discussion of this issue, please see: https://lore.kernel.org/r/000000000000f50cb705b313ed70@google.com Reported-by: syzbot+7a4ba6a239b91a126c28@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c3b864588a0b..6633b20224d5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -289,18 +289,7 @@ void ext4_superblock_csum_set(struct super_block *sb) if (!ext4_has_metadata_csum(sb)) return; - /* - * Locking the superblock prevents the scenario - * where: - * 1) a first thread pauses during checksum calculation. - * 2) a second thread updates the superblock, recalculates - * the checksum, and updates s_checksum - * 3) the first thread resumes and finishes its checksum calculation - * and updates s_checksum with a potentially stale or torn value. - */ - lock_buffer(EXT4_SB(sb)->s_sbh); es->s_checksum = ext4_superblock_csum(sb, es); - unlock_buffer(EXT4_SB(sb)->s_sbh); } ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, From fa6882c63621821f73cc806f291208e1c6ea6187 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 9 Nov 2020 22:09:13 +0800 Subject: [PATCH 360/710] tipc: fix memory leak in tipc_topsrv_start() kmemleak report a memory leak as follows: unreferenced object 0xffff88810a596800 (size 512): comm "ip", pid 21558, jiffies 4297568990 (age 112.120s) hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 00 83 60 b0 ff ff ff ff ..........`..... backtrace: [<0000000022bbe21f>] tipc_topsrv_init_net+0x1f3/0xa70 [<00000000fe15ddf7>] ops_init+0xa8/0x3c0 [<00000000138af6f2>] setup_net+0x2de/0x7e0 [<000000008c6807a3>] copy_net_ns+0x27d/0x530 [<000000006b21adbd>] create_new_namespaces+0x382/0xa30 [<00000000bb169746>] unshare_nsproxy_namespaces+0xa1/0x1d0 [<00000000fe2e42bc>] ksys_unshare+0x39c/0x780 [<0000000009ba3b19>] __x64_sys_unshare+0x2d/0x40 [<00000000614ad866>] do_syscall_64+0x56/0xa0 [<00000000a1b5ca3c>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 'srv' is malloced in tipc_topsrv_start() but not free before leaving from the error handling cases. We need to free it. Fixes: 5c45ab24ac77 ("tipc: make struct tipc_server private for server.c") Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20201109140913.47370-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- net/tipc/topsrv.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 5f6f86051c83..13f3143609f9 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -664,12 +664,18 @@ static int tipc_topsrv_start(struct net *net) ret = tipc_topsrv_work_start(srv); if (ret < 0) - return ret; + goto err_start; ret = tipc_topsrv_create_listener(srv); if (ret < 0) - tipc_topsrv_work_stop(srv); + goto err_create; + return 0; + +err_create: + tipc_topsrv_work_stop(srv); +err_start: + kfree(srv); return ret; } From df392aefe96b9f94efb01ef298b617bab346a9be Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 9 Nov 2020 12:04:36 +0100 Subject: [PATCH 361/710] arm64: dts: fsl-ls1028a-kontron-sl28: specify in-band mode for ENETC Since commit 71b77a7a27a3 ("enetc: Migrate to PHYLINK and PCS_LYNX") the network port of the Kontron sl28 board is broken. After the migration to phylink the device tree has to specify the in-band-mode property. Add it. Fixes: 71b77a7a27a3 ("enetc: Migrate to PHYLINK and PCS_LYNX") Suggested-by: Vladimir Oltean Signed-off-by: Michael Walle Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20201109110436.5906-1-michael@walle.cc Signed-off-by: Jakub Kicinski --- arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts index f46eb47cfa4d..8161dd237971 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts @@ -75,6 +75,7 @@ &enetc_port0 { phy-handle = <&phy0>; phy-connection-type = "sgmii"; + managed = "in-band-status"; status = "okay"; mdio { From 361182308766a265b6c521879b34302617a8c209 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Mon, 9 Nov 2020 07:54:49 +0100 Subject: [PATCH 362/710] net/x25: Fix null-ptr-deref in x25_connect This fixes a regression for blocking connects introduced by commit 4becb7ee5b3d ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect"). The x25->neighbour is already set to "NULL" by x25_disconnect() now, while a blocking connect is waiting in x25_wait_for_connection_establishment(). Therefore x25->neighbour must not be accessed here again and x25->state is also already set to X25_STATE_0 by x25_disconnect(). Fixes: 4becb7ee5b3d ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect") Signed-off-by: Martin Schiller Reviewed-by: Xie He Link: https://lore.kernel.org/r/20201109065449.9014-1-ms@dev.tdt.de Signed-off-by: Jakub Kicinski --- net/x25/af_x25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 0bbb283f23c9..046d3fee66a9 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -825,7 +825,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; rc = 0; out_put_neigh: - if (rc) { + if (rc && x25->neighbour) { read_lock_bh(&x25_list_lock); x25_neigh_put(x25->neighbour); x25->neighbour = NULL; From a5bea04fcc0b3c0aec71ee1fd58fd4ff7ee36177 Mon Sep 17 00:00:00 2001 From: Evan Nimmo Date: Tue, 10 Nov 2020 15:28:25 +1300 Subject: [PATCH 363/710] of/address: Fix of_node memory leak in of_dma_is_coherent Commit dabf6b36b83a ("of: Add OF_DMA_DEFAULT_COHERENT & select it on powerpc") added a check to of_dma_is_coherent which returns early if OF_DMA_DEFAULT_COHERENT is enabled. This results in the of_node_put() being skipped causing a memory leak. Moved the of_node_get() below this check so we now we only get the node if OF_DMA_DEFAULT_COHERENT is not enabled. Fixes: dabf6b36b83a ("of: Add OF_DMA_DEFAULT_COHERENT & select it on powerpc") Signed-off-by: Evan Nimmo Link: https://lore.kernel.org/r/20201110022825.30895-1-evan.nimmo@alliedtelesis.co.nz Signed-off-by: Rob Herring --- drivers/of/address.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index eb9ab4f1e80b..1c3257a2d4e3 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -1034,11 +1034,13 @@ out: */ bool of_dma_is_coherent(struct device_node *np) { - struct device_node *node = of_node_get(np); + struct device_node *node; if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT)) return true; + node = of_node_get(np); + while (node) { if (of_property_read_bool(node, "dma-coherent")) { of_node_put(node); From 49c3e714ff4391144d8bb3fa99d0b460f8dbfd86 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 11 Nov 2020 14:05:07 +0100 Subject: [PATCH 364/710] dt-bindings: can: fsl,flexcan.yaml: fix fsl,stop-mode The fsl,stop-mode property is a phandle-array and should consist of one phandle and two 32 bit integers, e.g.: fsl,stop-mode = <&gpr 0x34 28>; This patch fixes the following errors, which shows up during a dtbs_check: arch/arm/boot/dts/imx6dl-apf6dev.dt.yaml: can@2090000: fsl,stop-mode: [[1, 52, 28]] is too short From schema: Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml Fixes: e5ab9aa7e49b ("dt-bindings: can: flexcan: convert fsl,*flexcan bindings to yaml") Reported-by: Rob Herring Cc: Oleksij Rempel Signed-off-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20201111130507.1560881-5-mkl@pengutronix.de Signed-off-by: Rob Herring --- .../devicetree/bindings/net/can/fsl,flexcan.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml index 43df15ba8fa4..3093c22e761c 100644 --- a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml +++ b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml @@ -81,11 +81,12 @@ properties: req_bit is the bit offset of CAN stop request. $ref: /schemas/types.yaml#/definitions/phandle-array items: - - description: The 'gpr' is the phandle to general purpose register node. - - description: The 'req_gpr' is the gpr register offset of CAN stop request. - maximum: 0xff - - description: The 'req_bit' is the bit offset of CAN stop request. - maximum: 0x1f + items: + - description: The 'gpr' is the phandle to general purpose register node. + - description: The 'req_gpr' is the gpr register offset of CAN stop request. + maximum: 0xff + - description: The 'req_bit' is the bit offset of CAN stop request. + maximum: 0x1f fsl,clk-source: description: | From 9d2e5e9eeb59524a59b461fe256139826d464e1e Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:31 +0530 Subject: [PATCH 365/710] cxgb4/ch_ktls: decrypted bit is not enough If skb has retransmit data starting before start marker, e.g. ccs, decrypted bit won't be set for that, and if it has some data to encrypt, then it must be given to crypto ULD. So in place of decrypted, check if socket is tls offloaded. Also, unless skb has some data to encrypt, no need to give it for tls offload handling. v2->v3: - Removed ifdef. Fixes: 5a4b9fe7fece ("cxgb4/chcr: complete record tx handling") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 5 +++++ drivers/net/ethernet/chelsio/cxgb4/sge.c | 3 ++- .../net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 4 ---- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index a952fe198eb9..7fd264a6d085 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1176,6 +1176,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, txq = netdev_pick_tx(dev, skb, sb_dev); if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) || skb->encapsulation || + cxgb4_is_ktls_skb(skb) || (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) txq = txq % pi->nqsets; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index b169776ab484..e2a4941fa802 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -493,6 +493,11 @@ struct cxgb4_uld_info { #endif }; +static inline bool cxgb4_is_ktls_skb(struct sk_buff *skb) +{ + return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); +} + void cxgb4_uld_enable(struct adapter *adap); void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index a9e9c7ae565d..01bd9c0dfe4e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1422,7 +1422,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) #endif /* CHELSIO_IPSEC_INLINE */ #if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) - if (skb->decrypted) + if (cxgb4_is_ktls_skb(skb) && + (skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)))) return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev); #endif /* CHELSIO_TLS_DEVICE */ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 5195f692f14d..43c723c72c61 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1878,10 +1878,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : skb->data_len; - /* check if we haven't set it for ktls offload */ - if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) - goto out; - tls_ctx = tls_get_ctx(skb->sk); if (unlikely(tls_ctx->netdev != dev)) goto out; From b1b5cb18032b37ab69b23a461eb8be1a44fcfc3b Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:32 +0530 Subject: [PATCH 366/710] ch_ktls: Correction in finding correct length There is a possibility of linear skbs coming in. Correcting the length extraction logic. v2->v3: - Separated un-related changes from this patch. Fixes: 5a4b9fe7fece ("cxgb4/chcr: complete record tx handling") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 43c723c72c61..447aec7ae954 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -967,7 +967,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, /* packet length = eth hdr len + ip hdr len + tcp hdr len * (including options). */ - pktlen = skb->len - skb->data_len; + pktlen = skb_transport_offset(skb) + tcp_hdrlen(skb); ctrl = sizeof(*cpl) + pktlen; len16 = DIV_ROUND_UP(sizeof(*wr) + ctrl, 16); @@ -1860,6 +1860,7 @@ out: /* nic tls TX handler */ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) { + u32 tls_end_offset, tcp_seq, skb_data_len, skb_offset; struct ch_ktls_port_stats_debug *port_stats; struct chcr_ktls_ofld_ctx_tx *tx_ctx; struct ch_ktls_stats_debug *stats; @@ -1867,7 +1868,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) int data_len, qidx, ret = 0, mss; struct tls_record_info *record; struct chcr_ktls_info *tx_info; - u32 tls_end_offset, tcp_seq; struct tls_context *tls_ctx; struct sk_buff *local_skb; struct sge_eth_txq *q; @@ -1875,8 +1875,11 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) unsigned long flags; tcp_seq = ntohl(th->seq); + skb_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); + skb_data_len = skb->len - skb_offset; + data_len = skb_data_len; - mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : skb->data_len; + mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : data_len; tls_ctx = tls_get_ctx(skb->sk); if (unlikely(tls_ctx->netdev != dev)) @@ -1922,8 +1925,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) /* copy skb contents into local skb */ chcr_ktls_skb_copy(skb, local_skb); - /* go through the skb and send only one record at a time. */ - data_len = skb->data_len; /* TCP segments can be in received either complete or partial. * chcr_end_part_handler will handle cases if complete record or end * part of the record is received. Incase of partial end part of record, @@ -2020,9 +2021,9 @@ clear_ref: } while (data_len > 0); - tx_info->prev_seq = ntohl(th->seq) + skb->data_len; + tx_info->prev_seq = ntohl(th->seq) + skb_data_len; atomic64_inc(&port_stats->ktls_tx_encrypted_packets); - atomic64_add(skb->data_len, &port_stats->ktls_tx_encrypted_bytes); + atomic64_add(skb_data_len, &port_stats->ktls_tx_encrypted_bytes); /* tcp finish is set, send a separate tcp msg including all the options * as well. From 86716b51d14fc2201938939b323ba3ad99186910 Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:33 +0530 Subject: [PATCH 367/710] ch_ktls: Update cheksum information Checksum update was missing in the WR. Fixes: 429765a149f1 ("chcr: handle partial end part of a record") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 447aec7ae954..b7a3e757ee72 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -959,6 +959,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, struct iphdr *ip; int credits; u8 buf[150]; + u64 cntrl1; void *pos; iplen = skb_network_header_len(skb); @@ -997,22 +998,28 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, TXPKT_PF_V(tx_info->adap->pf)); cpl->pack = 0; cpl->len = htons(pktlen); - /* checksum offload */ - cpl->ctrl1 = 0; - - pos = cpl + 1; memcpy(buf, skb->data, pktlen); if (tx_info->ip_family == AF_INET) { /* we need to correct ip header len */ ip = (struct iphdr *)(buf + maclen); ip->tot_len = htons(pktlen - maclen); + cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP); #if IS_ENABLED(CONFIG_IPV6) } else { ip6 = (struct ipv6hdr *)(buf + maclen); ip6->payload_len = htons(pktlen - maclen - iplen); + cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP6); #endif } + + cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) | + TXPKT_IPHDR_LEN_V(iplen); + /* checksum offload */ + cpl->ctrl1 = cpu_to_be64(cntrl1); + + pos = cpl + 1; + /* now take care of the tcp header, if fin is not set then clear push * bit as well, and if fin is set, it will be sent at the last so we * need to update the tcp sequence number as per the last packet. From 687823d2d104df8226eacba74fda9f4ba3aecd6c Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:34 +0530 Subject: [PATCH 368/710] cxgb4/ch_ktls: creating skbs causes panic Creating SKB per tls record and freeing the original one causes panic. There will be race if connection reset is requested. By freeing original skb, refcnt will be decremented and that means, there is no pending record to send, and so tls_dev_del will be requested in control path while SKB of related connection is in queue. Better approach is to use same SKB to send one record (partial data) at a time. We still have to create a new SKB when partial last part of a record is requested. This fix introduces new API cxgb4_write_partial_sgl() to send partial part of skb. Present cxgb4_write_sgl can only provide feasibility to start from an offset which limits to header only and it can write sgls for the whole skb len. But this new API will help in both. It can start from any offset and can end writing in middle of the skb. v4->v5: - Removed extra changes. Fixes: 429765a149f1 ("chcr: handle partial end part of a record") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + drivers/net/ethernet/chelsio/cxgb4/sge.c | 108 +++++++ .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 284 +++++++----------- 3 files changed, 226 insertions(+), 169 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 3352dad6ca99..27308600da15 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -2124,6 +2124,9 @@ void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q, void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q, struct ulptx_sgl *sgl, u64 *end, unsigned int start, const dma_addr_t *addr); +void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q, + struct ulptx_sgl *sgl, u64 *end, + const dma_addr_t *addr, u32 start, u32 send_len); void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n); int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf, u16 vlan); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 01bd9c0dfe4e..196652a114c5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -890,6 +890,114 @@ void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q, } EXPORT_SYMBOL(cxgb4_write_sgl); +/* cxgb4_write_partial_sgl - populate SGL for partial packet + * @skb: the packet + * @q: the Tx queue we are writing into + * @sgl: starting location for writing the SGL + * @end: points right after the end of the SGL + * @addr: the list of bus addresses for the SGL elements + * @start: start offset in the SKB where partial data starts + * @len: length of data from @start to send out + * + * This API will handle sending out partial data of a skb if required. + * Unlike cxgb4_write_sgl, @start can be any offset into the skb data, + * and @len will decide how much data after @start offset to send out. + */ +void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q, + struct ulptx_sgl *sgl, u64 *end, + const dma_addr_t *addr, u32 start, u32 len) +{ + struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1] = {0}, *to; + u32 frag_size, skb_linear_data_len = skb_headlen(skb); + struct skb_shared_info *si = skb_shinfo(skb); + u8 i = 0, frag_idx = 0, nfrags = 0; + skb_frag_t *frag; + + /* Fill the first SGL either from linear data or from partial + * frag based on @start. + */ + if (unlikely(start < skb_linear_data_len)) { + frag_size = min(len, skb_linear_data_len - start); + sgl->len0 = htonl(frag_size); + sgl->addr0 = cpu_to_be64(addr[0] + start); + len -= frag_size; + nfrags++; + } else { + start -= skb_linear_data_len; + frag = &si->frags[frag_idx]; + frag_size = skb_frag_size(frag); + /* find the first frag */ + while (start >= frag_size) { + start -= frag_size; + frag_idx++; + frag = &si->frags[frag_idx]; + frag_size = skb_frag_size(frag); + } + + frag_size = min(len, skb_frag_size(frag) - start); + sgl->len0 = cpu_to_be32(frag_size); + sgl->addr0 = cpu_to_be64(addr[frag_idx + 1] + start); + len -= frag_size; + nfrags++; + frag_idx++; + } + + /* If the entire partial data fit in one SGL, then send it out + * now. + */ + if (!len) + goto done; + + /* Most of the complexity below deals with the possibility we hit the + * end of the queue in the middle of writing the SGL. For this case + * only we create the SGL in a temporary buffer and then copy it. + */ + to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge; + + /* If the skb couldn't fit in first SGL completely, fill the + * rest of the frags in subsequent SGLs. Note that each SGL + * pair can store 2 frags. + */ + while (len) { + frag_size = min(len, skb_frag_size(&si->frags[frag_idx])); + to->len[i & 1] = cpu_to_be32(frag_size); + to->addr[i & 1] = cpu_to_be64(addr[frag_idx + 1]); + if (i && (i & 1)) + to++; + nfrags++; + frag_idx++; + i++; + len -= frag_size; + } + + /* If we ended in an odd boundary, then set the second SGL's + * length in the pair to 0. + */ + if (i & 1) + to->len[1] = cpu_to_be32(0); + + /* Copy from temporary buffer to Tx ring, in case we hit the + * end of the queue in the middle of writing the SGL. + */ + if (unlikely((u8 *)end > (u8 *)q->stat)) { + u32 part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1; + + if (likely(part0)) + memcpy(sgl->sge, buf, part0); + part1 = (u8 *)end - (u8 *)q->stat; + memcpy(q->desc, (u8 *)buf + part0, part1); + end = (void *)q->desc + part1; + } + + /* 0-pad to multiple of 16 */ + if ((uintptr_t)end & 8) + *end = 0; +done: + sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) | + ULPTX_NSGE_V(nfrags)); +} +EXPORT_SYMBOL(cxgb4_write_partial_sgl); + /* This function copies 64 byte coalesced work request to * memory mapped BAR2 space. For coalesced WR SGE fetches * data from the FIFO instead of from Host. diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index b7a3e757ee72..950841988ffe 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -14,6 +14,50 @@ static LIST_HEAD(uld_ctx_list); static DEFINE_MUTEX(dev_mutex); +/* chcr_get_nfrags_to_send: get the remaining nfrags after start offset + * @skb: skb + * @start: start offset. + * @len: how much data to send after @start + */ +static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len) +{ + struct skb_shared_info *si = skb_shinfo(skb); + u32 frag_size, skb_linear_data_len = skb_headlen(skb); + u8 nfrags = 0, frag_idx = 0; + skb_frag_t *frag; + + /* if its a linear skb then return 1 */ + if (!skb_is_nonlinear(skb)) + return 1; + + if (unlikely(start < skb_linear_data_len)) { + frag_size = min(len, skb_linear_data_len - start); + start = 0; + } else { + start -= skb_linear_data_len; + + frag = &si->frags[frag_idx]; + frag_size = skb_frag_size(frag); + while (start >= frag_size) { + start -= frag_size; + frag_idx++; + frag = &si->frags[frag_idx]; + frag_size = skb_frag_size(frag); + } + frag_size = min(len, skb_frag_size(frag) - start); + } + len -= frag_size; + nfrags++; + + while (len) { + frag_size = min(len, skb_frag_size(&si->frags[frag_idx])); + len -= frag_size; + nfrags++; + frag_idx++; + } + return nfrags; +} + static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info); /* * chcr_ktls_save_keys: calculate and save crypto keys. @@ -865,35 +909,15 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, return 0; } -/* - * chcr_ktls_skb_copy - * @nskb - new skb where the frags to be added. - * @skb - old skb from which frags will be copied. - */ -static void chcr_ktls_skb_copy(struct sk_buff *skb, struct sk_buff *nskb) -{ - int i; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_shinfo(nskb)->frags[i] = skb_shinfo(skb)->frags[i]; - __skb_frag_ref(&skb_shinfo(nskb)->frags[i]); - } - - skb_shinfo(nskb)->nr_frags = skb_shinfo(skb)->nr_frags; - nskb->len += skb->data_len; - nskb->data_len = skb->data_len; - nskb->truesize += skb->data_len; -} - /* * chcr_ktls_get_tx_flits * returns number of flits to be sent out, it includes key context length, WR * size and skb fragments. */ static unsigned int -chcr_ktls_get_tx_flits(const struct sk_buff *skb, unsigned int key_ctx_len) +chcr_ktls_get_tx_flits(u32 nr_frags, unsigned int key_ctx_len) { - return chcr_sgl_len(skb_shinfo(skb)->nr_frags) + + return chcr_sgl_len(nr_frags) + DIV_ROUND_UP(key_ctx_len + CHCR_KTLS_WR_SIZE, 8); } @@ -1038,71 +1062,6 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, return 0; } -/* chcr_ktls_skb_shift - Shifts request length paged data from skb to another. - * @tgt- buffer into which tail data gets added - * @skb- buffer from which the paged data comes from - * @shiftlen- shift up to this many bytes - */ -static int chcr_ktls_skb_shift(struct sk_buff *tgt, struct sk_buff *skb, - int shiftlen) -{ - skb_frag_t *fragfrom, *fragto; - int from, to, todo; - - WARN_ON(shiftlen > skb->data_len); - - todo = shiftlen; - from = 0; - to = 0; - fragfrom = &skb_shinfo(skb)->frags[from]; - - while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { - fragfrom = &skb_shinfo(skb)->frags[from]; - fragto = &skb_shinfo(tgt)->frags[to]; - - if (todo >= skb_frag_size(fragfrom)) { - *fragto = *fragfrom; - todo -= skb_frag_size(fragfrom); - from++; - to++; - - } else { - __skb_frag_ref(fragfrom); - skb_frag_page_copy(fragto, fragfrom); - skb_frag_off_copy(fragto, fragfrom); - skb_frag_size_set(fragto, todo); - - skb_frag_off_add(fragfrom, todo); - skb_frag_size_sub(fragfrom, todo); - todo = 0; - - to++; - break; - } - } - - /* Ready to "commit" this state change to tgt */ - skb_shinfo(tgt)->nr_frags = to; - - /* Reposition in the original skb */ - to = 0; - while (from < skb_shinfo(skb)->nr_frags) - skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; - - skb_shinfo(skb)->nr_frags = to; - - WARN_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); - - skb->len -= shiftlen; - skb->data_len -= shiftlen; - skb->truesize -= shiftlen; - tgt->len += shiftlen; - tgt->data_len += shiftlen; - tgt->truesize += shiftlen; - - return shiftlen; -} - /* * chcr_ktls_xmit_wr_complete: This sends out the complete record. If an skb * received has partial end part of the record, send out the complete record, so @@ -1118,6 +1077,8 @@ static int chcr_ktls_skb_shift(struct sk_buff *tgt, struct sk_buff *skb, static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb, struct chcr_ktls_info *tx_info, struct sge_eth_txq *q, u32 tcp_seq, + bool is_last_wr, u32 data_len, + u32 skb_offset, u32 nfrags, bool tcp_push, u32 mss) { u32 len16, wr_mid = 0, flits = 0, ndesc, cipher_start; @@ -1133,7 +1094,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb, u64 *end; /* get the number of flits required */ - flits = chcr_ktls_get_tx_flits(skb, tx_info->key_ctx_len); + flits = chcr_ktls_get_tx_flits(nfrags, tx_info->key_ctx_len); /* number of descriptors */ ndesc = chcr_flits_to_desc(flits); /* check if enough credits available */ @@ -1162,6 +1123,9 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb, return NETDEV_TX_BUSY; } + if (!is_last_wr) + skb_get(skb); + pos = &q->q.desc[q->q.pidx]; end = (u64 *)pos + flits; /* FW_ULPTX_WR */ @@ -1194,7 +1158,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb, CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) | CPL_TX_SEC_PDU_PLACEHOLDER_V(1) | CPL_TX_SEC_PDU_IVINSRTOFST_V(TLS_HEADER_SIZE + 1)); - cpl->pldlen = htonl(skb->data_len); + cpl->pldlen = htonl(data_len); /* encryption should start after tls header size + iv size */ cipher_start = TLS_HEADER_SIZE + tx_info->iv_size + 1; @@ -1236,7 +1200,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb, /* CPL_TX_DATA */ tx_data = (void *)pos; OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid)); - tx_data->len = htonl(TX_DATA_MSS_V(mss) | TX_LENGTH_V(skb->data_len)); + tx_data->len = htonl(TX_DATA_MSS_V(mss) | TX_LENGTH_V(data_len)); tx_data->rsvd = htonl(tcp_seq); @@ -1256,8 +1220,8 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb, } /* send the complete packet except the header */ - cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len, - sgl_sdesc->addr); + cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr, + skb_offset, data_len); sgl_sdesc->skb = skb; chcr_txq_advance(&q->q, ndesc); @@ -1289,10 +1253,11 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb, struct sge_eth_txq *q, u32 tcp_seq, bool tcp_push, u32 mss, u32 tls_rec_offset, u8 *prior_data, - u32 prior_data_len) + u32 prior_data_len, u32 data_len, + u32 skb_offset) { + u32 len16, wr_mid = 0, cipher_start, nfrags; struct adapter *adap = tx_info->adap; - u32 len16, wr_mid = 0, cipher_start; unsigned int flits = 0, ndesc; int credits, left, last_desc; struct tx_sw_desc *sgl_sdesc; @@ -1305,10 +1270,11 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb, void *pos; u64 *end; + nfrags = chcr_get_nfrags_to_send(skb, skb_offset, data_len); /* get the number of flits required, it's a partial record so 2 flits * (AES_BLOCK_SIZE) will be added. */ - flits = chcr_ktls_get_tx_flits(skb, tx_info->key_ctx_len) + 2; + flits = chcr_ktls_get_tx_flits(nfrags, tx_info->key_ctx_len) + 2; /* get the correct 8 byte IV of this record */ iv_record = cpu_to_be64(tx_info->iv + tx_info->record_no); /* If it's a middle record and not 16 byte aligned to run AES CTR, need @@ -1380,7 +1346,7 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb, htonl(CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) | CPL_TX_SEC_PDU_IVINSRTOFST_V(1)); - cpl->pldlen = htonl(skb->data_len + AES_BLOCK_LEN + prior_data_len); + cpl->pldlen = htonl(data_len + AES_BLOCK_LEN + prior_data_len); cpl->aadstart_cipherstop_hi = htonl(CPL_TX_SEC_PDU_CIPHERSTART_V(cipher_start)); cpl->cipherstop_lo_authinsert = 0; @@ -1411,7 +1377,7 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb, tx_data = (void *)pos; OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid)); tx_data->len = htonl(TX_DATA_MSS_V(mss) | - TX_LENGTH_V(skb->data_len + prior_data_len)); + TX_LENGTH_V(data_len + prior_data_len)); tx_data->rsvd = htonl(tcp_seq); tx_data->flags = htonl(TX_BYPASS_F); if (tcp_push) @@ -1444,8 +1410,8 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb, if (prior_data_len) pos = chcr_copy_to_txd(prior_data, &q->q, pos, 16); /* send the complete packet except the header */ - cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len, - sgl_sdesc->addr); + cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr, + skb_offset, data_len); sgl_sdesc->skb = skb; chcr_txq_advance(&q->q, ndesc); @@ -1473,6 +1439,7 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info, struct sk_buff *skb, u32 tcp_seq, u32 mss, bool tcp_push, struct sge_eth_txq *q, u32 port_id, u8 *prior_data, + u32 data_len, u32 skb_offset, u32 prior_data_len) { int credits, left, len16, last_desc; @@ -1482,14 +1449,16 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info, struct ulptx_idata *idata; struct ulp_txpkt *ulptx; struct fw_ulptx_wr *wr; - u32 wr_mid = 0; + u32 wr_mid = 0, nfrags; void *pos; u64 *end; flits = DIV_ROUND_UP(CHCR_PLAIN_TX_DATA_LEN, 8); - flits += chcr_sgl_len(skb_shinfo(skb)->nr_frags); + nfrags = chcr_get_nfrags_to_send(skb, skb_offset, data_len); + flits += chcr_sgl_len(nfrags); if (prior_data_len) flits += 2; + /* WR will need len16 */ len16 = DIV_ROUND_UP(flits, 2); /* check how many descriptors needed */ @@ -1542,7 +1511,7 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info, tx_data = (struct cpl_tx_data *)(idata + 1); OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid)); tx_data->len = htonl(TX_DATA_MSS_V(mss) | - TX_LENGTH_V(skb->data_len + prior_data_len)); + TX_LENGTH_V(data_len + prior_data_len)); /* set tcp seq number */ tx_data->rsvd = htonl(tcp_seq); tx_data->flags = htonl(TX_BYPASS_F); @@ -1566,8 +1535,8 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info, end = pos + left; } /* send the complete packet including the header */ - cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len, - sgl_sdesc->addr); + cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr, + skb_offset, data_len); sgl_sdesc->skb = skb; chcr_txq_advance(&q->q, ndesc); @@ -1578,9 +1547,11 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info, /* * chcr_ktls_copy_record_in_skb * @nskb - new skb where the frags to be added. + * @skb - old skb, to copy socket and destructor details. * @record - specific record which has complete 16k record in frags. */ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, + struct sk_buff *skb, struct tls_record_info *record) { int i = 0; @@ -1595,6 +1566,9 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, nskb->data_len = record->len; nskb->len += record->len; nskb->truesize += record->len; + nskb->sk = skb->sk; + nskb->destructor = skb->destructor; + refcount_add(nskb->truesize, &nskb->sk->sk_wmem_alloc); } /* @@ -1666,7 +1640,7 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, struct sk_buff *skb, struct tls_record_info *record, u32 tcp_seq, int mss, bool tcp_push_no_fin, - struct sge_eth_txq *q, + struct sge_eth_txq *q, u32 skb_offset, u32 tls_end_offset, bool last_wr) { struct sk_buff *nskb = NULL; @@ -1675,13 +1649,14 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, nskb = skb; atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_complete_pkts); } else { - dev_kfree_skb_any(skb); - - nskb = alloc_skb(0, GFP_KERNEL); - if (!nskb) + nskb = alloc_skb(0, GFP_ATOMIC); + if (!nskb) { + dev_kfree_skb_any(skb); return NETDEV_TX_BUSY; + } + /* copy complete record in skb */ - chcr_ktls_copy_record_in_skb(nskb, record); + chcr_ktls_copy_record_in_skb(nskb, skb, record); /* packet is being sent from the beginning, update the tcp_seq * accordingly. */ @@ -1691,10 +1666,20 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, */ if (chcr_ktls_update_snd_una(tx_info, q)) goto out; + /* reset skb offset */ + skb_offset = 0; + + if (last_wr) + dev_kfree_skb_any(skb); + + last_wr = true; + atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_end_pkts); } if (chcr_ktls_xmit_wr_complete(nskb, tx_info, q, tcp_seq, + last_wr, record->len, skb_offset, + record->num_frags, (last_wr && tcp_push_no_fin), mss)) { goto out; @@ -1730,41 +1715,32 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, struct sk_buff *skb, struct tls_record_info *record, u32 tcp_seq, int mss, bool tcp_push_no_fin, + u32 data_len, u32 skb_offset, struct sge_eth_txq *q, u32 tls_end_offset) { u32 tls_rec_offset = tcp_seq - tls_record_start_seq(record); u8 prior_data[16] = {0}; u32 prior_data_len = 0; - u32 data_len; /* check if the skb is ending in middle of tag/HASH, its a big * trouble, send the packet before the HASH. */ - int remaining_record = tls_end_offset - skb->data_len; + int remaining_record = tls_end_offset - data_len; if (remaining_record > 0 && remaining_record < TLS_CIPHER_AES_GCM_128_TAG_SIZE) { - int trimmed_len = skb->data_len - + int trimmed_len = data_len - (TLS_CIPHER_AES_GCM_128_TAG_SIZE - remaining_record); - struct sk_buff *tmp_skb = NULL; /* don't process the pkt if it is only a partial tag */ - if (skb->data_len < TLS_CIPHER_AES_GCM_128_TAG_SIZE) + if (data_len < TLS_CIPHER_AES_GCM_128_TAG_SIZE) goto out; - WARN_ON(trimmed_len > skb->data_len); + WARN_ON(trimmed_len > data_len); - /* shift to those many bytes */ - tmp_skb = alloc_skb(0, GFP_KERNEL); - if (unlikely(!tmp_skb)) - goto out; - - chcr_ktls_skb_shift(tmp_skb, skb, trimmed_len); - /* free the last trimmed portion */ - dev_kfree_skb_any(skb); - skb = tmp_skb; + data_len = trimmed_len; atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_trimmed_pkts); } - data_len = skb->data_len; + /* check if the middle record's start point is 16 byte aligned. CTR * needs 16 byte aligned start point to start encryption. */ @@ -1825,9 +1801,6 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, } /* reset tcp_seq as per the prior_data_required len */ tcp_seq -= prior_data_len; - /* include prio_data_len for further calculation. - */ - data_len += prior_data_len; } /* reset snd una, so the middle record won't send the already * sent part. @@ -1844,6 +1817,7 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, tcp_push_no_fin, q, tx_info->port_id, prior_data, + data_len, skb_offset, prior_data_len)) { goto out; } @@ -1854,7 +1828,7 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, if (chcr_ktls_xmit_wr_short(skb, tx_info, q, tcp_seq, tcp_push_no_fin, mss, tls_rec_offset, prior_data, - prior_data_len)) { + prior_data_len, data_len, skb_offset)) { goto out; } @@ -1876,7 +1850,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) struct tls_record_info *record; struct chcr_ktls_info *tx_info; struct tls_context *tls_ctx; - struct sk_buff *local_skb; struct sge_eth_txq *q; struct adapter *adap; unsigned long flags; @@ -1898,14 +1871,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!tx_info)) goto out; - /* don't touch the original skb, make a new skb to extract each records - * and send them separately. - */ - local_skb = alloc_skb(0, GFP_KERNEL); - - if (unlikely(!local_skb)) - return NETDEV_TX_BUSY; - adap = tx_info->adap; stats = &adap->ch_ktls_stats; port_stats = &stats->ktls_port[tx_info->port_id]; @@ -1925,13 +1890,9 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) ntohl(th->ack_seq), ntohs(th->window)); if (ret) { - dev_kfree_skb_any(local_skb); return NETDEV_TX_BUSY; } - /* copy skb contents into local skb */ - chcr_ktls_skb_copy(skb, local_skb); - /* TCP segments can be in received either complete or partial. * chcr_end_part_handler will handle cases if complete record or end * part of the record is received. Incase of partial end part of record, @@ -1961,7 +1922,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) atomic64_inc(&port_stats->ktls_tx_skip_no_sync_data); goto out; } - /* increase page reference count of the record, so that there * won't be any chance of page free in middle if in case stack * receives ACK and try to delete the record. @@ -1977,44 +1937,28 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) tcp_seq, record->end_seq, tx_info->prev_seq, data_len); /* if a tls record is finishing in this SKB */ if (tls_end_offset <= data_len) { - struct sk_buff *nskb = NULL; - - if (tls_end_offset < data_len) { - nskb = alloc_skb(0, GFP_KERNEL); - if (unlikely(!nskb)) { - ret = -ENOMEM; - goto clear_ref; - } - - chcr_ktls_skb_shift(nskb, local_skb, - tls_end_offset); - } else { - /* its the only record in this skb, directly - * point it. - */ - nskb = local_skb; - } - ret = chcr_end_part_handler(tx_info, nskb, record, + ret = chcr_end_part_handler(tx_info, skb, record, tcp_seq, mss, (!th->fin && th->psh), q, + skb_offset, tls_end_offset, - (nskb == local_skb)); - - if (ret && nskb != local_skb) - dev_kfree_skb_any(local_skb); + skb_offset + + tls_end_offset == skb->len); data_len -= tls_end_offset; /* tcp_seq increment is required to handle next record. */ tcp_seq += tls_end_offset; + skb_offset += tls_end_offset; } else { - ret = chcr_short_record_handler(tx_info, local_skb, + ret = chcr_short_record_handler(tx_info, skb, record, tcp_seq, mss, (!th->fin && th->psh), + data_len, skb_offset, q, tls_end_offset); data_len = 0; } -clear_ref: + /* clear the frag ref count which increased locally before */ for (i = 0; i < record->num_frags; i++) { /* clear the frag ref count */ @@ -2022,7 +1966,8 @@ clear_ref: } /* if any failure, come out from the loop. */ if (ret) - goto out; + return NETDEV_TX_OK; + /* length should never be less than 0 */ WARN_ON(data_len < 0); @@ -2038,6 +1983,7 @@ clear_ref: if (th->fin) chcr_ktls_write_tcp_options(tx_info, skb, q, tx_info->tx_chan); + return NETDEV_TX_OK; out: dev_kfree_skb_any(skb); return NETDEV_TX_OK; From c68a28a9e2798a4602dde1c77046a3b577eb31f4 Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:35 +0530 Subject: [PATCH 369/710] ch_ktls: Correction in trimmed_len calculation trimmed length calculation goes wrong if skb has only tag part to send. It should be zero if there is no data bytes apart from TAG. Fixes: dc05f3df8fac ("chcr: Handle first or middle part of record") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 950841988ffe..4286decce095 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1729,10 +1729,13 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, if (remaining_record > 0 && remaining_record < TLS_CIPHER_AES_GCM_128_TAG_SIZE) { - int trimmed_len = data_len - - (TLS_CIPHER_AES_GCM_128_TAG_SIZE - remaining_record); - /* don't process the pkt if it is only a partial tag */ - if (data_len < TLS_CIPHER_AES_GCM_128_TAG_SIZE) + int trimmed_len = 0; + + if (tls_end_offset > TLS_CIPHER_AES_GCM_128_TAG_SIZE) + trimmed_len = data_len - + (TLS_CIPHER_AES_GCM_128_TAG_SIZE - + remaining_record); + if (!trimmed_len) goto out; WARN_ON(trimmed_len > data_len); From 83deb094dd5c636a790da3914008570c9fd1693f Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:36 +0530 Subject: [PATCH 370/710] ch_ktls: missing handling of header alone If an skb has only header part which doesn't start from beginning, is not being handled properly. Fixes: dc05f3df8fac ("chcr: Handle first or middle part of record") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 4286decce095..8a54fce9bfae 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1744,6 +1744,17 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_trimmed_pkts); } + /* check if it is only the header part. */ + if (tls_rec_offset + data_len <= (TLS_HEADER_SIZE + tx_info->iv_size)) { + if (chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss, + tcp_push_no_fin, q, + tx_info->port_id, prior_data, + data_len, skb_offset, prior_data_len)) + goto out; + + return 0; + } + /* check if the middle record's start point is 16 byte aligned. CTR * needs 16 byte aligned start point to start encryption. */ @@ -1812,20 +1823,6 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, goto out; atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_middle_pkts); } else { - /* Else means, its a partial first part of the record. Check if - * its only the header, don't need to send for encryption then. - */ - if (data_len <= TLS_HEADER_SIZE + tx_info->iv_size) { - if (chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss, - tcp_push_no_fin, q, - tx_info->port_id, - prior_data, - data_len, skb_offset, - prior_data_len)) { - goto out; - } - return 0; - } atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_start_pkts); } From 63ee4591fa2f97dc08ce37514f214fc0430e9dc3 Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:37 +0530 Subject: [PATCH 371/710] ch_ktls: Correction in middle record handling If a record starts in middle, reset TCB UNA so that we could avoid sending out extra packet which is needed to make it 16 byte aligned to start AES CTR. Check also considers prev_seq, which should be what is actually sent, not the skb data length. Avoid updating partial TAG to HW at any point of time, that's why we need to check if remaining part is smaller than TAG size, then reset TX_MAX to be TAG starting sequence number. Fixes: 5a4b9fe7fece ("cxgb4/chcr: complete record tx handling") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 50 ++++++++++++------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 8a54fce9bfae..026c66599d1e 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -827,7 +827,7 @@ static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, */ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, struct sge_eth_txq *q, u64 tcp_seq, - u64 tcp_ack, u64 tcp_win) + u64 tcp_ack, u64 tcp_win, bool offset) { bool first_wr = ((tx_info->prev_ack == 0) && (tx_info->prev_win == 0)); struct ch_ktls_port_stats_debug *port_stats; @@ -862,7 +862,7 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, cpl++; } /* reset snd una if it's a re-transmit pkt */ - if (tcp_seq != tx_info->prev_seq) { + if (tcp_seq != tx_info->prev_seq || offset) { /* reset snd_una */ port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id]; @@ -871,7 +871,8 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, TCB_SND_UNA_RAW_V (TCB_SND_UNA_RAW_M), TCB_SND_UNA_RAW_V(0), 0); - atomic64_inc(&port_stats->ktls_tx_ooo); + if (tcp_seq != tx_info->prev_seq) + atomic64_inc(&port_stats->ktls_tx_ooo); cpl++; } /* update ack */ @@ -1661,11 +1662,6 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, * accordingly. */ tcp_seq = tls_record_start_seq(record); - /* reset snd una, so the middle record won't send the already - * sent part. - */ - if (chcr_ktls_update_snd_una(tx_info, q)) - goto out; /* reset skb offset */ skb_offset = 0; @@ -1684,6 +1680,7 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, mss)) { goto out; } + tx_info->prev_seq = record->end_seq; return 0; out: dev_kfree_skb_any(nskb); @@ -1752,6 +1749,7 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, data_len, skb_offset, prior_data_len)) goto out; + tx_info->prev_seq = tcp_seq + data_len; return 0; } @@ -1832,6 +1830,7 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, goto out; } + tx_info->prev_seq = tcp_seq + data_len + prior_data_len; return 0; out: dev_kfree_skb_any(skb); @@ -1885,13 +1884,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) if (ret) return NETDEV_TX_BUSY; } - /* update tcb */ - ret = chcr_ktls_xmit_tcb_cpls(tx_info, q, ntohl(th->seq), - ntohl(th->ack_seq), - ntohs(th->window)); - if (ret) { - return NETDEV_TX_BUSY; - } /* TCP segments can be in received either complete or partial. * chcr_end_part_handler will handle cases if complete record or end @@ -1922,6 +1914,30 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) atomic64_inc(&port_stats->ktls_tx_skip_no_sync_data); goto out; } + tls_end_offset = record->end_seq - tcp_seq; + + pr_debug("seq 0x%x, end_seq 0x%x prev_seq 0x%x, datalen 0x%x\n", + tcp_seq, record->end_seq, tx_info->prev_seq, data_len); + /* update tcb for the skb */ + if (skb_data_len == data_len) { + u32 tx_max = tcp_seq; + + if (!tls_record_is_start_marker(record) && + tls_end_offset < TLS_CIPHER_AES_GCM_128_TAG_SIZE) + tx_max = record->end_seq - + TLS_CIPHER_AES_GCM_128_TAG_SIZE; + + ret = chcr_ktls_xmit_tcb_cpls(tx_info, q, tx_max, + ntohl(th->ack_seq), + ntohs(th->window), + tls_end_offset != + record->len); + if (ret) { + spin_unlock_irqrestore(&tx_ctx->base.lock, + flags); + goto out; + } + } /* increase page reference count of the record, so that there * won't be any chance of page free in middle if in case stack * receives ACK and try to delete the record. @@ -1931,10 +1947,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) /* lock cleared */ spin_unlock_irqrestore(&tx_ctx->base.lock, flags); - tls_end_offset = record->end_seq - tcp_seq; - pr_debug("seq 0x%x, end_seq 0x%x prev_seq 0x%x, datalen 0x%x\n", - tcp_seq, record->end_seq, tx_info->prev_seq, data_len); /* if a tls record is finishing in this SKB */ if (tls_end_offset <= data_len) { ret = chcr_end_part_handler(tx_info, skb, record, @@ -1973,7 +1986,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) } while (data_len > 0); - tx_info->prev_seq = ntohl(th->seq) + skb_data_len; atomic64_inc(&port_stats->ktls_tx_encrypted_packets); atomic64_add(skb_data_len, &port_stats->ktls_tx_encrypted_bytes); From 9478e083941c873d60a97b232760a14dec6c69d3 Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:38 +0530 Subject: [PATCH 372/710] ch_ktls: packet handling prior to start marker There could be a case where ACK for tls exchanges prior to start marker is missed out, and by the time tls is offloaded. This pkt should not be discarded and handled carefully. It could be plaintext alone or plaintext + finish as well. Fixes: 5a4b9fe7fece ("cxgb4/chcr: complete record tx handling") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 38 ++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 026c66599d1e..bbda71b7f98b 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1909,11 +1909,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } - if (unlikely(tls_record_is_start_marker(record))) { - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); - atomic64_inc(&port_stats->ktls_tx_skip_no_sync_data); - goto out; - } tls_end_offset = record->end_seq - tcp_seq; pr_debug("seq 0x%x, end_seq 0x%x prev_seq 0x%x, datalen 0x%x\n", @@ -1938,6 +1933,39 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } } + + if (unlikely(tls_record_is_start_marker(record))) { + atomic64_inc(&port_stats->ktls_tx_skip_no_sync_data); + /* If tls_end_offset < data_len, means there is some + * data after start marker, which needs encryption, send + * plaintext first and take skb refcount. else send out + * complete pkt as plaintext. + */ + if (tls_end_offset < data_len) + skb_get(skb); + else + tls_end_offset = data_len; + + ret = chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss, + (!th->fin && th->psh), q, + tx_info->port_id, NULL, + tls_end_offset, skb_offset, + 0); + + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + if (ret) { + /* free the refcount taken earlier */ + if (tls_end_offset < data_len) + dev_kfree_skb_any(skb); + goto out; + } + + data_len -= tls_end_offset; + tcp_seq = record->end_seq; + skb_offset += tls_end_offset; + continue; + } + /* increase page reference count of the record, so that there * won't be any chance of page free in middle if in case stack * receives ACK and try to delete the record. From 659bf0383d15b07e492e27443d87736b24171558 Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:39 +0530 Subject: [PATCH 373/710] ch_ktls: don't free skb before sending FIN If its a last packet and fin is set. Make sure FIN is informed to HW before skb gets freed. Fixes: 429765a149f1 ("chcr: handle partial end part of a record") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index bbda71b7f98b..a8062e038ebc 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1932,6 +1932,9 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) flags); goto out; } + + if (th->fin) + skb_get(skb); } if (unlikely(tls_record_is_start_marker(record))) { @@ -2006,8 +2009,11 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) __skb_frag_unref(&record->frags[i]); } /* if any failure, come out from the loop. */ - if (ret) + if (ret) { + if (th->fin) + dev_kfree_skb_any(skb); return NETDEV_TX_OK; + } /* length should never be less than 0 */ WARN_ON(data_len < 0); @@ -2020,8 +2026,10 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) /* tcp finish is set, send a separate tcp msg including all the options * as well. */ - if (th->fin) + if (th->fin) { chcr_ktls_write_tcp_options(tx_info, skb, q, tx_info->tx_chan); + dev_kfree_skb_any(skb); + } return NETDEV_TX_OK; out: From 21f82acbb8b4e8812521d405479b6fc3790078de Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:40 +0530 Subject: [PATCH 374/710] ch_ktls/cxgb4: handle partial tag alone SKBs If TCP congestion caused a very small packets which only has some part fo the TAG, and that too is not till the end. HW can't handle such case, so falling back to sw crypto in such cases. v1->v2: - Marked chcr_ktls_sw_fallback() static. Fixes: dc05f3df8fac ("chcr: Handle first or middle part of record") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 2 + .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 + .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 116 +++++++++++++++++- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.h | 1 + 4 files changed, 119 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 0273f40b85f7..17410fe86626 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -3573,6 +3573,8 @@ static int chcr_stats_show(struct seq_file *seq, void *v) atomic64_read(&adap->ch_ktls_stats.ktls_tx_complete_pkts)); seq_printf(seq, "TX trim pkts : %20llu\n", atomic64_read(&adap->ch_ktls_stats.ktls_tx_trimmed_pkts)); + seq_printf(seq, "TX sw fallback : %20llu\n", + atomic64_read(&adap->ch_ktls_stats.ktls_tx_fallback)); while (i < MAX_NPORTS) { ktls_port = &adap->ch_ktls_stats.ktls_port[i]; seq_printf(seq, "Port %d\n", i); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index e2a4941fa802..1b49f2fa9b18 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -388,6 +388,7 @@ struct ch_ktls_stats_debug { atomic64_t ktls_tx_retransmit_pkts; atomic64_t ktls_tx_complete_pkts; atomic64_t ktls_tx_trimmed_pkts; + atomic64_t ktls_tx_fallback; }; #endif diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index a8062e038ebc..b182c940b4a0 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1545,6 +1545,88 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info, return 0; } +static int chcr_ktls_tunnel_pkt(struct chcr_ktls_info *tx_info, + struct sk_buff *skb, + struct sge_eth_txq *q) +{ + u32 ctrl, iplen, maclen, wr_mid = 0, len16; + struct tx_sw_desc *sgl_sdesc; + struct fw_eth_tx_pkt_wr *wr; + struct cpl_tx_pkt_core *cpl; + unsigned int flits, ndesc; + int credits, last_desc; + u64 cntrl1, *end; + void *pos; + + ctrl = sizeof(*cpl); + flits = DIV_ROUND_UP(sizeof(*wr) + ctrl, 8); + + flits += chcr_sgl_len(skb_shinfo(skb)->nr_frags + 1); + len16 = DIV_ROUND_UP(flits, 2); + /* check how many descriptors needed */ + ndesc = DIV_ROUND_UP(flits, 8); + + credits = chcr_txq_avail(&q->q) - ndesc; + if (unlikely(credits < 0)) { + chcr_eth_txq_stop(q); + return -ENOMEM; + } + + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + + last_desc = q->q.pidx + ndesc - 1; + if (last_desc >= q->q.size) + last_desc -= q->q.size; + sgl_sdesc = &q->q.sdesc[last_desc]; + + if (unlikely(cxgb4_map_skb(tx_info->adap->pdev_dev, skb, + sgl_sdesc->addr) < 0)) { + memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr)); + q->mapping_err++; + return -ENOMEM; + } + + iplen = skb_network_header_len(skb); + maclen = skb_mac_header_len(skb); + + pos = &q->q.desc[q->q.pidx]; + end = (u64 *)pos + flits; + wr = pos; + + /* Firmware work request header */ + wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | + FW_WR_IMMDLEN_V(ctrl)); + + wr->equiq_to_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); + wr->r3 = 0; + + cpl = (void *)(wr + 1); + + /* CPL header */ + cpl->ctrl0 = htonl(TXPKT_OPCODE_V(CPL_TX_PKT) | + TXPKT_INTF_V(tx_info->tx_chan) | + TXPKT_PF_V(tx_info->adap->pf)); + cpl->pack = 0; + cntrl1 = TXPKT_CSUM_TYPE_V(tx_info->ip_family == AF_INET ? + TX_CSUM_TCPIP : TX_CSUM_TCPIP6); + cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) | + TXPKT_IPHDR_LEN_V(iplen); + /* checksum offload */ + cpl->ctrl1 = cpu_to_be64(cntrl1); + cpl->len = htons(skb->len); + + pos = cpl + 1; + + cxgb4_write_sgl(skb, &q->q, pos, end, 0, sgl_sdesc->addr); + sgl_sdesc->skb = skb; + chcr_txq_advance(&q->q, ndesc); + cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); + return 0; +} + /* * chcr_ktls_copy_record_in_skb * @nskb - new skb where the frags to be added. @@ -1733,7 +1815,7 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, (TLS_CIPHER_AES_GCM_128_TAG_SIZE - remaining_record); if (!trimmed_len) - goto out; + return FALLBACK; WARN_ON(trimmed_len > data_len); @@ -1837,6 +1919,34 @@ out: return NETDEV_TX_BUSY; } +static int chcr_ktls_sw_fallback(struct sk_buff *skb, + struct chcr_ktls_info *tx_info, + struct sge_eth_txq *q) +{ + u32 data_len, skb_offset; + struct sk_buff *nskb; + struct tcphdr *th; + + nskb = tls_encrypt_skb(skb); + + if (!nskb) + return 0; + + th = tcp_hdr(nskb); + skb_offset = skb_transport_offset(nskb) + tcp_hdrlen(nskb); + data_len = nskb->len - skb_offset; + skb_tx_timestamp(nskb); + + if (chcr_ktls_tunnel_pkt(tx_info, nskb, q)) + goto out; + + tx_info->prev_seq = ntohl(th->seq) + data_len; + atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_fallback); + return 0; +out: + dev_kfree_skb_any(nskb); + return 0; +} /* nic tls TX handler */ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -2012,6 +2122,10 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) if (ret) { if (th->fin) dev_kfree_skb_any(skb); + + if (ret == FALLBACK) + return chcr_ktls_sw_fallback(skb, tx_info, q); + return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h index c1651b1431a0..18b3b1f02415 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h @@ -26,6 +26,7 @@ #define CHCR_KTLS_WR_SIZE (CHCR_PLAIN_TX_DATA_LEN +\ sizeof(struct cpl_tx_sec_pdu)) +#define FALLBACK 35 enum ch_ktls_open_state { CH_KTLS_OPEN_SUCCESS = 0, From 7d01c428c86b525dc780226924d74df2048cf411 Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:41 +0530 Subject: [PATCH 375/710] ch_ktls: tcb update fails sometimes context id and port id should be filled while sending tcb update. Fixes: 5a4b9fe7fece ("cxgb4/chcr: complete record tx handling") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index b182c940b4a0..a732051b21e4 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -733,7 +733,8 @@ static int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input) } static void *__chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, - u32 tid, void *pos, u16 word, u64 mask, + u32 tid, void *pos, u16 word, + struct sge_eth_txq *q, u64 mask, u64 val, u32 reply) { struct cpl_set_tcb_field_core *cpl; @@ -742,7 +743,10 @@ static void *__chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, /* ULP_TXPKT */ txpkt = pos; - txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0)); + txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | + ULP_TXPKT_CHANNELID_V(tx_info->port_id) | + ULP_TXPKT_FID_V(q->q.cntxt_id) | + ULP_TXPKT_RO_F); txpkt->len = htonl(DIV_ROUND_UP(CHCR_SET_TCB_FIELD_LEN, 16)); /* ULPTX_IDATA sub-command */ @@ -797,7 +801,7 @@ static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, } else { u8 buf[48] = {0}; - __chcr_write_cpl_set_tcb_ulp(tx_info, tid, buf, word, + __chcr_write_cpl_set_tcb_ulp(tx_info, tid, buf, word, q, mask, val, reply); return chcr_copy_to_txd(buf, &q->q, pos, @@ -805,7 +809,7 @@ static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info, } } - pos = __chcr_write_cpl_set_tcb_ulp(tx_info, tid, pos, word, + pos = __chcr_write_cpl_set_tcb_ulp(tx_info, tid, pos, word, q, mask, val, reply); /* check again if we are at the end of the queue */ From 83a95df04bee77c74df5151c961b19d870a70180 Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Mon, 9 Nov 2020 16:21:42 +0530 Subject: [PATCH 376/710] ch_ktls: stop the txq if reaches threshold Stop the queue and ask for the credits if queue reaches to threashold. Fixes: 5a4b9fe7fece ("cxgb4/chcr: complete record tx handling") Signed-off-by: Rohit Maheshwari Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index a732051b21e4..c24485c0d512 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -835,7 +835,7 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, { bool first_wr = ((tx_info->prev_ack == 0) && (tx_info->prev_win == 0)); struct ch_ktls_port_stats_debug *port_stats; - u32 len, cpl = 0, ndesc, wr_len; + u32 len, cpl = 0, ndesc, wr_len, wr_mid = 0; struct fw_ulptx_wr *wr; int credits; void *pos; @@ -851,6 +851,11 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, return NETDEV_TX_BUSY; } + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + pos = &q->q.desc[q->q.pidx]; /* make space for WR, we'll fill it later when we know all the cpls * being sent out and have complete length. @@ -905,7 +910,8 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); wr->cookie = 0; /* fill len in wr field */ - wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16))); + wr->flowid_len16 = htonl(wr_mid | + FW_WR_LEN16_V(DIV_ROUND_UP(len, 16))); ndesc = DIV_ROUND_UP(len, 64); chcr_txq_advance(&q->q, ndesc); @@ -986,6 +992,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, struct tcphdr *tcp; int len16, pktlen; struct iphdr *ip; + u32 wr_mid = 0; int credits; u8 buf[150]; u64 cntrl1; @@ -1010,6 +1017,11 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, return NETDEV_TX_BUSY; } + if (unlikely(credits < ETHTXQ_STOP_THRES)) { + chcr_eth_txq_stop(q); + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + } + pos = &q->q.desc[q->q.pidx]; wr = pos; @@ -1017,7 +1029,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | FW_WR_IMMDLEN_V(ctrl)); - wr->equiq_to_len16 = htonl(FW_WR_LEN16_V(len16)); + wr->equiq_to_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); wr->r3 = 0; cpl = (void *)(wr + 1); From 460cd17e9f7d60eaa22028baa6a056c478fa7dc6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Nov 2020 19:51:20 -0800 Subject: [PATCH 377/710] net: switch to the kernel.org patchwork instance Move to the kernel.org patchwork instance, it has significantly lower latency for accessing from Europe and the US. Other quirks include the reply bot. Link: https://lore.kernel.org/r/20201110035120.642746-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/networking/netdev-FAQ.rst | 4 ++-- Documentation/process/stable-kernel-rules.rst | 2 +- .../it_IT/process/stable-kernel-rules.rst | 2 +- MAINTAINERS | 20 +++++++++---------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index d5c9320901c3..21537766be4d 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -110,7 +110,7 @@ Q: I sent a patch and I'm wondering what happened to it? Q: How can I tell whether it got merged? A: Start by looking at the main patchworks queue for netdev: - http://patchwork.ozlabs.org/project/netdev/list/ + https://patchwork.kernel.org/project/netdevbpf/list/ The "State" field will tell you exactly where things are at with your patch. @@ -152,7 +152,7 @@ networking subsystem, and then hands them off to Greg. There is a patchworks queue that you can see here: - http://patchwork.ozlabs.org/bundle/davem/stable/?state=* + https://patchwork.kernel.org/bundle/netdev/stable/?state=* It contains the patches which Dave has selected, but not yet handed off to Greg. If Greg already has the patch, then it will be here: diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 06f743b612c4..3973556250e1 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -39,7 +39,7 @@ Procedure for submitting patches to the -stable tree submission guidelines as described in :ref:`Documentation/networking/netdev-FAQ.rst ` after first checking the stable networking queue at - https://patchwork.ozlabs.org/bundle/davem/stable/?series=&submitter=&state=*&q=&archive= + https://patchwork.kernel.org/bundle/netdev/stable/?state=* to ensure the requested patch is not already queued up. - Security patches should not be handled (solely) by the -stable review process but should follow the procedures in diff --git a/Documentation/translations/it_IT/process/stable-kernel-rules.rst b/Documentation/translations/it_IT/process/stable-kernel-rules.rst index 4f206cee31a7..283d62541c4f 100644 --- a/Documentation/translations/it_IT/process/stable-kernel-rules.rst +++ b/Documentation/translations/it_IT/process/stable-kernel-rules.rst @@ -46,7 +46,7 @@ Procedura per sottomettere patch per i sorgenti -stable :ref:`Documentation/translations/it_IT/networking/netdev-FAQ.rst `; ma solo dopo aver verificato al seguente indirizzo che la patch non sia già in coda: - https://patchwork.ozlabs.org/bundle/davem/stable/?series=&submitter=&state=*&q=&archive= + https://patchwork.kernel.org/bundle/netdev/stable/?state=* - Una patch di sicurezza non dovrebbero essere gestite (solamente) dal processo di revisione -stable, ma dovrebbe seguire le procedure descritte in :ref:`Documentation/translations/it_IT/admin-guide/security-bugs.rst `. diff --git a/MAINTAINERS b/MAINTAINERS index fa11fe773df5..eaded39a3858 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1279,7 +1279,7 @@ M: Igor Russkikh L: netdev@vger.kernel.org S: Supported W: https://www.marvell.com/ -Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: Documentation/networking/device_drivers/ethernet/aquantia/atlantic.rst F: drivers/net/ethernet/aquantia/atlantic/ @@ -11173,7 +11173,7 @@ M: Tariq Toukan L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com -Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: drivers/net/ethernet/mellanox/mlx4/en_* MELLANOX ETHERNET DRIVER (mlx5e) @@ -11181,7 +11181,7 @@ M: Saeed Mahameed L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com -Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: drivers/net/ethernet/mellanox/mlx5/core/en_* MELLANOX ETHERNET INNOVA DRIVERS @@ -11189,7 +11189,7 @@ R: Boris Pismenny L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com -Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: drivers/net/ethernet/mellanox/mlx5/core/accel/* F: drivers/net/ethernet/mellanox/mlx5/core/en_accel/* F: drivers/net/ethernet/mellanox/mlx5/core/fpga/* @@ -11201,7 +11201,7 @@ M: Ido Schimmel L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com -Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: drivers/net/ethernet/mellanox/mlxsw/ F: tools/testing/selftests/drivers/net/mlxsw/ @@ -11210,7 +11210,7 @@ M: mlxsw@nvidia.com L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com -Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: drivers/net/ethernet/mellanox/mlxfw/ MELLANOX HARDWARE PLATFORM SUPPORT @@ -11229,7 +11229,7 @@ L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org S: Supported W: http://www.mellanox.com -Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: drivers/net/ethernet/mellanox/mlx4/ F: include/linux/mlx4/ @@ -11250,7 +11250,7 @@ L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org S: Supported W: http://www.mellanox.com -Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: Documentation/networking/device_drivers/ethernet/mellanox/ F: drivers/net/ethernet/mellanox/mlx5/core/ F: include/linux/mlx5/ @@ -12130,7 +12130,7 @@ M: Jakub Kicinski L: netdev@vger.kernel.org S: Maintained W: http://www.linuxfoundation.org/en/Net -Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git F: Documentation/devicetree/bindings/net/ @@ -12175,7 +12175,7 @@ M: Jakub Kicinski L: netdev@vger.kernel.org S: Maintained W: http://www.linuxfoundation.org/en/Net -Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/ B: mailto:netdev@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git From 676650d007e06fddcf3fe38238251d71bd179641 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Wed, 11 Nov 2020 17:48:52 -0800 Subject: [PATCH 378/710] Input: resistive-adc-touch - fix kconfig dependency on IIO_BUFFER When TOUCHSCREEN_ADC is enabled and IIO_BUFFER is disabled, it results in the following Kbuild warning: WARNING: unmet direct dependencies detected for IIO_BUFFER_CB Depends on [n]: IIO [=y] && IIO_BUFFER [=n] Selected by [y]: - TOUCHSCREEN_ADC [=y] && !UML && INPUT [=y] && INPUT_TOUCHSCREEN [=y] && IIO [=y] The reason is that TOUCHSCREEN_ADC selects IIO_BUFFER_CB without depending on or selecting IIO_BUFFER while IIO_BUFFER_CB depends on IIO_BUFFER. This can also fail building the kernel. Honor the kconfig dependency to remove unmet direct dependency warnings and avoid any potential build failures. Fixes: aa132ffb6b0a ("input: touchscreen: resistive-adc-touch: add generic resistive ADC touchscreen") Signed-off-by: Necip Fazil Yildiran Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20201102221504.541279-1-fazilyildiran@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index f012fe746df0..cc18f54ea887 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -96,6 +96,7 @@ config TOUCHSCREEN_AD7879_SPI config TOUCHSCREEN_ADC tristate "Generic ADC based resistive touchscreen" depends on IIO + select IIO_BUFFER select IIO_BUFFER_CB help Say Y here if you want to use the generic ADC From 52755b66ddcef2e897778fac5656df18817b59ab Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 10 Nov 2020 22:46:14 +0800 Subject: [PATCH 379/710] cosa: Add missing kfree in error path of cosa_write If memory allocation for 'kbuf' succeed, cosa_write() doesn't have a corresponding kfree() in exception handling. Thus add kfree() for this function implementation. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Wang Hai Acked-by: Jan "Yenya" Kasprzak Link: https://lore.kernel.org/r/20201110144614.43194-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/wan/cosa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index f8aed0696d77..2369ca250cd6 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -889,6 +889,7 @@ static ssize_t cosa_write(struct file *file, chan->tx_status = 1; spin_unlock_irqrestore(&cosa->lock, flags); up(&chan->wsem); + kfree(kbuf); return -ERESTARTSYS; } } From ae3d6083acf60116d4f409677452399547ed2009 Mon Sep 17 00:00:00 2001 From: "jingle.wu" Date: Wed, 11 Nov 2020 20:06:24 -0800 Subject: [PATCH 380/710] Input: elan_i2c - fix firmware update on newer ICs The argument to iap page type command depends on the firmware page size. Fixes: bfd9b92bc8f9 ("Input: elan_i2c - handle firmware updated on newer ICs") Signed-off-by: Jingle Wu Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c.h | 2 +- drivers/input/mouse/elan_i2c_core.c | 3 ++- drivers/input/mouse/elan_i2c_i2c.c | 10 +++++----- drivers/input/mouse/elan_i2c_smbus.c | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/input/mouse/elan_i2c.h b/drivers/input/mouse/elan_i2c.h index c75b00c45d75..36e3cd908671 100644 --- a/drivers/input/mouse/elan_i2c.h +++ b/drivers/input/mouse/elan_i2c.h @@ -78,7 +78,7 @@ struct elan_transport_ops { int (*iap_reset)(struct i2c_client *client); int (*prepare_fw_update)(struct i2c_client *client, u16 ic_type, - u8 iap_version); + u8 iap_version, u16 fw_page_size); int (*write_fw_block)(struct i2c_client *client, u16 fw_page_size, const u8 *page, u16 checksum, int idx); int (*finish_fw_update)(struct i2c_client *client, diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index c599e21a8478..61ed3f5ca219 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -497,7 +497,8 @@ static int __elan_update_firmware(struct elan_tp_data *data, u16 sw_checksum = 0, fw_checksum = 0; error = data->ops->prepare_fw_update(client, data->ic_type, - data->iap_version); + data->iap_version, + data->fw_page_size); if (error) return error; diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c index 5a496d4ffa49..13dc097eb6c6 100644 --- a/drivers/input/mouse/elan_i2c_i2c.c +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -517,7 +517,7 @@ static int elan_i2c_set_flash_key(struct i2c_client *client) return 0; } -static int elan_read_write_iap_type(struct i2c_client *client) +static int elan_read_write_iap_type(struct i2c_client *client, u16 fw_page_size) { int error; u16 constant; @@ -526,7 +526,7 @@ static int elan_read_write_iap_type(struct i2c_client *client) do { error = elan_i2c_write_cmd(client, ETP_I2C_IAP_TYPE_CMD, - ETP_I2C_IAP_TYPE_REG); + fw_page_size / 2); if (error) { dev_err(&client->dev, "cannot write iap type: %d\n", error); @@ -543,7 +543,7 @@ static int elan_read_write_iap_type(struct i2c_client *client) constant = le16_to_cpup((__le16 *)val); dev_dbg(&client->dev, "iap type reg: 0x%04x\n", constant); - if (constant == ETP_I2C_IAP_TYPE_REG) + if (constant == fw_page_size / 2) return 0; } while (--retry > 0); @@ -553,7 +553,7 @@ static int elan_read_write_iap_type(struct i2c_client *client) } static int elan_i2c_prepare_fw_update(struct i2c_client *client, u16 ic_type, - u8 iap_version) + u8 iap_version, u16 fw_page_size) { struct device *dev = &client->dev; int error; @@ -594,7 +594,7 @@ static int elan_i2c_prepare_fw_update(struct i2c_client *client, u16 ic_type, } if (ic_type >= 0x0D && iap_version >= 1) { - error = elan_read_write_iap_type(client); + error = elan_read_write_iap_type(client, fw_page_size); if (error) return error; } diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c index 8ff823751f3b..1820f1cfc1dc 100644 --- a/drivers/input/mouse/elan_i2c_smbus.c +++ b/drivers/input/mouse/elan_i2c_smbus.c @@ -340,7 +340,7 @@ static int elan_smbus_set_flash_key(struct i2c_client *client) } static int elan_smbus_prepare_fw_update(struct i2c_client *client, u16 ic_type, - u8 iap_version) + u8 iap_version, u16 fw_page_size) { struct device *dev = &client->dev; int len; From d19d8d345eecd9247cbe6cbf27aef271bd88aba7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 10 Nov 2020 17:52:24 -0800 Subject: [PATCH 381/710] fscrypt: fix inline encryption not used on new files The new helper function fscrypt_prepare_new_inode() runs before S_ENCRYPTED has been set on the new inode. This accidentally made fscrypt_select_encryption_impl() never enable inline encryption on newly created files, due to its use of fscrypt_needs_contents_encryption() which only returns true when S_ENCRYPTED is set. Fix this by using S_ISREG() directly instead of fscrypt_needs_contents_encryption(), analogous to what select_encryption_mode() does. I didn't notice this earlier because by design, the user-visible behavior is the same (other than performance, potentially) regardless of whether inline encryption is used or not. Fixes: a992b20cd4ee ("fscrypt: add fscrypt_prepare_new_inode() and fscrypt_set_context()") Reviewed-by: Satya Tangirala Link: https://lore.kernel.org/r/20201111015224.303073-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/inline_crypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 89bffa82ed74..c57bebfa48fe 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -74,7 +74,7 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci) int i; /* The file must need contents encryption, not filenames encryption */ - if (!fscrypt_needs_contents_encryption(inode)) + if (!S_ISREG(inode->i_mode)) return 0; /* The crypto mode must have a blk-crypto counterpart */ From edb8d77a939c422f3ae57f557cd1d6899d9bafad Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Mon, 21 Sep 2020 02:58:07 -0400 Subject: [PATCH 382/710] drm/i915/gvt: Set ENHANCED_FRAME_CAP bit Specification says the bit7 of the DPCD MAX_LANE_COUNT (offset 0x02) must be set to 1 when comes to the displayport version 1.2. This patch respects the definition. W/o this patch, guest i915 driver can only set the resolution to 1024*768, and complains about the unsuccessful link training: [ 5.692193] i915 0000:00:02.0: [drm] *ERROR* index 0, lane_count 1 Link Training Unsuccessful Fixes: e2e02cbb5beb ("drm/i915/gvt: make dpcd_fix_data supports DP1.2") Reviewed-by: Zhenyu Wang Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200921065807.247847-1-tina.zhang@intel.com --- drivers/gpu/drm/i915/gvt/display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 7ba16ddfe75f..d7898e87791f 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -164,7 +164,7 @@ static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = { /* let the virtual display supports DP1.2 */ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { - 0x12, 0x014, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + 0x12, 0x014, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static void emulate_monitor_status_change(struct intel_vgpu *vgpu) From 94e2bd0b259ed39a755fdded47e6734acf1ce464 Mon Sep 17 00:00:00 2001 From: Claire Chang Date: Tue, 10 Nov 2020 16:49:08 +0800 Subject: [PATCH 383/710] rfkill: Fix use-after-free in rfkill_resume() If a device is getting removed or reprobed during resume, use-after-free might happen. For example, h5_btrtl_resume() schedules a work queue for device reprobing, which of course requires removal first. If the removal happens in parallel with the device_resume() and wins the race to acquire device_lock(), removal may remove the device from the PM lists and all, but device_resume() is already running and will continue when the lock can be acquired, thus calling rfkill_resume(). During this, if rfkill_set_block() is then called after the corresponding *_unregister() and kfree() are called, there will be an use-after-free in hci_rfkill_set_block(): BUG: KASAN: use-after-free in hci_rfkill_set_block+0x58/0xc0 [bluetooth] ... Call trace: dump_backtrace+0x0/0x154 show_stack+0x20/0x2c dump_stack+0xbc/0x12c print_address_description+0x88/0x4b0 __kasan_report+0x144/0x168 kasan_report+0x10/0x18 check_memory_region+0x19c/0x1ac __kasan_check_write+0x18/0x24 hci_rfkill_set_block+0x58/0xc0 [bluetooth] rfkill_set_block+0x9c/0x120 rfkill_resume+0x34/0x70 dpm_run_callback+0xf0/0x1f4 device_resume+0x210/0x22c Fix this by checking rfkill->registered in rfkill_resume(). device_del() in rfkill_unregister() requires device_lock() and the whole rfkill_resume() is also protected by the same lock via device_resume(), we can make sure either the rfkill->registered is false before rfkill_resume() starts or the rfkill device won't be unregistered before rfkill_resume() returns. As async_resume() holds a reference to the device, at this level there can be no use-after-free; only in the user that doesn't expect this scenario. Fixes: 8589086f4efd ("Bluetooth: hci_h5: Turn off RTL8723BS on suspend, reprobe on resume") Signed-off-by: Claire Chang Link: https://lore.kernel.org/r/20201110084908.219088-1-tientzu@chromium.org [edit commit message for clarity and add more info provided later] Signed-off-by: Johannes Berg --- net/rfkill/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 971c73c7d34c..97101c55763d 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -876,6 +876,9 @@ static int rfkill_resume(struct device *dev) rfkill->suspended = false; + if (!rfkill->registered) + return 0; + if (!rfkill->persistent) { cur = !!(rfkill->state & RFKILL_BLOCK_SW); rfkill_set_block(rfkill, cur); From 33f16855dcb973f745c51882d0e286601ff3be2b Mon Sep 17 00:00:00 2001 From: Sam Nobs Date: Tue, 10 Nov 2020 09:50:06 +1300 Subject: [PATCH 384/710] tty: serial: imx: fix potential deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enabling the lock dependency validator has revealed that the way spinlocks are used in the IMX serial port could result in a deadlock. Specifically, imx_uart_int() acquires a spinlock without disabling the interrupts, meaning that another interrupt could come along and try to acquire the same spinlock, potentially causing the two to wait for each other indefinitely. Use spin_lock_irqsave() instead to disable interrupts upon acquisition of the spinlock. Fixes: c974991d2620 ("tty:serial:imx: use spin_lock instead of spin_lock_irqsave in isr") Reviewed-by: Uwe Kleine-König Signed-off-by: Sam Nobs Link: https://lore.kernel.org/r/1604955006-9363-1-git-send-email-samuel.nobs@taitradio.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 1731d9728865..3c53a3c89959 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -942,8 +942,14 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id) struct imx_port *sport = dev_id; unsigned int usr1, usr2, ucr1, ucr2, ucr3, ucr4; irqreturn_t ret = IRQ_NONE; + unsigned long flags = 0; - spin_lock(&sport->port.lock); + /* + * IRQs might not be disabled upon entering this interrupt handler, + * e.g. when interrupt handlers are forced to be threaded. To support + * this scenario as well, disable IRQs when acquiring the spinlock. + */ + spin_lock_irqsave(&sport->port.lock, flags); usr1 = imx_uart_readl(sport, USR1); usr2 = imx_uart_readl(sport, USR2); @@ -1013,7 +1019,7 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id) ret = IRQ_HANDLED; } - spin_unlock(&sport->port.lock); + spin_unlock_irqrestore(&sport->port.lock, flags); return ret; } From d4122754442799187d5d537a9c039a49a67e57f1 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 10 Nov 2020 19:35:41 +0100 Subject: [PATCH 385/710] speakup: Do not let the line discipline be used several times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Speakup has only one speakup_tty variable to store the tty it is managing. This makes sense since its codebase currently assumes that there is only one user who controls the screen reading. That however means that we have to forbid using the line discipline several times, otherwise the second closure would try to free a NULL ldisc_data, leading to general protection fault: 0000 [#1] SMP KASAN PTI RIP: 0010:spk_ttyio_ldisc_close+0x2c/0x60 Call Trace: tty_ldisc_release+0xa2/0x340 tty_release_struct+0x17/0xd0 tty_release+0x9d9/0xcc0 __fput+0x231/0x740 task_work_run+0x12c/0x1a0 do_exit+0x9b5/0x2230 ? release_task+0x1240/0x1240 ? __do_page_fault+0x562/0xa30 do_group_exit+0xd5/0x2a0 __x64_sys_exit_group+0x35/0x40 do_syscall_64+0x89/0x2b0 ? page_fault+0x8/0x30 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Cc: stable@vger.kernel.org Reported-by: 秦世松 Signed-off-by: Samuel Thibault Tested-by: Shisong Qin Link: https://lore.kernel.org/r/20201110183541.fzgnlwhjpgqzjeth@function Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/spk_ttyio.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c index a831ff64f8ba..e69ea0c7aa20 100644 --- a/drivers/accessibility/speakup/spk_ttyio.c +++ b/drivers/accessibility/speakup/spk_ttyio.c @@ -49,15 +49,25 @@ static int spk_ttyio_ldisc_open(struct tty_struct *tty) if (!tty->ops->write) return -EOPNOTSUPP; + + mutex_lock(&speakup_tty_mutex); + if (speakup_tty) { + mutex_unlock(&speakup_tty_mutex); + return -EBUSY; + } speakup_tty = tty; ldisc_data = kmalloc(sizeof(*ldisc_data), GFP_KERNEL); - if (!ldisc_data) + if (!ldisc_data) { + speakup_tty = NULL; + mutex_unlock(&speakup_tty_mutex); return -ENOMEM; + } init_completion(&ldisc_data->completion); ldisc_data->buf_free = true; speakup_tty->disc_data = ldisc_data; + mutex_unlock(&speakup_tty_mutex); return 0; } From e67c139c488e84e7eae6c333231e791f0e89b3fb Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Wed, 11 Nov 2020 10:51:36 +0800 Subject: [PATCH 386/710] tty: serial: imx: keep console clocks always on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For below code, there has chance to cause deadlock in SMP system: Thread 1: clk_enable_lock(); pr_info("debug message"); clk_enable_unlock(); Thread 2: imx_uart_console_write() clk_enable() clk_enable_lock(); Thread 1: Acuired clk enable_lock -> printk -> console_trylock_spinning Thread 2: console_unlock() -> imx_uart_console_write -> clk_disable -> Acquite clk enable_lock So the patch is to keep console port clocks always on like other console drivers. Fixes: 1cf93e0d5488 ("serial: imx: remove the uart_console() check") Acked-by: Uwe Kleine-König Signed-off-by: Fugang Duan Link: https://lore.kernel.org/r/20201111025136.29818-1-fugang.duan@nxp.com Cc: stable [fix up build warning - gregkh] Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 3c53a3c89959..cacf7266a262 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2008,16 +2008,6 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) unsigned int ucr1; unsigned long flags = 0; int locked = 1; - int retval; - - retval = clk_enable(sport->clk_per); - if (retval) - return; - retval = clk_enable(sport->clk_ipg); - if (retval) { - clk_disable(sport->clk_per); - return; - } if (sport->port.sysrq) locked = 0; @@ -2053,9 +2043,6 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) if (locked) spin_unlock_irqrestore(&sport->port.lock, flags); - - clk_disable(sport->clk_ipg); - clk_disable(sport->clk_per); } /* @@ -2156,15 +2143,14 @@ imx_uart_console_setup(struct console *co, char *options) retval = uart_set_options(&sport->port, co, baud, parity, bits, flow); - clk_disable(sport->clk_ipg); if (retval) { - clk_unprepare(sport->clk_ipg); + clk_disable_unprepare(sport->clk_ipg); goto error_console; } - retval = clk_prepare(sport->clk_per); + retval = clk_prepare_enable(sport->clk_per); if (retval) - clk_unprepare(sport->clk_ipg); + clk_disable_unprepare(sport->clk_ipg); error_console: return retval; From 425af483523b76bc78e14674a430579d38b2a593 Mon Sep 17 00:00:00 2001 From: Zheng Zengkai Date: Wed, 11 Nov 2020 20:44:26 +0800 Subject: [PATCH 387/710] serial: ar933x_uart: disable clk on error handling path in probe ar933x_uart_probe() does not invoke clk_disable_unprepare() on one error handling path. This patch fixes that. Fixes: 9be1064fe524 ("serial: ar933x_uart: add RS485 support") Reported-by: Hulk Robot Signed-off-by: Zheng Zengkai Link: https://lore.kernel.org/r/20201111124426.42638-1-zhengzengkai@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ar933x_uart.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index 0c80a79d7442..c2be7cf91399 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -789,8 +789,10 @@ static int ar933x_uart_probe(struct platform_device *pdev) goto err_disable_clk; up->gpios = mctrl_gpio_init(port, 0); - if (IS_ERR(up->gpios) && PTR_ERR(up->gpios) != -ENOSYS) - return PTR_ERR(up->gpios); + if (IS_ERR(up->gpios) && PTR_ERR(up->gpios) != -ENOSYS) { + ret = PTR_ERR(up->gpios); + goto err_disable_clk; + } up->rts_gpiod = mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_RTS); From 1d18288555b3265f84d08f1f75582415e4ec343a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 11 Nov 2020 19:33:57 +0100 Subject: [PATCH 388/710] mac80211: fix memory leak on filtered powersave frames After the status rework, ieee80211_tx_status_ext is leaking un-acknowledged packets for stations in powersave mode. To fix this, move the code handling those packets from __ieee80211_tx_status into ieee80211_tx_status_ext Reported-by: Tobias Waldvogel Fixes: 3318111cf63d ("mac80211: reduce duplication in tx status functions") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201111183359.43528-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/status.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 6feb45135020..3485610755ef 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -49,7 +49,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, int ac; if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER | - IEEE80211_TX_CTL_AMPDU)) { + IEEE80211_TX_CTL_AMPDU | + IEEE80211_TX_CTL_HW_80211_ENCAP)) { ieee80211_free_txskb(&local->hw, skb); return; } @@ -915,15 +916,6 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, ieee80211_mpsp_trigger_process( ieee80211_get_qos_ctl(hdr), sta, true, acked); - if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) { - /* - * The STA is in power save mode, so assume - * that this TX packet failed because of that. - */ - ieee80211_handle_filtered_frame(local, sta, skb); - return; - } - if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) && (ieee80211_is_data(hdr->frame_control)) && (rates_idx != -1)) @@ -1150,6 +1142,12 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, -info->status.ack_signal); } } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) { + /* + * The STA is in power save mode, so assume + * that this TX packet failed because of that. + */ + if (skb) + ieee80211_handle_filtered_frame(local, sta, skb); return; } else if (noack_success) { /* nothing to do here, do not account as lost */ From 4fe40b8e1566dad04c87fbf299049a1d0d4bd58d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 11 Nov 2020 19:33:58 +0100 Subject: [PATCH 389/710] mac80211: minstrel: remove deferred sampling code Deferring sampling attempts to the second stage has some bad interactions with drivers that process the rate table in hardware and use the probe flag to indicate probing packets (e.g. most mt76 drivers). On affected drivers it can lead to probing not working at all. If the link conditions turn worse, it might not be such a good idea to do a lot of sampling for lower rates in this case. Fix this by simply skipping the sample attempt instead of deferring it, but keep the checks that would allow it to be sampled if it was skipped too often, but only if it has less than 95% success probability. Also ensure that IEEE80211_TX_CTL_RATE_CTRL_PROBE is set for all probing packets. Cc: stable@vger.kernel.org Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201111183359.43528-2-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel.c | 25 ++++--------------------- net/mac80211/rc80211_minstrel.h | 1 - 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 86bc469a28bc..a8a940e97a9a 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -287,12 +287,6 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->r[ndx].stats.success += success; } - if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0)) - mi->sample_packets++; - - if (mi->sample_deferred > 0) - mi->sample_deferred--; - if (time_after(jiffies, mi->last_stats_update + mp->update_interval / (mp->new_avg ? 2 : 1))) minstrel_update_stats(mp, mi); @@ -367,7 +361,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, return; delta = (mi->total_packets * sampling_ratio / 100) - - (mi->sample_packets + mi->sample_deferred / 2); + mi->sample_packets; /* delta < 0: no sampling required */ prev_sample = mi->prev_sample; @@ -376,7 +370,6 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, return; if (mi->total_packets >= 10000) { - mi->sample_deferred = 0; mi->sample_packets = 0; mi->total_packets = 0; } else if (delta > mi->n_rates * 2) { @@ -401,19 +394,8 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, * rate sampling method should be used. * Respect such rates that are not sampled for 20 interations. */ - if (mrr_capable && - msr->perfect_tx_time > mr->perfect_tx_time && - msr->stats.sample_skipped < 20) { - /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark - * packets that have the sampling rate deferred to the - * second MRR stage. Increase the sample counter only - * if the deferred sample rate was actually used. - * Use the sample_deferred counter to make sure that - * the sampling is not done in large bursts */ - info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; - rate++; - mi->sample_deferred++; - } else { + if (msr->perfect_tx_time < mr->perfect_tx_time || + msr->stats.sample_skipped >= 20) { if (!msr->sample_limit) return; @@ -433,6 +415,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, rate->idx = mi->r[ndx].rix; rate->count = minstrel_get_retry_count(&mi->r[ndx], info); + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index dbb43bcd3c45..86cd80b3ffde 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -126,7 +126,6 @@ struct minstrel_sta_info { u8 max_prob_rate; unsigned int total_packets; unsigned int sample_packets; - int sample_deferred; unsigned int sample_row; unsigned int sample_column; From b2911a84396f72149dce310a3b64d8948212c1b3 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 11 Nov 2020 19:33:59 +0100 Subject: [PATCH 390/710] mac80211: minstrel: fix tx status processing corner case Some drivers fill the status rate list without setting the rate index after the final rate to -1. minstrel_ht already deals with this, but minstrel doesn't, which causes it to get stuck at the lowest rate on these drivers. Fix this by checking the count as well. Cc: stable@vger.kernel.org Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201111183359.43528-3-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index a8a940e97a9a..b13b1da19386 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -274,7 +274,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, success = !!(info->flags & IEEE80211_TX_STAT_ACK); for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - if (ar[i].idx < 0) + if (ar[i].idx < 0 || !ar[i].count) break; ndx = rix_to_ndx(mi, ar[i].idx); From 966e7ea434484a006700c144bca629a14f93530c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 10 Nov 2020 14:46:55 +0100 Subject: [PATCH 391/710] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index a4d3c578fbd8..fe6f529ac82c 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -1,3 +1,4 @@ +CONFIG_UAPI_HEADER_TEST=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_WATCH_QUEUE=y From 78d732e1f326f74f240d416af9484928303d9951 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 11 Nov 2020 16:26:25 +0100 Subject: [PATCH 392/710] s390/cpum_sf.c: fix file permission for cpum_sfb_size This file is installed by the s390 CPU Measurement sampling facility device driver to export supported minimum and maximum sample buffer sizes. This file is read by lscpumf tool to display the details of the device driver capabilities. The lscpumf tool might be invoked by a non-root user. In this case it does not print anything because the file contents can not be read. Fix this by allowing read access for all users. Reading the file contents is ok, changing the file contents is left to the root user only. For further reference and details see: [1] https://github.com/ibm-s390-tools/s390-tools/issues/97 Fixes: 69f239ed335a ("s390/cpum_sf: Dynamically extend the sampling buffer if overflows occur") Cc: # 3.14 Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_sf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 4f9e4626df55..f100c9209743 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -2228,4 +2228,4 @@ out: } arch_initcall(init_cpum_sampling_pmu); -core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); From b98467fe96d2415836d154ecfe1cd389bf4147b5 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 29 Oct 2020 12:03:35 +0200 Subject: [PATCH 393/710] thermal: ti-soc-thermal: Disable the CPU PM notifier for OMAP4430 It has been observed that on OMAP4430 (ES2.0, ES2.1 and ES2.3) the enabled notifier causes errors on the DTEMP readout values: ti-soc-thermal 4a002260.bandgap: in range ADC val: 52 ti-soc-thermal 4a002260.bandgap: in range ADC val: 64 ti-soc-thermal 4a002260.bandgap: in range ADC val: 64 ti-soc-thermal 4a002260.bandgap: out of range ADC val: 0 thermal thermal_zone0: failed to read out thermal zone (-5) ti-soc-thermal 4a002260.bandgap: out of range ADC val: 0 thermal thermal_zone0: failed to read out thermal zone (-5) ti-soc-thermal 4a002260.bandgap: out of range ADC val: 4 thermal thermal_zone0: failed to read out thermal zone (-5) ti-soc-thermal 4a002260.bandgap: in range ADC val: 100 raw 100 translates to 133 Celsius on omap4-sdp, triggering shutdown due to critical temperature. When the notifier is disable for OMAP4430 the DTEMP values are stable: ti-soc-thermal 4a002260.bandgap: in range ADC val: 56 ti-soc-thermal 4a002260.bandgap: in range ADC val: 56 ti-soc-thermal 4a002260.bandgap: in range ADC val: 57 ti-soc-thermal 4a002260.bandgap: in range ADC val: 57 ti-soc-thermal 4a002260.bandgap: in range ADC val: 56 Fixes: 5093402e5b44 ("thermal: ti-soc-thermal: Enable addition power management") Signed-off-by: Peter Ujfalusi Tested-by: Tony Lindgren Acked-by: Keerthy Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20201029100335.27665-1-peter.ujfalusi@ti.com --- drivers/thermal/ti-soc-thermal/ti-bandgap.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index 5e596168ba73..dcac99f327b0 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -864,6 +865,17 @@ static struct ti_bandgap *ti_bandgap_build(struct platform_device *pdev) return bgp; } +/* + * List of SoCs on which the CPU PM notifier can cause erros on the DTEMP + * readout. + * Enabled notifier on these machines results in erroneous, random values which + * could trigger unexpected thermal shutdown. + */ +static const struct soc_device_attribute soc_no_cpu_notifier[] = { + { .machine = "OMAP4430" }, + { /* sentinel */ }, +}; + /*** Device driver call backs ***/ static @@ -1020,7 +1032,8 @@ int ti_bandgap_probe(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP bgp->nb.notifier_call = bandgap_omap_cpu_notifier; - cpu_pm_register_notifier(&bgp->nb); + if (!soc_device_match(soc_no_cpu_notifier)) + cpu_pm_register_notifier(&bgp->nb); #endif return 0; @@ -1056,7 +1069,8 @@ int ti_bandgap_remove(struct platform_device *pdev) struct ti_bandgap *bgp = platform_get_drvdata(pdev); int i; - cpu_pm_unregister_notifier(&bgp->nb); + if (!soc_device_match(soc_no_cpu_notifier)) + cpu_pm_unregister_notifier(&bgp->nb); /* Remove sensor interfaces */ for (i = 0; i < bgp->conf->sensor_count; i++) { From bc923818b190c8b63c91a47702969c8053574f5b Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sun, 8 Nov 2020 17:27:41 +0800 Subject: [PATCH 394/710] gfs2: fix possible reference leak in gfs2_check_blk_type In the fail path of gfs2_check_blk_type, forgetting to call gfs2_glock_dq_uninit will result in rgd_gh reference leak. Signed-off-by: Zhang Qilong Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 92d799a193b8..f7addc6197ed 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2529,13 +2529,13 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) rbm.rgd = rgd; error = gfs2_rbm_from_block(&rbm, no_addr); - if (WARN_ON_ONCE(error)) - goto fail; - - if (gfs2_testbit(&rbm, false) != type) - error = -ESTALE; + if (!WARN_ON_ONCE(error)) { + if (gfs2_testbit(&rbm, false) != type) + error = -ESTALE; + } gfs2_glock_dq_uninit(&rgd_gh); + fail: return error; } From ee5e58418a854755201eb4952b1230d873a457d5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Nov 2020 14:36:56 +0100 Subject: [PATCH 395/710] HID: logitech-dj: Handle quad/bluetooth keyboards with a builtin trackpad Some quad/bluetooth keyboards, such as the Dinovo Edge (Y-RAY81) have a builtin touchpad. In this case when asking the receiver for paired devices, we get only 1 paired device with a device_type of REPORT_TYPE_KEYBOARD. This means that we do not instantiate a second dj_hiddev for the mouse (as we normally would) and thus there is no place for us to forward the mouse input reports to, causing the touchpad part of the keyboard to not work. There is no way for us to detect these keyboards, so this commit adds an array with device-ids for such keyboards and when a keyboard is on this list it adds STD_MOUSE to the reports_supported bitmap for the dj_hiddev created for the keyboard fixing the touchpad not working. Using a list of device-ids for this is not ideal, but there are only very few such keyboards so this should be fine. Besides the Dinovo Edge, other known wireless Logitech keyboards with a builtin touchpad are: * Dinovo Mini (TODO add its device-id to the list) * K400 (uses a unifying receiver so is not affected) * K600 (uses a unifying receiver so is not affected) Cc: stable@vger.kernel.org BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1811424 Fixes: f2113c3020ef ("HID: logitech-dj: add support for Logitech Bluetooth Mini-Receiver") Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-dj.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 38ee25a813b9..1cafb65428b0 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -866,11 +866,23 @@ static void logi_dj_recv_queue_notification(struct dj_receiver_dev *djrcv_dev, schedule_work(&djrcv_dev->work); } +/* + * Some quad/bluetooth keyboards have a builtin touchpad in this case we see + * only 1 paired device with a device_type of REPORT_TYPE_KEYBOARD. For the + * touchpad to work we must also forward mouse input reports to the dj_hiddev + * created for the keyboard (instead of forwarding them to a second paired + * device with a device_type of REPORT_TYPE_MOUSE as we normally would). + */ +static const u16 kbd_builtin_touchpad_ids[] = { + 0xb309, /* Dinovo Edge */ +}; + static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev, struct hidpp_event *hidpp_report, struct dj_workitem *workitem) { struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); + int i, id; workitem->type = WORKITEM_TYPE_PAIRED; workitem->device_type = hidpp_report->params[HIDPP_PARAM_DEVICE_INFO] & @@ -882,6 +894,13 @@ static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev, workitem->reports_supported |= STD_KEYBOARD | MULTIMEDIA | POWER_KEYS | MEDIA_CENTER | HIDPP; + id = (workitem->quad_id_msb << 8) | workitem->quad_id_lsb; + for (i = 0; i < ARRAY_SIZE(kbd_builtin_touchpad_ids); i++) { + if (id == kbd_builtin_touchpad_ids[i]) { + workitem->reports_supported |= STD_MOUSE; + break; + } + } break; case REPORT_TYPE_MOUSE: workitem->reports_supported |= STD_MOUSE | HIDPP; From fd8feec665fef840277515a5c2b9b7c3e3970fad Mon Sep 17 00:00:00 2001 From: Paul Barker Date: Wed, 11 Nov 2020 16:46:43 +0000 Subject: [PATCH 396/710] hwmon: (pwm-fan) Fix RPM calculation To convert the number of pulses counted into an RPM estimation, we need to divide by the width of our measurement interval instead of multiplying by it. If the width of the measurement interval is zero we don't update the RPM value to avoid dividing by zero. We also don't need to do 64-bit division, with 32-bits we can handle a fan running at over 4 million RPM. Signed-off-by: Paul Barker Link: https://lore.kernel.org/r/20201111164643.7087-1-pbarker@konsulko.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pwm-fan.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index bdba2143021a..1f63807c0399 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -54,16 +54,18 @@ static irqreturn_t pulse_handler(int irq, void *dev_id) static void sample_timer(struct timer_list *t) { struct pwm_fan_ctx *ctx = from_timer(ctx, t, rpm_timer); + unsigned int delta = ktime_ms_delta(ktime_get(), ctx->sample_start); int pulses; - u64 tmp; - pulses = atomic_read(&ctx->pulses); - atomic_sub(pulses, &ctx->pulses); - tmp = (u64)pulses * ktime_ms_delta(ktime_get(), ctx->sample_start) * 60; - do_div(tmp, ctx->pulses_per_revolution * 1000); - ctx->rpm = tmp; + if (delta) { + pulses = atomic_read(&ctx->pulses); + atomic_sub(pulses, &ctx->pulses); + ctx->rpm = (unsigned int)(pulses * 1000 * 60) / + (ctx->pulses_per_revolution * delta); + + ctx->sample_start = ktime_get(); + } - ctx->sample_start = ktime_get(); mod_timer(&ctx->rpm_timer, jiffies + HZ); } From 4d64bb4ba5ecf4831448cdb2fe16d0ae91b2b40b Mon Sep 17 00:00:00 2001 From: Brad Campbell Date: Thu, 12 Nov 2020 14:08:23 +1100 Subject: [PATCH 397/710] hwmon: (applesmc) Re-work SMC comms Commit fff2d0f701e6 ("hwmon: (applesmc) avoid overlong udelay()") introduced an issue whereby communication with the SMC became unreliable with write errors like : [ 120.378614] applesmc: send_byte(0x00, 0x0300) fail: 0x40 [ 120.378621] applesmc: LKSB: write data fail [ 120.512782] applesmc: send_byte(0x00, 0x0300) fail: 0x40 [ 120.512787] applesmc: LKSB: write data fail The original code appeared to be timing sensitive and was not reliable with the timing changes in the aforementioned commit. This patch re-factors the SMC communication to remove the timing dependencies and restore function with the changes previously committed. Tested on : MacbookAir6,2 MacBookPro11,1 iMac12,2, MacBookAir1,1, MacBookAir3,1 Fixes: fff2d0f701e6 ("hwmon: (applesmc) avoid overlong udelay()") Reported-by: Andreas Kemnade Tested-by: Andreas Kemnade # MacBookAir6,2 Acked-by: Arnd Bergmann Signed-off-by: Brad Campbell Signed-off-by: Henrik Rydberg Link: https://lore.kernel.org/r/194a7d71-a781-765a-d177-c962ef296b90@fnarfbargle.com Signed-off-by: Guenter Roeck --- drivers/hwmon/applesmc.c | 130 ++++++++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 48 deletions(-) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index a18887990f4a..79b498f816fe 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -32,6 +32,7 @@ #include #include #include +#include /* data port used by Apple SMC */ #define APPLESMC_DATA_PORT 0x300 @@ -42,10 +43,13 @@ #define APPLESMC_MAX_DATA_LENGTH 32 -/* wait up to 128 ms for a status change. */ -#define APPLESMC_MIN_WAIT 0x0010 -#define APPLESMC_RETRY_WAIT 0x0100 -#define APPLESMC_MAX_WAIT 0x20000 +/* Apple SMC status bits */ +#define SMC_STATUS_AWAITING_DATA BIT(0) /* SMC has data waiting to be read */ +#define SMC_STATUS_IB_CLOSED BIT(1) /* Will ignore any input */ +#define SMC_STATUS_BUSY BIT(2) /* Command in progress */ + +/* Initial wait is 8us */ +#define APPLESMC_MIN_WAIT 0x0008 #define APPLESMC_READ_CMD 0x10 #define APPLESMC_WRITE_CMD 0x11 @@ -151,65 +155,84 @@ static unsigned int key_at_index; static struct workqueue_struct *applesmc_led_wq; /* - * wait_read - Wait for a byte to appear on SMC port. Callers must - * hold applesmc_lock. + * Wait for specific status bits with a mask on the SMC. + * Used before all transactions. + * This does 10 fast loops of 8us then exponentially backs off for a + * minimum total wait of 262ms. Depending on usleep_range this could + * run out past 500ms. */ -static int wait_read(void) + +static int wait_status(u8 val, u8 mask) { - unsigned long end = jiffies + (APPLESMC_MAX_WAIT * HZ) / USEC_PER_SEC; u8 status; int us; + int i; - for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) { - usleep_range(us, us * 16); + us = APPLESMC_MIN_WAIT; + for (i = 0; i < 24 ; i++) { status = inb(APPLESMC_CMD_PORT); - /* read: wait for smc to settle */ - if (status & 0x01) + if ((status & mask) == val) return 0; - /* timeout: give up */ - if (time_after(jiffies, end)) - break; + usleep_range(us, us * 2); + if (i > 9) + us <<= 1; } - - pr_warn("wait_read() fail: 0x%02x\n", status); return -EIO; } -/* - * send_byte - Write to SMC port, retrying when necessary. Callers - * must hold applesmc_lock. - */ +/* send_byte - Write to SMC data port. Callers must hold applesmc_lock. */ + static int send_byte(u8 cmd, u16 port) { - u8 status; - int us; - unsigned long end = jiffies + (APPLESMC_MAX_WAIT * HZ) / USEC_PER_SEC; + int status; + + status = wait_status(0, SMC_STATUS_IB_CLOSED); + if (status) + return status; + /* + * This needs to be a separate read looking for bit 0x04 + * after bit 0x02 falls. If consolidated with the wait above + * this extra read may not happen if status returns both + * simultaneously and this would appear to be required. + */ + status = wait_status(SMC_STATUS_BUSY, SMC_STATUS_BUSY); + if (status) + return status; outb(cmd, port); - for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) { - usleep_range(us, us * 16); - status = inb(APPLESMC_CMD_PORT); - /* write: wait for smc to settle */ - if (status & 0x02) - continue; - /* ready: cmd accepted, return */ - if (status & 0x04) - return 0; - /* timeout: give up */ - if (time_after(jiffies, end)) - break; - /* busy: long wait and resend */ - udelay(APPLESMC_RETRY_WAIT); - outb(cmd, port); - } - - pr_warn("send_byte(0x%02x, 0x%04x) fail: 0x%02x\n", cmd, port, status); - return -EIO; + return 0; } +/* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */ + static int send_command(u8 cmd) { - return send_byte(cmd, APPLESMC_CMD_PORT); + int ret; + + ret = wait_status(0, SMC_STATUS_IB_CLOSED); + if (ret) + return ret; + outb(cmd, APPLESMC_CMD_PORT); + return 0; +} + +/* + * Based on logic from the Apple driver. This is issued before any interaction + * If busy is stuck high, issue a read command to reset the SMC state machine. + * If busy is stuck high after the command then the SMC is jammed. + */ + +static int smc_sane(void) +{ + int ret; + + ret = wait_status(0, SMC_STATUS_BUSY); + if (!ret) + return ret; + ret = send_command(APPLESMC_READ_CMD); + if (ret) + return ret; + return wait_status(0, SMC_STATUS_BUSY); } static int send_argument(const char *key) @@ -226,6 +249,11 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) { u8 status, data = 0; int i; + int ret; + + ret = smc_sane(); + if (ret) + return ret; if (send_command(cmd) || send_argument(key)) { pr_warn("%.4s: read arg fail\n", key); @@ -239,7 +267,8 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) } for (i = 0; i < len; i++) { - if (wait_read()) { + if (wait_status(SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY, + SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY)) { pr_warn("%.4s: read data[%d] fail\n", key, i); return -EIO; } @@ -250,19 +279,24 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) for (i = 0; i < 16; i++) { udelay(APPLESMC_MIN_WAIT); status = inb(APPLESMC_CMD_PORT); - if (!(status & 0x01)) + if (!(status & SMC_STATUS_AWAITING_DATA)) break; data = inb(APPLESMC_DATA_PORT); } if (i) pr_warn("flushed %d bytes, last value is: %d\n", i, data); - return 0; + return wait_status(0, SMC_STATUS_BUSY); } static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len) { int i; + int ret; + + ret = smc_sane(); + if (ret) + return ret; if (send_command(cmd) || send_argument(key)) { pr_warn("%s: write arg fail\n", key); @@ -281,7 +315,7 @@ static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len) } } - return 0; + return wait_status(0, SMC_STATUS_BUSY); } static int read_register_count(unsigned int *count) From c27168a04a438a457c100253b1aaf0c779218aae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Nov 2020 14:36:57 +0100 Subject: [PATCH 398/710] HID: logitech-hidpp: Add HIDPP_CONSUMER_VENDOR_KEYS quirk for the Dinovo Edge Like the MX5000 and MX5500 quad/bluetooth keyboards the Dinovo Edge also needs the HIDPP_CONSUMER_VENDOR_KEYS quirk for some special keys to work. Specifically without this the "Phone" and the 'A' - 'D' Smart Keys do not send any events. In addition to fixing these keys not sending any events, adding the Bluetooth match, so that hid-logitech-hidpp is used instead of the generic HID driver, also adds battery monitoring support when the keyboard is connected over Bluetooth. Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-hidpp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 730036650f7d..40b7c75b3384 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -3972,6 +3972,9 @@ static const struct hid_device_id hidpp_devices[] = { { /* Keyboard MX5000 (Bluetooth-receiver in HID proxy mode) */ LDJ_DEVICE(0xb305), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, + { /* Dinovo Edge (Bluetooth-receiver in HID proxy mode) */ + LDJ_DEVICE(0xb309), + .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, { /* Keyboard MX5500 (Bluetooth-receiver in HID proxy mode) */ LDJ_DEVICE(0xb30b), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, @@ -4014,6 +4017,9 @@ static const struct hid_device_id hidpp_devices[] = { { /* MX5000 keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb305), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, + { /* Dinovo Edge keyboard over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb309), + .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, { /* MX5500 keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb30b), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, From 7940fb035abd88040d56be209962feffa33b03d0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Nov 2020 14:36:58 +0100 Subject: [PATCH 399/710] HID: Add Logitech Dinovo Edge battery quirk The battery status is also being reported by the logitech-hidpp driver, so ignore the standard HID battery status to avoid reporting the same info twice. Note the logitech-hidpp battery driver provides more info, such as properly differentiating between charging and discharging. Also the standard HID battery info seems to be wrong, reporting a capacity of just 26% after fully charging the device. Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index af361cd04bf0..f170feaac40b 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -750,6 +750,7 @@ #define USB_VENDOR_ID_LOGITECH 0x046d #define USB_DEVICE_ID_LOGITECH_AUDIOHUB 0x0a0e #define USB_DEVICE_ID_LOGITECH_T651 0xb00c +#define USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD 0xb309 #define USB_DEVICE_ID_LOGITECH_C007 0xc007 #define USB_DEVICE_ID_LOGITECH_C077 0xc077 #define USB_DEVICE_ID_LOGITECH_RECEIVER 0xc101 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 9770db624bfa..4dca11392459 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -319,6 +319,9 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD), HID_BATTERY_QUIRK_IGNORE }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, + USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), + HID_BATTERY_QUIRK_IGNORE }, {} }; From 5e844cc37a5cbaa460e68f9a989d321d63088a89 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Nov 2020 20:07:10 +0100 Subject: [PATCH 400/710] spi: Introduce device-managed SPI controller allocation SPI driver probing currently comprises two steps, whereas removal comprises only one step: spi_alloc_master() spi_register_controller() spi_unregister_controller() That's because spi_unregister_controller() calls device_unregister() instead of device_del(), thereby releasing the reference on the spi_controller which was obtained by spi_alloc_master(). An SPI driver's private data is contained in the same memory allocation as the spi_controller struct. Thus, once spi_unregister_controller() has been called, the private data is inaccessible. But some drivers need to access it after spi_unregister_controller() to perform further teardown steps. Introduce devm_spi_alloc_master() and devm_spi_alloc_slave(), which release a reference on the spi_controller struct only after the driver has unbound, thereby keeping the memory allocation accessible. Change spi_unregister_controller() to not release a reference if the spi_controller was allocated by one of these new devm functions. The present commit is small enough to be backportable to stable. It allows fixing drivers which use the private data in their ->remove() hook after it's been freed. It also allows fixing drivers which neglect to release a reference on the spi_controller in the probe error path. Long-term, most SPI drivers shall be moved over to the devm functions introduced herein. The few that can't shall be changed in a treewide commit to explicitly release the last reference on the controller. That commit shall amend spi_unregister_controller() to no longer release a reference, thereby completing the migration. As a result, the behaviour will be less surprising and more consistent with subsystems such as IIO, which also includes the private data in the allocation of the generic iio_dev struct, but calls device_del() in iio_device_unregister(). Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/272bae2ef08abd21388c98e23729886663d19192.1605121038.git.lukas@wunner.de Signed-off-by: Mark Brown --- drivers/spi/spi.c | 58 ++++++++++++++++++++++++++++++++++++++++- include/linux/spi/spi.h | 19 ++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 7566482c052c..05c75f890ace 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2442,6 +2442,49 @@ struct spi_controller *__spi_alloc_controller(struct device *dev, } EXPORT_SYMBOL_GPL(__spi_alloc_controller); +static void devm_spi_release_controller(struct device *dev, void *ctlr) +{ + spi_controller_put(*(struct spi_controller **)ctlr); +} + +/** + * __devm_spi_alloc_controller - resource-managed __spi_alloc_controller() + * @dev: physical device of SPI controller + * @size: how much zeroed driver-private data to allocate + * @slave: whether to allocate an SPI master (false) or SPI slave (true) + * Context: can sleep + * + * Allocate an SPI controller and automatically release a reference on it + * when @dev is unbound from its driver. Drivers are thus relieved from + * having to call spi_controller_put(). + * + * The arguments to this function are identical to __spi_alloc_controller(). + * + * Return: the SPI controller structure on success, else NULL. + */ +struct spi_controller *__devm_spi_alloc_controller(struct device *dev, + unsigned int size, + bool slave) +{ + struct spi_controller **ptr, *ctlr; + + ptr = devres_alloc(devm_spi_release_controller, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return NULL; + + ctlr = __spi_alloc_controller(dev, size, slave); + if (ctlr) { + *ptr = ctlr; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return ctlr; +} +EXPORT_SYMBOL_GPL(__devm_spi_alloc_controller); + #ifdef CONFIG_OF static int of_spi_get_gpio_numbers(struct spi_controller *ctlr) { @@ -2778,6 +2821,11 @@ int devm_spi_register_controller(struct device *dev, } EXPORT_SYMBOL_GPL(devm_spi_register_controller); +static int devm_spi_match_controller(struct device *dev, void *res, void *ctlr) +{ + return *(struct spi_controller **)res == ctlr; +} + static int __unregister(struct device *dev, void *null) { spi_unregister_device(to_spi_device(dev)); @@ -2819,7 +2867,15 @@ void spi_unregister_controller(struct spi_controller *ctlr) list_del(&ctlr->list); mutex_unlock(&board_lock); - device_unregister(&ctlr->dev); + device_del(&ctlr->dev); + + /* Release the last reference on the controller if its driver + * has not yet been converted to devm_spi_alloc_master/slave(). + */ + if (!devres_find(ctlr->dev.parent, devm_spi_release_controller, + devm_spi_match_controller, ctlr)) + put_device(&ctlr->dev); + /* free bus id */ mutex_lock(&board_lock); if (found == ctlr) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 99380c0825db..b390fdac1587 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -734,6 +734,25 @@ static inline struct spi_controller *spi_alloc_slave(struct device *host, return __spi_alloc_controller(host, size, true); } +struct spi_controller *__devm_spi_alloc_controller(struct device *dev, + unsigned int size, + bool slave); + +static inline struct spi_controller *devm_spi_alloc_master(struct device *dev, + unsigned int size) +{ + return __devm_spi_alloc_controller(dev, size, false); +} + +static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, + unsigned int size) +{ + if (!IS_ENABLED(CONFIG_SPI_SLAVE)) + return NULL; + + return __devm_spi_alloc_controller(dev, size, true); +} + extern int spi_register_controller(struct spi_controller *ctlr); extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); From e1483ac030fb4c57734289742f1c1d38dca61e22 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Nov 2020 20:07:20 +0100 Subject: [PATCH 401/710] spi: bcm2835: Fix use-after-free on unbind bcm2835_spi_remove() accesses the driver's private data after calling spi_unregister_controller() even though that function releases the last reference on the spi_controller and thereby frees the private data. Fix by switching over to the new devm_spi_alloc_master() helper which keeps the private data accessible until the driver has unbound. Fixes: f8043872e796 ("spi: add driver for BCM2835") Reported-by: Sascha Hauer Reported-by: Florian Fainelli Signed-off-by: Lukas Wunner Cc: # v3.10+: 123456789abc: spi: Introduce device-managed SPI controller allocation Cc: # v3.10+ Cc: Vladimir Oltean Tested-by: Florian Fainelli Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/ad66e0a0ad96feb848814842ecf5b6a4539ef35c.1605121038.git.lukas@wunner.de Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 7104cf17b848..197485f2c2b2 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -1278,7 +1278,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev) struct bcm2835_spi *bs; int err; - ctlr = spi_alloc_master(&pdev->dev, ALIGN(sizeof(*bs), + ctlr = devm_spi_alloc_master(&pdev->dev, ALIGN(sizeof(*bs), dma_get_cache_alignment())); if (!ctlr) return -ENOMEM; @@ -1299,23 +1299,17 @@ static int bcm2835_spi_probe(struct platform_device *pdev) bs->ctlr = ctlr; bs->regs = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(bs->regs)) { - err = PTR_ERR(bs->regs); - goto out_controller_put; - } + if (IS_ERR(bs->regs)) + return PTR_ERR(bs->regs); bs->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(bs->clk)) { - err = dev_err_probe(&pdev->dev, PTR_ERR(bs->clk), - "could not get clk\n"); - goto out_controller_put; - } + if (IS_ERR(bs->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(bs->clk), + "could not get clk\n"); bs->irq = platform_get_irq(pdev, 0); - if (bs->irq <= 0) { - err = bs->irq ? bs->irq : -ENODEV; - goto out_controller_put; - } + if (bs->irq <= 0) + return bs->irq ? bs->irq : -ENODEV; clk_prepare_enable(bs->clk); @@ -1349,8 +1343,6 @@ out_dma_release: bcm2835_dma_release(ctlr, bs); out_clk_disable: clk_disable_unprepare(bs->clk); -out_controller_put: - spi_controller_put(ctlr); return err; } From e13ee6cc4781edaf8c7321bee19217e3702ed481 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Nov 2020 20:07:30 +0100 Subject: [PATCH 402/710] spi: bcm2835aux: Fix use-after-free on unbind bcm2835aux_spi_remove() accesses the driver's private data after calling spi_unregister_master() even though that function releases the last reference on the spi_master and thereby frees the private data. Fix by switching over to the new devm_spi_alloc_master() helper which keeps the private data accessible until the driver has unbound. Fixes: b9dd3f6d4172 ("spi: bcm2835aux: Fix controller unregister order") Signed-off-by: Lukas Wunner Cc: # v4.4+: 123456789abc: spi: Introduce device-managed SPI controller allocation Cc: # v4.4+: b9dd3f6d4172: spi: bcm2835aux: Fix controller unregister order Cc: # v4.4+ Link: https://lore.kernel.org/r/b290b06357d0c0bdee9cecc539b840a90630f101.1605121038.git.lukas@wunner.de Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835aux.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c index 03b034c15d2b..fd58547110e6 100644 --- a/drivers/spi/spi-bcm2835aux.c +++ b/drivers/spi/spi-bcm2835aux.c @@ -494,7 +494,7 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev) unsigned long clk_hz; int err; - master = spi_alloc_master(&pdev->dev, sizeof(*bs)); + master = devm_spi_alloc_master(&pdev->dev, sizeof(*bs)); if (!master) return -ENOMEM; @@ -524,29 +524,24 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev) /* the main area */ bs->regs = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(bs->regs)) { - err = PTR_ERR(bs->regs); - goto out_master_put; - } + if (IS_ERR(bs->regs)) + return PTR_ERR(bs->regs); bs->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(bs->clk)) { - err = PTR_ERR(bs->clk); dev_err(&pdev->dev, "could not get clk: %d\n", err); - goto out_master_put; + return PTR_ERR(bs->clk); } bs->irq = platform_get_irq(pdev, 0); - if (bs->irq <= 0) { - err = bs->irq ? bs->irq : -ENODEV; - goto out_master_put; - } + if (bs->irq <= 0) + return bs->irq ? bs->irq : -ENODEV; /* this also enables the HW block */ err = clk_prepare_enable(bs->clk); if (err) { dev_err(&pdev->dev, "could not prepare clock: %d\n", err); - goto out_master_put; + return err; } /* just checking if the clock returns a sane value */ @@ -581,8 +576,6 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev) out_clk_disable: clk_disable_unprepare(bs->clk); -out_master_put: - spi_master_put(master); return err; } From 63c5395bb7a9777a33f0e7b5906f2c0170a23692 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Nov 2020 20:07:40 +0100 Subject: [PATCH 403/710] spi: bcm-qspi: Fix use-after-free on unbind bcm_qspi_remove() calls spi_unregister_master() even though bcm_qspi_probe() calls devm_spi_register_master(). The spi_master is therefore unregistered and freed twice on unbind. Moreover, since commit 0392727c261b ("spi: bcm-qspi: Handle clock probe deferral"), bcm_qspi_probe() leaks the spi_master allocation if the call to devm_clk_get_optional() fails. Fix by switching over to the new devm_spi_alloc_master() helper which keeps the private data accessible until the driver has unbound and also avoids the spi_master leak on probe. While at it, fix an ordering issue in bcm_qspi_remove() wherein spi_unregister_master() is called after uninitializing the hardware, disabling the clock and freeing an IRQ data structure. The correct order is to call spi_unregister_master() *before* those teardown steps because bus accesses may still be ongoing until that function returns. Fixes: fa236a7ef240 ("spi: bcm-qspi: Add Broadcom MSPI driver") Signed-off-by: Lukas Wunner Cc: # v4.9+: 123456789abc: spi: Introduce device-managed SPI controller allocation Cc: # v4.9+ Cc: Kamal Dasu Acked-by: Florian Fainelli Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/5e31a9a59fd1c0d0b795b2fe219f25e5ee855f9d.1605121038.git.lukas@wunner.de Signed-off-by: Mark Brown --- drivers/spi/spi-bcm-qspi.c | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 14c9d0133bce..c028446c7460 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1327,7 +1327,7 @@ int bcm_qspi_probe(struct platform_device *pdev, data = of_id->data; - master = spi_alloc_master(dev, sizeof(struct bcm_qspi)); + master = devm_spi_alloc_master(dev, sizeof(struct bcm_qspi)); if (!master) { dev_err(dev, "error allocating spi_master\n"); return -ENOMEM; @@ -1367,21 +1367,17 @@ int bcm_qspi_probe(struct platform_device *pdev, if (res) { qspi->base[MSPI] = devm_ioremap_resource(dev, res); - if (IS_ERR(qspi->base[MSPI])) { - ret = PTR_ERR(qspi->base[MSPI]); - goto qspi_resource_err; - } + if (IS_ERR(qspi->base[MSPI])) + return PTR_ERR(qspi->base[MSPI]); } else { - goto qspi_resource_err; + return 0; } res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "bspi"); if (res) { qspi->base[BSPI] = devm_ioremap_resource(dev, res); - if (IS_ERR(qspi->base[BSPI])) { - ret = PTR_ERR(qspi->base[BSPI]); - goto qspi_resource_err; - } + if (IS_ERR(qspi->base[BSPI])) + return PTR_ERR(qspi->base[BSPI]); qspi->bspi_mode = true; } else { qspi->bspi_mode = false; @@ -1392,18 +1388,14 @@ int bcm_qspi_probe(struct platform_device *pdev, res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cs_reg"); if (res) { qspi->base[CHIP_SELECT] = devm_ioremap_resource(dev, res); - if (IS_ERR(qspi->base[CHIP_SELECT])) { - ret = PTR_ERR(qspi->base[CHIP_SELECT]); - goto qspi_resource_err; - } + if (IS_ERR(qspi->base[CHIP_SELECT])) + return PTR_ERR(qspi->base[CHIP_SELECT]); } qspi->dev_ids = kcalloc(num_irqs, sizeof(struct bcm_qspi_dev_id), GFP_KERNEL); - if (!qspi->dev_ids) { - ret = -ENOMEM; - goto qspi_resource_err; - } + if (!qspi->dev_ids) + return -ENOMEM; for (val = 0; val < num_irqs; val++) { irq = -1; @@ -1484,7 +1476,7 @@ int bcm_qspi_probe(struct platform_device *pdev, qspi->xfer_mode.addrlen = -1; qspi->xfer_mode.hp = -1; - ret = devm_spi_register_master(&pdev->dev, master); + ret = spi_register_master(master); if (ret < 0) { dev_err(dev, "can't register master\n"); goto qspi_reg_err; @@ -1497,8 +1489,6 @@ qspi_reg_err: clk_disable_unprepare(qspi->clk); qspi_probe_err: kfree(qspi->dev_ids); -qspi_resource_err: - spi_master_put(master); return ret; } /* probe function to be called by SoC specific platform driver probe */ @@ -1508,10 +1498,10 @@ int bcm_qspi_remove(struct platform_device *pdev) { struct bcm_qspi *qspi = platform_get_drvdata(pdev); + spi_unregister_master(qspi->master); bcm_qspi_hw_uninit(qspi); clk_disable_unprepare(qspi->clk); kfree(qspi->dev_ids); - spi_unregister_master(qspi->master); return 0; } From 70438afbf17e5194dd607dd17759560a363b7bb4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 21 Oct 2020 10:34:15 -0400 Subject: [PATCH 404/710] NFSv4.2: fix failure to unregister shrinker We forgot to unregister the nfs4_xattr_large_entry_shrinker. That leaves the global list of shrinkers corrupted after unload of the nfs module, after which possibly unrelated code that calls register_shrinker() or unregister_shrinker() gets a BUG() with "supervisor write access in kernel mode". And similarly for the nfs4_xattr_large_entry_lru. Reported-by: Kris Karas Tested-By: Kris Karas Fixes: 95ad37f90c33 "NFSv4.2: add client side xattr caching." Signed-off-by: J. Bruce Fields CC: stable@vger.kernel.org Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index b51424ff8159..6c2ce799150f 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -1047,8 +1047,10 @@ out4: void nfs4_xattr_cache_exit(void) { + unregister_shrinker(&nfs4_xattr_large_entry_shrinker); unregister_shrinker(&nfs4_xattr_entry_shrinker); unregister_shrinker(&nfs4_xattr_cache_shrinker); + list_lru_destroy(&nfs4_xattr_large_entry_lru); list_lru_destroy(&nfs4_xattr_entry_lru); list_lru_destroy(&nfs4_xattr_cache_lru); kmem_cache_destroy(nfs4_xattr_cache_cachep); From 6c2190b3fcbc92cb79e39cc7e7531656b341e463 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 31 Oct 2020 12:44:25 -0400 Subject: [PATCH 405/710] NFS: Fix listxattr receive buffer size Certain NFSv4.2/RDMA tests fail with v5.9-rc1. rpcrdma_convert_kvec() runs off the end of the rl_segments array because rq_rcv_buf.tail[0].iov_len holds a very large positive value. The resultant kernel memory corruption is enough to crash the client system. Callers of rpc_prepare_reply_pages() must reserve an extra XDR_UNIT in the maximum decode size for a possible XDR pad of the contents of the xdr_buf's pages. That guarantees the allocated receive buffer will be large enough to accommodate the usual contents plus that XDR pad word. encode_op_hdr() cannot add that extra word. If it does, xdr_inline_pages() underruns the length of the tail iovec. Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 0dc31ad2362e..6e060a88f98c 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -196,7 +196,7 @@ 1 + nfs4_xattr_name_maxsz + 1) #define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) #define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1) -#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1) +#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1) #define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \ nfs4_xattr_name_maxsz) #define decode_removexattr_maxsz (op_decode_hdr_maxsz + \ @@ -531,7 +531,7 @@ static void encode_listxattrs(struct xdr_stream *xdr, { __be32 *p; - encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz + 1, hdr); + encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz, hdr); p = reserve_space(xdr, 12); if (unlikely(!p)) From 83f2c45e63935a325f73bde98b1609e0976a12e0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Oct 2020 17:57:29 -0400 Subject: [PATCH 406/710] NFS: Remove unnecessary inode locking in nfs_llseek_dir() Remove the contentious inode lock, and instead provide thread safety using the file->f_lock spinlock. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cb52db9a0cfb..e56b1bd99537 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -955,7 +955,6 @@ out: static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { - struct inode *inode = file_inode(filp); struct nfs_open_dir_context *dir_ctx = filp->private_data; dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n", @@ -967,15 +966,15 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) case SEEK_SET: if (offset < 0) return -EINVAL; - inode_lock(inode); + spin_lock(&filp->f_lock); break; case SEEK_CUR: if (offset == 0) return filp->f_pos; - inode_lock(inode); + spin_lock(&filp->f_lock); offset += filp->f_pos; if (offset < 0) { - inode_unlock(inode); + spin_unlock(&filp->f_lock); return -EINVAL; } } @@ -987,7 +986,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) dir_ctx->dir_cookie = 0; dir_ctx->duped = 0; } - inode_unlock(inode); + spin_unlock(&filp->f_lock); return offset; } From 11decaf8127b035242cb55de2fc6946f8961f671 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Oct 2020 17:57:30 -0400 Subject: [PATCH 407/710] NFS: Remove unnecessary inode lock in nfs_fsync_dir() nfs_inc_stats() is already thread-safe, and there are no other reasons to hold the inode lock here. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e56b1bd99537..4e011adaf967 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -997,13 +997,9 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, int datasync) { - struct inode *inode = file_inode(filp); - dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync); - inode_lock(inode); - nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - inode_unlock(inode); + nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC); return 0; } From 9e2b7fa2df4365e99934901da4fb4af52d81e820 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Fri, 6 Nov 2020 08:30:30 +0100 Subject: [PATCH 408/710] vrf: Fix fast path output packet handling with async Netfilter rules VRF devices use an optimized direct path on output if a default qdisc is involved, calling Netfilter hooks directly. This path, however, does not consider Netfilter rules completing asynchronously, such as with NFQUEUE. The Netfilter okfn() is called for asynchronously accepted packets, but the VRF never passes that packet down the stack to send it out over the slave device. Using the slower redirect path for this seems not feasible, as we do not know beforehand if a Netfilter hook has asynchronously completing rules. Fix the use of asynchronously completing Netfilter rules in OUTPUT and POSTROUTING by using a special completion function that additionally calls dst_output() to pass the packet down the stack. Also, slightly adjust the use of nf_reset_ct() so that is called in the asynchronous case, too. Fixes: dcdd43c41e60 ("net: vrf: performance improvements for IPv4") Fixes: a9ec54d1b0cd ("net: vrf: performance improvements for IPv6") Signed-off-by: Martin Willi Link: https://lore.kernel.org/r/20201106073030.3974927-1-martin@strongswan.org Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 92 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 69 insertions(+), 23 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 60c1aadece89..f2793ffde191 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -608,8 +608,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } -static int vrf_finish_direct(struct net *net, struct sock *sk, - struct sk_buff *skb) +static void vrf_finish_direct(struct sk_buff *skb) { struct net_device *vrf_dev = skb->dev; @@ -628,7 +627,8 @@ static int vrf_finish_direct(struct net *net, struct sock *sk, skb_pull(skb, ETH_HLEN); } - return 1; + /* reset skb device */ + nf_reset_ct(skb); } #if IS_ENABLED(CONFIG_IPV6) @@ -707,15 +707,41 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev, return skb; } +static int vrf_output6_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + vrf_finish_direct(skb); + + return vrf_ip6_local_out(net, sk, skb); +} + static int vrf_output6_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { + int err = 1; + skb->protocol = htons(ETH_P_IPV6); - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, - net, sk, skb, NULL, skb->dev, - vrf_finish_direct, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + if (!(IPCB(skb)->flags & IPSKB_REROUTED)) + err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, + NULL, skb->dev, vrf_output6_direct_finish); + + if (likely(err == 1)) + vrf_finish_direct(skb); + + return err; +} + +static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = vrf_output6_direct(net, sk, skb); + if (likely(err == 1)) + err = vrf_ip6_local_out(net, sk, skb); + + return err; } static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, @@ -728,18 +754,15 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, - skb, NULL, vrf_dev, vrf_output6_direct); + skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); if (likely(err == 1)) err = vrf_output6_direct(net, sk, skb); - /* reset skb device */ if (likely(err == 1)) - nf_reset_ct(skb); - else - skb = NULL; + return skb; - return skb; + return NULL; } static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, @@ -919,15 +942,41 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev, return skb; } +static int vrf_output_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + vrf_finish_direct(skb); + + return vrf_ip_local_out(net, sk, skb); +} + static int vrf_output_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { + int err = 1; + skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, NULL, skb->dev, - vrf_finish_direct, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + if (!(IPCB(skb)->flags & IPSKB_REROUTED)) + err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, + NULL, skb->dev, vrf_output_direct_finish); + + if (likely(err == 1)) + vrf_finish_direct(skb); + + return err; +} + +static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = vrf_output_direct(net, sk, skb); + if (likely(err == 1)) + err = vrf_ip_local_out(net, sk, skb); + + return err; } static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, @@ -940,18 +989,15 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, - skb, NULL, vrf_dev, vrf_output_direct); + skb, NULL, vrf_dev, vrf_ip_out_direct_finish); if (likely(err == 1)) err = vrf_output_direct(net, sk, skb); - /* reset skb device */ if (likely(err == 1)) - nf_reset_ct(skb); - else - skb = NULL; + return skb; - return skb; + return NULL; } static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, From 9f73bd1c2c4c304b238051fc92b3f807326f0a89 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 11 Nov 2020 05:47:44 +0200 Subject: [PATCH 409/710] devlink: Avoid overwriting port attributes of registered port Cited commit in fixes tag overwrites the port attributes for the registered port. Avoid such error by checking registered flag before setting attributes. Fixes: 71ad8d55f8e5 ("devlink: Replace devlink_port_attrs_set parameters with a struct") Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20201111034744.35554-1-parav@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index a932d95be798..ab4b1368904f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8254,8 +8254,6 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port, { struct devlink_port_attrs *attrs = &devlink_port->attrs; - if (WARN_ON(devlink_port->registered)) - return -EEXIST; devlink_port->attrs_set = true; attrs->flavour = flavour; if (attrs->switch_id.id_len) { @@ -8279,6 +8277,8 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, { int ret; + if (WARN_ON(devlink_port->registered)) + return; devlink_port->attrs = *attrs; ret = __devlink_port_attrs_set(devlink_port, attrs->flavour); if (ret) @@ -8301,6 +8301,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; + if (WARN_ON(devlink_port->registered)) + return; ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_PF); if (ret) @@ -8326,6 +8328,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; + if (WARN_ON(devlink_port->registered)) + return; ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_VF); if (ret) From fd63729cc0a6872bdabd393ee933a969642e4076 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 11 Nov 2020 15:12:15 -0800 Subject: [PATCH 410/710] selftests/bpf: Fix unused attribute usage in subprogs_unused test Correct attribute name is "unused". maybe_unused is a C++17 addition. This patch fixes compilation warning during selftests compilation. Fixes: 197afc631413 ("libbpf: Don't attempt to load unused subprog as an entry-point BPF program") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201111231215.1779147-1-andrii@kernel.org --- tools/testing/selftests/bpf/progs/test_subprogs_unused.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c index 75d975f8cf90..bc49e050d342 100644 --- a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c +++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c @@ -4,12 +4,12 @@ const char LICENSE[] SEC("license") = "GPL"; -__attribute__((maybe_unused)) __noinline int unused1(int x) +__attribute__((unused)) __noinline int unused1(int x) { return x + 1; } -static __attribute__((maybe_unused)) __noinline int unused2(int x) +static __attribute__((unused)) __noinline int unused2(int x) { return x + 2; } From d3039c0615c3f80eaf735e581ed11242c0064299 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 11 Nov 2020 11:09:55 -0600 Subject: [PATCH 411/710] Revert "gfs2: Ignore journal log writes for jdata holes" This reverts commit b2a846dbef4ef54ef032f0f5ee188c609a0278a7. That commit changed the behavior of function gfs2_block_map to return -ENODATA in cases where a hole (IOMAP_HOLE) is encountered and create is false. While that fixed the intended problem for jdata, it also broke other callers of gfs2_block_map such as some jdata block reads. Before the patch, an encountered hole would be skipped and the buffer seen as unmapped by the caller. The patch changed the behavior to return -ENODATA, which is interpreted as an error by the caller. The -ENODATA return code should be restricted to the specific case where jdata holes are encountered during ail1 writes. That will be done in a later patch. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8dff9cbd0a87..62d9081d1e26 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1301,12 +1301,8 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, trace_gfs2_bmap(ip, bh_map, lblock, create, 1); ret = gfs2_iomap_get(inode, pos, length, flags, &iomap, &mp); - if (!ret && iomap.type == IOMAP_HOLE) { - if (create) - ret = gfs2_iomap_alloc(inode, &iomap, &mp); - else - ret = -ENODATA; - } + if (create && !ret && iomap.type == IOMAP_HOLE) + ret = gfs2_iomap_alloc(inode, &iomap, &mp); release_metapath(&mp); if (ret) goto out; From 4e79e3f08e576acd51dffb4520037188703238b3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 12 Nov 2020 10:02:48 -0600 Subject: [PATCH 412/710] gfs2: Fix case in which ail writes are done to jdata holes Patch b2a846dbef4e ("gfs2: Ignore journal log writes for jdata holes") tried (unsuccessfully) to fix a case in which writes were done to jdata blocks, the blocks are sent to the ail list, then a punch_hole or truncate operation caused the blocks to be freed. In other words, the ail items are for jdata holes. Before b2a846dbef4e, the jdata hole caused function gfs2_block_map to return -EIO, which was eventually interpreted as an IO error to the journal, and then withdraw. This patch changes function gfs2_get_block_noalloc, which is only used for jdata writes, so it returns -ENODATA rather than -EIO, and when -ENODATA is returned to gfs2_ail1_start_one, the error is ignored. We can safely ignore it because gfs2_ail1_start_one is only called when the jdata pages have already been written and truncated, so the ail1 content no longer applies. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 2 +- fs/gfs2/log.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 9cd2ecad07db..cc4f987687f3 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -77,7 +77,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, if (error) return error; if (!buffer_mapped(bh_result)) - return -EIO; + return -ENODATA; return 0; } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 9133b3178677..2e9314091c81 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -132,6 +132,8 @@ __acquires(&sdp->sd_ail_lock) spin_unlock(&sdp->sd_ail_lock); ret = generic_writepages(mapping, wbc); spin_lock(&sdp->sd_ail_lock); + if (ret == -ENODATA) /* if a jdata write into a new hole */ + ret = 0; /* ignore it */ if (ret || wbc->nr_to_write <= 0) break; return -EBUSY; From 4b1a86281cc1d0de46df3ad2cb8c1f86ac07681c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Nov 2020 20:45:25 +0000 Subject: [PATCH 413/710] net: udp: fix UDP header access on Fast/frag0 UDP GRO UDP GRO uses udp_hdr(skb) in its .gro_receive() callback. While it's probably OK for non-frag0 paths (when all headers or even the entire frame are already in skb head), this inline points to junk when using Fast GRO (napi_gro_frags() or napi_gro_receive() with only Ethernet header in skb head and all the rest in the frags) and breaks GRO packet compilation and the packet flow itself. To support both modes, skb_gro_header_fast() + skb_gro_header_slow() are typically used. UDP even has an inline helper that makes use of them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr() to get rid of the out-of-order delivers. Present since the introduction of plain UDP GRO in 5.0-rc1. Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Cc: Eric Dumazet Signed-off-by: Alexander Lobakin Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index e67a66fbf27b..13740e9fe6ec 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -366,7 +366,7 @@ out: static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *skb) { - struct udphdr *uh = udp_hdr(skb); + struct udphdr *uh = udp_gro_udphdr(skb); struct sk_buff *pp = NULL; struct udphdr *uh2; struct sk_buff *p; From 55e729889bb07d68ab071660ce3f5e7a7872ebe8 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Nov 2020 20:45:38 +0000 Subject: [PATCH 414/710] net: udp: fix IP header access and skb lookup on Fast/frag0 UDP GRO udp{4,6}_lib_lookup_skb() use ip{,v6}_hdr() to get IP header of the packet. While it's probably OK for non-frag0 paths, this helpers will also point to junk on Fast/frag0 GRO when all headers are located in frags. As a result, sk/skb lookup may fail or give wrong results. To support both GRO modes, skb_gro_network_header() might be used. To not modify original functions, add private versions of udp{4,6}_lib_lookup_skb() only to perform correct sk lookups on GRO. Present since the introduction of "application-level" UDP GRO in 4.7-rc1. Misc: replace totally unneeded ternaries with plain ifs. Fixes: a6024562ffd7 ("udp: Add GRO functions to UDP socket") Suggested-by: Willem de Bruijn Cc: Eric Dumazet Signed-off-by: Alexander Lobakin Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 17 +++++++++++++++-- net/ipv6/udp_offload.c | 17 +++++++++++++++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 13740e9fe6ec..c62805cd3131 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -500,12 +500,22 @@ out: } EXPORT_SYMBOL(udp_gro_receive); +static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, + __be16 dport) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + + return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + inet_sdif(skb), &udp_table, NULL); +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); + struct sock *sk = NULL; struct sk_buff *pp; - struct sock *sk; if (unlikely(!uh)) goto flush; @@ -523,7 +533,10 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) skip: NAPI_GRO_CB(skb)->is_ipv6 = 0; rcu_read_lock(); - sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL; + + if (static_branch_unlikely(&udp_encap_needed_key)) + sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest); + pp = udp_gro_receive(head, skb, uh, sk); rcu_read_unlock(); return pp; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 584157a07759..f9e888d1b9af 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -111,12 +111,22 @@ out: return segs; } +static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, + __be16 dport) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + + return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, + &iph->daddr, dport, inet6_iif(skb), + inet6_sdif(skb), &udp_table, NULL); +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); + struct sock *sk = NULL; struct sk_buff *pp; - struct sock *sk; if (unlikely(!uh)) goto flush; @@ -135,7 +145,10 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) skip: NAPI_GRO_CB(skb)->is_ipv6 = 1; rcu_read_lock(); - sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL; + + if (static_branch_unlikely(&udpv6_encap_needed_key)) + sk = udp6_gro_lookup_skb(skb, uh->source, uh->dest); + pp = udp_gro_receive(head, skb, uh, sk); rcu_read_unlock(); return pp; From edbc21113bde13ca3d06eec24b621b1f628583dd Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Thu, 12 Nov 2020 10:25:13 -0500 Subject: [PATCH 415/710] lan743x: fix use of uninitialized variable When no devicetree is present, the driver will use an uninitialized variable. Fix by initializing this variable. Fixes: 902a66e08cea ("lan743x: correctly handle chips with internal PHY") Reported-by: kernel test robot Signed-off-by: Sven Van Asbroeck Link: https://lore.kernel.org/r/20201112152513.1941-1-TheSven73@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 173158656559..e2c99d909247 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1014,8 +1014,8 @@ static void lan743x_phy_close(struct lan743x_adapter *adapter) static int lan743x_phy_open(struct lan743x_adapter *adapter) { struct lan743x_phy *phy = &adapter->phy; + struct phy_device *phydev = NULL; struct device_node *phynode; - struct phy_device *phydev; struct net_device *netdev; int ret = -EIO; From 4def49da620c84a682d9361d6bef0a97eed46fe0 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 8 Nov 2020 23:41:00 +0100 Subject: [PATCH 416/710] spi: lpspi: Fix use-after-free on unbind Normally the last reference on an spi_controller is released by spi_unregister_controller(). In the case of the i.MX lpspi driver, the spi_controller is registered with devm_spi_register_controller(), so spi_unregister_controller() is invoked automatically after the driver has unbound. However the driver already releases the last reference in fsl_lpspi_remove() through a gratuitous call to spi_master_put(), causing a use-after-free when spi_unregister_controller() is subsequently invoked by the devres framework. Fix by dropping the superfluous spi_master_put(). Fixes: 944c01a889d9 ("spi: lpspi: enable runtime pm for lpspi") Signed-off-by: Lukas Wunner Cc: # v5.2+ Cc: Han Xu Link: https://lore.kernel.org/r/ab3c0b18bd820501a12c85e440006e09ec0e275f.1604874488.git.lukas@wunner.de Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 986b9793fd3c..a2886ee44e4c 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -938,9 +938,6 @@ static int fsl_lpspi_remove(struct platform_device *pdev) spi_controller_get_devdata(controller); pm_runtime_disable(fsl_lpspi->dev); - - spi_master_put(controller); - return 0; } From 9602182810cc15e241f06c63c90b828ef63d0507 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 12 Nov 2020 10:03:40 -0800 Subject: [PATCH 417/710] MAINTAINERS/bpf: Update Andrii's entry. Andrii has been a de-facto maintainer for libbpf and other components. Update maintainers entry to acknowledge his work de-jure. The folks with git write permissions will continue to follow the rule of not applying their own patches unless absolutely trivial. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201112180340.45265-1-alexei.starovoitov@gmail.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cd123d0a6a2d..008ee2bf753b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3243,10 +3243,10 @@ F: drivers/iio/accel/bma400* BPF (Safe dynamic programs and tools) M: Alexei Starovoitov M: Daniel Borkmann +M: Andrii Nakryiko R: Martin KaFai Lau R: Song Liu R: Yonghong Song -R: Andrii Nakryiko R: John Fastabend R: KP Singh L: netdev@vger.kernel.org From e24a87b54ef3e39261f1d859b7f78416349dfb14 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 4 Nov 2020 17:42:28 +0800 Subject: [PATCH 418/710] perf lock: Correct field name "flags" The tracepoint "lock:lock_acquire" contains field "flags" but not "flag". Current code wrongly retrieves value from field "flag" and it always gets zero for the value, thus "perf lock" doesn't report the correct result. This patch replaces the field name "flag" with "flags", so can read out the correct flags for locking. Fixes: e4cef1f65061 ("perf lock: Fix state machine to recognize lock sequence") Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201104094229.17509-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index f0a1dbacb46c..5cecc1ad78e1 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -406,7 +406,7 @@ static int report_lock_acquire_event(struct evsel *evsel, struct lock_seq_stat *seq; const char *name = evsel__strval(evsel, sample, "name"); u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); - int flag = evsel__intval(evsel, sample, "flag"); + int flag = evsel__intval(evsel, sample, "flags"); memcpy(&addr, &tmp, sizeof(void *)); From b0e5a05cc9e37763c7f19366d94b1a6160c755bc Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 4 Nov 2020 17:42:29 +0800 Subject: [PATCH 419/710] perf lock: Don't free "lock_seq_stat" if read_count isn't zero When execute command "perf lock report", it hits failure and outputs log as follows: perf: builtin-lock.c:623: report_lock_release_event: Assertion `!(seq->read_count < 0)' failed. Aborted This is an imbalance issue. The locking sequence structure "lock_seq_stat" contains the reader counter and it is used to check if the locking sequence is balance or not between acquiring and releasing. If the tool wrongly frees "lock_seq_stat" when "read_count" isn't zero, the "read_count" will be reset to zero when allocate a new structure at the next time; thus it causes the wrong counting for reader and finally results in imbalance issue. To fix this issue, if detects "read_count" is not zero (means still have read user in the locking sequence), goto the "end" tag to skip freeing structure "lock_seq_stat". Fixes: e4cef1f65061 ("perf lock: Fix state machine to recognize lock sequence") Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201104094229.17509-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 5cecc1ad78e1..a2f1e53f37a7 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -621,7 +621,7 @@ static int report_lock_release_event(struct evsel *evsel, case SEQ_STATE_READ_ACQUIRED: seq->read_count--; BUG_ON(seq->read_count < 0); - if (!seq->read_count) { + if (seq->read_count) { ls->nr_release++; goto end; } From db1a8b97a0a36155171dbb805fbcb276e07559f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Nov 2020 13:59:15 -0300 Subject: [PATCH 420/710] tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' To bring in the change made in this cset: 4d6ffa27b8e5116c ("x86/lib: Change .weak to SYM_FUNC_START_WEAK for arch/x86/lib/mem*_64.S") 6dcc5627f6aec4cb ("x86/asm: Change all ENTRY+ENDPROC to SYM_FUNC_*") I needed to define SYM_FUNC_START_LOCAL() as SYM_L_GLOBAL as mem{cpy,set}_{orig,erms} are used by 'perf bench'. This silences these perf tools build warnings: Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S' diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Cc: Adrian Hunter Cc: Borislav Petkov Cc: Fangrui Song Cc: Ian Rogers Cc: Jiri Olsa Cc: Jiri Slaby Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 8 +++----- tools/arch/x86/lib/memset_64.S | 11 ++++++----- tools/perf/bench/mem-memcpy-x86-64-asm.S | 3 +++ tools/perf/bench/mem-memset-x86-64-asm.S | 3 +++ tools/perf/util/include/linux/linkage.h | 7 +++++++ 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 0b5b8ae56bd9..1e299ac73c86 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -16,8 +16,6 @@ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. */ -.weak memcpy - /* * memcpy - Copy a memory block. * @@ -30,7 +28,7 @@ * rax original destination */ SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_LOCAL(memcpy) +SYM_FUNC_START_WEAK(memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -51,14 +49,14 @@ EXPORT_SYMBOL(__memcpy) * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. */ -SYM_FUNC_START(memcpy_erms) +SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb ret SYM_FUNC_END(memcpy_erms) -SYM_FUNC_START(memcpy_orig) +SYM_FUNC_START_LOCAL(memcpy_orig) movq %rdi, %rax cmpq $0x20, %rdx diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index fd5d25a474b7..0bfd26e4ca9e 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -4,8 +4,7 @@ #include #include #include - -.weak memset +#include /* * ISO C memset - set a memory block to a byte value. This function uses fast @@ -18,7 +17,7 @@ * * rax original destination */ -SYM_FUNC_START_ALIAS(memset) +SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -44,6 +43,8 @@ SYM_FUNC_START(__memset) ret SYM_FUNC_END(__memset) SYM_FUNC_END_ALIAS(memset) +EXPORT_SYMBOL(memset) +EXPORT_SYMBOL(__memset) /* * ISO C memset - set a memory block to a byte value. This function uses @@ -56,7 +57,7 @@ SYM_FUNC_END_ALIAS(memset) * * rax original destination */ -SYM_FUNC_START(memset_erms) +SYM_FUNC_START_LOCAL(memset_erms) movq %rdi,%r9 movb %sil,%al movq %rdx,%rcx @@ -65,7 +66,7 @@ SYM_FUNC_START(memset_erms) ret SYM_FUNC_END(memset_erms) -SYM_FUNC_START(memset_orig) +SYM_FUNC_START_LOCAL(memset_orig) movq %rdi,%r10 /* expand byte value */ diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index 9ad015a1e202..6eb45a2aa8db 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -2,6 +2,9 @@ /* Various wrappers to make the kernel .S file build in user-space: */ +// memcpy_orig and memcpy_erms are being defined as SYM_L_LOCAL but we need it +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index d550bd526162..6f093c483842 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +// memset_orig and memset_erms are being defined as SYM_L_LOCAL but we need it +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #define memset MEMSET /* don't hide glibc's memset() */ #define altinstr_replacement text #define globl p2align 4; .globl diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index b8a5159361b4..5acf053fca7d 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -25,6 +25,7 @@ /* SYM_L_* -- linkage of symbols */ #define SYM_L_GLOBAL(name) .globl name +#define SYM_L_WEAK(name) .weak name #define SYM_L_LOCAL(name) /* nothing */ #define ALIGN __ALIGN @@ -84,6 +85,12 @@ SYM_END(name, SYM_T_FUNC) #endif +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#ifndef SYM_FUNC_START_WEAK +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) +#endif + /* * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, * SYM_FUNC_START_WEAK, ... From db2ac2e49e564c2b219c4b33d9903aa383334256 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 10 Nov 2020 14:34:16 +0800 Subject: [PATCH 421/710] perf test: Fix a typo in cs-etm testing Fix a typo: s/devce_name/device_name. Fixes: fe0aed19b266 ("perf test: Introduce script for Arm CoreSight testing") Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight ml Cc: stable@kernel.org Link: http://lore.kernel.org/lkml/20201110063417.14467-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_coresight.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 8d84fdbed6a6..59d847d4981d 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -105,7 +105,7 @@ arm_cs_iterate_devices() { # `> device_name = 'tmc_etf0' device_name=$(basename $path) - if is_device_sink $path $devce_name; then + if is_device_sink $path $device_name; then record_touch_file $device_name $2 && perf_script_branch_samples touch && From dd94ac807a5e10e0b25b68397c473276905cca73 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 10 Nov 2020 14:34:17 +0800 Subject: [PATCH 422/710] perf test: Update branch sample pattern for cs-etm Since the commit 943b69ac1884 ("perf parse-events: Set exclude_guest=1 for user-space counting"), 'exclude_guest=1' is set for user-space counting; and the branch sample's modifier has been altered, the sample event name has been changed from "branches:u:" to "branches:uH:", which gives out info for "user-space and host counting". But the cs-etm testing's regular expression cannot match the updated branch sample event and leads to test failure. This patch updates the branch sample pattern by using a more flexible expression '.*' to match branch sample's modifiers, so that allows the testing to work as expected. Fixes: 943b69ac1884 ("perf parse-events: Set exclude_guest=1 for user-space counting") Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight ml Cc: stable@kernel.org Link: http://lore.kernel.org/lkml/20201110063417.14467-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_coresight.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 59d847d4981d..18fde2f179cd 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -44,7 +44,7 @@ perf_script_branch_samples() { # touch 6512 1 branches:u: ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so) # touch 6512 1 branches:u: ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so) perf script -F,-time -i ${perfdata} | \ - egrep " +$1 +[0-9]+ .* +branches:([u|k]:)? +" + egrep " +$1 +[0-9]+ .* +branches:(.*:)? +" } perf_report_branch_samples() { From c3213d260a23e263ef85ba21ac68c9e7578020b5 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 12 Nov 2020 15:17:32 -0500 Subject: [PATCH 423/710] SUNRPC: Fix oops in the rpc_xdr_buf event class Backchannel rpc tasks don't have task->tk_client set, so it's necessary to check it for NULL before dereferencing. Fixes: c509f15a5801 ("SUNRPC: Split the xdr_buf event class") Signed-off-by: Scott Mayhew Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 2477014e3fa6..2a03263b5f9d 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -68,7 +68,8 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class, TP_fast_assign( __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; __entry->head_base = xdr->head[0].iov_base; __entry->head_len = xdr->head[0].iov_len; __entry->tail_base = xdr->tail[0].iov_base; From 7e890c37c25c7cbca37ff0ab292873d8146e713b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Nov 2020 17:50:04 +0100 Subject: [PATCH 424/710] block: add a return value to set_capacity_revalidate_and_notify Return if the function ended up sending an uevent or not. Cc: stable@vger.kernel.org # v5.9 Signed-off-by: Christoph Hellwig Reviewed-by: Petr Vorel Signed-off-by: Jens Axboe --- block/genhd.c | 5 ++++- include/linux/genhd.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 0a273211fec2..9387f050c248 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -49,7 +49,7 @@ static void disk_release_events(struct gendisk *disk); * Set disk capacity and notify if the size is not currently * zero and will not be set to zero */ -void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, +bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool update_bdev) { sector_t capacity = get_capacity(disk); @@ -62,7 +62,10 @@ void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, char *envp[] = { "RESIZE=1", NULL }; kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); + return true; } + + return false; } EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 38f23d757013..03da3f603d30 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -315,7 +315,7 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, +bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool update_bdev); /* drivers/char/random.c */ From c01a21b77722db0474bbcc4eafc8c4e0d8fed6d8 Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Thu, 12 Nov 2020 17:50:05 +0100 Subject: [PATCH 425/710] loop: Fix occasional uevent drop Commit 716ad0986cbd ("loop: Switch to set_capacity_revalidate_and_notify") causes an occasional drop of loop device uevent, which are no longer triggered in loop_set_size() but in a different part of code. Bug is reproducible with LTP test uevent01 [1]: i=0; while true; do i=$((i+1)); echo "== $i ==" lsmod |grep -q loop && rmmod -f loop ./uevent01 || break done Put back triggering through code called in loop_set_size(). Fix required to add yet another parameter to set_capacity_revalidate_and_notify(). [1] https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/uevents/uevent01.c [hch: rebased on a different change to the prototype of set_capacity_revalidate_and_notify] Cc: stable@vger.kernel.org # v5.9 Fixes: 716ad0986cbd ("loop: Switch to set_capacity_revalidate_and_notify") Reported-by: Signed-off-by: Petr Vorel Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cb1191d6e945..a58084c2ed7c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -255,7 +255,8 @@ static void loop_set_size(struct loop_device *lo, loff_t size) bd_set_nr_sectors(bdev, size); - set_capacity_revalidate_and_notify(lo->lo_disk, size, false); + if (!set_capacity_revalidate_and_notify(lo->lo_disk, size, false)) + kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); } static inline int From bc551d776b691022f49b5bb5379bd58f7c4eb76a Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Sat, 31 Oct 2020 19:17:47 +1100 Subject: [PATCH 426/710] drm: bridge: dw-hdmi: Avoid resetting force in the detect function It has been observed that resetting force in the detect function can result in the PHY being powered down in response to hot-plug detect being asserted, even when the HDMI connector is forced on. Enabling debug messages and adding a call to dump_stack() in dw_hdmi_phy_power_off() shows the following in dmesg: [ 160.637413] dwhdmi-rockchip ff940000.hdmi: EVENT=plugin [ 160.637433] dwhdmi-rockchip ff940000.hdmi: PHY powered down in 0 iterations Call trace: dw_hdmi_phy_power_off dw_hdmi_phy_disable dw_hdmi_update_power dw_hdmi_detect dw_hdmi_connector_detect drm_helper_probe_detect_ctx drm_helper_hpd_irq_event dw_hdmi_irq irq_thread_fn irq_thread kthread ret_from_fork Fixes: 381f05a7a842 ("drm: bridge/dw_hdmi: add connector mode forcing") Signed-off-by: Jonathan Liu Reviewed-by: Sam Ravnborg Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20201031081747.372599-1-net147@gmail.com --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 748df1cacd2b..0c79a9ba48bb 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2327,12 +2327,6 @@ static enum drm_connector_status dw_hdmi_detect(struct dw_hdmi *hdmi) { enum drm_connector_status result; - mutex_lock(&hdmi->mutex); - hdmi->force = DRM_FORCE_UNSPECIFIED; - dw_hdmi_update_power(hdmi); - dw_hdmi_update_phy_mask(hdmi); - mutex_unlock(&hdmi->mutex); - result = hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data); mutex_lock(&hdmi->mutex); From 23711a5e662c1a66e14cb9288e7dfd2b840efcd5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 10 Nov 2020 14:13:06 +0000 Subject: [PATCH 427/710] KVM: arm64: Allow setting of ID_AA64PFR0_EL1.CSV2 from userspace We now expose ID_AA64PFR0_EL1.CSV2=1 to guests running on hosts that are immune to Spectre-v2, but that don't have this field set, most likely because they predate the specification. However, this prevents the migration of guests that have started on a host the doesn't fake this CSV2 setting to one that does, as KVM rejects the write to ID_AA64PFR0_EL2 on the grounds that it isn't what is already there. In order to fix this, allow userspace to set this field as long as this doesn't result in a promising more than what is already there (setting CSV2 to 0 is acceptable, but setting it to 1 when it is already set to 0 isn't). Fixes: e1026237f9067 ("KVM: arm64: Set CSV2 for guests on hardware unaffected by Spectre-v2") Reported-by: Peng Liang Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20201110141308.451654-2-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/arm.c | 16 ++++++++++++ arch/arm64/kvm/sys_regs.c | 42 ++++++++++++++++++++++++++++--- 3 files changed, 56 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 781d029b8aa8..0cd9f0f75c13 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -118,6 +118,8 @@ struct kvm_arch { */ unsigned long *pmu_filter; unsigned int pmuver; + + u8 pfr0_csv2; }; struct kvm_vcpu_fault_info { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a3b32df1afb0..a6e25a4b4dc5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -102,6 +102,20 @@ static int kvm_arm_default_max_vcpus(void) return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; } +static void set_default_csv2(struct kvm *kvm) +{ + /* + * The default is to expose CSV2 == 1 if the HW isn't affected. + * Although this is a per-CPU feature, we make it global because + * asymmetric systems are just a nuisance. + * + * Userspace can override this as long as it doesn't promise + * the impossible. + */ + if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) + kvm->arch.pfr0_csv2 = 1; +} + /** * kvm_arch_init_vm - initializes a VM data structure * @kvm: pointer to the KVM struct @@ -127,6 +141,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); + set_default_csv2(kvm); + return ret; out_free_stage2_pgd: kvm_free_stage2_pgd(&kvm->arch.mmu); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6d287eefd9f1..0aa86250e354 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1128,9 +1128,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, if (!vcpu_has_sve(vcpu)) val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT); - if (!(val & (0xfUL << ID_AA64PFR0_CSV2_SHIFT)) && - arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) - val |= (1UL << ID_AA64PFR0_CSV2_SHIFT); + val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT); + val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT); } else if (id == SYS_ID_AA64PFR1_EL1) { val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT); } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { @@ -1213,6 +1212,40 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } +static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + const u64 id = sys_reg_to_index(rd); + int err; + u64 val; + u8 csv2; + + err = reg_from_user(&val, uaddr, id); + if (err) + return err; + + /* + * Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as + * it doesn't promise more than what is actually provided (the + * guest could otherwise be covered in ectoplasmic residue). + */ + csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV2_SHIFT); + if (csv2 > 1 || + (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED)) + return -EINVAL; + + /* We can only differ with CSV2, and anything else is an error */ + val ^= read_id_reg(vcpu, rd, false); + val &= ~(0xFUL << ID_AA64PFR0_CSV2_SHIFT); + if (val) + return -EINVAL; + + vcpu->kvm->arch.pfr0_csv2 = csv2; + + return 0; +} + /* * cpufeature ID register user accessors * @@ -1472,7 +1505,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 ID registers */ /* CRm=4 */ - ID_SANITISED(ID_AA64PFR0_EL1), + { SYS_DESC(SYS_ID_AA64PFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_aa64pfr0_el1, }, ID_SANITISED(ID_AA64PFR1_EL1), ID_UNALLOCATED(4,2), ID_UNALLOCATED(4,3), From 338b17933a6077bb5406b33d8b9fb9616fffc1af Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 10 Nov 2020 14:13:07 +0000 Subject: [PATCH 428/710] KVM: arm64: Unify trap handlers injecting an UNDEF A large number of system register trap handlers only inject an UNDEF exeption, and yet each class of sysreg seems to provide its own, identical function. Let's unify them all, saving us introducing yet another one later. Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20201110141308.451654-3-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 65 +++++++++++++++------------------------ 1 file changed, 25 insertions(+), 40 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0aa86250e354..b0022f37c8f1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1038,8 +1038,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } -static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) { kvm_inject_undefined(vcpu); @@ -1047,24 +1047,10 @@ static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p, } /* Macro to expand the AMU counter and type registers*/ -#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), access_amu } -#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), access_amu } -#define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), access_amu } -#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), access_amu } - -static bool trap_ptrauth(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - /* - * If we land here, that is because we didn't fixup the access on exit - * by allowing the PtrAuth sysregs. The only way this happens is when - * the guest does not have PtrAuth support enabled. - */ - kvm_inject_undefined(vcpu); - - return false; -} +#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), undef_access } +#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), undef_access } +#define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), undef_access } +#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), undef_access } static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) @@ -1072,8 +1058,14 @@ static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu, return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN; } +/* + * If we land here on a PtrAuth access, that is because we didn't + * fixup the access on exit by allowing the PtrAuth sysregs. The only + * way this happens is when the guest does not have PtrAuth support + * enabled. + */ #define __PTRAUTH_KEY(k) \ - { SYS_DESC(SYS_## k), trap_ptrauth, reset_unknown, k, \ + { SYS_DESC(SYS_## k), undef_access, reset_unknown, k, \ .visibility = ptrauth_visibility} #define PTRAUTH_KEY(k) \ @@ -1374,13 +1366,6 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } -static bool access_mte_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - kvm_inject_undefined(vcpu); - return false; -} - /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ @@ -1549,8 +1534,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, - { SYS_DESC(SYS_RGSR_EL1), access_mte_regs }, - { SYS_DESC(SYS_GCR_EL1), access_mte_regs }, + { SYS_DESC(SYS_RGSR_EL1), undef_access }, + { SYS_DESC(SYS_GCR_EL1), undef_access }, { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, @@ -1576,8 +1561,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, - { SYS_DESC(SYS_TFSR_EL1), access_mte_regs }, - { SYS_DESC(SYS_TFSRE0_EL1), access_mte_regs }, + { SYS_DESC(SYS_TFSR_EL1), undef_access }, + { SYS_DESC(SYS_TFSRE0_EL1), undef_access }, { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, @@ -1641,14 +1626,14 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, - { SYS_DESC(SYS_AMCR_EL0), access_amu }, - { SYS_DESC(SYS_AMCFGR_EL0), access_amu }, - { SYS_DESC(SYS_AMCGCR_EL0), access_amu }, - { SYS_DESC(SYS_AMUSERENR_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENCLR0_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENSET0_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENCLR1_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENSET1_EL0), access_amu }, + { SYS_DESC(SYS_AMCR_EL0), undef_access }, + { SYS_DESC(SYS_AMCFGR_EL0), undef_access }, + { SYS_DESC(SYS_AMCGCR_EL0), undef_access }, + { SYS_DESC(SYS_AMUSERENR_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENCLR0_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENSET0_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENCLR1_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENSET1_EL0), undef_access }, AMU_AMEVCNTR0_EL0(0), AMU_AMEVCNTR0_EL0(1), AMU_AMEVCNTR0_EL0(2), From ed4ffaf49bf9ce1002b516d8c6aa04937b7950bc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 10 Nov 2020 14:13:08 +0000 Subject: [PATCH 429/710] KVM: arm64: Handle SCXTNUM_ELx traps As the kernel never sets HCR_EL2.EnSCXT, accesses to SCXTNUM_ELx will trap to EL2. Let's handle that as gracefully as possible by injecting an UNDEF exception into the guest. This is consistent with the guest's view of ID_AA64PFR0_EL1.CSV2 being at most 1. Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20201110141308.451654-4-maz@kernel.org --- arch/arm64/include/asm/sysreg.h | 4 ++++ arch/arm64/kvm/sys_regs.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d52c1b3ce589..a427a5653369 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -372,6 +372,8 @@ #define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) #define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) +#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7) + #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) @@ -404,6 +406,8 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) + /* Definitions for system register interface to AMU for ARMv8.4 onwards */ #define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2)) #define SYS_AMCR_EL0 SYS_AM_EL0(2, 0) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b0022f37c8f1..c1726fb7f3d9 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1598,6 +1598,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, { SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 }, + { SYS_DESC(SYS_SCXTNUM_EL1), undef_access }, + { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, @@ -1626,6 +1628,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, + { SYS_DESC(SYS_SCXTNUM_EL0), undef_access }, + { SYS_DESC(SYS_AMCR_EL0), undef_access }, { SYS_DESC(SYS_AMCFGR_EL0), undef_access }, { SYS_DESC(SYS_AMCGCR_EL0), undef_access }, From 77c7e1bc060deab6430f1dff5922ccd3093d9776 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 10 Nov 2020 19:04:18 -0600 Subject: [PATCH 430/710] x86/platform/uv: Fix copied UV5 output archtype A test shows that the output contains a space: # cat /proc/sgi_uv/archtype NSGI4 U/UVX Remove that embedded space by copying the "trimmed" buffer instead of the untrimmed input character list. Use sizeof to remove size dependency on copy out length. Increase output buffer size by one character just in case BIOS sends an 8 character string for archtype. Fixes: 1e61f5a95f19 ("Add and decode Arch Type in UVsystab") Signed-off-by: Mike Travis Signed-off-by: Thomas Gleixner Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20201111010418.82133-1-mike.travis@hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 3115caa7d7d0..1b98f8c12b96 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -33,7 +33,7 @@ static union uvh_apicid uvh_apicid; static int uv_node_id; /* Unpack AT/OEM/TABLE ID's to be NULL terminated strings */ -static u8 uv_archtype[UV_AT_SIZE]; +static u8 uv_archtype[UV_AT_SIZE + 1]; static u8 oem_id[ACPI_OEM_ID_SIZE + 1]; static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; @@ -320,7 +320,7 @@ static int __init decode_arch_type(unsigned long ptr) if (n > 0 && n < sizeof(uv_ate->archtype)) { pr_info("UV: UVarchtype received from BIOS\n"); - uv_stringify(UV_AT_SIZE, uv_archtype, uv_ate->archtype); + uv_stringify(sizeof(uv_archtype), uv_archtype, uv_ate->archtype); return 1; } return 0; @@ -378,7 +378,7 @@ static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id) if (!early_get_arch_type()) /* If not use OEM ID for UVarchtype */ - uv_stringify(UV_AT_SIZE, uv_archtype, _oem_id); + uv_stringify(sizeof(uv_archtype), uv_archtype, oem_id); /* Check if not hubbed */ if (strncmp(uv_archtype, "SGI", 3) != 0) { From 0a1db6f0841288274f0d1e3a8fa8a3a787e05633 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 5 Nov 2020 15:49:33 +0000 Subject: [PATCH 431/710] drm/i915/gem: Allow backends to override pread implementation As there are more and more complicated interactions between the different backing stores and userspace, push the control into the backends rather than accumulate them all inside the ioctl handlers. Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201105154934.16022-1-chris@chris-wilson.co.uk (cherry picked from commit 0049b688459b846f819b6e51c24cd0781fcfde41) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index b5c15557cc87..d6711caa7f39 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -56,6 +56,8 @@ struct drm_i915_gem_object_ops { void (*truncate)(struct drm_i915_gem_object *obj); void (*writeback)(struct drm_i915_gem_object *obj); + int (*pread)(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *arg); int (*pwrite)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bb0c12975f38..d58fe1ddc3e1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -527,6 +527,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = -ENODEV; + if (obj->ops->pread) + ret = obj->ops->pread(obj, args); + if (ret != -ENODEV) + goto out; + ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); From 0eb0feb9aeac392edf01b525a54acde9b002312e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Nov 2020 15:49:34 +0000 Subject: [PATCH 432/710] drm/i915/gem: Pull phys pread/pwrite implementations to the backend Move the specialised interactions with the physical GEM object from the pread/pwrite ioctl handler into the phys backend. Currently, if one is able to exhaust the entire aperture and then try to pwrite into an object not backed by struct page, we accidentally invoked the phys pwrite handler on a non-phys object; calamitous. Fixes: c6790dc22312 ("drm/i915: Wean off drm_pci_alloc/drm_pci_free") Testcase: igt/gem_pwrite/exhaustion Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20201105154934.16022-2-chris@chris-wilson.co.uk (cherry picked from commit 852e1b3644817f071427b83859b889c788a0cf69) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_phys.c | 55 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem.c | 26 ----------- 2 files changed, 55 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 28147aab47b9..3a4dfe2ef1da 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -134,6 +134,58 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, vaddr, dma); } +static int +phys_pwrite(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) +{ + void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; + char __user *user_data = u64_to_user_ptr(args->data_ptr); + int err; + + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + /* + * We manually control the domain here and pretend that it + * remains coherent i.e. in the GTT domain, like shmem_pwrite. + */ + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); + + if (copy_from_user(vaddr, user_data, args->size)) + return -EFAULT; + + drm_clflush_virt_range(vaddr, args->size); + intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); + return 0; +} + +static int +phys_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) +{ + void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; + char __user *user_data = u64_to_user_ptr(args->data_ptr); + int err; + + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + drm_clflush_virt_range(vaddr, args->size); + if (copy_to_user(user_data, vaddr, args->size)) + return -EFAULT; + + return 0; +} + static void phys_release(struct drm_i915_gem_object *obj) { fput(obj->base.filp); @@ -144,6 +196,9 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .get_pages = i915_gem_object_get_pages_phys, .put_pages = i915_gem_object_put_pages_phys, + .pread = phys_pread, + .pwrite = phys_pwrite, + .release = phys_release, }; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d58fe1ddc3e1..58276694c848 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -179,30 +179,6 @@ try_again: return ret; } -static int -i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file) -{ - void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; - char __user *user_data = u64_to_user_ptr(args->data_ptr); - - /* - * We manually control the domain here and pretend that it - * remains coherent i.e. in the GTT domain, like shmem_pwrite. - */ - i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); - - if (copy_from_user(vaddr, user_data, args->size)) - return -EFAULT; - - drm_clflush_virt_range(vaddr, args->size); - intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); - - i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); - return 0; -} - static int i915_gem_create(struct drm_file *file, struct intel_memory_region *mr, @@ -872,8 +848,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (ret == -EFAULT || ret == -ENOSPC) { if (i915_gem_object_has_struct_page(obj)) ret = i915_gem_shmem_pwrite(obj, args); - else - ret = i915_gem_phys_pwrite(obj, args, file); } i915_gem_object_unpin_pages(obj); From 5ce6861d36ed5207aff9e5eead4c7cc38a986586 Mon Sep 17 00:00:00 2001 From: Venkata Sandeep Dhanalakota Date: Thu, 5 Nov 2020 17:18:42 -0800 Subject: [PATCH 433/710] drm/i915: Correctly set SFC capability for video engines SFC capability of video engines is not set correctly because i915 is testing for incorrect bits. Fixes: c5d3e39caa45 ("drm/i915: Engine discovery query") Cc: Matt Roper Cc: Tvrtko Ursulin Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Tvrtko Ursulin Cc: # v5.3+ Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201106011842.36203-1-daniele.ceraolospurio@intel.com (cherry picked from commit ad18fa0f5f052046cad96fee762b5c64f42dd86a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5bfb5f7ed02c..efdeb7b7b2a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -371,7 +371,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) * instances. */ if ((INTEL_GEN(i915) >= 11 && - engine->gt->info.vdbox_sfc_access & engine->mask) || + (engine->gt->info.vdbox_sfc_access & + BIT(engine->instance))) || (INTEL_GEN(i915) >= 9 && engine->instance == 0)) engine->uabi_capabilities |= I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; From 1922a46b8c18cb09d33e06a6cc2e43844ac1b9d0 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 12 Nov 2020 16:42:10 +1030 Subject: [PATCH 434/710] net/ncsi: Fix netlink registration If a user unbinds and re-binds a NC-SI aware driver the kernel will attempt to register the netlink interface at runtime. The structure is marked __ro_after_init so registration fails spectacularly at this point. # echo 1e660000.ethernet > /sys/bus/platform/drivers/ftgmac100/unbind # echo 1e660000.ethernet > /sys/bus/platform/drivers/ftgmac100/bind ftgmac100 1e660000.ethernet: Read MAC address 52:54:00:12:34:56 from chip ftgmac100 1e660000.ethernet: Using NCSI interface 8<--- cut here --- Unable to handle kernel paging request at virtual address 80a8f858 pgd = 8c768dd6 [80a8f858] *pgd=80a0841e(bad) Internal error: Oops: 80d [#1] SMP ARM CPU: 0 PID: 116 Comm: sh Not tainted 5.10.0-rc3-next-20201111-00003-gdd25b227ec1e #51 Hardware name: Generic DT based system PC is at genl_register_family+0x1f8/0x6d4 LR is at 0xff26ffff pc : [<8073f930>] lr : [] psr: 20000153 sp : 8553bc80 ip : 81406244 fp : 8553bd04 r10: 8085d12c r9 : 80a8f73c r8 : 85739000 r7 : 00000017 r6 : 80a8f860 r5 : 80c8ab98 r4 : 80a8f858 r3 : 00000000 r2 : 00000000 r1 : 81406130 r0 : 00000017 Flags: nzCv IRQs on FIQs off Mode SVC_32 ISA ARM Segment none Control: 00c5387d Table: 85524008 DAC: 00000051 Process sh (pid: 116, stack limit = 0x1f1988d6) ... Backtrace: [<8073f738>] (genl_register_family) from [<80860ac0>] (ncsi_init_netlink+0x20/0x48) r10:8085d12c r9:80c8fb0c r8:85739000 r7:00000000 r6:81218000 r5:85739000 r4:8121c000 [<80860aa0>] (ncsi_init_netlink) from [<8085d740>] (ncsi_register_dev+0x1b0/0x210) r5:8121c400 r4:8121c000 [<8085d590>] (ncsi_register_dev) from [<805a8060>] (ftgmac100_probe+0x6e0/0x778) r10:00000004 r9:80950228 r8:8115bc10 r7:8115ab00 r6:9eae2c24 r5:813b6f88 r4:85739000 [<805a7980>] (ftgmac100_probe) from [<805355ec>] (platform_drv_probe+0x58/0xa8) r9:80c76bb0 r8:00000000 r7:80cd4974 r6:80c76bb0 r5:8115bc10 r4:00000000 [<80535594>] (platform_drv_probe) from [<80532d58>] (really_probe+0x204/0x514) r7:80cd4974 r6:00000000 r5:80cd4868 r4:8115bc10 Jakub pointed out that ncsi_register_dev is obviously broken, because there is only one family so it would never work if there was more than one ncsi netdev. Fix the crash by registering the netlink family once on boot, and drop the code to unregister it. Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family") Signed-off-by: Joel Stanley Reviewed-by: Samuel Mendoza-Jonas Link: https://lore.kernel.org/r/20201112061210.914621-1-joel@jms.id.au Signed-off-by: Jakub Kicinski --- net/ncsi/ncsi-manage.c | 5 ----- net/ncsi/ncsi-netlink.c | 22 +++------------------- net/ncsi/ncsi-netlink.h | 3 --- 3 files changed, 3 insertions(+), 27 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index f1be3e3f6425..a9cb355324d1 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1726,9 +1726,6 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, ndp->ptype.dev = dev; dev_add_pack(&ndp->ptype); - /* Set up generic netlink interface */ - ncsi_init_netlink(dev); - pdev = to_platform_device(dev->dev.parent); if (pdev) { np = pdev->dev.of_node; @@ -1892,8 +1889,6 @@ void ncsi_unregister_dev(struct ncsi_dev *nd) list_del_rcu(&ndp->node); spin_unlock_irqrestore(&ncsi_dev_lock, flags); - ncsi_unregister_netlink(nd->dev); - kfree(ndp); } EXPORT_SYMBOL_GPL(ncsi_unregister_dev); diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index adddc7707aa4..bb5f1650f11c 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -766,24 +766,8 @@ static struct genl_family ncsi_genl_family __ro_after_init = { .n_small_ops = ARRAY_SIZE(ncsi_ops), }; -int ncsi_init_netlink(struct net_device *dev) +static int __init ncsi_init_netlink(void) { - int rc; - - rc = genl_register_family(&ncsi_genl_family); - if (rc) - netdev_err(dev, "ncsi: failed to register netlink family\n"); - - return rc; -} - -int ncsi_unregister_netlink(struct net_device *dev) -{ - int rc; - - rc = genl_unregister_family(&ncsi_genl_family); - if (rc) - netdev_err(dev, "ncsi: failed to unregister netlink family\n"); - - return rc; + return genl_register_family(&ncsi_genl_family); } +subsys_initcall(ncsi_init_netlink); diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h index 7502723fba83..39a1a9d7bf77 100644 --- a/net/ncsi/ncsi-netlink.h +++ b/net/ncsi/ncsi-netlink.h @@ -22,7 +22,4 @@ int ncsi_send_netlink_err(struct net_device *dev, struct nlmsghdr *nlhdr, int err); -int ncsi_init_netlink(struct net_device *dev); -int ncsi_unregister_netlink(struct net_device *dev); - #endif /* __NCSI_NETLINK_H__ */ From e8aa6d520b448efc88670a98eccd196713639f2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= Date: Thu, 12 Nov 2020 09:48:33 +0100 Subject: [PATCH 435/710] net: ethernet: mtk-star-emac: return ok when xmit drops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ndo_start_xmit() method must return NETDEV_TX_OK if the DMA mapping fails, after freeing the socket buffer. Fix the mtk_star_netdev_start_xmit() function accordingly. Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver") Signed-off-by: Vincent Stehlé Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20201112084833.21842-1-vincent.stehle@laposte.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 13250553263b..e56a26f797f2 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1053,7 +1053,7 @@ static int mtk_star_netdev_start_xmit(struct sk_buff *skb, err_drop_packet: dev_kfree_skb(skb); ndev->stats.tx_dropped++; - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } /* Returns the number of bytes sent or a negative number on the first From c350f8bea271782e2733419bd2ab9bf4ec2051ef Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Thu, 12 Nov 2020 21:53:32 +0800 Subject: [PATCH 436/710] selinux: Fix error return code in sel_ib_pkey_sid_slow() Fix to return a negative error code from the error handling case instead of 0 in function sel_ib_pkey_sid_slow(), as done elsewhere in this function. Cc: stable@vger.kernel.org Fixes: 409dcf31538a ("selinux: Add a cache for quicker retreival of PKey SIDs") Reported-by: Hulk Robot Signed-off-by: Chen Zhou Signed-off-by: Paul Moore --- security/selinux/ibpkey.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c index f68a7617cfb9..3a63a989e55e 100644 --- a/security/selinux/ibpkey.c +++ b/security/selinux/ibpkey.c @@ -151,8 +151,10 @@ static int sel_ib_pkey_sid_slow(u64 subnet_prefix, u16 pkey_num, u32 *sid) * is valid, it just won't be added to the cache. */ new = kzalloc(sizeof(*new), GFP_ATOMIC); - if (!new) + if (!new) { + ret = -ENOMEM; goto out; + } new->psec.subnet_prefix = subnet_prefix; new->psec.pkey = pkey_num; From 50b8a742850fce7293bed45753152c425f7e931b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 13 Nov 2020 02:27:31 +0900 Subject: [PATCH 437/710] bootconfig: Extend the magic check range to the preceding 3 bytes Since Grub may align the size of initrd to 4 if user pass initrd from cpio, we have to check the preceding 3 bytes as well. Link: https://lkml.kernel.org/r/160520205132.303174.4876760192433315429.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly") Reported-by: Chen Yu Tested-by: Chen Yu Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 130376ec10ba..20baced721ad 100644 --- a/init/main.c +++ b/init/main.c @@ -269,14 +269,24 @@ static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum) u32 size, csum; char *data; u32 *hdr; + int i; if (!initrd_end) return NULL; data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN; - if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN)) - return NULL; + /* + * Since Grub may align the size of initrd to 4, we must + * check the preceding 3 bytes as well. + */ + for (i = 0; i < 4; i++) { + if (!memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN)) + goto found; + data--; + } + return NULL; +found: hdr = (u32 *)(data - 8); size = hdr[0]; csum = hdr[1]; From baee1991fad928d6c8dd5be3197ecb413c420c97 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 12 Nov 2020 19:34:39 +0800 Subject: [PATCH 438/710] net: ethernet: mtk-star-emac: fix error return code in mtk_star_enable() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/1605180879-2573-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index e56a26f797f2..a8641a407c06 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -966,6 +966,7 @@ static int mtk_star_enable(struct net_device *ndev) mtk_star_adjust_link, 0, priv->phy_intf); if (!priv->phydev) { netdev_err(ndev, "failed to connect to PHY\n"); + ret = -ENODEV; goto err_free_irq; } From 4ee18c179e5e815fa5575e0d2db0c05795a804ee Mon Sep 17 00:00:00 2001 From: Xie He Date: Thu, 12 Nov 2020 02:35:06 -0800 Subject: [PATCH 439/710] net: x25: Increase refcnt of "struct x25_neigh" in x25_rx_call_request The x25_disconnect function in x25_subr.c would decrease the refcount of "x25->neighbour" (struct x25_neigh) and reset this pointer to NULL. However, the x25_rx_call_request function in af_x25.c, which is called when we receive a connection request, does not increase the refcount when it assigns the pointer. Fix this issue by increasing the refcount of "struct x25_neigh" in x25_rx_call_request. This patch fixes frequent kernel crashes when using AF_X25 sockets. Fixes: 4becb7ee5b3d ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect") Cc: Martin Schiller Signed-off-by: Xie He Link: https://lore.kernel.org/r/20201112103506.5875-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski --- net/x25/af_x25.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 046d3fee66a9..a10487e7574c 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1050,6 +1050,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, makex25->lci = lci; makex25->dest_addr = dest_addr; makex25->source_addr = source_addr; + x25_neigh_hold(nb); makex25->neighbour = nb; makex25->facilities = facilities; makex25->dte_facilities= dte_facilities; From 4ec2b69da5e1544dbadb30cddb49c8df60209b0c Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Thu, 12 Nov 2020 21:22:32 +0800 Subject: [PATCH 440/710] drm/i915/gvt: return error when failing to take the module reference When we fail to take the module reference, we go to the 'undo*' branch and return. But the returned variable 'ret' has been set as zero by the above code. Change 'ret' to '-ENODEV' in this situation. Fixes: 9bdb073464d6 ("drm/i915/gvt: Change KVMGT as self load module") Reviewed-by: Zhenyu Wang Reported-by: Hulk Robot Signed-off-by: Xiongfeng Wang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/1605187352-51761-1-git-send-email-wangxiongfeng2@huawei.com --- drivers/gpu/drm/i915/gvt/kvmgt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index ad8a9df49f29..778eb8cab610 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -829,8 +829,10 @@ static int intel_vgpu_open(struct mdev_device *mdev) /* Take a module reference as mdev core doesn't take * a reference for vendor driver. */ - if (!try_module_get(THIS_MODULE)) + if (!try_module_get(THIS_MODULE)) { + ret = -ENODEV; goto undo_group; + } ret = kvmgt_guest_init(mdev); if (ret) From 266421925574f91bf9d373128f38771c565f107a Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 26 Oct 2020 17:12:34 -0400 Subject: [PATCH 441/710] drm/amdgpu: add ta firmware load for green-sardine [Why] In preparation to enabling hdcp on green sardine. [How] Add green-sardine ta f/w loading in psp_v12 Signed-off-by: Roman Li Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index dff5c15b4858..c4828bd3264b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -40,6 +40,7 @@ MODULE_FIRMWARE("amdgpu/renoir_asd.bin"); MODULE_FIRMWARE("amdgpu/renoir_ta.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 From 38a2509184952f799d465b26279ef1bd36fb8277 Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Fri, 6 Nov 2020 14:56:35 +0800 Subject: [PATCH 442/710] drm/amdgpu: enable DCN for navi10 headless SKU There is a NULL pointer crash when DCN disabled on headless SKU. On normal SKU, the variable adev->ddev.mode_config.funcs is initialized in dm_hw_init(), and it is fine to access it in amdgpu_device_resume(). But on headless SKU, DCN is disabled, the funcs variable is not initialized, then crash arises. Enable DCN to fix this issue. Reviewed-by: Alex Deucher Reviewed-by: Guchun Chen Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index d5715c1d177b..8eeba8096493 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -492,8 +492,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev) && - !nv_is_headless_sku(adev->pdev)) + else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); From 7bc40aedf24d31d8bea80e1161e996ef4299fb10 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Nov 2020 11:22:04 +0100 Subject: [PATCH 443/710] mac80211: free sta in sta_info_insert_finish() on errors If sta_info_insert_finish() fails, we currently keep the station around and free it only in the caller, but there's only one such caller and it always frees it immediately. As syzbot found, another consequence of this split is that we can put things that sleep only into __cleanup_single_sta() and not in sta_info_free(), but this is the only place that requires such of sta_info_free() now. Change this to free the station in sta_info_insert_finish(), in which case we can still sleep. This will also let us unify the cleanup code later. Cc: stable@vger.kernel.org Fixes: dcd479e10a05 ("mac80211: always wind down STA state") Reported-by: syzbot+32c6c38c4812d22f2f0b@syzkaller.appspotmail.com Reported-by: syzbot+4c81fe92e372d26c4246@syzkaller.appspotmail.com Reported-by: syzbot+6a7fe9faf0d1d61bc24a@syzkaller.appspotmail.com Reported-by: syzbot+abed06851c5ffe010921@syzkaller.appspotmail.com Reported-by: syzbot+b7aeb9318541a1c709f1@syzkaller.appspotmail.com Reported-by: syzbot+d5a9416c6cafe53b5dd0@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20201112112201.ee6b397b9453.I9c31d667a0ea2151441cc64ed6613d36c18a48e0@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4fe284ff1ea3..ec6973ee88ef 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -705,7 +705,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_drop_sta: local->num_sta--; synchronize_net(); - __cleanup_single_sta(sta); + cleanup_single_sta(sta); out_err: mutex_unlock(&local->sta_mtx); kfree(sinfo); @@ -724,19 +724,13 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) err = sta_info_insert_check(sta); if (err) { + sta_info_free(local, sta); mutex_unlock(&local->sta_mtx); rcu_read_lock(); - goto out_free; + return err; } - err = sta_info_insert_finish(sta); - if (err) - goto out_free; - - return 0; - out_free: - sta_info_free(local, sta); - return err; + return sta_info_insert_finish(sta); } int sta_info_insert(struct sta_info *sta) From 58284a901b426e6130672e9f14c30dfd5a9dbde0 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 13 Nov 2020 13:00:14 +0530 Subject: [PATCH 444/710] arm64/mm: Validate hotplug range before creating linear mapping During memory hotplug process, the linear mapping should not be created for a given memory range if that would fall outside the maximum allowed linear range. Else it might cause memory corruption in the kernel virtual space. Maximum linear mapping region is [PAGE_OFFSET..(PAGE_END -1)] accommodating both its ends but excluding PAGE_END. Max physical range that can be mapped inside this linear mapping range, must also be derived from its end points. This ensures that arch_add_memory() validates memory hot add range for its potential linear mapping requirements, before creating it with __create_pgd_mapping(). Fixes: 4ab215061554 ("arm64: Add memory hotplug support") Signed-off-by: Anshuman Khandual Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Ard Biesheuvel Cc: Steven Price Cc: Robin Murphy Cc: David Hildenbrand Cc: Andrew Morton Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1605252614-761-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1c0f3e02f731..ca692a815731 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1444,11 +1444,28 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) free_empty_tables(start, end, PAGE_OFFSET, PAGE_END); } +static bool inside_linear_region(u64 start, u64 size) +{ + /* + * Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)] + * accommodating both its ends but excluding PAGE_END. Max physical + * range which can be mapped inside this linear mapping range, must + * also be derived from its end points. + */ + return start >= __pa(_PAGE_OFFSET(vabits_actual)) && + (start + size - 1) <= __pa(PAGE_END - 1); +} + int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { int ret, flags = 0; + if (!inside_linear_region(start, size)) { + pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size); + return -EINVAL; + } + if (rodata_full || debug_pagealloc_enabled()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; From 77473cffef21611b4423f613fe32836afb26405e Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 5 Nov 2020 00:22:10 +0100 Subject: [PATCH 445/710] arm64: Add MIDR value for KRYO2XX gold/silver CPU cores Add MIDR value for KRYO2XX gold (big) and silver (LITTLE) CPU cores which are used in Qualcomm Technologies, Inc. SoCs. This will be used to identify and apply errata which are applicable for these CPU cores. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20201104232218.198800-2-konrad.dybcio@somainline.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 9e2e9a63c7b6..ef5b040dee44 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,8 @@ #define QCOM_CPU_PART_FALKOR_V1 0x800 #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 +#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 +#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 @@ -116,6 +118,8 @@ #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) +#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) From e3dd11a9f2521cecbcf30c2fd17ecc5a445dfb94 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 5 Nov 2020 00:22:11 +0100 Subject: [PATCH 446/710] arm64: kpti: Add KRYO2XX gold/silver CPU cores to kpti safelist QCOM KRYO2XX gold (big) silver (LITTLE) CPU cores are based on Cortex-A73 and Cortex-A53 respectively and are meltdown safe, hence add them to kpti_safe_list[]. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20201104232218.198800-3-konrad.dybcio@somainline.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index dcc165b3fc04..6f36c4f62f69 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1337,6 +1337,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_GOLD), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } From 38328d40116739af0692748427bedda35b286c33 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 5 Nov 2020 00:22:12 +0100 Subject: [PATCH 447/710] arm64: proton-pack: Add KRYO2XX silver CPUs to spectre-v2 safe-list KRYO2XX silver (LITTLE) CPUs are based on Cortex-A53 and they are not affected by spectre-v2. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20201104232218.198800-4-konrad.dybcio@somainline.org Signed-off-by: Will Deacon --- arch/arm64/kernel/proton-pack.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index c18eb7d41274..f6e4e3737405 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -118,6 +118,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } From 23c216416056148136bdaf0cdd18caf4904bb6e1 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 5 Nov 2020 00:22:13 +0100 Subject: [PATCH 448/710] arm64: cpu_errata: Apply Erratum 845719 to KRYO2XX Silver QCOM KRYO2XX Silver cores are Cortex-A53 based and are susceptible to the 845719 erratum. Add them to the lookup list to apply the erratum. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20201104232218.198800-5-konrad.dybcio@somainline.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 61314fd70f13..cafaf0da05b7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -299,6 +299,8 @@ static const struct midr_range erratum_845719_list[] = { MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), /* Brahma-B53 r0p[0] */ MIDR_REV(MIDR_BRAHMA_B53, 0, 0), + /* Kryo2XX Silver rAp4 */ + MIDR_REV(MIDR_QCOM_KRYO_2XX_SILVER, 0xa, 0x4), {}, }; #endif From a0ccbc5319d57b9efdc55c943a3fde30a0776502 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 6 Nov 2020 15:20:38 +0800 Subject: [PATCH 449/710] ALSA: hda/realtek - Add supported mute Led for HP HP Pavilion x360 Convertible machine, it supported mute led. GPIO4 high will turn on led. The patch will enable control led via GPIO4 pin. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/1ae4d98e92c147b780ace3911c4e1d73@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 69a952c9e011..371f6b865439 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4226,6 +4226,12 @@ static void alc286_fixup_hp_gpio_led(struct hda_codec *codec, alc_fixup_hp_gpio_led(codec, action, 0x02, 0x20); } +static void alc287_fixup_hp_gpio_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + alc_fixup_hp_gpio_led(codec, action, 0x10, 0); +} + /* turn on/off mic-mute LED per capture hook via VREF change */ static int vref_micmute_led_set(struct led_classdev *led_cdev, enum led_brightness brightness) @@ -6312,6 +6318,7 @@ enum { ALC274_FIXUP_HP_HEADSET_MIC, ALC256_FIXUP_ASUS_HPE, ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, + ALC287_FIXUP_HP_GPIO_LED, }; static const struct hda_fixup alc269_fixups[] = { @@ -7722,6 +7729,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_THINKPAD_ACPI }, + [ALC287_FIXUP_HP_GPIO_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_hp_gpio_led, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7876,6 +7887,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 9e885770277d2ed8d85f9cbd4992515ec324242f Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 3 Nov 2020 15:30:51 +0800 Subject: [PATCH 450/710] ALSA: hda/realtek - HP Headset Mic can't detect after boot System boot or warm boot with plugged headset. If it turn on power save mode, Headset Mic will lose. This patch will solve this issue. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/1ae4d98e92c147b780ace3911c4e1d73@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 371f6b865439..739dbaf54517 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6319,6 +6319,7 @@ enum { ALC256_FIXUP_ASUS_HPE, ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, ALC287_FIXUP_HP_GPIO_LED, + ALC256_FIXUP_HP_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -7733,6 +7734,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_hp_gpio_led, }, + [ALC256_FIXUP_HP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc274_fixup_hp_headset_mic, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8348,6 +8353,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x19, 0x02a11020}, {0x1a, 0x02a11030}, {0x21, 0x0221101f}), + SND_HDA_PIN_QUIRK(0x10ec0236, 0x103c, "HP", ALC256_FIXUP_HP_HEADSET_MIC, + {0x14, 0x90170110}, + {0x19, 0x02a11020}, + {0x21, 0x02211030}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, {0x14, 0x90170110}, {0x21, 0x02211020}), @@ -8450,6 +8459,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x1a, 0x90a70130}, {0x1b, 0x90170110}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x103c, "HP", ALC256_FIXUP_HP_HEADSET_MIC, + {0x14, 0x90170110}, + {0x19, 0x02a11020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0274, 0x103c, "HP", ALC274_FIXUP_HP_HEADSET_MIC, {0x17, 0x90170110}, {0x19, 0x03a11030}, From 95a793c3bc75cf888e0e641d656e7d080f487d8b Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 13 Nov 2020 18:20:43 +0900 Subject: [PATCH 451/710] ALSA: ctl: fix error path at adding user-defined element set When processing request to add/replace user-defined element set, check of given element identifier and decision of numeric identifier is done in "__snd_ctl_add_replace()" helper function. When the result of check is wrong, the helper function returns error code. The error code shall be returned to userspace application. Current implementation includes bug to return zero to userspace application regardless of the result. This commit fixes the bug. Cc: Fixes: e1a7bfe38079 ("ALSA: control: Fix race between adding and removing a user element") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20201113092043.16148-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/core/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index 4373de42a5a0..3b44378b9dec 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1539,7 +1539,7 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, unlock: up_write(&card->controls_rwsem); - return 0; + return err; } static int snd_ctl_elem_add_user(struct snd_ctl_file *file, From ff828729be446b86957f7c294068758231cd2183 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Nov 2020 20:14:16 +0100 Subject: [PATCH 452/710] iommu/vt-d: Cure VF irqdomain hickup The recent changes to store the MSI irqdomain pointer in struct device missed that Intel DMAR does not register virtual function devices. Due to that a VF device gets the plain PCI-MSI domain assigned and then issues compat MSI messages which get caught by the interrupt remapping unit. Cure that by inheriting the irq domain from the physical function device. Ideally the irqdomain would be associated to the bus, but DMAR can have multiple units and therefore irqdomains on a single bus. The VF 'bus' could of course inherit the domain from the PF, but that'd be yet another x86 oddity. Fixes: 85a8dfc57a0b ("iommm/vt-d: Store irq domain in struct device") Reported-by: Jason Gunthorpe Signed-off-by: Thomas Gleixner Acked-by: Lu Baolu Cc: Joerg Roedel Cc: Bjorn Helgaas Cc: Marc Zyngier Cc: David Woodhouse Link: https://lore.kernel.org/r/draft-87eekymlpz.fsf@nanos.tec.linutronix.de --- drivers/iommu/intel/dmar.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 404b40af31cb..b2e804473209 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -333,6 +333,11 @@ static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) dmar_iommu_notify_scope_dev(info); } +static inline void vf_inherit_msi_domain(struct pci_dev *pdev) +{ + dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&pdev->physfn->dev)); +} + static int dmar_pci_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -342,8 +347,20 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, /* Only care about add/remove events for physical functions. * For VFs we actually do the lookup based on the corresponding * PF in device_to_iommu() anyway. */ - if (pdev->is_virtfn) + if (pdev->is_virtfn) { + /* + * Ensure that the VF device inherits the irq domain of the + * PF device. Ideally the device would inherit the domain + * from the bus, but DMAR can have multiple units per bus + * which makes this impossible. The VF 'bus' could inherit + * from the PF device, but that's yet another x86'sism to + * inflict on everybody else. + */ + if (action == BUS_NOTIFY_ADD_DEVICE) + vf_inherit_msi_domain(pdev); return NOTIFY_DONE; + } + if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_REMOVED_DEVICE) return NOTIFY_DONE; From 51b958e5aeb1e18c00332e0b37c5d4e95a3eff84 Mon Sep 17 00:00:00 2001 From: David Edmondson Date: Tue, 3 Nov 2020 12:04:00 +0000 Subject: [PATCH 453/710] KVM: x86: clflushopt should be treated as a no-op by emulation The instruction emulator ignores clflush instructions, yet fails to support clflushopt. Treat both similarly. Fixes: 13e457e0eebf ("KVM: x86: Emulator does not decode clflush well") Signed-off-by: David Edmondson Message-Id: <20201103120400.240882-1-david.edmondson@oracle.com> Reviewed-by: Joao Martins Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0d917eb70319..56cae1ff9e3f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4046,6 +4046,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_clflushopt(struct x86_emulate_ctxt *ctxt) +{ + /* emulating clflushopt regardless of cpuid */ + return X86EMUL_CONTINUE; +} + static int em_movsxd(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = (s32) ctxt->src.val; @@ -4585,7 +4591,7 @@ static const struct opcode group11[] = { }; static const struct gprefix pfx_0f_ae_7 = { - I(SrcMem | ByteOp, em_clflush), N, N, N, + I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N, }; static const struct group_dual group15 = { { From 0107973a80adad5b73232d3fbcd26f710ab1f851 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 12 Nov 2020 16:17:56 -0600 Subject: [PATCH 454/710] KVM: x86: Introduce cr3_lm_rsvd_bits in kvm_vcpu_arch SEV guests fail to boot on a system that supports the PCID feature. While emulating the RSM instruction, KVM reads the guest CR3 and calls kvm_set_cr3(). If the vCPU is in the long mode, kvm_set_cr3() does a sanity check for the CR3 value. In this case, it validates whether the value has any reserved bits set. The reserved bit range is 63:cpuid_maxphysaddr(). When AMD memory encryption is enabled, the memory encryption bit is set in the CR3 value. The memory encryption bit may fall within the KVM reserved bit range, causing the KVM emulation failure. Introduce a new field cr3_lm_rsvd_bits in kvm_vcpu_arch which will cache the reserved bits in the CR3 value. This will be initialized to rsvd_bits(cpuid_maxphyaddr(vcpu), 63). If the architecture has any special bits(like AMD SEV encryption bit) that needs to be masked from the reserved bits, should be cleared in vendor specific kvm_x86_ops.vcpu_after_set_cpuid handler. Fixes: a780a3ea628268b2 ("KVM: X86: Fix reserved bits check for MOV to CR3") Signed-off-by: Babu Moger Message-Id: <160521947657.32054.3264016688005356563.stgit@bmoger-ubuntu> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/cpuid.c | 2 ++ arch/x86/kvm/x86.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d44858b69353..324ddd7fd0aa 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -639,6 +639,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; + unsigned long cr3_lm_rsvd_bits; int maxphyaddr; int max_tdp_level; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index d50041f570e8..f87b5dfbaba4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -178,6 +178,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.cr4_guest_rsvd_bits = __cr4_reserved_bits(guest_cpuid_has, vcpu); + vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63); + /* Invoke the vendor callback only after the above state is updated. */ kvm_x86_ops.vcpu_after_set_cpuid(vcpu); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 447edc0d1d5a..078a39d489fe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1041,7 +1041,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) } if (is_long_mode(vcpu) && - (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) + (cr3 & vcpu->arch.cr3_lm_rsvd_bits)) return 1; else if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) From 96308b066184d6dcdb677890e620e68290ae98ae Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 12 Nov 2020 16:18:03 -0600 Subject: [PATCH 455/710] KVM: SVM: Update cr3_lm_rsvd_bits for AMD SEV guests For AMD SEV guests, update the cr3_lm_rsvd_bits to mask the memory encryption bit in reserved bits. Signed-off-by: Babu Moger Message-Id: <160521948301.32054.5783800787423231162.stgit@bmoger-ubuntu> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2f32fd09e259..1e81cfebd491 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3741,6 +3741,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_cpuid_entry2 *best; vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && boot_cpu_has(X86_FEATURE_XSAVE) && @@ -3753,6 +3754,13 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) /* Check again if INVPCID interception if required */ svm_check_invpcid(svm); + /* For sev guests, the memory encryption bit is not reserved in CR3. */ + if (sev_guest(vcpu->kvm)) { + best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0); + if (best) + vcpu->arch.cr3_lm_rsvd_bits &= ~(1UL << (best->ebx & 0x3f)); + } + if (!kvm_vcpu_apicv_active(vcpu)) return; From 53bf2776e31376f0b6a1fd7c9e1abc61241825a2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 5 Nov 2020 22:47:02 +0100 Subject: [PATCH 456/710] ARM: dts: exynos: revert "add input clock to CMU in Exynos4412 Odroid" This reverts commit eaf2d2f6895d676dda6c95a652b58594f2887720. The commit eaf2d2f6895d ("ARM: dts: exynos: add input clock to CMU in Exynos4412 Odroid") breaks probing of usb3503 USB hub on Odroid U3. It changes the order of clock drivers probe: the clkout (Exynos PMU) driver is probed before the main clk-exynos4 driver. The clkout driver on Exynos4412 depends on clk-exynos4 but it does not support deferred probe, therefore this dependency and changed probe order causes probe failure. The usb3503 USB hub on Odroid U3 on the other hand requires clkout clock. This can be seen in logs: [ 5.007442] usb3503 0-0008: unable to request refclk (-517) Reported-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20200921174818.15525-1-krzk@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/exynos4412-odroid-common.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index ab291cec650a..2983e91bc7dd 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -122,7 +122,6 @@ }; &clock { - clocks = <&clock CLK_XUSBXTI>; assigned-clocks = <&clock CLK_FOUT_EPLL>; assigned-clock-rates = <45158401>; }; From ffa13d2d94029882eca22a565551783787f121e5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 12 Nov 2020 14:59:00 +0100 Subject: [PATCH 457/710] Revert "usb: musb: convert to devm_platform_ioremap_resource_byname" This reverts commit 2d30e408a2a6b3443d3232593e3d472584a3e9f8. On Beaglebone Black, where each interface has 2 children: musb-dsps 47401c00.usb: can't request region for resource [mem 0x47401800-0x474019ff] musb-hdrc musb-hdrc.1: musb_init_controller failed with status -16 musb-hdrc: probe of musb-hdrc.1 failed with error -16 musb-dsps 47401400.usb: can't request region for resource [mem 0x47401000-0x474011ff] musb-hdrc musb-hdrc.0: musb_init_controller failed with status -16 musb-hdrc: probe of musb-hdrc.0 failed with error -16 Before, devm_ioremap_resource() was called on "dev" ("musb-hdrc.0" or "musb-hdrc.1"), after it is called on "&pdev->dev" ("47401400.usb" or "47401c00.usb"), leading to a duplicate region request, which fails. Signed-off-by: Geert Uytterhoeven Fixes: 2d30e408a2a6 ("usb: musb: convert to devm_platform_ioremap_resource_byname") Cc: stable Link: https://lore.kernel.org/r/20201112135900.3822599-1-geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_dsps.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 30085b2be7b9..5892f3ce0cdc 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -429,10 +429,12 @@ static int dsps_musb_init(struct musb *musb) struct platform_device *parent = to_platform_device(dev->parent); const struct dsps_musb_wrapper *wrp = glue->wrp; void __iomem *reg_base; + struct resource *r; u32 rev, val; int ret; - reg_base = devm_platform_ioremap_resource_byname(parent, "control"); + r = platform_get_resource_byname(parent, IORESOURCE_MEM, "control"); + reg_base = devm_ioremap_resource(dev, r); if (IS_ERR(reg_base)) return PTR_ERR(reg_base); musb->ctrl_base = reg_base; From 76255470ffa2795a44032e8b3c1ced11d81aa2db Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 6 Nov 2020 20:22:21 +0800 Subject: [PATCH 458/710] xhci: hisilicon: fix refercence leak in xhci_histb_probe pm_runtime_get_sync() will increment pm usage at first and it will resume the device later. We should decrease the usage count whetever it succeeded or failed(maybe runtime of the device has error, or device is in inaccessible state, or other error state). If we do not call put operation to decrease the reference, it will result in reference leak in xhci_histb_probe. Moreover, this device cannot enter the idle state and always stay busy or other non-idle state later. So we fixed it by jumping to error handling branch. Fixes: c508f41da0788 ("xhci: hisilicon: support HiSilicon STB xHCI host controller") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201106122221.2304528-1-zhangqilong3@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-histb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-histb.c b/drivers/usb/host/xhci-histb.c index 5546e7e013a8..08369857686e 100644 --- a/drivers/usb/host/xhci-histb.c +++ b/drivers/usb/host/xhci-histb.c @@ -240,7 +240,7 @@ static int xhci_histb_probe(struct platform_device *pdev) /* Initialize dma_mask and coherent_dma_mask to 32-bits */ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret) - return ret; + goto disable_pm; hcd = usb_create_hcd(driver, dev, dev_name(dev)); if (!hcd) { From 106e6d8df4842d816dae23076c501ae48386afcb Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Fri, 13 Nov 2020 17:21:25 +0800 Subject: [PATCH 459/710] ASoC: rt1015: increase the time to detect BCLK To meet the most platform, the detection time should be increased to avoid that the flushing DAC data fails. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20201113092125.19206-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1015.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt1015.c b/sound/soc/codecs/rt1015.c index 967193518349..3db07293c70b 100644 --- a/sound/soc/codecs/rt1015.c +++ b/sound/soc/codecs/rt1015.c @@ -544,7 +544,7 @@ static void rt1015_flush_work(struct work_struct *work) struct rt1015_priv *rt1015 = container_of(work, struct rt1015_priv, flush_work.work); struct snd_soc_component *component = rt1015->component; - unsigned int val, i = 0, count = 20; + unsigned int val, i = 0, count = 200; while (i < count) { usleep_range(1000, 1500); From 57a6ad482af256b2a13de14194fb8f67c1a65f10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 13 Nov 2020 01:20:27 +0100 Subject: [PATCH 460/710] regulator: fix memory leak with repeated set_machine_constraints() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed commit introduced a possible second call to set_machine_constraints() and that allocates memory for rdev->constraints. Move the allocation to the caller so it's easier to manage and done once. Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator") Cc: stable@vger.kernel.org Signed-off-by: Michał Mirosław Tested-by: Ahmad Fatoum # stpmic1 Link: https://lore.kernel.org/r/78c3d4016cebc08d441aad18cb924b4e4d9cf9df.1605226675.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 70168f2a53ed..84dd0aea97c5 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1315,7 +1315,6 @@ static int _regulator_do_enable(struct regulator_dev *rdev); /** * set_machine_constraints - sets regulator constraints * @rdev: regulator source - * @constraints: constraints to apply * * Allows platform initialisation code to define and constrain * regulator circuits e.g. valid voltage/current ranges, etc. NOTE: @@ -1323,21 +1322,11 @@ static int _regulator_do_enable(struct regulator_dev *rdev); * regulator operations to proceed i.e. set_voltage, set_current_limit, * set_mode. */ -static int set_machine_constraints(struct regulator_dev *rdev, - const struct regulation_constraints *constraints) +static int set_machine_constraints(struct regulator_dev *rdev) { int ret = 0; const struct regulator_ops *ops = rdev->desc->ops; - if (constraints) - rdev->constraints = kmemdup(constraints, sizeof(*constraints), - GFP_KERNEL); - else - rdev->constraints = kzalloc(sizeof(*constraints), - GFP_KERNEL); - if (!rdev->constraints) - return -ENOMEM; - ret = machine_constraints_voltage(rdev, rdev->constraints); if (ret != 0) return ret; @@ -5146,7 +5135,6 @@ struct regulator_dev * regulator_register(const struct regulator_desc *regulator_desc, const struct regulator_config *cfg) { - const struct regulation_constraints *constraints = NULL; const struct regulator_init_data *init_data; struct regulator_config *config = NULL; static atomic_t regulator_no = ATOMIC_INIT(-1); @@ -5285,14 +5273,23 @@ regulator_register(const struct regulator_desc *regulator_desc, /* set regulator constraints */ if (init_data) - constraints = &init_data->constraints; + rdev->constraints = kmemdup(&init_data->constraints, + sizeof(*rdev->constraints), + GFP_KERNEL); + else + rdev->constraints = kzalloc(sizeof(*rdev->constraints), + GFP_KERNEL); + if (!rdev->constraints) { + ret = -ENOMEM; + goto wash; + } if (init_data && init_data->supply_regulator) rdev->supply_name = init_data->supply_regulator; else if (regulator_desc->supply_name) rdev->supply_name = regulator_desc->supply_name; - ret = set_machine_constraints(rdev, constraints); + ret = set_machine_constraints(rdev); if (ret == -EPROBE_DEFER) { /* Regulator might be in bypass mode and so needs its supply * to set the constraints */ @@ -5301,7 +5298,7 @@ regulator_register(const struct regulator_desc *regulator_desc, * that is just being created */ ret = regulator_resolve_supply(rdev); if (!ret) - ret = set_machine_constraints(rdev, constraints); + ret = set_machine_constraints(rdev); else rdev_dbg(rdev, "unable to resolve supply early: %pe\n", ERR_PTR(ret)); From 4b639e254d3d4f15ee4ff2b890a447204cfbeea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 13 Nov 2020 01:20:28 +0100 Subject: [PATCH 461/710] regulator: avoid resolve_supply() infinite recursion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a regulator's name equals its supply's name the regulator_resolve_supply() recurses indefinitely. Add a check so that debugging the problem is easier. The "fixed" commit just exposed the problem. Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator") Cc: stable@vger.kernel.org Reported-by: Ahmad Fatoum Signed-off-by: Michał Mirosław Tested-by: Ahmad Fatoum # stpmic1 Link: https://lore.kernel.org/r/c6171057cfc0896f950c4d8cb82df0f9f1b89ad9.1605226675.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 84dd0aea97c5..d3b96efa20fd 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1841,6 +1841,12 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) } } + if (r == rdev) { + dev_err(dev, "Supply for %s (%s) resolved to itself\n", + rdev->desc->name, rdev->supply_name); + return -EINVAL; + } + /* * If the supply's parent device is not the same as the * regulator's parent device, then ensure the parent device From f5c042b23f7429e5c2ac987b01a31c69059a978b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 13 Nov 2020 01:20:28 +0100 Subject: [PATCH 462/710] regulator: workaround self-referent regulators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Workaround regulators whose supply name happens to be the same as its own name. This fixes boards that used to work before the early supply resolving was removed. The error message is left in place so that offending drivers can be detected. Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator") Cc: stable@vger.kernel.org Reported-by: Ahmad Fatoum Signed-off-by: Michał Mirosław Tested-by: Ahmad Fatoum # stpmic1 Link: https://lore.kernel.org/r/d703acde2a93100c3c7a81059d716c50ad1b1f52.1605226675.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index d3b96efa20fd..42bbd99a36ac 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1844,7 +1844,10 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (r == rdev) { dev_err(dev, "Supply for %s (%s) resolved to itself\n", rdev->desc->name, rdev->supply_name); - return -EINVAL; + if (!have_full_constraints()) + return -EINVAL; + r = dummy_regulator_rdev; + get_device(&r->dev); } /* From 0e6371fbfba3a4f76489e6e97c1c7f8386ad5fd2 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 10 Nov 2020 15:05:47 +0300 Subject: [PATCH 463/710] usb: typec: ucsi: Report power supply changes When the ucsi power supply goes online/offline, and when the power levels change, the power supply class needs to be notified so it can inform the user space. Fixes: 992a60ed0d5e ("usb: typec: ucsi: register with power_supply class") Cc: stable@vger.kernel.org Reported-and-tested-by: Vladimir Yerilov Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201110120547.67922-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/psy.c | 9 +++++++++ drivers/usb/typec/ucsi/ucsi.c | 7 ++++++- drivers/usb/typec/ucsi/ucsi.h | 2 ++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c index 26ed0b520749..571a51e16234 100644 --- a/drivers/usb/typec/ucsi/psy.c +++ b/drivers/usb/typec/ucsi/psy.c @@ -238,4 +238,13 @@ void ucsi_unregister_port_psy(struct ucsi_connector *con) return; power_supply_unregister(con->psy); + con->psy = NULL; +} + +void ucsi_port_psy_changed(struct ucsi_connector *con) +{ + if (IS_ERR_OR_NULL(con->psy)) + return; + + power_supply_changed(con->psy); } diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 758b988ac518..51a570d40a42 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -643,8 +643,10 @@ static void ucsi_handle_connector_change(struct work_struct *work) role = !!(con->status.flags & UCSI_CONSTAT_PWR_DIR); if (con->status.change & UCSI_CONSTAT_POWER_OPMODE_CHANGE || - con->status.change & UCSI_CONSTAT_POWER_LEVEL_CHANGE) + con->status.change & UCSI_CONSTAT_POWER_LEVEL_CHANGE) { ucsi_pwr_opmode_change(con); + ucsi_port_psy_changed(con); + } if (con->status.change & UCSI_CONSTAT_POWER_DIR_CHANGE) { typec_set_pwr_role(con->port, role); @@ -674,6 +676,8 @@ static void ucsi_handle_connector_change(struct work_struct *work) ucsi_register_partner(con); else ucsi_unregister_partner(con); + + ucsi_port_psy_changed(con); } if (con->status.change & UCSI_CONSTAT_CAM_CHANGE) { @@ -994,6 +998,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) !!(con->status.flags & UCSI_CONSTAT_PWR_DIR)); ucsi_pwr_opmode_change(con); ucsi_register_partner(con); + ucsi_port_psy_changed(con); } if (con->partner) { diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index cba6f77bea61..b7a92f246050 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -340,9 +340,11 @@ int ucsi_resume(struct ucsi *ucsi); #if IS_ENABLED(CONFIG_POWER_SUPPLY) int ucsi_register_port_psy(struct ucsi_connector *con); void ucsi_unregister_port_psy(struct ucsi_connector *con); +void ucsi_port_psy_changed(struct ucsi_connector *con); #else static inline int ucsi_register_port_psy(struct ucsi_connector *con) { return 0; } static inline void ucsi_unregister_port_psy(struct ucsi_connector *con) { } +static inline void ucsi_port_psy_changed(struct ucsi_connector *con) { } #endif /* CONFIG_POWER_SUPPLY */ #if IS_ENABLED(CONFIG_TYPEC_DP_ALTMODE) From 4df694a477685a3df7b561bfe6393db073bf476c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 11 Nov 2020 03:17:55 +0100 Subject: [PATCH 464/710] MAINTAINERS: add usb raw gadget entry Add myself (using the personal email address) as a reviewer for the USB Raw Gadget driver. Acked-by: Andrey Konovalov Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/245047b3fffaf5c0b791ed226d1ea272b2aef031.1605060950.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3da6d8c154e4..f572d525a172 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18175,6 +18175,14 @@ L: linux-usb@vger.kernel.org S: Supported F: drivers/usb/class/usblp.c +USB RAW GADGET DRIVER +R: Andrey Konovalov +L: linux-usb@vger.kernel.org +S: Maintained +F: Documentation/usb/raw-gadget.rst +F: drivers/usb/gadget/legacy/raw_gadget.c +F: include/uapi/linux/usb/raw_gadget.h + USB QMI WWAN NETWORK DRIVER M: Bjørn Mork L: netdev@vger.kernel.org From 6d853c9e4104b4fc8d55dc9cd3b99712aa347174 Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Wed, 11 Nov 2020 08:12:09 -0500 Subject: [PATCH 465/710] usb: cdc-acm: Add DISABLE_ECHO for Renesas USB Download mode Renesas R-Car and RZ/G SoCs have a firmware download mode over USB. However, on reset a banner string is transmitted out which is not expected to be echoed back and will corrupt the protocol. Cc: stable Acked-by: Oliver Neukum Signed-off-by: Chris Brandt Link: https://lore.kernel.org/r/20201111131209.3977903-1-chris.brandt@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 1e7568867910..f52f1bc0559f 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1693,6 +1693,15 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x045b, 0x023c), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, + { USB_DEVICE(0x045b, 0x0248), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, + { USB_DEVICE(0x045b, 0x024D), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, From 60268b0e8258fdea9a3c9f4b51e161c123571db3 Mon Sep 17 00:00:00 2001 From: Naveen Krishna Chatradhi Date: Thu, 12 Nov 2020 22:51:59 +0530 Subject: [PATCH 466/710] hwmon: (amd_energy) modify the visibility of the counters This patch limits the visibility to owner and groups only for the energy counters exposed through the hwmon based amd_energy driver. Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Signed-off-by: Naveen Krishna Chatradhi Link: https://lore.kernel.org/r/20201112172159.8781-1-nchatrad@amd.com Signed-off-by: Guenter Roeck --- drivers/hwmon/amd_energy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c index d06597303d5a..3197cda7bcd9 100644 --- a/drivers/hwmon/amd_energy.c +++ b/drivers/hwmon/amd_energy.c @@ -171,7 +171,7 @@ static umode_t amd_energy_is_visible(const void *_data, enum hwmon_sensor_types type, u32 attr, int channel) { - return 0444; + return 0440; } static int energy_accumulator(void *p) From 3bbb73f8e60f505aced2ae820436cdacdbb19bca Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 11 Nov 2020 14:05:06 +0100 Subject: [PATCH 467/710] dt-bindings: can: fsl,flexcan.yaml: fix compatible for i.MX35 and i.MX53 As both the i.MX35 and i.MX53 flexcan IP cores are compatible to the i.MX25, they are listed as: compatible = "fsl,imx35-flexcan", "fsl,imx25-flexcan"; and: compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan"; in the SoC device trees. This patch fixes the following errors, which shows up during a dtbs_check: arch/arm/boot/dts/imx53-ard.dt.yaml: can@53fc8000: compatible: 'oneOf' conditional failed, one must be fixed: ['fsl,imx53-flexcan', 'fsl,imx25-flexcan'] is too long Additional items are not allowed ('fsl,imx25-flexcan' was unexpected) 'fsl,imx53-flexcan' is not one of ['fsl,imx7d-flexcan', 'fsl,imx6ul-flexcan', 'fsl,imx6sx-flexcan'] 'fsl,imx53-flexcan' is not one of ['fsl,ls1028ar1-flexcan'] 'fsl,imx6q-flexcan' was expected 'fsl,lx2160ar1-flexcan' was expected From schema: Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml Fixes: e5ab9aa7e49b ("dt-bindings: can: flexcan: convert fsl,*flexcan bindings to yaml") Reported-by: Rob Herring Cc: Oleksij Rempel Signed-off-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20201111130507.1560881-4-mkl@pengutronix.de [robh: drop singular fsl,imx53-flexcan and fsl,imx35-flexcan] Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml index 3093c22e761c..13875eab2ed6 100644 --- a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml +++ b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml @@ -20,14 +20,17 @@ properties: - fsl,imx8qm-flexcan - fsl,imx8mp-flexcan - fsl,imx6q-flexcan - - fsl,imx53-flexcan - - fsl,imx35-flexcan - fsl,imx28-flexcan - fsl,imx25-flexcan - fsl,p1010-flexcan - fsl,vf610-flexcan - fsl,ls1021ar2-flexcan - fsl,lx2160ar1-flexcan + - items: + - enum: + - fsl,imx53-flexcan + - fsl,imx35-flexcan + - const: fsl,imx25-flexcan - items: - enum: - fsl,imx7d-flexcan From bdac39a3bd28891fb0ded91c9152459c57773462 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 11 Nov 2020 22:35:48 +0100 Subject: [PATCH 468/710] dt-bindings: clock: imx5: fix example Since commit: 0e030a373df3 ("can: flexcan: fix endianess detection") the fsl,imx53-flexcan isn't compatible with the fsl,p1010-flexcan any more. As the former accesses the IP core in Little Endian mode and the latter uses Big Endian mode. With the conversion of the flexcan DT bindings to yaml, the dt_binding_check this throws the following error: Documentation/devicetree/bindings/clock/imx5-clock.example.dt.yaml: can@53fc8000: compatible: 'oneOf' conditional failed, one must be fixed: ['fsl,imx53-flexcan', 'fsl,p1010-flexcan'] is too long Additional items are not allowed ('fsl,p1010-flexcan' was unexpected) 'fsl,imx53-flexcan' is not one of ['fsl,imx7d-flexcan', 'fsl,imx6ul-flexcan', 'fsl,imx6sx-flexcan'] 'fsl,imx53-flexcan' is not one of ['fsl,ls1028ar1-flexcan'] 'fsl,imx6q-flexcan' was expected 'fsl,lx2160ar1-flexcan' was expected From schema: /builds/robherring/linux-dt-bindings/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml The error is fixed by replacing the "fsl,p1010-flexcan" compatible (which turned out the be incompatible) with "fsl,imx25-flexcan" in the binding example. Reported-by: Rob Herring Cc: Fabio Estevam Signed-off-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20201111213548.1621094-1-mkl@pengutronix.de [robh: Add "fsl,imx25-flexcan" as fallback] Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/clock/imx5-clock.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/clock/imx5-clock.yaml b/Documentation/devicetree/bindings/clock/imx5-clock.yaml index 4d9e7c73dce9..90775c2669b8 100644 --- a/Documentation/devicetree/bindings/clock/imx5-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx5-clock.yaml @@ -57,7 +57,7 @@ examples: }; can@53fc8000 { - compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan"; reg = <0x53fc8000 0x4000>; interrupts = <82>; clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>, <&clks IMX5_CLK_CAN1_SERIAL_GATE>; From 50431b45685b600fc2851a3f2b53e24643efe6d3 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 13 Nov 2020 19:51:52 +0800 Subject: [PATCH 469/710] tools, bpftool: Add missing close before bpftool net attach exit progfd is created by prog_parse_fd() in do_attach() and before the latter returns in case of success, the file descriptor should be closed. Fixes: 04949ccc273e ("tools: bpftool: add net attach command to attach XDP on interface") Signed-off-by: Wang Hai Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201113115152.53178-1-wanghai38@huawei.com --- tools/bpf/bpftool/net.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 910e7bac6e9e..3fae61ef6339 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -578,8 +578,8 @@ static int do_attach(int argc, char **argv) ifindex = net_parse_dev(&argc, &argv); if (ifindex < 1) { - close(progfd); - return -EINVAL; + err = -EINVAL; + goto cleanup; } if (argc) { @@ -587,8 +587,8 @@ static int do_attach(int argc, char **argv) overwrite = true; } else { p_err("expected 'overwrite', got: '%s'?", *argv); - close(progfd); - return -EINVAL; + err = -EINVAL; + goto cleanup; } } @@ -596,17 +596,17 @@ static int do_attach(int argc, char **argv) if (is_prefix("xdp", attach_type_strings[attach_type])) err = do_attach_detach_xdp(progfd, attach_type, ifindex, overwrite); - - if (err < 0) { + if (err) { p_err("interface %s attach failed: %s", attach_type_strings[attach_type], strerror(-err)); - return err; + goto cleanup; } if (json_output) jsonw_null(json_wtr); - - return 0; +cleanup: + close(progfd); + return err; } static int do_detach(int argc, char **argv) From 18db36a073db6377a52e22ec44eb0500f0a0ecc6 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 6 Nov 2020 18:50:16 +0100 Subject: [PATCH 470/710] docs: ABI: testing: iio: stm32: remove re-introduced unsupported ABI Remove unsupported ABI that has been re-introduced due to a rebase hunk. This ABI has been moved in the past in commit b299d00420e2 ("IIO: stm32: Remove quadrature related functions from trigger driver") This also fixes a couple of warnings seen with: ./scripts/get_abi.pl validate 2>&1|grep iio Fixes: 34433332841d ("docs: ABI: testing: make the files compatible with ReST output") Acked-by: Jonathan Cameron Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/r/1604685016-2434-1-git-send-email-fabrice.gasnier@st.com Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-bus-iio-timer-stm32 | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32 b/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32 index a10a4de3e5fe..c4a4497c249a 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32 +++ b/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32 @@ -109,30 +109,6 @@ Description: When counting down the counter start from preset value and fire event when reach 0. -What: /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available -KernelVersion: 4.12 -Contact: benjamin.gaignard@st.com -Description: - Reading returns the list possible quadrature modes. - -What: /sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode -KernelVersion: 4.12 -Contact: benjamin.gaignard@st.com -Description: - Configure the device counter quadrature modes: - - channel_A: - Encoder A input servers as the count input and B as - the UP/DOWN direction control input. - - channel_B: - Encoder B input serves as the count input and A as - the UP/DOWN direction control input. - - quadrature: - Encoder A and B inputs are mixed to get direction - and count with a scale of 0.25. - What: /sys/bus/iio/devices/iio:deviceX/in_count_enable_mode_available KernelVersion: 4.12 Contact: benjamin.gaignard@st.com From 02a9c6ee4183af2e438454c55098b828a96085fb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Nov 2020 13:12:41 +0300 Subject: [PATCH 471/710] ALSA: firewire: Clean up a locking issue in copy_resp_to_buf() The spin_lock/unlock_irq() functions cannot be nested. The problem is that presumably we would want the IRQs to be re-enabled on the second call the spin_unlock_irq() but instead it will be enabled at the first call so IRQs will be enabled earlier than expected. In this situation the copy_resp_to_buf() function is only called from one function and it is called with IRQs disabled. We can just use the regular spin_lock/unlock() functions. Fixes: 555e8a8f7f14 ("ALSA: fireworks: Add command/response functionality into hwdep interface") Signed-off-by: Dan Carpenter Acked-by: Takashi Sakamoto Cc: Link: https://lore.kernel.org/r/20201113101241.GB168908@mwanda Signed-off-by: Takashi Iwai --- sound/firewire/fireworks/fireworks_transaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/firewire/fireworks/fireworks_transaction.c b/sound/firewire/fireworks/fireworks_transaction.c index 0f533f5bd960..9f8c53b39f95 100644 --- a/sound/firewire/fireworks/fireworks_transaction.c +++ b/sound/firewire/fireworks/fireworks_transaction.c @@ -123,7 +123,7 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode) t = (struct snd_efw_transaction *)data; length = min_t(size_t, be32_to_cpu(t->length) * sizeof(u32), length); - spin_lock_irq(&efw->lock); + spin_lock(&efw->lock); if (efw->push_ptr < efw->pull_ptr) capacity = (unsigned int)(efw->pull_ptr - efw->push_ptr); @@ -190,7 +190,7 @@ handle_resp_for_user(struct fw_card *card, int generation, int source, copy_resp_to_buf(efw, data, length, rcode); end: - spin_unlock_irq(&instances_lock); + spin_unlock(&instances_lock); } static void From d853b3406903a7dc5b14eb5bada3e8cd677f66a2 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 13 Nov 2020 11:07:02 -0700 Subject: [PATCH 472/710] spi: bcm2835aux: Restore err assignment in bcm2835aux_spi_probe Clang warns: drivers/spi/spi-bcm2835aux.c:532:50: warning: variable 'err' is uninitialized when used here [-Wuninitialized] dev_err(&pdev->dev, "could not get clk: %d\n", err); ^~~ ./include/linux/dev_printk.h:112:32: note: expanded from macro 'dev_err' _dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~~~~ drivers/spi/spi-bcm2835aux.c:495:9: note: initialize the variable 'err' to silence this warning int err; ^ = 0 1 warning generated. Restore the assignment so that the error value can be used in the dev_err statement and there is no uninitialized memory being leaked. Fixes: e13ee6cc4781 ("spi: bcm2835aux: Fix use-after-free on unbind") Link: https://github.com/ClangBuiltLinux/linux/issues/1199 Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20201113180701.455541-1-natechancellor@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835aux.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c index fd58547110e6..1a26865c42f8 100644 --- a/drivers/spi/spi-bcm2835aux.c +++ b/drivers/spi/spi-bcm2835aux.c @@ -529,8 +529,9 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev) bs->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(bs->clk)) { + err = PTR_ERR(bs->clk); dev_err(&pdev->dev, "could not get clk: %d\n", err); - return PTR_ERR(bs->clk); + return err; } bs->irq = platform_get_irq(pdev, 0); From 9f16a66733c90b5f33f624b0b0e36a345b0aaf93 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 13 Nov 2020 21:44:48 +0800 Subject: [PATCH 473/710] block: mark flush request as IDLE when it is really finished For avoiding use-after-free on flush request, we call its .end_io() from both timeout code path and __blk_mq_end_request(). When flush request's ref doesn't drop to zero, it is still used, we can't mark it as IDLE, so fix it by marking IDLE when its refcount drops to zero really. Fixes: 65ff5cd04551 ("blk-mq: mark flush request as IDLE in flush_end_io()") Signed-off-by: Ming Lei Cc: Yi Zhang Signed-off-by: Jens Axboe --- block/blk-flush.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index e32958f0b687..fd5cee9f1a3b 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -225,13 +225,18 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) /* release the tag's ownership to the req cloned from */ spin_lock_irqsave(&fq->mq_flush_lock, flags); - WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE); if (!refcount_dec_and_test(&flush_rq->ref)) { fq->rq_status = error; spin_unlock_irqrestore(&fq->mq_flush_lock, flags); return; } + /* + * Flush request has to be marked as IDLE when it is really ended + * because its .end_io() is called from timeout code path too for + * avoiding use-after-free. + */ + WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE); if (fq->rq_status != BLK_STS_OK) error = fq->rq_status; From 2b5668733050fca85f0ab458c5b91732f9496a38 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 12 Nov 2020 13:15:46 +0200 Subject: [PATCH 474/710] net: ethernet: ti: cpsw: fix cpts irq after suspend Depending on the SoC/platform the CPSW can completely lose context after a suspend/resume cycle, including CPSW wrapper (WR) which will cause reset of WR_C0_MISC_EN register, so CPTS IRQ will became disabled. Fix it by moving CPTS IRQ enabling in cpsw_ndo_open() where CPTS is actually started. Fixes: 84ea9c0a95d7 ("net: ethernet: ti: cpsw: enable cpts irq") Reported-by: Tony Lindgren Signed-off-by: Grygorii Strashko Tested-by: Tony Lindgren Link: https://lore.kernel.org/r/20201112111546.20343-1-grygorii.strashko@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw.c | 10 ++++++---- drivers/net/ethernet/ti/cpsw_new.c | 9 ++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 9fd1f77190ad..fa2d1025cbb2 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -838,9 +838,12 @@ static int cpsw_ndo_open(struct net_device *ndev) if (ret < 0) goto err_cleanup; - if (cpts_register(cpsw->cpts)) - dev_err(priv->dev, "error registering cpts device\n"); - + if (cpsw->cpts) { + if (cpts_register(cpsw->cpts)) + dev_err(priv->dev, "error registering cpts device\n"); + else + writel(0x10, &cpsw->wr_regs->misc_en); + } } cpsw_restore(priv); @@ -1716,7 +1719,6 @@ static int cpsw_probe(struct platform_device *pdev) /* Enable misc CPTS evnt_pend IRQ */ cpts_set_irqpoll(cpsw->cpts, false); - writel(0x10, &cpsw->wr_regs->misc_en); skip_cpts: cpsw_notice(priv, probe, diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index f779d2e1b5c5..2f5e0ad23ad7 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -873,8 +873,12 @@ static int cpsw_ndo_open(struct net_device *ndev) if (ret < 0) goto err_cleanup; - if (cpts_register(cpsw->cpts)) - dev_err(priv->dev, "error registering cpts device\n"); + if (cpsw->cpts) { + if (cpts_register(cpsw->cpts)) + dev_err(priv->dev, "error registering cpts device\n"); + else + writel(0x10, &cpsw->wr_regs->misc_en); + } napi_enable(&cpsw->napi_rx); napi_enable(&cpsw->napi_tx); @@ -2006,7 +2010,6 @@ static int cpsw_probe(struct platform_device *pdev) /* Enable misc CPTS evnt_pend IRQ */ cpts_set_irqpoll(cpsw->cpts, false); - writel(0x10, &cpsw->wr_regs->misc_en); skip_cpts: ret = cpsw_register_notifiers(cpsw); From 8cf8821e15cd553339a5b48ee555a0439c2b2742 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 12 Nov 2020 20:58:15 -0500 Subject: [PATCH 475/710] net: Exempt multicast addresses from five-second neighbor lifetime Commit 58956317c8de ("neighbor: Improve garbage collection") guarantees neighbour table entries a five-second lifetime. Processes which make heavy use of multicast can fill the neighour table with multicast addresses in five seconds. At that point, neighbour entries can't be GC-ed because they aren't five seconds old yet, the kernel log starts to fill up with "neighbor table overflow!" messages, and sends start to fail. This patch allows multicast addresses to be thrown out before they've lived out their five seconds. This makes room for non-multicast addresses and makes messages to all addresses more reliable in these circumstances. Fixes: 58956317c8de ("neighbor: Improve garbage collection") Signed-off-by: Jeff Dike Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20201113015815.31397-1-jdike@akamai.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 1 + net/core/neighbour.c | 2 ++ net/ipv4/arp.c | 6 ++++++ net/ipv6/ndisc.c | 7 +++++++ 4 files changed, 16 insertions(+) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 81ee17594c32..22ced1381ede 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -204,6 +204,7 @@ struct neigh_table { int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); + int (*is_multicast)(const void *pkey); bool (*allow_add)(const struct net_device *dev, struct netlink_ext_ack *extack); char *id; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8e39e28b0a8d..9500d28a43b0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -235,6 +235,8 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_lock(&n->lock); if ((n->nud_state == NUD_FAILED) || + (tbl->is_multicast && + tbl->is_multicast(n->primary_key)) || time_after(tref, n->updated)) remove = true; write_unlock(&n->lock); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 687971d83b4e..922dd73e5740 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -125,6 +125,7 @@ static int arp_constructor(struct neighbour *neigh); static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); static void parp_redo(struct sk_buff *skb); +static int arp_is_multicast(const void *pkey); static const struct neigh_ops arp_generic_ops = { .family = AF_INET, @@ -156,6 +157,7 @@ struct neigh_table arp_tbl = { .key_eq = arp_key_eq, .constructor = arp_constructor, .proxy_redo = parp_redo, + .is_multicast = arp_is_multicast, .id = "arp_cache", .parms = { .tbl = &arp_tbl, @@ -928,6 +930,10 @@ static void parp_redo(struct sk_buff *skb) arp_process(dev_net(skb->dev), NULL, skb); } +static int arp_is_multicast(const void *pkey) +{ + return ipv4_is_multicast(*((__be32 *)pkey)); +} /* * Receive an arp request from the device layer. diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 27f29b957ee7..76717478f173 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -81,6 +81,7 @@ static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); static int pndisc_constructor(struct pneigh_entry *n); static void pndisc_destructor(struct pneigh_entry *n); static void pndisc_redo(struct sk_buff *skb); +static int ndisc_is_multicast(const void *pkey); static const struct neigh_ops ndisc_generic_ops = { .family = AF_INET6, @@ -115,6 +116,7 @@ struct neigh_table nd_tbl = { .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, .proxy_redo = pndisc_redo, + .is_multicast = ndisc_is_multicast, .allow_add = ndisc_allow_add, .id = "ndisc_cache", .parms = { @@ -1706,6 +1708,11 @@ static void pndisc_redo(struct sk_buff *skb) kfree_skb(skb); } +static int ndisc_is_multicast(const void *pkey) +{ + return ipv6_addr_is_multicast((struct in6_addr *)pkey); +} + static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); From aa6306a8481e0223f3783d24045daea80897238e Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Fri, 13 Nov 2020 10:11:16 +0100 Subject: [PATCH 476/710] net: phy: mscc: remove non-MACSec compatible phy Selecting VSC8575 as a MACSec PHY was not correct The relevant datasheet can be found here: - VSC8575: https://www.microchip.com/wwwproducts/en/VSC8575 History: v1 -> v2: - Corrected the sha in the "Fixes:" tag Fixes: 1bbe0ecc2a1a ("net: phy: mscc: macsec initialization") Signed-off-by: Steen Hegelund Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20201113091116.1102450-1-steen.hegelund@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc_macsec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c index 6cf9b798b710..10be266e48e8 100644 --- a/drivers/net/phy/mscc/mscc_macsec.c +++ b/drivers/net/phy/mscc/mscc_macsec.c @@ -981,7 +981,6 @@ int vsc8584_macsec_init(struct phy_device *phydev) switch (phydev->phy_id & phydev->drv->phy_id_mask) { case PHY_ID_VSC856X: - case PHY_ID_VSC8575: case PHY_ID_VSC8582: case PHY_ID_VSC8584: INIT_LIST_HEAD(&vsc8531->macsec_flows); From 8c07205aea36ccebe9fc5f97287a8bc416cea197 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 13 Nov 2020 19:32:36 +0800 Subject: [PATCH 477/710] net: marvell: prestera: fix error return code in prestera_pci_probe() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 4c2703dfd7fa ("net: marvell: prestera: Add PCI interface support") Reported-by: Hulk Robot Signed-off-by: Wang Hai Reviewed-by: Vadym Kochan Acked-by: Vadym Kochan Link: https://lore.kernel.org/r/20201113113236.71678-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c index 1b97adae542e..be5677623455 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c @@ -676,7 +676,8 @@ static int prestera_pci_probe(struct pci_dev *pdev, if (err) return err; - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(30))) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(30)); + if (err) { dev_err(&pdev->dev, "fail to set DMA mask\n"); goto err_dma_mask; } @@ -702,8 +703,10 @@ static int prestera_pci_probe(struct pci_dev *pdev, dev_info(fw->dev.dev, "Prestera FW is ready\n"); fw->wq = alloc_workqueue("prestera_fw_wq", WQ_HIGHPRI, 1); - if (!fw->wq) + if (!fw->wq) { + err = -ENOMEM; goto err_wq_alloc; + } INIT_WORK(&fw->evt_work, prestera_fw_evt_work_fn); From 81e329e93b860b31c216b40eb5e1373db0ffe0ba Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Thu, 12 Nov 2020 18:45:41 +0200 Subject: [PATCH 478/710] net: ethernet: ti: am65-cpts: update ret when ptp_clock is ERROR We always have to update the value of ret, otherwise the error value may be the previous one. Fixes: f6bd59526ca5 ("net: ethernet: ti: introduce am654 common platform time sync driver") Signed-off-by: Wang Qing [grygorii.strashko@ti.com: fix build warn, subj add fixes tag] Signed-off-by: Grygorii Strashko Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20201112164541.3223-1-grygorii.strashko@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpts.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index 75056c14b161..5dc60ecabe56 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -1001,8 +1001,7 @@ struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, if (IS_ERR_OR_NULL(cpts->ptp_clock)) { dev_err(dev, "Failed to register ptp clk %ld\n", PTR_ERR(cpts->ptp_clock)); - if (!cpts->ptp_clock) - ret = -ENODEV; + ret = cpts->ptp_clock ? PTR_ERR(cpts->ptp_clock) : -ENODEV; goto refclk_disable; } cpts->phc_index = ptp_clock_index(cpts->ptp_clock); From 8d4c3e76e3be11a64df95ddee52e99092d42fc19 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Nov 2020 16:47:52 -0700 Subject: [PATCH 479/710] proc: don't allow async path resolution of /proc/self components If this is attempted by a kthread, then return -EOPNOTSUPP as we don't currently support that. Once we can get task_pid_ptr() doing the right thing, then this can go away again. Signed-off-by: Jens Axboe --- fs/proc/self.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/proc/self.c b/fs/proc/self.c index 72cd69bcaf4a..cc71ce3466dc 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -16,6 +16,13 @@ static const char *proc_self_get_link(struct dentry *dentry, pid_t tgid = task_tgid_nr_ns(current, ns); char *name; + /* + * Not currently supported. Once we can inherit all of struct pid, + * we can allow this. + */ + if (current->flags & PF_KTHREAD) + return ERR_PTR(-EOPNOTSUPP); + if (!tgid) return ERR_PTR(-ENOENT); /* max length of unsigned int in decimal + NULL term */ From 9c2e14b48119b39446031d29d994044ae958d8fc Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Tue, 10 Nov 2020 16:16:40 -0800 Subject: [PATCH 480/710] ip_tunnels: Set tunnel option flag when tunnel metadata is present Currently, we may set the tunnel option flag when the size of metadata is zero. For example, we set TUNNEL_GENEVE_OPT in the receive function no matter the geneve option is present or not. As this may result in issues on the tunnel flags consumers, this patch fixes the issue. Related discussion: * https://lore.kernel.org/netdev/1604448694-19351-1-git-send-email-yihung.wei@gmail.com/T/#u Fixes: 256c87c17c53 ("net: check tunnel option type in tunnel flags") Signed-off-by: Yi-Hung Wei Link: https://lore.kernel.org/r/1605053800-74072-1-git-send-email-yihung.wei@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/geneve.c | 3 +-- include/net/ip_tunnels.h | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index d07008a818df..1426bfc009bc 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -224,8 +224,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, if (ip_tunnel_collect_metadata() || gs->collect_md) { __be16 flags; - flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | - (gnvh->oam ? TUNNEL_OAM : 0) | + flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) | (gnvh->critical ? TUNNEL_CRIT_OPT : 0); tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 02ccd32542d0..61620677b034 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -478,9 +478,11 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, __be16 flags) { - memcpy(ip_tunnel_info_opts(info), from, len); info->options_len = len; - info->key.tun_flags |= flags; + if (len > 0) { + memcpy(ip_tunnel_info_opts(info), from, len); + info->key.tun_flags |= flags; + } } static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) @@ -526,7 +528,6 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, __be16 flags) { info->options_len = 0; - info->key.tun_flags |= flags; } #endif /* CONFIG_INET */ From ceb736e1d45c253f5e86b185ca9b497cdd43063f Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Thu, 12 Nov 2020 16:09:50 +0800 Subject: [PATCH 481/710] ipv6: Fix error path to cancel the meseage genlmsg_cancel() needs to be called in the error path of inet6_fill_ifmcaddr and inet6_fill_ifacaddr to cancel the message. Fixes: 6ecf4c37eb3e ("ipv6: enable IFA_TARGET_NETNSID for RTM_GETADDR") Reported-by: Hulk Robot Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201112080950.1476302-1-zhangqilong3@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 01146b66d666..8b6eb384bac7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5022,8 +5022,10 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return -EMSGSIZE; if (args->netnsid >= 0 && - nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) + nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || @@ -5054,8 +5056,10 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return -EMSGSIZE; if (args->netnsid >= 0 && - nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) + nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || From 630f512280604eecae0ddc2b3f8402f7931c56fd Mon Sep 17 00:00:00 2001 From: Alexander Kapshuk Date: Tue, 13 Oct 2020 15:47:25 +0300 Subject: [PATCH 482/710] drm/nouveau/kms: Fix NULL pointer dereference in nouveau_connector_detect_depth This oops manifests itself on the following hardware: 01:00.0 VGA compatible controller: NVIDIA Corporation G98M [GeForce G 103M] (rev a1) Oct 09 14:17:46 lp-sasha kernel: BUG: kernel NULL pointer dereference, address: 0000000000000000 Oct 09 14:17:46 lp-sasha kernel: #PF: supervisor read access in kernel mode Oct 09 14:17:46 lp-sasha kernel: #PF: error_code(0x0000) - not-present page Oct 09 14:17:46 lp-sasha kernel: PGD 0 P4D 0 Oct 09 14:17:46 lp-sasha kernel: Oops: 0000 [#1] SMP PTI Oct 09 14:17:46 lp-sasha kernel: CPU: 1 PID: 191 Comm: systemd-udevd Not tainted 5.9.0-rc8-next-20201009 #38 Oct 09 14:17:46 lp-sasha kernel: Hardware name: Hewlett-Packard Compaq Presario CQ61 Notebook PC/306A, BIOS F.03 03/23/2009 Oct 09 14:17:46 lp-sasha kernel: RIP: 0010:nouveau_connector_detect_depth+0x71/0xc0 [nouveau] Oct 09 14:17:46 lp-sasha kernel: Code: 0a 00 00 48 8b 49 48 c7 87 b8 00 00 00 06 00 00 00 80 b9 4d 0a 00 00 00 75 1e 83 fa 41 75 05 48 85 c0 75 29 8b 81 10 0d 00 00 <39> 06 7c 25 f6 81 14 0d 00 00 02 75 b7 c3 80 b9 0c 0d 00 00 00 75 Oct 09 14:17:46 lp-sasha kernel: RSP: 0018:ffffc9000028f8c0 EFLAGS: 00010297 Oct 09 14:17:46 lp-sasha kernel: RAX: 0000000000014c08 RBX: ffff8880369d4000 RCX: ffff8880369d3000 Oct 09 14:17:46 lp-sasha kernel: RDX: 0000000000000040 RSI: 0000000000000000 RDI: ffff8880369d4000 Oct 09 14:17:46 lp-sasha kernel: RBP: ffff88800601cc00 R08: ffff8880051da298 R09: ffffffff8226201a Oct 09 14:17:46 lp-sasha kernel: R10: ffff88800469aa80 R11: ffff888004c84ff8 R12: 0000000000000000 Oct 09 14:17:46 lp-sasha kernel: R13: ffff8880051da000 R14: 0000000000002000 R15: 0000000000000003 Oct 09 14:17:46 lp-sasha kernel: FS: 00007fd0192b3440(0000) GS:ffff8880bc900000(0000) knlGS:0000000000000000 Oct 09 14:17:46 lp-sasha kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Oct 09 14:17:46 lp-sasha kernel: CR2: 0000000000000000 CR3: 0000000004976000 CR4: 00000000000006e0 Oct 09 14:17:46 lp-sasha kernel: Call Trace: Oct 09 14:17:46 lp-sasha kernel: nouveau_connector_get_modes+0x1e6/0x240 [nouveau] Oct 09 14:17:46 lp-sasha kernel: ? kfree+0xb9/0x240 Oct 09 14:17:46 lp-sasha kernel: ? drm_connector_list_iter_next+0x7c/0xa0 Oct 09 14:17:46 lp-sasha kernel: drm_helper_probe_single_connector_modes+0x1ba/0x7c0 Oct 09 14:17:46 lp-sasha kernel: drm_client_modeset_probe+0x27e/0x1360 Oct 09 14:17:46 lp-sasha kernel: ? nvif_object_sclass_put+0xc/0x20 [nouveau] Oct 09 14:17:46 lp-sasha kernel: ? nouveau_cli_init+0x3cc/0x440 [nouveau] Oct 09 14:17:46 lp-sasha kernel: ? ktime_get_mono_fast_ns+0x49/0xa0 Oct 09 14:17:46 lp-sasha kernel: ? nouveau_drm_open+0x4e/0x180 [nouveau] Oct 09 14:17:46 lp-sasha kernel: __drm_fb_helper_initial_config_and_unlock+0x3f/0x4a0 Oct 09 14:17:46 lp-sasha kernel: ? drm_file_alloc+0x18f/0x260 Oct 09 14:17:46 lp-sasha kernel: ? mutex_lock+0x9/0x40 Oct 09 14:17:46 lp-sasha kernel: ? drm_client_init+0x110/0x160 Oct 09 14:17:46 lp-sasha kernel: nouveau_fbcon_init+0x14d/0x1c0 [nouveau] Oct 09 14:17:46 lp-sasha kernel: nouveau_drm_device_init+0x1c0/0x880 [nouveau] Oct 09 14:17:46 lp-sasha kernel: nouveau_drm_probe+0x11a/0x1e0 [nouveau] Oct 09 14:17:46 lp-sasha kernel: pci_device_probe+0xcd/0x140 Oct 09 14:17:46 lp-sasha kernel: really_probe+0xd8/0x400 Oct 09 14:17:46 lp-sasha kernel: driver_probe_device+0x4a/0xa0 Oct 09 14:17:46 lp-sasha kernel: device_driver_attach+0x9c/0xc0 Oct 09 14:17:46 lp-sasha kernel: __driver_attach+0x6f/0x100 Oct 09 14:17:46 lp-sasha kernel: ? device_driver_attach+0xc0/0xc0 Oct 09 14:17:46 lp-sasha kernel: bus_for_each_dev+0x75/0xc0 Oct 09 14:17:46 lp-sasha kernel: bus_add_driver+0x106/0x1c0 Oct 09 14:17:46 lp-sasha kernel: driver_register+0x86/0xe0 Oct 09 14:17:46 lp-sasha kernel: ? 0xffffffffa044e000 Oct 09 14:17:46 lp-sasha kernel: do_one_initcall+0x48/0x1e0 Oct 09 14:17:46 lp-sasha kernel: ? _cond_resched+0x11/0x60 Oct 09 14:17:46 lp-sasha kernel: ? kmem_cache_alloc_trace+0x19c/0x1e0 Oct 09 14:17:46 lp-sasha kernel: do_init_module+0x57/0x220 Oct 09 14:17:46 lp-sasha kernel: __do_sys_finit_module+0xa0/0xe0 Oct 09 14:17:46 lp-sasha kernel: do_syscall_64+0x33/0x40 Oct 09 14:17:46 lp-sasha kernel: entry_SYSCALL_64_after_hwframe+0x44/0xa9 Oct 09 14:17:46 lp-sasha kernel: RIP: 0033:0x7fd01a060d5d Oct 09 14:17:46 lp-sasha kernel: Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e3 70 0c 00 f7 d8 64 89 01 48 Oct 09 14:17:46 lp-sasha kernel: RSP: 002b:00007ffc8ad38a98 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 Oct 09 14:17:46 lp-sasha kernel: RAX: ffffffffffffffda RBX: 0000563f6e7fd530 RCX: 00007fd01a060d5d Oct 09 14:17:46 lp-sasha kernel: RDX: 0000000000000000 RSI: 00007fd01a19f95d RDI: 000000000000000f Oct 09 14:17:46 lp-sasha kernel: RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000007 Oct 09 14:17:46 lp-sasha kernel: R10: 000000000000000f R11: 0000000000000246 R12: 00007fd01a19f95d Oct 09 14:17:46 lp-sasha kernel: R13: 0000000000000000 R14: 0000563f6e7fbc10 R15: 0000563f6e7fd530 Oct 09 14:17:46 lp-sasha kernel: Modules linked in: nouveau(+) ttm xt_string xt_mark xt_LOG vgem v4l2_dv_timings uvcvideo ulpi udf ts_kmp ts_fsm ts_bm snd_aloop sil164 qat_dh895xccvf nf_nat_sip nf_nat_irc nf_nat_ftp nf_nat nf_log_ipv6 nf_log_ipv4 nf_log_common ltc2990 lcd intel_qat input_leds i2c_mux gspca_main videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common videodev mc drivetemp cuse fuse crc_itu_t coretemp ch7006 ath5k ath algif_hash Oct 09 14:17:46 lp-sasha kernel: CR2: 0000000000000000 Oct 09 14:17:46 lp-sasha kernel: ---[ end trace 0ddafe218ad30017 ]--- Oct 09 14:17:46 lp-sasha kernel: RIP: 0010:nouveau_connector_detect_depth+0x71/0xc0 [nouveau] Oct 09 14:17:46 lp-sasha kernel: Code: 0a 00 00 48 8b 49 48 c7 87 b8 00 00 00 06 00 00 00 80 b9 4d 0a 00 00 00 75 1e 83 fa 41 75 05 48 85 c0 75 29 8b 81 10 0d 00 00 <39> 06 7c 25 f6 81 14 0d 00 00 02 75 b7 c3 80 b9 0c 0d 00 00 00 75 Oct 09 14:17:46 lp-sasha kernel: RSP: 0018:ffffc9000028f8c0 EFLAGS: 00010297 Oct 09 14:17:46 lp-sasha kernel: RAX: 0000000000014c08 RBX: ffff8880369d4000 RCX: ffff8880369d3000 Oct 09 14:17:46 lp-sasha kernel: RDX: 0000000000000040 RSI: 0000000000000000 RDI: ffff8880369d4000 Oct 09 14:17:46 lp-sasha kernel: RBP: ffff88800601cc00 R08: ffff8880051da298 R09: ffffffff8226201a Oct 09 14:17:46 lp-sasha kernel: R10: ffff88800469aa80 R11: ffff888004c84ff8 R12: 0000000000000000 Oct 09 14:17:46 lp-sasha kernel: R13: ffff8880051da000 R14: 0000000000002000 R15: 0000000000000003 Oct 09 14:17:46 lp-sasha kernel: FS: 00007fd0192b3440(0000) GS:ffff8880bc900000(0000) knlGS:0000000000000000 Oct 09 14:17:46 lp-sasha kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Oct 09 14:17:46 lp-sasha kernel: CR2: 0000000000000000 CR3: 0000000004976000 CR4: 00000000000006e0 The disassembly: Code: 0a 00 00 48 8b 49 48 c7 87 b8 00 00 00 06 00 00 00 80 b9 4d 0a 00 00 00 75 1e 83 fa 41 75 05 48 85 c0 75 29 8b 81 10 0d 00 00 <39> 06 7c 25 f6 81 14 0d 00 00 02 75 b7 c3 80 b9 0c 0d 00 00 00 75 All code ======== 0: 0a 00 or (%rax),%al 2: 00 48 8b add %cl,-0x75(%rax) 5: 49 rex.WB 6: 48 c7 87 b8 00 00 00 movq $0x6,0xb8(%rdi) d: 06 00 00 00 11: 80 b9 4d 0a 00 00 00 cmpb $0x0,0xa4d(%rcx) 18: 75 1e jne 0x38 1a: 83 fa 41 cmp $0x41,%edx 1d: 75 05 jne 0x24 1f: 48 85 c0 test %rax,%rax 22: 75 29 jne 0x4d 24: 8b 81 10 0d 00 00 mov 0xd10(%rcx),%eax 2a:* 39 06 cmp %eax,(%rsi) <-- trapping instruction 2c: 7c 25 jl 0x53 2e: f6 81 14 0d 00 00 02 testb $0x2,0xd14(%rcx) 35: 75 b7 jne 0xffffffffffffffee 37: c3 retq 38: 80 b9 0c 0d 00 00 00 cmpb $0x0,0xd0c(%rcx) 3f: 75 .byte 0x75 Code starting with the faulting instruction =========================================== 0: 39 06 cmp %eax,(%rsi) 2: 7c 25 jl 0x29 4: f6 81 14 0d 00 00 02 testb $0x2,0xd14(%rcx) b: 75 b7 jne 0xffffffffffffffc4 d: c3 retq e: 80 b9 0c 0d 00 00 00 cmpb $0x0,0xd0c(%rcx) 15: 75 .byte 0x75 objdump -SF --disassemble=nouveau_connector_detect_depth [...] if (nv_connector->edid && c85e1: 83 fa 41 cmp $0x41,%edx c85e4: 75 05 jne c85eb (File Offset: 0xc866b) c85e6: 48 85 c0 test %rax,%rax c85e9: 75 29 jne c8614 (File Offset: 0xc8694) nv_connector->type == DCB_CONNECTOR_LVDS_SPWG) duallink = ((u8 *)nv_connector->edid)[121] == 2; else duallink = mode->clock >= bios->fp.duallink_transition_clk; if ((!duallink && (bios->fp.strapless_is_24bit & 1)) || c85eb: 8b 81 10 0d 00 00 mov 0xd10(%rcx),%eax c85f1: 39 06 cmp %eax,(%rsi) c85f3: 7c 25 jl c861a (File Offset: 0xc869a) ( duallink && (bios->fp.strapless_is_24bit & 2))) c85f5: f6 81 14 0d 00 00 02 testb $0x2,0xd14(%rcx) c85fc: 75 b7 jne c85b5 (File Offset: 0xc8635) connector->display_info.bpc = 8; [...] % scripts/faddr2line /lib/modules/5.9.0-rc8-next-20201009/kernel/drivers/gpu/drm/nouveau/nouveau.ko nouveau_connector_detect_depth+0x71/0xc0 nouveau_connector_detect_depth+0x71/0xc0: nouveau_connector_detect_depth at /home/sasha/linux-next/drivers/gpu/drm/nouveau/nouveau_connector.c:891 It is actually line 889. See the disassembly below. 889 duallink = mode->clock >= bios->fp.duallink_transition_clk; The NULL pointer being dereferenced is mode. Git bisect has identified the following commit as bad: f28e32d3906e drm/nouveau/kms: Don't change EDID when it hasn't actually changed Here is the chain of events that causes the oops. On entry to nouveau_connector_detect_lvds, edid is set to NULL. The call to nouveau_connector_detect sets nv_connector->edid to valid memory, with status set to connector_status_connected and the flow of execution branching to the out label. The subsequent call to nouveau_connector_set_edid erronously clears nv_connector->edid, via the local edid pointer which remains set to NULL. Fix this by setting edid to the value of the just acquired nv_connector->edid and executing the body of nouveau_connector_set_edid only if nv_connector->edid and edid point to different memory addresses thus preventing nv_connector->edid from being turned into a dangling pointer. Fixes: f28e32d3906e ("drm/nouveau/kms: Don't change EDID when it hasn't actually changed") Signed-off-by: Alexander Kapshuk Reviewed-by: Lyude Paul Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_connector.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 6f21f36719fc..8b4b3688c7ae 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -532,11 +532,13 @@ static void nouveau_connector_set_edid(struct nouveau_connector *nv_connector, struct edid *edid) { - struct edid *old_edid = nv_connector->edid; + if (nv_connector->edid != edid) { + struct edid *old_edid = nv_connector->edid; - drm_connector_update_edid_property(&nv_connector->base, edid); - kfree(old_edid); - nv_connector->edid = edid; + drm_connector_update_edid_property(&nv_connector->base, edid); + kfree(old_edid); + nv_connector->edid = edid; + } } static enum drm_connector_status @@ -669,8 +671,10 @@ nouveau_connector_detect_lvds(struct drm_connector *connector, bool force) /* Try retrieving EDID via DDC */ if (!drm->vbios.fp_no_ddc) { status = nouveau_connector_detect(connector, force); - if (status == connector_status_connected) + if (status == connector_status_connected) { + edid = nv_connector->edid; goto out; + } } /* On some laptops (Sony, i'm looking at you) there appears to From 6c27ffabeb19ebf7dd6d4ccc29f1e57d1ef445d8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 14 Nov 2020 13:50:44 +1000 Subject: [PATCH 483/710] drm/nouveau/ttm: avoid using nouveau_drm.ttm.type_vram prior to nv50 Pre-NV50 chipsets don't currently use the MMU subsystem that later chipsets use, and type_vram is negative here, leading to an OOB memory access. This was previously guarded by a chipset check, restore that. Reported-by: Thomas Zimmermann Fixes: 5839172f0980 ("drm/nouveau: explicitly specify caching to use") Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bo.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 2ee75646ad6f..56b335a55966 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -350,14 +350,13 @@ set_placement_list(struct nouveau_drm *drm, struct ttm_place *pl, unsigned *n, if (domain & NOUVEAU_GEM_DOMAIN_VRAM) { struct nvif_mmu *mmu = &drm->client.mmu; - const u8 type = mmu->type[drm->ttm.type_vram].type; pl[*n].mem_type = TTM_PL_VRAM; pl[*n].flags = flags & ~TTM_PL_FLAG_CACHED; /* Some BARs do not support being ioremapped WC */ if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA && - type & NVIF_MEM_UNCACHED) + mmu->type[drm->ttm.type_vram].type & NVIF_MEM_UNCACHED) pl[*n].flags &= ~TTM_PL_FLAG_WC; (*n)++; From 5c6fb4b28b165887c42c66731c90eaca818b04c6 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 13 Nov 2020 19:14:10 -0500 Subject: [PATCH 484/710] drm/nouveau/kms/nv50-: Use atomic encoder callbacks everywhere It turns out that I forgot to go through and make sure that I converted all encoder callbacks to use atomic_enable/atomic_disable(), so let's go and actually do that. Signed-off-by: Lyude Paul Cc: Kirill A. Shutemov Fixes: 09838c4efe9a ("drm/nouveau/kms: Search for encoders' connectors properly") Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 29 ++++++++++++------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index b111fe24a06b..36d6b6093d16 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -455,7 +455,7 @@ nv50_outp_get_old_connector(struct nouveau_encoder *outp, * DAC *****************************************************************************/ static void -nv50_dac_disable(struct drm_encoder *encoder) +nv50_dac_disable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nv50_core *core = nv50_disp(encoder->dev)->core; @@ -467,7 +467,7 @@ nv50_dac_disable(struct drm_encoder *encoder) } static void -nv50_dac_enable(struct drm_encoder *encoder) +nv50_dac_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); @@ -525,8 +525,8 @@ nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector) static const struct drm_encoder_helper_funcs nv50_dac_help = { .atomic_check = nv50_outp_atomic_check, - .enable = nv50_dac_enable, - .disable = nv50_dac_disable, + .atomic_enable = nv50_dac_enable, + .atomic_disable = nv50_dac_disable, .detect = nv50_dac_detect }; @@ -1055,7 +1055,7 @@ nv50_dp_bpc_to_depth(unsigned int bpc) } static void -nv50_msto_enable(struct drm_encoder *encoder) +nv50_msto_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nv50_head *head = nv50_head(encoder->crtc); struct nv50_head_atom *armh = nv50_head_atom(head->base.base.state); @@ -1101,7 +1101,7 @@ nv50_msto_enable(struct drm_encoder *encoder) } static void -nv50_msto_disable(struct drm_encoder *encoder) +nv50_msto_disable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nv50_msto *msto = nv50_msto(encoder); struct nv50_mstc *mstc = msto->mstc; @@ -1118,8 +1118,8 @@ nv50_msto_disable(struct drm_encoder *encoder) static const struct drm_encoder_helper_funcs nv50_msto_help = { - .disable = nv50_msto_disable, - .enable = nv50_msto_enable, + .atomic_disable = nv50_msto_disable, + .atomic_enable = nv50_msto_enable, .atomic_check = nv50_msto_atomic_check, }; @@ -1645,8 +1645,7 @@ nv50_sor_disable(struct drm_encoder *encoder, } static void -nv50_sor_enable(struct drm_encoder *encoder, - struct drm_atomic_state *state) +nv50_sor_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); @@ -1873,7 +1872,7 @@ nv50_pior_atomic_check(struct drm_encoder *encoder, } static void -nv50_pior_disable(struct drm_encoder *encoder) +nv50_pior_disable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nv50_core *core = nv50_disp(encoder->dev)->core; @@ -1885,7 +1884,7 @@ nv50_pior_disable(struct drm_encoder *encoder) } static void -nv50_pior_enable(struct drm_encoder *encoder) +nv50_pior_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); @@ -1921,14 +1920,14 @@ nv50_pior_enable(struct drm_encoder *encoder) } core->func->pior->ctrl(core, nv_encoder->or, ctrl, asyh); - nv_encoder->crtc = encoder->crtc; + nv_encoder->crtc = &nv_crtc->base; } static const struct drm_encoder_helper_funcs nv50_pior_help = { .atomic_check = nv50_pior_atomic_check, - .enable = nv50_pior_enable, - .disable = nv50_pior_disable, + .atomic_enable = nv50_pior_enable, + .atomic_disable = nv50_pior_disable, }; static void From 0f0d2c876c96d4908a9ef40959a44bec21bdd6cf Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 5 Nov 2020 23:28:47 +0900 Subject: [PATCH 485/710] nvme: free sq/cq dbbuf pointers when dbbuf set fails If Doorbell Buffer Config command fails even 'dev->dbbuf_dbs != NULL' which means OACS indicates that NVME_CTRL_OACS_DBBUF_SUPP is set, nvme_dbbuf_update_and_check_event() will check event even it's not been successfully set. This patch fixes mismatch among dbbuf for sq/cqs in case that dbbuf command fails. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0578ff253c47..3be352403839 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -292,9 +292,21 @@ static void nvme_dbbuf_init(struct nvme_dev *dev, nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)]; } +static void nvme_dbbuf_free(struct nvme_queue *nvmeq) +{ + if (!nvmeq->qid) + return; + + nvmeq->dbbuf_sq_db = NULL; + nvmeq->dbbuf_cq_db = NULL; + nvmeq->dbbuf_sq_ei = NULL; + nvmeq->dbbuf_cq_ei = NULL; +} + static void nvme_dbbuf_set(struct nvme_dev *dev) { struct nvme_command c; + unsigned int i; if (!dev->dbbuf_dbs) return; @@ -308,6 +320,9 @@ static void nvme_dbbuf_set(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "unable to set dbbuf\n"); /* Free memory and continue on */ nvme_dbbuf_dma_free(dev); + + for (i = 1; i <= dev->online_queues; i++) + nvme_dbbuf_free(&dev->queues[i]); } } From f6224b8681326856937420e1db18564a934bf32b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 13 Nov 2020 10:28:30 -0800 Subject: [PATCH 486/710] nvme: directly cache command effects log Remove the struct used for tracking known command effects logs in a list. This is now saved in an xarray that doesn't use these elements. Instead, store the log directly instead of the wrapper struct. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 9 ++++----- drivers/nvme/host/nvme.h | 6 ------ 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9b01afcb7777..2f96675cc0cf 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2929,7 +2929,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, struct nvme_effects_log **log) { - struct nvme_cel *cel = xa_load(&ctrl->cels, csi); + struct nvme_effects_log *cel = xa_load(&ctrl->cels, csi); int ret; if (cel) @@ -2940,16 +2940,15 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, return -ENOMEM; ret = nvme_get_log(ctrl, 0x00, NVME_LOG_CMD_EFFECTS, 0, csi, - &cel->log, sizeof(cel->log), 0); + cel, sizeof(*cel), 0); if (ret) { kfree(cel); return ret; } - cel->csi = csi; - xa_store(&ctrl->cels, cel->csi, cel, GFP_KERNEL); + xa_store(&ctrl->cels, csi, cel, GFP_KERNEL); out: - *log = &cel->log; + *log = cel; return 0; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bc330bf0d3bd..567f7ad18a91 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -226,12 +226,6 @@ struct nvme_fault_inject { #endif }; -struct nvme_cel { - struct list_head entry; - struct nvme_effects_log log; - u8 csi; -}; - struct nvme_ctrl { bool comp_seen; enum nvme_ctrl_state state; From 8168d23fbcee4f9f6c5a1ce8650417f09aef70eb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 13 Nov 2020 10:45:45 -0800 Subject: [PATCH 487/710] nvme: fix memory leak freeing command effects xa_destroy() frees only internal data. The caller is responsible for freeing the exteranl objects referenced by an xarray. Fixes: 1cf7a12e09aa4 ("nvme: use an xarray to lookup the Commands Supported and Effects log") Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2f96675cc0cf..9a270e49df17 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4373,6 +4373,19 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_uninit_ctrl); +static void nvme_free_cels(struct nvme_ctrl *ctrl) +{ + struct nvme_effects_log *cel; + unsigned long i; + + xa_for_each (&ctrl->cels, i, cel) { + xa_erase(&ctrl->cels, i); + kfree(cel); + } + + xa_destroy(&ctrl->cels); +} + static void nvme_free_ctrl(struct device *dev) { struct nvme_ctrl *ctrl = @@ -4382,8 +4395,7 @@ static void nvme_free_ctrl(struct device *dev) if (!subsys || ctrl->instance != subsys->instance) ida_simple_remove(&nvme_instance_ida, ctrl->instance); - xa_destroy(&ctrl->cels); - + nvme_free_cels(ctrl); nvme_mpath_uninit(ctrl); __free_page(ctrl->discard_page); From f782e2c300a717233b64697affda3ea7aac00b2b Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Fri, 13 Nov 2020 17:17:56 +0000 Subject: [PATCH 488/710] bpf: Relax return code check for subprograms Currently verifier enforces return code checks for subprograms in the same manner as it does for program entry points. This prevents returning arbitrary scalar values from subprograms. Scalar type of returned values is checked by btf_prepare_func_args() and hence it should be safe to allow only scalars for now. Relax return code checks for subprograms and allow any correct scalar values. Fixes: 51c39bb1d5d10 (bpf: Introduce function-by-function verification) Signed-off-by: Dmitrii Banshchikov Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201113171756.90594-1-me@ubique.spb.ru --- kernel/bpf/verifier.c | 15 +++++++++++++-- .../bpf/prog_tests/test_global_funcs.c | 1 + .../selftests/bpf/progs/test_global_func8.c | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_global_func8.c diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6204ec705d80..1388bf733071 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7786,9 +7786,11 @@ static int check_return_code(struct bpf_verifier_env *env) struct tnum range = tnum_range(0, 1); enum bpf_prog_type prog_type = resolve_prog_type(env->prog); int err; + const bool is_subprog = env->cur_state->frame[0]->subprogno; /* LSM and struct_ops func-ptr's return type could be "void" */ - if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS || + if (!is_subprog && + (prog_type == BPF_PROG_TYPE_STRUCT_OPS || prog_type == BPF_PROG_TYPE_LSM) && !prog->aux->attach_func_proto->type) return 0; @@ -7808,6 +7810,16 @@ static int check_return_code(struct bpf_verifier_env *env) return -EACCES; } + reg = cur_regs(env) + BPF_REG_0; + if (is_subprog) { + if (reg->type != SCALAR_VALUE) { + verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", + reg_type_str[reg->type]); + return -EINVAL; + } + return 0; + } + switch (prog_type) { case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || @@ -7861,7 +7873,6 @@ static int check_return_code(struct bpf_verifier_env *env) return 0; } - reg = cur_regs(env) + BPF_REG_0; if (reg->type != SCALAR_VALUE) { verbose(env, "At program exit the register R0 is not a known value (%s)\n", reg_type_str[reg->type]); diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 193002b14d7f..32e4348b714b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -60,6 +60,7 @@ void test_test_global_funcs(void) { "test_global_func5.o" , "expected pointer to ctx, but got PTR" }, { "test_global_func6.o" , "modified ctx ptr R2" }, { "test_global_func7.o" , "foo() doesn't return scalar" }, + { "test_global_func8.o" }, }; libbpf_print_fn_t old_print_fn = NULL; int err, i, duration = 0; diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c new file mode 100644 index 000000000000..d55a6544b1ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func8.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include +#include +#include + +__noinline int foo(struct __sk_buff *skb) +{ + return bpf_get_prandom_u32(); +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + if (!foo(skb)) + return 0; + + return 1; +} From 944d1444d53f5a213457e5096db370cfd06923d4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Nov 2020 16:48:44 -0700 Subject: [PATCH 489/710] io_uring: handle -EOPNOTSUPP on path resolution Any attempt to do path resolution on /proc/self from an async worker will yield -EOPNOTSUPP. We can safely do that resolution from the task itself, and without blocking, so retry it from there. Ideally io_uring would know this upfront and not have to go through the worker thread to find out, but that doesn't currently seem feasible. Signed-off-by: Jens Axboe --- fs/io_uring.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c77584de68d7..f05978a74ce1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -478,6 +478,7 @@ struct io_sr_msg { struct io_open { struct file *file; int dfd; + bool ignore_nonblock; struct filename *filename; struct open_how how; unsigned long nofile; @@ -3796,6 +3797,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe return ret; } req->open.nofile = rlimit(RLIMIT_NOFILE); + req->open.ignore_nonblock = false; req->flags |= REQ_F_NEED_CLEANUP; return 0; } @@ -3839,7 +3841,7 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock) struct file *file; int ret; - if (force_nonblock) + if (force_nonblock && !req->open.ignore_nonblock) return -EAGAIN; ret = build_open_flags(&req->open.how, &op); @@ -3854,6 +3856,21 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock) if (IS_ERR(file)) { put_unused_fd(ret); ret = PTR_ERR(file); + /* + * A work-around to ensure that /proc/self works that way + * that it should - if we get -EOPNOTSUPP back, then assume + * that proc_self_get_link() failed us because we're in async + * context. We should be safe to retry this from the task + * itself with force_nonblock == false set, as it should not + * block on lookup. Would be nice to know this upfront and + * avoid the async dance, but doesn't seem feasible. + */ + if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) { + req->open.ignore_nonblock = true; + refcount_inc(&req->refs); + io_req_task_queue(req); + return 0; + } } else { fsnotify_open(file); fd_install(ret, file); From 11e94f28c3de35d5ad1ac6a242a5b30f4378991a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 10 Nov 2020 14:38:34 +0100 Subject: [PATCH 490/710] iio: accel: kxcjk1013: Replace is_smo8500_device with an acpi_type enum Replace the boolean is_smo8500_device variable with an acpi_type enum. For now this can be either ACPI_GENERIC or ACPI_SMO8500, this is a preparation patch for adding special handling for the KIOX010A ACPI HID, which will add a ACPI_KIOX010A acpi_type to the introduced enum. For stable as needed as precursor for next patch. Signed-off-by: Hans de Goede Fixes: 7f6232e69539 ("iio: accel: kxcjk1013: Add KIOX010A ACPI Hardware-ID") Cc: Link: https://lore.kernel.org/r/20201110133835.129080-2-hdegoede@redhat.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxcjk-1013.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index beb38d9d607d..abeb0d254046 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -126,6 +126,11 @@ enum kx_chipset { KX_MAX_CHIPS /* this must be last */ }; +enum kx_acpi_type { + ACPI_GENERIC, + ACPI_SMO8500, +}; + struct kxcjk1013_data { struct i2c_client *client; struct iio_trigger *dready_trig; @@ -143,7 +148,7 @@ struct kxcjk1013_data { bool motion_trigger_on; int64_t timestamp; enum kx_chipset chipset; - bool is_smo8500_device; + enum kx_acpi_type acpi_type; }; enum kxcjk1013_axis { @@ -1247,7 +1252,7 @@ static irqreturn_t kxcjk1013_data_rdy_trig_poll(int irq, void *private) static const char *kxcjk1013_match_acpi_device(struct device *dev, enum kx_chipset *chipset, - bool *is_smo8500_device) + enum kx_acpi_type *acpi_type) { const struct acpi_device_id *id; @@ -1256,7 +1261,7 @@ static const char *kxcjk1013_match_acpi_device(struct device *dev, return NULL; if (strcmp(id->id, "SMO8500") == 0) - *is_smo8500_device = true; + *acpi_type = ACPI_SMO8500; *chipset = (enum kx_chipset)id->driver_data; @@ -1299,7 +1304,7 @@ static int kxcjk1013_probe(struct i2c_client *client, } else if (ACPI_HANDLE(&client->dev)) { name = kxcjk1013_match_acpi_device(&client->dev, &data->chipset, - &data->is_smo8500_device); + &data->acpi_type); } else return -ENODEV; @@ -1316,7 +1321,7 @@ static int kxcjk1013_probe(struct i2c_client *client, indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &kxcjk1013_info; - if (client->irq > 0 && !data->is_smo8500_device) { + if (client->irq > 0 && data->acpi_type != ACPI_SMO8500) { ret = devm_request_threaded_irq(&client->dev, client->irq, kxcjk1013_data_rdy_trig_poll, kxcjk1013_event_handler, From e5b1032a656e9aa4c7a4df77cb9156a2a651a5f9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 10 Nov 2020 14:38:35 +0100 Subject: [PATCH 491/710] iio: accel: kxcjk1013: Add support for KIOX010A ACPI DSM for setting tablet-mode Some 360 degree hinges (yoga) style 2-in-1 devices use 2 KXCJ91008-s to allow the OS to determine the angle between the display and the base of the device, so that the OS can determine if the 2-in-1 is in laptop or in tablet-mode. On Windows both accelerometers are read by a special HingeAngleService process; and this process calls a DSM (Device Specific Method) on the ACPI KIOX010A device node for the sensor in the display, to let the embedded-controller (EC) know about the mode so that it can disable the kbd and touchpad to avoid spurious input while folded into tablet-mode. This notifying of the EC is problematic because sometimes the EC comes up thinking that device is in tablet-mode and the kbd and touchpad do not work. This happens for example on Irbis NB111 devices after a suspend / resume cycle (after a complete battery drain / hard reset without having booted Windows at least once). Other 2-in-1s which are likely affected too are e.g. the Teclast F5 and F6 series. The kxcjk-1013 driver may seem like a strange place to deal with this, but since it is *the* driver for the ACPI KIOX010A device, it is also the driver which has access to the ACPI handle needed by the DSM. Add support for calling the DSM and on probe unconditionally tell the EC that the device is laptop mode, fixing the kbd and touchpad sometimes not working. Fixes: 7f6232e69539 ("iio: accel: kxcjk1013: Add KIOX010A ACPI Hardware-ID") Reported-and-tested-by: russianneuromancer Signed-off-by: Hans de Goede Cc: Link: https://lore.kernel.org/r/20201110133835.129080-3-hdegoede@redhat.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxcjk-1013.c | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index abeb0d254046..560a3373ff20 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -129,6 +129,7 @@ enum kx_chipset { enum kx_acpi_type { ACPI_GENERIC, ACPI_SMO8500, + ACPI_KIOX010A, }; struct kxcjk1013_data { @@ -275,6 +276,32 @@ static const struct { {19163, 1, 0}, {38326, 0, 1} }; +#ifdef CONFIG_ACPI +enum kiox010a_fn_index { + KIOX010A_SET_LAPTOP_MODE = 1, + KIOX010A_SET_TABLET_MODE = 2, +}; + +static int kiox010a_dsm(struct device *dev, int fn_index) +{ + acpi_handle handle = ACPI_HANDLE(dev); + guid_t kiox010a_dsm_guid; + union acpi_object *obj; + + if (!handle) + return -ENODEV; + + guid_parse("1f339696-d475-4e26-8cad-2e9f8e6d7a91", &kiox010a_dsm_guid); + + obj = acpi_evaluate_dsm(handle, &kiox010a_dsm_guid, 1, fn_index, NULL); + if (!obj) + return -EIO; + + ACPI_FREE(obj); + return 0; +} +#endif + static int kxcjk1013_set_mode(struct kxcjk1013_data *data, enum kxcjk1013_mode mode) { @@ -352,6 +379,13 @@ static int kxcjk1013_chip_init(struct kxcjk1013_data *data) { int ret; +#ifdef CONFIG_ACPI + if (data->acpi_type == ACPI_KIOX010A) { + /* Make sure the kbd and touchpad on 2-in-1s using 2 KXCJ91008-s work */ + kiox010a_dsm(&data->client->dev, KIOX010A_SET_LAPTOP_MODE); + } +#endif + ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_WHO_AM_I); if (ret < 0) { dev_err(&data->client->dev, "Error reading who_am_i\n"); @@ -1262,6 +1296,8 @@ static const char *kxcjk1013_match_acpi_device(struct device *dev, if (strcmp(id->id, "SMO8500") == 0) *acpi_type = ACPI_SMO8500; + else if (strcmp(id->id, "KIOX010A") == 0) + *acpi_type = ACPI_KIOX010A; *chipset = (enum kx_chipset)id->driver_data; From b7131ee0bac5e5df73e4098e77bbddb3a31d06ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 14 Nov 2020 19:12:46 +0100 Subject: [PATCH 492/710] blk-cgroup: fix a hd_struct leak in blkcg_fill_root_iostats disk_get_part needs to be paired with a disk_put_part. Cc: stable@vger.kernel.org Fixes: ef45fe470e1 ("blk-cgroup: show global disk stats in root cgroup io.stat") Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c68bdf58c9a6..54fbe1e80cc4 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -849,6 +849,7 @@ static void blkcg_fill_root_iostats(void) blkg_iostat_set(&blkg->iostat.cur, &tmp); u64_stats_update_end(&blkg->iostat.sync); } + disk_put_part(part); } } From 37344718bd7032639a02053e06b51697f90154ce Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 12 Nov 2020 19:23:59 +0800 Subject: [PATCH 493/710] net: phy: smsc: add missed clk_disable_unprepare in smsc_phy_probe() Add the missing clk_disable_unprepare() before return from smsc_phy_probe() in the error handling case. Fixes: bedd8d78aba3 ("net: phy: smsc: LAN8710/20: add phy refclk in support") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1605180239-1792-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/smsc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index ec97669be5c2..0fc39ac5ca88 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -291,8 +291,10 @@ static int smsc_phy_probe(struct phy_device *phydev) return ret; ret = clk_set_rate(priv->refclk, 50 * 1000 * 1000); - if (ret) + if (ret) { + clk_disable_unprepare(priv->refclk); return ret; + } return 0; } From 38935861d85a4d9a353d1dd5a156c97700e2765d Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 13 Nov 2020 22:51:40 -0800 Subject: [PATCH 494/710] mm/compaction: count pages and stop correctly during page isolation In isolate_migratepages_block, when cc->alloc_contig is true, we are able to isolate compound pages. But nr_migratepages and nr_isolated did not count compound pages correctly, causing us to isolate more pages than we thought. So count compound pages as the number of base pages they contain. Otherwise, we might be trapped in too_many_isolated while loop, since the actual isolated pages can go up to COMPACT_CLUSTER_MAX*512=16384, where COMPACT_CLUSTER_MAX is 32, since we stop isolation after cc->nr_migratepages reaches to COMPACT_CLUSTER_MAX. In addition, after we fix the issue above, cc->nr_migratepages could never be equal to COMPACT_CLUSTER_MAX if compound pages are isolated, thus page isolation could not stop as we intended. Change the isolation stop condition to '>='. The issue can be triggered as follows: In a system with 16GB memory and an 8GB CMA region reserved by hugetlb_cma, if we first allocate 10GB THPs and mlock them (so some THPs are allocated in the CMA region and mlocked), reserving 6 1GB hugetlb pages via /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages will get stuck (looping in too_many_isolated function) until we kill either task. With the patch applied, oom will kill the application with 10GB THPs and let hugetlb page reservation finish. [ziy@nvidia.com: v3] Link: https://lkml.kernel.org/r/20201030183809.3616803-1-zi.yan@sent.com Fixes: 1da2f328fa64 ("cmm,thp,compaction,cma: allow THP migration for CMA allocations") Signed-off-by: Zi Yan Signed-off-by: Andrew Morton Reviewed-by: Yang Shi Acked-by: Vlastimil Babka Cc: Rik van Riel Cc: Michal Hocko Cc: Mel Gorman Cc: Link: https://lkml.kernel.org/r/20201029200435.3386066-1-zi.yan@sent.com Signed-off-by: Linus Torvalds --- mm/compaction.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 6e0ee5641788..d549a4521edf 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1012,8 +1012,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, isolate_success: list_add(&page->lru, &cc->migratepages); - cc->nr_migratepages++; - nr_isolated++; + cc->nr_migratepages += compound_nr(page); + nr_isolated += compound_nr(page); /* * Avoid isolating too much unless this block is being @@ -1021,7 +1021,7 @@ isolate_success: * or a lock is contended. For contention, isolate quickly to * potentially remove one source of contention. */ - if (cc->nr_migratepages == COMPACT_CLUSTER_MAX && + if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX && !cc->rescan && !cc->contended) { ++low_pfn; break; @@ -1132,7 +1132,7 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, if (!pfn) break; - if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) + if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX) break; } From d20bdd571ee5c9966191568527ecdb1bd4b52368 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 13 Nov 2020 22:51:43 -0800 Subject: [PATCH 495/710] mm/compaction: stop isolation if too many pages are isolated and we have pages to migrate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In isolate_migratepages_block, if we have too many isolated pages and nr_migratepages is not zero, we should try to migrate what we have without wasting time on isolating. In theory it's possible that multiple parallel compactions will cause too_many_isolated() to become true even if each has isolated less than COMPACT_CLUSTER_MAX, and loop forever in the while loop. Bailing immediately prevents that. [vbabka@suse.cz: changelog addition] Fixes: 1da2f328fa64 (“mm,thp,compaction,cma: allow THP migration for CMA allocations”) Suggested-by: Vlastimil Babka Signed-off-by: Zi Yan Signed-off-by: Andrew Morton Cc: Cc: Mel Gorman Cc: Michal Hocko Cc: Rik van Riel Cc: Yang Shi Link: https://lkml.kernel.org/r/20201030183809.3616803-2-zi.yan@sent.com Signed-off-by: Linus Torvalds --- mm/compaction.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/compaction.c b/mm/compaction.c index d549a4521edf..13cb7a961b31 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -817,6 +817,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, * delay for some time until fewer pages are isolated */ while (unlikely(too_many_isolated(pgdat))) { + /* stop isolation if there are still pages not migrated */ + if (cc->nr_migratepages) + return 0; + /* async migration should just abort */ if (cc->mode == MIGRATE_ASYNC) return 0; From 2da9f6305f306ffbbb44790675799328fb73119d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 13 Nov 2020 22:51:46 -0800 Subject: [PATCH 496/710] mm/vmscan: fix NR_ISOLATED_FILE corruption on 64-bit Previously the negated unsigned long would be cast back to signed long which would have the correct negative value. After commit 730ec8c01a2b ("mm/vmscan.c: change prototype for shrink_page_list"), the large unsigned int converts to a large positive signed long. Symptoms include CMA allocations hanging forever holding the cma_mutex due to alloc_contig_range->...->isolate_migratepages_block waiting forever in "while (unlikely(too_many_isolated(pgdat)))". [akpm@linux-foundation.org: fix -stat.nr_lazyfree_fail as well, per Michal] Fixes: 730ec8c01a2b ("mm/vmscan.c: change prototype for shrink_page_list") Signed-off-by: Nicholas Piggin Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Vaneet Narang Cc: Maninder Singh Cc: Amit Sahrawat Cc: Mel Gorman Cc: Vlastimil Babka Cc: Link: https://lkml.kernel.org/r/20201029032320.1448441-1-npiggin@gmail.com Signed-off-by: Linus Torvalds --- mm/vmscan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 1b8f0e059767..7b4e31eac2cf 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1516,7 +1516,8 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, TTU_IGNORE_ACCESS, &stat, true); list_splice(&clean_pages, page_list); - mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -nr_reclaimed); + mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, + -(long)nr_reclaimed); /* * Since lazyfree pages are isolated from file LRU from the beginning, * they will rotate back to anonymous LRU in the end if it failed to @@ -1526,7 +1527,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, stat.nr_lazyfree_fail); mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, - -stat.nr_lazyfree_fail); + -(long)stat.nr_lazyfree_fail); return nr_reclaimed; } From 044747e971ace469064e68a0e8b3666011f0f3bd Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 13 Nov 2020 22:51:49 -0800 Subject: [PATCH 497/710] mailmap: fix entry for Dmitry Baryshkov/Eremin-Solenikov Change back surname to new (old) one. Dmitry Baryshkov -> Dmitry Eremin-Solenikov -> Dmitry Baryshkov. Map several odd entries to main identity. Signed-off-by: Dmitry Baryshkov Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20201103005158.1181426-1-dmitry.baryshkov@linaro.org Signed-off-by: Linus Torvalds --- .mailmap | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 1e14566a3d56..505b3d771964 100644 --- a/.mailmap +++ b/.mailmap @@ -82,7 +82,10 @@ Dengcheng Zhu Dengcheng Zhu Dengcheng Zhu -Dmitry Eremin-Solenikov +Dmitry Baryshkov +Dmitry Baryshkov <[dbaryshkov@gmail.com]> +Dmitry Baryshkov +Dmitry Baryshkov Dmitry Safonov <0x7f454c46@gmail.com> Dmitry Safonov <0x7f454c46@gmail.com> Dmitry Safonov <0x7f454c46@gmail.com> From 22e4663e916321b72972c69ca0c6b962f529bd78 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 13 Nov 2020 22:51:53 -0800 Subject: [PATCH 498/710] mm/slub: fix panic in slab_alloc_node() While doing memory hot-unplug operation on a PowerPC VM running 1024 CPUs with 11TB of ram, I hit the following panic: BUG: Kernel NULL pointer dereference on read at 0x00000007 Faulting instruction address: 0xc000000000456048 Oops: Kernel access of bad area, sig: 11 [#2] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS= 2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp CPU: 160 PID: 1 Comm: systemd Tainted: G D 5.9.0 #1 NIP: c000000000456048 LR: c000000000455fd4 CTR: c00000000047b350 REGS: c00006028d1b77a0 TRAP: 0300 Tainted: G D (5.9.0) MSR: 8000000000009033 CR: 24004228 XER: 00000000 CFAR: c00000000000f1b0 DAR: 0000000000000007 DSISR: 40000000 IRQMASK: 0 GPR00: c000000000455fd4 c00006028d1b7a30 c000000001bec800 0000000000000000 GPR04: 0000000000000dc0 0000000000000000 00000000000374ef c00007c53df99320 GPR08: 000007c53c980000 0000000000000000 000007c53c980000 0000000000000000 GPR12: 0000000000004400 c00000001e8e4400 0000000000000000 0000000000000f6a GPR16: 0000000000000000 c000000001c25930 c000000001d62528 00000000000000c1 GPR20: c000000001d62538 c00006be469e9000 0000000fffffffe0 c0000000003c0ff8 GPR24: 0000000000000018 0000000000000000 0000000000000dc0 0000000000000000 GPR28: c00007c513755700 c000000001c236a4 c00007bc4001f800 0000000000000001 NIP [c000000000456048] __kmalloc_node+0x108/0x790 LR [c000000000455fd4] __kmalloc_node+0x94/0x790 Call Trace: kvmalloc_node+0x58/0x110 mem_cgroup_css_online+0x10c/0x270 online_css+0x48/0xd0 cgroup_apply_control_enable+0x2c4/0x470 cgroup_mkdir+0x408/0x5f0 kernfs_iop_mkdir+0x90/0x100 vfs_mkdir+0x138/0x250 do_mkdirat+0x154/0x1c0 system_call_exception+0xf8/0x200 system_call_common+0xf0/0x27c Instruction dump: e93e0000 e90d0030 39290008 7cc9402a e94d0030 e93e0000 7ce95214 7f89502a 2fbc0000 419e0018 41920230 e9270010 <89290007> 7f994800 419e0220 7ee6bb78 This pointing to the following code: mm/slub.c:2851 if (unlikely(!object || !node_match(page, node))) { c000000000456038: 00 00 bc 2f cmpdi cr7,r28,0 c00000000045603c: 18 00 9e 41 beq cr7,c000000000456054 <__kmalloc_node+0x114> node_match(): mm/slub.c:2491 if (node != NUMA_NO_NODE && page_to_nid(page) != node) c000000000456040: 30 02 92 41 beq cr4,c000000000456270 <__kmalloc_node+0x330> page_to_nid(): include/linux/mm.h:1294 c000000000456044: 10 00 27 e9 ld r9,16(r7) c000000000456048: 07 00 29 89 lbz r9,7(r9) <<<< r9 = NULL node_match(): mm/slub.c:2491 c00000000045604c: 00 48 99 7f cmpw cr7,r25,r9 c000000000456050: 20 02 9e 41 beq cr7,c000000000456270 <__kmalloc_node+0x330> The panic occurred in slab_alloc_node() when checking for the page's node: object = c->freelist; page = c->page; if (unlikely(!object || !node_match(page, node))) { object = __slab_alloc(s, gfpflags, node, addr, c); stat(s, ALLOC_SLOWPATH); The issue is that object is not NULL while page is NULL which is odd but may happen if the cache flush happened after loading object but before loading page. Thus checking for the page pointer is required too. The cache flush is done through an inter processor interrupt when a piece of memory is off-lined. That interrupt is triggered when a memory hot-unplug operation is initiated and offline_pages() is calling the slub's MEM_GOING_OFFLINE callback slab_mem_going_offline_callback() which is calling flush_cpu_slab(). If that interrupt is caught between the reading of c->freelist and the reading of c->page, this could lead to such a situation. That situation is expected and the later call to this_cpu_cmpxchg_double() will detect the change to c->freelist and redo the whole operation. In commit 6159d0f5c03e ("mm/slub.c: page is always non-NULL in node_match()") check on the page pointer has been removed assuming that page is always valid when it is called. It happens that this is not true in that particular case, so check for page before calling node_match() here. Fixes: 6159d0f5c03e ("mm/slub.c: page is always non-NULL in node_match()") Signed-off-by: Laurent Dufour Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Acked-by: Christoph Lameter Cc: Wei Yang Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Nathan Lynch Cc: Scott Cheloha Cc: Michal Hocko Cc: Link: https://lkml.kernel.org/r/20201027190406.33283-1-ldufour@linux.ibm.com Signed-off-by: Linus Torvalds --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index b30be2385d1c..34dcc09e2ec9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2852,7 +2852,7 @@ redo: object = c->freelist; page = c->page; - if (unlikely(!object || !node_match(page, node))) { + if (unlikely(!object || !page || !node_match(page, node))) { object = __slab_alloc(s, gfpflags, node, addr, c); } else { void *next_object = get_freepointer_safe(s, object); From 96e1fac162cc0086c50b2b14062112adb2ba640e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 13 Nov 2020 22:51:56 -0800 Subject: [PATCH 499/710] mm/gup: use unpin_user_pages() in __gup_longterm_locked() When FOLL_PIN is passed to __get_user_pages() the page list must be put back using unpin_user_pages() otherwise the page pin reference persists in a corrupted state. There are two places in the unwind of __gup_longterm_locked() that put the pages back without checking. Normally on error this function would return the partial page list making this the caller's responsibility, but in these two cases the caller is not allowed to see these pages at all. Fixes: 3faa52c03f44 ("mm/gup: track FOLL_PIN pages") Reported-by: Ira Weiny Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Ira Weiny Reviewed-by: John Hubbard Cc: Aneesh Kumar K.V Cc: Dan Williams Cc: Link: https://lkml.kernel.org/r/0-v2-3ae7d9d162e2+2a7-gup_cma_fix_jgg@nvidia.com Signed-off-by: Linus Torvalds --- mm/gup.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 102877ed77a4..98eb8e6d2609 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1647,8 +1647,11 @@ check_again: /* * drop the above get_user_pages reference. */ - for (i = 0; i < nr_pages; i++) - put_page(pages[i]); + if (gup_flags & FOLL_PIN) + unpin_user_pages(pages, nr_pages); + else + for (i = 0; i < nr_pages; i++) + put_page(pages[i]); if (migrate_pages(&cma_page_list, alloc_migration_target, NULL, (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) { @@ -1728,8 +1731,11 @@ static long __gup_longterm_locked(struct mm_struct *mm, goto out; if (check_dax_vmas(vmas_tmp, rc)) { - for (i = 0; i < rc; i++) - put_page(pages[i]); + if (gup_flags & FOLL_PIN) + unpin_user_pages(pages, rc); + else + for (i = 0; i < rc; i++) + put_page(pages[i]); rc = -EOPNOTSUPP; goto out; } From 3347acc6fcd4ee71ad18a9ff9d9dac176b517329 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 13 Nov 2020 22:51:59 -0800 Subject: [PATCH 500/710] compiler.h: fix barrier_data() on clang Commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") neglected to copy barrier_data() from compiler-gcc.h into compiler-clang.h. The definition in compiler-gcc.h was really to work around clang's more aggressive optimization, so this broke barrier_data() on clang, and consequently memzero_explicit() as well. For example, this results in at least the memzero_explicit() call in lib/crypto/sha256.c:sha256_transform() being optimized away by clang. Fix this by moving the definition of barrier_data() into compiler.h. Also move the gcc/clang definition of barrier() into compiler.h, __memory_barrier() is icc-specific (and barrier() is already defined using it in compiler-intel.h) and doesn't belong in compiler.h. [rdunlap@infradead.org: fix ALPHA builds when SMP is not enabled] Link: https://lkml.kernel.org/r/20201101231835.4589-1-rdunlap@infradead.org Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Signed-off-by: Arvind Sankar Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Cc: Link: https://lkml.kernel.org/r/20201014212631.207844-1-nivedita@alum.mit.edu Signed-off-by: Linus Torvalds --- include/asm-generic/barrier.h | 1 + include/linux/compiler-clang.h | 6 ------ include/linux/compiler-gcc.h | 19 ------------------- include/linux/compiler.h | 18 ++++++++++++++++-- 4 files changed, 17 insertions(+), 27 deletions(-) diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 798027bb89be..640f09479bdf 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -13,6 +13,7 @@ #ifndef __ASSEMBLY__ +#include #include #ifndef nop diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 230604e7f057..dd7233c48bf3 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -60,12 +60,6 @@ #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif -/* The following are for compatibility with GCC, from compiler-gcc.h, - * and may be redefined here because they should not be shared with other - * compilers, like ICC. - */ -#define barrier() __asm__ __volatile__("" : : : "memory") - #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5deb37024574..74c6c0486eed 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -15,25 +15,6 @@ # error Sorry, your version of GCC is too old - please use 4.9 or newer. #endif -/* Optimization barrier */ - -/* The "volatile" is due to gcc bugs */ -#define barrier() __asm__ __volatile__("": : :"memory") -/* - * This version is i.e. to prevent dead stores elimination on @ptr - * where gcc and llvm may behave differently when otherwise using - * normal barrier(): while gcc behavior gets along with a normal - * barrier(), llvm needs an explicit input variable to be assumed - * clobbered. The issue is as follows: while the inline asm might - * access any memory it wants, the compiler could have fit all of - * @ptr into memory registers instead, and since @ptr never escaped - * from that, it proved that the inline asm wasn't touching any of - * it. This version works well with both compilers, i.e. we're telling - * the compiler that the inline asm absolutely may see the contents - * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 - */ -#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") - /* * This macro obfuscates arithmetic on a variable address so that gcc * shouldn't recognize the original var, and make assumptions about it. diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e512f5505dad..b8fe0c23cfff 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -80,11 +80,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Optimization barrier */ #ifndef barrier -# define barrier() __memory_barrier() +/* The "volatile" is due to gcc bugs */ +# define barrier() __asm__ __volatile__("": : :"memory") #endif #ifndef barrier_data -# define barrier_data(ptr) barrier() +/* + * This version is i.e. to prevent dead stores elimination on @ptr + * where gcc and llvm may behave differently when otherwise using + * normal barrier(): while gcc behavior gets along with a normal + * barrier(), llvm needs an explicit input variable to be assumed + * clobbered. The issue is as follows: while the inline asm might + * access any memory it wants, the compiler could have fit all of + * @ptr into memory registers instead, and since @ptr never escaped + * from that, it proved that the inline asm wasn't touching any of + * it. This version works well with both compilers, i.e. we're telling + * the compiler that the inline asm absolutely may see the contents + * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 + */ +# define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") #endif /* workaround for GCC PR82365 if needed */ From 8b92c4ff4423aa9900cf838d3294fcade4dbda35 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 13 Nov 2020 22:52:02 -0800 Subject: [PATCH 501/710] Revert "kernel/reboot.c: convert simple_strtoul to kstrtoint" Patch series "fix parsing of reboot= cmdline", v3. The parsing of the reboot= cmdline has two major errors: - a missing bound check can crash the system on reboot - parsing of the cpu number only works if specified last Fix both. This patch (of 2): This reverts commit 616feab753972b97. kstrtoint() and simple_strtoul() have a subtle difference which makes them non interchangeable: if a non digit character is found amid the parsing, the former will return an error, while the latter will just stop parsing, e.g. simple_strtoul("123xyx") = 123. The kernel cmdline reboot= argument allows to specify the CPU used for rebooting, with the syntax `s####` among the other flags, e.g. "reboot=warm,s31,force", so if this flag is not the last given, it's silently ignored as well as the subsequent ones. Fixes: 616feab75397 ("kernel/reboot.c: convert simple_strtoul to kstrtoint") Signed-off-by: Matteo Croce Signed-off-by: Andrew Morton Cc: Guenter Roeck Cc: Petr Mladek Cc: Arnd Bergmann Cc: Mike Rapoport Cc: Kees Cook Cc: Pavel Tatashin Cc: Robin Holt Cc: Fabian Frederick Cc: Greg Kroah-Hartman Cc: Link: https://lkml.kernel.org/r/20201103214025.116799-2-mcroce@linux.microsoft.com Signed-off-by: Linus Torvalds --- kernel/reboot.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/kernel/reboot.c b/kernel/reboot.c index e7b78d5ae1ab..8fbba433725e 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -551,22 +551,15 @@ static int __init reboot_setup(char *str) break; case 's': - { - int rc; - - if (isdigit(*(str+1))) { - rc = kstrtoint(str+1, 0, &reboot_cpu); - if (rc) - return rc; - } else if (str[1] == 'm' && str[2] == 'p' && - isdigit(*(str+3))) { - rc = kstrtoint(str+3, 0, &reboot_cpu); - if (rc) - return rc; - } else + if (isdigit(*(str+1))) + reboot_cpu = simple_strtoul(str+1, NULL, 0); + else if (str[1] == 'm' && str[2] == 'p' && + isdigit(*(str+3))) + reboot_cpu = simple_strtoul(str+3, NULL, 0); + else *mode = REBOOT_SOFT; break; - } + case 'g': *mode = REBOOT_GPIO; break; From df5b0ab3e08a156701b537809914b339b0daa526 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 13 Nov 2020 22:52:07 -0800 Subject: [PATCH 502/710] reboot: fix overflow parsing reboot cpu number Limit the CPU number to num_possible_cpus(), because setting it to a value lower than INT_MAX but higher than NR_CPUS produces the following error on reboot and shutdown: BUG: unable to handle page fault for address: ffffffff90ab1bb0 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 1c09067 P4D 1c09067 PUD 1c0a063 PMD 0 Oops: 0000 [#1] SMP CPU: 1 PID: 1 Comm: systemd-shutdow Not tainted 5.9.0-rc8-kvm #110 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 RIP: 0010:migrate_to_reboot_cpu+0xe/0x60 Code: ea ea 00 48 89 fa 48 c7 c7 30 57 f1 81 e9 fa ef ff ff 66 2e 0f 1f 84 00 00 00 00 00 53 8b 1d d5 ea ea 00 e8 14 33 fe ff 89 da <48> 0f a3 15 ea fc bd 00 48 89 d0 73 29 89 c2 c1 e8 06 65 48 8b 3c RSP: 0018:ffffc90000013e08 EFLAGS: 00010246 RAX: ffff88801f0a0000 RBX: 0000000077359400 RCX: 0000000000000000 RDX: 0000000077359400 RSI: 0000000000000002 RDI: ffffffff81c199e0 RBP: ffffffff81c1e3c0 R08: ffff88801f41f000 R09: ffffffff81c1e348 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 00007f32bedf8830 R14: 00000000fee1dead R15: 0000000000000000 FS: 00007f32bedf8980(0000) GS:ffff88801f480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffff90ab1bb0 CR3: 000000001d057000 CR4: 00000000000006a0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __do_sys_reboot.cold+0x34/0x5b do_syscall_64+0x2d/0x40 Fixes: 1b3a5d02ee07 ("reboot: move arch/x86 reboot= handling to generic kernel") Signed-off-by: Matteo Croce Signed-off-by: Andrew Morton Cc: Arnd Bergmann Cc: Fabian Frederick Cc: Greg Kroah-Hartman Cc: Guenter Roeck Cc: Kees Cook Cc: Mike Rapoport Cc: Pavel Tatashin Cc: Petr Mladek Cc: Robin Holt Cc: Link: https://lkml.kernel.org/r/20201103214025.116799-3-mcroce@linux.microsoft.com Signed-off-by: Linus Torvalds --- kernel/reboot.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/reboot.c b/kernel/reboot.c index 8fbba433725e..af6f23d8bea1 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -558,6 +558,13 @@ static int __init reboot_setup(char *str) reboot_cpu = simple_strtoul(str+3, NULL, 0); else *mode = REBOOT_SOFT; + if (reboot_cpu >= num_possible_cpus()) { + pr_err("Ignoring the CPU number in reboot= option. " + "CPU %d exceeds possible cpu number %d\n", + reboot_cpu, num_possible_cpus()); + reboot_cpu = 0; + break; + } break; case 'g': From e7e046155af04cdca5e1157f28b07e1651eb317b Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Fri, 13 Nov 2020 22:52:10 -0800 Subject: [PATCH 503/710] kernel/watchdog: fix watchdog_allowed_mask not used warning Define watchdog_allowed_mask only when SOFTLOCKUP_DETECTOR is enabled. Fixes: 7feeb9cd4f5b ("watchdog/sysctl: Clean up sysctl variable name space") Signed-off-by: Santosh Sivaraj Signed-off-by: Andrew Morton Reviewed-by: Petr Mladek Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20201106015025.1281561-1-santosh@fossix.org Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 5abb5b22ad13..71109065bd8e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -44,8 +44,6 @@ int __read_mostly soft_watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; static int __read_mostly nmi_watchdog_available; -static struct cpumask watchdog_allowed_mask __read_mostly; - struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); @@ -162,6 +160,8 @@ static void lockup_detector_update_enable(void) int __read_mostly sysctl_softlockup_all_cpu_backtrace; #endif +static struct cpumask watchdog_allowed_mask __read_mostly; + /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; From 8b21ca0218d29cc6bb7028125c7e5a10dfb4730c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 13 Nov 2020 22:52:13 -0800 Subject: [PATCH 504/710] mm: memcontrol: fix missing wakeup polling thread When we poll the swap.events, we can miss being woken up when the swap event occurs. Because we didn't notify. Fixes: f3a53a3a1e5b ("mm, memcontrol: implement memory.swap.events") Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Roman Gushchin Cc: Michal Hocko Cc: Yafang Shao Cc: Chris Down Cc: Tejun Heo Link: https://lkml.kernel.org/r/20201105161936.98312-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e391e3c56de5..a80c59af2c60 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -900,12 +900,19 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { + bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || + event == MEMCG_SWAP_FAIL; + atomic_long_inc(&memcg->memory_events_local[event]); - cgroup_file_notify(&memcg->events_local_file); + if (!swap_event) + cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); - cgroup_file_notify(&memcg->events_file); + if (swap_event) + cgroup_file_notify(&memcg->swap_events_file); + else + cgroup_file_notify(&memcg->events_file); if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; From 336bf30eb76580b579dc711ded5d599d905c0217 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 13 Nov 2020 22:52:16 -0800 Subject: [PATCH 505/710] hugetlbfs: fix anon huge page migration race Qian Cai reported the following BUG in [1] LTP: starting move_pages12 BUG: unable to handle page fault for address: ffffffffffffffe0 ... RIP: 0010:anon_vma_interval_tree_iter_first+0xa2/0x170 avc_start_pgoff at mm/interval_tree.c:63 Call Trace: rmap_walk_anon+0x141/0xa30 rmap_walk_anon at mm/rmap.c:1864 try_to_unmap+0x209/0x2d0 try_to_unmap at mm/rmap.c:1763 migrate_pages+0x1005/0x1fb0 move_pages_and_store_status.isra.47+0xd7/0x1a0 __x64_sys_move_pages+0xa5c/0x1100 do_syscall_64+0x5f/0x310 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Hugh Dickins diagnosed this as a migration bug caused by code introduced to use i_mmap_rwsem for pmd sharing synchronization. Specifically, the routine unmap_and_move_huge_page() is always passing the TTU_RMAP_LOCKED flag to try_to_unmap() while holding i_mmap_rwsem. This is wrong for anon pages as the anon_vma_lock should be held in this case. Further analysis suggested that i_mmap_rwsem was not required to he held at all when calling try_to_unmap for anon pages as an anon page could never be part of a shared pmd mapping. Discussion also revealed that the hack in hugetlb_page_mapping_lock_write to drop page lock and acquire i_mmap_rwsem is wrong. There is no way to keep mapping valid while dropping page lock. This patch does the following: - Do not take i_mmap_rwsem and set TTU_RMAP_LOCKED for anon pages when calling try_to_unmap. - Remove the hacky code in hugetlb_page_mapping_lock_write. The routine will now simply do a 'trylock' while still holding the page lock. If the trylock fails, it will return NULL. This could impact the callers: - migration calling code will receive -EAGAIN and retry up to the hard coded limit (10). - memory error code will treat the page as BUSY. This will force killing (SIGKILL) instead of SIGBUS any mapping tasks. Do note that this change in behavior only happens when there is a race. None of the standard kernel testing suites actually hit this race, but it is possible. [1] https://lore.kernel.org/lkml/20200708012044.GC992@lca.pw/ [2] https://lore.kernel.org/linux-mm/alpine.LSU.2.11.2010071833100.2214@eggly.anvils/ Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") Reported-by: Qian Cai Suggested-by: Hugh Dickins Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Acked-by: Naoya Horiguchi Cc: Link: https://lkml.kernel.org/r/20201105195058.78401-1-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 90 +++------------------------------------------ mm/memory-failure.c | 36 +++++++++--------- mm/migrate.c | 44 ++++++++++++---------- mm/rmap.c | 5 +-- 4 files changed, 47 insertions(+), 128 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5a620f690911..37f15c3c24dc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1567,104 +1567,24 @@ int PageHeadHuge(struct page *page_head) return page_head[1].compound_dtor == HUGETLB_PAGE_DTOR; } -/* - * Find address_space associated with hugetlbfs page. - * Upon entry page is locked and page 'was' mapped although mapped state - * could change. If necessary, use anon_vma to find vma and associated - * address space. The returned mapping may be stale, but it can not be - * invalid as page lock (which is held) is required to destroy mapping. - */ -static struct address_space *_get_hugetlb_page_mapping(struct page *hpage) -{ - struct anon_vma *anon_vma; - pgoff_t pgoff_start, pgoff_end; - struct anon_vma_chain *avc; - struct address_space *mapping = page_mapping(hpage); - - /* Simple file based mapping */ - if (mapping) - return mapping; - - /* - * Even anonymous hugetlbfs mappings are associated with an - * underlying hugetlbfs file (see hugetlb_file_setup in mmap - * code). Find a vma associated with the anonymous vma, and - * use the file pointer to get address_space. - */ - anon_vma = page_lock_anon_vma_read(hpage); - if (!anon_vma) - return mapping; /* NULL */ - - /* Use first found vma */ - pgoff_start = page_to_pgoff(hpage); - pgoff_end = pgoff_start + pages_per_huge_page(page_hstate(hpage)) - 1; - anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, - pgoff_start, pgoff_end) { - struct vm_area_struct *vma = avc->vma; - - mapping = vma->vm_file->f_mapping; - break; - } - - anon_vma_unlock_read(anon_vma); - return mapping; -} - /* * Find and lock address space (mapping) in write mode. * - * Upon entry, the page is locked which allows us to find the mapping - * even in the case of an anon page. However, locking order dictates - * the i_mmap_rwsem be acquired BEFORE the page lock. This is hugetlbfs - * specific. So, we first try to lock the sema while still holding the - * page lock. If this works, great! If not, then we need to drop the - * page lock and then acquire i_mmap_rwsem and reacquire page lock. Of - * course, need to revalidate state along the way. + * Upon entry, the page is locked which means that page_mapping() is + * stable. Due to locking order, we can only trylock_write. If we can + * not get the lock, simply return NULL to caller. */ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage) { - struct address_space *mapping, *mapping2; + struct address_space *mapping = page_mapping(hpage); - mapping = _get_hugetlb_page_mapping(hpage); -retry: if (!mapping) return mapping; - /* - * If no contention, take lock and return - */ if (i_mmap_trylock_write(mapping)) return mapping; - /* - * Must drop page lock and wait on mapping sema. - * Note: Once page lock is dropped, mapping could become invalid. - * As a hack, increase map count until we lock page again. - */ - atomic_inc(&hpage->_mapcount); - unlock_page(hpage); - i_mmap_lock_write(mapping); - lock_page(hpage); - atomic_add_negative(-1, &hpage->_mapcount); - - /* verify page is still mapped */ - if (!page_mapped(hpage)) { - i_mmap_unlock_write(mapping); - return NULL; - } - - /* - * Get address space again and verify it is the same one - * we locked. If not, drop lock and retry. - */ - mapping2 = _get_hugetlb_page_mapping(hpage); - if (mapping2 != mapping) { - i_mmap_unlock_write(mapping); - mapping = mapping2; - goto retry; - } - - return mapping; + return NULL; } pgoff_t __basepage_index(struct page *page) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c0bb186bba62..5d880d4eb9a2 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1057,27 +1057,25 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, if (!PageHuge(hpage)) { unmap_success = try_to_unmap(hpage, ttu); } else { - /* - * For hugetlb pages, try_to_unmap could potentially call - * huge_pmd_unshare. Because of this, take semaphore in - * write mode here and set TTU_RMAP_LOCKED to indicate we - * have taken the lock at this higer level. - * - * Note that the call to hugetlb_page_mapping_lock_write - * is necessary even if mapping is already set. It handles - * ugliness of potentially having to drop page lock to obtain - * i_mmap_rwsem. - */ - mapping = hugetlb_page_mapping_lock_write(hpage); - - if (mapping) { - unmap_success = try_to_unmap(hpage, + if (!PageAnon(hpage)) { + /* + * For hugetlb pages in shared mappings, try_to_unmap + * could potentially call huge_pmd_unshare. Because of + * this, take semaphore in write mode here and set + * TTU_RMAP_LOCKED to indicate we have taken the lock + * at this higer level. + */ + mapping = hugetlb_page_mapping_lock_write(hpage); + if (mapping) { + unmap_success = try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED); - i_mmap_unlock_write(mapping); + i_mmap_unlock_write(mapping); + } else { + pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn); + unmap_success = false; + } } else { - pr_info("Memory failure: %#lx: could not find mapping for mapped huge page\n", - pfn); - unmap_success = false; + unmap_success = try_to_unmap(hpage, ttu); } } if (!unmap_success) diff --git a/mm/migrate.c b/mm/migrate.c index 5ca5842df5db..5795cb82e27c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1328,34 +1328,38 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, goto put_anon; if (page_mapped(hpage)) { - /* - * try_to_unmap could potentially call huge_pmd_unshare. - * Because of this, take semaphore in write mode here and - * set TTU_RMAP_LOCKED to let lower levels know we have - * taken the lock. - */ - mapping = hugetlb_page_mapping_lock_write(hpage); - if (unlikely(!mapping)) - goto unlock_put_anon; + bool mapping_locked = false; + enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK| + TTU_IGNORE_ACCESS; - try_to_unmap(hpage, - TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS| - TTU_RMAP_LOCKED); + if (!PageAnon(hpage)) { + /* + * In shared mappings, try_to_unmap could potentially + * call huge_pmd_unshare. Because of this, take + * semaphore in write mode here and set TTU_RMAP_LOCKED + * to let lower levels know we have taken the lock. + */ + mapping = hugetlb_page_mapping_lock_write(hpage); + if (unlikely(!mapping)) + goto unlock_put_anon; + + mapping_locked = true; + ttu |= TTU_RMAP_LOCKED; + } + + try_to_unmap(hpage, ttu); page_was_mapped = 1; - /* - * Leave mapping locked until after subsequent call to - * remove_migration_ptes() - */ + + if (mapping_locked) + i_mmap_unlock_write(mapping); } if (!page_mapped(hpage)) rc = move_to_new_page(new_hpage, hpage, mode); - if (page_was_mapped) { + if (page_was_mapped) remove_migration_ptes(hpage, - rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, true); - i_mmap_unlock_write(mapping); - } + rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false); unlock_put_anon: unlock_page(new_hpage); diff --git a/mm/rmap.c b/mm/rmap.c index 1b84945d655c..31b29321adfe 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1413,9 +1413,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, /* * If sharing is possible, start and end will be adjusted * accordingly. - * - * If called for a huge page, caller must hold i_mmap_rwsem - * in write mode as it is possible to call huge_pmd_unshare. */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); @@ -1462,7 +1459,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); address = pvmw.address; - if (PageHuge(page)) { + if (PageHuge(page) && !PageAnon(page)) { /* * To call huge_pmd_unshare, i_mmap_rwsem must be * held in write mode. Caller needs to explicitly From 2f31ad64a9cce8b2409d2d4563482adfb8664082 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 13 Nov 2020 22:52:20 -0800 Subject: [PATCH 506/710] panic: don't dump stack twice on warn Before commit 3f388f28639f ("panic: dump registers on panic_on_warn"), __warn() was calling show_regs() when regs was not NULL, and show_stack() otherwise. After that commit, show_stack() is called regardless of whether show_regs() has been called or not, leading to duplicated Call Trace: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at arch/powerpc/mm/nohash/8xx.c:186 mmu_mark_initmem_nx+0x24/0x94 CPU: 0 PID: 1 Comm: swapper Not tainted 5.10.0-rc2-s3k-dev-01375-gf46ec0d3ecbd-dirty #4092 NIP: c00128b4 LR: c0010228 CTR: 00000000 REGS: c9023e40 TRAP: 0700 Not tainted (5.10.0-rc2-s3k-dev-01375-gf46ec0d3ecbd-dirty) MSR: 00029032 CR: 24000424 XER: 00000000 GPR00: c0010228 c9023ef8 c2100000 0074c000 ffffffff 00000000 c2151000 c07b3880 GPR08: ff000900 0074c000 c8000000 c33b53a8 24000822 00000000 c0003a20 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 GPR24: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00800000 NIP [c00128b4] mmu_mark_initmem_nx+0x24/0x94 LR [c0010228] free_initmem+0x20/0x58 Call Trace: free_initmem+0x20/0x58 kernel_init+0x1c/0x114 ret_from_kernel_thread+0x14/0x1c Instruction dump: 7d291850 7d234b78 4e800020 9421ffe0 7c0802a6 bfc10018 3fe0c060 3bff0000 3fff4080 3bffffff 90010024 57ff0010 <0fe00000> 392001cd 7c3e0b78 953e0008 CPU: 0 PID: 1 Comm: swapper Not tainted 5.10.0-rc2-s3k-dev-01375-gf46ec0d3ecbd-dirty #4092 Call Trace: __warn+0x8c/0xd8 (unreliable) report_bug+0x11c/0x154 program_check_exception+0x1dc/0x6e0 ret_from_except_full+0x0/0x4 --- interrupt: 700 at mmu_mark_initmem_nx+0x24/0x94 LR = free_initmem+0x20/0x58 free_initmem+0x20/0x58 kernel_init+0x1c/0x114 ret_from_kernel_thread+0x14/0x1c ---[ end trace 31702cd2a9570752 ]--- Only call show_stack() when regs is NULL. Fixes: 3f388f28639f ("panic: dump registers on panic_on_warn") Signed-off-by: Christophe Leroy Signed-off-by: Andrew Morton Cc: Alexey Kardashevskiy Cc: Kefeng Wang Link: https://lkml.kernel.org/r/e8c055458b080707f1bc1a98ff8bea79d0cec445.1604748361.git.christophe.leroy@csgroup.eu Signed-off-by: Linus Torvalds --- kernel/panic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index 396142ee43fd..332736a72a58 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -605,7 +605,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, panic("panic_on_warn set ...\n"); } - dump_stack(); + if (!regs) + dump_stack(); print_irqtrace_events(current); From f5785283dd64867a711ca1fb1f5bb172f252ecdf Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 13 Nov 2020 22:52:23 -0800 Subject: [PATCH 507/710] ocfs2: initialize ip_next_orphan Though problem if found on a lower 4.1.12 kernel, I think upstream has same issue. In one node in the cluster, there is the following callback trace: # cat /proc/21473/stack __ocfs2_cluster_lock.isra.36+0x336/0x9e0 [ocfs2] ocfs2_inode_lock_full_nested+0x121/0x520 [ocfs2] ocfs2_evict_inode+0x152/0x820 [ocfs2] evict+0xae/0x1a0 iput+0x1c6/0x230 ocfs2_orphan_filldir+0x5d/0x100 [ocfs2] ocfs2_dir_foreach_blk+0x490/0x4f0 [ocfs2] ocfs2_dir_foreach+0x29/0x30 [ocfs2] ocfs2_recover_orphans+0x1b6/0x9a0 [ocfs2] ocfs2_complete_recovery+0x1de/0x5c0 [ocfs2] process_one_work+0x169/0x4a0 worker_thread+0x5b/0x560 kthread+0xcb/0xf0 ret_from_fork+0x61/0x90 The above stack is not reasonable, the final iput shouldn't happen in ocfs2_orphan_filldir() function. Looking at the code, 2067 /* Skip inodes which are already added to recover list, since dio may 2068 * happen concurrently with unlink/rename */ 2069 if (OCFS2_I(iter)->ip_next_orphan) { 2070 iput(iter); 2071 return 0; 2072 } 2073 The logic thinks the inode is already in recover list on seeing ip_next_orphan is non-NULL, so it skip this inode after dropping a reference which incremented in ocfs2_iget(). While, if the inode is already in recover list, it should have another reference and the iput() at line 2070 should not be the final iput (dropping the last reference). So I don't think the inode is really in the recover list (no vmcore to confirm). Note that ocfs2_queue_orphans(), though not shown up in the call back trace, is holding cluster lock on the orphan directory when looking up for unlinked inodes. The on disk inode eviction could involve a lot of IOs which may need long time to finish. That means this node could hold the cluster lock for very long time, that can lead to the lock requests (from other nodes) to the orhpan directory hang for long time. Looking at more on ip_next_orphan, I found it's not initialized when allocating a new ocfs2_inode_info structure. This causes te reflink operations from some nodes hang for very long time waiting for the cluster lock on the orphan directory. Fix: initialize ip_next_orphan as NULL. Signed-off-by: Wengang Wang Signed-off-by: Andrew Morton Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Link: https://lkml.kernel.org/r/20201109171746.27884-1-wen.gang.wang@oracle.com Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 1d91dd1e8711..2febc76e9de7 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1713,6 +1713,7 @@ static void ocfs2_inode_init_once(void *data) oi->ip_blkno = 0ULL; oi->ip_clusters = 0; + oi->ip_next_orphan = NULL; ocfs2_resv_init_once(&oi->ip_la_data_resv); From 92307069a96c07d9b6e74b96b79390e7cd7d2111 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Thu, 12 Nov 2020 12:43:35 +0100 Subject: [PATCH 508/710] net: dsa: mv88e6xxx: Avoid VTU corruption on 6097 As soon as you add the second port to a VLAN, all other port membership configuration is overwritten with zeroes. The HW interprets this as all ports being "unmodified members" of the VLAN. In the simple case when all ports belong to the same VLAN, switching will still work. But using multiple VLANs or trying to set multiple ports as tagged members will not work. On the 6352, doing a VTU GetNext op, followed by an STU GetNext op will leave you with both the member- and state- data in the VTU/STU data registers. But on the 6097 (which uses the same implementation), the STU GetNext will override the information gathered from the VTU GetNext. Separate the two stages, parsing the result of the VTU GetNext before doing the STU GetNext. We opt to update the existing implementation for all applicable chips, as opposed to creating a separate callback for 6097, because although the previous implementation did work for (at least) 6352, the datasheet does not mention the masking behavior. Fixes: ef6fcea37f01 ("net: dsa: mv88e6xxx: get STU entry on VTU GetNext") Signed-off-by: Tobias Waldekranz Link: https://lore.kernel.org/r/20201112114335.27371-1-tobias@waldekranz.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/global1_vtu.c | 61 ++++++++++++++++++++----- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 48390b7b18ad..1048509a849b 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -125,11 +125,9 @@ static int mv88e6xxx_g1_vtu_vid_write(struct mv88e6xxx_chip *chip, * Offset 0x08: VTU/STU Data Register 2 * Offset 0x09: VTU/STU Data Register 3 */ - -static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) +static int mv88e6185_g1_vtu_stu_data_read(struct mv88e6xxx_chip *chip, + u16 *regs) { - u16 regs[3]; int i; /* Read all 3 VTU/STU Data registers */ @@ -142,12 +140,45 @@ static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, return err; } - /* Extract MemberTag and PortState data */ + return 0; +} + +static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_vtu_entry *entry) +{ + u16 regs[3]; + int err; + int i; + + err = mv88e6185_g1_vtu_stu_data_read(chip, regs); + if (err) + return err; + + /* Extract MemberTag data */ for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { unsigned int member_offset = (i % 4) * 4; - unsigned int state_offset = member_offset + 2; entry->member[i] = (regs[i / 4] >> member_offset) & 0x3; + } + + return 0; +} + +static int mv88e6185_g1_stu_data_read(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_vtu_entry *entry) +{ + u16 regs[3]; + int err; + int i; + + err = mv88e6185_g1_vtu_stu_data_read(chip, regs); + if (err) + return err; + + /* Extract PortState data */ + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { + unsigned int state_offset = (i % 4) * 4 + 2; + entry->state[i] = (regs[i / 4] >> state_offset) & 0x3; } @@ -349,6 +380,10 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, if (err) return err; + err = mv88e6185_g1_stu_data_read(chip, entry); + if (err) + return err; + /* VTU DBNum[3:0] are located in VTU Operation 3:0 * VTU DBNum[7:4] are located in VTU Operation 11:8 */ @@ -374,11 +409,6 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return err; if (entry->valid) { - /* Fetch (and mask) VLAN PortState data from the STU */ - err = mv88e6xxx_g1_vtu_stu_get(chip, entry); - if (err) - return err; - err = mv88e6185_g1_vtu_data_read(chip, entry); if (err) return err; @@ -386,6 +416,15 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, err = mv88e6xxx_g1_vtu_fid_read(chip, entry); if (err) return err; + + /* Fetch VLAN PortState data from the STU */ + err = mv88e6xxx_g1_vtu_stu_get(chip, entry); + if (err) + return err; + + err = mv88e6185_g1_stu_data_read(chip, entry); + if (err) + return err; } return 0; From 65b422d9b61ba12c08150784e8012fa1892ad03e Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 12 Nov 2020 14:38:37 +0100 Subject: [PATCH 509/710] vsock: forward all packets to the host when no H2G is registered Before commit c0cfa2d8a788 ("vsock: add multi-transports support"), if a G2H transport was loaded (e.g. virtio transport), every packets was forwarded to the host, regardless of the destination CID. The H2G transports implemented until then (vhost-vsock, VMCI) always responded with an error, if the destination CID was not VMADDR_CID_HOST. From that commit, we are using the remote CID to decide which transport to use, so packets with remote CID > VMADDR_CID_HOST(2) are sent only through H2G transport. If no H2G is available, packets are discarded directly in the guest. Some use cases (e.g. Nitro Enclaves [1]) rely on the old behaviour to implement sibling VMs communication, so we restore the old behavior when no H2G is registered. It will be up to the host to discard packets if the destination is not the right one. As it was already implemented before adding multi-transport support. Tested with nested QEMU/KVM by me and Nitro Enclaves by Andra. [1] Documentation/virt/ne_overview.rst Cc: Jorgen Hansen Cc: Dexuan Cui Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Reported-by: Andra Paraschiv Tested-by: Andra Paraschiv Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201112133837.34183-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b4d7b8aba003..d10916ab4526 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -438,7 +438,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) case SOCK_STREAM: if (vsock_use_local_transport(remote_cid)) new_transport = transport_local; - else if (remote_cid <= VMADDR_CID_HOST) + else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g) new_transport = transport_g2h; else new_transport = transport_h2g; From 3ad216ee73abc554ed8f13f4f8b70845a7bef6da Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 14 Nov 2020 17:27:57 +0000 Subject: [PATCH 510/710] afs: Fix afs_write_end() when called with copied == 0 [ver #3] When afs_write_end() is called with copied == 0, it tries to set the dirty region, but there's no way to actually encode a 0-length region in the encoding in page->private. "0,0", for example, indicates a 1-byte region at offset 0. The maths miscalculates this and sets it incorrectly. Fix it to just do nothing but unlock and put the page in this case. We don't actually need to mark the page dirty as nothing presumably changed. Fixes: 65dd2d6072d3 ("afs: Alter dirty range encoding in page->private") Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/write.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 50371207f327..c9195fc67fd8 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -169,11 +169,14 @@ int afs_write_end(struct file *file, struct address_space *mapping, unsigned int f, from = pos & (PAGE_SIZE - 1); unsigned int t, to = from + copied; loff_t i_size, maybe_i_size; - int ret; + int ret = 0; _enter("{%llx:%llu},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); + if (copied == 0) + goto out; + maybe_i_size = pos + copied; i_size = i_size_read(&vnode->vfs_inode); From 057a10fa1f73d745c8e69aa54ab147715f5630ae Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 14 Nov 2020 13:22:53 +0800 Subject: [PATCH 511/710] sctp: change to hold/put transport for proto_unreach_timer A call trace was found in Hangbin's Codenomicon testing with debug kernel: [ 2615.981988] ODEBUG: free active (active state 0) object type: timer_list hint: sctp_generate_proto_unreach_event+0x0/0x3a0 [sctp] [ 2615.995050] WARNING: CPU: 17 PID: 0 at lib/debugobjects.c:328 debug_print_object+0x199/0x2b0 [ 2616.095934] RIP: 0010:debug_print_object+0x199/0x2b0 [ 2616.191533] Call Trace: [ 2616.194265] [ 2616.202068] debug_check_no_obj_freed+0x25e/0x3f0 [ 2616.207336] slab_free_freelist_hook+0xeb/0x140 [ 2616.220971] kfree+0xd6/0x2c0 [ 2616.224293] rcu_do_batch+0x3bd/0xc70 [ 2616.243096] rcu_core+0x8b9/0xd00 [ 2616.256065] __do_softirq+0x23d/0xacd [ 2616.260166] irq_exit+0x236/0x2a0 [ 2616.263879] smp_apic_timer_interrupt+0x18d/0x620 [ 2616.269138] apic_timer_interrupt+0xf/0x20 [ 2616.273711] This is because it holds asoc when transport->proto_unreach_timer starts and puts asoc when the timer stops, and without holding transport the transport could be freed when the timer is still running. So fix it by holding/putting transport instead for proto_unreach_timer in transport, just like other timers in transport. v1->v2: - Also use sctp_transport_put() for the "out_unlock:" path in sctp_generate_proto_unreach_event(), as Marcelo noticed. Fixes: 50b5d6ad6382 ("sctp: Fix a race between ICMP protocol unreachable and connect()") Reported-by: Hangbin Liu Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/102788809b554958b13b95d33440f5448113b8d6.1605331373.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/input.c | 4 ++-- net/sctp/sm_sideeffect.c | 4 ++-- net/sctp/transport.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 55d4fc6f371d..d508f6f3dd08 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -449,7 +449,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, else { if (!mod_timer(&t->proto_unreach_timer, jiffies + (HZ/20))) - sctp_association_hold(asoc); + sctp_transport_hold(t); } } else { struct net *net = sock_net(sk); @@ -458,7 +458,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, "encountered!\n", __func__); if (del_timer(&t->proto_unreach_timer)) - sctp_association_put(asoc); + sctp_transport_put(t); sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 813d30767204..0948f14ce221 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -419,7 +419,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t) /* Try again later. */ if (!mod_timer(&transport->proto_unreach_timer, jiffies + (HZ/20))) - sctp_association_hold(asoc); + sctp_transport_hold(transport); goto out_unlock; } @@ -435,7 +435,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t) out_unlock: bh_unlock_sock(sk); - sctp_association_put(asoc); + sctp_transport_put(transport); } /* Handle the timeout of the RE-CONFIG timer. */ diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 806af58f4375..60fcf31cdcfb 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -133,7 +133,7 @@ void sctp_transport_free(struct sctp_transport *transport) /* Delete the ICMP proto unreachable timer if it's active. */ if (del_timer(&transport->proto_unreach_timer)) - sctp_association_put(transport->asoc); + sctp_transport_put(transport); sctp_transport_put(transport); } From 1ba86d4366e023d96df3dbe415eea7f1dc08c303 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 13 Nov 2020 16:30:40 -0500 Subject: [PATCH 512/710] netlabel: fix an uninitialized warning in netlbl_unlabel_staticlist() Static checking revealed that a previous fix to netlbl_unlabel_staticlist() leaves a stack variable uninitialized, this patches fixes that. Fixes: 866358ec331f ("netlabel: fix our progress tracking in netlbl_unlabel_staticlist()") Reported-by: Dan Carpenter Signed-off-by: Paul Moore Reviewed-by: James Morris Link: https://lore.kernel.org/r/160530304068.15651.18355773009751195447.stgit@sifl Signed-off-by: Jakub Kicinski --- net/netlabel/netlabel_unlabeled.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index fc55c9116da0..ccb491642811 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1167,7 +1167,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; u32 skip_addr4 = cb->args[2]; - u32 iter_bkt, iter_chain, iter_addr4 = 0, iter_addr6 = 0; + u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; From e35df62e04cc6fc4b9d90d054732f138349ff9b1 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Thu, 12 Nov 2020 13:59:49 -0500 Subject: [PATCH 513/710] lan743x: fix issue causing intermittent kernel log warnings When running this chip on arm imx6, we intermittently observe the following kernel warning in the log, especially when the system is under high load: [ 50.119484] ------------[ cut here ]------------ [ 50.124377] WARNING: CPU: 0 PID: 303 at kernel/softirq.c:169 __local_bh_enable_ip+0x100/0x184 [ 50.132925] IRQs not enabled as expected [ 50.159250] CPU: 0 PID: 303 Comm: rngd Not tainted 5.7.8 #1 [ 50.164837] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 50.171395] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 50.179162] [] (show_stack) from [] (dump_stack+0xac/0xd8) [ 50.186408] [] (dump_stack) from [] (__warn+0xd0/0x10c) [ 50.193391] [] (__warn) from [] (warn_slowpath_fmt+0x98/0xc4) [ 50.200892] [] (warn_slowpath_fmt) from [] (__local_bh_enable_ip+0x100/0x184) [ 50.209860] [] (__local_bh_enable_ip) from [] (destroy_conntrack+0x48/0xd8 [nf_conntrack]) [ 50.220038] [] (destroy_conntrack [nf_conntrack]) from [] (nf_conntrack_destroy+0x94/0x168) [ 50.230160] [] (nf_conntrack_destroy) from [] (skb_release_head_state+0xa0/0xd0) [ 50.239314] [] (skb_release_head_state) from [] (skb_release_all+0xc/0x24) [ 50.247946] [] (skb_release_all) from [] (consume_skb+0x74/0x17c) [ 50.255796] [] (consume_skb) from [] (lan743x_tx_release_desc+0x120/0x124) [ 50.264428] [] (lan743x_tx_release_desc) from [] (lan743x_tx_napi_poll+0x5c/0x18c) [ 50.273755] [] (lan743x_tx_napi_poll) from [] (net_rx_action+0x118/0x4a4) [ 50.282306] [] (net_rx_action) from [] (__do_softirq+0x13c/0x53c) [ 50.290157] [] (__do_softirq) from [] (irq_exit+0x150/0x17c) [ 50.297575] [] (irq_exit) from [] (__handle_domain_irq+0x60/0xb0) [ 50.305423] [] (__handle_domain_irq) from [] (gic_handle_irq+0x4c/0x90) [ 50.313790] [] (gic_handle_irq) from [] (__irq_usr+0x54/0x80) [ 50.321287] Exception stack(0xecd99fb0 to 0xecd99ff8) [ 50.326355] 9fa0: 1cf1aa74 00000001 00000001 00000000 [ 50.334547] 9fc0: 00000001 00000000 00000000 00000000 00000000 00000000 00004097 b6d17d14 [ 50.342738] 9fe0: 00000001 b6d17c60 00000000 b6e71f94 800b0010 ffffffff [ 50.349364] irq event stamp: 2525027 [ 50.352955] hardirqs last enabled at (2525026): [] net_rx_action+0xb4/0x4a4 [ 50.360892] hardirqs last disabled at (2525027): [] _raw_spin_lock_irqsave+0x1c/0x50 [ 50.369517] softirqs last enabled at (2524660): [] __do_softirq+0x38c/0x53c [ 50.377446] softirqs last disabled at (2524693): [] irq_exit+0x150/0x17c [ 50.385027] ---[ end trace c0b571db4bc8087d ]--- The driver is calling dev_kfree_skb() from code inside a spinlock, where h/w interrupts are disabled. This is forbidden, as documented in include/linux/netdevice.h. The correct function to use dev_kfree_skb_irq(), or dev_kfree_skb_any(). Fix by using the correct dev_kfree_skb_xxx() functions: in lan743x_tx_release_desc(): called by lan743x_tx_release_completed_descriptors() called by in lan743x_tx_napi_poll() which holds a spinlock called by lan743x_tx_release_all_descriptors() called by lan743x_tx_close() which can-sleep conclusion: use dev_kfree_skb_any() in lan743x_tx_xmit_frame(): which holds a spinlock conclusion: use dev_kfree_skb_irq() in lan743x_tx_close(): which can-sleep conclusion: use dev_kfree_skb() in lan743x_rx_release_ring_element(): called by lan743x_rx_close() which can-sleep called by lan743x_rx_open() which can-sleep conclusion: use dev_kfree_skb() Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Sven Van Asbroeck Link: https://lore.kernel.org/r/20201112185949.11315-1-TheSven73@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e2c99d909247..18c053b860ab 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1307,13 +1307,13 @@ clean_up_data_descriptor: goto clear_active; if (!(buffer_info->flags & TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED)) { - dev_kfree_skb(buffer_info->skb); + dev_kfree_skb_any(buffer_info->skb); goto clear_skb; } if (cleanup) { lan743x_ptp_unrequest_tx_timestamp(tx->adapter); - dev_kfree_skb(buffer_info->skb); + dev_kfree_skb_any(buffer_info->skb); } else { ignore_sync = (buffer_info->flags & TX_BUFFER_INFO_FLAG_IGNORE_SYNC) != 0; @@ -1623,7 +1623,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, if (required_number_of_descriptors > lan743x_tx_get_avail_desc(tx)) { if (required_number_of_descriptors > (tx->ring_size - 1)) { - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); } else { /* save to overflow buffer */ tx->overflow_skb = skb; @@ -1656,7 +1656,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, start_frame_length, do_timestamp, skb->ip_summed == CHECKSUM_PARTIAL)) { - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); goto unlock; } @@ -1675,7 +1675,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, * frame assembler clean up was performed inside * lan743x_tx_frame_add_fragment */ - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); goto unlock; } } From 796a2665ca3e91ebaba7222f76fd9a035714e2d8 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Thu, 12 Nov 2020 15:47:41 -0500 Subject: [PATCH 514/710] lan743x: prevent entire kernel HANG on open, for some platforms On arm imx6, when opening the chip's netdev, the whole Linux kernel intermittently hangs/freezes. This is caused by a bug in the driver code which tests if pcie interrupts are working correctly, using the software interrupt: 1. open: enable the software interrupt 2. open: tell the chip to assert the software interrupt 3. open: wait for flag 4. ISR: acknowledge s/w interrupt, set flag 5. open: notice flag, disable the s/w interrupt, continue Unfortunately the ISR only acknowledges the s/w interrupt, but does not disable it. This will re-trigger the ISR in a tight loop. On some (lucky) platforms, open proceeds to disable the s/w interrupt even while the ISR is 'spinning'. On arm imx6, the spinning ISR does not allow open to proceed, resulting in a hung Linux kernel. Fix minimally by disabling the s/w interrupt in the ISR, which will prevent it from spinning. This won't break anything because the s/w interrupt is used as a one-shot interrupt. Note that this is a minimal fix, overlooking many possible cleanups, e.g.: - lan743x_intr_software_isr() is completely redundant and reads INT_STS twice for no apparent reason - disabling the s/w interrupt in lan743x_intr_test_isr() is now redundant, but harmless - waiting on software_isr_flag can be converted from a sleeping poll loop to wait_event_timeout() Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Tested-by: Sven Van Asbroeck # arm imx6 lan7430 Signed-off-by: Sven Van Asbroeck Link: https://lore.kernel.org/r/20201112204741.12375-1-TheSven73@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 18c053b860ab..b319c22c211c 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -148,7 +148,8 @@ static void lan743x_intr_software_isr(void *context) int_sts = lan743x_csr_read(adapter, INT_STS); if (int_sts & INT_BIT_SW_GP_) { - lan743x_csr_write(adapter, INT_STS, INT_BIT_SW_GP_); + /* disable the interrupt to prevent repeated re-triggering */ + lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_SW_GP_); intr->software_isr_flag = 1; } } From 56311a315da7ebc668dbcc2f1c99689cc10796c4 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 13 Nov 2020 09:09:02 +0800 Subject: [PATCH 515/710] net: stmmac: dwmac_lib: enlarge dma reset timeout If the phy enables power saving technology, the dwmac's software reset needs more time to complete, enlarge dma reset timeout to 200000us. Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20201113090902.5c7aab1a@xhacker.debian Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index cb87d31a99df..57a53a600aa5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -23,7 +23,7 @@ int dwmac_dma_reset(void __iomem *ioaddr) return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, !(value & DMA_BUS_MODE_SFT_RESET), - 10000, 100000); + 10000, 200000); } /* CSR1 enables the transmit DMA to check for new descriptor */ From 849920c703392957f94023f77ec89ca6cf119d43 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 13 Nov 2020 19:16:22 +0800 Subject: [PATCH 516/710] devlink: Add missing genlmsg_cancel() in devlink_nl_sb_port_pool_fill() If sb_occ_port_pool_get() failed in devlink_nl_sb_port_pool_fill(), msg should be canceled by genlmsg_cancel(). Fixes: df38dafd2559 ("devlink: implement shared buffer occupancy monitoring interface") Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20201113111622.11040-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index ab4b1368904f..4b0211590aac 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1448,7 +1448,7 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index, pool_index, &cur, &max); if (err && err != -EOPNOTSUPP) - return err; + goto sb_occ_get_failure; if (!err) { if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) goto nla_put_failure; @@ -1461,8 +1461,10 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, return 0; nla_put_failure: + err = -EMSGSIZE; +sb_occ_get_failure: genlmsg_cancel(msg, hdr); - return -EMSGSIZE; + return err; } static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb, From c887c9b9ca62c051d339b1c7b796edf2724029ed Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 15 Nov 2020 08:55:43 -0500 Subject: [PATCH 517/710] kvm: mmu: fix is_tdp_mmu_check when the TDP MMU is not in use In some cases where shadow paging is in use, the root page will be either mmu->pae_root or vcpu->arch.mmu->lm_root. Then it will not have an associated struct kvm_mmu_page, because it is allocated with alloc_page instead of kvm_mmu_alloc_page. Just return false quickly from is_tdp_mmu_root if the TDP MMU is not in use, which also includes the case where shadow paging is enabled. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 27e381c9da6c..ff28a5c6abd6 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -49,7 +49,14 @@ bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa) { struct kvm_mmu_page *sp; + if (!kvm->arch.tdp_mmu_enabled) + return false; + if (WARN_ON(!VALID_PAGE(hpa))) + return false; + sp = to_shadow_page(hpa); + if (WARN_ON(!sp)) + return false; return sp->tdp_mmu_page && sp->root_count; } From c8c958a58fc67f353289986850a0edf553435702 Mon Sep 17 00:00:00 2001 From: Anant Thazhemadam Date: Wed, 4 Nov 2020 03:09:05 +0530 Subject: [PATCH 518/710] can: af_can: prevent potential access of uninitialized member in can_rcv() In can_rcv(), cfd->len is uninitialized when skb->len = 0, and this uninitialized cfd->len is accessed nonetheless by pr_warn_once(). Fix this uninitialized variable access by checking cfd->len's validity condition (cfd->len > CAN_MAX_DLEN) separately after the skb->len's condition is checked, and appropriately modify the log messages that are generated as well. In case either of the required conditions fail, the skb is freed and NET_RX_DROP is returned, same as before. Fixes: 8cb68751c115 ("can: af_can: can_rcv(): replace WARN_ONCE by pr_warn_once") Reported-by: syzbot+9bcb0c9409066696d3aa@syzkaller.appspotmail.com Tested-by: Anant Thazhemadam Signed-off-by: Anant Thazhemadam Link: https://lore.kernel.org/r/20201103213906.24219-2-anant.thazhemadam@gmail.com Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 6373ab9c5507..e8d4e3ef5322 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -677,16 +677,25 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN)) { - pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n", + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", + dev->type, skb->len); + goto free_skb; + } + + /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */ + if (unlikely(cfd->len > CAN_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d, datalen %d\n", dev->type, skb->len, cfd->len); - kfree_skb(skb); - return NET_RX_DROP; + goto free_skb; } can_receive(skb, dev); return NET_RX_SUCCESS; + +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, From 9aa9379d8f868e91719333a7f063ccccc0579acc Mon Sep 17 00:00:00 2001 From: Anant Thazhemadam Date: Wed, 4 Nov 2020 03:09:06 +0530 Subject: [PATCH 519/710] can: af_can: prevent potential access of uninitialized member in canfd_rcv() In canfd_rcv(), cfd->len is uninitialized when skb->len = 0, and this uninitialized cfd->len is accessed nonetheless by pr_warn_once(). Fix this uninitialized variable access by checking cfd->len's validity condition (cfd->len > CANFD_MAX_DLEN) separately after the skb->len's condition is checked, and appropriately modify the log messages that are generated as well. In case either of the required conditions fail, the skb is freed and NET_RX_DROP is returned, same as before. Fixes: d4689846881d ("can: af_can: canfd_rcv(): replace WARN_ONCE by pr_warn_once") Reported-by: syzbot+9bcb0c9409066696d3aa@syzkaller.appspotmail.com Tested-by: Anant Thazhemadam Signed-off-by: Anant Thazhemadam Link: https://lore.kernel.org/r/20201103213906.24219-3-anant.thazhemadam@gmail.com Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index e8d4e3ef5322..5d124c155904 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -703,16 +703,25 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN)) { - pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n", + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", + dev->type, skb->len); + goto free_skb; + } + + /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */ + if (unlikely(cfd->len > CANFD_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d, datalen %d\n", dev->type, skb->len, cfd->len); - kfree_skb(skb); - return NET_RX_DROP; + goto free_skb; } can_receive(skb, dev); return NET_RX_SUCCESS; + +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } /* af_can protocol functions */ From a1e654070a60d5d4f7cce59c38f4ca790bb79121 Mon Sep 17 00:00:00 2001 From: Alejandro Concepcion Rodriguez Date: Thu, 5 Nov 2020 21:51:47 +0000 Subject: [PATCH 520/710] can: dev: can_restart(): post buffer from the right context netif_rx() is meant to be called from interrupt contexts. can_restart() may be called by can_restart_work(), which is called from a worqueue, so it may run in process context. Use netif_rx_ni() instead. Fixes: 39549eef3587 ("can: CAN Network device driver and Netlink interface") Co-developed-by: Loris Fauster Signed-off-by: Loris Fauster Signed-off-by: Alejandro Concepcion Rodriguez Link: https://lore.kernel.org/r/4e84162b-fb31-3a73-fa9a-9438b4bd5234@acoro.eu [mkl: use netif_rx_ni() instead of netif_rx_any_context()] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 6dee4f8f2024..81e39d7507d8 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -592,7 +592,7 @@ static void can_restart(struct net_device *dev) cf->can_id |= CAN_ERR_RESTARTED; - netif_rx(skb); + netif_rx_ni(skb); stats->rx_packets++; stats->rx_bytes += cf->can_dlc; From 7968c7c79d3be8987feb8021f0c46e6866831408 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 14 Nov 2020 19:17:08 +0800 Subject: [PATCH 521/710] can: ti_hecc: Fix memleak in ti_hecc_probe In the error handling, we should goto the probe_exit_candev to free ndev to prevent memory leak. Fixes: dabf54dd1c63 ("can: ti_hecc: Convert TI HECC driver to DT only driver") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201114111708.3465543-1-zhangqilong3@huawei.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 9913f5458279..2c22f40e12bd 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -881,7 +881,8 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->base = devm_platform_ioremap_resource_byname(pdev, "hecc"); if (IS_ERR(priv->base)) { dev_err(&pdev->dev, "hecc ioremap failed\n"); - return PTR_ERR(priv->base); + err = PTR_ERR(priv->base); + goto probe_exit_candev; } /* handle hecc-ram memory */ @@ -889,20 +890,22 @@ static int ti_hecc_probe(struct platform_device *pdev) "hecc-ram"); if (IS_ERR(priv->hecc_ram)) { dev_err(&pdev->dev, "hecc-ram ioremap failed\n"); - return PTR_ERR(priv->hecc_ram); + err = PTR_ERR(priv->hecc_ram); + goto probe_exit_candev; } /* handle mbx memory */ priv->mbx = devm_platform_ioremap_resource_byname(pdev, "mbx"); if (IS_ERR(priv->mbx)) { dev_err(&pdev->dev, "mbx ioremap failed\n"); - return PTR_ERR(priv->mbx); + err = PTR_ERR(priv->mbx); + goto probe_exit_candev; } irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!irq) { dev_err(&pdev->dev, "No irq resource\n"); - goto probe_exit; + goto probe_exit_candev; } priv->ndev = ndev; @@ -966,7 +969,7 @@ probe_exit_release_clk: clk_put(priv->clk); probe_exit_candev: free_candev(ndev); -probe_exit: + return err; } From 81c9c8e0adef3285336b942f93287c554c89e6c6 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 28 Aug 2019 21:16:55 +0200 Subject: [PATCH 522/710] can: mcba_usb: mcba_usb_start_xmit(): first fill skb, then pass to can_put_echo_skb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver has to first fill the skb with data and then handle it to can_put_echo_skb(). This patch moves the can_put_echo_skb() down, right before sending the skb out via USB. Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer") Cc: Remigiusz Kołłątaj Link: https://lore.kernel.org/r/20201111221204.1639007-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 5857b37dcd96..e97f2e0da6b0 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -326,8 +326,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, if (!ctx) return NETDEV_TX_BUSY; - can_put_echo_skb(skb, priv->netdev, ctx->ndx); - if (cf->can_id & CAN_EFF_FLAG) { /* SIDH | SIDL | EIDH | EIDL * 28 - 21 | 20 19 18 x x x 17 16 | 15 - 8 | 7 - 0 @@ -357,6 +355,8 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, if (cf->can_id & CAN_RTR_FLAG) usb_msg.dlc |= MCBA_DLC_RTR_MASK; + can_put_echo_skb(skb, priv->netdev, ctx->ndx); + err = mcba_usb_xmit(priv, (struct mcba_usb_msg *)&usb_msg, ctx); if (err) goto xmit_failed; From 8a68cc0d690c9e5730d676b764c6f059343b842c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 5 Nov 2020 11:24:27 +0000 Subject: [PATCH 523/710] can: peak_usb: fix potential integer overflow on shift of a int The left shift of int 32 bit integer constant 1 is evaluated using 32 bit arithmetic and then assigned to a signed 64 bit variable. In the case where time_ref->adapter->ts_used_bits is 32 or more this can lead to an oveflow. Avoid this by shifting using the BIT_ULL macro instead. Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20201105112427.40688-1-colin.king@canonical.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index c2764799f9ef..204ccb27d6d9 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -156,7 +156,7 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time) if (time_ref->ts_dev_1 < time_ref->ts_dev_2) { /* case when event time (tsw) wraps */ if (ts < time_ref->ts_dev_1) - delta_ts = 1 << time_ref->adapter->ts_used_bits; + delta_ts = BIT_ULL(time_ref->adapter->ts_used_bits); /* Otherwise, sync time counter (ts_dev_2) has wrapped: * handle case when event time (tsn) hasn't. @@ -168,7 +168,7 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time) * tsn ts */ } else if (time_ref->ts_dev_1 < ts) { - delta_ts = -(1 << time_ref->adapter->ts_used_bits); + delta_ts = -BIT_ULL(time_ref->adapter->ts_used_bits); } /* add delay between last sync and event timestamps */ From 499aa923c56769274f81e60414b8de4912864b8d Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 14 Oct 2020 13:41:36 +0200 Subject: [PATCH 524/710] can: flexcan: flexcan_setup_stop_mode(): add missing "req_bit" to stop mode property comment In the patch d9b081e3fc4b ("can: flexcan: remove ack_grp and ack_bit handling from driver") the unused ack_grp and ack_bit were removed from the driver. However in the comment, the "req_bit" was accidentally removed, too. This patch adds back the "req_bit" bit. Fixes: d9b081e3fc4b ("can: flexcan: remove ack_grp and ack_bit handling from driver") Reported-by: Joakim Zhang Link: http://lore.kernel.org/r/20201014114810.2911135-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 881799bd9c5e..4e8fdb6064bd 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1852,7 +1852,7 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev) return -EINVAL; /* stop mode property format is: - * <&gpr req_gpr>. + * <&gpr req_gpr req_bit>. */ ret = of_property_read_u32_array(np, "fsl,stop-mode", out_val, ARRAY_SIZE(out_val)); From b7ee5bc3e1006433601a058a6a7c24c5272635f4 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sun, 8 Nov 2020 16:30:00 +0800 Subject: [PATCH 525/710] can: flexcan: fix failure handling of pm_runtime_get_sync() pm_runtime_get_sync() will increment pm usage at first and it will resume the device later. If runtime of the device has error or device is in inaccessible state(or other error state), resume operation will fail. If we do not call put operation to decrease the reference, it will result in reference leak in the two functions flexcan_get_berr_counter() and flexcan_open(). Moreover, this device cannot enter the idle state and always stay busy or other non-idle state later. So we should fix it through adding pm_runtime_put_noidle(). Fixes: ca10989632d88 ("can: flexcan: implement can Runtime PM") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201108083000.2599705-1-zhangqilong3@huawei.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 4e8fdb6064bd..d6a9cf0e9b60 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -728,8 +728,10 @@ static int flexcan_get_berr_counter(const struct net_device *dev, int err; err = pm_runtime_get_sync(priv->dev); - if (err < 0) + if (err < 0) { + pm_runtime_put_noidle(priv->dev); return err; + } err = __flexcan_get_berr_counter(dev, bec); @@ -1654,8 +1656,10 @@ static int flexcan_open(struct net_device *dev) } err = pm_runtime_get_sync(priv->dev); - if (err < 0) + if (err < 0) { + pm_runtime_put_noidle(priv->dev); return err; + } err = open_candev(dev); if (err) From 3fcce133f0d9a50d3a23f8e2bc950197b4e03900 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Mon, 13 Apr 2020 16:10:13 +0200 Subject: [PATCH 526/710] can: tcan4x5x: replace depends on REGMAP_SPI with depends on SPI regmap is a library function that gets selected by drivers that need it. No driver modules should depend on it. Instead depends on SPI and select REGMAP_SPI. Depending on REGMAP_SPI makes this driver only build if another driver already selected REGMAP_SPI, as the symbol can't be selected through the menu kernel configuration. Signed-off-by: Enric Balletbo i Serra Link: http://lore.kernel.org/r/20200413141013.506613-1-enric.balletbo@collabora.com Reviewed-by: Dan Murphy Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/Kconfig b/drivers/net/can/m_can/Kconfig index 48be627c85c2..5f9f8192dd0b 100644 --- a/drivers/net/can/m_can/Kconfig +++ b/drivers/net/can/m_can/Kconfig @@ -16,7 +16,8 @@ config CAN_M_CAN_PLATFORM config CAN_M_CAN_TCAN4X5X depends on CAN_M_CAN - depends on REGMAP_SPI + depends on SPI + select REGMAP_SPI tristate "TCAN4X5X M_CAN device" help Say Y here if you want support for Texas Instruments TCAN4x5x From 1ff203badbbf1738027c8395d5b40b0d462b6e4d Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 3 Jan 2020 11:30:34 +0100 Subject: [PATCH 527/710] can: tcan4x5x: tcan4x5x_can_probe(): add missing error checking for devm_regmap_init() This patch adds the missing error checking when initializing the regmap interface fails. Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel") Cc: Dan Murphy Link: http://lore.kernel.org/r/20201019154233.1262589-7-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c index eacd428e07e9..f058bd9104e9 100644 --- a/drivers/net/can/m_can/tcan4x5x.c +++ b/drivers/net/can/m_can/tcan4x5x.c @@ -487,6 +487,10 @@ static int tcan4x5x_can_probe(struct spi_device *spi) priv->regmap = devm_regmap_init(&spi->dev, &tcan4x5x_bus, &spi->dev, &tcan4x5x_regmap); + if (IS_ERR(priv->regmap)) { + ret = PTR_ERR(priv->regmap); + goto out_clk; + } ret = tcan4x5x_power_enable(priv->power, 1); if (ret) From c81d0b6ca665477c761f227807010762630b089f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 10 Aug 2020 22:23:49 +0200 Subject: [PATCH 528/710] can: tcan4x5x: tcan4x5x_can_remove(): fix order of deregistration Change the order in tcan4x5x_can_remove() to be the exact inverse of tcan4x5x_can_probe(). First m_can_class_unregister(), then power down the device. Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel") Cc: Dan Murphy Link: http://lore.kernel.org/r/20201019154233.1262589-10-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c index f058bd9104e9..4fdb7121403a 100644 --- a/drivers/net/can/m_can/tcan4x5x.c +++ b/drivers/net/can/m_can/tcan4x5x.c @@ -527,10 +527,10 @@ static int tcan4x5x_can_remove(struct spi_device *spi) { struct tcan4x5x_priv *priv = spi_get_drvdata(spi); - tcan4x5x_power_enable(priv->power, 0); - m_can_class_unregister(priv->mcan_dev); + tcan4x5x_power_enable(priv->power, 0); + return 0; } From cd0d83eab2e0c26fe87a10debfedbb23901853c1 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Wed, 29 Jan 2020 10:23:30 +0800 Subject: [PATCH 529/710] can: m_can: m_can_handle_state_change(): fix state change m_can_handle_state_change() is called with the new_state as an argument. In the switch statements for CAN_STATE_ERROR_ACTIVE, the comment and the following code indicate that a CAN_STATE_ERROR_WARNING is handled. This patch fixes this problem by changing the case to CAN_STATE_ERROR_WARNING. Signed-off-by: Wu Bo Link: http://lore.kernel.org/r/20200129022330.21248-2-wubo.oduw@gmail.com Cc: Dan Murphy Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 02c5795b7393..63887e23d89c 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -665,7 +665,7 @@ static int m_can_handle_state_change(struct net_device *dev, unsigned int ecr; switch (new_state) { - case CAN_STATE_ERROR_ACTIVE: + case CAN_STATE_ERROR_WARNING: /* error warning state */ cdev->can.can_stats.error_warning++; cdev->can.state = CAN_STATE_ERROR_WARNING; @@ -694,7 +694,7 @@ static int m_can_handle_state_change(struct net_device *dev, __m_can_get_berr_counter(dev, &bec); switch (new_state) { - case CAN_STATE_ERROR_ACTIVE: + case CAN_STATE_ERROR_WARNING: /* error warning state */ cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (bec.txerr > bec.rxerr) ? From a8c22f5b0c689a29f45ef4a110d09fd391debcbc Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Thu, 27 Feb 2020 12:38:29 -0600 Subject: [PATCH 530/710] can: m_can: m_can_class_free_dev(): introduce new function This patch creates a common function that peripherials can call to free the netdev device when failures occur. Fixes: f524f829b75a ("can: m_can: Create a m_can platform framework") Reported-by: Marc Kleine-Budde Signed-off-by: Dan Murphy Link: http://lore.kernel.org/r/20200227183829.21854-2-dmurphy@ti.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 6 ++++++ drivers/net/can/m_can/m_can.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 63887e23d89c..f2c87b76e569 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1812,6 +1812,12 @@ out: } EXPORT_SYMBOL_GPL(m_can_class_allocate_dev); +void m_can_class_free_dev(struct net_device *net) +{ + free_candev(net); +} +EXPORT_SYMBOL_GPL(m_can_class_free_dev); + int m_can_class_register(struct m_can_classdev *m_can_dev) { int ret; diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index 49f42b50627a..b2699a7c9997 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -99,6 +99,7 @@ struct m_can_classdev { }; struct m_can_classdev *m_can_class_allocate_dev(struct device *dev); +void m_can_class_free_dev(struct net_device *net); int m_can_class_register(struct m_can_classdev *cdev); void m_can_class_unregister(struct m_can_classdev *cdev); int m_can_class_get_clocks(struct m_can_classdev *cdev); From 85816aba460ceebed0047381395615891df68c8f Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Thu, 27 Feb 2020 12:38:29 -0600 Subject: [PATCH 531/710] can: m_can: Fix freeing of can device from peripherials Fix leaking netdev device from peripherial devices. The call to allocate the netdev device is made from and managed by the peripherial. Fixes: f524f829b75a ("can: m_can: Create a m_can platform framework") Reported-by: Marc Kleine-Budde Signed-off-by: Dan Murphy Link: http://lore.kernel.org/r/20200227183829.21854-2-dmurphy@ti.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 3 --- drivers/net/can/m_can/m_can_platform.c | 23 +++++++++++++++-------- drivers/net/can/m_can/tcan4x5x.c | 26 ++++++++++++++++++-------- 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index f2c87b76e569..645101d19989 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1856,7 +1856,6 @@ pm_runtime_fail: if (ret) { if (m_can_dev->pm_clock_support) pm_runtime_disable(m_can_dev->dev); - free_candev(m_can_dev->net); } return ret; @@ -1914,8 +1913,6 @@ void m_can_class_unregister(struct m_can_classdev *m_can_dev) unregister_candev(m_can_dev->net); m_can_clk_stop(m_can_dev); - - free_candev(m_can_dev->net); } EXPORT_SYMBOL_GPL(m_can_class_unregister); diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c index e6d0cb9ee02f..161cb9be018c 100644 --- a/drivers/net/can/m_can/m_can_platform.c +++ b/drivers/net/can/m_can/m_can_platform.c @@ -67,32 +67,36 @@ static int m_can_plat_probe(struct platform_device *pdev) return -ENOMEM; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + if (!priv) { + ret = -ENOMEM; + goto probe_fail; + } mcan_class->device_data = priv; - m_can_class_get_clocks(mcan_class); + ret = m_can_class_get_clocks(mcan_class); + if (ret) + goto probe_fail; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "m_can"); addr = devm_ioremap_resource(&pdev->dev, res); irq = platform_get_irq_byname(pdev, "int0"); if (IS_ERR(addr) || irq < 0) { ret = -EINVAL; - goto failed_ret; + goto probe_fail; } /* message ram could be shared */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "message_ram"); if (!res) { ret = -ENODEV; - goto failed_ret; + goto probe_fail; } mram_addr = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!mram_addr) { ret = -ENOMEM; - goto failed_ret; + goto probe_fail; } priv->base = addr; @@ -111,9 +115,10 @@ static int m_can_plat_probe(struct platform_device *pdev) m_can_init_ram(mcan_class); - ret = m_can_class_register(mcan_class); + return m_can_class_register(mcan_class); -failed_ret: +probe_fail: + m_can_class_free_dev(mcan_class->net); return ret; } @@ -134,6 +139,8 @@ static int m_can_plat_remove(struct platform_device *pdev) m_can_class_unregister(mcan_class); + m_can_class_free_dev(mcan_class->net); + platform_set_drvdata(pdev, NULL); return 0; diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c index 4fdb7121403a..e5d7d85e0b6d 100644 --- a/drivers/net/can/m_can/tcan4x5x.c +++ b/drivers/net/can/m_can/tcan4x5x.c @@ -440,14 +440,18 @@ static int tcan4x5x_can_probe(struct spi_device *spi) return -ENOMEM; priv = devm_kzalloc(&spi->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + if (!priv) { + ret = -ENOMEM; + goto out_m_can_class_free_dev; + } priv->power = devm_regulator_get_optional(&spi->dev, "vsup"); - if (PTR_ERR(priv->power) == -EPROBE_DEFER) - return -EPROBE_DEFER; - else + if (PTR_ERR(priv->power) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto out_m_can_class_free_dev; + } else { priv->power = NULL; + } mcan_class->device_data = priv; @@ -460,8 +464,10 @@ static int tcan4x5x_can_probe(struct spi_device *spi) } /* Sanity check */ - if (freq < 20000000 || freq > TCAN4X5X_EXT_CLK_DEF) - return -ERANGE; + if (freq < 20000000 || freq > TCAN4X5X_EXT_CLK_DEF) { + ret = -ERANGE; + goto out_m_can_class_free_dev; + } priv->reg_offset = TCAN4X5X_MCAN_OFFSET; priv->mram_start = TCAN4X5X_MRAM_START; @@ -518,8 +524,10 @@ out_clk: clk_disable_unprepare(mcan_class->cclk); clk_disable_unprepare(mcan_class->hclk); } - + out_m_can_class_free_dev: + m_can_class_free_dev(mcan_class->net); dev_err(&spi->dev, "Probe failed, err=%d\n", ret); + return ret; } @@ -531,6 +539,8 @@ static int tcan4x5x_can_remove(struct spi_device *spi) tcan4x5x_power_enable(priv->power, 0); + m_can_class_free_dev(priv->mcan_dev->net); + return 0; } From a584e9bc1b7e88f24f8504886eafbe6c73d8a97c Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Tue, 25 Aug 2020 11:24:42 +0530 Subject: [PATCH 532/710] can: m_can: m_can_stop(): set device to software init mode before closing There might be some requests pending in the buffer when the interface close sequence occurs. In some devices, these pending requests might lead to the module not shutting down properly when m_can_clk_stop() is called. Therefore, move the device to init state before potentially powering it down. Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Signed-off-by: Faiz Abbas Acked-by: Dan Murphy Link: https://lore.kernel.org/r/20200825055442.16994-1-faiz_abbas@ti.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 645101d19989..e7264043f79a 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1414,6 +1414,9 @@ static void m_can_stop(struct net_device *dev) /* disable all interrupts */ m_can_disable_all_interrupts(cdev); + /* Set init mode to disengage from the network */ + m_can_config_endisable(cdev, true); + /* set the state as STOPPED */ cdev->can.state = CAN_STATE_STOPPED; } From a312db697cb05dfa781848afe8585a1e1f2a5a99 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 13 Nov 2020 16:57:06 +0100 Subject: [PATCH 533/710] vdpasim: fix "mac_pton" undefined error ERROR: modpost: "mac_pton" [drivers/vdpa/vdpa_sim/vdpa_sim.ko] undefined! mac_pton() is defined in lib/net_utils.c and is not built if NET is not set. Select GENERIC_NET_UTILS as vdpasim doesn't depend on NET. Reported-by: kernel test robot Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20201113155706.599434-1-lvivier@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Randy Dunlap # build-tested --- drivers/vdpa/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index d7d32b656102..358f6048dd3c 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -13,6 +13,7 @@ config VDPA_SIM depends on RUNTIME_TESTING_MENU && HAS_DMA select DMA_OPS select VHOST_RING + select GENERIC_NET_UTILS default n help vDPA networking device simulator which loop TX traffic back From 6bcf34224ac1e94103797fd68b9836061762f2b2 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 9 Nov 2020 23:33:19 -0600 Subject: [PATCH 534/710] vhost: add helper to check if a vq has been setup This adds a helper check if a vq has been setup. The next patches will use this when we move the vhost scsi cmd preallocation from per session to per vq. In the per vq case, we only want to allocate cmds for vqs that have actually been setup and not for all the possible vqs. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/1604986403-4931-2-git-send-email-michael.christie@oracle.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Stefan Hajnoczi --- drivers/vhost/vhost.c | 6 ++++++ drivers/vhost/vhost.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5c835a292783..a262e12c6dc2 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -304,6 +304,12 @@ static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx) memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); } +bool vhost_vq_is_setup(struct vhost_virtqueue *vq) +{ + return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq); +} +EXPORT_SYMBOL_GPL(vhost_vq_is_setup); + static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index e016cd3fa02f..b063324c7669 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -190,6 +190,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *, struct vhost_log *log, unsigned int *log_num); void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); +bool vhost_vq_is_setup(struct vhost_virtqueue *vq); int vhost_vq_init_access(struct vhost_virtqueue *); int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads, From 25b98b64e28423b0769313dcaf96423836b1f93d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 9 Nov 2020 23:33:20 -0600 Subject: [PATCH 535/710] vhost scsi: alloc cmds per vq instead of session We currently are limited to 256 cmds per session. This leads to problems where if the user has increased virtqueue_size to more than 2 or cmd_per_lun to more than 256 vhost_scsi_get_tag can fail and the guest will get IO errors. This patch moves the cmd allocation to per vq so we can easily match whatever the user has specified for num_queues and virtqueue_size/cmd_per_lun. It also makes it easier to control how much memory we preallocate. For cases, where perf is not as important and we can use the current defaults (1 vq and 128 cmds per vq) memory use from preallocate cmds is cut in half. For cases, where we are willing to use more memory for higher perf, cmd mem use will now increase as the num queues and queue depth increases. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/1604986403-4931-3-git-send-email-michael.christie@oracle.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Maurizio Lombardi Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 207 ++++++++++++++++++++++++++----------------- 1 file changed, 128 insertions(+), 79 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index b22adf03f584..e31339be7dd7 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -52,7 +52,6 @@ #define VHOST_SCSI_VERSION "v0.1" #define VHOST_SCSI_NAMELEN 256 #define VHOST_SCSI_MAX_CDB_SIZE 32 -#define VHOST_SCSI_DEFAULT_TAGS 256 #define VHOST_SCSI_PREALLOC_SGLS 2048 #define VHOST_SCSI_PREALLOC_UPAGES 2048 #define VHOST_SCSI_PREALLOC_PROT_SGLS 2048 @@ -189,6 +188,9 @@ struct vhost_scsi_virtqueue { * Writers must also take dev mutex and flush under it. */ int inflight_idx; + struct vhost_scsi_cmd *scsi_cmds; + struct sbitmap scsi_tags; + int max_cmds; }; struct vhost_scsi { @@ -324,7 +326,9 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) { struct vhost_scsi_cmd *tv_cmd = container_of(se_cmd, struct vhost_scsi_cmd, tvc_se_cmd); - struct se_session *se_sess = tv_cmd->tvc_nexus->tvn_se_sess; + struct vhost_scsi_virtqueue *svq = container_of(tv_cmd->tvc_vq, + struct vhost_scsi_virtqueue, vq); + struct vhost_scsi_inflight *inflight = tv_cmd->inflight; int i; if (tv_cmd->tvc_sgl_count) { @@ -336,8 +340,8 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) put_page(sg_page(&tv_cmd->tvc_prot_sgl[i])); } - vhost_scsi_put_inflight(tv_cmd->inflight); - target_free_tag(se_sess, se_cmd); + sbitmap_clear_bit(&svq->scsi_tags, se_cmd->map_tag); + vhost_scsi_put_inflight(inflight); } static u32 vhost_scsi_sess_get_index(struct se_session *se_sess) @@ -566,31 +570,31 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } static struct vhost_scsi_cmd * -vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, +vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, unsigned char *cdb, u64 scsi_tag, u16 lun, u8 task_attr, u32 exp_data_len, int data_direction) { + struct vhost_scsi_virtqueue *svq = container_of(vq, + struct vhost_scsi_virtqueue, vq); struct vhost_scsi_cmd *cmd; struct vhost_scsi_nexus *tv_nexus; - struct se_session *se_sess; struct scatterlist *sg, *prot_sg; struct page **pages; - int tag, cpu; + int tag; tv_nexus = tpg->tpg_nexus; if (!tv_nexus) { pr_err("Unable to locate active struct vhost_scsi_nexus\n"); return ERR_PTR(-EIO); } - se_sess = tv_nexus->tvn_se_sess; - tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu); + tag = sbitmap_get(&svq->scsi_tags, 0, false); if (tag < 0) { pr_err("Unable to obtain tag for vhost_scsi_cmd\n"); return ERR_PTR(-ENOMEM); } - cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[tag]; + cmd = &svq->scsi_cmds[tag]; sg = cmd->tvc_sgl; prot_sg = cmd->tvc_prot_sgl; pages = cmd->tvc_upages; @@ -599,7 +603,6 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, cmd->tvc_prot_sgl = prot_sg; cmd->tvc_upages = pages; cmd->tvc_se_cmd.map_tag = tag; - cmd->tvc_se_cmd.map_cpu = cpu; cmd->tvc_tag = scsi_tag; cmd->tvc_lun = lun; cmd->tvc_task_attr = task_attr; @@ -1065,11 +1068,11 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE); goto err; } - cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr, + cmd = vhost_scsi_get_cmd(vq, tpg, cdb, tag, lun, task_attr, exp_data_len + prot_bytes, data_direction); if (IS_ERR(cmd)) { - vq_err(vq, "vhost_scsi_get_tag failed %ld\n", + vq_err(vq, "vhost_scsi_get_cmd failed %ld\n", PTR_ERR(cmd)); goto err; } @@ -1373,6 +1376,83 @@ static void vhost_scsi_flush(struct vhost_scsi *vs) wait_for_completion(&old_inflight[i]->comp); } +static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq) +{ + struct vhost_scsi_virtqueue *svq = container_of(vq, + struct vhost_scsi_virtqueue, vq); + struct vhost_scsi_cmd *tv_cmd; + unsigned int i; + + if (!svq->scsi_cmds) + return; + + for (i = 0; i < svq->max_cmds; i++) { + tv_cmd = &svq->scsi_cmds[i]; + + kfree(tv_cmd->tvc_sgl); + kfree(tv_cmd->tvc_prot_sgl); + kfree(tv_cmd->tvc_upages); + } + + sbitmap_free(&svq->scsi_tags); + kfree(svq->scsi_cmds); + svq->scsi_cmds = NULL; +} + +static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds) +{ + struct vhost_scsi_virtqueue *svq = container_of(vq, + struct vhost_scsi_virtqueue, vq); + struct vhost_scsi_cmd *tv_cmd; + unsigned int i; + + if (svq->scsi_cmds) + return 0; + + if (sbitmap_init_node(&svq->scsi_tags, max_cmds, -1, GFP_KERNEL, + NUMA_NO_NODE)) + return -ENOMEM; + svq->max_cmds = max_cmds; + + svq->scsi_cmds = kcalloc(max_cmds, sizeof(*tv_cmd), GFP_KERNEL); + if (!svq->scsi_cmds) { + sbitmap_free(&svq->scsi_tags); + return -ENOMEM; + } + + for (i = 0; i < max_cmds; i++) { + tv_cmd = &svq->scsi_cmds[i]; + + tv_cmd->tvc_sgl = kcalloc(VHOST_SCSI_PREALLOC_SGLS, + sizeof(struct scatterlist), + GFP_KERNEL); + if (!tv_cmd->tvc_sgl) { + pr_err("Unable to allocate tv_cmd->tvc_sgl\n"); + goto out; + } + + tv_cmd->tvc_upages = kcalloc(VHOST_SCSI_PREALLOC_UPAGES, + sizeof(struct page *), + GFP_KERNEL); + if (!tv_cmd->tvc_upages) { + pr_err("Unable to allocate tv_cmd->tvc_upages\n"); + goto out; + } + + tv_cmd->tvc_prot_sgl = kcalloc(VHOST_SCSI_PREALLOC_PROT_SGLS, + sizeof(struct scatterlist), + GFP_KERNEL); + if (!tv_cmd->tvc_prot_sgl) { + pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n"); + goto out; + } + } + return 0; +out: + vhost_scsi_destroy_vq_cmds(vq); + return -ENOMEM; +} + /* * Called from vhost_scsi_ioctl() context to walk the list of available * vhost_scsi_tpg with an active struct vhost_scsi_nexus @@ -1427,10 +1507,9 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) { if (vs->vs_tpg && vs->vs_tpg[tpg->tport_tpgt]) { - kfree(vs_tpg); mutex_unlock(&tpg->tv_tpg_mutex); ret = -EEXIST; - goto out; + goto undepend; } /* * In order to ensure individual vhost-scsi configfs @@ -1442,9 +1521,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, ret = target_depend_item(&se_tpg->tpg_group.cg_item); if (ret) { pr_warn("target_depend_item() failed: %d\n", ret); - kfree(vs_tpg); mutex_unlock(&tpg->tv_tpg_mutex); - goto out; + goto undepend; } tpg->tv_tpg_vhost_count++; tpg->vhost_scsi = vs; @@ -1457,6 +1535,16 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (match) { memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, sizeof(vs->vs_vhost_wwpn)); + + for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { + vq = &vs->vqs[i].vq; + if (!vhost_vq_is_setup(vq)) + continue; + + if (vhost_scsi_setup_vq_cmds(vq, vq->num)) + goto destroy_vq_cmds; + } + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); @@ -1476,7 +1564,22 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, vhost_scsi_flush(vs); kfree(vs->vs_tpg); vs->vs_tpg = vs_tpg; + goto out; +destroy_vq_cmds: + for (i--; i >= VHOST_SCSI_VQ_IO; i--) { + if (!vhost_vq_get_backend(&vs->vqs[i].vq)) + vhost_scsi_destroy_vq_cmds(&vs->vqs[i].vq); + } +undepend: + for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { + tpg = vs_tpg[i]; + if (tpg) { + tpg->tv_tpg_vhost_count--; + target_undepend_item(&tpg->se_tpg.tpg_group.cg_item); + } + } + kfree(vs_tpg); out: mutex_unlock(&vs->dev.mutex); mutex_unlock(&vhost_scsi_mutex); @@ -1549,6 +1652,12 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, mutex_lock(&vq->mutex); vhost_vq_set_backend(vq, NULL); mutex_unlock(&vq->mutex); + /* + * Make sure cmds are not running before tearing them + * down. + */ + vhost_scsi_flush(vs); + vhost_scsi_destroy_vq_cmds(vq); } } /* @@ -1842,23 +1951,6 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg, mutex_unlock(&vhost_scsi_mutex); } -static void vhost_scsi_free_cmd_map_res(struct se_session *se_sess) -{ - struct vhost_scsi_cmd *tv_cmd; - unsigned int i; - - if (!se_sess->sess_cmd_map) - return; - - for (i = 0; i < VHOST_SCSI_DEFAULT_TAGS; i++) { - tv_cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[i]; - - kfree(tv_cmd->tvc_sgl); - kfree(tv_cmd->tvc_prot_sgl); - kfree(tv_cmd->tvc_upages); - } -} - static ssize_t vhost_scsi_tpg_attrib_fabric_prot_type_store( struct config_item *item, const char *page, size_t count) { @@ -1898,45 +1990,6 @@ static struct configfs_attribute *vhost_scsi_tpg_attrib_attrs[] = { NULL, }; -static int vhost_scsi_nexus_cb(struct se_portal_group *se_tpg, - struct se_session *se_sess, void *p) -{ - struct vhost_scsi_cmd *tv_cmd; - unsigned int i; - - for (i = 0; i < VHOST_SCSI_DEFAULT_TAGS; i++) { - tv_cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[i]; - - tv_cmd->tvc_sgl = kcalloc(VHOST_SCSI_PREALLOC_SGLS, - sizeof(struct scatterlist), - GFP_KERNEL); - if (!tv_cmd->tvc_sgl) { - pr_err("Unable to allocate tv_cmd->tvc_sgl\n"); - goto out; - } - - tv_cmd->tvc_upages = kcalloc(VHOST_SCSI_PREALLOC_UPAGES, - sizeof(struct page *), - GFP_KERNEL); - if (!tv_cmd->tvc_upages) { - pr_err("Unable to allocate tv_cmd->tvc_upages\n"); - goto out; - } - - tv_cmd->tvc_prot_sgl = kcalloc(VHOST_SCSI_PREALLOC_PROT_SGLS, - sizeof(struct scatterlist), - GFP_KERNEL); - if (!tv_cmd->tvc_prot_sgl) { - pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n"); - goto out; - } - } - return 0; -out: - vhost_scsi_free_cmd_map_res(se_sess); - return -ENOMEM; -} - static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg, const char *name) { @@ -1960,12 +2013,9 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg, * struct se_node_acl for the vhost_scsi struct se_portal_group with * the SCSI Initiator port name of the passed configfs group 'name'. */ - tv_nexus->tvn_se_sess = target_setup_session(&tpg->se_tpg, - VHOST_SCSI_DEFAULT_TAGS, - sizeof(struct vhost_scsi_cmd), + tv_nexus->tvn_se_sess = target_setup_session(&tpg->se_tpg, 0, 0, TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS, - (unsigned char *)name, tv_nexus, - vhost_scsi_nexus_cb); + (unsigned char *)name, tv_nexus, NULL); if (IS_ERR(tv_nexus->tvn_se_sess)) { mutex_unlock(&tpg->tv_tpg_mutex); kfree(tv_nexus); @@ -2015,7 +2065,6 @@ static int vhost_scsi_drop_nexus(struct vhost_scsi_tpg *tpg) " %s Initiator Port: %s\n", vhost_scsi_dump_proto_id(tpg->tport), tv_nexus->tvn_se_sess->se_node_acl->initiatorname); - vhost_scsi_free_cmd_map_res(se_sess); /* * Release the SCSI I_T Nexus to the emulated vhost Target Port */ From 47a3565e8bb14ec48a75b48daf57aa830e2691f8 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 9 Nov 2020 23:33:21 -0600 Subject: [PATCH 536/710] vhost scsi: fix cmd completion race We might not do the final se_cmd put from vhost_scsi_complete_cmd_work. When the last put happens a little later then we could race where vhost_scsi_complete_cmd_work does vhost_signal, the guest runs and sends more IO, and vhost_scsi_handle_vq runs but does not find any free cmds. This patch has us delay completing the cmd until the last lio core ref is dropped. We then know that once we signal to the guest that the cmd is completed that if it queues a new command it will find a free cmd. Signed-off-by: Mike Christie Reviewed-by: Maurizio Lombardi Link: https://lore.kernel.org/r/1604986403-4931-4-git-send-email-michael.christie@oracle.com Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index e31339be7dd7..5d8850f5aef1 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -322,7 +322,7 @@ static u32 vhost_scsi_tpg_get_inst_index(struct se_portal_group *se_tpg) return 1; } -static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) +static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd) { struct vhost_scsi_cmd *tv_cmd = container_of(se_cmd, struct vhost_scsi_cmd, tvc_se_cmd); @@ -344,6 +344,16 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) vhost_scsi_put_inflight(inflight); } +static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) +{ + struct vhost_scsi_cmd *cmd = container_of(se_cmd, + struct vhost_scsi_cmd, tvc_se_cmd); + struct vhost_scsi *vs = cmd->tvc_vhost; + + llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list); + vhost_work_queue(&vs->dev, &vs->vs_completion_work); +} + static u32 vhost_scsi_sess_get_index(struct se_session *se_sess) { return 0; @@ -366,28 +376,15 @@ static int vhost_scsi_get_cmd_state(struct se_cmd *se_cmd) return 0; } -static void vhost_scsi_complete_cmd(struct vhost_scsi_cmd *cmd) -{ - struct vhost_scsi *vs = cmd->tvc_vhost; - - llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list); - - vhost_work_queue(&vs->dev, &vs->vs_completion_work); -} - static int vhost_scsi_queue_data_in(struct se_cmd *se_cmd) { - struct vhost_scsi_cmd *cmd = container_of(se_cmd, - struct vhost_scsi_cmd, tvc_se_cmd); - vhost_scsi_complete_cmd(cmd); + transport_generic_free_cmd(se_cmd, 0); return 0; } static int vhost_scsi_queue_status(struct se_cmd *se_cmd) { - struct vhost_scsi_cmd *cmd = container_of(se_cmd, - struct vhost_scsi_cmd, tvc_se_cmd); - vhost_scsi_complete_cmd(cmd); + transport_generic_free_cmd(se_cmd, 0); return 0; } @@ -433,15 +430,6 @@ vhost_scsi_allocate_evt(struct vhost_scsi *vs, return evt; } -static void vhost_scsi_free_cmd(struct vhost_scsi_cmd *cmd) -{ - struct se_cmd *se_cmd = &cmd->tvc_se_cmd; - - /* TODO locking against target/backend threads? */ - transport_generic_free_cmd(se_cmd, 0); - -} - static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd) { return target_put_sess_cmd(se_cmd); @@ -560,7 +548,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } else pr_err("Faulted on virtio_scsi_cmd_resp\n"); - vhost_scsi_free_cmd(cmd); + vhost_scsi_release_cmd_res(se_cmd); } vq = -1; @@ -1091,7 +1079,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) &prot_iter, exp_data_len, &data_iter))) { vq_err(vq, "Failed to map iov to sgl\n"); - vhost_scsi_release_cmd(&cmd->tvc_se_cmd); + vhost_scsi_release_cmd_res(&cmd->tvc_se_cmd); goto err; } } From 18f1becb6948cd411fd01968a0a54af63732e73c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 9 Nov 2020 23:33:22 -0600 Subject: [PATCH 537/710] vhost scsi: add lun parser helper Move code to parse lun from req's lun_buf to helper, so tmf code can use it in the next patch. Signed-off-by: Mike Christie Reviewed-by: Paolo Bonzini Acked-by: Jason Wang Link: https://lore.kernel.org/r/1604986403-4931-5-git-send-email-michael.christie@oracle.com Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 5d8850f5aef1..ed7dc6b998f6 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -898,6 +898,11 @@ vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc, return ret; } +static u16 vhost_buf_to_lun(u8 *lun_buf) +{ + return ((lun_buf[2] << 8) | lun_buf[3]) & 0x3FFF; +} + static void vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) { @@ -1036,12 +1041,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) tag = vhost64_to_cpu(vq, v_req_pi.tag); task_attr = v_req_pi.task_attr; cdb = &v_req_pi.cdb[0]; - lun = ((v_req_pi.lun[2] << 8) | v_req_pi.lun[3]) & 0x3FFF; + lun = vhost_buf_to_lun(v_req_pi.lun); } else { tag = vhost64_to_cpu(vq, v_req.tag); task_attr = v_req.task_attr; cdb = &v_req.cdb[0]; - lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; + lun = vhost_buf_to_lun(v_req.lun); } /* * Check that the received CDB size does not exceeded our From efd838fec17bd8756da852a435800a7e6281bfbc Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 9 Nov 2020 23:33:23 -0600 Subject: [PATCH 538/710] vhost scsi: Add support for LUN resets. In newer versions of virtio-scsi we just reset the timer when an a command times out, so TMFs are never sent for the cmd time out case. However, in older kernels and for the TMF inject cases, we can still get resets and we end up just failing immediately so the guest might see the device get offlined and IO errors. For the older kernel cases, we want the same end result as the modern virtio-scsi driver where we let the lower levels fire their error handling and handle the problem. And at the upper levels we want to wait. This patch ties the LUN reset handling into the LIO TMF code which will just wait for outstanding commands to complete like we are doing in the modern virtio-scsi case. Note: I did not handle the ABORT case to keep this simple. For ABORTs LIO just waits on the cmd like how it does for the RESET case. If an ABORT fails, the guest OS ends up escalating to LUN RESET, so in the end we get the same behavior where we wait on the outstanding cmds. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/1604986403-4931-6-git-send-email-michael.christie@oracle.com Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 149 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 135 insertions(+), 14 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index ed7dc6b998f6..f22fce549862 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -139,6 +139,7 @@ struct vhost_scsi_tpg { struct se_portal_group se_tpg; /* Pointer back to vhost_scsi, protected by tv_tpg_mutex */ struct vhost_scsi *vhost_scsi; + struct list_head tmf_queue; }; struct vhost_scsi_tport { @@ -211,6 +212,20 @@ struct vhost_scsi { int vs_events_nr; /* num of pending events, protected by vq->mutex */ }; +struct vhost_scsi_tmf { + struct vhost_work vwork; + struct vhost_scsi_tpg *tpg; + struct vhost_scsi *vhost; + struct vhost_scsi_virtqueue *svq; + struct list_head queue_entry; + + struct se_cmd se_cmd; + struct vhost_scsi_inflight *inflight; + struct iovec resp_iov; + int in_iovs; + int vq_desc; +}; + /* * Context for processing request and control queue operations. */ @@ -344,14 +359,32 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd) vhost_scsi_put_inflight(inflight); } +static void vhost_scsi_release_tmf_res(struct vhost_scsi_tmf *tmf) +{ + struct vhost_scsi_tpg *tpg = tmf->tpg; + struct vhost_scsi_inflight *inflight = tmf->inflight; + + mutex_lock(&tpg->tv_tpg_mutex); + list_add_tail(&tpg->tmf_queue, &tmf->queue_entry); + mutex_unlock(&tpg->tv_tpg_mutex); + vhost_scsi_put_inflight(inflight); +} + static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) { - struct vhost_scsi_cmd *cmd = container_of(se_cmd, - struct vhost_scsi_cmd, tvc_se_cmd); - struct vhost_scsi *vs = cmd->tvc_vhost; + if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) { + struct vhost_scsi_tmf *tmf = container_of(se_cmd, + struct vhost_scsi_tmf, se_cmd); - llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list); - vhost_work_queue(&vs->dev, &vs->vs_completion_work); + vhost_work_queue(&tmf->vhost->dev, &tmf->vwork); + } else { + struct vhost_scsi_cmd *cmd = container_of(se_cmd, + struct vhost_scsi_cmd, tvc_se_cmd); + struct vhost_scsi *vs = cmd->tvc_vhost; + + llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list); + vhost_work_queue(&vs->dev, &vs->vs_completion_work); + } } static u32 vhost_scsi_sess_get_index(struct se_session *se_sess) @@ -390,7 +423,10 @@ static int vhost_scsi_queue_status(struct se_cmd *se_cmd) static void vhost_scsi_queue_tm_rsp(struct se_cmd *se_cmd) { - return; + struct vhost_scsi_tmf *tmf = container_of(se_cmd, struct vhost_scsi_tmf, + se_cmd); + + transport_generic_free_cmd(&tmf->se_cmd, 0); } static void vhost_scsi_aborted_task(struct se_cmd *se_cmd) @@ -1120,9 +1156,9 @@ out: } static void -vhost_scsi_send_tmf_reject(struct vhost_scsi *vs, - struct vhost_virtqueue *vq, - struct vhost_scsi_ctx *vc) +vhost_scsi_send_tmf_resp(struct vhost_scsi *vs, struct vhost_virtqueue *vq, + int in_iovs, int vq_desc, struct iovec *resp_iov, + int tmf_resp_code) { struct virtio_scsi_ctrl_tmf_resp rsp; struct iov_iter iov_iter; @@ -1130,17 +1166,87 @@ vhost_scsi_send_tmf_reject(struct vhost_scsi *vs, pr_debug("%s\n", __func__); memset(&rsp, 0, sizeof(rsp)); - rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; + rsp.response = tmf_resp_code; - iov_iter_init(&iov_iter, READ, &vq->iov[vc->out], vc->in, sizeof(rsp)); + iov_iter_init(&iov_iter, READ, resp_iov, in_iovs, sizeof(rsp)); ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter); if (likely(ret == sizeof(rsp))) - vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); + vhost_add_used_and_signal(&vs->dev, vq, vq_desc, 0); else pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n"); } +static void vhost_scsi_tmf_resp_work(struct vhost_work *work) +{ + struct vhost_scsi_tmf *tmf = container_of(work, struct vhost_scsi_tmf, + vwork); + int resp_code; + + if (tmf->se_cmd.se_tmr_req->response == TMR_FUNCTION_COMPLETE) + resp_code = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; + else + resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED; + + vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs, + tmf->vq_desc, &tmf->resp_iov, resp_code); + vhost_scsi_release_tmf_res(tmf); +} + +static void +vhost_scsi_handle_tmf(struct vhost_scsi *vs, struct vhost_scsi_tpg *tpg, + struct vhost_virtqueue *vq, + struct virtio_scsi_ctrl_tmf_req *vtmf, + struct vhost_scsi_ctx *vc) +{ + struct vhost_scsi_virtqueue *svq = container_of(vq, + struct vhost_scsi_virtqueue, vq); + struct vhost_scsi_tmf *tmf; + + if (vhost32_to_cpu(vq, vtmf->subtype) != + VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET) + goto send_reject; + + if (!tpg->tpg_nexus || !tpg->tpg_nexus->tvn_se_sess) { + pr_err("Unable to locate active struct vhost_scsi_nexus for LUN RESET.\n"); + goto send_reject; + } + + mutex_lock(&tpg->tv_tpg_mutex); + if (list_empty(&tpg->tmf_queue)) { + pr_err("Missing reserve TMF. Could not handle LUN RESET.\n"); + mutex_unlock(&tpg->tv_tpg_mutex); + goto send_reject; + } + + tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf, + queue_entry); + list_del_init(&tmf->queue_entry); + mutex_unlock(&tpg->tv_tpg_mutex); + + tmf->tpg = tpg; + tmf->vhost = vs; + tmf->svq = svq; + tmf->resp_iov = vq->iov[vc->out]; + tmf->vq_desc = vc->head; + tmf->in_iovs = vc->in; + tmf->inflight = vhost_scsi_get_inflight(vq); + + if (target_submit_tmr(&tmf->se_cmd, tpg->tpg_nexus->tvn_se_sess, NULL, + vhost_buf_to_lun(vtmf->lun), NULL, + TMR_LUN_RESET, GFP_KERNEL, 0, + TARGET_SCF_ACK_KREF) < 0) { + vhost_scsi_release_tmf_res(tmf); + goto send_reject; + } + + return; + +send_reject: + vhost_scsi_send_tmf_resp(vs, vq, vc->in, vc->head, &vq->iov[vc->out], + VIRTIO_SCSI_S_FUNCTION_REJECTED); +} + static void vhost_scsi_send_an_resp(struct vhost_scsi *vs, struct vhost_virtqueue *vq, @@ -1166,6 +1272,7 @@ vhost_scsi_send_an_resp(struct vhost_scsi *vs, static void vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) { + struct vhost_scsi_tpg *tpg; union { __virtio32 type; struct virtio_scsi_ctrl_an_req an; @@ -1247,12 +1354,12 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) vc.req += typ_size; vc.req_size -= typ_size; - ret = vhost_scsi_get_req(vq, &vc, NULL); + ret = vhost_scsi_get_req(vq, &vc, &tpg); if (ret) goto err; if (v_req.type == VIRTIO_SCSI_T_TMF) - vhost_scsi_send_tmf_reject(vs, vq, &vc); + vhost_scsi_handle_tmf(vs, tpg, vq, &v_req.tmf, &vc); else vhost_scsi_send_an_resp(vs, vq, &vc); err: @@ -1913,11 +2020,19 @@ static int vhost_scsi_port_link(struct se_portal_group *se_tpg, { struct vhost_scsi_tpg *tpg = container_of(se_tpg, struct vhost_scsi_tpg, se_tpg); + struct vhost_scsi_tmf *tmf; + + tmf = kzalloc(sizeof(*tmf), GFP_KERNEL); + if (!tmf) + return -ENOMEM; + INIT_LIST_HEAD(&tmf->queue_entry); + vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work); mutex_lock(&vhost_scsi_mutex); mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_port_count++; + list_add_tail(&tmf->queue_entry, &tpg->tmf_queue); mutex_unlock(&tpg->tv_tpg_mutex); vhost_scsi_hotplug(tpg, lun); @@ -1932,11 +2047,16 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg, { struct vhost_scsi_tpg *tpg = container_of(se_tpg, struct vhost_scsi_tpg, se_tpg); + struct vhost_scsi_tmf *tmf; mutex_lock(&vhost_scsi_mutex); mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_port_count--; + tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf, + queue_entry); + list_del(&tmf->queue_entry); + kfree(tmf); mutex_unlock(&tpg->tv_tpg_mutex); vhost_scsi_hotunplug(tpg, lun); @@ -2197,6 +2317,7 @@ vhost_scsi_make_tpg(struct se_wwn *wwn, const char *name) } mutex_init(&tpg->tv_tpg_mutex); INIT_LIST_HEAD(&tpg->tv_tpg_list); + INIT_LIST_HEAD(&tpg->tmf_queue); tpg->tport = tport; tpg->tport_tpgt = tpgt; From 09162bc32c880a791c6c0668ce0745cf7958f576 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Nov 2020 16:44:31 -0800 Subject: [PATCH 539/710] Linux 5.10-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 008aba5f1a20..e2c3f65c4721 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From e5633b95dce915c2ade5ce1c90d295d555396c60 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sun, 15 Nov 2020 23:38:42 +0800 Subject: [PATCH 540/710] ALSA: usb-audio: Use ALC1220-VB-DT mapping for ASUS ROG Strix TRX40 mobo ASUS ROG Strix also uses ALC1220-VB-DT, so adjust the mapping and add profile name to let userspace pick correct UCM profile. BugLink: https://gitlab.freedesktop.org/pulseaudio/pulseaudio/-/issues/1031 Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20201115153843.1109200-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/usb/card.c | 4 ++++ sound/usb/mixer_maps.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index fa764b61fe9c..4457214a3ae6 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -379,6 +379,10 @@ static const struct usb_audio_device_name usb_audio_names[] = { DEVICE_NAME(0x046d, 0x0990, "Logitech, Inc.", "QuickCam Pro 9000"), + /* ASUS ROG Strix */ + PROFILE_NAME(0x0b05, 0x1917, + "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), + /* Dell WD15 Dock */ PROFILE_NAME(0x0bda, 0x4014, "Dell", "WD15 Dock", "Dell-WD15-Dock"), /* Dell WD19 Dock */ diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index c369c81e74c4..a7212f16660e 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -561,7 +561,8 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { }, { /* ASUS ROG Strix */ .id = USB_ID(0x0b05, 0x1917), - .map = asus_rog_map, + .map = trx40_mobo_map, + .connector_map = trx40_mobo_connector_map, }, { /* MSI TRX40 Creator */ .id = USB_ID(0x0db0, 0x0d64), From 481535c5b41d191b22775a6873de5ec0e1cdced1 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 16 Nov 2020 01:25:56 -0800 Subject: [PATCH 541/710] xtensa: fix TLBTEMP area placement fast_second_level_miss handler for the TLBTEMP area has an assumption that page table directory entry for the TLBTEMP address range is 0. For it to be true the TLBTEMP area must be aligned to 4MB boundary and not share its 4MB region with anything that may use a page table. This is not true currently: TLBTEMP shares space with vmalloc space which results in the following kinds of runtime errors when fast_second_level_miss loads page table directory entry for the vmalloc space instead of fixing up the TLBTEMP area: Unable to handle kernel paging request at virtual address c7ff0e00 pc = d0009275, ra = 90009478 Oops: sig: 9 [#1] PREEMPT CPU: 1 PID: 61 Comm: kworker/u9:2 Not tainted 5.10.0-rc3-next-20201110-00007-g1fe4962fa983-dirty #58 Workqueue: xprtiod xs_stream_data_receive_workfn a00: 90009478 d11e1dc0 c7ff0e00 00000020 c7ff0000 00000001 7f8b8107 00000000 a08: 900c5992 d11e1d90 d0cc88b8 5506e97c 00000000 5506e97c d06c8074 d11e1d90 pc: d0009275, ps: 00060310, depc: 00000014, excvaddr: c7ff0e00 lbeg: d0009275, lend: d0009287 lcount: 00000003, sar: 00000010 Call Trace: xs_stream_data_receive_workfn+0x43c/0x770 process_one_work+0x1a1/0x324 worker_thread+0x1cc/0x3c0 kthread+0x10d/0x124 ret_from_kernel_thread+0xc/0x18 Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- Documentation/xtensa/mmu.rst | 9 ++++++--- arch/xtensa/include/asm/pgtable.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/xtensa/mmu.rst b/Documentation/xtensa/mmu.rst index e52a12960fdc..450573afa31a 100644 --- a/Documentation/xtensa/mmu.rst +++ b/Documentation/xtensa/mmu.rst @@ -82,7 +82,8 @@ Default MMUv2-compatible layout:: +------------------+ | VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB +------------------+ VMALLOC_END - | Cache aliasing | TLBTEMP_BASE_1 0xc7ff0000 DCACHE_WAY_SIZE + +------------------+ + | Cache aliasing | TLBTEMP_BASE_1 0xc8000000 DCACHE_WAY_SIZE | remap area 1 | +------------------+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE @@ -124,7 +125,8 @@ Default MMUv2-compatible layout:: +------------------+ | VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB +------------------+ VMALLOC_END - | Cache aliasing | TLBTEMP_BASE_1 0xa7ff0000 DCACHE_WAY_SIZE + +------------------+ + | Cache aliasing | TLBTEMP_BASE_1 0xa8000000 DCACHE_WAY_SIZE | remap area 1 | +------------------+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE @@ -167,7 +169,8 @@ Default MMUv2-compatible layout:: +------------------+ | VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB +------------------+ VMALLOC_END - | Cache aliasing | TLBTEMP_BASE_1 0x97ff0000 DCACHE_WAY_SIZE + +------------------+ + | Cache aliasing | TLBTEMP_BASE_1 0x98000000 DCACHE_WAY_SIZE | remap area 1 | +------------------+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index fa054a1772e1..4dc04e6c01d7 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -69,7 +69,7 @@ */ #define VMALLOC_START (XCHAL_KSEG_CACHED_VADDR - 0x10000000) #define VMALLOC_END (VMALLOC_START + 0x07FEFFFF) -#define TLBTEMP_BASE_1 (VMALLOC_END + 1) +#define TLBTEMP_BASE_1 (VMALLOC_START + 0x08000000) #define TLBTEMP_BASE_2 (TLBTEMP_BASE_1 + DCACHE_WAY_SIZE) #if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE #define TLBTEMP_SIZE (2 * DCACHE_WAY_SIZE) From 3a860d165eb5f4d7cf0bf81ef6a5b5c5e1754422 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 16 Nov 2020 01:38:59 -0800 Subject: [PATCH 542/710] xtensa: disable preemption around cache alias management calls Although cache alias management calls set up and tear down TLB entries and fast_second_level_miss is able to restore TLB entry should it be evicted they absolutely cannot preempt each other because they use the same TLBTEMP area for different purposes. Disable preemption around all cache alias management calls to enforce that. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/mm/cache.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 5835406b3cec..085b8c77b9d9 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -70,8 +70,10 @@ static inline void kmap_invalidate_coherent(struct page *page, kvaddr = TLBTEMP_BASE_1 + (page_to_phys(page) & DCACHE_ALIAS_MASK); + preempt_disable(); __invalidate_dcache_page_alias(kvaddr, page_to_phys(page)); + preempt_enable(); } } } @@ -156,6 +158,7 @@ void flush_dcache_page(struct page *page) if (!alias && !mapping) return; + preempt_disable(); virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(virt, phys); @@ -166,6 +169,7 @@ void flush_dcache_page(struct page *page) if (mapping) __invalidate_icache_page_alias(virt, phys); + preempt_enable(); } /* There shouldn't be an entry in the cache for this page anymore. */ @@ -199,8 +203,10 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long phys = page_to_phys(pfn_to_page(pfn)); unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(virt, phys); __invalidate_icache_page_alias(virt, phys); + preempt_enable(); } EXPORT_SYMBOL(local_flush_cache_page); @@ -227,11 +233,13 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) unsigned long phys = page_to_phys(page); unsigned long tmp; + preempt_disable(); tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(tmp, phys); tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(tmp, phys); __invalidate_icache_page_alias(tmp, phys); + preempt_enable(); clear_bit(PG_arch_1, &page->flags); } @@ -265,7 +273,9 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(t, phys); + preempt_enable(); } /* Copy data */ @@ -280,9 +290,11 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_range((unsigned long) dst, len); if ((vma->vm_flags & VM_EXEC) != 0) __invalidate_icache_page_alias(t, phys); + preempt_enable(); } else if ((vma->vm_flags & VM_EXEC) != 0) { __flush_dcache_range((unsigned long)dst,len); @@ -304,7 +316,9 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(t, phys); + preempt_enable(); } memcpy(dst, src, len); From c39de538a06e76d89b7e598a71e16688009cd56c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 4 Nov 2020 16:21:26 +0300 Subject: [PATCH 543/710] cpuidle: tegra: Annotate tegra_pm_set_cpu_in_lp2() with RCU_NONIDLE Annotate tegra_pm_set[clear]_cpu_in_lp2() with RCU_NONIDLE in order to fix lockdep warning about suspicious RCU usage of a spinlock during late idling phase. WARNING: suspicious RCU usage ... include/trace/events/lock.h:13 suspicious rcu_dereference_check() usage! ... (dump_stack) from (lock_acquire) (lock_acquire) from (_raw_spin_lock) (_raw_spin_lock) from (tegra_pm_set_cpu_in_lp2) (tegra_pm_set_cpu_in_lp2) from (tegra_cpuidle_enter) (tegra_cpuidle_enter) from (cpuidle_enter_state) (cpuidle_enter_state) from (cpuidle_enter_state_coupled) (cpuidle_enter_state_coupled) from (cpuidle_enter) (cpuidle_enter) from (do_idle) ... Tested-by: Peter Geis Reported-by: Peter Geis Signed-off-by: Dmitry Osipenko Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle-tegra.c b/drivers/cpuidle/cpuidle-tegra.c index e8956706a291..191966dc8d02 100644 --- a/drivers/cpuidle/cpuidle-tegra.c +++ b/drivers/cpuidle/cpuidle-tegra.c @@ -189,7 +189,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev, } local_fiq_disable(); - tegra_pm_set_cpu_in_lp2(); + RCU_NONIDLE(tegra_pm_set_cpu_in_lp2()); cpu_pm_enter(); switch (index) { @@ -207,7 +207,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev, } cpu_pm_exit(); - tegra_pm_clear_cpu_in_lp2(); + RCU_NONIDLE(tegra_pm_clear_cpu_in_lp2()); local_fiq_enable(); return err ?: index; From 8986f223bd777a73119f5d593c15b4d630ff49bb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2020 13:52:47 +0100 Subject: [PATCH 544/710] iommu/vt-d: Take CONFIG_PCI_ATS into account pci_dev::physfn is only available when CONFIG_PCI_ATS is set. The recent fix for the irqdomain rework missed that dependency which makes the build fail when CONFIG_PCI_ATS=n. Add the necessary #ifdeffery. Reported-by: Geert Uytterhoeven Fixes: ff828729be44 ("iommu/vt-d: Cure VF irqdomain hickup") Signed-off-by: Thomas Gleixner Cc: Joerg Roedel --- drivers/iommu/intel/dmar.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b2e804473209..bc9f4cf72240 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -335,7 +335,9 @@ static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) static inline void vf_inherit_msi_domain(struct pci_dev *pdev) { +#ifdef CONFIG_PCI_ATS dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&pdev->physfn->dev)); +#endif } static int dmar_pci_bus_notifier(struct notifier_block *nb, From 7dc7a8b04f3da8aa3c3be514e155e2fa094e976f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 10 Nov 2020 15:52:01 -0800 Subject: [PATCH 545/710] ACPI: fan: Initialize performance state sysfs attribute The following warning is reported if lock debugging is enabled. DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 1 PID: 1 at kernel/locking/lockdep.c:4617 lockdep_init_map_waits+0x141/0x222 ... Call Trace: __kernfs_create_file+0x7a/0xd8 sysfs_add_file_mode_ns+0x135/0x189 sysfs_create_file_ns+0x70/0xa0 acpi_fan_probe+0x547/0x621 platform_drv_probe+0x67/0x8b ... Dynamically allocated sysfs attributes need to be initialized to avoid the warning. Fixes: d19e470b6605 ("ACPI: fan: Expose fan performance state information") Signed-off-by: Guenter Roeck Cc: 5.6+ # 5.6+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/fan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 48354f82fba6..66c3983f0ccc 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -352,6 +352,7 @@ static int acpi_fan_get_fps(struct acpi_device *device) struct acpi_fan_fps *fps = &fan->fps[i]; snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i); + sysfs_attr_init(&fps->dev_attr.attr); fps->dev_attr.show = show_state; fps->dev_attr.store = NULL; fps->dev_attr.attr.name = fps->name; From d78359b25f7c6759a23189145be8141b6fdfe385 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Mon, 16 Nov 2020 16:19:55 +0200 Subject: [PATCH 546/710] ALSA: hda: Add Alderlake-S PCI ID and HDMI codec vid Add HD Audio PCI ID and HDMI codec vendor ID for Intel Alder Lake. Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20201116141955.2091240-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 3 +++ sound/pci/hda/patch_hdmi.c | 1 + 2 files changed, 4 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index d539f52009a1..6852668f1bcb 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2506,6 +2506,9 @@ static const struct pci_device_id azx_ids[] = { /* DG1 */ { PCI_DEVICE(0x8086, 0x490d), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* Alderlake-S */ + { PCI_DEVICE(0x8086, 0x7ad0), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Elkhart Lake */ { PCI_DEVICE(0x8086, 0x4b55), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index ccd1df059654..b0068f8ca46d 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4274,6 +4274,7 @@ HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI", patch_i915_tgl_hdmi), +HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862816, "Rocketlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI", patch_i915_icl_hdmi), From 6f117cb854a44a79898d844e6ae3fd23bd94e786 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 16 Nov 2020 16:23:47 +0100 Subject: [PATCH 547/710] s390/dasd: fix null pointer dereference for ERP requests When requeueing all requests on the device request queue to the blocklayer we might get to an ERP (error recovery) request that is a copy of an original CQR. Those requests do not have blocklayer request information or a pointer to the dasd_queue set. When trying to access those data it will lead to a null pointer dereference in dasd_requeue_all_requests(). Fix by checking if the request is an ERP request that can simply be ignored. The blocklayer request will be requeued by the original CQR that is on the device queue right behind the ERP request. Fixes: 9487cfd3430d ("s390/dasd: fix handling of internal requests") Cc: #4.16 Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index eb17fea8075c..217a7b84abdf 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2980,6 +2980,12 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr) if (!block) return -EINVAL; + /* + * If the request is an ERP request there is nothing to requeue. + * This will be done with the remaining original request. + */ + if (cqr->refers) + return 0; spin_lock_irq(&cqr->dq->lock); req = (struct request *) cqr->callback_data; blk_mq_requeue_request(req, false); From 1c756cd429d8f3da33d31f2a970284b9d5260534 Mon Sep 17 00:00:00 2001 From: Al Grant Date: Fri, 13 Nov 2020 20:38:26 +0000 Subject: [PATCH 548/710] perf inject: Fix file corruption due to event deletion "perf inject" can create corrupt files when synthesizing sample events from AUX data. This happens when in the input file, the first event (for the AUX data) has a different sample_type from the second event (generally dummy). Specifically, they differ in the bits that indicate the standard fields appended to perf records in the mmap buffer. "perf inject" deletes the first event and moves up the second event to first position. The problem is with the synthetic PERF_RECORD_MMAP (etc.) events created by "perf record". Since these are synthetic versions of events which are normally produced by the kernel, they have to have the standard fields appended as described by sample_type. "perf record" fills these in with zeroes, including the IDENTIFIER field; perf readers interpret records with zero IDENTIFIER using the descriptor for the first event in the file. Since "perf inject" changes the first event, these synthetic records are then processed with the wrong value of sample_type, and the perf reader reads bad data, reports on incorrect length records etc. Mismatching sample_types are seen with "perf record -e cs_etm//", where the AUX event has TID|TIME|CPU|IDENTIFIER and the dummy event has TID|TIME|IDENTIFIER. Perhaps they could be the same, but it isn't normally a problem if they aren't - perf has no problems reading the file. The sample_types have to agree on the position of IDENTIFIER, because that's how perf finds the right event descriptor in the first place, but they don't normally have to agree on other fields, and perf doesn't check that they do. The problem is specific to the way "perf inject" reorganizes the events and the way synthetic MMAP events are recorded with a zero identifier. A simple solution is to stop "perf inject" deleting the tracing event. Committer testing Removed the now unused 'evsel' variable, update the comment about the evsel removal not being performed anymore, and apply the patch manually as it failed with this warning: warning: Patch sent with format=flowed; space at the end of lines might be lost. Testing it with: $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.543 msec (+- 0.130 msec) Average time per event: 0.838 usec (+- 0.013 usec) Average memory usage: 12717 KB (+- 9 KB) Average build-id-all injection took: 5.710 msec (+- 0.058 msec) Average time per event: 0.560 usec (+- 0.006 usec) Average memory usage: 12079 KB (+- 7 KB) $ Signed-off-by: Al Grant Acked-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra LPU-Reference: b9cf5611-daae-2390-3439-6617f8f0a34b@foss.arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 452a75fe68e5..0462dc8db2e3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -779,25 +779,15 @@ static int __cmd_inject(struct perf_inject *inject) dsos__hit_all(session); /* * The AUX areas have been removed and replaced with - * synthesized hardware events, so clear the feature flag and - * remove the evsel. + * synthesized hardware events, so clear the feature flag. */ if (inject->itrace_synth_opts.set) { - struct evsel *evsel; - perf_header__clear_feat(&session->header, HEADER_AUXTRACE); if (inject->itrace_synth_opts.last_branch || inject->itrace_synth_opts.add_last_branch) perf_header__set_feat(&session->header, HEADER_BRANCH_STACK); - evsel = perf_evlist__id2evsel_strict(session->evlist, - inject->aux_id); - if (evsel) { - pr_debug("Deleting %s\n", evsel__name(evsel)); - evlist__remove(session->evlist, evsel); - evsel__delete(evsel); - } } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; From 8326be9f1c0bb498baf134878a8deb8a952e0135 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 11 Nov 2020 15:23:46 -0700 Subject: [PATCH 549/710] dmaengine: idxd: fix mapping of portal size Portal size is 4k. Current code is mapping all 4 portals in a single chunk. Restrict the mapped portal size to a single portal to ensure that submission only goes to the intended portal address. Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/160513342642.510187.16450549281618747065.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 2 +- drivers/dma/idxd/registers.h | 2 +- drivers/dma/idxd/submit.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 506ac85c4a7f..663344987e3f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -271,7 +271,7 @@ int idxd_wq_map_portal(struct idxd_wq *wq) resource_size_t start; start = pci_resource_start(pdev, IDXD_WQ_BAR); - start = start + wq->id * IDXD_PORTAL_SIZE; + start += idxd_get_wq_portal_full_offset(wq->id, IDXD_PORTAL_LIMITED); wq->dportal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE); if (!wq->dportal) diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index aef5a902829e..54390334c243 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -8,7 +8,7 @@ #define IDXD_MMIO_BAR 0 #define IDXD_WQ_BAR 2 -#define IDXD_PORTAL_SIZE 0x4000 +#define IDXD_PORTAL_SIZE PAGE_SIZE /* MMIO Device BAR0 Registers */ #define IDXD_VER_OFFSET 0x00 diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 156a1ee233aa..417048e3c42a 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -74,7 +74,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) if (idxd->state != IDXD_DEV_ENABLED) return -EIO; - portal = wq->dportal + idxd_get_wq_portal_offset(IDXD_PORTAL_UNLIMITED); + portal = wq->dportal; /* * The wmb() flushes writes to coherent DMA data before possibly * triggering a DMA read. The wmb() is necessary even on UP because From 568beb27959b0515d325ea1c6cf211eed2d66740 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 13 Nov 2020 10:20:53 -0800 Subject: [PATCH 550/710] perf test: Avoid an msan warning in a copied stack. This fix is for a failure that occurred in the DWARF unwind perf test. Stack unwinders may probe memory when looking for frames. Memory sanitizer will poison and track uninitialized memory on the stack, and on the heap if the value is copied to the heap. This can lead to false memory sanitizer failures for the use of an uninitialized value. Avoid this problem by removing the poison on the copied stack. The full msan failure with track origins looks like: ==2168==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x559ceb10755b in handle_cfi elfutils/libdwfl/frame_unwind.c:648:8 #1 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #2 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #3 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #4 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #5 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #6 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #7 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #8 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #9 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #10 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #11 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #12 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #13 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #14 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #15 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #16 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #17 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #18 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #19 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #20 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #21 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #22 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #23 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559ceb106acf in __libdwfl_frame_reg_set elfutils/libdwfl/frame_unwind.c:77:22 #1 0x559ceb106acf in handle_cfi elfutils/libdwfl/frame_unwind.c:627:13 #2 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #3 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #4 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #5 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #6 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #7 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #8 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #9 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #10 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #11 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #12 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #13 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #14 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #15 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #16 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #17 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #18 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #19 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #20 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #21 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #22 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #23 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #24 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559ceb106a54 in handle_cfi elfutils/libdwfl/frame_unwind.c:613:9 #1 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #2 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #3 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #4 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #5 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #6 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #7 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #8 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #9 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #10 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #11 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #12 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #13 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #14 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #15 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #16 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #17 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #18 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #19 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #20 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #21 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #22 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #23 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559ceaff8800 in memory_read tools/perf/util/unwind-libdw.c:156:10 #1 0x559ceb10f053 in expr_eval elfutils/libdwfl/frame_unwind.c:501:13 #2 0x559ceb1060cc in handle_cfi elfutils/libdwfl/frame_unwind.c:603:18 #3 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4 #4 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7 #5 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10 #6 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17 #7 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17 #8 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14 #9 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10 #10 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8 #11 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8 #12 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #13 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #14 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #15 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #16 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #17 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #18 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #19 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #20 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #21 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #22 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #23 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #24 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #25 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was stored to memory at #0 0x559cea9027d9 in __msan_memcpy llvm/llvm-project/compiler-rt/lib/msan/msan_interceptors.cpp:1558:3 #1 0x559cea9d2185 in sample_ustack tools/perf/arch/x86/tests/dwarf-unwind.c:41:2 #2 0x559cea9d202c in test__arch_unwind_sample tools/perf/arch/x86/tests/dwarf-unwind.c:72:9 #3 0x559ceabc9cbd in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:106:6 #4 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26 #5 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0) #6 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2 #7 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9 #8 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9 #9 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8 #10 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9 #11 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9 #12 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4 #13 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9 #14 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11 #15 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8 #16 0x559cea95fbce in run_argv tools/perf/perf.c:409:2 #17 0x559cea95fbce in main tools/perf/perf.c:539:3 Uninitialized value was created by an allocation of 'bf' in the stack frame of function 'perf_event__synthesize_mmap_events' #0 0x559ceafc5f60 in perf_event__synthesize_mmap_events tools/perf/util/synthetic-events.c:445 SUMMARY: MemorySanitizer: use-of-uninitialized-value elfutils/libdwfl/frame_unwind.c:648:8 in handle_cfi Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: clang-built-linux@googlegroups.com Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201113182053.754625-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/dwarf-unwind.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index 4e40402a4f81..478078fb0f22 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -38,6 +38,13 @@ static int sample_ustack(struct perf_sample *sample, stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size; memcpy(buf, (void *) sp, stack_size); +#ifdef MEMORY_SANITIZER + /* + * Copying the stack may copy msan poison, avoid false positives in the + * unwinder by removing the poison here. + */ + __msan_unpoison(buf, stack_size); +#endif stack->data = (char *) buf; stack->size = stack_size; return 0; From 4e7d4f295dee1feed96b2b0a31d80d673b5465e8 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 13 Nov 2020 09:12:48 +0100 Subject: [PATCH 551/710] dmaengine: ioatdma: remove unused function missed during dma_v2 removal Commit 7f832645d0e5 ("dmaengine: ioatdma: remove ioatdma v2 registration") missed to remove dca2_tag_map_valid() during its removal. Hence, since then, dca2_tag_map_valid() is unused and make CC=clang W=1 warns: drivers/dma/ioat/dca.c:44:19: warning: unused function 'dca2_tag_map_valid' [-Wunused-function] So, remove this unused function and get rid of a -Wused-function warning. Signed-off-by: Lukas Bulwahn Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20201113081248.26416-1-lukas.bulwahn@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/ioat/dca.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index 0be385587c4c..289c59ed74b9 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -40,16 +40,6 @@ #define DCA2_TAG_MAP_BYTE3 0x82 #define DCA2_TAG_MAP_BYTE4 0x82 -/* verify if tag map matches expected values */ -static inline int dca2_tag_map_valid(u8 *tag_map) -{ - return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) && - (tag_map[1] == DCA2_TAG_MAP_BYTE1) && - (tag_map[2] == DCA2_TAG_MAP_BYTE2) && - (tag_map[3] == DCA2_TAG_MAP_BYTE3) && - (tag_map[4] == DCA2_TAG_MAP_BYTE4)); -} - /* * "Legacy" DCA systems do not implement the DCA register set in the * I/OAT device. Software needs direct support for their tag mappings. From e773ca7da8beeca7f17fe4c9d1284a2b66839cc1 Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Sat, 14 Nov 2020 11:55:06 +0800 Subject: [PATCH 552/710] dmaengine: pl330: _prep_dma_memcpy: Fix wrong burst size Actually, burst size is equal to '1 << desc->rqcfg.brst_size'. we should use burst size, not desc->rqcfg.brst_size. dma memcpy performance on Rockchip RV1126 @ 1512MHz A7, 1056MHz LPDDR3, 200MHz DMA: dmatest: /# echo dma0chan0 > /sys/module/dmatest/parameters/channel /# echo 4194304 > /sys/module/dmatest/parameters/test_buf_size /# echo 8 > /sys/module/dmatest/parameters/iterations /# echo y > /sys/module/dmatest/parameters/norandom /# echo y > /sys/module/dmatest/parameters/verbose /# echo 1 > /sys/module/dmatest/parameters/run dmatest: dma0chan0-copy0: result #1: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #2: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #3: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #4: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #5: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #6: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #7: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #8: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 Before: dmatest: dma0chan0-copy0: summary 8 tests, 0 failures 48 iops 200338 KB/s (0) After this patch: dmatest: dma0chan0-copy0: summary 8 tests, 0 failures 179 iops 734873 KB/s (0) After this patch and increase dma clk to 400MHz: dmatest: dma0chan0-copy0: summary 8 tests, 0 failures 259 iops 1062929 KB/s (0) Signed-off-by: Sugar Zhang Link: https://lore.kernel.org/r/1605326106-55681-1-git-send-email-sugar.zhang@rock-chips.com Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index e9f0101d92fa..0f5c19370f6d 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2799,7 +2799,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, * If burst size is smaller than bus width then make sure we only * transfer one at a time to avoid a burst stradling an MFIFO entry. */ - if (desc->rqcfg.brst_size * 8 < pl330->pcfg.data_bus_width) + if (burst * 8 < pl330->pcfg.data_bus_width) desc->rqcfg.brst_len = 1; desc->bytes_requested = len; From dd8088d5a8969dc2b42f71d7bc01c25c61a78066 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 10 Nov 2020 17:29:32 +0800 Subject: [PATCH 553/710] PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter In many case, we need to check return value of pm_runtime_get_sync, but it brings a trouble to the usage counter processing. Many callers forget to decrease the usage counter when it failed, which could resulted in reference leak. It has been discussed a lot[0][1]. So we add a function to deal with the usage counter for better coding. [0]https://lkml.org/lkml/2020/6/14/88 [1]https://patchwork.ozlabs.org/project/linux-tegra/list/?series=178139 Signed-off-by: Zhang Qilong Acked-by: Rafael J. Wysocki Signed-off-by: Jakub Kicinski --- include/linux/pm_runtime.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 4b708f4e8eed..b492ae00cc90 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -386,6 +386,27 @@ static inline int pm_runtime_get_sync(struct device *dev) return __pm_runtime_resume(dev, RPM_GET_PUT); } +/** + * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. + * @dev: Target device. + * + * Resume @dev synchronously and if that is successful, increment its runtime + * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been + * incremented or a negative error code otherwise. + */ +static inline int pm_runtime_resume_and_get(struct device *dev) +{ + int ret; + + ret = __pm_runtime_resume(dev, RPM_GET_PUT); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + return 0; +} + /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. * @dev: Target device. From da875fa5040b0f951cb4bf7efbf59f6dcff44d3c Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 10 Nov 2020 17:29:33 +0800 Subject: [PATCH 554/710] net: fec: Fix reference count leak in fec series ops pm_runtime_get_sync() will increment pm usage at first and it will resume the device later. If runtime of the device has error or device is in inaccessible state(or other error state), resume operation will fail. If we do not call put operation to decrease the reference, it will result in reference count leak. Moreover, this device cannot enter the idle state and always stay busy or other non-idle state later. So we fixed it by replacing it with pm_runtime_resume_and_get. Fixes: 8fff755e9f8d0 ("net: fec: Ensure clocks are enabled while using mdio bus") Signed-off-by: Zhang Qilong Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d7919555250d..04f24c66cf36 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1808,7 +1808,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) int ret = 0, frame_start, frame_addr, frame_op; bool is_c45 = !!(regnum & MII_ADDR_C45); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; @@ -1867,11 +1867,9 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, int ret, frame_start, frame_addr; bool is_c45 = !!(regnum & MII_ADDR_C45); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; - else - ret = 0; if (is_c45) { frame_start = FEC_MMFR_ST_C45; @@ -2275,7 +2273,7 @@ static void fec_enet_get_regs(struct net_device *ndev, u32 i, off; int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return; @@ -2976,7 +2974,7 @@ fec_enet_open(struct net_device *ndev) int ret; bool reset_again; - ret = pm_runtime_get_sync(&fep->pdev->dev); + ret = pm_runtime_resume_and_get(&fep->pdev->dev); if (ret < 0) return ret; @@ -3770,7 +3768,7 @@ fec_drv_remove(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; int ret; - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) return ret; From 9d9e937b1c8be97b424e3e11938e183fcde905c0 Mon Sep 17 00:00:00 2001 From: Georg Kohmann Date: Wed, 11 Nov 2020 12:50:25 +0100 Subject: [PATCH 555/710] ipv6/netfilter: Discard first fragment not including all headers Packets are processed even though the first fragment don't include all headers through the upper layer header. This breaks TAHI IPv6 Core Conformance Test v6LC.1.3.6. Referring to RFC8200 SECTION 4.5: "If the first fragment does not include all headers through an Upper-Layer header, then that fragment should be discarded and an ICMP Parameter Problem, Code 3, message should be sent to the source of the fragment, with the Pointer field set to zero." The fragment needs to be validated the same way it is done in commit 2efdaaaf883a ("IPv6: reply ICMP error if the first fragment don't include all headers") for ipv6. Wrap the validation into a common function, ipv6_frag_thdr_truncated() to check for truncation in the upper layer header. This validation does not fullfill all aspects of RFC 8200, section 4.5, but is at the moment sufficient to pass mentioned TAHI test. In netfilter, utilize the fragment offset returned by find_prev_fhdr() to let ipv6_frag_thdr_truncated() start it's traverse from the fragment header. Return 0 to drop the fragment in the netfilter. This is the same behaviour as used on other protocol errors in this function, e.g. when nf_ct_frag6_queue() returns -EPROTO. The Fragment will later be picked up by ipv6_frag_rcv() in reassembly.c. ipv6_frag_rcv() will then send an appropriate ICMP Parameter Problem message back to the source. References commit 2efdaaaf883a ("IPv6: reply ICMP error if the first fragment don't include all headers") Signed-off-by: Georg Kohmann Acked-by: Pablo Neira Ayuso Link: https://lore.kernel.org/r/20201111115025.28879-1-geokohma@cisco.com Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 2 + net/ipv6/netfilter/nf_conntrack_reasm.c | 9 ++++ net/ipv6/reassembly.c | 55 ++++++++++++++++--------- 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index bd1f396cc9c7..637cc6dd12b7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1064,6 +1064,8 @@ int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, bool ipv6_ext_hdr(u8 nexthdr); +bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp); + enum { IP6_FH_F_FRAG = (1 << 0), IP6_FH_F_AUTH = (1 << 1), diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 054d287eb13d..b9cc0b330dbe 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -440,6 +440,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { u16 savethdr = skb->transport_header; + u8 nexthdr = NEXTHDR_FRAGMENT; int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -455,6 +456,14 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) return 0; + /* Discard the first fragment if it does not include all headers + * RFC 8200, Section 4.5 + */ + if (ipv6_frag_thdr_truncated(skb, fhoff, &nexthdr)) { + pr_debug("Drop incomplete fragment\n"); + return 0; + } + if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) return -ENOMEM; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index c8cf1bbad74a..e3869bac9c88 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -318,15 +318,43 @@ out_fail: return -1; } +/* Check if the upper layer header is truncated in the first fragment. */ +bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp) +{ + u8 nexthdr = *nexthdrp; + __be16 frag_off; + int offset; + + offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off); + if (offset < 0 || (frag_off & htons(IP6_OFFSET))) + return false; + switch (nexthdr) { + case NEXTHDR_TCP: + offset += sizeof(struct tcphdr); + break; + case NEXTHDR_UDP: + offset += sizeof(struct udphdr); + break; + case NEXTHDR_ICMP: + offset += sizeof(struct icmp6hdr); + break; + default: + offset += 1; + } + if (offset > skb->len) + return true; + return false; +} +EXPORT_SYMBOL(ipv6_frag_thdr_truncated); + static int ipv6_frag_rcv(struct sk_buff *skb) { struct frag_hdr *fhdr; struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); - __be16 frag_off; - int iif, offset; u8 nexthdr; + int iif; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; @@ -362,24 +390,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb) * the source of the fragment, with the Pointer field set to zero. */ nexthdr = hdr->nexthdr; - offset = ipv6_skip_exthdr(skb, skb_transport_offset(skb), &nexthdr, &frag_off); - if (offset >= 0) { - /* Check some common protocols' header */ - if (nexthdr == IPPROTO_TCP) - offset += sizeof(struct tcphdr); - else if (nexthdr == IPPROTO_UDP) - offset += sizeof(struct udphdr); - else if (nexthdr == IPPROTO_ICMPV6) - offset += sizeof(struct icmp6hdr); - else - offset += 1; - - if (!(frag_off & htons(IP6_OFFSET)) && offset > skb->len) { - __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); - return -1; - } + if (ipv6_frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) { + __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); + return -1; } iif = skb->dev ? skb->dev->ifindex : 0; From 857524564eae8aefc3006a3d35139bb69ca53210 Mon Sep 17 00:00:00 2001 From: Xie He Date: Sat, 14 Nov 2020 03:10:29 -0800 Subject: [PATCH 556/710] MAINTAINERS: Add Martin Schiller as a maintainer for the X.25 stack Martin Schiller is an active developer and reviewer for the X.25 code. His company is providing products based on the Linux X.25 stack. So he is a good candidate for maintainers of the X.25 code. The original maintainer of the X.25 network layer (Andrew Hendry) has not sent any email to the netdev mail list since 2013. So he is probably inactive now. Cc: Andrew Hendry Signed-off-by: Xie He Acked-by: Martin Schiller Link: https://lore.kernel.org/r/20201114111029.326972-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4a34b25ecc1f..e0144f396462 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9843,13 +9843,6 @@ S: Maintained F: arch/mips/lantiq F: drivers/soc/lantiq -LAPB module -L: linux-x25@vger.kernel.org -S: Orphan -F: Documentation/networking/lapb-module.rst -F: include/*/lapb.h -F: net/lapb/ - LASI 53c700 driver for PARISC M: "James E.J. Bottomley" L: linux-scsi@vger.kernel.org @@ -18998,12 +18991,18 @@ L: linux-kernel@vger.kernel.org S: Maintained N: axp[128] -X.25 NETWORK LAYER -M: Andrew Hendry +X.25 STACK +M: Martin Schiller L: linux-x25@vger.kernel.org -S: Odd Fixes +S: Maintained +F: Documentation/networking/lapb-module.rst F: Documentation/networking/x25* +F: drivers/net/wan/hdlc_x25.c +F: drivers/net/wan/lapbether.c +F: include/*/lapb.h F: include/net/x25* +F: include/uapi/linux/x25.h +F: net/lapb/ F: net/x25/ X86 ARCHITECTURE (32-BIT AND 64-BIT) From 4fba15fbb8106e8db17f486d653484e64969eb87 Mon Sep 17 00:00:00 2001 From: Aili Yao Date: Tue, 10 Nov 2020 00:33:34 -0800 Subject: [PATCH 557/710] ACPI, APEI, Fix error return value in apei_map_generic_address() From commit 6915564dc5a8 ("ACPI: OSL: Change the type of acpi_os_map_generic_address() return value"), acpi_os_map_generic_address() will return logical address or NULL for error, but for ACPI_ADR_SPACE_SYSTEM_IO case, it should be also return 0 as it's a normal case, but now it will return -ENXIO. So check it out for such case to avoid einj module initialization fail. Fixes: 6915564dc5a8 ("ACPI: OSL: Change the type of acpi_os_map_generic_address() return value") Cc: Reviewed-by: James Morse Tested-by: Tony Luck Signed-off-by: Aili Yao Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/apei-base.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 552fd9ffaca4..3294cc8dc073 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -633,6 +633,10 @@ int apei_map_generic_address(struct acpi_generic_address *reg) if (rc) return rc; + /* IO space doesn't need mapping */ + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) + return 0; + if (!acpi_os_map_generic_address(reg)) return -ENXIO; From 728321e53045d2668bf2b8627a8d61bc2c480d3b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 13 Nov 2020 02:21:19 -0500 Subject: [PATCH 558/710] drm/amd/display: Add missing pflip irq for dcn2.0 If we have more than 4 displays we will run into dummy irq calls or flip timout issues. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c index 2a1fea501f8c..3f1e7a196a23 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c @@ -299,8 +299,8 @@ irq_source_info_dcn20[DAL_IRQ_SOURCES_NUMBER] = { pflip_int_entry(1), pflip_int_entry(2), pflip_int_entry(3), - [DC_IRQ_SOURCE_PFLIP5] = dummy_irq_entry(), - [DC_IRQ_SOURCE_PFLIP6] = dummy_irq_entry(), + pflip_int_entry(4), + pflip_int_entry(5), [DC_IRQ_SOURCE_PFLIP_UNDERLAY0] = dummy_irq_entry(), gpio_pad_int_entry(0), gpio_pad_int_entry(1), From 1bd7b0fc0165694897b7d2fb39751a07b98f6bf1 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Mon, 16 Nov 2020 14:19:01 +0800 Subject: [PATCH 559/710] ASoC: Intel: KMB: Fix S24_LE configuration S24_LE is 24 bit audio in 32 bit container configuration Fixing the configuration to match the data arrangement of this audio format. Fixes: c5477e966728 ("ASoC: Intel: Add KeemBay platform driver") Signed-off-by: Michael Sit Wei Hong Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20201116061905.32431-2-michael.wei.hong.sit@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/keembay/kmb_platform.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/keembay/kmb_platform.c b/sound/soc/intel/keembay/kmb_platform.c index f54b710ee1c2..291a686568c2 100644 --- a/sound/soc/intel/keembay/kmb_platform.c +++ b/sound/soc/intel/keembay/kmb_platform.c @@ -487,9 +487,9 @@ static int kmb_dai_hw_params(struct snd_pcm_substream *substream, kmb_i2s->xfer_resolution = 0x02; break; case SNDRV_PCM_FORMAT_S24_LE: - config->data_width = 24; - kmb_i2s->ccr = 0x08; - kmb_i2s->xfer_resolution = 0x04; + config->data_width = 32; + kmb_i2s->ccr = 0x14; + kmb_i2s->xfer_resolution = 0x05; break; case SNDRV_PCM_FORMAT_S32_LE: config->data_width = 32; From bd6327fda2f3ded85b69b3c3125c99aaa51c7881 Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Sun, 15 Nov 2020 10:26:50 +0530 Subject: [PATCH 560/710] ASoC: qcom: lpass-platform: Fix memory leak lpass_pcm_data is not freed in error paths. Free it in error paths to avoid memory leak. Fixes: 022d00ee0b55 ("ASoC: lpass-platform: Fix broken pcm data usage") Signed-off-by: Pavel Machek Signed-off-by: V Sujith Kumar Reddy Signed-off-by: Srinivasa Rao Mandadapu Link: https://lore.kernel.org/r/1605416210-14530-1-git-send-email-srivasam@codeaurora.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-platform.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 36d1512ffd1f..7a3fdf89968a 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -122,8 +122,10 @@ static int lpass_platform_pcmops_open(struct snd_soc_component *component, else dma_ch = 0; - if (dma_ch < 0) + if (dma_ch < 0) { + kfree(data); return dma_ch; + } if (cpu_dai->driver->id == LPASS_DP_RX) { map = drvdata->hdmiif_map; @@ -147,6 +149,7 @@ static int lpass_platform_pcmops_open(struct snd_soc_component *component, ret = snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); if (ret < 0) { + kfree(data); dev_err(soc_runtime->dev, "setting constraints failed: %d\n", ret); return -EINVAL; From ac9978fcad3c5abc43cdd225441ce9459c36e16b Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 16 Nov 2020 22:18:36 +0800 Subject: [PATCH 561/710] spi: cadence-quadspi: Fix error return code in cqspi_probe Fix to return the error code from devm_reset_control_get_optional_exclusive() instaed of 0 in cqspi_probe(). Fixes: 31fb632b5d43ca ("spi: Move cadence-quadspi driver to drivers/spi/") Reported-by: Hulk Robot Signed-off-by: Zhihao Cheng Reviewed-by: Philipp Zabel Link: https://lore.kernel.org/r/20201116141836.2970579-1-chengzhihao1@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 40938cf3806d..ba7d40c2922f 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1260,12 +1260,14 @@ static int cqspi_probe(struct platform_device *pdev) /* Obtain QSPI reset control */ rstc = devm_reset_control_get_optional_exclusive(dev, "qspi"); if (IS_ERR(rstc)) { + ret = PTR_ERR(rstc); dev_err(dev, "Cannot get QSPI reset.\n"); goto probe_reset_failed; } rstc_ocp = devm_reset_control_get_optional_exclusive(dev, "qspi-ocp"); if (IS_ERR(rstc_ocp)) { + ret = PTR_ERR(rstc_ocp); dev_err(dev, "Cannot get QSPI OCP reset.\n"); goto probe_reset_failed; } From aa9e3fa4992d83acb7311fc86d11d0d53e7ffb8e Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 16 Nov 2020 14:33:28 +0100 Subject: [PATCH 562/710] ASoC: Intel: catpt: Skip position update for unprepared streams Playing with very low period sizes may lead to timeouts when awaiting RESET_STREAM reply for offload streams. This is caused by NOTIFY_POSITION appearing in the middle of trigger(stop). Stream is unprepared during trigger(stop) where PAUSE_STREAM IPC gets invoked. However, all data that is already mixed in DSP firmware's mixer stream will still be played regardless of the pause. For offload streams, this means possibility for another NOTIFY_POSITION to process. Keep these notifications in check by only handling them when stream is in prepared state. Fixes: a126750fc865 ("ASoC: Intel: catpt: PCM operations") Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20201116133332.8530-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/catpt/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/catpt/pcm.c b/sound/soc/intel/catpt/pcm.c index f78018c857b8..174a786cd776 100644 --- a/sound/soc/intel/catpt/pcm.c +++ b/sound/soc/intel/catpt/pcm.c @@ -534,6 +534,8 @@ void catpt_stream_update_position(struct catpt_dev *cdev, dsppos = bytes_to_frames(r, pos->stream_position); + if (!stream->prepared) + goto exit; /* only offload is set_write_pos driven */ if (stream->template->type != CATPT_STRM_TYPE_RENDER) goto exit; From 1072460a1aabacf6ececda98acd3b5ecaad23fd2 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 16 Nov 2020 14:33:29 +0100 Subject: [PATCH 563/710] ASoC: Intel: catpt: Correct clock selection for dai trigger During stream start DSP firmware requires LPCS disabled as that moment in time is resource heavy. Currently high-clock is selected on start of second stream onwards while low-clock is re-selected before stream actually leaves RESUME state i.e. PAUSE_STREAM call. Fix this by always updating clock before RESUME_STREAM and directly after PAUSE_STREAM. Fixes: a126750fc865 ("ASoC: Intel: catpt: PCM operations") Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20201116133332.8530-3-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/catpt/pcm.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/catpt/pcm.c b/sound/soc/intel/catpt/pcm.c index 174a786cd776..2061978b6cb9 100644 --- a/sound/soc/intel/catpt/pcm.c +++ b/sound/soc/intel/catpt/pcm.c @@ -458,10 +458,6 @@ static int catpt_dai_prepare(struct snd_pcm_substream *substream, if (ret) return CATPT_IPC_ERROR(ret); - ret = catpt_dsp_update_lpclock(cdev); - if (ret) - return ret; - ret = catpt_dai_apply_usettings(dai, stream); if (ret) return ret; @@ -500,6 +496,7 @@ static int catpt_dai_trigger(struct snd_pcm_substream *substream, int cmd, case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: resume_stream: + catpt_dsp_update_lpclock(cdev); ret = catpt_ipc_resume_stream(cdev, stream->info.stream_hw_id); if (ret) return CATPT_IPC_ERROR(ret); @@ -507,11 +504,11 @@ static int catpt_dai_trigger(struct snd_pcm_substream *substream, int cmd, case SNDRV_PCM_TRIGGER_STOP: stream->prepared = false; - catpt_dsp_update_lpclock(cdev); fallthrough; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: ret = catpt_ipc_pause_stream(cdev, stream->info.stream_hw_id); + catpt_dsp_update_lpclock(cdev); if (ret) return CATPT_IPC_ERROR(ret); break; From 0abed7c69b956d135cb6d320c350b2adb213e7d8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 16 Nov 2020 13:36:24 -0700 Subject: [PATCH 564/710] mm: never attempt async page lock if we've transferred data already We catch the case where we enter generic_file_buffered_read() with data already transferred, but we also need to be careful not to allow an async page lock if we're looping transferring data. If not, we could be returning -EIOCBQUEUED instead of the transferred amount, and it could result in double waitqueue additions as well. Cc: stable@vger.kernel.org # v5.9 Fixes: 1a0a7853b901 ("mm: support async buffered reads in generic_file_buffered_read()") Signed-off-by: Jens Axboe --- mm/filemap.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index d5e7c2029d16..3ebbe64a0106 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2347,10 +2347,15 @@ page_ok: page_not_up_to_date: /* Get exclusive access to the page ... */ - if (iocb->ki_flags & IOCB_WAITQ) + if (iocb->ki_flags & IOCB_WAITQ) { + if (written) { + put_page(page); + goto out; + } error = lock_page_async(page, iocb->ki_waitq); - else + } else { error = lock_page_killable(page); + } if (unlikely(error)) goto readpage_error; @@ -2393,10 +2398,15 @@ readpage: } if (!PageUptodate(page)) { - if (iocb->ki_flags & IOCB_WAITQ) + if (iocb->ki_flags & IOCB_WAITQ) { + if (written) { + put_page(page); + goto out; + } error = lock_page_async(page, iocb->ki_waitq); - else + } else { error = lock_page_killable(page); + } if (unlikely(error)) goto readpage_error; From 2acc3c1bc8e98bc66b1badec42e9ea205b4fcdaa Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 16 Nov 2020 18:16:33 +0800 Subject: [PATCH 565/710] selftests/bpf: Fix error return code in run_getsockopt_test() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 65b4414a05eb ("selftests/bpf: add sockopt test that exercises BPF_F_ALLOW_MULTI") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201116101633.64627-1-wanghai38@huawei.com --- tools/testing/selftests/bpf/prog_tests/sockopt_multi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index 29188d6f5c8d..51fac975b316 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -138,7 +138,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, */ buf = 0x40; - if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) { + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (err < 0) { log_err("Failed to call setsockopt(IP_TOS)"); goto detach; } From 2a1828e378c1b5ba1ff283ed8f8c5cc37bb391dc Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 15 Nov 2020 17:57:57 +0100 Subject: [PATCH 566/710] net: lantiq: Wait for the GPHY firmware to be ready A user reports (slightly shortened from the original message): libphy: lantiq,xrx200-mdio: probed mdio_bus 1e108000.switch-mii: MDIO device at address 17 is missing. gswip 1e108000.switch lan: no phy at 2 gswip 1e108000.switch lan: failed to connect to port 2: -19 lantiq,xrx200-net 1e10b308.eth eth0: error -19 setting up slave phy This is a single-port board using the internal Fast Ethernet PHY. The user reports that switching to PHY scanning instead of configuring the PHY within device-tree works around this issue. The documentation for the standalone variant of the PHY11G (which is probably very similar to what is used inside the xRX200 SoCs but having the firmware burnt onto that standalone chip in the factory) states that the PHY needs 300ms to be ready for MDIO communication after releasing the reset. Add a 300ms delay after initializing all GPHYs to ensure that the GPHY firmware had enough time to initialize and to appear on the MDIO bus. Unfortunately there is no (known) documentation on what the minimum time to wait after releasing the reset on an internal PHY so play safe and take the one for the external variant. Only wait after the last GPHY firmware is loaded to not slow down the initialization too much ( xRX200 has two GPHYs but newer SoCs have at least three GPHYs). Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") Reviewed-by: Andrew Lunn Signed-off-by: Martin Blumenstingl Acked-by: Hauke Mehrtens Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20201115165757.552641-1-martin.blumenstingl@googlemail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq_gswip.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 74db81dafee3..09701c17f3f6 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -26,6 +26,7 @@ */ #include +#include #include #include #include @@ -1837,6 +1838,16 @@ static int gswip_gphy_fw_list(struct gswip_priv *priv, i++; } + /* The standalone PHY11G requires 300ms to be fully + * initialized and ready for any MDIO communication after being + * taken out of reset. For the SoC-internal GPHY variant there + * is no (known) documentation for the minimum time after a + * reset. Use the same value as for the standalone variant as + * some users have reported internal PHYs not being detected + * without any delay. + */ + msleep(300); + return 0; remove_gphy: From c1609f0e2882095408708a80693e298a90f17904 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Nov 2020 10:10:52 -0500 Subject: [PATCH 567/710] drm/amdgpu: remove experimental flag from arcturus This has been stable for a while. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 42d9748921f5..8e988f07f085 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1055,10 +1055,10 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, /* Arcturus */ - {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, + {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, + {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, + {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, /* Navi10 */ {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, From 794e442ca39e6c8d46003c430559bdb67a73690c Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 16 Nov 2020 16:13:22 +0530 Subject: [PATCH 568/710] MAINTAINERS: update cxgb4 and cxgb3 maintainer Update cxgb4 and cxgb3 driver maintainer Signed-off-by: Raju Rangoju Link: https://lore.kernel.org/r/20201116104322.3959-1-rajur@chelsio.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e0144f396462..8cb1aae96bdf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4710,7 +4710,7 @@ T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/cxd2820r* CXGB3 ETHERNET DRIVER (CXGB3) -M: Vishal Kulkarni +M: Raju Rangoju L: netdev@vger.kernel.org S: Supported W: http://www.chelsio.com @@ -4742,7 +4742,7 @@ W: http://www.chelsio.com F: drivers/net/ethernet/chelsio/inline_crypto/ CXGB4 ETHERNET DRIVER (CXGB4) -M: Vishal Kulkarni +M: Raju Rangoju L: netdev@vger.kernel.org S: Supported W: http://www.chelsio.com @@ -4764,7 +4764,7 @@ F: drivers/infiniband/hw/cxgb4/ F: include/uapi/rdma/cxgb4-abi.h CXGB4VF ETHERNET DRIVER (CXGB4VF) -M: Vishal Kulkarni +M: Raju Rangoju L: netdev@vger.kernel.org S: Supported W: http://www.chelsio.com From 85a12d7eb8fe449cf38f1aa9ead5ca744729a98f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 11 Nov 2020 09:09:36 -0500 Subject: [PATCH 569/710] drm/i915/tgl: Fix Media power gate sequence. Some media power gates are disabled by default. commit 5d86923060fc ("drm/i915/tgl: Enable VD HCP/MFX sub-pipe power gating") tried to enable it, but it duplicated an existent register. So, the main PG setup sequences ended up overwriting it. So, let's now merge this to the main PG setup sequence. v2: (Chris): s/BIT/REG_BIT, remove useless comment, remove useless =0, use the right gt, remove rc6 sequence doubt from commit message. Fixes: 5d86923060fc ("drm/i915/tgl: Enable VD HCP/MFX sub-pipe power gating") Cc: Lucas De Marchi Cc: stable@vger.kernel.org#v5.5+ Cc: Dale B Stimson Signed-off-by: Rodrigo Vivi Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201111072859.1186070-1-rodrigo.vivi@intel.com (cherry picked from commit 695dc55b573985569259e18f8e6261a77924342b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_rc6.c | 22 +++++++++++++++++----- drivers/gpu/drm/i915/i915_reg.h | 12 +++++------- drivers/gpu/drm/i915/intel_pm.c | 13 ------------- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index ab675d35030d..d7b8e4457fc2 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -56,9 +56,12 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) static void gen11_rc6_enable(struct intel_rc6 *rc6) { - struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_gt *gt = rc6_to_gt(rc6); + struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; + u32 pg_enable; + int i; /* 2b: Program RC6 thresholds.*/ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); @@ -102,10 +105,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - set(uncore, GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE); + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; + + if (INTEL_GEN(gt->i915) >= 12) { + for (i = 0; i < I915_MAX_VCS; i++) + if (HAS_ENGINE(gt, _VCS(i))) + pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) | + VDN_MFX_POWERGATE_ENABLE(i)); + } + + set(uncore, GEN9_PG_ENABLE, pg_enable); } static void gen9_rc6_enable(struct intel_rc6 *rc6) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d805d4da6181..664f3bf9af03 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8971,10 +8971,6 @@ enum { #define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0) #define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1) -#define POWERGATE_ENABLE _MMIO(0xa210) -#define VDN_HCP_POWERGATE_ENABLE(n) BIT(((n) * 2) + 3) -#define VDN_MFX_POWERGATE_ENABLE(n) BIT(((n) * 2) + 4) - #define GTFIFODBG _MMIO(0x120000) #define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20) #define GT_FIFO_FREE_ENTRIES_CHV (0x7f << 13) @@ -9114,9 +9110,11 @@ enum { #define GEN9_MEDIA_PG_IDLE_HYSTERESIS _MMIO(0xA0C4) #define GEN9_RENDER_PG_IDLE_HYSTERESIS _MMIO(0xA0C8) #define GEN9_PG_ENABLE _MMIO(0xA210) -#define GEN9_RENDER_PG_ENABLE REG_BIT(0) -#define GEN9_MEDIA_PG_ENABLE REG_BIT(1) -#define GEN11_MEDIA_SAMPLER_PG_ENABLE REG_BIT(2) +#define GEN9_RENDER_PG_ENABLE REG_BIT(0) +#define GEN9_MEDIA_PG_ENABLE REG_BIT(1) +#define GEN11_MEDIA_SAMPLER_PG_ENABLE REG_BIT(2) +#define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) +#define VDN_MFX_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) #define GEN8_PUSHBUS_CONTROL _MMIO(0xA248) #define GEN8_PUSHBUS_ENABLE _MMIO(0xA250) #define GEN8_PUSHBUS_SHIFT _MMIO(0xA25C) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 34e0d22d456b..cfb806767fc5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7118,23 +7118,10 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) static void tgl_init_clock_gating(struct drm_i915_private *dev_priv) { - u32 vd_pg_enable = 0; - unsigned int i; - /* Wa_1409120013:tgl */ I915_WRITE(ILK_DPFC_CHICKEN, ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); - /* This is not a WA. Enable VD HCP & MFX_ENC powergate */ - for (i = 0; i < I915_MAX_VCS; i++) { - if (HAS_ENGINE(&dev_priv->gt, _VCS(i))) - vd_pg_enable |= VDN_HCP_POWERGATE_ENABLE(i) | - VDN_MFX_POWERGATE_ENABLE(i); - } - - I915_WRITE(POWERGATE_ENABLE, - I915_READ(POWERGATE_ENABLE) | vd_pg_enable); - /* Wa_1409825376:tgl (pre-prod)*/ if (IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_B1)) I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) | From 973dd87fa56ac943ce1060fd07244d7652115164 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 13 Nov 2020 13:25:10 +0000 Subject: [PATCH 570/710] drm/i915: Avoid memory leak with more than 16 workarounds on a list I forgot to free the old list when growing past 16 entries. Luckily, as much as I checked, none of the current platforms has more than 16 workarounds on a single list. Signed-off-by: Tvrtko Ursulin Fixes: 452420d22d5b ("drm/i915: Fuse per-context workaround handling with the common framework") Reported-by: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201113132510.2298483-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 77c296966e866a795742a46fc52a218771894867) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 6c580d0d9ea8..4a3bde7c9f21 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -131,8 +131,10 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) return; } - if (wal->list) + if (wal->list) { memcpy(list, wal->list, sizeof(*wa) * wal->count); + kfree(wal->list); + } wal->list = list; } From 2106edbdfd15e37afa6c5225421b8036bf0e38ec Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Mon, 16 Nov 2020 09:41:12 -0500 Subject: [PATCH 571/710] drm/i915/selftests: Fix wrong return value of perf_series_engines() If intel context create failed, the perf_series_engines() will return 0 rather than error, because we doesn't initialize the return value. Fixes: cbfd3a0c5a55 ("drm/i915/selftests: Add request throughput measurement to perf") Reported-by: Hulk Robot Signed-off-by: Zhang Xiaoxu Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201116144112.3673011-1-zhangxiaoxu5@huawei.com (cherry picked from commit 01d708840c26c9532579677eaca942363a009fd5) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/selftests/i915_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 64bbb8288249..480b3da4d8a6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -2467,8 +2467,10 @@ static int perf_series_engines(void *arg) struct intel_context *ce; ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + err = PTR_ERR(ce); goto out; + } err = intel_context_pin(ce); if (err) { From b5462cc377748181af2b05729c69f5faecec3717 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Mon, 16 Nov 2020 09:35:40 -0500 Subject: [PATCH 572/710] drm/i915/selftests: Fix wrong return value of perf_request_latency() If intel context create failed, the perf_request_latency() will return 0 rather than error, because we doesn't initialize the return value. Fixes: 25c26f18ea79 ("drm/i915/selftests: Measure dispatch latency") Reported-by: Hulk Robot Signed-off-by: Zhang Xiaoxu Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201116143540.3648870-1-zhangxiaoxu5@huawei.com (cherry picked from commit 19384452052a1e0525e663bfbdd62ac1399bb647) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/selftests/i915_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 480b3da4d8a6..e424a6d1a68c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -2293,8 +2293,10 @@ static int perf_request_latency(void *arg) struct intel_context *ce; ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + err = PTR_ERR(ce); goto out; + } err = intel_context_pin(ce); if (err) { From cfbaa8b33e022aca62a3f2815ffbc02874d4cb8b Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 13 Nov 2020 14:07:07 +0800 Subject: [PATCH 573/710] cx82310_eth: fix error return code in cx82310_bind() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: ca139d76b0d9 ("cx82310_eth: re-enable ethernet mode after router reboot") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1605247627-15385-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/cx82310_eth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index ca89d8258dd3..c4568a491dc4 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -197,7 +197,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) } /* enable ethernet mode (?) */ - if (cx82310_enable_ethernet(dev)) + ret = cx82310_enable_ethernet(dev); + if (ret) goto err; /* get the MAC address */ From 3beb9be165083c2964eba1923601c3bfac0b02d4 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 13 Nov 2020 14:16:26 +0800 Subject: [PATCH 574/710] qlcnic: fix error return code in qlcnic_83xx_restart_hw() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 3ced0a88cd4c ("qlcnic: Add support to run firmware POST") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1605248186-16013-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index b8af59fc1aa4..d2c190732d3e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -2231,7 +2231,8 @@ static int qlcnic_83xx_restart_hw(struct qlcnic_adapter *adapter) /* Boot either flash image or firmware image from host file system */ if (qlcnic_load_fw_file == 1) { - if (qlcnic_83xx_load_fw_image_from_host(adapter)) + err = qlcnic_83xx_load_fw_image_from_host(adapter); + if (err) return err; } else { QLC_SHARED_REG_WR32(adapter, QLCNIC_FW_IMG_VALID, From 661710bfd5039267f911e42675ab743760b6449d Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 13 Nov 2020 14:34:03 +0800 Subject: [PATCH 575/710] net: stmmac: dwmac-intel-plat: fix error return code in intel_eth_plat_probe() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 9efc9b2b04c7 ("net: stmmac: Add dwmac-intel-plat for GBE driver") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1605249243-17262-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index f61cb997a8f6..82b1c7a5a7a9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -113,8 +113,10 @@ static int intel_eth_plat_probe(struct platform_device *pdev) /* Enable TX clock */ if (dwmac->data->tx_clk_en) { dwmac->tx_clk = devm_clk_get(&pdev->dev, "tx_clk"); - if (IS_ERR(dwmac->tx_clk)) + if (IS_ERR(dwmac->tx_clk)) { + ret = PTR_ERR(dwmac->tx_clk); goto err_remove_config_dt; + } clk_prepare_enable(dwmac->tx_clk); From 35f735c665114840dcd3142f41148d07870f51f7 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 13 Nov 2020 14:49:33 +0800 Subject: [PATCH 576/710] net: ethernet: ti: cpsw: fix error return code in cpsw_probe() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 83a8471ba255 ("net: ethernet: ti: cpsw: refactor probe to group common hw initialization") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1605250173-18438-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index fa2d1025cbb2..b0f00b4edd94 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1634,6 +1634,7 @@ static int cpsw_probe(struct platform_device *pdev) CPSW_MAX_QUEUES, CPSW_MAX_QUEUES); if (!ndev) { dev_err(dev, "error allocating net_device\n"); + ret = -ENOMEM; goto clean_cpts; } From 7a30ecc9237681bb125cbd30eee92bef7e86293d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 13 Nov 2020 10:27:27 +0100 Subject: [PATCH 577/710] net: bridge: add missing counters to ndo_get_stats64 callback In br_forward.c and br_input.c fields dev->stats.tx_dropped and dev->stats.multicast are populated, but they are ignored in ndo_get_stats64. Fixes: 28172739f0a2 ("net: fix 64 bit counters on 32 bit arches") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/58ea9963-77ad-a7cf-8dfd-fc95ab95f606@gmail.com Signed-off-by: Jakub Kicinski --- net/bridge/br_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 6f742fee874a..7730c8f3cb53 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -207,6 +207,7 @@ static void br_get_stats64(struct net_device *dev, { struct net_bridge *br = netdev_priv(dev); + netdev_stats_to_stats64(stats, &dev->stats); dev_fetch_sw_netstats(stats, br->stats); } From 8e5debed39017836a850c6c7bfacc93299d19bad Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Sun, 15 Nov 2020 15:42:10 +0800 Subject: [PATCH 578/710] net: stmmac: Use rtnl_lock/unlock on netif_set_real_num_rx_queues() call Fix an issue where dump stack is printed on suspend resume flow due to netif_set_real_num_rx_queues() is not called with rtnl_lock held(). Fixes: 686cff3d7022 ("net: stmmac: Fix incorrect location to set real_num_rx|tx_queues") Reported-by: Christophe ROULLIER Tested-by: Christophe ROULLIER Cc: Alexandre TORGUE Reviewed-by: Ong Boon Leong Signed-off-by: Wong Vee Khee Link: https://lore.kernel.org/r/20201115074210.23605-1-vee.khee.wong@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d833908b660a..ba45fe237512 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5247,6 +5247,7 @@ int stmmac_resume(struct device *dev) return ret; } + rtnl_lock(); mutex_lock(&priv->lock); stmmac_reset_queues_param(priv); @@ -5262,6 +5263,7 @@ int stmmac_resume(struct device *dev) stmmac_enable_all_queues(priv); mutex_unlock(&priv->lock); + rtnl_unlock(); if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { rtnl_lock(); From 9c79a8ab5f124db01eb1d7287454a702f0d4252f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 13 Nov 2020 19:16:57 +0100 Subject: [PATCH 579/710] net: mvneta: fix possible memory leak in mvneta_swbm_add_rx_fragment Recycle the page running page_pool_put_full_page() in mvneta_swbm_add_rx_fragment routine when the last descriptor contains just the FCS or if the received packet contains more than MAX_SKB_FRAGS fragments Fixes: ca0e014609f0 ("net: mvneta: move skb build after descriptors processing") Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/df6a2bad70323ee58d3901491ada31c1ca2a40b9.1605291228.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 54b0bf574c05..4a9041ee1b39 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2287,6 +2287,7 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, dma_sync_single_for_cpu(dev->dev.parent, rx_desc->buf_phys_addr, len, dma_dir); + rx_desc->buf_phys_addr = 0; if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) { skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags]; @@ -2295,8 +2296,8 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, skb_frag_size_set(frag, data_len); __skb_frag_set_page(frag, page); sinfo->nr_frags++; - - rx_desc->buf_phys_addr = 0; + } else { + page_pool_put_full_page(rxq->page_pool, page, true); } *size -= len; } From fc70f5bf5e525dde81565f0a30d5e39168062eba Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 13 Nov 2020 13:12:05 -0700 Subject: [PATCH 580/710] net: qualcomm: rmnet: Fix incorrect receive packet handling during cleanup During rmnet unregistration, the real device rx_handler is first cleared followed by the removal of rx_handler_data after the rcu synchronization. Any packets in the receive path may observe that the rx_handler is NULL. However, there is no check when dereferencing this value to use the rmnet_port information. This fixes following splat by adding the NULL check. Unable to handle kernel NULL pointer dereference at virtual address 000000000000000d pc : rmnet_rx_handler+0x124/0x284 lr : rmnet_rx_handler+0x124/0x284 rmnet_rx_handler+0x124/0x284 __netif_receive_skb_core+0x758/0xd74 __netif_receive_skb+0x50/0x17c process_backlog+0x15c/0x1b8 napi_poll+0x88/0x284 net_rx_action+0xbc/0x23c __do_softirq+0x20c/0x48c Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Link: https://lore.kernel.org/r/1605298325-3705-1-git-send-email-subashab@codeaurora.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 29a7bfa2584d..3d7d3ab383f8 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -188,6 +188,11 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) dev = skb->dev; port = rmnet_get_port_rcu(dev); + if (unlikely(!port)) { + atomic_long_inc(&skb->dev->rx_nohandler); + kfree_skb(skb); + goto done; + } switch (port->rmnet_mode) { case RMNET_EPMODE_VND: From 3fe16edf6767decd640fa2654308bc64f8d656dc Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Sun, 15 Nov 2020 07:16:00 +0300 Subject: [PATCH 581/710] net/tls: fix corrupted data in recvmsg If tcp socket has more data than Encrypted Handshake Message then tls_sw_recvmsg will try to decrypt next record instead of returning full control message to userspace as mentioned in comment. The next message - usually Application Data - gets corrupted because it uses zero copy for decryption that's why the data is not stored in skb for next iteration. Revert check to not decrypt next record if current is not Application Data. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1605413760-21153-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 95ab5545a931..2fe9e2cf8659 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1913,7 +1913,7 @@ pick_next_record: * another message type */ msg->msg_flags |= MSG_EOR; - if (ctx->control != TLS_RECORD_TYPE_DATA) + if (control != TLS_RECORD_TYPE_DATA) goto recv_end; } else { break; From 064c9c32b17ca9b36f95eba32ee790dbbebd9a5f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 14 Nov 2020 12:20:17 -0600 Subject: [PATCH 582/710] net: ipa: lock when freeing transaction Transactions sit on one of several lists, depending on their state (allocated, pending, complete, or polled). A spinlock protects against concurrent access when transactions are moved between these lists. Transactions are also reference counted. A newly-allocated transaction has an initial count of 1; a transaction is released in gsi_trans_free() only if its decremented reference count reaches 0. Releasing a transaction includes removing it from the polled (or if unused, allocated) list, so the spinlock is acquired when we release a transaction. The reference count is used to allow a caller to synchronously wait for a committed transaction to complete. In this case, the waiter takes an extra reference to the transaction *before* committing it (so it won't be freed), and releases its reference (calls gsi_trans_free()) when it is done with it. Similarly, gsi_channel_update() takes an extra reference to ensure a transaction isn't released before the function is done operating on it. Until the transaction is moved to the completed list (by this function) it won't be freed, so this reference is taken "safely." But in the quiesce path, we want to wait for the "last" transaction, which we find in the completed or polled list. Transactions on these lists can be freed at any time, so we (try to) prevent that by taking the reference while holding the spinlock. Currently gsi_trans_free() decrements a transaction's reference count unconditionally, acquiring the lock to remove the transaction from its list *only* when the count reaches 0. This does not protect the quiesce path, which depends on the lock to ensure its extra reference prevents release of the transaction. Fix this by only dropping the last reference to a transaction in gsi_trans_free() while holding the spinlock. Fixes: 9dd441e4ed575 ("soc: qcom: ipa: GSI transactions") Reported-by: Stephen Boyd Signed-off-by: Alex Elder Link: https://lore.kernel.org/r/20201114182017.28270-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi_trans.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index 92642030e735..e8599bb948c0 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -362,22 +362,31 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id, return trans; } -/* Free a previously-allocated transaction (used only in case of error) */ +/* Free a previously-allocated transaction */ void gsi_trans_free(struct gsi_trans *trans) { + refcount_t *refcount = &trans->refcount; struct gsi_trans_info *trans_info; + bool last; - if (!refcount_dec_and_test(&trans->refcount)) + /* We must hold the lock to release the last reference */ + if (refcount_dec_not_one(refcount)) return; trans_info = &trans->gsi->channel[trans->channel_id].trans_info; spin_lock_bh(&trans_info->spinlock); - list_del(&trans->links); + /* Reference might have been added before we got the lock */ + last = refcount_dec_and_test(refcount); + if (last) + list_del(&trans->links); spin_unlock_bh(&trans_info->spinlock); + if (!last) + return; + ipa_gsi_trans_release(trans); /* Releasing the reserved TREs implicitly frees the sgl[] and From 4260330b32b14330cfe427d568ac5f5b29b5be3d Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Sun, 15 Nov 2020 19:27:49 -0500 Subject: [PATCH 583/710] bnxt_en: read EEPROM A2h address using page 0 The module eeprom address range returned by bnxt_get_module_eeprom() should be 256 bytes of A0h address space, the lower half of the A2h address space, and page 0 for the upper half of the A2h address space. Fix the firmware call by passing page_number 0 for the A2h slave address space. Fixes: 42ee18fe4ca2 ("bnxt_en: Add Support for ETHTOOL_GMODULEINFO and ETHTOOL_GMODULEEEPRO") Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 53687bc7fcf5..f133ea5674cb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2997,7 +2997,7 @@ static int bnxt_get_module_eeprom(struct net_device *dev, /* Read A2 portion of the EEPROM */ if (length) { start -= ETH_MODULE_SFF_8436_LEN; - rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, + rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 0, start, length, data); } return rc; From eba93de6d31c1734dee59909020a162de612e41e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 15 Nov 2020 19:27:50 -0500 Subject: [PATCH 584/710] bnxt_en: Free port stats during firmware reset. Firmware is unable to retain the port counters during any kind of fatal or non-fatal resets, so we must clear the port counters to avoid false detection of port counter overflow. Fixes: fea6b3335527 ("bnxt_en: Accumulate all counters.") Reviewed-by: Edwin Peer Reviewed-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7975f59735d6..448e1ba762ee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4099,7 +4099,8 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init) bnxt_free_ntp_fltrs(bp, irq_re_init); if (irq_re_init) { bnxt_free_ring_stats(bp); - if (!(bp->fw_cap & BNXT_FW_CAP_PORT_STATS_NO_RESET)) + if (!(bp->fw_cap & BNXT_FW_CAP_PORT_STATS_NO_RESET) || + test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) bnxt_free_port_stats(bp); bnxt_free_ring_grps(bp); bnxt_free_vnics(bp); From fa97f303fa4cf8469fd3d1ef29da69c0a3f6ddc8 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 15 Nov 2020 19:27:51 -0500 Subject: [PATCH 585/710] bnxt_en: Fix counter overflow logic. bnxt_add_one_ctr() adds a hardware counter to a software counter and adjusts for the hardware counter wraparound against the mask. The logic assumes that the hardware counter is always smaller than or equal to the mask. This assumption is mostly correct. But in some cases if the firmware is older and does not provide the accurate mask, the driver can use a mask that is smaller than the actual hardware mask. This can cause some extra carry bits to be added to the software counter, resulting in counters that far exceed the actual value. Fix it by masking the hardware counter with the mask passed into bnxt_add_one_ctr(). Fixes: fea6b3335527 ("bnxt_en: Accumulate all counters.") Reviewed-by: Vasundhara Volam Reviewed-by: Pavan Chebbi Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 448e1ba762ee..7c21aaa8b9af 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7758,6 +7758,7 @@ static void bnxt_add_one_ctr(u64 hw, u64 *sw, u64 mask) { u64 sw_tmp; + hw &= mask; sw_tmp = (*sw & ~mask) | hw; if (hw < (*sw & mask)) sw_tmp += mask + 1; From 0ae0a779efb8840a0cdb2d6bd9a5d07663ac3ee2 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 15 Nov 2020 19:27:52 -0500 Subject: [PATCH 586/710] bnxt_en: Avoid unnecessary NVM_GET_DEV_INFO cmd error log on VFs. VFs do not have access permissions to issue NVM_GET_DEV_INFO firmware command. Fixes: 4933f6753b50 ("bnxt_en: Add bnxt_hwrm_nvm_get_dev_info() to query NVM info.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index f133ea5674cb..1471c9a36238 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2079,6 +2079,9 @@ int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp, struct hwrm_nvm_get_dev_info_input req = {0}; int rc; + if (BNXT_VF(bp)) + return -EOPNOTSUPP; + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_DEV_INFO, -1, -1); mutex_lock(&bp->hwrm_cmd_lock); rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); From fe0a8a95e7134d0b44cd407bc0085b9ba8d8fe31 Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Fri, 6 Nov 2020 11:33:17 -0800 Subject: [PATCH 587/710] scsi: libiscsi: Fix NOP race condition iSCSI NOPs are sometimes "lost", mistakenly sent to the user-land iscsid daemon instead of handled in the kernel, as they should be, resulting in a message from the daemon like: iscsid: Got nop in, but kernel supports nop handling. This can occur because of the new forward- and back-locks, and the fact that an iSCSI NOP response can occur before processing of the NOP send is complete. This can result in "conn->ping_task" being NULL in iscsi_nop_out_rsp(), when the pointer is actually in the process of being set. To work around this, we add a new state to the "ping_task" pointer. In addition to NULL (not assigned) and a pointer (assigned), we add the state "being set", which is signaled with an INVALID pointer (using "-1"). Link: https://lore.kernel.org/r/20201106193317.16993-1-leeman.duncan@gmail.com Reviewed-by: Mike Christie Signed-off-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 23 +++++++++++++++-------- include/scsi/libiscsi.h | 3 +++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 1e9c3171fa9f..f9314f1393fb 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -533,8 +533,8 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) if (conn->task == task) conn->task = NULL; - if (conn->ping_task == task) - conn->ping_task = NULL; + if (READ_ONCE(conn->ping_task) == task) + WRITE_ONCE(conn->ping_task, NULL); /* release get from queueing */ __iscsi_put_task(task); @@ -738,6 +738,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, task->conn->session->age); } + if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK)) + WRITE_ONCE(conn->ping_task, task); + if (!ihost->workq) { if (iscsi_prep_mgmt_task(conn, task)) goto free_task; @@ -941,8 +944,11 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) struct iscsi_nopout hdr; struct iscsi_task *task; - if (!rhdr && conn->ping_task) - return -EINVAL; + if (!rhdr) { + if (READ_ONCE(conn->ping_task)) + return -EINVAL; + WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK); + } memset(&hdr, 0, sizeof(struct iscsi_nopout)); hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE; @@ -957,11 +963,12 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0); if (!task) { + if (!rhdr) + WRITE_ONCE(conn->ping_task, NULL); iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n"); return -EIO; } else if (!rhdr) { /* only track our nops */ - conn->ping_task = task; conn->last_ping = jiffies; } @@ -984,7 +991,7 @@ static int iscsi_nop_out_rsp(struct iscsi_task *task, struct iscsi_conn *conn = task->conn; int rc = 0; - if (conn->ping_task != task) { + if (READ_ONCE(conn->ping_task) != task) { /* * If this is not in response to one of our * nops then it must be from userspace. @@ -1923,7 +1930,7 @@ static void iscsi_start_tx(struct iscsi_conn *conn) */ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn) { - if (conn->ping_task && + if (READ_ONCE(conn->ping_task) && time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) + (conn->ping_timeout * HZ), jiffies)) return 1; @@ -2058,7 +2065,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * Checking the transport already or nop from a cmd timeout still * running */ - if (conn->ping_task) { + if (READ_ONCE(conn->ping_task)) { task->have_checked_conn = true; rc = BLK_EH_RESET_TIMER; goto done; diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index c25fb86ffae9..b3bbd10eb3f0 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -132,6 +132,9 @@ struct iscsi_task { void *dd_data; /* driver/transport data */ }; +/* invalid scsi_task pointer */ +#define INVALID_SCSI_TASK (struct iscsi_task *)-1l + static inline int iscsi_task_has_unsol_data(struct iscsi_task *task) { return task->unsol_r2t.data_length > task->unsol_r2t.sent; From f36199355c64a39fe82cfddc7623d827c7e050da Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 13 Nov 2020 19:46:18 -0600 Subject: [PATCH 588/710] scsi: target: iscsi: Fix cmd abort fabric stop race Maurizio found a race where the abort and cmd stop paths can race as follows: 1. thread1 runs iscsit_release_commands_from_conn and sets CMD_T_FABRIC_STOP. 2. thread2 runs iscsit_aborted_task and then does __iscsit_free_cmd. It then returns from the aborted_task callout and we finish target_handle_abort and do: target_handle_abort -> transport_cmd_check_stop_to_fabric -> lio_check_stop_free -> target_put_sess_cmd The cmd is now freed. 3. thread1 now finishes iscsit_release_commands_from_conn and runs iscsit_free_cmd while accessing a command we just released. In __target_check_io_state we check for CMD_T_FABRIC_STOP and set the CMD_T_ABORTED if the driver is not cleaning up the cmd because of a session shutdown. However, iscsit_release_commands_from_conn only sets the CMD_T_FABRIC_STOP and does not check to see if the abort path has claimed completion ownership of the command. This adds a check in iscsit_release_commands_from_conn so only the abort or fabric stop path cleanup the command. Link: https://lore.kernel.org/r/1605318378-9269-1-git-send-email-michael.christie@oracle.com Reported-by: Maurizio Lombardi Reviewed-by: Maurizio Lombardi Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index f77e5eee6b80..518fac4864cf 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -483,8 +483,7 @@ EXPORT_SYMBOL(iscsit_queue_rsp); void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) { spin_lock_bh(&conn->cmd_lock); - if (!list_empty(&cmd->i_conn_node) && - !(cmd->se_cmd.transport_state & CMD_T_FABRIC_STOP)) + if (!list_empty(&cmd->i_conn_node)) list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); @@ -4083,12 +4082,22 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) spin_lock_bh(&conn->cmd_lock); list_splice_init(&conn->conn_cmd_list, &tmp_list); - list_for_each_entry(cmd, &tmp_list, i_conn_node) { + list_for_each_entry_safe(cmd, cmd_tmp, &tmp_list, i_conn_node) { struct se_cmd *se_cmd = &cmd->se_cmd; if (se_cmd->se_tfo != NULL) { spin_lock_irq(&se_cmd->t_state_lock); - se_cmd->transport_state |= CMD_T_FABRIC_STOP; + if (se_cmd->transport_state & CMD_T_ABORTED) { + /* + * LIO's abort path owns the cleanup for this, + * so put it back on the list and let + * aborted_task handle it. + */ + list_move_tail(&cmd->i_conn_node, + &conn->conn_cmd_list); + } else { + se_cmd->transport_state |= CMD_T_FABRIC_STOP; + } spin_unlock_irq(&se_cmd->t_state_lock); } } From e010d1d25e47642fb91023479a4965000cf934a8 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 3 Nov 2020 11:55:14 +0000 Subject: [PATCH 589/710] cpufreq: tegra186: Fix get frequency callback Commit b89c01c96051 ("cpufreq: tegra186: Fix initial frequency") implemented the CPUFREQ 'get' callback to determine the current operating frequency for each CPU. This implementation used a simple looked up to determine the current operating frequency. The problem with this is that frequency table for different Tegra186 devices may vary and so the default boot frequency for Tegra186 device may or may not be present in the frequency table. If the default boot frequency is not present in the frequency table, this causes the function tegra186_cpufreq_get() to return 0 and in turn causes cpufreq_online() to fail which prevents CPUFREQ from working. Fix this by always calculating the CPU frequency based upon the current 'ndiv' setting for the CPU. Note that the CPU frequency for Tegra186 is calculated by reading the current 'ndiv' setting, multiplying by the CPU reference clock and dividing by a constant divisor. Fixes: b89c01c96051 ("cpufreq: tegra186: Fix initial frequency") Signed-off-by: Jon Hunter Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra186-cpufreq.c | 33 +++++++++++++++++++----------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index 4b4079f51559..7eb2c56c65de 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -42,6 +42,8 @@ static const struct tegra186_cpufreq_cluster_info tegra186_clusters[] = { struct tegra186_cpufreq_cluster { const struct tegra186_cpufreq_cluster_info *info; struct cpufreq_frequency_table *table; + u32 ref_clk_khz; + u32 div; }; struct tegra186_cpufreq_data { @@ -94,7 +96,7 @@ static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy, static unsigned int tegra186_cpufreq_get(unsigned int cpu) { - struct cpufreq_frequency_table *tbl; + struct tegra186_cpufreq_data *data = cpufreq_get_driver_data(); struct cpufreq_policy *policy; void __iomem *edvd_reg; unsigned int i, freq = 0; @@ -104,17 +106,23 @@ static unsigned int tegra186_cpufreq_get(unsigned int cpu) if (!policy) return 0; - tbl = policy->freq_table; edvd_reg = policy->driver_data; ndiv = readl(edvd_reg) & EDVD_CORE_VOLT_FREQ_F_MASK; - for (i = 0; tbl[i].frequency != CPUFREQ_TABLE_END; i++) { - if ((tbl[i].driver_data & EDVD_CORE_VOLT_FREQ_F_MASK) == ndiv) { - freq = tbl[i].frequency; - break; + for (i = 0; i < data->num_clusters; i++) { + struct tegra186_cpufreq_cluster *cluster = &data->clusters[i]; + int core; + + for (core = 0; core < ARRAY_SIZE(cluster->info->cpus); core++) { + if (cluster->info->cpus[core] != policy->cpu) + continue; + + freq = (cluster->ref_clk_khz * ndiv) / cluster->div; + goto out; } } +out: cpufreq_cpu_put(policy); return freq; @@ -133,7 +141,7 @@ static struct cpufreq_driver tegra186_cpufreq_driver = { static struct cpufreq_frequency_table *init_vhint_table( struct platform_device *pdev, struct tegra_bpmp *bpmp, - unsigned int cluster_id) + struct tegra186_cpufreq_cluster *cluster) { struct cpufreq_frequency_table *table; struct mrq_cpu_vhint_request req; @@ -152,7 +160,7 @@ static struct cpufreq_frequency_table *init_vhint_table( memset(&req, 0, sizeof(req)); req.addr = phys; - req.cluster_id = cluster_id; + req.cluster_id = cluster->info->bpmp_cluster_id; memset(&msg, 0, sizeof(msg)); msg.mrq = MRQ_CPU_VHINT; @@ -185,6 +193,9 @@ static struct cpufreq_frequency_table *init_vhint_table( goto free; } + cluster->ref_clk_khz = data->ref_clk_hz / 1000; + cluster->div = data->pdiv * data->mdiv; + for (i = data->vfloor, j = 0; i <= data->vceil; i++) { struct cpufreq_frequency_table *point; u16 ndiv = data->ndiv[i]; @@ -202,8 +213,7 @@ static struct cpufreq_frequency_table *init_vhint_table( point = &table[j++]; point->driver_data = edvd_val; - point->frequency = data->ref_clk_hz * ndiv / data->pdiv / - data->mdiv / 1000; + point->frequency = (cluster->ref_clk_khz * ndiv) / cluster->div; } table[j].frequency = CPUFREQ_TABLE_END; @@ -245,8 +255,7 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev) struct tegra186_cpufreq_cluster *cluster = &data->clusters[i]; cluster->info = &tegra186_clusters[i]; - cluster->table = init_vhint_table( - pdev, bpmp, cluster->info->bpmp_cluster_id); + cluster->table = init_vhint_table(pdev, bpmp, cluster); if (IS_ERR(cluster->table)) { err = PTR_ERR(cluster->table); goto put_bpmp; From 8410e7f3b31e53bfa7a34c282b4313e79ed7ff8d Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 10 Nov 2020 11:10:40 +0000 Subject: [PATCH 590/710] cpufreq: scmi: Fix OPP addition failure with a dummy clock provider Commit dd461cd9183f ("opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER") handles -EPROBE_DEFER for the clock/interconnects within _allocate_opp_table() which is called from dev_pm_opp_add and it now propagates the error back to the caller. SCMI performance domain re-used clock bindings to keep it simple. However with the above mentioned change, if clock property is present in a device node, opps fails to get added with below errors until clk_get succeeds. cpu0: failed to add opp 450000000Hz cpu0: failed to add opps to the device ....(errors on cpu1-cpu4) cpu5: failed to add opp 450000000Hz cpu5: failed to add opps to the device So, in order to fix the issue, we need to register dummy clock provider. With the dummy clock provider, clk_get returns NULL(no errors!), then opp core proceeds to add OPPs for the CPUs. Cc: Rafael J. Wysocki Fixes: dd461cd9183f ("opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER") Signed-off-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index e855e8612a67..78318508a6d6 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -228,12 +229,17 @@ static struct cpufreq_driver scmi_cpufreq_driver = { static int scmi_cpufreq_probe(struct scmi_device *sdev) { int ret; + struct device *dev = &sdev->dev; handle = sdev->handle; if (!handle || !handle->perf_ops) return -ENODEV; + /* dummy clock provider as needed by OPP if clocks property is used */ + if (of_find_property(dev->of_node, "#clock-cells", NULL)) + devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, NULL); + ret = cpufreq_register_driver(&scmi_cpufreq_driver); if (ret) { dev_err(&sdev->dev, "%s: registering cpufreq failed, err: %d\n", From 61a2f1aecf6052f7bcf900829ca2b9d74437ec07 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Mon, 16 Nov 2020 18:45:15 +0100 Subject: [PATCH 591/710] MIPS: kernel: Fix for_each_memblock conversion The loop over all memblocks works with PFNs and not physical addresses, so we need for_each_mem_pfn_range(). Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Signed-off-by: Thomas Bogendoerfer Reviewed-by: Mike Rapoport Reviewed-by: Serge Semin --- arch/mips/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 0d4253208bde..ca579deef939 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -262,8 +262,8 @@ static void __init bootmem_init(void) static void __init bootmem_init(void) { phys_addr_t ramstart, ramend; - phys_addr_t start, end; - u64 i; + unsigned long start, end; + int i; ramstart = memblock_start_of_DRAM(); ramend = memblock_end_of_DRAM(); @@ -300,7 +300,7 @@ static void __init bootmem_init(void) min_low_pfn = ARCH_PFN_OFFSET; max_pfn = PFN_DOWN(ramend); - for_each_mem_range(i, &start, &end) { + for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { /* * Skip highmem here so we get an accurate max_low_pfn if low * memory stops short of high memory. From 1a371e67dc77125736cc56d3a0893f06b75855b6 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 13 Nov 2020 09:59:23 +0800 Subject: [PATCH 592/710] x86/microcode/intel: Check patch signature before saving microcode for early loading Currently, scan_microcode() leverages microcode_matches() to check if the microcode matches the CPU by comparing the family and model. However, the processor stepping and flags of the microcode signature should also be considered when saving a microcode patch for early update. Use find_matching_signature() in scan_microcode() and get rid of the now-unused microcode_matches() which is a good cleanup in itself. Complete the verification of the patch being saved for early loading in save_microcode_patch() directly. This needs to be done there too because save_mc_for_early() will call save_microcode_patch() too. The second reason why this needs to be done is because the loader still tries to support, at least hypothetically, mixed-steppings systems and thus adds all patches to the cache that belong to the same CPU model albeit with different steppings. For example: microcode: CPU: sig=0x906ec, pf=0x2, rev=0xd6 microcode: mc_saved[0]: sig=0x906e9, pf=0x2a, rev=0xd6, total size=0x19400, date = 2020-04-23 microcode: mc_saved[1]: sig=0x906ea, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27 microcode: mc_saved[2]: sig=0x906eb, pf=0x2, rev=0xd6, total size=0x19400, date = 2020-04-23 microcode: mc_saved[3]: sig=0x906ec, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27 microcode: mc_saved[4]: sig=0x906ed, pf=0x22, rev=0xd6, total size=0x19400, date = 2020-04-23 The patch which is being saved for early loading, however, can only be the one which fits the CPU this runs on so do the signature verification before saving. [ bp: Do signature verification in save_microcode_patch() and rewrite commit message. ] Fixes: ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU") Signed-off-by: Chen Yu Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=208535 Link: https://lkml.kernel.org/r/20201113015923.13960-1-yu.c.chen@intel.com --- arch/x86/kernel/cpu/microcode/intel.c | 63 +++++---------------------- 1 file changed, 10 insertions(+), 53 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 6a99535d7f37..7e8e07bddd5f 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev return find_matching_signature(mc, csig, cpf); } -/* - * Given CPU signature and a microcode patch, this function finds if the - * microcode patch has matching family and model with the CPU. - * - * %true - if there's a match - * %false - otherwise - */ -static bool microcode_matches(struct microcode_header_intel *mc_header, - unsigned long sig) -{ - unsigned long total_size = get_totalsize(mc_header); - unsigned long data_size = get_datasize(mc_header); - struct extended_sigtable *ext_header; - unsigned int fam_ucode, model_ucode; - struct extended_signature *ext_sig; - unsigned int fam, model; - int ext_sigcount, i; - - fam = x86_family(sig); - model = x86_model(sig); - - fam_ucode = x86_family(mc_header->sig); - model_ucode = x86_model(mc_header->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - return false; - - ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - ext_sigcount = ext_header->count; - - for (i = 0; i < ext_sigcount; i++) { - fam_ucode = x86_family(ext_sig->sig); - model_ucode = x86_model(ext_sig->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - ext_sig++; - } - return false; -} - static struct ucode_patch *memdup_patch(void *data, unsigned int size) { struct ucode_patch *p; @@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch(void *data, unsigned int size) return p; } -static void save_microcode_patch(void *data, unsigned int size) +static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size) { struct microcode_header_intel *mc_hdr, *mc_saved_hdr; struct ucode_patch *iter, *tmp, *p = NULL; @@ -210,6 +163,9 @@ static void save_microcode_patch(void *data, unsigned int size) if (!p) return; + if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf)) + return; + /* * Save for early loading. On 32-bit, that needs to be a physical * address as the APs are running from physical addresses, before @@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save) size -= mc_size; - if (!microcode_matches(mc_header, uci->cpu_sig.sig)) { + if (!find_matching_signature(data, uci->cpu_sig.sig, + uci->cpu_sig.pf)) { data += mc_size; continue; } if (save) { - save_microcode_patch(data, mc_size); + save_microcode_patch(uci, data, mc_size); goto next; } @@ -483,14 +440,14 @@ static void show_saved_mc(void) * Save this microcode patch. It will be loaded early when a CPU is * hot-added or resumes. */ -static void save_mc_for_early(u8 *mc, unsigned int size) +static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size) { /* Synchronization during CPU hotplug. */ static DEFINE_MUTEX(x86_cpu_microcode_mutex); mutex_lock(&x86_cpu_microcode_mutex); - save_microcode_patch(mc, size); + save_microcode_patch(uci, mc, size); show_saved_mc(); mutex_unlock(&x86_cpu_microcode_mutex); @@ -935,7 +892,7 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter) * permanent memory. So it will be loaded early when a CPU is hot added * or resumes. */ - save_mc_for_early(new_mc, new_mc_size); + save_mc_for_early(uci, new_mc, new_mc_size); pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); From 60d53566100abde4acc5504b524bc97f89015690 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 12 Nov 2020 15:36:56 +0200 Subject: [PATCH 593/710] mmc: sdhci-pci: Prefer SDR25 timing for High Speed mode for BYT-based Intel controllers A UHS setting of SDR25 can give better results for High Speed mode. This is because there is no setting corresponding to high speed. Currently SDHCI sets no value, which means zero which is also the setting for SDR12. There was an attempt to change this in sdhci.c but it caused problems for some drivers, so it was reverted and the change was made to sdhci-brcmstb in commit 2fefc7c5f7d16e ("mmc: sdhci-brcmstb: Fix incorrect switch to HS mode"). Several other drivers also do this. Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # v5.4+ Link: https://lore.kernel.org/r/20201112133656.20317-1-adrian.hunter@intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 23da7f7fe093..9552708846ca 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -665,6 +665,15 @@ static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode, } } +static void sdhci_intel_set_uhs_signaling(struct sdhci_host *host, + unsigned int timing) +{ + /* Set UHS timing to SDR25 for High Speed mode */ + if (timing == MMC_TIMING_MMC_HS || timing == MMC_TIMING_SD_HS) + timing = MMC_TIMING_UHS_SDR25; + sdhci_set_uhs_signaling(host, timing); +} + #define INTEL_HS400_ES_REG 0x78 #define INTEL_HS400_ES_BIT BIT(0) @@ -721,7 +730,7 @@ static const struct sdhci_ops sdhci_intel_byt_ops = { .enable_dma = sdhci_pci_enable_dma, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, - .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_uhs_signaling = sdhci_intel_set_uhs_signaling, .hw_reset = sdhci_pci_hw_reset, }; @@ -731,7 +740,7 @@ static const struct sdhci_ops sdhci_intel_glk_ops = { .enable_dma = sdhci_pci_enable_dma, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_cqhci_reset, - .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_uhs_signaling = sdhci_intel_set_uhs_signaling, .hw_reset = sdhci_pci_hw_reset, .irq = sdhci_cqhci_irq, }; From 9e9534329306fcd7ea1b84f14860a3c04ebe7f1a Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Mon, 16 Nov 2020 14:02:43 +0530 Subject: [PATCH 594/710] mmc: sdhci-of-arasan: Allow configuring zero tap values Allow configuring the Output and Input tap values with zero to avoid failures in some cases (one of them is SD boot mode) where the output and input tap values may be already set to non-zero. Fixes: a5c8b2ae2e51 ("mmc: sdhci-of-arasan: Add support for ZynqMP Platform Tap Delays Setup") Signed-off-by: Sai Krishna Potthuri Signed-off-by: Manish Narani Acked-by: Michal Simek Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1605515565-117562-2-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 40 ++++++------------------------ 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 829ccef87426..100621e55427 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -600,14 +600,8 @@ static int sdhci_zynqmp_sdcardclk_set_phase(struct clk_hw *hw, int degrees) u8 tap_delay, tap_max = 0; int ret; - /* - * This is applicable for SDHCI_SPEC_300 and above - * ZynqMP does not set phase for <=25MHz clock. - * If degrees is zero, no need to do anything. - */ - if (host->version < SDHCI_SPEC_300 || - host->timing == MMC_TIMING_LEGACY || - host->timing == MMC_TIMING_UHS_SDR12 || !degrees) + /* This is applicable for SDHCI_SPEC_300 and above */ + if (host->version < SDHCI_SPEC_300) return 0; switch (host->timing) { @@ -668,14 +662,8 @@ static int sdhci_zynqmp_sampleclk_set_phase(struct clk_hw *hw, int degrees) u8 tap_delay, tap_max = 0; int ret; - /* - * This is applicable for SDHCI_SPEC_300 and above - * ZynqMP does not set phase for <=25MHz clock. - * If degrees is zero, no need to do anything. - */ - if (host->version < SDHCI_SPEC_300 || - host->timing == MMC_TIMING_LEGACY || - host->timing == MMC_TIMING_UHS_SDR12 || !degrees) + /* This is applicable for SDHCI_SPEC_300 and above */ + if (host->version < SDHCI_SPEC_300) return 0; switch (host->timing) { @@ -733,14 +721,8 @@ static int sdhci_versal_sdcardclk_set_phase(struct clk_hw *hw, int degrees) struct sdhci_host *host = sdhci_arasan->host; u8 tap_delay, tap_max = 0; - /* - * This is applicable for SDHCI_SPEC_300 and above - * Versal does not set phase for <=25MHz clock. - * If degrees is zero, no need to do anything. - */ - if (host->version < SDHCI_SPEC_300 || - host->timing == MMC_TIMING_LEGACY || - host->timing == MMC_TIMING_UHS_SDR12 || !degrees) + /* This is applicable for SDHCI_SPEC_300 and above */ + if (host->version < SDHCI_SPEC_300) return 0; switch (host->timing) { @@ -804,14 +786,8 @@ static int sdhci_versal_sampleclk_set_phase(struct clk_hw *hw, int degrees) struct sdhci_host *host = sdhci_arasan->host; u8 tap_delay, tap_max = 0; - /* - * This is applicable for SDHCI_SPEC_300 and above - * Versal does not set phase for <=25MHz clock. - * If degrees is zero, no need to do anything. - */ - if (host->version < SDHCI_SPEC_300 || - host->timing == MMC_TIMING_LEGACY || - host->timing == MMC_TIMING_UHS_SDR12 || !degrees) + /* This is applicable for SDHCI_SPEC_300 and above */ + if (host->version < SDHCI_SPEC_300) return 0; switch (host->timing) { From d338c6d01dc614cad253d6c042501fa0eb242d5c Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Mon, 16 Nov 2020 14:02:44 +0530 Subject: [PATCH 595/710] mmc: sdhci-of-arasan: Use Mask writes for Tap delays Mask the ITAP and OTAP delay bits before updating with the new tap value for Versal platform. Fixes: 1a470721c8f5 ("sdhci: arasan: Add support for Versal Tap Delays") Signed-off-by: Sai Krishna Potthuri Signed-off-by: Manish Narani Acked-by: Michal Simek Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1605515565-117562-3-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 100621e55427..3ec5ecad637c 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -30,7 +30,10 @@ #define SDHCI_ARASAN_VENDOR_REGISTER 0x78 #define SDHCI_ARASAN_ITAPDLY_REGISTER 0xF0F8 +#define SDHCI_ARASAN_ITAPDLY_SEL_MASK 0xFF + #define SDHCI_ARASAN_OTAPDLY_REGISTER 0xF0FC +#define SDHCI_ARASAN_OTAPDLY_SEL_MASK 0x3F #define SDHCI_ARASAN_CQE_BASE_ADDR 0x200 #define VENDOR_ENHANCED_STROBE BIT(0) @@ -755,6 +758,7 @@ static int sdhci_versal_sdcardclk_set_phase(struct clk_hw *hw, int degrees) regval = sdhci_readl(host, SDHCI_ARASAN_OTAPDLY_REGISTER); regval |= SDHCI_OTAPDLY_ENABLE; sdhci_writel(host, regval, SDHCI_ARASAN_OTAPDLY_REGISTER); + regval &= ~SDHCI_ARASAN_OTAPDLY_SEL_MASK; regval |= tap_delay; sdhci_writel(host, regval, SDHCI_ARASAN_OTAPDLY_REGISTER); } @@ -822,6 +826,7 @@ static int sdhci_versal_sampleclk_set_phase(struct clk_hw *hw, int degrees) sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER); regval |= SDHCI_ITAPDLY_ENABLE; sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER); + regval &= ~SDHCI_ARASAN_ITAPDLY_SEL_MASK; regval |= tap_delay; sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER); regval &= ~SDHCI_ITAPDLY_CHGWIN; From d06d60d52ec0b0eef702dd3e7b4699f0b589ad0f Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Mon, 16 Nov 2020 14:02:45 +0530 Subject: [PATCH 596/710] mmc: sdhci-of-arasan: Issue DLL reset explicitly In the current implementation DLL reset will be issued for each ITAP and OTAP setting inside ATF, this is creating issues in some scenarios and this sequence is not inline with the TRM. To fix the issue, DLL reset should be removed from the ATF and host driver will request it explicitly. This patch update host driver to explicitly request for DLL reset before ITAP (assert DLL) and after OTAP (release DLL) settings. Fixes: a5c8b2ae2e51 ("mmc: sdhci-of-arasan: Add support for ZynqMP Platform Tap Delays Setup") Signed-off-by: Sai Krishna Potthuri Signed-off-by: Manish Narani Acked-by: Michal Simek Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1605515565-117562-4-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 3ec5ecad637c..d25a4b50c2f3 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -635,6 +635,9 @@ static int sdhci_zynqmp_sdcardclk_set_phase(struct clk_hw *hw, int degrees) if (ret) pr_err("Error setting Output Tap Delay\n"); + /* Release DLL Reset */ + zynqmp_pm_sd_dll_reset(node_id, PM_DLL_RESET_RELEASE); + return ret; } @@ -669,6 +672,9 @@ static int sdhci_zynqmp_sampleclk_set_phase(struct clk_hw *hw, int degrees) if (host->version < SDHCI_SPEC_300) return 0; + /* Assert DLL Reset */ + zynqmp_pm_sd_dll_reset(node_id, PM_DLL_RESET_ASSERT); + switch (host->timing) { case MMC_TIMING_MMC_HS: case MMC_TIMING_SD_HS: From ac3b57adf87ad9bac7e33ca26bbbb13fae1ed62b Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 13 Nov 2020 21:18:56 +0800 Subject: [PATCH 597/710] MIPS: Alchemy: Fix memleak in alchemy_clk_setup_cpu If the clk_register fails, we should free h before function returns to prevent memleak. Fixes: 474402291a0ad ("MIPS: Alchemy: clock framework integration of onchip clocks") Reported-by: Hulk Robot Signed-off-by: Zhang Qilong Signed-off-by: Thomas Bogendoerfer --- arch/mips/alchemy/common/clock.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index a95a894aceaf..f0c830337104 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -152,6 +152,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, { struct clk_init_data id; struct clk_hw *h; + struct clk *clk; h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) @@ -164,7 +165,13 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, id.ops = &alchemy_clkops_cpu; h->init = &id; - return clk_register(NULL, h); + clk = clk_register(NULL, h); + if (IS_ERR(clk)) { + pr_err("failed to register clock\n"); + kfree(h); + } + + return clk; } /* AUXPLLs ************************************************************/ From 8e1ac4299a6e8726de42310d9c1379f188140c71 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 12 Nov 2020 11:12:01 +0000 Subject: [PATCH 598/710] sched/fair: Fix overutilized update in enqueue_task_fair() enqueue_task_fair() attempts to skip the overutilized update for new tasks as their util_avg is not accurate yet. However, the flag we check to do so is overwritten earlier on in the function, which makes the condition pretty much a nop. Fix this by saving the flag early on. Fixes: 2802bf3cd936 ("sched/fair: Add over-utilization/tipping point indicator") Reported-by: Rick Yiu Signed-off-by: Quentin Perret Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20201112111201.2081902-1-qperret@google.com --- kernel/sched/fair.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8e563cf2b5e7..56a8ca93a971 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5477,6 +5477,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; int idle_h_nr_running = task_has_idle_policy(p); + int task_new = !(flags & ENQUEUE_WAKEUP); /* * The code below (indirectly) updates schedutil which looks at @@ -5549,7 +5550,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) * into account, but that is not straightforward to implement, * and the following generally works well enough in practice. */ - if (flags & ENQUEUE_WAKEUP) + if (!task_new) update_overutilized_status(rq); enqueue_throttle: From f97bb5272d9e95d400d6c8643ebb146b3e3e7842 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 09:08:41 +0100 Subject: [PATCH 599/710] sched: Fix data-race in wakeup Mel reported that on some ARM64 platforms loadavg goes bananas and Will tracked it down to the following race: CPU0 CPU1 schedule() prev->sched_contributes_to_load = X; deactivate_task(prev); try_to_wake_up() if (p->on_rq &&) // false if (smp_load_acquire(&p->on_cpu) && // true ttwu_queue_wakelist()) p->sched_remote_wakeup = Y; smp_store_release(prev->on_cpu, 0); where both p->sched_contributes_to_load and p->sched_remote_wakeup are in the same word, and thus the stores X and Y race (and can clobber one another's data). Whereas prior to commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") the p->on_cpu handoff serialized access to p->sched_remote_wakeup (just as it still does with p->sched_contributes_to_load) that commit broke that by calling ttwu_queue_wakelist() with p->on_cpu != 0. However, due to p->XXX = X ttwu() schedule() if (p->on_rq && ...) // false smp_mb__after_spinlock() if (smp_load_acquire(&p->on_cpu) && deactivate_task() ttwu_queue_wakelist()) p->on_rq = 0; p->sched_remote_wakeup = Y; We can be sure any 'current' store is complete and 'current' is guaranteed asleep. Therefore we can move p->sched_remote_wakeup into the current flags word. Note: while the observed failure was loadavg accounting gone wrong due to ttwu() cobbering p->sched_contributes_to_load, the reverse problem is also possible where schedule() clobbers p->sched_remote_wakeup, this could result in enqueue_entity() wrecking ->vruntime and causing scheduling artifacts. Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") Reported-by: Mel Gorman Debugged-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201117083016.GK3121392@hirez.programming.kicks-ass.net --- include/linux/sched.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d383cf09e78f..0e91b451d2a2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -769,7 +769,6 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; - unsigned sched_remote_wakeup:1; #ifdef CONFIG_PSI unsigned sched_psi_wake_requeue:1; #endif @@ -779,6 +778,21 @@ struct task_struct { /* Unserialized, strictly 'current' */ + /* + * This field must not be in the scheduler word above due to wakelist + * queueing no longer being serialized by p->on_cpu. However: + * + * p->XXX = X; ttwu() + * schedule() if (p->on_rq && ..) // false + * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true + * deactivate_task() ttwu_queue_wakelist()) + * p->on_rq = 0; p->sched_remote_wakeup = Y; + * + * guarantees all stores of 'current' are visible before + * ->sched_remote_wakeup gets used, so it can be in this word. + */ + unsigned sched_remote_wakeup:1; + /* Bit to tell LSMs we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; From ec618b84f6e15281cc3660664d34cd0dd2f2579e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Sep 2020 13:50:42 +0200 Subject: [PATCH 600/710] sched: Fix rq->nr_iowait ordering schedule() ttwu() deactivate_task(); if (p->on_rq && ...) // false atomic_dec(&task_rq(p)->nr_iowait); if (prev->in_iowait) atomic_inc(&rq->nr_iowait); Allows nr_iowait to be decremented before it gets incremented, resulting in more dodgy IO-wait numbers than usual. Note that because we can now do ttwu_queue_wakelist() before p->on_cpu==0, we lose the natural ordering and have to further delay the decrement. Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") Reported-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mel Gorman Link: https://lkml.kernel.org/r/20201117093829.GD3121429@hirez.programming.kicks-ass.net --- kernel/sched/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d2003a7d5ab5..9f0ebfb0d17b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2501,7 +2501,12 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, #ifdef CONFIG_SMP if (wake_flags & WF_MIGRATED) en_flags |= ENQUEUE_MIGRATED; + else #endif + if (p->in_iowait) { + delayacct_blkio_end(p); + atomic_dec(&task_rq(p)->nr_iowait); + } activate_task(rq, p, en_flags); ttwu_do_wakeup(rq, p, wake_flags, rf); @@ -2888,11 +2893,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) goto unlock; - if (p->in_iowait) { - delayacct_blkio_end(p); - atomic_dec(&task_rq(p)->nr_iowait); - } - #ifdef CONFIG_SMP /* * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be @@ -2963,6 +2963,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); if (task_cpu(p) != cpu) { + if (p->in_iowait) { + delayacct_blkio_end(p); + atomic_dec(&task_rq(p)->nr_iowait); + } + wake_flags |= WF_MIGRATED; psi_ttwu_dequeue(p); set_task_cpu(p, cpu); From 2279f540ea7d05f22d2f0c4224319330228586bc Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 17 Nov 2020 07:14:32 +0100 Subject: [PATCH 601/710] sched/deadline: Fix priority inheritance with multiple scheduling classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Glenn reported that "an application [he developed produces] a BUG in deadline.c when a SCHED_DEADLINE task contends with CFS tasks on nested PTHREAD_PRIO_INHERIT mutexes. I believe the bug is triggered when a CFS task that was boosted by a SCHED_DEADLINE task boosts another CFS task (nested priority inheritance). ------------[ cut here ]------------ kernel BUG at kernel/sched/deadline.c:1462! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 12 PID: 19171 Comm: dl_boost_bug Tainted: ... Hardware name: ... RIP: 0010:enqueue_task_dl+0x335/0x910 Code: ... RSP: 0018:ffffc9000c2bbc68 EFLAGS: 00010002 RAX: 0000000000000009 RBX: ffff888c0af94c00 RCX: ffffffff81e12500 RDX: 000000000000002e RSI: ffff888c0af94c00 RDI: ffff888c10b22600 RBP: ffffc9000c2bbd08 R08: 0000000000000009 R09: 0000000000000078 R10: ffffffff81e12440 R11: ffffffff81e1236c R12: ffff888bc8932600 R13: ffff888c0af94eb8 R14: ffff888c10b22600 R15: ffff888bc8932600 FS: 00007fa58ac55700(0000) GS:ffff888c10b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa58b523230 CR3: 0000000bf44ab003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? intel_pstate_update_util_hwp+0x13/0x170 rt_mutex_setprio+0x1cc/0x4b0 task_blocks_on_rt_mutex+0x225/0x260 rt_spin_lock_slowlock_locked+0xab/0x2d0 rt_spin_lock_slowlock+0x50/0x80 hrtimer_grab_expiry_lock+0x20/0x30 hrtimer_cancel+0x13/0x30 do_nanosleep+0xa0/0x150 hrtimer_nanosleep+0xe1/0x230 ? __hrtimer_init_sleeper+0x60/0x60 __x64_sys_nanosleep+0x8d/0xa0 do_syscall_64+0x4a/0x100 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fa58b52330d ... ---[ end trace 0000000000000002 ]— He also provided a simple reproducer creating the situation below: So the execution order of locking steps are the following (N1 and N2 are non-deadline tasks. D1 is a deadline task. M1 and M2 are mutexes that are enabled * with priority inheritance.) Time moves forward as this timeline goes down: N1 N2 D1 | | | | | | Lock(M1) | | | | | | Lock(M2) | | | | | | Lock(M2) | | | | Lock(M1) | | (!!bug triggered!) | Daniel reported a similar situation as well, by just letting ksoftirqd run with DEADLINE (and eventually block on a mutex). Problem is that boosted entities (Priority Inheritance) use static DEADLINE parameters of the top priority waiter. However, there might be cases where top waiter could be a non-DEADLINE entity that is currently boosted by a DEADLINE entity from a different lock chain (i.e., nested priority chains involving entities of non-DEADLINE classes). In this case, top waiter static DEADLINE parameters could be null (initialized to 0 at fork()) and replenish_dl_entity() would hit a BUG(). Fix this by keeping track of the original donor and using its parameters when a task is boosted. Reported-by: Glenn Elliott Reported-by: Daniel Bristot de Oliveira Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Tested-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201117061432.517340-1-juri.lelli@redhat.com --- include/linux/sched.h | 10 ++++- kernel/sched/core.c | 11 ++--- kernel/sched/deadline.c | 97 ++++++++++++++++++++++------------------- 3 files changed, 68 insertions(+), 50 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 0e91b451d2a2..095fdec07b38 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -551,7 +551,6 @@ struct sched_dl_entity { * overruns. */ unsigned int dl_throttled : 1; - unsigned int dl_boosted : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; @@ -570,6 +569,15 @@ struct sched_dl_entity { * time. */ struct hrtimer inactive_timer; + +#ifdef CONFIG_RT_MUTEXES + /* + * Priority Inheritance. When a DEADLINE scheduling entity is boosted + * pi_se points to the donor, otherwise points to the dl_se it belongs + * to (the original one/itself). + */ + struct sched_dl_entity *pi_se; +#endif }; #ifdef CONFIG_UCLAMP_TASK diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9f0ebfb0d17b..e7e453492cff 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4912,20 +4912,21 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) if (!dl_prio(p->normal_prio) || (pi_task && dl_prio(pi_task->prio) && dl_entity_preempt(&pi_task->dl, &p->dl))) { - p->dl.dl_boosted = 1; + p->dl.pi_se = pi_task->dl.pi_se; queue_flag |= ENQUEUE_REPLENISH; - } else - p->dl.dl_boosted = 0; + } else { + p->dl.pi_se = &p->dl; + } p->sched_class = &dl_sched_class; } else if (rt_prio(prio)) { if (dl_prio(oldprio)) - p->dl.dl_boosted = 0; + p->dl.pi_se = &p->dl; if (oldprio < prio) queue_flag |= ENQUEUE_HEAD; p->sched_class = &rt_sched_class; } else { if (dl_prio(oldprio)) - p->dl.dl_boosted = 0; + p->dl.pi_se = &p->dl; if (rt_prio(oldprio)) p->rt.timeout = 0; p->sched_class = &fair_sched_class; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 6d93f4518734..949bc5c083c1 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -43,6 +43,28 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se) return !RB_EMPTY_NODE(&dl_se->rb_node); } +#ifdef CONFIG_RT_MUTEXES +static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se) +{ + return dl_se->pi_se; +} + +static inline bool is_dl_boosted(struct sched_dl_entity *dl_se) +{ + return pi_of(dl_se) != dl_se; +} +#else +static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se) +{ + return dl_se; +} + +static inline bool is_dl_boosted(struct sched_dl_entity *dl_se) +{ + return false; +} +#endif + #ifdef CONFIG_SMP static inline struct dl_bw *dl_bw_of(int i) { @@ -698,7 +720,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se) struct dl_rq *dl_rq = dl_rq_of_se(dl_se); struct rq *rq = rq_of_dl_rq(dl_rq); - WARN_ON(dl_se->dl_boosted); + WARN_ON(is_dl_boosted(dl_se)); WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline)); /* @@ -736,21 +758,20 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se) * could happen are, typically, a entity voluntarily trying to overcome its * runtime, or it just underestimated it during sched_setattr(). */ -static void replenish_dl_entity(struct sched_dl_entity *dl_se, - struct sched_dl_entity *pi_se) +static void replenish_dl_entity(struct sched_dl_entity *dl_se) { struct dl_rq *dl_rq = dl_rq_of_se(dl_se); struct rq *rq = rq_of_dl_rq(dl_rq); - BUG_ON(pi_se->dl_runtime <= 0); + BUG_ON(pi_of(dl_se)->dl_runtime <= 0); /* * This could be the case for a !-dl task that is boosted. * Just go with full inherited parameters. */ if (dl_se->dl_deadline == 0) { - dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; - dl_se->runtime = pi_se->dl_runtime; + dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; + dl_se->runtime = pi_of(dl_se)->dl_runtime; } if (dl_se->dl_yielded && dl_se->runtime > 0) @@ -763,8 +784,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, * arbitrary large. */ while (dl_se->runtime <= 0) { - dl_se->deadline += pi_se->dl_period; - dl_se->runtime += pi_se->dl_runtime; + dl_se->deadline += pi_of(dl_se)->dl_period; + dl_se->runtime += pi_of(dl_se)->dl_runtime; } /* @@ -778,8 +799,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, */ if (dl_time_before(dl_se->deadline, rq_clock(rq))) { printk_deferred_once("sched: DL replenish lagged too much\n"); - dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; - dl_se->runtime = pi_se->dl_runtime; + dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; + dl_se->runtime = pi_of(dl_se)->dl_runtime; } if (dl_se->dl_yielded) @@ -812,8 +833,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, * task with deadline equal to period this is the same of using * dl_period instead of dl_deadline in the equation above. */ -static bool dl_entity_overflow(struct sched_dl_entity *dl_se, - struct sched_dl_entity *pi_se, u64 t) +static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t) { u64 left, right; @@ -835,9 +855,9 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, * of anything below microseconds resolution is actually fiction * (but still we want to give the user that illusion >;). */ - left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); + left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); right = ((dl_se->deadline - t) >> DL_SCALE) * - (pi_se->dl_runtime >> DL_SCALE); + (pi_of(dl_se)->dl_runtime >> DL_SCALE); return dl_time_before(right, left); } @@ -922,24 +942,23 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se) * Please refer to the comments update_dl_revised_wakeup() function to find * more about the Revised CBS rule. */ -static void update_dl_entity(struct sched_dl_entity *dl_se, - struct sched_dl_entity *pi_se) +static void update_dl_entity(struct sched_dl_entity *dl_se) { struct dl_rq *dl_rq = dl_rq_of_se(dl_se); struct rq *rq = rq_of_dl_rq(dl_rq); if (dl_time_before(dl_se->deadline, rq_clock(rq)) || - dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) { + dl_entity_overflow(dl_se, rq_clock(rq))) { if (unlikely(!dl_is_implicit(dl_se) && !dl_time_before(dl_se->deadline, rq_clock(rq)) && - !dl_se->dl_boosted)){ + !is_dl_boosted(dl_se))) { update_dl_revised_wakeup(dl_se, rq); return; } - dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; - dl_se->runtime = pi_se->dl_runtime; + dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; + dl_se->runtime = pi_of(dl_se)->dl_runtime; } } @@ -1038,7 +1057,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) * The task might have been boosted by someone else and might be in the * boosting/deboosting path, its not throttled. */ - if (dl_se->dl_boosted) + if (is_dl_boosted(dl_se)) goto unlock; /* @@ -1066,7 +1085,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) * but do not enqueue -- wait for our wakeup to do that. */ if (!task_on_rq_queued(p)) { - replenish_dl_entity(dl_se, dl_se); + replenish_dl_entity(dl_se); goto unlock; } @@ -1156,7 +1175,7 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) if (dl_time_before(dl_se->deadline, rq_clock(rq)) && dl_time_before(rq_clock(rq), dl_next_period(dl_se))) { - if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) + if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p))) return; dl_se->dl_throttled = 1; if (dl_se->runtime > 0) @@ -1287,7 +1306,7 @@ throttle: dl_se->dl_overrun = 1; __dequeue_task_dl(rq, curr, 0); - if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr))) + if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr))) enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); if (!is_leftmost(curr, &rq->dl)) @@ -1481,8 +1500,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) } static void -enqueue_dl_entity(struct sched_dl_entity *dl_se, - struct sched_dl_entity *pi_se, int flags) +enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) { BUG_ON(on_dl_rq(dl_se)); @@ -1493,9 +1511,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, */ if (flags & ENQUEUE_WAKEUP) { task_contending(dl_se, flags); - update_dl_entity(dl_se, pi_se); + update_dl_entity(dl_se); } else if (flags & ENQUEUE_REPLENISH) { - replenish_dl_entity(dl_se, pi_se); + replenish_dl_entity(dl_se); } else if ((flags & ENQUEUE_RESTORE) && dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) { @@ -1512,19 +1530,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se) static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) { - struct task_struct *pi_task = rt_mutex_get_top_task(p); - struct sched_dl_entity *pi_se = &p->dl; - - /* - * Use the scheduling parameters of the top pi-waiter task if: - * - we have a top pi-waiter which is a SCHED_DEADLINE task AND - * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is - * smaller than our deadline OR we are a !SCHED_DEADLINE task getting - * boosted due to a SCHED_DEADLINE pi-waiter). - * Otherwise we keep our runtime and deadline. - */ - if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) { - pi_se = &pi_task->dl; + if (is_dl_boosted(&p->dl)) { /* * Because of delays in the detection of the overrun of a * thread's runtime, it might be the case that a thread @@ -1557,7 +1563,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) * the throttle. */ p->dl.dl_throttled = 0; - BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH); + BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH); return; } @@ -1594,7 +1600,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) return; } - enqueue_dl_entity(&p->dl, pi_se, flags); + enqueue_dl_entity(&p->dl, flags); if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); @@ -2787,11 +2793,14 @@ void __dl_clear_params(struct task_struct *p) dl_se->dl_bw = 0; dl_se->dl_density = 0; - dl_se->dl_boosted = 0; dl_se->dl_throttled = 0; dl_se->dl_yielded = 0; dl_se->dl_non_contending = 0; dl_se->dl_overrun = 0; + +#ifdef CONFIG_RT_MUTEXES + dl_se->pi_se = dl_se; +#endif } bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr) From 43be4388e94b915799a24f0eaf664bf95b85231f Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 13 Nov 2020 19:05:03 +0800 Subject: [PATCH 602/710] lockdep: Put graph lock/unlock under lock_recursion protection A warning was hit when running xfstests/generic/068 in a Hyper-V guest: [...] ------------[ cut here ]------------ [...] DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled()) [...] WARNING: CPU: 2 PID: 1350 at kernel/locking/lockdep.c:5280 check_flags.part.0+0x165/0x170 [...] ... [...] Workqueue: events pwq_unbound_release_workfn [...] RIP: 0010:check_flags.part.0+0x165/0x170 [...] ... [...] Call Trace: [...] lock_is_held_type+0x72/0x150 [...] ? lock_acquire+0x16e/0x4a0 [...] rcu_read_lock_sched_held+0x3f/0x80 [...] __send_ipi_one+0x14d/0x1b0 [...] hv_send_ipi+0x12/0x30 [...] __pv_queued_spin_unlock_slowpath+0xd1/0x110 [...] __raw_callee_save___pv_queued_spin_unlock_slowpath+0x11/0x20 [...] .slowpath+0x9/0xe [...] lockdep_unregister_key+0x128/0x180 [...] pwq_unbound_release_workfn+0xbb/0xf0 [...] process_one_work+0x227/0x5c0 [...] worker_thread+0x55/0x3c0 [...] ? process_one_work+0x5c0/0x5c0 [...] kthread+0x153/0x170 [...] ? __kthread_bind_mask+0x60/0x60 [...] ret_from_fork+0x1f/0x30 The cause of the problem is we have call chain lockdep_unregister_key() -> lockdep_unlock() -> arch_spin_unlock() -> __pv_queued_spin_unlock_slowpath() -> pv_kick() -> __send_ipi_one() -> trace_hyperv_send_ipi_one(). Although this particular warning is triggered because Hyper-V has a trace point in ipi sending, but in general arch_spin_unlock() may call another function having a trace point in it, so put the arch_spin_lock() and arch_spin_unlock() after lock_recursion protection to fix this problem and avoid similiar problems. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201113110512.1056501-1-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index d9fb9e19d2ed..c1418b47f625 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -108,19 +108,21 @@ static inline void lockdep_lock(void) { DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + __this_cpu_inc(lockdep_recursion); arch_spin_lock(&__lock); __owner = current; - __this_cpu_inc(lockdep_recursion); } static inline void lockdep_unlock(void) { + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + if (debug_locks && DEBUG_LOCKS_WARN_ON(__owner != current)) return; - __this_cpu_dec(lockdep_recursion); __owner = NULL; arch_spin_unlock(&__lock); + __this_cpu_dec(lockdep_recursion); } static inline bool lockdep_assert_locked(void) From ebd19fc372e3e78bf165f230e7c084e304441c08 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 13 Nov 2020 10:31:26 -0800 Subject: [PATCH 603/710] perf/x86: fix sysfs type mismatches This change switches rapl to use PMU_FORMAT_ATTR, and fixes two other macros to use device_attribute instead of kobj_attribute to avoid callback type mismatches that trip indirect call checking with Clang's Control-Flow Integrity (CFI). Reported-by: Sedat Dilek Signed-off-by: Sami Tolvanen Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20201113183126.1239404-1-samitolvanen@google.com --- arch/x86/events/intel/cstate.c | 6 +++--- arch/x86/events/intel/uncore.c | 4 ++-- arch/x86/events/intel/uncore.h | 12 ++++++------ arch/x86/events/rapl.c | 14 +------------- 4 files changed, 12 insertions(+), 24 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 442e1ed4acd4..4eb7ee5fed72 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -107,14 +107,14 @@ MODULE_LICENSE("GPL"); #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __cstate_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __cstate_##_var##_show, NULL) static ssize_t cstate_get_attr_cpumask(struct device *dev, diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 86d012b3e0b4..80d52cbe2fde 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -94,8 +94,8 @@ end: return map; } -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct uncore_event_desc *event = container_of(attr, struct uncore_event_desc, attr); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 83d2a7d490e0..9efea154349d 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -157,7 +157,7 @@ struct intel_uncore_box { #define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2 struct uncore_event_desc { - struct kobj_attribute attr; + struct device_attribute attr; const char *config; }; @@ -179,8 +179,8 @@ struct pci2phy_map { struct pci2phy_map *__find_pci2phy_map(int segment); int uncore_pcibus_to_physid(struct pci_bus *bus); -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf); +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf); static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev) { @@ -201,14 +201,14 @@ extern int __uncore_max_dies; } #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __uncore_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL) static inline bool uncore_pmc_fixed(int idx) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 7c0120e2e957..7dbbeaacd995 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -93,18 +93,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { * any other bit is reserved */ #define RAPL_EVENT_MASK 0xFFULL - -#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *page) \ -{ \ - BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ - return sprintf(page, _format "\n"); \ -} \ -static struct kobj_attribute format_attr_##_var = \ - __ATTR(_name, 0444, __rapl_##_var##_show, NULL) - #define RAPL_CNTR_WIDTH 32 #define RAPL_EVENT_ATTR_STR(_name, v, str) \ @@ -441,7 +429,7 @@ static struct attribute_group rapl_pmu_events_group = { .attrs = attrs_empty, }; -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +PMU_FORMAT_ATTR(event, "config:0-7"); static struct attribute *rapl_formats_attr[] = { &format_attr_event.attr, NULL, From 54a2a3898f469a915510038fe84ef4f083131d3e Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 17 Nov 2020 13:28:03 +0100 Subject: [PATCH 604/710] ALSA: usb-audio: Add delay quirk for all Logitech USB devices Found one more Logitech device, BCC950 ConferenceCam, which needs the same delay here. This makes 3 out of 3 devices I have tried. Therefore, add a delay for all Logitech devices as it does not hurt. Signed-off-by: Joakim Tjernlund Cc: # 4.19.y, 5.4.y Link: https://lore.kernel.org/r/20201117122803.24310-1-joakim.tjernlund@infinera.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index c989ad8052ae..c50be2f75f70 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1672,13 +1672,13 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) msleep(20); - /* Zoom R16/24, Logitech H650e/H570e, Jabra 550a, Kingston HyperX - * needs a tiny delay here, otherwise requests like get/set - * frequency return as failed despite actually succeeding. + /* Zoom R16/24, many Logitech(at least H650e/H570e/BCC950), + * Jabra 550a, Kingston HyperX needs a tiny delay here, + * otherwise requests like get/set frequency return + * as failed despite actually succeeding. */ if ((chip->usb_id == USB_ID(0x1686, 0x00dd) || - chip->usb_id == USB_ID(0x046d, 0x0a46) || - chip->usb_id == USB_ID(0x046d, 0x0a56) || + USB_ID_VENDOR(chip->usb_id) == 0x046d || /* Logitech */ chip->usb_id == USB_ID(0x0b0e, 0x0349) || chip->usb_id == USB_ID(0x0951, 0x16ad)) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) From dc293f2106903ab9c24e9cea18c276e32c394c33 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 1 Sep 2020 00:09:37 +0300 Subject: [PATCH 605/710] xtensa: uaccess: Add missing __user to strncpy_from_user() prototype When adding __user annotations in commit 2adf5352a34a, the strncpy_from_user() function declaration for the CONFIG_GENERIC_STRNCPY_FROM_USER case was missed. Fix it. Reported-by: kernel test robot Signed-off-by: Laurent Pinchart Message-Id: <20200831210937.17938-1-laurent.pinchart@ideasonboard.com> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index b9758119feca..5c9fb8005aa8 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -302,7 +302,7 @@ strncpy_from_user(char *dst, const char __user *src, long count) return -EFAULT; } #else -long strncpy_from_user(char *dst, const char *src, long count); +long strncpy_from_user(char *dst, const char __user *src, long count); #endif /* From a41b0ad07bfa081584218431cb0cd7e7ecc71210 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Tue, 17 Nov 2020 12:40:54 +0300 Subject: [PATCH 606/710] spi: dw: Set transfer handler before unmasking the IRQs It turns out the IRQs most like can be unmasked before the controller is enabled with no problematic consequences. The manual doesn't explicitly state that, but the examples perform the controller initialization procedure in that order. So the commit da8f58909e7e ("spi: dw: Unmask IRQs after enabling the chip") hasn't been that required as I thought. But anyway setting the IRQs up after the chip enabling still worth adding since it has simplified the code a bit. The problem is that it has introduced a potential bug. The transfer handler pointer is now initialized after the IRQs are enabled. That may and eventually will cause an invalid or uninitialized callback invocation. Fix that just by performing the callback initialization before the IRQ unmask procedure. Fixes: da8f58909e7e ("spi: dw: Unmask IRQs after enabling the chip") Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201117094054.4696-1-Sergey.Semin@baikalelectronics.ru Signed-off-by: Mark Brown --- drivers/spi/spi-dw-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c index 2e50cc0a9291..0b2236ade412 100644 --- a/drivers/spi/spi-dw-core.c +++ b/drivers/spi/spi-dw-core.c @@ -357,11 +357,11 @@ static void dw_spi_irq_setup(struct dw_spi *dws) dw_writel(dws, DW_SPI_TXFTLR, level); dw_writel(dws, DW_SPI_RXFTLR, level - 1); + dws->transfer_handler = dw_spi_transfer_handler; + imask = SPI_INT_TXEI | SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI | SPI_INT_RXFI; spi_umask_intr(dws, imask); - - dws->transfer_handler = dw_spi_transfer_handler; } /* From 04a9cd51d3f3308a98cbc6adc07acb12fbade011 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 16 Nov 2020 09:23:10 +0100 Subject: [PATCH 607/710] spi: npcm-fiu: Don't leak SPI master in probe error path If the calls to of_match_device(), of_alias_get_id(), devm_ioremap_resource(), devm_regmap_init_mmio() or devm_clk_get() fail on probe of the NPCM FIU SPI driver, the spi_controller struct is erroneously not freed. Fix by switching over to the new devm_spi_alloc_master() helper. Fixes: ace55c411b11 ("spi: npcm-fiu: add NPCM FIU controller driver") Signed-off-by: Lukas Wunner Cc: # v5.4+: 5e844cc37a5c: spi: Introduce device-managed SPI controller allocation Cc: # v5.4+ Cc: Tomer Maimon Link: https://lore.kernel.org/r/a420c23a363a3bc9aa684c6e790c32a8af106d17.1605512876.git.lukas@wunner.de Signed-off-by: Mark Brown --- drivers/spi/spi-npcm-fiu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c index 341f7cffeaac..1cb9329de945 100644 --- a/drivers/spi/spi-npcm-fiu.c +++ b/drivers/spi/spi-npcm-fiu.c @@ -679,7 +679,7 @@ static int npcm_fiu_probe(struct platform_device *pdev) struct resource *res; int id; - ctrl = spi_alloc_master(dev, sizeof(*fiu)); + ctrl = devm_spi_alloc_master(dev, sizeof(*fiu)); if (!ctrl) return -ENOMEM; From 6654b57866b98230a270953dd34f67de17ab1708 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Mon, 16 Nov 2020 09:09:29 +0800 Subject: [PATCH 608/710] drm/sun4i: dw-hdmi: fix error return code in sun8i_dw_hdmi_bind() Fix to return a negative error code from the error handling case instead of 0 in function sun8i_dw_hdmi_bind(). Fixes: b7c7436a5ff0 ("drm/sun4i: Implement A83T HDMI driver") Reported-by: Hulk Robot Signed-off-by: Xiongfeng Wang Reviewed-by: Jernej Skrabec Signed-off-by: Jernej Skrabec Link: https://patchwork.freedesktop.org/patch/msgid/1605488969-5211-1-git-send-email-wangxiongfeng2@huawei.com --- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index d4c08043dd81..92add2cef2e7 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -208,6 +208,7 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, phy_node = of_parse_phandle(dev->of_node, "phys", 0); if (!phy_node) { dev_err(dev, "Can't found PHY phandle\n"); + ret = -EINVAL; goto err_disable_clk_tmds; } From cb47d16ea21045c66eebbf5ed792e74a8537e27a Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Mon, 16 Nov 2020 21:07:13 +0800 Subject: [PATCH 609/710] qed: fix error return code in qed_iwarp_ll2_start() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 469981b17a4f ("qed: Add unaligned and packed packet processing") Fixes: fcb39f6c10b2 ("qed: Add mpa buffer descriptors for storing and processing mpa fpdus") Fixes: 1e28eaad07ea ("qed: Add iWARP support for fpdu spanned over more than two tcp packets") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Acked-by: Michal Kalderon  Link: https://lore.kernel.org/r/1605532033-27373-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 512cbef24097..a99861124630 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -2754,14 +2754,18 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, iwarp_info->partial_fpdus = kcalloc((u16)p_hwfn->p_rdma_info->num_qps, sizeof(*iwarp_info->partial_fpdus), GFP_KERNEL); - if (!iwarp_info->partial_fpdus) + if (!iwarp_info->partial_fpdus) { + rc = -ENOMEM; goto err; + } iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps; iwarp_info->mpa_intermediate_buf = kzalloc(buff_size, GFP_KERNEL); - if (!iwarp_info->mpa_intermediate_buf) + if (!iwarp_info->mpa_intermediate_buf) { + rc = -ENOMEM; goto err; + } /* The mpa_bufs array serves for pending RX packets received on the * mpa ll2 that don't have place on the tx ring and require later @@ -2771,8 +2775,10 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, iwarp_info->mpa_bufs = kcalloc(data.input.rx_num_desc, sizeof(*iwarp_info->mpa_bufs), GFP_KERNEL); - if (!iwarp_info->mpa_bufs) + if (!iwarp_info->mpa_bufs) { + rc = -ENOMEM; goto err; + } INIT_LIST_HEAD(&iwarp_info->mpa_buf_pending_list); INIT_LIST_HEAD(&iwarp_info->mpa_buf_list); From 7b027c249da54f492699c43e26cba486cfd48035 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 17 Nov 2020 11:02:11 +0800 Subject: [PATCH 610/710] net: b44: fix error return code in b44_init_one() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 39a6f4bce6b4 ("b44: replace the ssb_dma API with the generic DMA API") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/1605582131-36735-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/b44.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 74c1778d841e..b455b60a5434 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2383,7 +2383,8 @@ static int b44_init_one(struct ssb_device *sdev, goto err_out_free_dev; } - if (dma_set_mask_and_coherent(sdev->dma_dev, DMA_BIT_MASK(30))) { + err = dma_set_mask_and_coherent(sdev->dma_dev, DMA_BIT_MASK(30)); + if (err) { dev_err(sdev->dev, "Required 30BIT DMA mask unsupported by the system\n"); goto err_out_powerdown; From 3d5179458d22dc0b4fdc724e4bed4231a655112a Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 17 Nov 2020 13:14:48 +1030 Subject: [PATCH 611/710] net: ftgmac100: Fix crash when removing driver When removing the driver we would hit BUG_ON(!list_empty(&dev->ptype_specific)) in net/core/dev.c due to still having the NC-SI packet handler registered. # echo 1e660000.ethernet > /sys/bus/platform/drivers/ftgmac100/unbind ------------[ cut here ]------------ kernel BUG at net/core/dev.c:10254! Internal error: Oops - BUG: 0 [#1] SMP ARM CPU: 0 PID: 115 Comm: sh Not tainted 5.10.0-rc3-next-20201111-00007-g02e0365710c4 #46 Hardware name: Generic DT based system PC is at netdev_run_todo+0x314/0x394 LR is at cpumask_next+0x20/0x24 pc : [<806f5830>] lr : [<80863cb0>] psr: 80000153 sp : 855bbd58 ip : 00000001 fp : 855bbdac r10: 80c03d00 r9 : 80c06228 r8 : 81158c54 r7 : 00000000 r6 : 80c05dec r5 : 80c05d18 r4 : 813b9280 r3 : 813b9054 r2 : 8122c470 r1 : 00000002 r0 : 00000002 Flags: Nzcv IRQs on FIQs off Mode SVC_32 ISA ARM Segment none Control: 00c5387d Table: 85514008 DAC: 00000051 Process sh (pid: 115, stack limit = 0x7cb5703d) ... Backtrace: [<806f551c>] (netdev_run_todo) from [<80707eec>] (rtnl_unlock+0x18/0x1c) r10:00000051 r9:854ed710 r8:81158c54 r7:80c76bb0 r6:81158c10 r5:8115b410 r4:813b9000 [<80707ed4>] (rtnl_unlock) from [<806f5db8>] (unregister_netdev+0x2c/0x30) [<806f5d8c>] (unregister_netdev) from [<805a8180>] (ftgmac100_remove+0x20/0xa8) r5:8115b410 r4:813b9000 [<805a8160>] (ftgmac100_remove) from [<805355e4>] (platform_drv_remove+0x34/0x4c) Fixes: bd466c3fb5a4 ("net/faraday: Support NCSI mode") Signed-off-by: Joel Stanley Link: https://lore.kernel.org/r/20201117024448.1170761-1-joel@jms.id.au Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/faraday/ftgmac100.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 00024dd41147..80fb1f537bb3 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1907,6 +1907,8 @@ err_register_netdev: clk_disable_unprepare(priv->rclk); clk_disable_unprepare(priv->clk); err_ncsi_dev: + if (priv->ndev) + ncsi_unregister_dev(priv->ndev); ftgmac100_destroy_mdio(netdev); err_setup_mdio: iounmap(priv->base); @@ -1926,6 +1928,8 @@ static int ftgmac100_remove(struct platform_device *pdev) netdev = platform_get_drvdata(pdev); priv = netdev_priv(netdev); + if (priv->ndev) + ncsi_unregister_dev(priv->ndev); unregister_netdev(netdev); clk_disable_unprepare(priv->rclk); From 1b9e2a8c99a5c021041bfb2d512dc3ed92a94ffd Mon Sep 17 00:00:00 2001 From: Ryan Sharpelletti Date: Mon, 16 Nov 2020 17:44:13 +0000 Subject: [PATCH 612/710] tcp: only postpone PROBE_RTT if RTT is < current min_rtt estimate During loss recovery, retransmitted packets are forced to use TCP timestamps to calculate the RTT samples, which have a millisecond granularity. BBR is designed using a microsecond granularity. As a result, multiple RTT samples could be truncated to the same RTT value during loss recovery. This is problematic, as BBR will not enter PROBE_RTT if the RTT sample is <= the current min_rtt sample, meaning that if there are persistent losses, PROBE_RTT will constantly be pushed off and potentially never re-entered. This patch makes sure that BBR enters PROBE_RTT by checking if RTT sample is < the current min_rtt sample, rather than <=. The Netflix transport/TCP team discovered this bug in the Linux TCP BBR code during lab tests. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Ryan Sharpelletti Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Yuchung Cheng Link: https://lore.kernel.org/r/20201116174412.1433277-1-sharpelletti.kdev@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_bbr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 6c4d79baff26..6ea3dc2e4219 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -945,7 +945,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) filter_expired = after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && - (rs->rtt_us <= bbr->min_rtt_us || + (rs->rtt_us < bbr->min_rtt_us || (filter_expired && !rs->is_ack_delayed))) { bbr->min_rtt_us = rs->rtt_us; bbr->min_rtt_stamp = tcp_jiffies32; From ea63609857321c38fd4ad096388b413b66001c6c Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 8 Oct 2020 12:34:10 +0300 Subject: [PATCH 613/710] net/mlx5e: Fix refcount leak on kTLS RX resync On resync, the driver calls inet_lookup_established (__inet6_lookup_established) that increases sk_refcnt of the socket. To decrease it, the driver set skb->destructor to sock_edemux. However, it didn't work well, because the TCP stack also sets this destructor for early demux, and the refcount gets decreased only once, while increased two times (in mlx5e and in the TCP stack). It leads to a socket leak, a TLS context leak, which in the end leads to calling tls_dev_del twice: on socket close and on driver unload, which in turn leads to a crash. This commit fixes the refcount leak by calling sock_gen_put right away after using the socket, thus fixing all the subsequent issues. Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 7f6221b8b1f7..6a1d82503ef8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -476,19 +476,22 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) depth += sizeof(struct tcphdr); - if (unlikely(!sk || sk->sk_state == TCP_TIME_WAIT)) + if (unlikely(!sk)) return; + if (unlikely(sk->sk_state == TCP_TIME_WAIT)) + goto unref; + if (unlikely(!resync_queue_get_psv(sk))) - return; - - skb->sk = sk; - skb->destructor = sock_edemux; + goto unref; seq = th->seq; datalen = skb->len - depth; tls_offload_rx_resync_async_request_start(sk, seq, datalen); rq->stats->tls_resync_req_start++; + +unref: + sock_gen_put(sk); } void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, From 5cfb540ef27b5b763a3b181d142847ef0411728e Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 22 Oct 2020 11:22:56 -0500 Subject: [PATCH 614/710] net/mlx5e: Set IPsec WAs only in IP's non checksum partial case. The IP's checksum partial still requires L4 csum flag on Ethernet WQE. Make the IPsec WAs only for the IP's non checksum partial case (for example icmd packet) Fixes: 5be019040cb7 ("net/mlx5e: IPsec: Add Connect-X IPsec Tx data path offload") Signed-off-by: Huy Nguyen Reviewed-by: Raed Salem Reviewed-by: Alaa Hleihel Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 82b4419af9d4..6dd3ea3cbbed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -144,7 +144,9 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); } -/* RM 2311217: no L4 inner checksum for IPsec tunnel type packet */ +/* If packet is not IP's CHECKSUM_PARTIAL (e.g. icmd packet), + * need to set L3 checksum flag for IPsec + */ static void ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) @@ -154,7 +156,6 @@ ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; sq->stats->csum_partial_inner++; } else { - eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial++; } } @@ -162,11 +163,6 @@ ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, static inline void mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { - if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) { - ipsec_txwqe_build_eseg_csum(sq, skb, eseg); - return; - } - if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; if (skb->encapsulation) { @@ -177,6 +173,9 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial++; } + } else if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) { + ipsec_txwqe_build_eseg_csum(sq, skb, eseg); + } else sq->stats->csum_none++; } From 6248ce991f8eed4f2f0fdec694f5749156105629 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 3 Nov 2020 12:56:18 -0600 Subject: [PATCH 615/710] net/mlx5e: Fix IPsec packet drop by mlx5e_tc_update_skb Both TC and IPsec crypto offload use metadata_regB to store private information. Since TC does not use bit 31 of regB, IPsec will use bit 31 as the IPsec packet marker. The IPsec's regB usage is changed to: Bit31: IPsec marker Bit30-24: IPsec syndrome Bit23-0: IPsec obj id Fixes: b2ac7541e377 ("net/mlx5e: IPsec: Add Connect-X IPsec Rx data path offload") Signed-off-by: Huy Nguyen Reviewed-by: Raed Salem Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 14 +++++++------- .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 3 +-- .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 9 +++++---- drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 3 +++ 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 0e45590662a8..381a9c8c9da9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -64,13 +64,13 @@ static int rx_err_add_rule(struct mlx5e_priv *priv, if (!spec) return -ENOMEM; - /* Action to copy 7 bit ipsec_syndrome to regB[0:6] */ + /* Action to copy 7 bit ipsec_syndrome to regB[24:30] */ MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY); MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME); MLX5_SET(copy_action_in, action, src_offset, 0); MLX5_SET(copy_action_in, action, length, 7); MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(copy_action_in, action, dst_offset, 0); + MLX5_SET(copy_action_in, action, dst_offset, 24); modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL, 1, action); @@ -488,13 +488,13 @@ static int rx_add_rule(struct mlx5e_priv *priv, setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act); - /* Set 1 bit ipsec marker */ - /* Set 24 bit ipsec_obj_id */ + /* Set bit[31] ipsec marker */ + /* Set bit[23-0] ipsec_obj_id */ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(set_action_in, action, data, (ipsec_obj_id << 1) | 0x1); - MLX5_SET(set_action_in, action, offset, 7); - MLX5_SET(set_action_in, action, length, 25); + MLX5_SET(set_action_in, action, data, (ipsec_obj_id | BIT(31))); + MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, length, 32); modify_hdr = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL, 1, action); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 11e31a3db2be..a9b45606dbdb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -453,7 +453,6 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, struct mlx5_cqe64 *cqe) { u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata); - u8 ipsec_syndrome = ipsec_meta_data & 0xFF; struct mlx5e_priv *priv; struct xfrm_offload *xo; struct xfrm_state *xs; @@ -481,7 +480,7 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, xo = xfrm_offload(skb); xo->flags = CRYPTO_DONE; - switch (ipsec_syndrome & MLX5_IPSEC_METADATA_SYNDROM_MASK) { + switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) { case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED: xo->status = CRYPTO_SUCCESS; if (WARN_ON_ONCE(priv->ipsec->no_trailer)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index 056dacb612b0..9df9b9a8e09b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -39,9 +39,10 @@ #include "en.h" #include "en/txrx.h" -#define MLX5_IPSEC_METADATA_MARKER_MASK (0x80) -#define MLX5_IPSEC_METADATA_SYNDROM_MASK (0x7F) -#define MLX5_IPSEC_METADATA_HANDLE(metadata) (((metadata) >> 8) & 0xFF) +/* Bit31: IPsec marker, Bit30-24: IPsec syndrome, Bit23-0: IPsec obj id */ +#define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1) +#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(6, 0)) +#define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0)) struct mlx5e_accel_tx_ipsec_state { struct xfrm_offload *xo; @@ -78,7 +79,7 @@ static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_st static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { - return !!(MLX5_IPSEC_METADATA_MARKER_MASK & be32_to_cpu(cqe->ft_metadata)); + return MLX5_IPSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata)); } static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ipsec_st) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 3b979008143d..4a2ce241522e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -283,6 +283,9 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) reg_b = be32_to_cpu(cqe->ft_metadata); + if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ZONE_RESTORE_BITS)) + return false; + chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; if (chain) return true; From 219b3267ca102a35092f5998921a9e6f99074af2 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Wed, 4 Nov 2020 14:10:30 +0200 Subject: [PATCH 616/710] net/mlx5e: Fix check if netdev is bond slave Bond events handler uses bond_slave_get_rtnl to check if net device is bond slave. bond_slave_get_rtnl return the rcu rx_handler pointer from the netdev which exists for bond slaves but also exists for devices that are attached to linux bridge so using it as indication for bond slave is wrong. Fix by using netif_is_lag_port instead. Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule") Signed-off-by: Maor Dickman Reviewed-by: Raed Salem Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index 3e44e4d820c5..95f2b26a3ee3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -187,7 +187,7 @@ static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) struct mlx5e_priv *priv; /* A given netdev is not a representor or not a slave of LAG configuration */ - if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev)) + if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev)) return false; priv = netdev_priv(netdev); From 8cbcc5ef2a281f6bb10099f4572a08cb765ffbf4 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 2 Nov 2020 17:34:44 +0200 Subject: [PATCH 617/710] net/mlx5: Add handling of port type in rule deletion Handle destruction of rules with port destination type to enable full destruction of flow. Without this handling of TX rules the deletion of these rules fails. Dmesg of flow destruction failure: [ 203.714146] mlx5_core 0000:00:0b.0: mlx5_cmd_check:753:(pid 342): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x144b7a) [ 210.547387] ------------[ cut here ]------------ [ 210.548663] refcount_t: decrement hit 0; leaking memory. [ 210.550651] WARNING: CPU: 4 PID: 342 at lib/refcount.c:31 refcount_warn_saturate+0x5c/0x110 [ 210.550654] Modules linked in: mlx5_ib mlx5_core ib_ipoib rdma_ucm rdma_cm iw_cm ib_cm ib_umad ib_uverbs ib_core [ 210.550675] CPU: 4 PID: 342 Comm: test Not tainted 5.8.0-rc2+ #116 [ 210.550678] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 210.550680] RIP: 0010:refcount_warn_saturate+0x5c/0x110 [ 210.550685] Code: c6 d1 1b 01 00 0f 84 ad 00 00 00 5b 5d c3 80 3d b5 d1 1b 01 00 75 f4 48 c7 c7 20 d1 15 82 c6 05 a5 d1 1b 01 01 e8 a7 eb af ff <0f> 0b eb dd 80 3d 99 d1 1b 01 00 75 d4 48 c7 c7 c0 cf 15 82 c6 05 [ 210.550687] RSP: 0018:ffff8881642e77e8 EFLAGS: 00010282 [ 210.550691] RAX: 0000000000000000 RBX: 0000000000000004 RCX: 0000000000000000 [ 210.550694] RDX: 0000000000000027 RSI: 0000000000000004 RDI: ffffed102c85ceef [ 210.550696] RBP: ffff888161720428 R08: ffffffff8124c10e R09: ffffed103243beae [ 210.550698] R10: ffff8881921df56b R11: ffffed103243bead R12: ffff8881841b4180 [ 210.550701] R13: ffff888161720428 R14: ffff8881616d0000 R15: ffff888161720380 [ 210.550704] FS: 00007fc27f025740(0000) GS:ffff888192000000(0000) knlGS:0000000000000000 [ 210.550706] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 210.550708] CR2: 0000557e4b41a6a0 CR3: 0000000002415004 CR4: 0000000000360ea0 [ 210.550711] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 210.550713] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 210.550715] Call Trace: [ 210.550717] mlx5_del_flow_rules+0x484/0x490 [mlx5_core] [ 210.550720] ? mlx5_cmd_set_fte+0xa80/0xa80 [mlx5_core] [ 210.550722] mlx5_ib_destroy_flow+0x17f/0x280 [mlx5_ib] [ 210.550724] uverbs_free_flow+0x4c/0x90 [ib_uverbs] [ 210.550726] destroy_hw_idr_uobject+0x41/0xb0 [ib_uverbs] [ 210.550728] uverbs_destroy_uobject+0xaa/0x390 [ib_uverbs] [ 210.550731] __uverbs_cleanup_ufile+0x129/0x1b0 [ib_uverbs] [ 210.550733] ? uverbs_destroy_uobject+0x390/0x390 [ib_uverbs] [ 210.550735] uverbs_destroy_ufile_hw+0x78/0x190 [ib_uverbs] [ 210.550737] ib_uverbs_close+0x36/0x140 [ib_uverbs] [ 210.550739] __fput+0x181/0x380 [ 210.550741] task_work_run+0x88/0xd0 [ 210.550743] do_exit+0x5f6/0x13b0 [ 210.550745] ? sched_clock_cpu+0x30/0x140 [ 210.550747] ? is_current_pgrp_orphaned+0x70/0x70 [ 210.550750] ? lock_downgrade+0x360/0x360 [ 210.550752] ? mark_held_locks+0x1d/0x90 [ 210.550754] do_group_exit+0x8a/0x140 [ 210.550756] get_signal+0x20a/0xf50 [ 210.550758] do_signal+0x8c/0xbe0 [ 210.550760] ? hrtimer_nanosleep+0x1d8/0x200 [ 210.550762] ? nanosleep_copyout+0x50/0x50 [ 210.550764] ? restore_sigcontext+0x320/0x320 [ 210.550766] ? __hrtimer_init+0xf0/0xf0 [ 210.550768] ? timespec64_add_safe+0x150/0x150 [ 210.550770] ? mark_held_locks+0x1d/0x90 [ 210.550772] ? lockdep_hardirqs_on_prepare+0x14c/0x240 [ 210.550774] __prepare_exit_to_usermode+0x119/0x170 [ 210.550776] do_syscall_64+0x65/0x300 [ 210.550778] ? trace_hardirqs_off+0x10/0x120 [ 210.550781] ? mark_held_locks+0x1d/0x90 [ 210.550783] ? asm_sysvec_apic_timer_interrupt+0xa/0x20 [ 210.550785] ? lockdep_hardirqs_on+0x112/0x190 [ 210.550787] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 210.550789] RIP: 0033:0x7fc27f1cd157 [ 210.550791] Code: Bad RIP value. [ 210.550793] RSP: 002b:00007ffd4db27ea8 EFLAGS: 00000246 ORIG_RAX: 0000000000000023 [ 210.550798] RAX: fffffffffffffdfc RBX: ffffffffffffff80 RCX: 00007fc27f1cd157 [ 210.550800] RDX: 00007fc27f025740 RSI: 00007ffd4db27eb0 RDI: 00007ffd4db27eb0 [ 210.550803] RBP: 0000000000000016 R08: 0000000000000000 R09: 000000000000000e [ 210.550805] R10: 00007ffd4db27dc7 R11: 0000000000000246 R12: 0000000000400c00 [ 210.550808] R13: 00007ffd4db285f0 R14: 0000000000000000 R15: 0000000000000000 [ 210.550809] irq event stamp: 49399 [ 210.550812] hardirqs last enabled at (49399): [] console_unlock+0x556/0x6f0 [ 210.550815] hardirqs last disabled at (49398): [] console_unlock+0xb7/0x6f0 [ 210.550818] softirqs last enabled at (48706): [] __do_softirq+0x37b/0x60c [ 210.550820] softirqs last disabled at (48697): [] asm_call_on_stack+0xf/0x20 [ 210.550822] ---[ end trace ad18c0e6fa846454 ]--- [ 210.581862] mlx5_core 0000:00:0c.0: mlx5_destroy_flow_table:2132:(pid 342): Flow table 262150 wasn't destroyed, refcount > 1 Fixes: a7ee18bdee83 ("RDMA/mlx5: Allow creating a matcher for a NIC TX flow table") Signed-off-by: Michael Guralnik Reviewed-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 325a5b0d6829..9fdd99272e31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -534,6 +534,13 @@ static void del_sw_hw_rule(struct fs_node *node) goto out; } + if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT && + --fte->dests_size) { + fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW; + goto out; + } + if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { fte->modify_mask |= From 1ce5fc724a26e0b476e42c5d588bdb80caea003b Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Mon, 2 Nov 2020 13:45:24 +0200 Subject: [PATCH 618/710] net/mlx5: Clear bw_share upon VF disable Currently, if user disables VFs with some min and max rates configured, they are cleared. But QoS data is not cleared and restored upon next VF enable placing limits on minimal rate for given VF, when user expects none. To match cleared vport->info struct with QoS-related min and max rates upon VF disable, clear vport->qos struct too. Fixes: 556b9d16d3f5 ("net/mlx5: Clear VF's configuration on disabling SRIOV") Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index e8e6294c7cca..6562f4d484e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1408,6 +1408,7 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) int i; mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + memset(&vport->qos, 0, sizeof(vport->qos)); memset(&vport->info, 0, sizeof(vport->info)); vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; } From 470b74758260e4abc2508cf1614573c00a00465c Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Wed, 21 Oct 2020 11:05:41 +0300 Subject: [PATCH 619/710] net/mlx5: Disable QoS when min_rates on all VFs are zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently when QoS is enabled for VF and any min_rate is configured, the driver sets bw_share value to at least 1 and doesn’t allow to set it to 0 to make minimal rate unlimited. It means there is always a minimal rate configured for every VF, even if user tries to remove it. In order to make QoS disable possible, check whether all vports have configured min_rate = 0. If this is true, set their bw_share to 0 to disable min_rate limitations. Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate") Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 6562f4d484e6..5ad2308a2a6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2222,12 +2222,15 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) max_guarantee = evport->info.min_rate; } - return max_t(u32, max_guarantee / fw_max_bw_share, 1); + if (max_guarantee) + return max_t(u32, max_guarantee / fw_max_bw_share, 1); + return 0; } -static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) +static int normalize_vports_min_rate(struct mlx5_eswitch *esw) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 divider = calculate_vports_min_rate_divider(esw); struct mlx5_vport *evport; u32 vport_max_rate; u32 vport_min_rate; @@ -2240,9 +2243,9 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) continue; vport_min_rate = evport->info.min_rate; vport_max_rate = evport->info.max_rate; - bw_share = MLX5_MIN_BW_SHARE; + bw_share = 0; - if (vport_min_rate) + if (divider) bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate, divider, fw_max_bw_share); @@ -2267,7 +2270,6 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); u32 fw_max_bw_share; u32 previous_min_rate; - u32 divider; bool min_rate_supported; bool max_rate_supported; int err = 0; @@ -2292,8 +2294,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, previous_min_rate = evport->info.min_rate; evport->info.min_rate = min_rate; - divider = calculate_vports_min_rate_divider(esw); - err = normalize_vports_min_rate(esw, divider); + err = normalize_vports_min_rate(esw); if (err) { evport->info.min_rate = previous_min_rate; goto unlock; From 5b8631c7b21ca8bc039f0bc030048973b039e0d2 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 9 Nov 2020 11:35:52 +0200 Subject: [PATCH 620/710] net/mlx5: E-Switch, Fail mlx5_esw_modify_vport_rate if qos disabled Avoid calling mlx5_esw_modify_vport_rate() if qos is not enabled and avoid unnecessary syndrome messages from firmware. Fixes: fcb64c0f5640 ("net/mlx5: E-Switch, add ingress rate support") Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5ad2308a2a6b..d4ee0a9c03db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1142,6 +1142,10 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, struct mlx5_vport *vport; vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (!vport->qos.enabled) + return -EOPNOTSUPP; + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); return mlx5_modify_scheduling_element_cmd(esw->dev, From 68ec32daf7d50a9f7425f8607a7402c13aa0c587 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Sat, 14 Nov 2020 19:52:23 +0800 Subject: [PATCH 621/710] net/mlx5: fix error return code in mlx5e_tc_nic_init() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: aedd133d17bc ("net/mlx5e: Support CT offload for tc nic flows") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2e2fa0440032..ce710f22b1ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5229,8 +5229,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, MLX5_FLOW_NAMESPACE_KERNEL); - if (IS_ERR(tc->ct)) + if (IS_ERR(tc->ct)) { + err = PTR_ERR(tc->ct); goto err_ct; + } tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, From fd5736bf9f235d26c83cac8a16c70bbdafa55abe Mon Sep 17 00:00:00 2001 From: Alex Marginean Date: Thu, 12 Nov 2020 20:26:08 +0200 Subject: [PATCH 622/710] enetc: Workaround for MDIO register access issue Due to a hardware issue, an access to MDIO registers that is concurrent with other ENETC register accesses may lead to the MDIO access being dropped or corrupted. The workaround introduces locking for all register accesses to the ENETC register space. To reduce performance impact, a readers-writers locking scheme has been implemented. The writer in this case is the MDIO access code (irrelevant whether that MDIO access is a register read or write), and the reader is any access code to non-MDIO ENETC registers. Also, the datapath functions acquire the read lock fewer times and use _hot accessors. All the rest of the code uses the _wa accessors which lock every register access. The commit introducing MDIO support is - commit ebfcb23d62ab ("enetc: Add ENETC PF level external MDIO support") but due to subsequent refactoring this patch is applicable on top of a later commit. Fixes: 6517798dd343 ("enetc: Make MDIO accessors more generic and export to include/linux/fsl") Signed-off-by: Alex Marginean Signed-off-by: Vladimir Oltean Signed-off-by: Claudiu Manoil Link: https://lore.kernel.org/r/20201112182608.26177-1-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/Kconfig | 1 + drivers/net/ethernet/freescale/enetc/enetc.c | 62 +++++++--- .../net/ethernet/freescale/enetc/enetc_hw.h | 115 +++++++++++++++++- .../net/ethernet/freescale/enetc/enetc_mdio.c | 8 +- 4 files changed, 161 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index 0fa18b00c49b..d99ea0f4e4a6 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -16,6 +16,7 @@ config FSL_ENETC config FSL_ENETC_VF tristate "ENETC VF driver" depends on PCI && PCI_MSI + select FSL_ENETC_MDIO select PHYLINK select DIMLIB help diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 52be6e315752..fc2075ea57fe 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -33,7 +33,10 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_BUSY; } + enetc_lock_mdio(); count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads); + enetc_unlock_mdio(); + if (unlikely(!count)) goto drop_packet_err; @@ -239,7 +242,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, skb_tx_timestamp(skb); /* let H/W know BD ring has been updated */ - enetc_wr_reg(tx_ring->tpir, i); /* includes wmb() */ + enetc_wr_reg_hot(tx_ring->tpir, i); /* includes wmb() */ return count; @@ -262,12 +265,16 @@ static irqreturn_t enetc_msix(int irq, void *data) struct enetc_int_vector *v = data; int i; + enetc_lock_mdio(); + /* disable interrupts */ - enetc_wr_reg(v->rbier, 0); - enetc_wr_reg(v->ricr1, v->rx_ictt); + enetc_wr_reg_hot(v->rbier, 0); + enetc_wr_reg_hot(v->ricr1, v->rx_ictt); for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS) - enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), 0); + enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i), 0); + + enetc_unlock_mdio(); napi_schedule(&v->napi); @@ -334,19 +341,23 @@ static int enetc_poll(struct napi_struct *napi, int budget) v->rx_napi_work = false; + enetc_lock_mdio(); + /* enable interrupts */ - enetc_wr_reg(v->rbier, ENETC_RBIER_RXTIE); + enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE); for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS) - enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), - ENETC_TBIER_TXTIE); + enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i), + ENETC_TBIER_TXTIE); + + enetc_unlock_mdio(); return work_done; } static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci) { - int pi = enetc_rd_reg(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK; + int pi = enetc_rd_reg_hot(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK; return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi; } @@ -386,7 +397,10 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) i = tx_ring->next_to_clean; tx_swbd = &tx_ring->tx_swbd[i]; + + enetc_lock_mdio(); bds_to_clean = enetc_bd_ready_count(tx_ring, i); + enetc_unlock_mdio(); do_tstamp = false; @@ -429,16 +443,20 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) tx_swbd = tx_ring->tx_swbd; } + enetc_lock_mdio(); + /* BD iteration loop end */ if (is_eof) { tx_frm_cnt++; /* re-arm interrupt source */ - enetc_wr_reg(tx_ring->idr, BIT(tx_ring->index) | - BIT(16 + tx_ring->index)); + enetc_wr_reg_hot(tx_ring->idr, BIT(tx_ring->index) | + BIT(16 + tx_ring->index)); } if (unlikely(!bds_to_clean)) bds_to_clean = enetc_bd_ready_count(tx_ring, i); + + enetc_unlock_mdio(); } tx_ring->next_to_clean = i; @@ -515,8 +533,6 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt) if (likely(j)) { rx_ring->next_to_alloc = i; /* keep track from page reuse */ rx_ring->next_to_use = i; - /* update ENETC's consumer index */ - enetc_wr_reg(rx_ring->rcir, i); } return j; @@ -534,8 +550,8 @@ static void enetc_get_rx_tstamp(struct net_device *ndev, u64 tstamp; if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TSTMP) { - lo = enetc_rd(hw, ENETC_SICTR0); - hi = enetc_rd(hw, ENETC_SICTR1); + lo = enetc_rd_reg_hot(hw->reg + ENETC_SICTR0); + hi = enetc_rd_reg_hot(hw->reg + ENETC_SICTR1); rxbd = enetc_rxbd_ext(rxbd); tstamp_lo = le32_to_cpu(rxbd->ext.tstamp); if (lo <= tstamp_lo) @@ -684,23 +700,31 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, u32 bd_status; u16 size; + enetc_lock_mdio(); + if (cleaned_cnt >= ENETC_RXBD_BUNDLE) { int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt); + /* update ENETC's consumer index */ + enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use); cleaned_cnt -= count; } rxbd = enetc_rxbd(rx_ring, i); bd_status = le32_to_cpu(rxbd->r.lstatus); - if (!bd_status) + if (!bd_status) { + enetc_unlock_mdio(); break; + } - enetc_wr_reg(rx_ring->idr, BIT(rx_ring->index)); + enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index)); dma_rmb(); /* for reading other rxbd fields */ size = le16_to_cpu(rxbd->r.buf_len); skb = enetc_map_rx_buff_to_skb(rx_ring, i, size); - if (!skb) + if (!skb) { + enetc_unlock_mdio(); break; + } enetc_get_offloads(rx_ring, rxbd, skb); @@ -712,6 +736,7 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, if (unlikely(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) { + enetc_unlock_mdio(); dev_kfree_skb(skb); while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { dma_rmb(); @@ -751,6 +776,8 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, enetc_process_skb(rx_ring, skb); + enetc_unlock_mdio(); + napi_gro_receive(napi, skb); rx_frm_cnt++; @@ -1225,6 +1252,7 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) rx_ring->idr = hw->reg + ENETC_SIRXIDR; enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring)); + enetc_wr(hw, ENETC_SIRXIDR, rx_ring->next_to_use); /* enable ring */ enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 17cf7c94fdb5..eb6bbf1113c7 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -324,14 +324,100 @@ struct enetc_hw { void __iomem *global; }; -/* general register accessors */ -#define enetc_rd_reg(reg) ioread32((reg)) -#define enetc_wr_reg(reg, val) iowrite32((val), (reg)) +/* ENETC register accessors */ + +/* MDIO issue workaround (on LS1028A) - + * Due to a hardware issue, an access to MDIO registers + * that is concurrent with other ENETC register accesses + * may lead to the MDIO access being dropped or corrupted. + * To protect the MDIO accesses a readers-writers locking + * scheme is used, where the MDIO register accesses are + * protected by write locks to insure exclusivity, while + * the remaining ENETC registers are accessed under read + * locks since they only compete with MDIO accesses. + */ +extern rwlock_t enetc_mdio_lock; + +/* use this locking primitive only on the fast datapath to + * group together multiple non-MDIO register accesses to + * minimize the overhead of the lock + */ +static inline void enetc_lock_mdio(void) +{ + read_lock(&enetc_mdio_lock); +} + +static inline void enetc_unlock_mdio(void) +{ + read_unlock(&enetc_mdio_lock); +} + +/* use these accessors only on the fast datapath under + * the enetc_lock_mdio() locking primitive to minimize + * the overhead of the lock + */ +static inline u32 enetc_rd_reg_hot(void __iomem *reg) +{ + lockdep_assert_held(&enetc_mdio_lock); + + return ioread32(reg); +} + +static inline void enetc_wr_reg_hot(void __iomem *reg, u32 val) +{ + lockdep_assert_held(&enetc_mdio_lock); + + iowrite32(val, reg); +} + +/* internal helpers for the MDIO w/a */ +static inline u32 _enetc_rd_reg_wa(void __iomem *reg) +{ + u32 val; + + enetc_lock_mdio(); + val = ioread32(reg); + enetc_unlock_mdio(); + + return val; +} + +static inline void _enetc_wr_reg_wa(void __iomem *reg, u32 val) +{ + enetc_lock_mdio(); + iowrite32(val, reg); + enetc_unlock_mdio(); +} + +static inline u32 _enetc_rd_mdio_reg_wa(void __iomem *reg) +{ + unsigned long flags; + u32 val; + + write_lock_irqsave(&enetc_mdio_lock, flags); + val = ioread32(reg); + write_unlock_irqrestore(&enetc_mdio_lock, flags); + + return val; +} + +static inline void _enetc_wr_mdio_reg_wa(void __iomem *reg, u32 val) +{ + unsigned long flags; + + write_lock_irqsave(&enetc_mdio_lock, flags); + iowrite32(val, reg); + write_unlock_irqrestore(&enetc_mdio_lock, flags); +} + #ifdef ioread64 -#define enetc_rd_reg64(reg) ioread64((reg)) +static inline u64 _enetc_rd_reg64(void __iomem *reg) +{ + return ioread64(reg); +} #else /* using this to read out stats on 32b systems */ -static inline u64 enetc_rd_reg64(void __iomem *reg) +static inline u64 _enetc_rd_reg64(void __iomem *reg) { u32 low, high, tmp; @@ -345,12 +431,29 @@ static inline u64 enetc_rd_reg64(void __iomem *reg) } #endif +static inline u64 _enetc_rd_reg64_wa(void __iomem *reg) +{ + u64 val; + + enetc_lock_mdio(); + val = _enetc_rd_reg64(reg); + enetc_unlock_mdio(); + + return val; +} + +/* general register accessors */ +#define enetc_rd_reg(reg) _enetc_rd_reg_wa((reg)) +#define enetc_wr_reg(reg, val) _enetc_wr_reg_wa((reg), (val)) #define enetc_rd(hw, off) enetc_rd_reg((hw)->reg + (off)) #define enetc_wr(hw, off, val) enetc_wr_reg((hw)->reg + (off), val) -#define enetc_rd64(hw, off) enetc_rd_reg64((hw)->reg + (off)) +#define enetc_rd64(hw, off) _enetc_rd_reg64_wa((hw)->reg + (off)) /* port register accessors - PF only */ #define enetc_port_rd(hw, off) enetc_rd_reg((hw)->port + (off)) #define enetc_port_wr(hw, off, val) enetc_wr_reg((hw)->port + (off), val) +#define enetc_port_rd_mdio(hw, off) _enetc_rd_mdio_reg_wa((hw)->port + (off)) +#define enetc_port_wr_mdio(hw, off, val) _enetc_wr_mdio_reg_wa(\ + (hw)->port + (off), val) /* global register accessors - PF only */ #define enetc_global_rd(hw, off) enetc_rd_reg((hw)->global + (off)) #define enetc_global_wr(hw, off, val) enetc_wr_reg((hw)->global + (off), val) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c index 48c32a171afa..ee0116ed4738 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c @@ -16,13 +16,13 @@ static inline u32 _enetc_mdio_rd(struct enetc_mdio_priv *mdio_priv, int off) { - return enetc_port_rd(mdio_priv->hw, mdio_priv->mdio_base + off); + return enetc_port_rd_mdio(mdio_priv->hw, mdio_priv->mdio_base + off); } static inline void _enetc_mdio_wr(struct enetc_mdio_priv *mdio_priv, int off, u32 val) { - enetc_port_wr(mdio_priv->hw, mdio_priv->mdio_base + off, val); + enetc_port_wr_mdio(mdio_priv->hw, mdio_priv->mdio_base + off, val); } #define enetc_mdio_rd(mdio_priv, off) \ @@ -174,3 +174,7 @@ struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs) return hw; } EXPORT_SYMBOL_GPL(enetc_hw_alloc); + +/* Lock for MDIO access errata on LS1028A */ +DEFINE_RWLOCK(enetc_mdio_lock); +EXPORT_SYMBOL_GPL(enetc_mdio_lock); From cf23705244c947151179f929774fabf71e239eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Fri, 30 Oct 2020 13:38:48 +0100 Subject: [PATCH 623/710] ptrace: Set PF_SUPERPRIV when checking capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 69f594a38967 ("ptrace: do not audit capability check when outputing /proc/pid/stat") replaced the use of ns_capable() with has_ns_capability{,_noaudit}() which doesn't set PF_SUPERPRIV. Commit 6b3ad6649a4c ("ptrace: reintroduce usage of subjective credentials in ptrace_has_cap()") replaced has_ns_capability{,_noaudit}() with security_capable(), which doesn't set PF_SUPERPRIV neither. Since commit 98f368e9e263 ("kernel: Add noaudit variant of ns_capable()"), a new ns_capable_noaudit() helper is available. Let's use it! As a result, the signature of ptrace_has_cap() is restored to its original one. Cc: Christian Brauner Cc: Eric Paris Cc: Jann Horn Cc: Kees Cook Cc: Oleg Nesterov Cc: Serge E. Hallyn Cc: Tyler Hicks Cc: stable@vger.kernel.org Fixes: 6b3ad6649a4c ("ptrace: reintroduce usage of subjective credentials in ptrace_has_cap()") Fixes: 69f594a38967 ("ptrace: do not audit capability check when outputing /proc/pid/stat") Signed-off-by: Mickaël Salaün Reviewed-by: Jann Horn Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201030123849.770769-2-mic@digikod.net --- kernel/ptrace.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 43d6179508d6..79de1294f8eb 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -264,17 +264,11 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) return ret; } -static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns, - unsigned int mode) +static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { - int ret; - if (mode & PTRACE_MODE_NOAUDIT) - ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NOAUDIT); - else - ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NONE); - - return ret == 0; + return ns_capable_noaudit(ns, CAP_SYS_PTRACE); + return ns_capable(ns, CAP_SYS_PTRACE); } /* Returns 0 on success, -errno on denial. */ @@ -326,7 +320,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) gid_eq(caller_gid, tcred->sgid) && gid_eq(caller_gid, tcred->gid)) goto ok; - if (ptrace_has_cap(cred, tcred->user_ns, mode)) + if (ptrace_has_cap(tcred->user_ns, mode)) goto ok; rcu_read_unlock(); return -EPERM; @@ -345,7 +339,7 @@ ok: mm = task->mm; if (mm && ((get_dumpable(mm) != SUID_DUMP_USER) && - !ptrace_has_cap(cred, mm->user_ns, mode))) + !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; return security_ptrace_access_check(task, mode); From fb14528e443646dd3fd02df4437fcf5265b66baa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Fri, 30 Oct 2020 13:38:49 +0100 Subject: [PATCH 624/710] seccomp: Set PF_SUPERPRIV when checking capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the use of security_capable(current_cred(), ...) with ns_capable_noaudit() which set PF_SUPERPRIV. Since commit 98f368e9e263 ("kernel: Add noaudit variant of ns_capable()"), a new ns_capable_noaudit() helper is available. Let's use it! Cc: Jann Horn Cc: Kees Cook Cc: Tyler Hicks Cc: Will Drewry Cc: stable@vger.kernel.org Fixes: e2cfabdfd075 ("seccomp: add system call filtering using BPF") Signed-off-by: Mickaël Salaün Reviewed-by: Jann Horn Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201030123849.770769-3-mic@digikod.net --- kernel/seccomp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 8ad7a293255a..53a7d1512dd7 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include @@ -558,8 +558,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) * behavior of privileged children. */ if (!task_no_new_privs(current) && - security_capable(current_cred(), current_user_ns(), - CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0) + !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) return ERR_PTR(-EACCES); /* Allocate a new seccomp_filter */ From a5bbcbf29089a1252c201b1a7fd38151de355db9 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 15 Nov 2020 10:30:41 +0000 Subject: [PATCH 625/710] netdevsim: set .owner to THIS_MODULE If THIS_MODULE is not set, the module would be removed while debugfs is being used. It eventually makes kernel panic. Fixes: 82c93a87bf8b ("netdevsim: implement couple of testing devlink health reporters") Fixes: 424be63ad831 ("netdevsim: add UDP tunnel port offload support") Fixes: 4418f862d675 ("netdevsim: implement support for devlink region and snapshots") Fixes: d3cbb907ae57 ("netdevsim: add ACL trap reporting cookie as a metadata") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20201115103041.30701-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 2 ++ drivers/net/netdevsim/health.c | 1 + drivers/net/netdevsim/udp_tunnels.c | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index d07061417675..e7972e88ffe0 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -96,6 +96,7 @@ static const struct file_operations nsim_dev_take_snapshot_fops = { .open = simple_open, .write = nsim_dev_take_snapshot_write, .llseek = generic_file_llseek, + .owner = THIS_MODULE, }; static ssize_t nsim_dev_trap_fa_cookie_read(struct file *file, @@ -188,6 +189,7 @@ static const struct file_operations nsim_dev_trap_fa_cookie_fops = { .read = nsim_dev_trap_fa_cookie_read, .write = nsim_dev_trap_fa_cookie_write, .llseek = generic_file_llseek, + .owner = THIS_MODULE, }; static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c index 62958b238d50..21e2974660e7 100644 --- a/drivers/net/netdevsim/health.c +++ b/drivers/net/netdevsim/health.c @@ -261,6 +261,7 @@ static const struct file_operations nsim_dev_health_break_fops = { .open = simple_open, .write = nsim_dev_health_break_write, .llseek = generic_file_llseek, + .owner = THIS_MODULE, }; int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink) diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c index 6ab023acefd6..02dc3123eb6c 100644 --- a/drivers/net/netdevsim/udp_tunnels.c +++ b/drivers/net/netdevsim/udp_tunnels.c @@ -124,6 +124,7 @@ static const struct file_operations nsim_udp_tunnels_info_reset_fops = { .open = simple_open, .write = nsim_udp_tunnels_info_reset_write, .llseek = generic_file_llseek, + .owner = THIS_MODULE, }; int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, From c993df5a688975bf9ce899706ca13d2bc8d6be25 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Nov 2020 07:59:16 -0700 Subject: [PATCH 626/710] io_uring: don't double complete failed reissue request Zorro reports that an xfstest test case is failing, and it turns out that for the reissue path we can potentially issue a double completion on the request for the failure path. There's an issue around the retry as well, but for now, at least just make sure that we handle the error path correctly. Cc: stable@vger.kernel.org Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") Reported-by: Zorro Lang Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f05978a74ce1..b205c1df3f74 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2578,7 +2578,6 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) } end_req: req_set_fail_links(req); - io_req_complete(req, ret); return false; } #endif From 138559b9f99d3b6b1d5e75c78facc067a23871c6 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 15 Nov 2020 15:14:48 +0200 Subject: [PATCH 627/710] net/tls: Fix wrong record sn in async mode of device resync In async_resync mode, we log the TCP seq of records until the async request is completed. Later, in case one of the logged seqs matches the resync request, we return it, together with its record serial number. Before this fix, we mistakenly returned the serial number of the current record instead. Fixes: ed9b7646b06a ("net/tls: Add asynchronous resync") Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Link: https://lore.kernel.org/r/20201115131448.2702-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/tls.h | 16 +++++++++++++++- net/tls/tls_device.c | 37 +++++++++++++++++++++++++++---------- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index baf1e99d8193..cf1473099453 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -300,7 +300,8 @@ enum tls_offload_sync_type { #define TLS_DEVICE_RESYNC_ASYNC_LOGMAX 13 struct tls_offload_resync_async { atomic64_t req; - u32 loglen; + u16 loglen; + u16 rcd_delta; u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX]; }; @@ -471,6 +472,18 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len) return (i == -1); } +static inline void tls_bigint_subtract(unsigned char *seq, int n) +{ + u64 rcd_sn; + __be64 *p; + + BUILD_BUG_ON(TLS_MAX_REC_SEQ_SIZE != 8); + + p = (__be64 *)seq; + rcd_sn = be64_to_cpu(*p); + *p = cpu_to_be64(rcd_sn - n); +} + static inline struct tls_context *tls_get_ctx(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -639,6 +652,7 @@ tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); rx_ctx->resync_async->loglen = 0; + rx_ctx->resync_async->rcd_delta = 0; } static inline void diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index cec86229a6a0..54d3e161d198 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -694,36 +694,51 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx, static bool tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async, - s64 resync_req, u32 *seq) + s64 resync_req, u32 *seq, u16 *rcd_delta) { u32 is_async = resync_req & RESYNC_REQ_ASYNC; u32 req_seq = resync_req >> 32; u32 req_end = req_seq + ((resync_req >> 16) & 0xffff); + u16 i; + + *rcd_delta = 0; if (is_async) { + /* shouldn't get to wraparound: + * too long in async stage, something bad happened + */ + if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) + return false; + /* asynchronous stage: log all headers seq such that * req_seq <= seq <= end_seq, and wait for real resync request */ - if (between(*seq, req_seq, req_end) && + if (before(*seq, req_seq)) + return false; + if (!after(*seq, req_end) && resync_async->loglen < TLS_DEVICE_RESYNC_ASYNC_LOGMAX) resync_async->log[resync_async->loglen++] = *seq; + resync_async->rcd_delta++; + return false; } /* synchronous stage: check against the logged entries and * proceed to check the next entries if no match was found */ - while (resync_async->loglen) { - if (req_seq == resync_async->log[resync_async->loglen - 1] && - atomic64_try_cmpxchg(&resync_async->req, - &resync_req, 0)) { - resync_async->loglen = 0; + for (i = 0; i < resync_async->loglen; i++) + if (req_seq == resync_async->log[i] && + atomic64_try_cmpxchg(&resync_async->req, &resync_req, 0)) { + *rcd_delta = resync_async->rcd_delta - i; *seq = req_seq; + resync_async->loglen = 0; + resync_async->rcd_delta = 0; return true; } - resync_async->loglen--; - } + + resync_async->loglen = 0; + resync_async->rcd_delta = 0; if (req_seq == *seq && atomic64_try_cmpxchg(&resync_async->req, @@ -741,6 +756,7 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) u32 sock_data, is_req_pending; struct tls_prot_info *prot; s64 resync_req; + u16 rcd_delta; u32 req_seq; if (tls_ctx->rx_conf != TLS_HW) @@ -786,8 +802,9 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) return; if (!tls_device_rx_resync_async(rx_ctx->resync_async, - resync_req, &seq)) + resync_req, &seq, &rcd_delta)) return; + tls_bigint_subtract(rcd_sn, rcd_delta); break; } From c9c89dcd872ea33327673fcb97398993a1f22736 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 16 Nov 2020 14:27:46 -0800 Subject: [PATCH 628/710] bpf, sockmap: Fix partial copy_page_to_iter so progress can still be made If copy_page_to_iter() fails or even partially completes, but with fewer bytes copied than expected we currently reset sg.start and return EFAULT. This proves problematic if we already copied data into the user buffer before we return an error. Because we leave the copied data in the user buffer and fail to unwind the scatterlist so kernel side believes data has been copied and user side believes data has _not_ been received. Expected behavior should be to return number of bytes copied and then on the next read we need to return the error assuming its still there. This can happen if we have a copy length spanning multiple scatterlist elements and one or more complete before the error is hit. The error is rare enough though that my normal testing with server side programs, such as nginx, httpd, envoy, etc., I have never seen this. The only reliable way to reproduce that I've found is to stream movies over my browser for a day or so and wait for it to hang. Not very scientific, but with a few extra WARN_ON()s in the code the bug was obvious. When we review the errors from copy_page_to_iter() it seems we are hitting a page fault from copy_page_to_iter_iovec() where the code checks fault_in_pages_writeable(buf, copy) where buf is the user buffer. It also seems typical server applications don't hit this case. The other way to try and reproduce this is run the sockmap selftest tool test_sockmap with data verification enabled, but it doesn't reproduce the fault. Perhaps we can trigger this case artificially somehow from the test tools. I haven't sorted out a way to do that yet though. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/160556566659.73229.15694973114605301063.stgit@john-XPS-13-9370 --- net/ipv4/tcp_bpf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 37f4cb2bba5c..8e950b0bfabc 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -15,8 +15,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, { struct iov_iter *iter = &msg->msg_iter; int peek = flags & MSG_PEEK; - int i, ret, copied = 0; struct sk_msg *msg_rx; + int i, copied = 0; msg_rx = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); @@ -37,11 +37,9 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, page = sg_page(sge); if (copied + copy > len) copy = len - copied; - ret = copy_page_to_iter(page, sge->offset, copy, iter); - if (ret != copy) { - msg_rx->sg.start = i; - return -EFAULT; - } + copy = copy_page_to_iter(page, sge->offset, copy, iter); + if (!copy) + return copied ? copied : -EFAULT; copied += copy; if (likely(!peek)) { @@ -56,6 +54,11 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, put_page(page); } } else { + /* Lets not optimize peek case if copy_page_to_iter + * didn't copy the entire length lets just break. + */ + if (copy != sge->length) + return copied; sk_msg_iter_var_next(i); } From 36cd0e696a832a00247fca522034703566ac8885 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 16 Nov 2020 14:28:06 -0800 Subject: [PATCH 629/710] bpf, sockmap: Ensure SO_RCVBUF memory is observed on ingress redirect Fix sockmap sk_skb programs so that they observe sk_rcvbuf limits. This allows users to tune SO_RCVBUF and sockmap will honor them. We can refactor the if(charge) case out in later patches. But, keep this fix to the point. Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path") Suggested-by: Jakub Sitnicki Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/160556568657.73229.8404601585878439060.stgit@john-XPS-13-9370 --- net/core/skmsg.c | 20 ++++++++++++++++---- net/ipv4/tcp_bpf.c | 3 ++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 654182ecf87b..fe44280c033e 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i, struct scatterlist *sge = sk_msg_elem(msg, i); u32 len = sge->length; - if (charge) - sk_mem_uncharge(sk, len); - if (!msg->skb) + /* When the skb owns the memory we free it from consume_skb path. */ + if (!msg->skb) { + if (charge) + sk_mem_uncharge(sk, len); put_page(sg_page(sge)); + } memset(sge, 0, sizeof(*sge)); return len; } @@ -403,6 +405,9 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) int copied = 0, num_sge; struct sk_msg *msg; + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) + return -EAGAIN; + msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); if (unlikely(!msg)) return -EAGAIN; @@ -418,7 +423,14 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) return num_sge; } - sk_mem_charge(sk, skb->len); + /* This will transition ownership of the data from the socket where + * the BPF program was run initiating the redirect to the socket + * we will eventually receive this data on. The data will be released + * from skb_consume found in __tcp_bpf_recvmsg() after its been copied + * into user buffers. + */ + skb_set_owner_r(skb, sk); + copied = skb->len; msg->sg.start = 0; msg->sg.size = copied; diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 8e950b0bfabc..bc7d2a586e18 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -45,7 +45,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, if (likely(!peek)) { sge->offset += copy; sge->length -= copy; - sk_mem_uncharge(sk, copy); + if (!msg_rx->skb) + sk_mem_uncharge(sk, copy); msg_rx->sg.size -= copy; if (!sge->length) { From 70796fb751f1d34cc650e640572a174faf009cd4 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 16 Nov 2020 14:28:26 -0800 Subject: [PATCH 630/710] bpf, sockmap: Use truesize with sk_rmem_schedule() We use skb->size with sk_rmem_scheduled() which is not correct. Instead use truesize to align with socket and tcp stack usage of sk_rmem_schedule. Suggested-by: Daniel Borkman Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/160556570616.73229.17003722112077507863.stgit@john-XPS-13-9370 --- net/core/skmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index fe44280c033e..d09426ce4af3 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -411,7 +411,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); if (unlikely(!msg)) return -EAGAIN; - if (!sk_rmem_schedule(sk, skb, skb->len)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize)) { kfree(msg); return -EAGAIN; } From 6fa9201a898983da731fca068bb4b5c941537588 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 16 Nov 2020 14:28:46 -0800 Subject: [PATCH 631/710] bpf, sockmap: Avoid returning unneeded EAGAIN when redirecting to self If a socket redirects to itself and it is under memory pressure it is possible to get a socket stuck so that recv() returns EAGAIN and the socket can not advance for some time. This happens because when redirecting a skb to the same socket we received the skb on we first check if it is OK to enqueue the skb on the receiving socket by checking memory limits. But, if the skb is itself the object holding the memory needed to enqueue the skb we will keep retrying from kernel side and always fail with EAGAIN. Then userspace will get a recv() EAGAIN error if there are no skbs in the psock ingress queue. This will continue until either some skbs get kfree'd causing the memory pressure to reduce far enough that we can enqueue the pending packet or the socket is destroyed. In some cases its possible to get a socket stuck for a noticeable amount of time if the socket is only receiving skbs from sk_skb verdict programs. To reproduce I make the socket memory limits ridiculously low so sockets are always under memory pressure. More often though if under memory pressure it looks like a spurious EAGAIN error on user space side causing userspace to retry and typically enough has moved on the memory side that it works. To fix skip memory checks and skb_orphan if receiving on the same sock as already assigned. For SK_PASS cases this is easy, its always the same socket so we can just omit the orphan/set_owner pair. For backlog cases we need to check skb->sk and decide if the orphan and set_owner pair are needed. Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/160556572660.73229.12566203819812939627.stgit@john-XPS-13-9370 --- net/core/skmsg.c | 72 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index d09426ce4af3..9aed5a2c7c5b 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -399,38 +399,38 @@ out: } EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter); -static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) +static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, + struct sk_buff *skb) { - struct sock *sk = psock->sk; - int copied = 0, num_sge; struct sk_msg *msg; if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) - return -EAGAIN; + return NULL; + + if (!sk_rmem_schedule(sk, skb, skb->truesize)) + return NULL; msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); if (unlikely(!msg)) - return -EAGAIN; - if (!sk_rmem_schedule(sk, skb, skb->truesize)) { - kfree(msg); - return -EAGAIN; - } + return NULL; sk_msg_init(msg); - num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); + return msg; +} + +static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, + struct sk_psock *psock, + struct sock *sk, + struct sk_msg *msg) +{ + int num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); + int copied; + if (unlikely(num_sge < 0)) { kfree(msg); return num_sge; } - /* This will transition ownership of the data from the socket where - * the BPF program was run initiating the redirect to the socket - * we will eventually receive this data on. The data will be released - * from skb_consume found in __tcp_bpf_recvmsg() after its been copied - * into user buffers. - */ - skb_set_owner_r(skb, sk); - copied = skb->len; msg->sg.start = 0; msg->sg.size = copied; @@ -442,6 +442,40 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) return copied; } +static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) +{ + struct sock *sk = psock->sk; + struct sk_msg *msg; + + msg = sk_psock_create_ingress_msg(sk, skb); + if (!msg) + return -EAGAIN; + + /* This will transition ownership of the data from the socket where + * the BPF program was run initiating the redirect to the socket + * we will eventually receive this data on. The data will be released + * from skb_consume found in __tcp_bpf_recvmsg() after its been copied + * into user buffers. + */ + skb_set_owner_r(skb, sk); + return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); +} + +/* Puts an skb on the ingress queue of the socket already assigned to the + * skb. In this case we do not need to check memory limits or skb_set_owner_r + * because the skb is already accounted for here. + */ +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb) +{ + struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); + struct sock *sk = psock->sk; + + if (unlikely(!msg)) + return -EAGAIN; + sk_msg_init(msg); + return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); +} + static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len, bool ingress) { @@ -801,7 +835,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, * retrying later from workqueue. */ if (skb_queue_empty(&psock->ingress_skb)) { - err = sk_psock_skb_ingress(psock, skb); + err = sk_psock_skb_ingress_self(psock, skb); } if (err < 0) { skb_queue_tail(&psock->ingress_skb, skb); From 2443ca66676d50a4eb3305c236bccd84a9828ce2 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 16 Nov 2020 14:29:08 -0800 Subject: [PATCH 632/710] bpf, sockmap: Handle memory acct if skb_verdict prog redirects to self If the skb_verdict_prog redirects an skb knowingly to itself, fix your BPF program this is not optimal and an abuse of the API please use SK_PASS. That said there may be cases, such as socket load balancing, where picking the socket is hashed based or otherwise picks the same socket it was received on in some rare cases. If this happens we don't want to confuse userspace giving them an EAGAIN error if we can avoid it. To avoid double accounting in these cases. At the moment even if the skb has already been charged against the sockets rcvbuf and forward alloc we check it again and do set_owner_r() causing it to be orphaned and recharged. For one this is useless work, but more importantly we can have a case where the skb could be put on the ingress queue, but because we are under memory pressure we return EAGAIN. The trouble here is the skb has already been accounted for so any rcvbuf checks include the memory associated with the packet already. This rolls up and can result in unnecessary EAGAIN errors in userspace read() calls. Fix by doing an unlikely check and skipping checks if skb->sk == sk. Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/160556574804.73229.11328201020039674147.stgit@john-XPS-13-9370 --- net/core/skmsg.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 9aed5a2c7c5b..514bc9f6f8ae 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -442,11 +442,19 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, return copied; } +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb); + static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) { struct sock *sk = psock->sk; struct sk_msg *msg; + /* If we are receiving on the same sock skb->sk is already assigned, + * skip memory accounting and owner transition seeing it already set + * correctly. + */ + if (unlikely(skb->sk == sk)) + return sk_psock_skb_ingress_self(psock, skb); msg = sk_psock_create_ingress_msg(sk, skb); if (!msg) return -EAGAIN; From 4363023d2668e621b0743db351a9555d6e6ea57e Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 16 Nov 2020 14:29:28 -0800 Subject: [PATCH 633/710] bpf, sockmap: Avoid failures from skb_to_sgvec when skb has frag_list When skb has a frag_list its possible for skb_to_sgvec() to fail. This happens when the scatterlist has fewer elements to store pages than would be needed for the initial skb plus any of its frags. This case appears rare, but is possible when running an RX parser/verdict programs exposed to the internet. Currently, when this happens we throw an error, break the pipe, and kfree the msg. This effectively breaks the application or forces it to do a retry. Lets catch this case and handle it by doing an skb_linearize() on any skb we receive with frags. At this point skb_to_sgvec should not fail because the failing conditions would require frags to be in place. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/160556576837.73229.14800682790808797635.stgit@john-XPS-13-9370 --- net/core/skmsg.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 514bc9f6f8ae..25cdbb20f3a0 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -423,9 +423,16 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, struct sock *sk, struct sk_msg *msg) { - int num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); - int copied; + int num_sge, copied; + /* skb linearize may fail with ENOMEM, but lets simply try again + * later if this happens. Under memory pressure we don't want to + * drop the skb. We need to linearize the skb so that the mapping + * in skb_to_sgvec can not error. + */ + if (skb_linearize(skb)) + return -EAGAIN; + num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); if (unlikely(num_sge < 0)) { kfree(msg); return num_sge; From ee415d73dcc24caef7f6bbf292dcc365613d2188 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 15 Nov 2020 14:06:23 +0200 Subject: [PATCH 634/710] tools/testing/scatterlist: Fix test to compile and run Add missing define of ALIGN_DOWN to make the test build and run. In addition, __sg_alloc_table_from_pages now support unaligned maximum segment, so adapt the test result accordingly. Fixes: 07da1223ec93 ("lib/scatterlist: Add support in dynamic allocation of SG table from pages") Link: https://lore.kernel.org/r/20201115120623.139113-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- tools/testing/scatterlist/linux/mm.h | 1 + tools/testing/scatterlist/main.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 6ae907f375d2..f9a12005fcea 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -33,6 +33,7 @@ typedef unsigned long dma_addr_t; #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index b2c7e9f7b8d3..f561aed7c657 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -52,9 +52,9 @@ int main(void) { const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT; struct test *test, tests[] = { - { -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, - { -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, + { 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, + { 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 }, { 0, 1, pfn(0), 1, sgmax, 1 }, { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 }, From e33de7c5317e2827b2ba6fd120a505e9eb727b05 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 16 Nov 2020 16:20:18 +0800 Subject: [PATCH 635/710] inet_diag: Fix error path to cancel the meseage in inet_req_diag_fill() nlmsg_cancel() needs to be called in the error path of inet_req_diag_fill to cancel the message. Fixes: d545caca827b ("net: inet: diag: expose the socket mark to privileged processes.") Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20201116082018.16496-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_diag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 366a4507b5a3..93474b1bea4e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -479,8 +479,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, r->idiag_inode = 0; if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, - inet_rsk(reqsk)->ir_mark)) + inet_rsk(reqsk)->ir_mark)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } nlmsg_end(skb, nlh); return 0; From 93be52612431e71ee8cb980ef11468997857e4c4 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Mon, 16 Nov 2020 16:29:44 +0300 Subject: [PATCH 636/710] qed: fix ILT configuration of SRC block The code refactoring of ILT configuration was not complete, the old unused variables were used for the SRC block. That could lead to the memory corruption by HW when rx filters are configured. This patch completes that refactoring. Fixes: 8a52bbab39c9 (qed: Debug feature: ilt and mdump) Signed-off-by: Igor Russkikh Signed-off-by: Ariel Elior Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20201116132944.2055-1-dbogdanov@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 4 ++-- drivers/net/ethernet/qlogic/qed/qed_cxt.h | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 0e4cd8890cff..0a22f8ce9a2c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -1647,9 +1647,9 @@ static void qed_src_init_pf(struct qed_hwfn *p_hwfn) ilog2(rounded_conn_num)); STORE_RT_REG_AGG(p_hwfn, SRC_REG_FIRSTFREE_RT_OFFSET, - p_hwfn->p_cxt_mngr->first_free); + p_hwfn->p_cxt_mngr->src_t2.first_free); STORE_RT_REG_AGG(p_hwfn, SRC_REG_LASTFREE_RT_OFFSET, - p_hwfn->p_cxt_mngr->last_free); + p_hwfn->p_cxt_mngr->src_t2.last_free); } /* Timers PF */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h index 8b64495f8745..056e79620a0e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h @@ -326,9 +326,6 @@ struct qed_cxt_mngr { /* SRC T2 */ struct qed_src_t2 src_t2; - u32 t2_num_pages; - u64 first_free; - u64 last_free; /* total number of SRQ's for this hwfn */ u32 srq_count; From c09c8a27b9baa417864b9adc3228b10ae5eeec93 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sun, 15 Nov 2020 23:45:09 +0100 Subject: [PATCH 637/710] ipv4: use IS_ENABLED instead of ifdef Checking for ifdef CONFIG_x fails if CONFIG_x=m. Use IS_ENABLED instead, which is true for both built-ins and modules. Otherwise, a > ip -4 route add 1.2.3.4/32 via inet6 fe80::2 dev eth1 fails with the message "Error: IPv6 support not enabled in kernel." if CONFIG_IPV6 is `m`. In the spirit of b8127113d01e53adba15b41aefd37b90ed83d631. Fixes: d15662682db2 ("ipv4: Allow ipv6 gateway with ipv4 routes") Cc: Kim Phillips Signed-off-by: Florian Klink Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20201115224509.2020651-1-flokli@flokli.de Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 86a23e4a6a50..b87140a1fa28 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -696,7 +696,7 @@ int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, cfg->fc_gw4 = *((__be32 *)via->rtvia_addr); break; case AF_INET6: -#ifdef CONFIG_IPV6 +#if IS_ENABLED(CONFIG_IPV6) if (alen != sizeof(struct in6_addr)) { NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA"); return -EINVAL; From 957a1ea3599210e9996777a734ea5284eaef75c7 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 6 Nov 2020 15:22:31 +0100 Subject: [PATCH 638/710] drm/sun4i: backend: Fix probe failure with multiple backends Commit e0d072782c73 ("dma-mapping: introduce DMA range map, supplanting dma_pfn_offset") introduced a regression in our code since the second backed to probe will now get -EINVAL back from dma_direct_set_offset and will prevent the entire DRM device from probing. Ignore -EINVAL as a temporary measure to get it back working, before removing that call entirely. Fixes: e0d072782c73 ("dma-mapping: introduce DMA range map, supplanting dma_pfn_offset") Signed-off-by: Maxime Ripard Reviewed-by: Chen-Yu Tsai Reviewed-by: Christoph Hellwig Acked-by: Daniel Vetter --- drivers/gpu/drm/sun4i/sun4i_backend.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index 77497b45f9a2..55960cbb1019 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -814,9 +814,15 @@ static int sun4i_backend_bind(struct device *dev, struct device *master, * * XXX(hch): this has no business in a driver and needs to move * to the device tree. + * + * If we have two subsequent calls to dma_direct_set_offset + * returns -EINVAL. Unfortunately, this happens when we have two + * backends in the system, and will result in the driver + * reporting an error while it has been setup properly before. + * Ignore EINVAL, but it should really be removed eventually. */ ret = dma_direct_set_offset(drm->dev, PHYS_OFFSET, 0, SZ_4G); - if (ret) + if (ret && ret != -EINVAL) return ret; } From 470e14c00c63752466ac44de392f584dfdddd82e Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Sun, 15 Nov 2020 17:30:22 +0100 Subject: [PATCH 639/710] can: kvaser_pciefd: Fix KCAN bittiming limits Use correct bittiming limits for the KCAN CAN controller. Fixes: 26ad340e582d ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices") Signed-off-by: Jimmy Assarsson Link: https://lore.kernel.org/r/20201115163027.16851-1-jimmyassarsson@gmail.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 6f766918211a..72acd1ba162d 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -287,12 +287,12 @@ struct kvaser_pciefd_tx_packet { static const struct can_bittiming_const kvaser_pciefd_bittiming_const = { .name = KVASER_PCIEFD_DRV_NAME, .tseg1_min = 1, - .tseg1_max = 255, + .tseg1_max = 512, .tseg2_min = 1, .tseg2_max = 32, .sjw_max = 16, .brp_min = 1, - .brp_max = 4096, + .brp_max = 8192, .brp_inc = 1, }; From d003868d7f8579838ed58b6429af91844039b6f8 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Sun, 15 Nov 2020 17:30:23 +0100 Subject: [PATCH 640/710] can: kvaser_usb: kvaser_usb_hydra: Fix KCAN bittiming limits Use correct bittiming limits for the KCAN CAN controller. Fixes: aec5fb2268b7 ("can: kvaser_usb: Add support for Kvaser USB hydra family") Signed-off-by: Jimmy Assarsson Link: https://lore.kernel.org/r/20201115163027.16851-2-jimmyassarsson@gmail.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 7ab87a758754..218fadc91155 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -367,7 +367,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = { .tseg2_max = 32, .sjw_max = 16, .brp_min = 1, - .brp_max = 4096, + .brp_max = 8192, .brp_inc = 1, }; From 860aaabac8235cfde10fe556aa82abbbe3117888 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 17 Nov 2020 21:23:34 +0100 Subject: [PATCH 641/710] x86/dumpstack: Do not try to access user space code of other tasks sysrq-t ends up invoking show_opcodes() for each task which tries to access the user space code of other processes, which is obviously bogus. It either manages to dump where the foreign task's regs->ip points to in a valid mapping of the current task or triggers a pagefault and prints "Code: Bad RIP value.". Both is just wrong. Add a safeguard in copy_code() and check whether the @regs pointer matches currents pt_regs. If not, do not even try to access it. While at it, add commentary why using copy_from_user_nmi() is safe in copy_code() even if the function name suggests otherwise. Reported-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Acked-by: Oleg Nesterov Tested-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201117202753.667274723@linutronix.de --- arch/x86/kernel/dumpstack.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 25c06b67e7e0..97aa900386cb 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -78,6 +78,9 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src, if (!user_mode(regs)) return copy_from_kernel_nofault(buf, (u8 *)src, nbytes); + /* The user space code from other tasks cannot be accessed. */ + if (regs != task_pt_regs(current)) + return -EPERM; /* * Make sure userspace isn't trying to trick us into dumping kernel * memory by pointing the userspace instruction pointer at it. @@ -85,6 +88,12 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src, if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX)) return -EINVAL; + /* + * Even if named copy_from_user_nmi() this can be invoked from + * other contexts and will not try to resolve a pagefault, which is + * the correct thing to do here as this code can be called from any + * context. + */ return copy_from_user_nmi(buf, (void __user *)src, nbytes); } @@ -115,13 +124,19 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) u8 opcodes[OPCODE_BUFSIZE]; unsigned long prologue = regs->ip - PROLOGUE_SIZE; - if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { - printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n", - loglvl, prologue); - } else { + switch (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { + case 0: printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %" __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes, opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1); + break; + case -EPERM: + /* No access to the user space stack of other tasks. Ignore. */ + break; + default: + printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n", + loglvl, prologue); + break; } } From 7e4be1290a38b3dd4a77cdf4565c9ffe7e620013 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Nov 2020 13:16:31 +0300 Subject: [PATCH 642/710] dmaengine: fix error codes in channel_register() The error codes were not set on some of these error paths. Also the error handling was more confusing than it needed to be so I cleaned it up and shuffled it around a bit. Fixes: d2fb0a043838 ("dmaengine: break out channel registration") Signed-off-by: Dan Carpenter Reviewed-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201113101631.GE168908@mwanda Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 7974fa0400d8..962cbb5e5f7f 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1039,16 +1039,15 @@ static int get_dma_id(struct dma_device *device) static int __dma_async_device_channel_register(struct dma_device *device, struct dma_chan *chan) { - int rc = 0; + int rc; chan->local = alloc_percpu(typeof(*chan->local)); if (!chan->local) - goto err_out; + return -ENOMEM; chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL); if (!chan->dev) { - free_percpu(chan->local); - chan->local = NULL; - goto err_out; + rc = -ENOMEM; + goto err_free_local; } /* @@ -1061,7 +1060,8 @@ static int __dma_async_device_channel_register(struct dma_device *device, if (chan->chan_id < 0) { pr_err("%s: unable to alloc ida for chan: %d\n", __func__, chan->chan_id); - goto err_out; + rc = chan->chan_id; + goto err_free_dev; } chan->dev->device.class = &dma_devclass; @@ -1082,9 +1082,10 @@ static int __dma_async_device_channel_register(struct dma_device *device, mutex_lock(&device->chan_mutex); ida_free(&device->chan_ida, chan->chan_id); mutex_unlock(&device->chan_mutex); - err_out: - free_percpu(chan->local); + err_free_dev: kfree(chan->dev); + err_free_local: + free_percpu(chan->local); return rc; } From 4d213e76a359e540ca786ee937da7f35faa8e5f8 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Tue, 10 Nov 2020 15:19:08 +0800 Subject: [PATCH 643/710] iommu/vt-d: Avoid panic if iommu init fails in tboot system "intel_iommu=off" command line is used to disable iommu but iommu is force enabled in a tboot system for security reason. However for better performance on high speed network device, a new option "intel_iommu=tboot_noforce" is introduced to disable the force on. By default kernel should panic if iommu init fail in tboot for security reason, but it's unnecessory if we use "intel_iommu=tboot_noforce,off". Fix the code setting force_on and move intel_iommu_tboot_noforce from tboot code to intel iommu code. Fixes: 7304e8f28bb2 ("iommu/vt-d: Correctly disable Intel IOMMU force on") Signed-off-by: Zhenzhong Duan Tested-by: Lukasz Hawrylko Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20201110071908.3133-1-zhenzhong.duan@gmail.com Signed-off-by: Will Deacon --- arch/x86/kernel/tboot.c | 3 --- drivers/iommu/intel/iommu.c | 5 +++-- include/linux/intel-iommu.h | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 992fb1415c0f..420be871d9d4 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -514,9 +514,6 @@ int tboot_force_iommu(void) if (!tboot_enabled()) return 0; - if (intel_iommu_tboot_noforce) - return 1; - if (no_iommu || swiotlb || dmar_disabled) pr_warn("Forcing Intel-IOMMU to enabled\n"); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1b1ca63e6bbe..4d9b298002f0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -179,7 +179,7 @@ static int rwbf_quirk; * (used when kernel is launched w/ TXT) */ static int force_on = 0; -int intel_iommu_tboot_noforce; +static int intel_iommu_tboot_noforce; static int no_platform_optin; #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) @@ -4885,7 +4885,8 @@ int __init intel_iommu_init(void) * Intel IOMMU is required for a TXT/tboot launch or platform * opt in, so enforce that. */ - force_on = tboot_force_iommu() || platform_optin_force_iommu(); + force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || + platform_optin_force_iommu(); if (iommu_init_mempool()) { if (force_on) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index fbf5b3e7707e..d956987ed032 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -798,7 +798,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; -extern int intel_iommu_tboot_noforce; extern int intel_iommu_gfx_mapped; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) From 1e5d770bb8a23dd01e28e92f4fb0b1093c8bdbe6 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 18 Nov 2020 14:56:25 +0000 Subject: [PATCH 644/710] io_uring: get an active ref_node from files_data An active ref_node always can be found in ctx->files_data, it's much safer to get it this way instead of poking into files_data->ref_list. Signed-off-by: Pavel Begunkov Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b205c1df3f74..5cb194ca4fce 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6974,9 +6974,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) return -ENXIO; spin_lock(&data->lock); - if (!list_empty(&data->ref_list)) - ref_node = list_first_entry(&data->ref_list, - struct fixed_file_ref_node, node); + ref_node = data->node; spin_unlock(&data->lock); if (ref_node) percpu_ref_kill(&ref_node->refs); From e297822b20e7fe683e107aea46e6402adcf99c70 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 18 Nov 2020 14:56:26 +0000 Subject: [PATCH 645/710] io_uring: order refnode recycling Don't recycle a refnode until we're done with all requests of nodes ejected before. Signed-off-by: Pavel Begunkov Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5cb194ca4fce..7d4b755ab451 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -205,6 +205,7 @@ struct fixed_file_ref_node { struct list_head file_list; struct fixed_file_data *file_data; struct llist_node llist; + bool done; }; struct fixed_file_data { @@ -7323,10 +7324,6 @@ static void __io_file_put_work(struct fixed_file_ref_node *ref_node) kfree(pfile); } - spin_lock(&file_data->lock); - list_del(&ref_node->node); - spin_unlock(&file_data->lock); - percpu_ref_exit(&ref_node->refs); kfree(ref_node); percpu_ref_put(&file_data->refs); @@ -7353,17 +7350,32 @@ static void io_file_put_work(struct work_struct *work) static void io_file_data_ref_zero(struct percpu_ref *ref) { struct fixed_file_ref_node *ref_node; + struct fixed_file_data *data; struct io_ring_ctx *ctx; - bool first_add; + bool first_add = false; int delay = HZ; ref_node = container_of(ref, struct fixed_file_ref_node, refs); - ctx = ref_node->file_data->ctx; + data = ref_node->file_data; + ctx = data->ctx; - if (percpu_ref_is_dying(&ctx->file_data->refs)) + spin_lock(&data->lock); + ref_node->done = true; + + while (!list_empty(&data->ref_list)) { + ref_node = list_first_entry(&data->ref_list, + struct fixed_file_ref_node, node); + /* recycle ref nodes in order */ + if (!ref_node->done) + break; + list_del(&ref_node->node); + first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist); + } + spin_unlock(&data->lock); + + if (percpu_ref_is_dying(&data->refs)) delay = 0; - first_add = llist_add(&ref_node->llist, &ctx->file_put_llist); if (!delay) mod_delayed_work(system_wq, &ctx->file_put_work, 0); else if (first_add) @@ -7387,6 +7399,7 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node( INIT_LIST_HEAD(&ref_node->node); INIT_LIST_HEAD(&ref_node->file_list); ref_node->file_data = ctx->file_data; + ref_node->done = false; return ref_node; } @@ -7482,7 +7495,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, file_data->node = ref_node; spin_lock(&file_data->lock); - list_add(&ref_node->node, &file_data->ref_list); + list_add_tail(&ref_node->node, &file_data->ref_list); spin_unlock(&file_data->lock); percpu_ref_get(&file_data->refs); return ret; @@ -7641,7 +7654,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (needs_switch) { percpu_ref_kill(&data->node->refs); spin_lock(&data->lock); - list_add(&ref_node->node, &data->ref_list); + list_add_tail(&ref_node->node, &data->ref_list); data->node = ref_node; spin_unlock(&data->lock); percpu_ref_get(&ctx->file_data->refs); From cd9f13c59461351d7a5fd07924264fb49b287359 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 18 Nov 2020 16:01:48 +0100 Subject: [PATCH 646/710] can: flexcan: flexcan_chip_start(): fix erroneous flexcan_transceiver_enable() during bus-off recovery If the CAN controller goes into bus off, the do_set_mode() callback with CAN_MODE_START can be used to recover the controller, which then calls flexcan_chip_start(). If configured, this is done automatically by the framework or manually by the user. In flexcan_chip_start() there is an explicit call to flexcan_transceiver_enable(), which does a regulator_enable() on the transceiver regulator. This results in a net usage counter increase, as there is no corresponding flexcan_transceiver_disable() in the bus off code path. This further leads to the transceiver stuck enabled, even if the CAN interface is shut down. To fix this problem the flexcan_transceiver_enable()/flexcan_transceiver_disable() are moved out of flexcan_chip_start()/flexcan_chip_stop() into flexcan_open()/flexcan_close(). Fixes: e955cead0311 ("CAN: Add Flexcan CAN controller driver") Link: https://lore.kernel.org/r/20201118150148.2664024-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index d6a9cf0e9b60..99e5f272205d 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1567,14 +1567,10 @@ static int flexcan_chip_start(struct net_device *dev) priv->write(reg_ctrl2, ®s->ctrl2); } - err = flexcan_transceiver_enable(priv); - if (err) - goto out_chip_disable; - /* synchronize with the can bus */ err = flexcan_chip_unfreeze(priv); if (err) - goto out_transceiver_disable; + goto out_chip_disable; priv->can.state = CAN_STATE_ERROR_ACTIVE; @@ -1592,8 +1588,6 @@ static int flexcan_chip_start(struct net_device *dev) return 0; - out_transceiver_disable: - flexcan_transceiver_disable(priv); out_chip_disable: flexcan_chip_disable(priv); return err; @@ -1623,7 +1617,6 @@ static int __flexcan_chip_stop(struct net_device *dev, bool disable_on_error) priv->write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL, ®s->ctrl); - flexcan_transceiver_disable(priv); priv->can.state = CAN_STATE_STOPPED; return 0; @@ -1665,10 +1658,14 @@ static int flexcan_open(struct net_device *dev) if (err) goto out_runtime_put; - err = request_irq(dev->irq, flexcan_irq, IRQF_SHARED, dev->name, dev); + err = flexcan_transceiver_enable(priv); if (err) goto out_close; + err = request_irq(dev->irq, flexcan_irq, IRQF_SHARED, dev->name, dev); + if (err) + goto out_transceiver_disable; + if (priv->can.ctrlmode & CAN_CTRLMODE_FD) priv->mb_size = sizeof(struct flexcan_mb) + CANFD_MAX_DLEN; else @@ -1720,6 +1717,8 @@ static int flexcan_open(struct net_device *dev) can_rx_offload_del(&priv->offload); out_free_irq: free_irq(dev->irq, dev); + out_transceiver_disable: + flexcan_transceiver_disable(priv); out_close: close_candev(dev); out_runtime_put: @@ -1738,6 +1737,7 @@ static int flexcan_close(struct net_device *dev) can_rx_offload_del(&priv->offload); free_irq(dev->irq, dev); + flexcan_transceiver_disable(priv); close_candev(dev); pm_runtime_put(priv->dev); From 20b329129009caf1c646152abe09b697227e1c37 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 18 Nov 2020 08:54:31 -0500 Subject: [PATCH 647/710] gfs2: Fix regression in freeze_go_sync Patch 541656d3a513 ("gfs2: freeze should work on read-only mounts") changed the check for glock state in function freeze_go_sync() from "gl->gl_state == LM_ST_SHARED" to "gl->gl_req == LM_ST_EXCLUSIVE". That's wrong and it regressed gfs2's freeze/thaw mechanism because it caused only the freezing node (which requests the glock in EX) to queue freeze work. All nodes go through this go_sync code path during the freeze to drop their SHared hold on the freeze glock, allowing the freezing node to acquire it in EXclusive mode. But all the nodes must freeze access to the file system locally, so they ALL must queue freeze work. The freeze_work calls freeze_func, which makes a request to reacquire the freeze glock in SH, effectively blocking until the thaw from the EX holder. Once thawed, the freezing node drops its EX hold on the freeze glock, then the (blocked) freeze_func reacquires the freeze glock in SH again (on all nodes, including the freezer) so all nodes go back to a thawed state. This patch changes the check back to gl_state == LM_ST_SHARED like it was prior to 541656d3a513. Fixes: 541656d3a513 ("gfs2: freeze should work on read-only mounts") Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 6c1432d78dce..67f2921ae8d4 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -571,7 +571,18 @@ static int freeze_go_sync(struct gfs2_glock *gl) int error = 0; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (gl->gl_req == LM_ST_EXCLUSIVE && !gfs2_withdrawn(sdp)) { + /* + * We need to check gl_state == LM_ST_SHARED here and not gl_req == + * LM_ST_EXCLUSIVE. That's because when any node does a freeze, + * all the nodes should have the freeze glock in SH mode and they all + * call do_xmote: One for EX and the others for UN. They ALL must + * freeze locally, and they ALL must queue freeze work. The freeze_work + * calls freeze_func, which tries to reacquire the freeze glock in SH, + * effectively waiting for the thaw on the node who holds it in EX. + * Once thawed, the work func acquires the freeze glock in + * SH and everybody goes back to thawed. + */ + if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp)) { atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); error = freeze_super(sdp->sd_vfs); if (error) { From a1f634463aaf2c94dfa13001dbdea011303124cc Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 15 Sep 2020 16:47:15 +0300 Subject: [PATCH 648/710] can: m_can: process interrupt only when not runtime suspended Avoid processing bogus interrupt statuses when the HW is runtime suspended and the M_CAN_IR register read may get all bits 1's. Handler can be called if the interrupt request is shared with other peripherals or at the end of free_irq(). Therefore check the runtime suspended status before processing. Fixes: cdf8259d6573 ("can: m_can: Add PM Support") Signed-off-by: Jarkko Nikula Link: https://lore.kernel.org/r/20200915134715.696303-1-jarkko.nikula@linux.intel.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index e7264043f79a..f3fc37e96b08 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -956,6 +956,8 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) struct net_device_stats *stats = &dev->stats; u32 ir; + if (pm_runtime_suspended(cdev->dev)) + return IRQ_NONE; ir = m_can_read(cdev, M_CAN_IR); if (!ir) return IRQ_NONE; From e95b6c3ef1311dd7b20467d932a24b6d0fd88395 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:41 -0800 Subject: [PATCH 649/710] xfs: fix the minrecs logic when dealing with inode root child blocks The comment and logic in xchk_btree_check_minrecs for dealing with inode-rooted btrees isn't quite correct. While the direct children of the inode root are allowed to have fewer records than what would normally be allowed for a regular ondisk btree block, this is only true if there is only one child block and the number of records don't fit in the inode root. Fixes: 08a3a692ef58 ("xfs: btree scrub should check minrecs") Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/btree.c | 47 ++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index f52a7b8256f9..debf392e0515 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -452,32 +452,41 @@ xchk_btree_check_minrecs( int level, struct xfs_btree_block *block) { - unsigned int numrecs; - int ok_level; - - numrecs = be16_to_cpu(block->bb_numrecs); + struct xfs_btree_cur *cur = bs->cur; + unsigned int root_level = cur->bc_nlevels - 1; + unsigned int numrecs = be16_to_cpu(block->bb_numrecs); /* More records than minrecs means the block is ok. */ - if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level)) + if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) return; /* - * Certain btree blocks /can/ have fewer than minrecs records. Any - * level greater than or equal to the level of the highest dedicated - * btree block are allowed to violate this constraint. - * - * For a btree rooted in a block, the btree root can have fewer than - * minrecs records. If the btree is rooted in an inode and does not - * store records in the root, the direct children of the root and the - * root itself can have fewer than minrecs records. + * For btrees rooted in the inode, it's possible that the root block + * contents spilled into a regular ondisk block because there wasn't + * enough space in the inode root. The number of records in that + * child block might be less than the standard minrecs, but that's ok + * provided that there's only one direct child of the root. */ - ok_level = bs->cur->bc_nlevels - 1; - if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) - ok_level--; - if (level >= ok_level) - return; + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 2) { + struct xfs_btree_block *root_block; + struct xfs_buf *root_bp; + int root_maxrecs; - xchk_btree_set_corrupt(bs->sc, bs->cur, level); + root_block = xfs_btree_get_block(cur, root_level, &root_bp); + root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level); + if (be16_to_cpu(root_block->bb_numrecs) != 1 || + numrecs <= root_maxrecs) + xchk_btree_set_corrupt(bs->sc, cur, level); + return; + } + + /* + * Otherwise, only the root level is allowed to have fewer than minrecs + * records or keyptrs. + */ + if (level < root_level) + xchk_btree_set_corrupt(bs->sc, cur, level); } /* From 498fe261f0d6d5189f8e11d283705dd97b474b54 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:41 -0800 Subject: [PATCH 650/710] xfs: strengthen rmap record flags checking We always know the correct state of the rmap record flags (attr, bmbt, unwritten) so check them by direct comparison. Fixes: d852657ccfc0 ("xfs: cross-reference reverse-mapping btree") Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/bmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 412e2ec55e38..fed56d213a3f 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -218,13 +218,13 @@ xchk_bmap_xref_rmap( * which doesn't track unwritten state. */ if (owner != XFS_RMAP_OWN_COW && - irec->br_state == XFS_EXT_UNWRITTEN && - !(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) + !!(irec->br_state == XFS_EXT_UNWRITTEN) != + !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); - if (info->whichfork == XFS_ATTR_FORK && - !(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) + if (!!(info->whichfork == XFS_ATTR_FORK) != + !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) From 6b48e5b8a20f653b7d64ccf99a498f2523bff752 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:42 -0800 Subject: [PATCH 651/710] xfs: directory scrub should check the null bestfree entries too Teach the directory scrubber to check all the bestfree entries, including the null ones. We want to be able to detect the case where the entry is null but there actually /is/ a directory data block. Found by fuzzing lbests[0] = ones in xfs/391. Fixes: df481968f33b ("xfs: scrub directory freespace") Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/dir.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 7c432997edad..b045e95c2ea7 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -558,14 +558,27 @@ xchk_directory_leaf1_bestfree( /* Check all the bestfree entries. */ for (i = 0; i < bestcount; i++, bestp++) { best = be16_to_cpu(*bestp); - if (best == NULLDATAOFF) - continue; error = xfs_dir3_data_read(sc->tp, sc->ip, - i * args->geo->fsbcount, 0, &dbp); + xfs_dir2_db_to_da(args->geo, i), + XFS_DABUF_MAP_HOLE_OK, + &dbp); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) break; - xchk_directory_check_freesp(sc, lblk, dbp, best); + + if (!dbp) { + if (best != NULLDATAOFF) { + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, + lblk); + break; + } + continue; + } + + if (best == NULLDATAOFF) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + else + xchk_directory_check_freesp(sc, lblk, dbp, best); xfs_trans_brelse(sc->tp, dbp); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) break; From ada49d64fb3538144192181db05de17e2ffc3551 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sat, 14 Nov 2020 11:06:01 -0800 Subject: [PATCH 652/710] xfs: fix forkoff miscalculation related to XFS_LITINO(mp) Currently, commit e9e2eae89ddb dropped a (int) decoration from XFS_LITINO(mp), and since sizeof() expression is also involved, the result of XFS_LITINO(mp) is simply as the size_t type (commonly unsigned long). Considering the expression in xfs_attr_shortform_bytesfit(): offset = (XFS_LITINO(mp) - bytes) >> 3; let "bytes" be (int)340, and "XFS_LITINO(mp)" be (unsigned long)336. on 64-bit platform, the expression is offset = ((unsigned long)336 - (int)340) >> 3 = (int)(0xfffffffffffffffcUL >> 3) = -1 but on 32-bit platform, the expression is offset = ((unsigned long)336 - (int)340) >> 3 = (int)(0xfffffffcUL >> 3) = 0x1fffffff instead. so offset becomes a large positive number on 32-bit platform, and cause xfs_attr_shortform_bytesfit() returns maxforkoff rather than 0. Therefore, one result is "ASSERT(new_size <= XFS_IFORK_SIZE(ip, whichfork));" assertion failure in xfs_idata_realloc(), which was also the root cause of the original bugreport from Dennis, see: https://bugzilla.redhat.com/show_bug.cgi?id=1894177 And it can also be manually triggered with the following commands: $ touch a; $ setfattr -n user.0 -v "`seq 0 80`" a; $ setfattr -n user.1 -v "`seq 0 80`" a on 32-bit platform. Fix the case in xfs_attr_shortform_bytesfit() by bailing out "XFS_LITINO(mp) < bytes" in advance suggested by Eric and a misleading comment together with this bugfix suggested by Darrick. It seems the other users of XFS_LITINO(mp) are not impacted. Fixes: e9e2eae89ddb ("xfs: only check the superblock version for dinode size calculation") Cc: # 5.7+ Reported-and-tested-by: Dennis Gilmore Reviewed-by: Christoph Hellwig Signed-off-by: Gao Xiang Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_attr_leaf.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index bb128db220ac..d6ef69ab1c67 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -515,7 +515,7 @@ xfs_attr_copy_value( *========================================================================*/ /* - * Query whether the requested number of additional bytes of extended + * Query whether the total requested number of attr fork bytes of extended * attribute space will be able to fit inline. * * Returns zero if not, else the di_forkoff fork offset to be used in the @@ -535,6 +535,12 @@ xfs_attr_shortform_bytesfit( int maxforkoff; int offset; + /* + * Check if the new size could fit at all first: + */ + if (bytes > XFS_LITINO(mp)) + return 0; + /* rounded down */ offset = (XFS_LITINO(mp) - bytes) >> 3; From 27c14b5daa82861220d6fa6e27b51f05f21ffaa7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 14 Nov 2020 09:59:22 -0800 Subject: [PATCH 653/710] xfs: ensure inobt record walks always make forward progress The aim of the inode btree record iterator function is to call a callback on every record in the btree. To avoid having to tear down and recreate the inode btree cursor around every callback, it caches a certain number of records in a memory buffer. After each batch of callback invocations, we have to perform a btree lookup to find the next record after where we left off. However, if the keys of the inode btree are corrupt, the lookup might put us in the wrong part of the inode btree, causing the walk function to loop forever. Therefore, we add extra cursor tracking to make sure that we never go backwards neither when performing the lookup nor when jumping to the next inobt record. This also fixes an off by one error where upon resume the lookup should have been for the inode /after/ the point at which we stopped. Found by fuzzing xfs/460 with keys[2].startino = ones causing bulkstat and quotacheck to hang. Fixes: a211432c27ff ("xfs: create simplified inode walk function") Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R --- fs/xfs/xfs_iwalk.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 233dcc8784db..2a45138831e3 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -55,6 +55,9 @@ struct xfs_iwalk_ag { /* Where do we start the traversal? */ xfs_ino_t startino; + /* What was the last inode number we saw when iterating the inobt? */ + xfs_ino_t lastino; + /* Array of inobt records we cache. */ struct xfs_inobt_rec_incore *recs; @@ -301,6 +304,9 @@ xfs_iwalk_ag_start( if (XFS_IS_CORRUPT(mp, *has_more != 1)) return -EFSCORRUPTED; + iwag->lastino = XFS_AGINO_TO_INO(mp, agno, + irec->ir_startino + XFS_INODES_PER_CHUNK - 1); + /* * If the LE lookup yielded an inobt record before the cursor position, * skip it and see if there's another one after it. @@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks( struct xfs_mount *mp = iwag->mp; struct xfs_trans *tp = iwag->tp; struct xfs_inobt_rec_incore *irec; - xfs_agino_t restart; + xfs_agino_t next_agino; int error; + next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1; + ASSERT(iwag->nr_recs > 0); /* Delete cursor but remember the last record we cached... */ xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); irec = &iwag->recs[iwag->nr_recs - 1]; - restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1; + ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK); error = xfs_iwalk_ag_recs(iwag); if (error) @@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks( if (error) return error; - return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more); + return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more); } /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */ @@ -396,6 +404,7 @@ xfs_iwalk_ag( while (!error && has_more) { struct xfs_inobt_rec_incore *irec; + xfs_ino_t rec_fsino; cond_resched(); if (xfs_pwork_want_abort(&iwag->pwork)) @@ -407,6 +416,15 @@ xfs_iwalk_ag( if (error || !has_more) break; + /* Make sure that we always move forward. */ + rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino); + if (iwag->lastino != NULLFSINO && + XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) { + error = -EFSCORRUPTED; + goto out; + } + iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1; + /* No allocated inodes in this chunk; skip it. */ if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) { error = xfs_btree_increment(cur, 0, &has_more); @@ -535,6 +553,7 @@ xfs_iwalk( .trim_start = 1, .skip_empty = 1, .pwork = XFS_PWORK_SINGLE_THREADED, + .lastino = NULLFSINO, }; xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); int error; @@ -623,6 +642,7 @@ xfs_iwalk_threaded( iwag->data = data; iwag->startino = startino; iwag->sz_recs = xfs_iwalk_prefetch(inode_records); + iwag->lastino = NULLFSINO; xfs_pwork_queue(&pctl, &iwag->pwork); startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); if (flags & XFS_INOBT_WALK_SAME_AG) @@ -696,6 +716,7 @@ xfs_inobt_walk( .startino = startino, .sz_recs = xfs_inobt_walk_prefetch(inobt_records), .pwork = XFS_PWORK_SINGLE_THREADED, + .lastino = NULLFSINO, }; xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); int error; From 595189c25c28a55523354336bf24453242c81c15 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 18 Nov 2020 09:21:26 -0800 Subject: [PATCH 654/710] xfs: return corresponding errcode if xfs_initialize_perag() fail In xfs_initialize_perag(), if kmem_zalloc(), xfs_buf_hash_init(), or radix_tree_preload() failed, the returned value 'error' is not set accordingly. Reported-as-fixing: 8b26c5825e02 ("xfs: handle ENOMEM correctly during initialisation of perag structures") Fixes: 9b2471797942 ("xfs: cache unlinked pointers in an rhashtable") Reported-by: Hulk Robot Signed-off-by: Yu Kuai Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_mount.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 150ee5cb8645..7110507a2b6b 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -194,20 +194,25 @@ xfs_initialize_perag( } pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); - if (!pag) + if (!pag) { + error = -ENOMEM; goto out_unwind_new_pags; + } pag->pag_agno = index; pag->pag_mount = mp; spin_lock_init(&pag->pag_ici_lock); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); - if (xfs_buf_hash_init(pag)) + + error = xfs_buf_hash_init(pag); + if (error) goto out_free_pag; init_waitqueue_head(&pag->pagb_wait); spin_lock_init(&pag->pagb_lock); pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; - if (radix_tree_preload(GFP_NOFS)) + error = radix_tree_preload(GFP_NOFS); + if (error) goto out_hash_destroy; spin_lock(&mp->m_perag_lock); From 879ee8b6f2bae0cc4a25536f8841db1dbc969523 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 11 Nov 2020 12:54:34 -0800 Subject: [PATCH 655/710] ASOC: Intel: kbl_rt5663_rt5514_max98927: Do not try to disable disabled clock In kabylake_set_bias_level(), enabling mclk may fail if the clock has already been enabled by the firmware. Attempts to disable that clock later will fail with a warning backtrace. mclk already disabled WARNING: CPU: 2 PID: 108 at drivers/clk/clk.c:952 clk_core_disable+0x1b6/0x1cf ... Call Trace: clk_disable+0x2d/0x3a kabylake_set_bias_level+0x72/0xfd [snd_soc_kbl_rt5663_rt5514_max98927] snd_soc_card_set_bias_level+0x2b/0x6f snd_soc_dapm_set_bias_level+0xe1/0x209 dapm_pre_sequence_async+0x63/0x96 async_run_entry_fn+0x3d/0xd1 process_one_work+0x2a9/0x526 ... Only disable the clock if it has been enabled. Fixes: 15747a802075 ("ASoC: eve: implement set_bias_level function for rt5514") Cc: Brent Lu Cc: Curtis Malainey Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20201111205434.207610-1-linux@roeck-us.net Signed-off-by: Mark Brown --- sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 922cd0176e1f..f95546c184aa 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -700,6 +700,8 @@ static int kabylake_set_bias_level(struct snd_soc_card *card, switch (level) { case SND_SOC_BIAS_PREPARE: if (dapm->bias_level == SND_SOC_BIAS_ON) { + if (!__clk_is_enabled(priv->mclk)) + return 0; dev_dbg(card->dev, "Disable mclk"); clk_disable_unprepare(priv->mclk); } else { From 2ba546ebe0ce2af47833d8912ced9b4a579f13cb Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Wed, 18 Nov 2020 08:50:09 -0600 Subject: [PATCH 656/710] regulator: ti-abb: Fix array out of bound read access on the first transition At the start of driver initialization, we do not know what bias setting the bootloader has configured the system for and we only know for certain the very first time we do a transition. However, since the initial value of the comparison index is -EINVAL, this negative value results in an array out of bound access on the very first transition. Since we don't know what the setting is, we just set the bias configuration as there is nothing to compare against. This prevents the array out of bound access. NOTE: Even though we could use a more relaxed check of "< 0" the only valid values(ignoring cosmic ray induced bitflips) are -EINVAL, 0+. Fixes: 40b1936efebd ("regulator: Introduce TI Adaptive Body Bias(ABB) on-chip LDO driver") Link: https://lore.kernel.org/linux-mm/CA+G9fYuk4imvhyCN7D7T6PMDH6oNp6HDCRiTUKMQ6QXXjBa4ag@mail.gmail.com/ Reported-by: Naresh Kamboju Reviewed-by: Arnd Bergmann Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20201118145009.10492-1-nm@ti.com Signed-off-by: Mark Brown --- drivers/regulator/ti-abb-regulator.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index 3e60bff76194..9f0a4d50cead 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -342,8 +342,17 @@ static int ti_abb_set_voltage_sel(struct regulator_dev *rdev, unsigned sel) return ret; } - /* If data is exactly the same, then just update index, no change */ info = &abb->info[sel]; + /* + * When Linux kernel is starting up, we are'nt sure of the + * Bias configuration that bootloader has configured. + * So, we get to know the actual setting the first time + * we are asked to transition. + */ + if (abb->current_info_idx == -EINVAL) + goto just_set_abb; + + /* If data is exactly the same, then just update index, no change */ oinfo = &abb->info[abb->current_info_idx]; if (!memcmp(info, oinfo, sizeof(*info))) { dev_dbg(dev, "%s: Same data new idx=%d, old idx=%d\n", __func__, @@ -351,6 +360,7 @@ static int ti_abb_set_voltage_sel(struct regulator_dev *rdev, unsigned sel) goto out; } +just_set_abb: ret = ti_abb_set_opp(rdev, abb, info); out: From df8d85d8c69d6837817e54dcb73c84a8b5a13877 Mon Sep 17 00:00:00 2001 From: Filip Moc Date: Tue, 17 Nov 2020 18:36:31 +0100 Subject: [PATCH 657/710] net: usb: qmi_wwan: Set DTR quirk for MR400 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LTE module MR400 embedded in TL-MR6400 v4 requires DTR to be set. Signed-off-by: Filip Moc Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20201117173631.GA550981@moc6.cz Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 581ed51abb53..fc378ff56775 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1070,7 +1070,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9011, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9021, 1)}, {QMI_FIXED_INTF(0x05c6, 0x9022, 2)}, - {QMI_FIXED_INTF(0x05c6, 0x9025, 4)}, /* Alcatel-sbell ASB TL131 TDD LTE (China Mobile) */ + {QMI_QUIRK_SET_DTR(0x05c6, 0x9025, 4)}, /* Alcatel-sbell ASB TL131 TDD LTE (China Mobile) */ {QMI_FIXED_INTF(0x05c6, 0x9026, 3)}, {QMI_FIXED_INTF(0x05c6, 0x902e, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9031, 5)}, From a5ebcbdf34b65fcc07f38eaf2d60563b42619a59 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 17 Nov 2020 10:45:05 +0800 Subject: [PATCH 658/710] ah6: fix error return code in ah6_input() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1605581105-35295-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/ah6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index d88d97617f7e..440080da805b 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -588,7 +588,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); memset(ah->auth_data, 0, ahp->icv_trunc_len); - if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN)) + err = ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN); + if (err) goto out_free; ip6h->priority = 0; From 537a14726582c4e7bfe4dff9cb7fca19dc912cf6 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 17 Nov 2020 10:55:21 +0800 Subject: [PATCH 659/710] atl1c: fix error return code in atl1c_probe() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 43250ddd75a3 ("atl1c: Atheros L1C Gigabit Ethernet driver") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1605581721-36028-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 0c12cf7bda50..3f65f2b370c5 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2543,8 +2543,8 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * various kernel subsystems to support the mechanics required by a * fixed-high-32-bit system. */ - if ((dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0) || - (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0)) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { dev_err(&pdev->dev, "No usable DMA configuration,aborting\n"); goto err_dma; } From 3a36060bf294e7b7e33c5dddcc4f5d2c1c834e56 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 17 Nov 2020 10:57:55 +0800 Subject: [PATCH 660/710] atl1e: fix error return code in atl1e_probe() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: a6a5325239c2 ("atl1e: Atheros L1E Gigabit Ethernet driver") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1605581875-36281-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 098b0328e3cb..ff9f96de74b8 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -2312,8 +2312,8 @@ static int atl1e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * various kernel subsystems to support the mechanics required by a * fixed-high-32-bit system. */ - if ((dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0) || - (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0)) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { dev_err(&pdev->dev, "No usable DMA configuration,aborting\n"); goto err_dma; } From 1532b9778478577152201adbafa7738b1e844868 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 16 Nov 2020 19:52:34 -0800 Subject: [PATCH 661/710] net: Have netpoll bring-up DSA management interface DSA network devices rely on having their DSA management interface up and running otherwise their ndo_open() will return -ENETDOWN. Without doing this it would not be possible to use DSA devices as netconsole when configured on the command line. These devices also do not utilize the upper/lower linking so the check about the netpoll device having upper is not going to be a problem. The solution adopted here is identical to the one done for net/ipv4/ipconfig.c with 728c02089a0e ("net: ipv4: handle DSA enabled master network devices"), with the network namespace scope being restricted to that of the process configuring netpoll. Fixes: 04ff53f96a93 ("net: dsa: Add netconsole support") Tested-by: Vladimir Oltean Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20201117035236.22658-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c310c7c1cef7..960948290001 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -657,15 +658,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup); int netpoll_setup(struct netpoll *np) { - struct net_device *ndev = NULL; + struct net_device *ndev = NULL, *dev = NULL; + struct net *net = current->nsproxy->net_ns; struct in_device *in_dev; int err; rtnl_lock(); - if (np->dev_name[0]) { - struct net *net = current->nsproxy->net_ns; + if (np->dev_name[0]) ndev = __dev_get_by_name(net, np->dev_name); - } + if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); err = -ENODEV; @@ -673,6 +674,19 @@ int netpoll_setup(struct netpoll *np) } dev_hold(ndev); + /* bring up DSA management network devices up first */ + for_each_netdev(net, dev) { + if (!netdev_uses_dsa(dev)) + continue; + + err = dev_change_flags(dev, dev->flags | IFF_UP, NULL); + if (err < 0) { + np_err(np, "%s failed to open %s\n", + np->dev_name, dev->name); + goto put; + } + } + if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; From fb738b99ef229bd3d25f1b3e5503925dba9b1a7c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 17 Nov 2020 19:33:51 +0200 Subject: [PATCH 662/710] mlxsw: Fix firmware flashing The commit cited below moved firmware flashing functionality from mlxsw_spectrum to mlxsw_core, but did not adjust the Kconfig dependencies. This makes it possible to have mlxsw_core as built-in and mlxfw as a module. The mlxfw code is therefore not reachable from mlxsw_core and firmware flashing fails: # devlink dev flash pci/0000:01:00.0 file mellanox/mlxsw_spectrum-13.2008.1310.mfa2 devlink answers: Operation not supported Fix by having mlxsw_core select mlxfw. Fixes: b79cb787ac70 ("mlxsw: Move fw flashing code into core.c") Signed-off-by: Ido Schimmel Reported-by: Vadim Pasternak Tested-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 872e9910bb7c..a619d90559f7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -6,6 +6,7 @@ config MLXSW_CORE tristate "Mellanox Technologies Switch ASICs support" select NET_DEVLINK + select MLXFW help This driver supports Mellanox Technologies Switch ASICs family. @@ -82,7 +83,6 @@ config MLXSW_SPECTRUM select GENERIC_ALLOCATOR select PARMAN select OBJAGG - select MLXFW imply PTP_1588_CLOCK select NET_PTP_CLASSIFY if PTP_1588_CLOCK default m From 1f492eab67bced119a0ac7db75ef2047e29a30c6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 17 Nov 2020 19:33:52 +0200 Subject: [PATCH 663/710] mlxsw: core: Use variable timeout for EMAD retries The driver sends Ethernet Management Datagram (EMAD) packets to the device for configuration purposes and waits for up to 200ms for a reply. A request is retried up to 5 times. When the system is under heavy load, replies are not always processed in time and EMAD transactions fail. Make the process more robust to such delays by using exponential backoff. First wait for up to 200ms, then retransmit and wait for up to 400ms and so on. Fixes: caf7297e7ab5 ("mlxsw: core: Introduce support for asynchronous EMAD register access") Reported-by: Denis Yulevich Tested-by: Denis Yulevich Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 937b8e46f8c7..1a86535c4968 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -571,7 +571,8 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) if (trans->core->fw_flash_in_progress) timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS); - queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout); + queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, + timeout << trans->retries); } static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, From a3dcb3e7e70c72a68a79b30fc3a3adad5612731c Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 16 Nov 2020 08:43:01 -0800 Subject: [PATCH 664/710] net: dsa: mv88e6xxx: Wait for EEPROM done after HW reset When the switch is hardware reset, it reads the contents of the EEPROM. This can contain instructions for programming values into registers and to perform waits between such programming. Reading the EEPROM can take longer than the 100ms mv88e6xxx_hardware_reset() waits after deasserting the reset GPIO. So poll the EEPROM done bit to ensure it is complete. Signed-off-by: Andrew Lunn Signed-off-by: Ruslan Sushko Link: https://lore.kernel.org/r/20201116164301.977661-1-rus@sushko.dev Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 2 ++ drivers/net/dsa/mv88e6xxx/global1.c | 31 +++++++++++++++++++++++++++++ drivers/net/dsa/mv88e6xxx/global1.h | 1 + 3 files changed, 34 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index bd297ae7cf9e..34cca0a4b31c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2297,6 +2297,8 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip) usleep_range(10000, 20000); gpiod_set_value_cansleep(gpiod, 0); usleep_range(10000, 20000); + + mv88e6xxx_g1_wait_eeprom_done(chip); } } diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index f62aa83ca08d..33d443a37efc 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -75,6 +75,37 @@ static int mv88e6xxx_g1_wait_init_ready(struct mv88e6xxx_chip *chip) return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_STS, bit, 1); } +void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip) +{ + const unsigned long timeout = jiffies + 1 * HZ; + u16 val; + int err; + + /* Wait up to 1 second for the switch to finish reading the + * EEPROM. + */ + while (time_before(jiffies, timeout)) { + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &val); + if (err) { + dev_err(chip->dev, "Error reading status"); + return; + } + + /* If the switch is still resetting, it may not + * respond on the bus, and so MDIO read returns + * 0xffff. Differentiate between that, and waiting for + * the EEPROM to be done by bit 0 being set. + */ + if (val != 0xffff && + val & BIT(MV88E6XXX_G1_STS_IRQ_EEPROM_DONE)) + return; + + usleep_range(1000, 2000); + } + + dev_err(chip->dev, "Timeout waiting for EEPROM done"); +} + /* Offset 0x01: Switch MAC Address Register Bytes 0 & 1 * Offset 0x02: Switch MAC Address Register Bytes 2 & 3 * Offset 0x03: Switch MAC Address Register Bytes 4 & 5 diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 1e3546f8b072..e05abe61fa11 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -278,6 +278,7 @@ int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr); int mv88e6185_g1_reset(struct mv88e6xxx_chip *chip); int mv88e6352_g1_reset(struct mv88e6xxx_chip *chip); int mv88e6250_g1_reset(struct mv88e6xxx_chip *chip); +void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip); int mv88e6185_g1_ppu_enable(struct mv88e6xxx_chip *chip); int mv88e6185_g1_ppu_disable(struct mv88e6xxx_chip *chip); From 2b3af2705645c87eee0f386e075871886fd429b3 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 17 Nov 2020 09:33:51 -0500 Subject: [PATCH 665/710] drm/amd/display: Always get CRTC updated constant values inside commit tail We recently improved our display atomic commit and tail sequence to avoid some issues related to concurrency. One of the major changes consisted of moving the interrupt disable and the stream release from our atomic commit to our atomic tail (commit 6d90a208cfff ("drm/amd/display: Move disable interrupt into commit tail")) . However, the new code introduced inside our commit tail function was inserted right after the function drm_atomic_helper_update_legacy_modeset_state(), which has routines for updating internal data structs related to timestamps. As a result, in certain conditions, the display module can reach a situation where we update our constants and, after that, clean it. This situation generates the following warning: amdgpu 0000:03:00.0: drm_WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev)) WARNING: CPU: 6 PID: 1269 at drivers/gpu/drm/drm_vblank.c:722 drm_crtc_vblank_helper_get_vblank_timestamp_internal+0x32b/0x340 [drm] ... RIP: 0010:drm_crtc_vblank_helper_get_vblank_timestamp_internal+0x32b/0x340 [drm] ... Call Trace: ? dc_stream_get_vblank_counter+0x57/0x60 [amdgpu] drm_crtc_vblank_helper_get_vblank_timestamp+0x1c/0x20 [drm] drm_get_last_vbltimestamp+0xad/0xc0 [drm] drm_reset_vblank_timestamp+0x63/0xd0 [drm] drm_crtc_vblank_on+0x85/0x150 [drm] amdgpu_dm_atomic_commit_tail+0xaf1/0x2330 [amdgpu] commit_tail+0x99/0x130 [drm_kms_helper] drm_atomic_helper_commit+0x123/0x150 [drm_kms_helper] amdgpu_dm_atomic_commit+0x11/0x20 [amdgpu] drm_atomic_commit+0x4a/0x50 [drm] drm_atomic_helper_set_config+0x7c/0xc0 [drm_kms_helper] drm_mode_setcrtc+0x20b/0x7e0 [drm] ? tomoyo_path_number_perm+0x6f/0x200 ? drm_mode_getcrtc+0x190/0x190 [drm] drm_ioctl_kernel+0xae/0xf0 [drm] drm_ioctl+0x245/0x400 [drm] ? drm_mode_getcrtc+0x190/0x190 [drm] amdgpu_drm_ioctl+0x4e/0x80 [amdgpu] __x64_sys_ioctl+0x91/0xc0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ... For fixing this issue we rely upon a refactor introduced on drm_atomic_helper_update_legacy_modeset_state ("Remove the timestamping constant update from drm_atomic_helper_update_legacy_modeset_state()") which decouples constant values update from drm_atomic_helper_update_legacy_modeset_state to a new helper. Basically, this commit uses this new helper and place it right after our release module to avoid a situation where our CRTC struct gets wrong values. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1373 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1349 Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e93e18c06c0e..0e7118000919 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7506,7 +7506,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) bool mode_set_reset_required = false; drm_atomic_helper_update_legacy_modeset_state(dev, state); - drm_atomic_helper_calc_timestamping_constants(state); dm_state = dm_atomic_get_new_state(state); if (dm_state && dm_state->context) { @@ -7533,6 +7532,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) } } + drm_atomic_helper_calc_timestamping_constants(state); + /* update changed items */ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); From d8c19014bba8f565d8a2f1f46b4e38d1d97bf1a7 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Sun, 15 Nov 2020 12:10:29 -0800 Subject: [PATCH 666/710] page_frag: Recover from memory pressure The ethernet driver may allocate skb (and skb->data) via napi_alloc_skb(). This ends up to page_frag_alloc() to allocate skb->data from page_frag_cache->va. During the memory pressure, page_frag_cache->va may be allocated as pfmemalloc page. As a result, the skb->pfmemalloc is always true as skb->data is from page_frag_cache->va. The skb will be dropped if the sock (receiver) does not have SOCK_MEMALLOC. This is expected behaviour under memory pressure. However, once kernel is not under memory pressure any longer (suppose large amount of memory pages are just reclaimed), the page_frag_alloc() may still re-use the prior pfmemalloc page_frag_cache->va to allocate skb->data. As a result, the skb->pfmemalloc is always true unless page_frag_cache->va is re-allocated, even if the kernel is not under memory pressure any longer. Here is how kernel runs into issue. 1. The kernel is under memory pressure and allocation of PAGE_FRAG_CACHE_MAX_ORDER in __page_frag_cache_refill() will fail. Instead, the pfmemalloc page is allocated for page_frag_cache->va. 2: All skb->data from page_frag_cache->va (pfmemalloc) will have skb->pfmemalloc=true. The skb will always be dropped by sock without SOCK_MEMALLOC. This is an expected behaviour. 3. Suppose a large amount of pages are reclaimed and kernel is not under memory pressure any longer. We expect skb->pfmemalloc drop will not happen. 4. Unfortunately, page_frag_alloc() does not proactively re-allocate page_frag_alloc->va and will always re-use the prior pfmemalloc page. The skb->pfmemalloc is always true even kernel is not under memory pressure any longer. Fix this by freeing and re-allocating the page instead of recycling it. References: https://lore.kernel.org/lkml/20201103193239.1807-1-dongli.zhang@oracle.com/ References: https://lore.kernel.org/linux-mm/20201105042140.5253-1-willy@infradead.org/ Suggested-by: Matthew Wilcox (Oracle) Cc: Aruna Ramakrishna Cc: Bert Barbe Cc: Rama Nichanamatlu Cc: Venkat Venkatsubra Cc: Manjunath Patil Cc: Joe Jin Cc: SRINIVAS Fixes: 79930f5892e1 ("net: do not deplete pfmemalloc reserve") Signed-off-by: Dongli Zhang Acked-by: Vlastimil Babka Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20201115201029.11903-1-dongli.zhang@oracle.com Signed-off-by: Jakub Kicinski --- mm/page_alloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 23f5066bd4a5..eaa227a479e4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5103,6 +5103,11 @@ refill: if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) goto refill; + if (unlikely(nc->pfmemalloc)) { + free_the_page(page, compound_order(page)); + goto refill; + } + #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) /* if size can vary use size else just use PAGE_SIZE */ size = nc->size; From 6dceaa9f56e22d0f9b4c4ad2ed9e04e315ce7fe5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 16 Nov 2020 17:21:14 +0100 Subject: [PATCH 667/710] atm: nicstar: Unmap DMA on send error The `skb' is mapped for DMA in ns_send() but does not unmap DMA in case push_scqe() fails to submit the `skb'. The memory of the `skb' is released so only the DMA mapping is leaking. Unmap the DMA mapping in case push_scqe() failed. Fixes: 864a3ff635fa7 ("atm: [nicstar] remove virt_to_bus() and support 64-bit platforms") Cc: Chas Williams <3chas3@gmail.com> Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jakub Kicinski --- drivers/atm/nicstar.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 7af74fb450a0..09ad73361879 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -1706,6 +1706,8 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) if (push_scqe(card, vc, scq, &scqe, skb) != 0) { atomic_inc(&vcc->stats->tx_err); + dma_unmap_single(&card->pcidev->dev, NS_PRV_DMA(skb), skb->len, + DMA_TO_DEVICE); dev_kfree_skb_any(skb); return -EIO; } From 6d9c8d15af0ef20a66a0b432cac0d08319920602 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 18 Nov 2020 10:19:22 +0200 Subject: [PATCH 668/710] net/mlx4_core: Fix init_hca fields offset Slave function read the following capabilities from the wrong offset: 1. log_mc_entry_sz 2. fs_log_entry_sz 3. log_mc_hash_sz Fix that by adjusting these capabilities offset to match firmware layout. Due to the wrong offset read, the following issues might occur: 1+2. Negative value reported at max_mcast_qp_attach. 3. Driver to init FW with multicast hash size of zero. Fixes: a40ded604365 ("net/mlx4_core: Add masking for a few queries on HCA caps") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Eran Ben Elisha Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20201118081922.553-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/fw.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/fw.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index f6ff9620a137..f6cfec81ccc3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1864,8 +1864,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) #define INIT_HCA_MCAST_OFFSET 0x0c0 #define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00) -#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12) -#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) +#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x13) +#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x17) #define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18) #define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) #define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6 @@ -1873,7 +1873,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_DRIVER_VERSION_SZ 0x40 #define INIT_HCA_FS_PARAM_OFFSET 0x1d0 #define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) -#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12) +#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x13) #define INIT_HCA_FS_A0_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x18) #define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b) #define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 650ae08c71de..8f020f26ebf5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -182,8 +182,8 @@ struct mlx4_init_hca_param { u64 cmpt_base; u64 mtt_base; u64 global_caps; - u16 log_mc_entry_sz; - u16 log_mc_hash_sz; + u8 log_mc_entry_sz; + u8 log_mc_hash_sz; u16 hca_core_clock; /* Internal Clock Frequency (in MHz) */ u8 log_num_qps; u8 log_num_srqs; From d2e3fce9ddafe689c6f7cb355f23560637e30b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 10 Nov 2020 23:04:47 +0200 Subject: [PATCH 669/710] drm/i915: Handle max_bpc==16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EDID can declare the maximum supported bpc up to 16, and apparently there are displays that do so. Currently we assume 12 bpc is tha max. Fix the assumption and toss in a MISSING_CASE() for any other value we don't expect to see. This fixes modesets with a display with EDID max bpc > 12. Previously any modeset would just silently fail on platforms that didn't otherwise limit this via the max_bpc property. In particular we don't add the max_bpc property to HDMI ports on gmch platforms, and thus we would see the raw max_bpc coming from the EDID. I suppose we could already adjust this to also allow 16bpc, but seeing as no current platform supports that there is little point. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2632 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20201110210447.27454-1-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza (cherry picked from commit 2ca5a7b85b0c2b97ef08afbd7799b022e29f192e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 31337d2a2cde..99e682563d47 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12878,10 +12878,11 @@ compute_sink_pipe_bpp(const struct drm_connector_state *conn_state, case 10 ... 11: bpp = 10 * 3; break; - case 12: + case 12 ... 16: bpp = 12 * 3; break; default: + MISSING_CASE(conn_state->max_bpc); return -EINVAL; } From b4ca4354b42e59f13365a6901bdc5e729cf4adb4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 18 Nov 2020 13:38:39 +0000 Subject: [PATCH 670/710] drm/i915/gt: Remember to free the virtual breadcrumbs Since we allocate some breadcrumbs for the virtual engine, and the virtual engine has a custom destructor, we also need to free the breadcrumbs after use. Fixes: b3786b29379c ("drm/i915/gt: Distinguish the virtual breadcrumbs from the irq breadcrumbs") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201118133839.1783-1-chris@chris-wilson.co.uk (cherry picked from commit 45e50f48b7907e650cfbbc7879abfe3a0c419c73) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f82c6dd1de18..9bb16bdf93cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -5457,6 +5457,7 @@ static void virtual_context_destroy(struct kref *kref) __execlists_context_fini(&ve->context); intel_context_fini(&ve->context); + intel_breadcrumbs_free(ve->base.breadcrumbs); intel_engine_free_request_pool(&ve->base); kfree(ve->bonds); From 3645a34f5b962aeedeb02f30cdf048eaae9b5f5c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 19 Nov 2020 13:51:19 +0800 Subject: [PATCH 671/710] iommu/vt-d: Fix compile error with CONFIG_PCI_ATS not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the compile error below (CONFIG_PCI_ATS not set): drivers/iommu/intel/dmar.c: In function ‘vf_inherit_msi_domain’: drivers/iommu/intel/dmar.c:338:59: error: ‘struct pci_dev’ has no member named ‘physfn’; did you mean ‘is_physfn’? 338 | dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&pdev->physfn->dev)); | ^~~~~~ | is_physfn Fixes: ff828729be44 ("iommu/vt-d: Cure VF irqdomain hickup") Reported-by: Geert Uytterhoeven Signed-off-by: Lu Baolu Cc: Thomas Gleixner Link: https://lore.kernel.org/linux-iommu/CAMuHMdXA7wfJovmfSH2nbAhN0cPyCiFHodTvg4a8Hm9rx5Dj-w@mail.gmail.com/ Link: https://lore.kernel.org/r/20201119055119.2862701-1-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/dmar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b2e804473209..11319e4dce4a 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -335,7 +335,9 @@ static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) static inline void vf_inherit_msi_domain(struct pci_dev *pdev) { - dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&pdev->physfn->dev)); + struct pci_dev *physfn = pci_physfn(pdev); + + dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev)); } static int dmar_pci_bus_notifier(struct notifier_block *nb, From 91c2c28d8de34815ea9bb4d16e9db7308ad33d3e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 Nov 2020 11:12:17 +0000 Subject: [PATCH 672/710] MAINTAINERS: Temporarily add myself to the IOMMU entry Joerg is recovering from an injury, so temporarily add myself to the IOMMU MAINTAINERS entry so that I'm more likely to get CC'd on patches while I help to look after the tree for him. Suggested-by: Joerg Roedel Link: https://lore.kernel.org/r/20201117100953.GR22888@8bytes.org Signed-off-by: Will Deacon --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3da6d8c154e4..9f60b7a25136 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9171,6 +9171,7 @@ F: include/linux/iomap.h IOMMU DRIVERS M: Joerg Roedel +M: Will Deacon L: iommu@lists.linux-foundation.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git From 551310e7356cb8af4eb4c618961ad1e7b2f89e19 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 19 Nov 2020 13:04:04 +0100 Subject: [PATCH 673/710] ALSA: hda/ca0132: Fix compile warning without PCI CONFIG_PCI=n leads to a compile warning like: sound/pci/hda/patch_ca0132.c:8214:10: warning: no case matching constant switch condition '0' due to the missed handling of QUIRK_NONE in ca0132_mmio_init(). Fix it. Fixes: bf2aa9ccc8e5 ("ALSA: hda/ca0132 - Cleanup ca0132_mmio_init function.") Reported-by: kernel test robot Link: https://lore.kernel.org/r/20201119120404.16833-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index e0c38f2735c6..d8370a417e3d 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -9183,6 +9183,8 @@ static void ca0132_mmio_init(struct hda_codec *codec) case QUIRK_AE5: ca0132_mmio_init_ae5(codec); break; + default: + break; } } From fcb48454c23c5679d1a2e252f127642e91b05cbe Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Tue, 17 Nov 2020 16:59:11 +1100 Subject: [PATCH 674/710] selftests/powerpc: rfi_flush: disable entry flush if present We are about to add an entry flush. The rfi (exit) flush test measures the number of L1D flushes over a syscall with the RFI flush enabled and disabled. But if the entry flush is also enabled, the effect of enabling and disabling the RFI flush is masked. If there is a debugfs entry for the entry flush, disable it during the RFI flush and restore it later. Reported-by: Spoorthy S Signed-off-by: Russell Currey Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- .../selftests/powerpc/security/rfi_flush.c | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 93a65bd1f231..4e7b2776c367 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -85,19 +85,33 @@ int rfi_flush_test(void) __u64 l1d_misses_total = 0; unsigned long iterations = 100000, zero_size = 24 * 1024; unsigned long l1d_misses_expected; - int rfi_flush_org, rfi_flush; + int rfi_flush_orig, rfi_flush; + int have_entry_flush, entry_flush_orig; SKIP_IF(geteuid() != 0); // The PMU event we use only works on Power7 or later SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); - if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) { + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { perror("Unable to read powerpc/rfi_flush debugfs file"); SKIP_IF(1); } - rfi_flush = rfi_flush_org; + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + have_entry_flush = 0; + } else { + have_entry_flush = 1; + + if (entry_flush_orig != 0) { + if (write_debugfs_file("powerpc/entry_flush", 0) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + return 1; + } + } + } + + rfi_flush = rfi_flush_orig; fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); FAIL_IF(fd < 0); @@ -106,6 +120,7 @@ int rfi_flush_test(void) FAIL_IF(perf_event_enable(fd)); + // disable L1 prefetching set_dscr(1); iter = repetitions; @@ -147,8 +162,8 @@ again: repetitions * l1d_misses_expected / 2, passes, repetitions); - if (rfi_flush == rfi_flush_org) { - rfi_flush = !rfi_flush_org; + if (rfi_flush == rfi_flush_orig) { + rfi_flush = !rfi_flush_orig; if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) { perror("error writing to powerpc/rfi_flush debugfs file"); return 1; @@ -164,11 +179,19 @@ again: set_dscr(0); - if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) { + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { perror("unable to restore original value of powerpc/rfi_flush debugfs file"); return 1; } + if (have_entry_flush) { + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush " + "debugfs file"); + return 1; + } + } + return rc; } From f79643787e0a0762d2409b7b8334e83f22d85695 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 17 Nov 2020 16:59:12 +1100 Subject: [PATCH 675/710] powerpc/64s: flush L1D on kernel entry IBM Power9 processors can speculatively operate on data in the L1 cache before it has been completely validated, via a way-prediction mechanism. It is not possible for an attacker to determine the contents of impermissible memory using this method, since these systems implement a combination of hardware and software security measures to prevent scenarios where protected data could be leaked. However these measures don't address the scenario where an attacker induces the operating system to speculatively execute instructions using data that the attacker controls. This can be used for example to speculatively bypass "kernel user access prevention" techniques, as discovered by Anthony Steinhauser of Google's Safeside Project. This is not an attack by itself, but there is a possibility it could be used in conjunction with side-channels or other weaknesses in the privileged code to construct an attack. This issue can be mitigated by flushing the L1 cache between privilege boundaries of concern. This patch flushes the L1 cache on kernel entry. This is part of the fix for CVE-2020-4788. Signed-off-by: Nicholas Piggin Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- .../admin-guide/kernel-parameters.txt | 3 + arch/powerpc/include/asm/exception-64s.h | 9 ++- arch/powerpc/include/asm/feature-fixups.h | 10 ++++ arch/powerpc/include/asm/security_features.h | 4 ++ arch/powerpc/include/asm/setup.h | 3 + arch/powerpc/kernel/exceptions-64s.S | 37 ++++++++++++ arch/powerpc/kernel/setup_64.c | 60 ++++++++++++++++++- arch/powerpc/kernel/vmlinux.lds.S | 7 +++ arch/powerpc/lib/feature-fixups.c | 54 +++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 11 ++++ arch/powerpc/platforms/pseries/setup.c | 4 ++ 11 files changed, 200 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 526d65d8573a..383889fe771f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2858,6 +2858,7 @@ mds=off [X86] tsx_async_abort=off [X86] kvm.nx_huge_pages=off [X86] + no_entry_flush [PPC] Exceptions: This does not have any effect on @@ -3186,6 +3187,8 @@ noefi Disable EFI runtime services support. + no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel. + noexec [IA-64] noexec [X86] diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index ebe95aa04d53..83fa88bc9935 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -57,11 +57,18 @@ nop; \ nop +#define ENTRY_FLUSH_SLOT \ + ENTRY_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; + /* * r10 must be free to use, r13 must be paca */ #define INTERRUPT_TO_KERNEL \ - STF_ENTRY_BARRIER_SLOT + STF_ENTRY_BARRIER_SLOT; \ + ENTRY_FLUSH_SLOT /* * Macros for annotating the expected destination of (h)rfid diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index b0af97add751..06a48219bbf2 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,14 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define ENTRY_FLUSH_FIXUP_SECTION \ +957: \ + .pushsection __entry_flush_fixup,"a"; \ + .align 2; \ +958: \ + FTR_ENTRY_OFFSET 957b-958b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -237,8 +245,10 @@ label##3: \ #include extern long stf_barrier_fallback; +extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index fbb8fa32150f..9e7459d2edca 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -86,12 +86,16 @@ static inline bool security_ftr_enabled(u64 feature) // Software required to flush link stack on context switch #define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull +// The L1-D cache should be flushed when entering the kernel +#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull + // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_L1D_FLUSH_ENTRY | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 9efbddee2bca..e2fcd3e874f8 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,12 +52,15 @@ enum l1d_flush_type { }; void setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_entry_flush(bool enable); +void setup_uaccess_flush(bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f7d748b88705..5577dd887d37 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2951,6 +2951,43 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ + mtctr r11 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync + + /* + * The load addresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index bb9cab3641d7..7d447b43ad13 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -945,7 +945,9 @@ early_initcall(disable_hardlockup_detector); static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; +static bool no_entry_flush; bool rfi_flush; +bool entry_flush; static int __init handle_no_rfi_flush(char *p) { @@ -955,6 +957,14 @@ static int __init handle_no_rfi_flush(char *p) } early_param("no_rfi_flush", handle_no_rfi_flush); +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -986,6 +996,18 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } +void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -1044,10 +1066,19 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush && !cpu_mitigations_off()) + if (!cpu_mitigations_off() && !no_rfi_flush) rfi_flush_enable(enable); } +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -1075,9 +1106,36 @@ static int rfi_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index e0548b4950de..715ec4d4614a 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -131,6 +131,13 @@ SECTIONS __stop___stf_entry_barrier_fixup = .; } + . = ALIGN(8); + __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { + __start___entry_flush_fixup = .; + *(__entry_flush_fixup) + __stop___entry_flush_fixup = .; + } + . = ALIGN(8); __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { __start___stf_exit_barrier_fixup = .; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4c0a7ee9fa00..70e83cfd74aa 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -234,6 +234,60 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) do_stf_exit_barrier_fixups(types); } +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + + if (types == L1D_FLUSH_FALLBACK) + patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback, + BRANCH_SET_LINK); + else + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + } + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 9acaa0f131b9..d04a085c423d 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -122,12 +122,23 @@ static void pnv_setup_rfi_flush(void) type = L1D_FLUSH_ORI; } + /* + * If we are non-Power9 bare metal, we don't need to flush on kernel + * entry: it fixes a P9 specific vulnerability. + */ + if (!pvr_version_is(PVR_POWER9)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); } static void __init pnv_check_guarded_cores(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 633c45ec406d..8136c5368ee4 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -579,6 +579,10 @@ void pseries_setup_rfi_flush(void) setup_rfi_flush(types, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); } #ifdef CONFIG_PCI_IOV From 9a32a7e78bd0cd9a9b6332cbdc345ee5ffd0c5de Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 17 Nov 2020 16:59:13 +1100 Subject: [PATCH 676/710] powerpc/64s: flush L1D after user accesses IBM Power9 processors can speculatively operate on data in the L1 cache before it has been completely validated, via a way-prediction mechanism. It is not possible for an attacker to determine the contents of impermissible memory using this method, since these systems implement a combination of hardware and software security measures to prevent scenarios where protected data could be leaked. However these measures don't address the scenario where an attacker induces the operating system to speculatively execute instructions using data that the attacker controls. This can be used for example to speculatively bypass "kernel user access prevention" techniques, as discovered by Anthony Steinhauser of Google's Safeside Project. This is not an attack by itself, but there is a possibility it could be used in conjunction with side-channels or other weaknesses in the privileged code to construct an attack. This issue can be mitigated by flushing the L1 cache between privilege boundaries of concern. This patch flushes the L1 cache after user accesses. This is part of the fix for CVE-2020-4788. Signed-off-by: Nicholas Piggin Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- .../admin-guide/kernel-parameters.txt | 4 + .../powerpc/include/asm/book3s/64/kup-radix.h | 66 ++++++++------ arch/powerpc/include/asm/exception-64s.h | 3 + arch/powerpc/include/asm/feature-fixups.h | 9 ++ arch/powerpc/include/asm/kup.h | 19 +++-- arch/powerpc/include/asm/security_features.h | 3 + arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/kernel/exceptions-64s.S | 85 ++++++------------- arch/powerpc/kernel/setup_64.c | 62 ++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 7 ++ arch/powerpc/lib/feature-fixups.c | 50 +++++++++++ arch/powerpc/platforms/powernv/setup.c | 10 ++- arch/powerpc/platforms/pseries/setup.c | 4 + 13 files changed, 233 insertions(+), 90 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 383889fe771f..44fde25bb221 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2859,6 +2859,7 @@ tsx_async_abort=off [X86] kvm.nx_huge_pages=off [X86] no_entry_flush [PPC] + no_uaccess_flush [PPC] Exceptions: This does not have any effect on @@ -3238,6 +3239,9 @@ nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + no_uaccess_flush + [PPC] Don't flush the L1-D cache after accessing user data. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 3ee1ec60be84..97c2394e7dea 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -61,6 +61,8 @@ #else /* !__ASSEMBLY__ */ +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); + #ifdef CONFIG_PPC_KUAP #include @@ -103,8 +105,16 @@ static inline void kuap_check_amr(void) static inline unsigned long get_kuap(void) { + /* + * We return AMR_KUAP_BLOCKED when we don't support KUAP because + * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to + * cause restore_user_access to do a flush. + * + * This has no effect in terms of actually blocking things on hash, + * so it doesn't break anything. + */ if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) - return 0; + return AMR_KUAP_BLOCKED; return mfspr(SPRN_AMR); } @@ -123,6 +133,31 @@ static inline void set_kuap(unsigned long value) isync(); } +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), + "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); +} +#else /* CONFIG_PPC_KUAP */ +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } + +static inline unsigned long kuap_get_and_check_amr(void) +{ + return 0UL; +} + +static inline void kuap_check_amr(void) { } + +static inline unsigned long get_kuap(void) +{ + return AMR_KUAP_BLOCKED; +} + +static inline void set_kuap(unsigned long value) { } +#endif /* !CONFIG_PPC_KUAP */ + static __always_inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { @@ -142,6 +177,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); } static inline unsigned long prevent_user_access_return(void) @@ -149,6 +186,8 @@ static inline unsigned long prevent_user_access_return(void) unsigned long flags = get_kuap(); set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); return flags; } @@ -156,30 +195,9 @@ static inline unsigned long prevent_user_access_return(void) static inline void restore_user_access(unsigned long flags) { set_kuap(flags); + if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED) + do_uaccess_flush(); } - -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ - return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && - (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), - "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); -} -#else /* CONFIG_PPC_KUAP */ -static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) -{ -} - -static inline void kuap_check_amr(void) -{ -} - -static inline unsigned long kuap_get_and_check_amr(void) -{ - return 0; -} -#endif /* CONFIG_PPC_KUAP */ - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 83fa88bc9935..1d32b174ab6a 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -144,6 +144,9 @@ RFSCV; \ b rfscv_flush_fallback +#else /* __ASSEMBLY__ */ +/* Prototype for function defined in exceptions-64s.S */ +void do_uaccess_flush(void); #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 06a48219bbf2..fbd406cd6916 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,14 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define UACCESS_FLUSH_FIXUP_SECTION \ +959: \ + .pushsection __uaccess_flush_fixup,"a"; \ + .align 2; \ +960: \ + FTR_ENTRY_OFFSET 959b-960b; \ + .popsection; + #define ENTRY_FLUSH_FIXUP_SECTION \ 957: \ .pushsection __entry_flush_fixup,"a"; \ @@ -248,6 +256,7 @@ extern long stf_barrier_fallback; extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 1d0f7d838b2e..0f5c606ae057 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -53,17 +53,26 @@ static inline void setup_kuep(bool disabled) { } void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } + +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return false; +} + +/* + * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush + * the L1D cache after user accesses. Only include the empty stubs for other + * platforms. + */ +#ifndef CONFIG_PPC64 static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline unsigned long prevent_user_access_return(void) { return 0UL; } static inline void restore_user_access(unsigned long flags) { } -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ - return false; -} +#endif /* CONFIG_PPC64 */ #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 9e7459d2edca..b774a4477d5f 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -89,6 +89,8 @@ static inline bool security_ftr_enabled(u64 feature) // The L1-D cache should be flushed when entering the kernel #define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull +// The L1-D cache should be flushed after user accesses from the kernel +#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull // Features enabled by default #define SEC_FTR_DEFAULT \ @@ -96,6 +98,7 @@ static inline bool security_ftr_enabled(u64 feature) SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ SEC_FTR_L1D_FLUSH_ENTRY | \ + SEC_FTR_L1D_FLUSH_UACCESS | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index e2fcd3e874f8..a466749703f1 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -60,6 +60,7 @@ void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_uaccess_flush_fixups(enum l1d_flush_type types); void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5577dd887d37..f63a3d3bca3d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2951,11 +2951,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr -TRAMP_REAL_BEGIN(entry_flush_fallback) - std r9,PACA_EXRFI+EX_R9(r13) - std r10,PACA_EXRFI+EX_R10(r13) - std r11,PACA_EXRFI+EX_R11(r13) - mfctr r9 +/* Clobbers r10, r11, ctr */ +.macro L1D_DISPLACEMENT_FLUSH ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) ld r11,PACA_L1D_FLUSH_SIZE(r13) srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ @@ -2981,7 +2978,14 @@ TRAMP_REAL_BEGIN(entry_flush_fallback) ld r11,(0x80 + 8)*7(r10) addi r10,r10,0x80*8 bdnz 1b +.endm +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -2997,32 +3001,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -3040,32 +3019,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -3116,8 +3070,21 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback) RFSCV USE_TEXT_SECTION() - MASKED_INTERRUPT - MASKED_INTERRUPT hsrr=1 + +_GLOBAL(do_uaccess_flush) + UACCESS_FLUSH_FIXUP_SECTION + nop + nop + nop + blr + L1D_DISPLACEMENT_FLUSH + blr +_ASM_NOKPROBE_SYMBOL(do_uaccess_flush) +EXPORT_SYMBOL(do_uaccess_flush) + + +MASKED_INTERRUPT +MASKED_INTERRUPT hsrr=1 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER kvmppc_skip_interrupt: diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7d447b43ad13..74fd47f46fa5 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -946,8 +946,12 @@ static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; static bool no_entry_flush; +static bool no_uaccess_flush; bool rfi_flush; bool entry_flush; +bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); static int __init handle_no_rfi_flush(char *p) { @@ -965,6 +969,14 @@ static int __init handle_no_entry_flush(char *p) } early_param("no_entry_flush", handle_no_entry_flush); +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -1008,6 +1020,20 @@ void entry_flush_enable(bool enable) entry_flush = enable; } +void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -1079,6 +1105,15 @@ void setup_entry_flush(bool enable) entry_flush_enable(enable); } +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -1132,10 +1167,37 @@ static int entry_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 715ec4d4614a..6db90cdf11da 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -131,6 +131,13 @@ SECTIONS __stop___stf_entry_barrier_fixup = .; } + . = ALIGN(8); + __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) { + __start___uaccess_flush_fixup = .; + *(__uaccess_flush_fixup) + __stop___uaccess_flush_fixup = .; + } + . = ALIGN(8); __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { __start___entry_flush_fixup = .; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 70e83cfd74aa..321c12a9ef6b 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -234,6 +234,56 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) do_stf_exit_barrier_fixups(types); } +void do_uaccess_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[4], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x4e800020; /* blr */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = 0x60000000; /* nop */ + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); + } + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + void do_entry_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index d04a085c423d..087ec92acfc4 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -124,10 +124,12 @@ static void pnv_setup_rfi_flush(void) /* * If we are non-Power9 bare metal, we don't need to flush on kernel - * entry: it fixes a P9 specific vulnerability. + * entry or after user access: they fix a P9 specific vulnerability. */ - if (!pvr_version_is(PVR_POWER9)) + if (!pvr_version_is(PVR_POWER9)) { security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + } enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ @@ -139,6 +141,10 @@ static void pnv_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } static void __init pnv_check_guarded_cores(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 8136c5368ee4..3617cdb079f6 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -583,6 +583,10 @@ void pseries_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } #ifdef CONFIG_PCI_IOV From 178d52c6e89c38d0553b0ac8b99927b11eb995b0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 19 Nov 2020 23:43:53 +1100 Subject: [PATCH 677/710] powerpc: Only include kup-radix.h for 64-bit Book3S In kup.h we currently include kup-radix.h for all 64-bit builds, which includes Book3S and Book3E. The latter doesn't make sense, Book3E never uses the Radix MMU. This has worked up until now, but almost by accident, and the recent uaccess flush changes introduced a build breakage on Book3E because of the bad structure of the code. So disentangle things so that we only use kup-radix.h for Book3S. This requires some more stubs in kup.h and fixing an include in syscall_64.c. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/kup-radix.h | 4 ++-- arch/powerpc/include/asm/kup.h | 11 ++++++++--- arch/powerpc/kernel/syscall_64.c | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 97c2394e7dea..28716e2f13e3 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -27,6 +27,7 @@ #endif .endm +#ifdef CONFIG_PPC_KUAP .macro kuap_check_amr gpr1, gpr2 #ifdef CONFIG_PPC_KUAP_DEBUG BEGIN_MMU_FTR_SECTION_NESTED(67) @@ -38,6 +39,7 @@ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) #endif .endm +#endif .macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr #ifdef CONFIG_PPC_KUAP @@ -148,8 +150,6 @@ static inline unsigned long kuap_get_and_check_amr(void) return 0UL; } -static inline void kuap_check_amr(void) { } - static inline unsigned long get_kuap(void) { return AMR_KUAP_BLOCKED; diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 0f5c606ae057..0d93331d0fab 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -14,7 +14,7 @@ #define KUAP_CURRENT_WRITE 8 #define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 #include #endif #ifdef CONFIG_PPC_8xx @@ -35,6 +35,9 @@ .macro kuap_check current, gpr .endm +.macro kuap_check_amr gpr1, gpr2 +.endm + #endif #else /* !__ASSEMBLY__ */ @@ -60,19 +63,21 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) return false; } +static inline void kuap_check_amr(void) { } + /* * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush * the L1D cache after user accesses. Only include the empty stubs for other * platforms. */ -#ifndef CONFIG_PPC64 +#ifndef CONFIG_PPC_BOOK3S_64 static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline unsigned long prevent_user_access_return(void) { return 0UL; } static inline void restore_user_access(unsigned long flags) { } -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 8e50818aa50b..310bcd768cd5 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include From 89a83a0c69c81a25ce91002b90ca27ed86132a0a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 17 Nov 2020 16:59:14 +1100 Subject: [PATCH 678/710] selftests/powerpc: entry flush test Add a test modelled on the RFI flush test which counts the number of L1D misses doing a simple syscall with the entry flush on and off. For simplicity of backporting, this test duplicates a lot of code from rfi_flush. We clean that up in the next patch. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- .../selftests/powerpc/security/.gitignore | 1 + .../selftests/powerpc/security/Makefile | 2 +- .../selftests/powerpc/security/entry_flush.c | 198 ++++++++++++++++++ 3 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/security/entry_flush.c diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index f795e06f5ae3..4257a1f156bb 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only rfi_flush +entry_flush diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index eadbbff50be6..921152caf1dc 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush spectre_v2 +TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2 top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c new file mode 100644 index 000000000000..7ae7e37204c5 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" + +#define CACHELINE_SIZE 128 + +struct perf_event_read { + __u64 nr; + __u64 l1d_misses; +}; + +static inline __u64 load(void *addr) +{ + __u64 tmp; + + asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); + + return tmp; +} + +static void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size) +{ + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + getppid(); + } +} + +static void sigill_handler(int signr, siginfo_t *info, void *unused) +{ + static int warned; + ucontext_t *ctx = (ucontext_t *)unused; + unsigned long *pc = &UCONTEXT_NIA(ctx); + + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { + if (!warned++) + printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); + *pc += 4; + } else { + printf("SIGILL at %p\n", pc); + abort(); + } +} + +static void set_dscr(unsigned long val) +{ + static int init; + struct sigaction sa; + + if (!init) { + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigill_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGILL, &sa, NULL)) + perror("sigill_handler"); + init = 1; + } + + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} + +int entry_flush_test(void) +{ + char *p; + int repetitions = 10; + int fd, passes = 0, iter, rc = 0; + struct perf_event_read v; + __u64 l1d_misses_total = 0; + unsigned long iterations = 100000, zero_size = 24 * 1024; + unsigned long l1d_misses_expected; + int rfi_flush_orig; + int entry_flush, entry_flush_orig; + + SKIP_IF(geteuid() != 0); + + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + perror("Unable to read powerpc/rfi_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (rfi_flush_orig != 0) { + if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) { + perror("error writing to powerpc/rfi_flush debugfs file"); + FAIL_IF(1); + } + } + + entry_flush = entry_flush_orig; + + fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + FAIL_IF(fd < 0); + + p = (char *)memalign(zero_size, CACHELINE_SIZE); + + FAIL_IF(perf_event_enable(fd)); + + // disable L1 prefetching + set_dscr(1); + + iter = repetitions; + + /* + * We expect to see l1d miss for each cacheline access when entry_flush + * is set. Allow a small variation on this. + */ + l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2); + +again: + FAIL_IF(perf_event_reset(fd)); + + syscall_loop(p, iterations, zero_size); + + FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v)); + + if (entry_flush && v.l1d_misses >= l1d_misses_expected) + passes++; + else if (!entry_flush && v.l1d_misses < (l1d_misses_expected / 2)) + passes++; + + l1d_misses_total += v.l1d_misses; + + while (--iter) + goto again; + + if (passes < repetitions) { + printf("FAIL (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d failures]\n", + entry_flush, l1d_misses_total, entry_flush ? '<' : '>', + entry_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + repetitions - passes, repetitions); + rc = 1; + } else { + printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n", + entry_flush, l1d_misses_total, entry_flush ? '>' : '<', + entry_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + passes, repetitions); + } + + if (entry_flush == entry_flush_orig) { + entry_flush = !entry_flush_orig; + if (write_debugfs_file("powerpc/entry_flush", entry_flush) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + return 1; + } + iter = repetitions; + l1d_misses_total = 0; + passes = 0; + goto again; + } + + perf_event_disable(fd); + close(fd); + + set_dscr(0); + + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + perror("unable to restore original value of powerpc/rfi_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush debugfs file"); + return 1; + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(entry_flush_test, "entry_flush_test"); +} From 0d239f3b03efc78fb5b290aff6c747fecd3b98cb Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 17 Nov 2020 16:59:15 +1100 Subject: [PATCH 679/710] selftests/powerpc: refactor entry and rfi_flush tests For simplicity in backporting, the original entry_flush test contained a lot of duplicated code from the rfi_flush test. De-duplicate that code. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/include/utils.h | 5 ++ .../selftests/powerpc/security/Makefile | 2 + .../selftests/powerpc/security/entry_flush.c | 61 +--------------- .../selftests/powerpc/security/flush_utils.c | 70 +++++++++++++++++++ .../selftests/powerpc/security/flush_utils.h | 17 +++++ .../selftests/powerpc/security/rfi_flush.c | 61 +--------------- 6 files changed, 96 insertions(+), 120 deletions(-) create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.c create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.h diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 052b5a775dc2..b7d188fc87c7 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -42,6 +42,11 @@ int perf_event_enable(int fd); int perf_event_disable(int fd); int perf_event_reset(int fd); +struct perf_event_read { + __u64 nr; + __u64 l1d_misses; +}; + #if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30) #include #include diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 921152caf1dc..f25e854fe370 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -11,3 +11,5 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c $(OUTPUT)/spectre_v2: CFLAGS += -m64 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S +$(OUTPUT)/rfi_flush: flush_utils.c +$(OUTPUT)/entry_flush: flush_utils.c diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 7ae7e37204c5..78cf914fa321 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -15,66 +15,7 @@ #include #include #include "utils.h" - -#define CACHELINE_SIZE 128 - -struct perf_event_read { - __u64 nr; - __u64 l1d_misses; -}; - -static inline __u64 load(void *addr) -{ - __u64 tmp; - - asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); - - return tmp; -} - -static void syscall_loop(char *p, unsigned long iterations, - unsigned long zero_size) -{ - for (unsigned long i = 0; i < iterations; i++) { - for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) - load(p + j); - getppid(); - } -} - -static void sigill_handler(int signr, siginfo_t *info, void *unused) -{ - static int warned; - ucontext_t *ctx = (ucontext_t *)unused; - unsigned long *pc = &UCONTEXT_NIA(ctx); - - /* mtspr 3,RS to check for move to DSCR below */ - if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { - if (!warned++) - printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); - *pc += 4; - } else { - printf("SIGILL at %p\n", pc); - abort(); - } -} - -static void set_dscr(unsigned long val) -{ - static int init; - struct sigaction sa; - - if (!init) { - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigill_handler; - sa.sa_flags = SA_SIGINFO; - if (sigaction(SIGILL, &sa, NULL)) - perror("sigill_handler"); - init = 1; - } - - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); -} +#include "flush_utils.h" int entry_flush_test(void) { diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c new file mode 100644 index 000000000000..0c3c4c40c7fb --- /dev/null +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "flush_utils.h" + +static inline __u64 load(void *addr) +{ + __u64 tmp; + + asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); + + return tmp; +} + +void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size) +{ + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + getppid(); + } +} + +static void sigill_handler(int signr, siginfo_t *info, void *unused) +{ + static int warned; + ucontext_t *ctx = (ucontext_t *)unused; + unsigned long *pc = &UCONTEXT_NIA(ctx); + + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { + if (!warned++) + printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); + *pc += 4; + } else { + printf("SIGILL at %p\n", pc); + abort(); + } +} + +void set_dscr(unsigned long val) +{ + static int init; + struct sigaction sa; + + if (!init) { + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigill_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGILL, &sa, NULL)) + perror("sigill_handler"); + init = 1; + } + + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h new file mode 100644 index 000000000000..07a5eb301466 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +/* + * Copyright 2018 IBM Corporation. + */ + +#ifndef _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H +#define _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H + +#define CACHELINE_SIZE 128 + +void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size); + +void set_dscr(unsigned long val); + +#endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */ diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 4e7b2776c367..7565fd786640 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -10,71 +10,12 @@ #include #include #include -#include #include #include #include #include "utils.h" +#include "flush_utils.h" -#define CACHELINE_SIZE 128 - -struct perf_event_read { - __u64 nr; - __u64 l1d_misses; -}; - -static inline __u64 load(void *addr) -{ - __u64 tmp; - - asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); - - return tmp; -} - -static void syscall_loop(char *p, unsigned long iterations, - unsigned long zero_size) -{ - for (unsigned long i = 0; i < iterations; i++) { - for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) - load(p + j); - getppid(); - } -} - -static void sigill_handler(int signr, siginfo_t *info, void *unused) -{ - static int warned = 0; - ucontext_t *ctx = (ucontext_t *)unused; - unsigned long *pc = &UCONTEXT_NIA(ctx); - - /* mtspr 3,RS to check for move to DSCR below */ - if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { - if (!warned++) - printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); - *pc += 4; - } else { - printf("SIGILL at %p\n", pc); - abort(); - } -} - -static void set_dscr(unsigned long val) -{ - static int init = 0; - struct sigaction sa; - - if (!init) { - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigill_handler; - sa.sa_flags = SA_SIGINFO; - if (sigaction(SIGILL, &sa, NULL)) - perror("sigill_handler"); - init = 1; - } - - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); -} int rfi_flush_test(void) { From da631f7fd623b6c180c8d93a93040d1e0d61291f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 17 Nov 2020 16:59:16 +1100 Subject: [PATCH 680/710] powerpc/64s: rename pnv|pseries_setup_rfi_flush to _setup_security_mitigations pseries|pnv_setup_rfi_flush already does the count cache flush setup, and we just added entry and uaccess flushes. So the name is not very accurate any more. In both platforms we then also immediately setup the STF flush. Rename them to _setup_security_mitigations and fold the STF flush in. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/setup.c | 7 ++++--- arch/powerpc/platforms/pseries/mobility.c | 4 ++-- arch/powerpc/platforms/pseries/pseries.h | 2 +- arch/powerpc/platforms/pseries/setup.c | 7 ++++--- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 087ec92acfc4..46115231a3b2 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } -static void pnv_setup_rfi_flush(void) +static void pnv_setup_security_mitigations(void) { struct device_node *np, *fw_features; enum l1d_flush_type type; @@ -145,6 +145,8 @@ static void pnv_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); setup_uaccess_flush(enable); + + setup_stf_barrier(); } static void __init pnv_check_guarded_cores(void) @@ -173,8 +175,7 @@ static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); - pnv_setup_rfi_flush(); - setup_stf_barrier(); + pnv_setup_security_mitigations(); /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index d6f4162478a5..2f73cb5bf12d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -349,8 +349,8 @@ void post_mobility_fixup(void) cpus_read_unlock(); - /* Possibly switch to a new RFI flush type */ - pseries_setup_rfi_flush(); + /* Possibly switch to a new L1 flush type */ + pseries_setup_security_mitigations(); /* Reinitialise system information for hv-24x7 */ read_24x7_sys_info(); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 13fa370a87e4..593840847cd3 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -111,7 +111,7 @@ static inline unsigned long cmo_get_page_size(void) int dlpar_workqueue_init(void); -void pseries_setup_rfi_flush(void); +void pseries_setup_security_mitigations(void); void pseries_lpar_read_hblkrm_characteristics(void); #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 3617cdb079f6..090c13f6c881 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -542,7 +542,7 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } -void pseries_setup_rfi_flush(void) +void pseries_setup_security_mitigations(void) { struct h_cpu_char_result result; enum l1d_flush_type types; @@ -587,6 +587,8 @@ void pseries_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); setup_uaccess_flush(enable); + + setup_stf_barrier(); } #ifdef CONFIG_PCI_IOV @@ -776,8 +778,7 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); - pseries_setup_rfi_flush(); - setup_stf_barrier(); + pseries_setup_security_mitigations(); pseries_lpar_read_hblkrm_characteristics(); /* By default, only probe PCI (can be overridden by rtas_pci) */ From 01cf158e48d2b5ce947430de5896c10f4f7c1822 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Nov 2020 15:07:19 +0100 Subject: [PATCH 681/710] Revert "iommu/vt-d: Take CONFIG_PCI_ATS into account" This reverts commit 8986f223bd777a73119f5d593c15b4d630ff49bb. The proper fix is queued in Will's tree now Signed-off-by: Thomas Gleixner --- drivers/iommu/intel/dmar.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index bc9f4cf72240..b2e804473209 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -335,9 +335,7 @@ static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) static inline void vf_inherit_msi_domain(struct pci_dev *pdev) { -#ifdef CONFIG_PCI_ATS dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&pdev->physfn->dev)); -#endif } static int dmar_pci_bus_notifier(struct notifier_block *nb, From eec231e060fb79923c349f6e89f022b286f32c1e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 14 Nov 2020 10:45:31 +0100 Subject: [PATCH 682/710] HID: logitech-dj: Fix an error in mse_bluetooth_descriptor Fix an error in the mouse / INPUT(2) descriptor used for quad/bt2.0 combo receivers. Replace INPUT with INPUT (Data,Var,Abs) for the field for the 4 extra buttons which share their report-byte with the low-res hwheel. This is likely a copy and paste error. I've verified that the new 0x81, 0x02 value matches both the mouse descriptor for the currently supported MX5000 / MX5500 receivers, as well as the INPUT(2) mouse descriptors for the Dinovo receivers for which support is being worked on. Cc: stable@vger.kernel.org Fixes: f2113c3020ef ("HID: logitech-dj: add support for Logitech Bluetooth Mini-Receiver") Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-dj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 1cafb65428b0..19b9bca0f40a 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -328,7 +328,7 @@ static const char mse_bluetooth_descriptor[] = { 0x25, 0x01, /* LOGICAL_MAX (1) */ 0x75, 0x01, /* REPORT_SIZE (1) */ 0x95, 0x04, /* REPORT_COUNT (4) */ - 0x81, 0x06, /* INPUT */ + 0x81, 0x02, /* INPUT (Data,Var,Abs) */ 0xC0, /* END_COLLECTION */ 0xC0, /* END_COLLECTION */ }; From b4c00e7976636f33a4f67eab436a11666c8afd60 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 14 Nov 2020 22:20:56 +0100 Subject: [PATCH 683/710] HID: logitech-dj: Fix Dinovo Mini when paired with a MX5x00 receiver Some users are pairing the Dinovo keyboards with the MX5000 or MX5500 receivers, instead of with the Dinovo receivers. The receivers are mostly the same (and the air protocol obviously is compatible) but currently the Dinovo receivers are handled by hid-lg.c while the MX5x00 receivers are handled by logitech-dj.c. When using a Dinovo keyboard, with its builtin touchpad, through logitech-dj.c then the touchpad stops working because when asking the receiver for paired devices, we get only 1 paired device with a device_type of REPORT_TYPE_KEYBOARD. And since we don't see a paired mouse, we have nowhere to send mouse-events to, so we drop them. Extend the existing fix for the Dinovo Edge for this to also cover the Dinovo Mini keyboard and also add a mapping to logitech-hidpp for the Media key on the Dinovo Mini, so that that keeps working too. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1811424 Fixes: f2113c3020ef ("HID: logitech-dj: add support for Logitech Bluetooth Mini-Receiver") Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-dj.c | 1 + drivers/hid/hid-logitech-hidpp.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 19b9bca0f40a..1cbbcf607ee9 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -875,6 +875,7 @@ static void logi_dj_recv_queue_notification(struct dj_receiver_dev *djrcv_dev, */ static const u16 kbd_builtin_touchpad_ids[] = { 0xb309, /* Dinovo Edge */ + 0xb30c, /* Dinovo Mini */ }; static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev, diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 40b7c75b3384..0ca723119547 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -93,6 +93,8 @@ MODULE_PARM_DESC(disable_tap_to_click, #define HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS BIT(3) #define HIDPP_CAPABILITY_BATTERY_VOLTAGE BIT(4) +#define lg_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c)) + /* * There are two hidpp protocols in use, the first version hidpp10 is known * as register access protocol or RAP, the second version hidpp20 is known as @@ -2950,6 +2952,26 @@ static int g920_get_config(struct hidpp_device *hidpp, return g920_ff_set_autocenter(hidpp, data); } +/* -------------------------------------------------------------------------- */ +/* Logitech Dinovo Mini keyboard with builtin touchpad */ +/* -------------------------------------------------------------------------- */ +#define DINOVO_MINI_PRODUCT_ID 0xb30c + +static int lg_dinovo_input_mapping(struct hid_device *hdev, struct hid_input *hi, + struct hid_field *field, struct hid_usage *usage, + unsigned long **bit, int *max) +{ + if ((usage->hid & HID_USAGE_PAGE) != HID_UP_LOGIVENDOR) + return 0; + + switch (usage->hid & HID_USAGE) { + case 0x00d: lg_map_key_clear(KEY_MEDIA); break; + default: + return 0; + } + return 1; +} + /* -------------------------------------------------------------------------- */ /* HID++1.0 devices which use HID++ reports for their wheels */ /* -------------------------------------------------------------------------- */ @@ -3185,6 +3207,9 @@ static int hidpp_input_mapping(struct hid_device *hdev, struct hid_input *hi, field->application != HID_GD_MOUSE) return m560_input_mapping(hdev, hi, field, usage, bit, max); + if (hdev->product == DINOVO_MINI_PRODUCT_ID) + return lg_dinovo_input_mapping(hdev, hi, field, usage, bit, max); + return 0; } From 1fd6cee127e2ddff36d648573d7566aafb0d0b77 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 18 Nov 2020 22:13:50 +0100 Subject: [PATCH 684/710] libbpf: Fix VERSIONED_SYM_COUNT number parsing We remove "other info" from "readelf -s --wide" output when parsing GLOBAL_SYM_COUNT variable, which was added in [1]. But we don't do that for VERSIONED_SYM_COUNT and it's failing the check_abi target on powerpc Fedora 33. The extra "other info" wasn't problem for VERSIONED_SYM_COUNT parsing until commit [2] added awk in the pipe, which assumes that the last column is symbol, but it can be "other info". Adding "other info" removal for VERSIONED_SYM_COUNT the same way as we did for GLOBAL_SYM_COUNT parsing. [1] aa915931ac3e ("libbpf: Fix readelf output parsing for Fedora") [2] 746f534a4809 ("tools/libbpf: Avoid counting local symbols in ABI check") Fixes: 746f534a4809 ("tools/libbpf: Avoid counting local symbols in ABI check") Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201118211350.1493421-1-jolsa@kernel.org --- tools/lib/bpf/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 5f9abed3e226..55bd78b3496f 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -146,6 +146,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) @@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ From 883a790a84401f6f55992887fd7263d808d4d05d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 19 Nov 2020 08:59:11 -0800 Subject: [PATCH 685/710] xfs: don't allow NOWAIT DIO across extent boundaries Jens has reported a situation where partial direct IOs can be issued and completed yet still return -EAGAIN. We don't want this to report a short IO as we want XFS to complete user DIO entirely or not at all. This partial IO situation can occur on a write IO that is split across an allocated extent and a hole, and the second mapping is returning EAGAIN because allocation would be required. The trivial reproducer: $ sudo xfs_io -fdt -c "pwrite 0 4k" -c "pwrite -V 1 -b 8k -N 0 8k" /mnt/scr/foo wrote 4096/4096 bytes at offset 0 4 KiB, 1 ops; 0.0001 sec (27.509 MiB/sec and 7042.2535 ops/sec) pwrite: Resource temporarily unavailable $ The pwritev2(0, 8kB, RWF_NOWAIT) call returns EAGAIN having done the first 4kB write: xfs_file_direct_write: dev 259:1 ino 0x83 size 0x1000 offset 0x0 count 0x2000 iomap_apply: dev 259:1 ino 0x83 pos 0 length 8192 flags WRITE|DIRECT|NOWAIT (0x31) ops xfs_direct_write_iomap_ops caller iomap_dio_rw actor iomap_dio_actor xfs_ilock_nowait: dev 259:1 ino 0x83 flags ILOCK_SHARED caller xfs_ilock_for_iomap xfs_iunlock: dev 259:1 ino 0x83 flags ILOCK_SHARED caller xfs_direct_write_iomap_begin xfs_iomap_found: dev 259:1 ino 0x83 size 0x1000 offset 0x0 count 8192 fork data startoff 0x0 startblock 24 blockcount 0x1 iomap_apply_dstmap: dev 259:1 ino 0x83 bdev 259:1 addr 102400 offset 0 length 4096 type MAPPED flags DIRTY Here the first iomap loop has mapped the first 4kB of the file and issued the IO, and we enter the second iomap_apply loop: iomap_apply: dev 259:1 ino 0x83 pos 4096 length 4096 flags WRITE|DIRECT|NOWAIT (0x31) ops xfs_direct_write_iomap_ops caller iomap_dio_rw actor iomap_dio_actor xfs_ilock_nowait: dev 259:1 ino 0x83 flags ILOCK_SHARED caller xfs_ilock_for_iomap xfs_iunlock: dev 259:1 ino 0x83 flags ILOCK_SHARED caller xfs_direct_write_iomap_begin And we exit with -EAGAIN out because we hit the allocate case trying to make the second 4kB block. Then IO completes on the first 4kB and the original IO context completes and unlocks the inode, returning -EAGAIN to userspace: xfs_end_io_direct_write: dev 259:1 ino 0x83 isize 0x1000 disize 0x1000 offset 0x0 count 4096 xfs_iunlock: dev 259:1 ino 0x83 flags IOLOCK_SHARED caller xfs_file_dio_aio_write There are other vectors to the same problem when we re-enter the mapping code if we have to make multiple mappinfs under NOWAIT conditions. e.g. failing trylocks, COW extents being found, allocation being required, and so on. Avoid all these potential problems by only allowing IOMAP_NOWAIT IO to go ahead if the mapping we retrieve for the IO spans an entire allocated extent. This avoids the possibility of subsequent mappings to complete the IO from triggering NOWAIT semantics by any means as NOWAIT IO will now only enter the mapping code once per NOWAIT IO. Reported-and-tested-by: Jens Axboe Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_iomap.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 3abb8b9d6f4c..7b9ff824e82d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -706,6 +706,23 @@ relock: return 0; } +/* + * Check that the imap we are going to return to the caller spans the entire + * range that the caller requested for the IO. + */ +static bool +imap_spans_range( + struct xfs_bmbt_irec *imap, + xfs_fileoff_t offset_fsb, + xfs_fileoff_t end_fsb) +{ + if (imap->br_startoff > offset_fsb) + return false; + if (imap->br_startoff + imap->br_blockcount < end_fsb) + return false; + return true; +} + static int xfs_direct_write_iomap_begin( struct inode *inode, @@ -766,6 +783,18 @@ xfs_direct_write_iomap_begin( if (imap_needs_alloc(inode, flags, &imap, nimaps)) goto allocate_blocks; + /* + * NOWAIT IO needs to span the entire requested IO with a single map so + * that we avoid partial IO failures due to the rest of the IO range not + * covered by this map triggering an EAGAIN condition when it is + * subsequently mapped and aborting the IO. + */ + if ((flags & IOMAP_NOWAIT) && + !imap_spans_range(&imap, offset_fsb, end_fsb)) { + error = -EAGAIN; + goto out_unlock; + } + xfs_iunlock(ip, lockmode); trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags); From d21b96c8ed2aea7e6b7bf4735e1d2503cfbf4072 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 19 Nov 2020 13:14:40 +0100 Subject: [PATCH 686/710] ALSA: mixart: Fix mutex deadlock The code change for switching to non-atomic mode brought the unexpected mutex deadlock in get_msg(). It converted the spinlock with the existing mutex, but there were calls with the already holding the mutex. Since the only place that needs the extra lock is the code path from snd_mixart_send_msg(), remove the mutex lock in get_msg() and apply in the caller side for fixing the mutex deadlock. Fixes: 8d3a8b5cb57d ("ALSA: mixart: Use nonatomic PCM ops") Reported-by: Dan Carpenter Cc: Link: https://lore.kernel.org/r/20201119121440.18945-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/mixart/mixart_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/pci/mixart/mixart_core.c b/sound/pci/mixart/mixart_core.c index 0bdd33b0af65..fb8895af0363 100644 --- a/sound/pci/mixart/mixart_core.c +++ b/sound/pci/mixart/mixart_core.c @@ -70,7 +70,6 @@ static int get_msg(struct mixart_mgr *mgr, struct mixart_msg *resp, unsigned int i; #endif - mutex_lock(&mgr->msg_lock); err = 0; /* copy message descriptor from miXart to driver */ @@ -119,8 +118,6 @@ static int get_msg(struct mixart_mgr *mgr, struct mixart_msg *resp, writel_be(headptr, MIXART_MEM(mgr, MSG_OUTBOUND_FREE_HEAD)); _clean_exit: - mutex_unlock(&mgr->msg_lock); - return err; } @@ -258,7 +255,9 @@ int snd_mixart_send_msg(struct mixart_mgr *mgr, struct mixart_msg *request, int resp.data = resp_data; resp.size = max_resp_size; + mutex_lock(&mgr->msg_lock); err = get_msg(mgr, &resp, msg_frame); + mutex_unlock(&mgr->msg_lock); if( request->message_id != resp.message_id ) dev_err(&mgr->pci->dev, "RESPONSE ERROR!\n"); From 2d8f6481c17db9fa5238b277cdbc392084060b09 Mon Sep 17 00:00:00 2001 From: Georg Kohmann Date: Thu, 19 Nov 2020 10:58:33 +0100 Subject: [PATCH 687/710] ipv6: Remove dependency of ipv6_frag_thdr_truncated on ipv6 module IPV6=m NF_DEFRAG_IPV6=y ld: net/ipv6/netfilter/nf_conntrack_reasm.o: in function `nf_ct_frag6_gather': net/ipv6/netfilter/nf_conntrack_reasm.c:462: undefined reference to `ipv6_frag_thdr_truncated' Netfilter is depending on ipv6 symbol ipv6_frag_thdr_truncated. This dependency is forcing IPV6=y. Remove this dependency by moving ipv6_frag_thdr_truncated out of ipv6. This is the same solution as used with a similar issues: Referring to commit 70b095c843266 ("ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module") Fixes: 9d9e937b1c8b ("ipv6/netfilter: Discard first fragment not including all headers") Reported-by: Randy Dunlap Reported-by: kernel test robot Signed-off-by: Georg Kohmann Acked-by: Pablo Neira Ayuso Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20201119095833.8409-1-geokohma@cisco.com Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 2 -- include/net/ipv6_frag.h | 30 ++++++++++++++++++++++++ net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 31 +------------------------ 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 637cc6dd12b7..bd1f396cc9c7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1064,8 +1064,6 @@ int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, bool ipv6_ext_hdr(u8 nexthdr); -bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp); - enum { IP6_FH_F_FRAG = (1 << 0), IP6_FH_F_AUTH = (1 << 1), diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index a21e8b1381a1..851029ecff13 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -108,5 +108,35 @@ out_rcu_unlock: rcu_read_unlock(); inet_frag_put(&fq->q); } + +/* Check if the upper layer header is truncated in the first fragment. */ +static inline bool +ipv6frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp) +{ + u8 nexthdr = *nexthdrp; + __be16 frag_off; + int offset; + + offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off); + if (offset < 0 || (frag_off & htons(IP6_OFFSET))) + return false; + switch (nexthdr) { + case NEXTHDR_TCP: + offset += sizeof(struct tcphdr); + break; + case NEXTHDR_UDP: + offset += sizeof(struct udphdr); + break; + case NEXTHDR_ICMP: + offset += sizeof(struct icmp6hdr); + break; + default: + offset += 1; + } + if (offset > skb->len) + return true; + return false; +} + #endif #endif diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index b9cc0b330dbe..c129ad334eb3 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -459,7 +459,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) /* Discard the first fragment if it does not include all headers * RFC 8200, Section 4.5 */ - if (ipv6_frag_thdr_truncated(skb, fhoff, &nexthdr)) { + if (ipv6frag_thdr_truncated(skb, fhoff, &nexthdr)) { pr_debug("Drop incomplete fragment\n"); return 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e3869bac9c88..47a0dc46cbdb 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -318,35 +318,6 @@ out_fail: return -1; } -/* Check if the upper layer header is truncated in the first fragment. */ -bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp) -{ - u8 nexthdr = *nexthdrp; - __be16 frag_off; - int offset; - - offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off); - if (offset < 0 || (frag_off & htons(IP6_OFFSET))) - return false; - switch (nexthdr) { - case NEXTHDR_TCP: - offset += sizeof(struct tcphdr); - break; - case NEXTHDR_UDP: - offset += sizeof(struct udphdr); - break; - case NEXTHDR_ICMP: - offset += sizeof(struct icmp6hdr); - break; - default: - offset += 1; - } - if (offset > skb->len) - return true; - return false; -} -EXPORT_SYMBOL(ipv6_frag_thdr_truncated); - static int ipv6_frag_rcv(struct sk_buff *skb) { struct frag_hdr *fhdr; @@ -390,7 +361,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) * the source of the fragment, with the Pointer field set to zero. */ nexthdr = hdr->nexthdr; - if (ipv6_frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) { + if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); From 0530bd6e6a3d5a0292a1a5f33ea980ae7e8b56ca Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 18 Nov 2020 22:40:37 +0100 Subject: [PATCH 688/710] net/smc: fix matching of existing link groups With the multi-subnet support of SMC-Dv2 the match for existing link groups should not include the vlanid of the network device. Set ini->smcd_version accordingly before the call to smc_conn_create() and use this value in smc_conn_create() to skip the vlanid check. Fixes: 5c21c4ccafe8 ("net/smc: determine accepted ISM devices") Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 3 ++- net/smc/smc_core.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e9f487c8c6d5..5dd4faaf7d6e 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -979,7 +979,8 @@ static int __smc_connect(struct smc_sock *smc) /* check if smc modes and versions of CLC proposal and accept match */ rc = smc_connect_check_aclc(ini, aclc); - version = aclc->hdr.version == SMC_V1 ? SMC_V1 : version; + version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2; + ini->smcd_version = version; if (rc) goto vlan_cleanup; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2b19863f7171..af96f813c075 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1309,7 +1309,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) ini->ism_peer_gid[ini->ism_selected]) : smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && !lgr->sync_err && - lgr->vlan_id == ini->vlan_id && + (ini->smcd_version == SMC_V2 || + lgr->vlan_id == ini->vlan_id) && (role == SMC_CLNT || ini->is_smcd || lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { /* link group found */ From 41a0be3f8f6be893860b991eb10c47fc3ee09d7f Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 18 Nov 2020 22:40:38 +0100 Subject: [PATCH 689/710] net/smc: fix direct access to ib_gid_addr->ndev in smc_ib_determine_gid() Sparse complaints 3 times about: net/smc/smc_ib.c:203:52: warning: incorrect type in argument 1 (different address spaces) net/smc/smc_ib.c:203:52: expected struct net_device const *dev net/smc/smc_ib.c:203:52: got struct net_device [noderef] __rcu *const ndev Fix that by using the existing and validated ndev variable instead of accessing attr->ndev directly. Fixes: 5102eca9039b ("net/smc: Use rdma_read_gid_l2_fields to L2 fields") Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_ib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 1c314dbdc7fa..fc766b537ac7 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -198,9 +198,9 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, rcu_read_lock(); ndev = rdma_read_gid_attr_ndev_rcu(attr); if (!IS_ERR(ndev) && - ((!vlan_id && !is_vlan_dev(attr->ndev)) || - (vlan_id && is_vlan_dev(attr->ndev) && - vlan_dev_vlan_id(attr->ndev) == vlan_id)) && + ((!vlan_id && !is_vlan_dev(ndev)) || + (vlan_id && is_vlan_dev(ndev) && + vlan_dev_vlan_id(ndev) == vlan_id)) && attr->gid_type == IB_GID_TYPE_ROCE) { rcu_read_unlock(); if (gid) From 01822dd1bacfce25418cd4662c14240e6eb17ad6 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 21 Sep 2020 16:25:36 +0200 Subject: [PATCH 690/710] drm/vram-helper: Fix use of top-down placement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7053e0eab473 ("drm/vram-helper: stop using TTM placement flags") cleared the BO placement flags if top-down placement had been selected. Hence, BOs that were supposed to go into VRAM are now placed in a default location in system memory. Trying to scanout the incorrectly pinned BO results in displayed garbage and an error message. [ 146.108127] ------------[ cut here ]------------ [ 146.1V08180] WARNING: CPU: 0 PID: 152 at drivers/gpu/drm/drm_gem_vram_helper.c:284 drm_gem_vram_offset+0x59/0x60 [drm_vram_helper] ... [ 146.108591] ast_cursor_page_flip+0x3e/0x150 [ast] [ 146.108622] ast_cursor_plane_helper_atomic_update+0x8a/0xc0 [ast] [ 146.108654] drm_atomic_helper_commit_planes+0x197/0x4c0 [ 146.108699] drm_atomic_helper_commit_tail_rpm+0x59/0xa0 [ 146.108718] commit_tail+0x103/0x1c0 ... [ 146.109302] ---[ end trace d901a1ba1d949036 ]--- Fix the bug by keeping the placement flags. The top-down placement flag is stored in a separate variable. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Fixes: 7053e0eab473 ("drm/vram-helper: stop using TTM placement flags") Reported-by: Pu Wen [for 5.10-rc1] Tested-by: Pu Wen Cc: Christian König Cc: Dave Airlie Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Link: https://patchwork.freedesktop.org/patch/msgid/20200921142536.4392-1-tzimmermann@suse.de (cherry picked from commit b8f8dbf6495850b0babc551377bde754b7bc0eea) [pulled into fixes from drm-next] Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem_vram_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c index 50cad0e4a92e..375c79e23ca5 100644 --- a/drivers/gpu/drm/drm_gem_vram_helper.c +++ b/drivers/gpu/drm/drm_gem_vram_helper.c @@ -140,7 +140,7 @@ static void drm_gem_vram_placement(struct drm_gem_vram_object *gbo, unsigned int c = 0; if (pl_flag & DRM_GEM_VRAM_PL_FLAG_TOPDOWN) - pl_flag = TTM_PL_FLAG_TOPDOWN; + invariant_flags = TTM_PL_FLAG_TOPDOWN; gbo->placement.placement = gbo->placements; gbo->placement.busy_placement = gbo->placements; From 6fa6d28051e9fcaa1570e69648ea13a353a5d218 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 17 Nov 2020 12:05:45 -0800 Subject: [PATCH 691/710] lib/strncpy_from_user.c: Mask out bytes after NUL terminator. do_strncpy_from_user() may copy some extra bytes after the NUL terminator into the destination buffer. This usually does not matter for normal string operations. However, when BPF programs key BPF maps with strings, this matters a lot. A BPF program may read strings from user memory by calling the bpf_probe_read_user_str() helper which eventually calls do_strncpy_from_user(). The program can then key a map with the destination buffer. BPF map keys are fixed-width and string-agnostic, meaning that map keys are treated as a set of bytes. The issue is when do_strncpy_from_user() overcopies bytes after the NUL terminator, it can result in seemingly identical strings occupying multiple slots in a BPF map. This behavior is subtle and totally unexpected by the user. This commit masks out the bytes following the NUL while preserving long-sized stride in the fast path. Fixes: 6ae08ae3dea2 ("bpf: Add probe_read_{user, kernel} and probe_read_{user, kernel}_str helpers") Signed-off-by: Daniel Xu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/21efc982b3e9f2f7b0379eed642294caaa0c27a7.1605642949.git.dxu@dxuuu.xyz --- kernel/trace/bpf_trace.c | 10 ++++++++++ lib/strncpy_from_user.c | 19 +++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 5113fd423cdf..048c655315f1 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -181,6 +181,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size, { int ret; + /* + * NB: We rely on strncpy_from_user() not copying junk past the NUL + * terminator into `dst`. + * + * strncpy_from_user() does long-sized strides in the fast path. If the + * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`, + * then there could be junk after the NUL in `dst`. If user takes `dst` + * and keys a hash map with it, then semantically identical strings can + * occupy multiple entries in the map. + */ ret = strncpy_from_user_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index e6d5fcc2cdf3..122d8d0e253c 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -35,17 +35,32 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, goto byte_at_a_time; while (max >= sizeof(unsigned long)) { - unsigned long c, data; + unsigned long c, data, mask; /* Fall back to byte-at-a-time if we get a page fault */ unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time); - *(unsigned long *)(dst+res) = c; + /* + * Note that we mask out the bytes following the NUL. This is + * important to do because string oblivious code may read past + * the NUL. For those routines, we don't want to give them + * potentially random bytes after the NUL in `src`. + * + * One example of such code is BPF map keys. BPF treats map keys + * as an opaque set of bytes. Without the post-NUL mask, any BPF + * maps keyed by strings returned from strncpy_from_user() may + * have multiple entries for semantically identical strings. + */ if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); + mask = zero_bytemask(data); + *(unsigned long *)(dst+res) = c & mask; return res + find_zero(data); } + + *(unsigned long *)(dst+res) = c; + res += sizeof(unsigned long); max -= sizeof(unsigned long); } From c8a36aedf3e24768e94d87fdcdd37684bd241c44 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 17 Nov 2020 12:05:46 -0800 Subject: [PATCH 692/710] selftest/bpf: Test bpf_probe_read_user_str() strips trailing bytes after NUL Previously, bpf_probe_read_user_str() could potentially overcopy the trailing bytes after the NUL due to how do_strncpy_from_user() does the copy in long-sized strides. The issue has been fixed in the previous commit. This commit adds a selftest that ensures we don't regress bpf_probe_read_user_str() again. Signed-off-by: Daniel Xu Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/4d977508fab4ec5b7b574b85bdf8b398868b6ee9.1605642949.git.dxu@dxuuu.xyz --- .../bpf/prog_tests/probe_read_user_str.c | 71 +++++++++++++++++++ .../bpf/progs/test_probe_read_user_str.c | 25 +++++++ 2 files changed, 96 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c create mode 100644 tools/testing/selftests/bpf/progs/test_probe_read_user_str.c diff --git a/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c new file mode 100644 index 000000000000..e419298132b5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "test_probe_read_user_str.skel.h" + +static const char str1[] = "mestring"; +static const char str2[] = "mestringalittlebigger"; +static const char str3[] = "mestringblubblubblubblubblub"; + +static int test_one_str(struct test_probe_read_user_str *skel, const char *str, + size_t len) +{ + int err, duration = 0; + char buf[256]; + + /* Ensure bytes after string are ones */ + memset(buf, 1, sizeof(buf)); + memcpy(buf, str, len); + + /* Give prog our userspace pointer */ + skel->bss->user_ptr = buf; + + /* Trigger tracepoint */ + usleep(1); + + /* Did helper fail? */ + if (CHECK(skel->bss->ret < 0, "prog_ret", "prog returned: %ld\n", + skel->bss->ret)) + return 1; + + /* Check that string was copied correctly */ + err = memcmp(skel->bss->buf, str, len); + if (CHECK(err, "memcmp", "prog copied wrong string")) + return 1; + + /* Now check that no extra trailing bytes were copied */ + memset(buf, 0, sizeof(buf)); + err = memcmp(skel->bss->buf + len, buf, sizeof(buf) - len); + if (CHECK(err, "memcmp", "trailing bytes were not stripped")) + return 1; + + return 0; +} + +void test_probe_read_user_str(void) +{ + struct test_probe_read_user_str *skel; + int err, duration = 0; + + skel = test_probe_read_user_str__open_and_load(); + if (CHECK(!skel, "test_probe_read_user_str__open_and_load", + "skeleton open and load failed\n")) + return; + + /* Give pid to bpf prog so it doesn't read from anyone else */ + skel->bss->pid = getpid(); + + err = test_probe_read_user_str__attach(skel); + if (CHECK(err, "test_probe_read_user_str__attach", + "skeleton attach failed: %d\n", err)) + goto out; + + if (test_one_str(skel, str1, sizeof(str1))) + goto out; + if (test_one_str(skel, str2, sizeof(str2))) + goto out; + if (test_one_str(skel, str3, sizeof(str3))) + goto out; + +out: + test_probe_read_user_str__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c new file mode 100644 index 000000000000..3ae398b75dcd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +#include + +pid_t pid = 0; +long ret = 0; +void *user_ptr = 0; +char buf[256] = {}; + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int on_write(void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + ret = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr); + + return 0; +} + +char _license[] SEC("license") = "GPL"; From 2801a5da5b25b7af9dd2addd19b2315c02d17b64 Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Wed, 18 Nov 2020 22:49:31 +0900 Subject: [PATCH 693/710] fail_function: Remove a redundant mutex unlock Fix a mutex_unlock() issue where before copy_from_user() is not called mutex_locked. Fixes: 4b1a29a7f542 ("error-injection: Support fault injection framework") Reported-by: Hulk Robot Signed-off-by: Luo Meng Signed-off-by: Masami Hiramatsu Signed-off-by: Alexei Starovoitov Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/bpf/160570737118.263807.8358435412898356284.stgit@devnote2 --- kernel/fail_function.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/fail_function.c b/kernel/fail_function.c index 63b349168da7..b0b1ad93fa95 100644 --- a/kernel/fail_function.c +++ b/kernel/fail_function.c @@ -253,7 +253,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer, if (copy_from_user(buf, buffer, count)) { ret = -EFAULT; - goto out; + goto out_free; } buf[count] = '\0'; sym = strstrip(buf); @@ -307,8 +307,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer, ret = count; } out: - kfree(buf); mutex_unlock(&fei_lock); +out_free: + kfree(buf); return ret; } From be33805c65297611971003d72e7f9235e23ec84d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Oct 2020 13:21:38 +0100 Subject: [PATCH 694/710] drm/i915/gt: Fixup tgl mocs for PTE tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forcing mocs:1 [used for our winsys follows-pte mode] to be cached caused display glitches. Though it is documented as deprecated (and so likely behaves as uncached) use the follow-pte bit and force it out of L3 cache. Testcase: igt/kms_frontbuffer_tracking Testcase: igt/kms_big_fb Signed-off-by: Chris Wilson Cc: Ayaz A Siddiqui Cc: Lucas De Marchi Cc: Matt Roper Cc: Ville Syrjälä Cc: Joonas Lahtinen Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20201015122138.30161-4-chris@chris-wilson.co.uk (cherry picked from commit a04ac827366594c7244f60e9be79fcb404af69f0) Fixes: 849c0fe9e831 ("drm/i915/gt: Initialize reserved and unspecified MOCS indices") Signed-off-by: Rodrigo Vivi [Rodrigo: Updated Fixes tag] --- drivers/gpu/drm/i915/gt/intel_mocs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index b8f56e62158e..313e51e7d4f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -243,8 +243,9 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = { * only, __init_mocs_table() take care to program unused index with * this entry. */ - MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), - L3_3_WB), + MOCS_ENTRY(I915_MOCS_PTE, + LE_0_PAGETABLE | LE_TC_0_PAGETABLE, + L3_1_UC), GEN11_MOCS_ENTRIES, /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ From 704c2317cab5571da0e5763cd47ad07f8900aa76 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 19 Nov 2020 15:36:25 -0500 Subject: [PATCH 695/710] ext4: drop fast_commit from /proc/mounts The options in /proc/mounts must be valid mount options --- and fast_commit is not a mount option. Otherwise, command sequences like this will fail: # mount /dev/vdc /vdc # mkdir -p /vdc/phoronix_test_suite /pts # mount --bind /vdc/phoronix_test_suite /pts # mount -o remount,nodioread_nolock /pts mount: /pts: mount point not mounted or bad option. And in the system logs, you'll find: EXT4-fs (vdc): Unrecognized mount option "fast_commit" or missing value Fixes: 995a3ed67fc8 ("ext4: add fast_commit feature and handling for extended mount options") Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6633b20224d5..94472044f4c1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2638,10 +2638,6 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, } else if (test_opt2(sb, DAX_INODE)) { SEQ_OPTS_PUTS("dax=inode"); } - - if (test_opt2(sb, JOURNAL_FAST_COMMIT)) - SEQ_OPTS_PUTS("fast_commit"); - ext4_show_quota_options(seq, sb); return 0; } From eb8409071a1d47e3593cfe077107ac46853182ab Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 19 Nov 2020 15:17:50 -0800 Subject: [PATCH 696/710] xfs: revert "xfs: fix rmap key and record comparison functions" This reverts commit 6ff646b2ceb0eec916101877f38da0b73e3a5b7f. Your maintainer committed a major braino in the rmap code by adding the attr fork, bmbt, and unwritten extent usage bits into rmap record key comparisons. While XFS uses the usage bits *in the rmap records* for cross-referencing metadata in xfs_scrub and xfs_repair, it only needs the owner and offset information to distinguish between reverse mappings of the same physical extent into the data fork of a file at multiple offsets. The other bits are not important for key comparisons for index lookups, and never have been. Eric Sandeen reports that this causes regressions in generic/299, so undo this patch before it does more damage. Reported-by: Eric Sandeen Fixes: 6ff646b2ceb0 ("xfs: fix rmap key and record comparison functions") Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen --- fs/xfs/libxfs/xfs_rmap_btree.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 577a66381327..beb81c84a937 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -243,8 +243,8 @@ xfs_rmapbt_key_diff( else if (y > x) return -1; - x = be64_to_cpu(kp->rm_offset); - y = xfs_rmap_irec_offset_pack(rec); + x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset)); + y = rec->rm_offset; if (x > y) return 1; else if (y > x) @@ -275,8 +275,8 @@ xfs_rmapbt_diff_two_keys( else if (y > x) return -1; - x = be64_to_cpu(kp1->rm_offset); - y = be64_to_cpu(kp2->rm_offset); + x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset)); + y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset)); if (x > y) return 1; else if (y > x) @@ -390,8 +390,8 @@ xfs_rmapbt_keys_inorder( return 1; else if (a > b) return 0; - a = be64_to_cpu(k1->rmap.rm_offset); - b = be64_to_cpu(k2->rmap.rm_offset); + a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset)); + b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset)); if (a <= b) return 1; return 0; @@ -420,8 +420,8 @@ xfs_rmapbt_recs_inorder( return 1; else if (a > b) return 0; - a = be64_to_cpu(r1->rmap.rm_offset); - b = be64_to_cpu(r2->rmap.rm_offset); + a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset)); + b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset)); if (a <= b) return 1; return 0; From 2bf31d94423c8ae3ff58e38a115b177df6940399 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:18:08 +0100 Subject: [PATCH 697/710] jbd2: fix kernel-doc markups Kernel-doc markup should use this format: identifier - description They should not have any type before that, as otherwise the parser won't do the right thing. Also, some identifiers have different names between their prototypes and the kernel-doc markup. Reviewed-by: Jan Kara Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/72f5c6628f5f278d67625f60893ffbc2ca28d46e.1605521731.git.mchehab+huawei@kernel.org Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 34 ++++++++++++++++++---------------- fs/jbd2/transaction.c | 31 ++++++++++++++++--------------- include/linux/jbd2.h | 2 +- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0c3d5e3b24b2..188f79d76988 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -566,12 +566,14 @@ static int __jbd2_journal_force_commit(journal_t *journal) } /** - * Force and wait upon a commit if the calling process is not within - * transaction. This is used for forcing out undo-protected data which contains - * bitmaps, when the fs is running out of space. + * jbd2_journal_force_commit_nested - Force and wait upon a commit if the + * calling process is not within transaction. * * @journal: journal to force * Returns true if progress was made. + * + * This is used for forcing out undo-protected data which contains + * bitmaps, when the fs is running out of space. */ int jbd2_journal_force_commit_nested(journal_t *journal) { @@ -582,7 +584,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal) } /** - * int journal_force_commit() - force any uncommitted transactions + * jbd2_journal_force_commit() - force any uncommitted transactions * @journal: journal to force * * Caller want unconditional commit. We can only force the running transaction @@ -1881,7 +1883,7 @@ static int load_superblock(journal_t *journal) /** - * int jbd2_journal_load() - Read journal from disk. + * jbd2_journal_load() - Read journal from disk. * @journal: Journal to act on. * * Given a journal_t structure which tells us which disk blocks contain @@ -1951,7 +1953,7 @@ recovery_error: } /** - * void jbd2_journal_destroy() - Release a journal_t structure. + * jbd2_journal_destroy() - Release a journal_t structure. * @journal: Journal to act on. * * Release a journal_t structure once it is no longer in use by the @@ -2028,7 +2030,7 @@ int jbd2_journal_destroy(journal_t *journal) /** - *int jbd2_journal_check_used_features() - Check if features specified are used. + * jbd2_journal_check_used_features() - Check if features specified are used. * @journal: Journal to check. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -2063,7 +2065,7 @@ int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat, } /** - * int jbd2_journal_check_available_features() - Check feature set in journalling layer + * jbd2_journal_check_available_features() - Check feature set in journalling layer * @journal: Journal to check. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -2126,7 +2128,7 @@ jbd2_journal_initialize_fast_commit(journal_t *journal) } /** - * int jbd2_journal_set_features() - Mark a given journal feature in the superblock + * jbd2_journal_set_features() - Mark a given journal feature in the superblock * @journal: Journal to act on. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -2217,7 +2219,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat, } /* - * jbd2_journal_clear_features () - Clear a given journal feature in the + * jbd2_journal_clear_features() - Clear a given journal feature in the * superblock * @journal: Journal to act on. * @compat: bitmask of compatible features @@ -2246,7 +2248,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, EXPORT_SYMBOL(jbd2_journal_clear_features); /** - * int jbd2_journal_flush () - Flush journal + * jbd2_journal_flush() - Flush journal * @journal: Journal to act on. * * Flush all data for a given journal to disk and empty the journal. @@ -2321,7 +2323,7 @@ out: } /** - * int jbd2_journal_wipe() - Wipe journal contents + * jbd2_journal_wipe() - Wipe journal contents * @journal: Journal to act on. * @write: flag (see below) * @@ -2362,7 +2364,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) } /** - * void jbd2_journal_abort () - Shutdown the journal immediately. + * jbd2_journal_abort () - Shutdown the journal immediately. * @journal: the journal to shutdown. * @errno: an error number to record in the journal indicating * the reason for the shutdown. @@ -2453,7 +2455,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) } /** - * int jbd2_journal_errno () - returns the journal's error state. + * jbd2_journal_errno() - returns the journal's error state. * @journal: journal to examine. * * This is the errno number set with jbd2_journal_abort(), the last @@ -2477,7 +2479,7 @@ int jbd2_journal_errno(journal_t *journal) } /** - * int jbd2_journal_clear_err () - clears the journal's error state + * jbd2_journal_clear_err() - clears the journal's error state * @journal: journal to act on. * * An error must be cleared or acked to take a FS out of readonly @@ -2497,7 +2499,7 @@ int jbd2_journal_clear_err(journal_t *journal) } /** - * void jbd2_journal_ack_err() - Ack journal err. + * jbd2_journal_ack_err() - Ack journal err. * @journal: journal to act on. * * An error must be cleared or acked to take a FS out of readonly diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d54f04674e8e..9396666b7314 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -519,7 +519,7 @@ EXPORT_SYMBOL(jbd2__journal_start); /** - * handle_t *jbd2_journal_start() - Obtain a new handle. + * jbd2_journal_start() - Obtain a new handle. * @journal: Journal to start transaction on. * @nblocks: number of block buffer we might modify * @@ -566,7 +566,7 @@ void jbd2_journal_free_reserved(handle_t *handle) EXPORT_SYMBOL(jbd2_journal_free_reserved); /** - * int jbd2_journal_start_reserved() - start reserved handle + * jbd2_journal_start_reserved() - start reserved handle * @handle: handle to start * @type: for handle statistics * @line_no: for handle statistics @@ -620,7 +620,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, EXPORT_SYMBOL(jbd2_journal_start_reserved); /** - * int jbd2_journal_extend() - extend buffer credits. + * jbd2_journal_extend() - extend buffer credits. * @handle: handle to 'extend' * @nblocks: nr blocks to try to extend by. * @revoke_records: number of revoke records to try to extend by. @@ -745,7 +745,7 @@ static void stop_this_handle(handle_t *handle) } /** - * int jbd2_journal_restart() - restart a handle . + * jbd2__journal_restart() - restart a handle . * @handle: handle to restart * @nblocks: nr credits requested * @revoke_records: number of revoke record credits requested @@ -815,7 +815,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks) EXPORT_SYMBOL(jbd2_journal_restart); /** - * void jbd2_journal_lock_updates () - establish a transaction barrier. + * jbd2_journal_lock_updates () - establish a transaction barrier. * @journal: Journal to establish a barrier on. * * This locks out any further updates from being started, and blocks @@ -874,7 +874,7 @@ void jbd2_journal_lock_updates(journal_t *journal) } /** - * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier + * jbd2_journal_unlock_updates () - release barrier * @journal: Journal to release the barrier on. * * Release a transaction barrier obtained with jbd2_journal_lock_updates(). @@ -1182,7 +1182,8 @@ out: } /** - * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. + * jbd2_journal_get_write_access() - notify intent to modify a buffer + * for metadata (not data) update. * @handle: transaction to add buffer modifications to * @bh: bh to be used for metadata writes * @@ -1226,7 +1227,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) * unlocked buffer beforehand. */ /** - * int jbd2_journal_get_create_access () - notify intent to use newly created bh + * jbd2_journal_get_create_access () - notify intent to use newly created bh * @handle: transaction to new buffer to * @bh: new buffer. * @@ -1306,7 +1307,7 @@ out: } /** - * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with + * jbd2_journal_get_undo_access() - Notify intent to modify metadata with * non-rewindable consequences * @handle: transaction * @bh: buffer to undo @@ -1383,7 +1384,7 @@ out: } /** - * void jbd2_journal_set_triggers() - Add triggers for commit writeout + * jbd2_journal_set_triggers() - Add triggers for commit writeout * @bh: buffer to trigger on * @type: struct jbd2_buffer_trigger_type containing the trigger(s). * @@ -1425,7 +1426,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, } /** - * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata + * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata * @handle: transaction to add buffer to. * @bh: buffer to mark * @@ -1593,7 +1594,7 @@ out: } /** - * void jbd2_journal_forget() - bforget() for potentially-journaled buffers. + * jbd2_journal_forget() - bforget() for potentially-journaled buffers. * @handle: transaction handle * @bh: bh to 'forget' * @@ -1762,7 +1763,7 @@ drop: } /** - * int jbd2_journal_stop() - complete a transaction + * jbd2_journal_stop() - complete a transaction * @handle: transaction to complete. * * All done for a particular handle. @@ -2080,7 +2081,7 @@ out: } /** - * int jbd2_journal_try_to_free_buffers() - try to free page buffers. + * jbd2_journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation * @page: to try and free * @@ -2411,7 +2412,7 @@ zap_buffer_unlocked: } /** - * void jbd2_journal_invalidatepage() + * jbd2_journal_invalidatepage() * @journal: journal to use for flush... * @page: page to flush * @offset: start of the range to invalidate diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1c49fd62ff2e..578ff196b3ce 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -401,7 +401,7 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** - * struct jbd_inode - The jbd_inode type is the structure linking inodes in + * struct jbd2_inode - The jbd_inode type is the structure linking inodes in * ordered mode present in a transaction so that we can sync them during commit. */ struct jbd2_inode { From f902b216501094495ff75834035656e8119c537f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 18 Nov 2020 16:30:32 +0100 Subject: [PATCH 698/710] ext4: fix bogus warning in ext4_update_dx_flag() The idea of the warning in ext4_update_dx_flag() is that we should warn when we are clearing EXT4_INODE_INDEX on a filesystem with metadata checksums enabled since after clearing the flag, checksums for internal htree nodes will become invalid. So there's no need to warn (or actually do anything) when EXT4_INODE_INDEX is not set. Link: https://lore.kernel.org/r/20201118153032.17281-1-jack@suse.cz Fixes: 48a34311953d ("ext4: fix checksum errors with indexed dirs") Reported-by: Eric Biggers Reviewed-by: Eric Biggers Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ext4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bf9429484462..65ecaf96d0a4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2695,7 +2695,8 @@ void ext4_insert_dentry(struct inode *inode, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) { + if (!ext4_has_feature_dir_index(inode->i_sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { /* ext4_iget() should have caught this... */ WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); From f5098e34dd4c774c3040e417960f1637e5daade8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Nov 2020 11:33:02 -0800 Subject: [PATCH 699/710] selftests/seccomp: powerpc: Fix typo in macro variable name A typo sneaked into the powerpc selftest. Fix the name so it builds again. Fixes: 46138329faea ("selftests/seccomp: powerpc: Fix seccomp return value testing") Acked-by: Michael Ellerman Link: https://lore.kernel.org/lkml/87y2ix2895.fsf@mpe.ellerman.id.au Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 4a180439ee9e..7f7ecfcd66db 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1758,10 +1758,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) * and the code is stored as a positive value. \ */ \ if (_result < 0) { \ - SYSCALL_RET(_regs) = -result; \ + SYSCALL_RET(_regs) = -_result; \ (_regs).ccr |= 0x10000000; \ } else { \ - SYSCALL_RET(_regs) = result; \ + SYSCALL_RET(_regs) = _result; \ (_regs).ccr &= ~0x10000000; \ } \ } while (0) From 4c222f31fb1db4d590503a181a6268ced9252379 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Nov 2020 11:54:43 -0800 Subject: [PATCH 700/710] selftests/seccomp: sh: Fix register names It looks like the seccomp selftests was never actually built for sh. This fixes it, though I don't have an environment to do a runtime test of it yet. Fixes: 0bb605c2c7f2b4b3 ("sh: Add SECCOMP_FILTER") Tested-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/lkml/a36d7b48-6598-1642-e403-0c77a86f416d@physik.fu-berlin.de Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7f7ecfcd66db..26c72f2b61b1 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1804,8 +1804,8 @@ TEST_F(TRACE_poke, getpid_runs_normally) #define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] #elif defined(__sh__) # define ARCH_REGS struct pt_regs -# define SYSCALL_NUM(_regs) (_regs).gpr[3] -# define SYSCALL_RET(_regs) (_regs).gpr[0] +# define SYSCALL_NUM(_regs) (_regs).regs[3] +# define SYSCALL_RET(_regs) (_regs).regs[0] #else # error "Do not know how to find your architecture's registers and syscalls" #endif From 450677dcb0cce5cb751538360b7196c28b733f3e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 21 Nov 2020 22:16:58 -0800 Subject: [PATCH 701/710] mm/madvise: fix memory leak from process_madvise The early return in process_madvise() will produce a memory leak. Fix it. Fixes: ecb8ac8b1f14 ("mm/madvise: introduce process_madvise() syscall: an external memory hinting API") Signed-off-by: Eric Dumazet Signed-off-by: Minchan Kim Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20201116155132.GA3805951@google.com Signed-off-by: Linus Torvalds --- mm/madvise.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 416a56b8e757..7e773f949ef9 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1231,8 +1231,6 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, ret = total_len - iov_iter_count(&iter); mmput(mm); - return ret; - release_task: put_task_struct(task); put_pid: From bc2dc4406c463174613047d8b7946e12c8808cda Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 21 Nov 2020 22:17:01 -0800 Subject: [PATCH 702/710] compiler-clang: remove version check for BPF Tracing bpftrace parses the kernel headers and uses Clang under the hood. Remove the version check when __BPF_TRACING__ is defined (as bpftrace does) so that this tool can continue to parse kernel headers, even with older clang sources. Fixes: commit 1f7a44f63e6c ("compiler-clang: add build check for clang 10.0.1") Reported-by: Chen Yu Reported-by: Jarkko Sakkinen Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Tested-by: Jarkko Sakkinen Acked-by: Jarkko Sakkinen Acked-by: Song Liu Acked-by: Nathan Chancellor Acked-by: Miguel Ojeda Link: https://lkml.kernel.org/r/20201104191052.390657-1-ndesaulniers@google.com Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index dd7233c48bf3..98cff1b4b088 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -8,8 +8,10 @@ + __clang_patchlevel__) #if CLANG_VERSION < 100001 +#ifndef __BPF_TRACING__ # error Sorry, your version of Clang is too old - please use 10.0.1 or newer. #endif +#endif /* Compiler specific definitions for Clang compiler */ From a927bd6ba952d13c52b8b385030943032f659a3e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 21 Nov 2020 22:17:05 -0800 Subject: [PATCH 703/710] mm: fix phys_to_target_node() and memory_add_physaddr_to_nid() exports The core-mm has a default __weak implementation of phys_to_target_node() to mirror the weak definition of memory_add_physaddr_to_nid(). That symbol is exported for modules. However, while the export in mm/memory_hotplug.c exported the symbol in the configuration cases of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=y ...and: CONFIG_NUMA_KEEP_MEMINFO=n CONFIG_MEMORY_HOTPLUG=y ...it failed to export the symbol in the case of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=n Not only is that broken, but Christoph points out that the kernel should not be exporting any __weak symbol, which means that memory_add_physaddr_to_nid() example that phys_to_target_node() copied is broken too. Rework the definition of phys_to_target_node() and memory_add_physaddr_to_nid() to not require weak symbols. Move to the common arch override design-pattern of an asm header defining a symbol to replace the default implementation. The only common header that all memory_add_physaddr_to_nid() producing architectures implement is asm/sparsemem.h. In fact, powerpc already defines its memory_add_physaddr_to_nid() helper in sparsemem.h. Double-down on that observation and define phys_to_target_node() where necessary in asm/sparsemem.h. An alternate consideration that was discarded was to put this override in asm/numa.h, but that entangles with the definition of MAX_NUMNODES relative to the inclusion of linux/nodemask.h, and requires powerpc to grow a new header. The dependency on NUMA_KEEP_MEMINFO for DEV_DAX_HMEM_DEVICES is invalid now that the symbol is properly exported / stubbed in all combinations of CONFIG_NUMA_KEEP_MEMINFO and CONFIG_MEMORY_HOTPLUG. [dan.j.williams@intel.com: v4] Link: https://lkml.kernel.org/r/160461461867.1505359.5301571728749534585.stgit@dwillia2-desk3.amr.corp.intel.com [dan.j.williams@intel.com: powerpc: fix create_section_mapping compile warning] Link: https://lkml.kernel.org/r/160558386174.2948926.2740149041249041764.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: a035b6bf863e ("mm/memory_hotplug: introduce default phys_to_target_node() implementation") Reported-by: Randy Dunlap Reported-by: Thomas Gleixner Reported-by: kernel test robot Reported-by: Christoph Hellwig Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Tested-by: Randy Dunlap Tested-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Christoph Hellwig Cc: Joao Martins Cc: Tony Luck Cc: Fenghua Yu Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Vishal Verma Cc: Stephen Rothwell Link: https://lkml.kernel.org/r/160447639846.1133764.7044090803980177548.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/sparsemem.h | 6 ++++++ arch/powerpc/include/asm/mmzone.h | 5 +++++ arch/powerpc/include/asm/sparsemem.h | 5 ++--- arch/powerpc/mm/mem.c | 1 + arch/x86/include/asm/sparsemem.h | 10 ++++++++++ arch/x86/mm/numa.c | 2 ++ drivers/dax/Kconfig | 1 - include/linux/memory_hotplug.h | 14 ------------- include/linux/numa.h | 30 +++++++++++++++++++++++++++- mm/memory_hotplug.c | 18 ----------------- 10 files changed, 55 insertions(+), 37 deletions(-) diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h index 336d0570e1fa..dd8c166ffd7b 100644 --- a/arch/ia64/include/asm/sparsemem.h +++ b/arch/ia64/include/asm/sparsemem.h @@ -18,4 +18,10 @@ #endif #endif /* CONFIG_SPARSEMEM */ + +#ifdef CONFIG_MEMORY_HOTPLUG +int memory_add_physaddr_to_nid(u64 addr); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif + #endif /* _ASM_IA64_SPARSEMEM_H */ diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index 91c69ff53a8a..6cda76b57c5d 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -46,5 +46,10 @@ u64 memory_hotplug_max(void); #define __HAVE_ARCH_RESERVED_KERNEL_PAGES #endif +#ifdef CONFIG_MEMORY_HOTPLUG +extern int create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 1e6fa371cc38..d072866842e4 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -13,9 +13,9 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, - int nid, pgprot_t prot); extern int remove_section_mapping(unsigned long start, unsigned long end); +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid #ifdef CONFIG_NUMA extern int hot_add_scn_to_nid(unsigned long scn_addr); @@ -26,6 +26,5 @@ static inline int hot_add_scn_to_nid(unsigned long scn_addr) } #endif /* CONFIG_NUMA */ #endif /* CONFIG_MEMORY_HOTPLUG */ - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SPARSEMEM_H */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 01ec2a252f09..3fc325bebe4d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -50,6 +50,7 @@ #include #include #include +#include #include diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 6bfc878f6771..6a9ccc1b2be5 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -28,4 +28,14 @@ #endif #endif /* CONFIG_SPARSEMEM */ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_NUMA_KEEP_MEMINFO +extern int phys_to_target_node(phys_addr_t start); +#define phys_to_target_node phys_to_target_node +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_SPARSEMEM_H */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 44148691d78b..5eb4dc2b97da 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -938,6 +938,7 @@ int phys_to_target_node(phys_addr_t start) return meminfo_to_nid(&numa_reserved_meminfo, start); } +EXPORT_SYMBOL_GPL(phys_to_target_node); int memory_add_physaddr_to_nid(u64 start) { @@ -947,4 +948,5 @@ int memory_add_physaddr_to_nid(u64 start) nid = numa_meminfo.blk[0].nid; return nid; } +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 567428e10b7b..d2834c2cfa10 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -50,7 +50,6 @@ config DEV_DAX_HMEM Say M if unsure. config DEV_DAX_HMEM_DEVICES - depends on NUMA_KEEP_MEMINFO # for phys_to_target_node() depends on DEV_DAX_HMEM && DAX=y def_bool y diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index d65c6fdc5cfc..551093b74596 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -281,20 +281,6 @@ static inline bool movable_node_is_enabled(void) } #endif /* ! CONFIG_MEMORY_HOTPLUG */ -#ifdef CONFIG_NUMA -extern int memory_add_physaddr_to_nid(u64 start); -extern int phys_to_target_node(u64 start); -#else -static inline int memory_add_physaddr_to_nid(u64 start) -{ - return 0; -} -static inline int phys_to_target_node(u64 start) -{ - return 0; -} -#endif - #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * pgdat resizing functions diff --git a/include/linux/numa.h b/include/linux/numa.h index 8cb33ccfb671..cb44cfe2b725 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -21,13 +21,41 @@ #endif #ifdef CONFIG_NUMA +#include +#include + /* Generic implementation available */ int numa_map_to_online_node(int node); -#else + +#ifndef memory_add_physaddr_to_nid +static inline int memory_add_physaddr_to_nid(u64 start) +{ + pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#ifndef phys_to_target_node +static inline int phys_to_target_node(u64 start) +{ + pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#else /* !CONFIG_NUMA */ static inline int numa_map_to_online_node(int node) { return NUMA_NO_NODE; } +static inline int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +static inline int phys_to_target_node(u64 start) +{ + return 0; +} #endif #endif /* _LINUX_NUMA_H */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b44d4c7ba73b..63b2e46b6555 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -350,24 +350,6 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, return err; } -#ifdef CONFIG_NUMA -int __weak memory_add_physaddr_to_nid(u64 start) -{ - pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n", - start); - return 0; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); - -int __weak phys_to_target_node(u64 start) -{ - pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", - start); - return 0; -} -EXPORT_SYMBOL_GPL(phys_to_target_node); -#endif - /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, From 4349a83a3190c1d4414371161b0f4a4c3ccd3f9d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 21 Nov 2020 22:17:08 -0800 Subject: [PATCH 704/710] mm: fix readahead_page_batch for retry entries Both btrfs and fuse have reported faults caused by seeing a retry entry instead of the page they were looking for. This was caused by a missing check in the iterator. As can be seen in the below panic log, the accessing 0x402 causes a panic. In the xarray.h, 0x402 means RETRY_ENTRY. BUG: kernel NULL pointer dereference, address: 0000000000000402 CPU: 14 PID: 306003 Comm: as Not tainted 5.9.0-1-amd64 #1 Debian 5.9.1-1 Hardware name: Lenovo ThinkSystem SR665/7D2VCTO1WW, BIOS D8E106Q-1.01 05/30/2020 RIP: 0010:fuse_readahead+0x152/0x470 [fuse] Code: 41 8b 57 18 4c 8d 54 10 ff 4c 89 d6 48 8d 7c 24 10 e8 d2 e3 28 f9 48 85 c0 0f 84 fe 00 00 00 44 89 f2 49 89 04 d4 44 8d 72 01 <48> 8b 10 41 8b 4f 1c 48 c1 ea 10 83 e2 01 80 fa 01 19 d2 81 e2 01 RSP: 0018:ffffad99ceaebc50 EFLAGS: 00010246 RAX: 0000000000000402 RBX: 0000000000000001 RCX: 0000000000000002 RDX: 0000000000000000 RSI: ffff94c5af90bd98 RDI: ffffad99ceaebc60 RBP: ffff94ddc1749a00 R08: 0000000000000402 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000100 R12: ffff94de6c429ce0 R13: ffff94de6c4d3700 R14: 0000000000000001 R15: ffffad99ceaebd68 FS: 00007f228c5c7040(0000) GS:ffff94de8ed80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000402 CR3: 0000001dbd9b4000 CR4: 0000000000350ee0 Call Trace: read_pages+0x83/0x270 page_cache_readahead_unbounded+0x197/0x230 generic_file_buffered_read+0x57a/0xa20 new_sync_read+0x112/0x1a0 vfs_read+0xf8/0x180 ksys_read+0x5f/0xe0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 042124cc64c3 ("mm: add new readahead_control API") Reported-by: David Sterba Reported-by: Wonhyuk Yang Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Link: https://lkml.kernel.org/r/20201103142852.8543-1-willy@infradead.org Link: https://lkml.kernel.org/r/20201103124349.16722-1-vvghjk1234@gmail.com Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e1e19c1f9ec9..d5570deff400 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -906,6 +906,8 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, xas_set(&xas, rac->_index); rcu_read_lock(); xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) { + if (xas_retry(&xas, page)) + continue; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; From 8faeb1ffd79593c9cd8a2a80ecdda371e3b826cb Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Sat, 21 Nov 2020 22:17:12 -0800 Subject: [PATCH 705/710] mm: memcg/slab: fix root memcg vmstats If we reparent the slab objects to the root memcg, when we free the slab object, we need to update the per-memcg vmstats to keep it correct for the root memcg. Now this at least affects the vmstat of NR_KERNEL_STACK_KB for !CONFIG_VMAP_STACK when the thread stack size is smaller than the PAGE_SIZE. David said: "I assume that without this fix that the root memcg's vmstat would always be inflated if we reparented" Fixes: ec9f02384f60 ("mm: workingset: fix vmstat counters for shadow nodes") Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Johannes Weiner Acked-by: David Rientjes Cc: Michal Hocko Cc: Vladimir Davydov Cc: Christopher Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Roman Gushchin Cc: Vlastimil Babka Cc: Yafang Shao Cc: Chris Down Cc: [5.3+] Link: https://lkml.kernel.org/r/20201110031015.15715-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3dcbf24d2227..29459a6ce1c7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -867,8 +867,13 @@ void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val) rcu_read_lock(); memcg = mem_cgroup_from_obj(p); - /* Untracked pages have no memcg, no lruvec. Update only the node */ - if (!memcg || memcg == root_mem_cgroup) { + /* + * Untracked pages have no memcg, no lruvec. Update only the + * node. If we reparent the slab objects to the root memcg, + * when we free the slab object, we need to update the per-memcg + * vmstats to keep it correct for the root memcg. + */ + if (!memcg) { __mod_node_page_state(pgdat, idx, val); } else { lruvec = mem_cgroup_lruvec(memcg, pgdat); From bfe8cc1db02ab243c62780f17fc57f65bde0afe1 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Sat, 21 Nov 2020 22:17:15 -0800 Subject: [PATCH 706/710] mm/userfaultfd: do not access vma->vm_mm after calling handle_userfault() Alexander reported a syzkaller / KASAN finding on s390, see below for complete output. In do_huge_pmd_anonymous_page(), the pre-allocated pagetable will be freed in some cases. In the case of userfaultfd_missing(), this will happen after calling handle_userfault(), which might have released the mmap_lock. Therefore, the following pte_free(vma->vm_mm, pgtable) will access an unstable vma->vm_mm, which could have been freed or re-used already. For all architectures other than s390 this will go w/o any negative impact, because pte_free() simply frees the page and ignores the passed-in mm. The implementation for SPARC32 would also access mm->page_table_lock for pte_free(), but there is no THP support in SPARC32, so the buggy code path will not be used there. For s390, the mm->context.pgtable_list is being used to maintain the 2K pagetable fragments, and operating on an already freed or even re-used mm could result in various more or less subtle bugs due to list / pagetable corruption. Fix this by calling pte_free() before handle_userfault(), similar to how it is already done in __do_huge_pmd_anonymous_page() for the WRITE / non-huge_zero_page case. Commit 6b251fc96cf2c ("userfaultfd: call handle_userfault() for userfaultfd_missing() faults") actually introduced both, the do_huge_pmd_anonymous_page() and also __do_huge_pmd_anonymous_page() changes wrt to calling handle_userfault(), but only in the latter case it put the pte_free() before calling handle_userfault(). BUG: KASAN: use-after-free in do_huge_pmd_anonymous_page+0xcda/0xd90 mm/huge_memory.c:744 Read of size 8 at addr 00000000962d6988 by task syz-executor.0/9334 CPU: 1 PID: 9334 Comm: syz-executor.0 Not tainted 5.10.0-rc1-syzkaller-07083-g4c9720875573 #0 Hardware name: IBM 3906 M04 701 (KVM/Linux) Call Trace: do_huge_pmd_anonymous_page+0xcda/0xd90 mm/huge_memory.c:744 create_huge_pmd mm/memory.c:4256 [inline] __handle_mm_fault+0xe6e/0x1068 mm/memory.c:4480 handle_mm_fault+0x288/0x748 mm/memory.c:4607 do_exception+0x394/0xae0 arch/s390/mm/fault.c:479 do_dat_exception+0x34/0x80 arch/s390/mm/fault.c:567 pgm_check_handler+0x1da/0x22c arch/s390/kernel/entry.S:706 copy_from_user_mvcos arch/s390/lib/uaccess.c:111 [inline] raw_copy_from_user+0x3a/0x88 arch/s390/lib/uaccess.c:174 _copy_from_user+0x48/0xa8 lib/usercopy.c:16 copy_from_user include/linux/uaccess.h:192 [inline] __do_sys_sigaltstack kernel/signal.c:4064 [inline] __s390x_sys_sigaltstack+0xc8/0x240 kernel/signal.c:4060 system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 Allocated by task 9334: slab_alloc_node mm/slub.c:2891 [inline] slab_alloc mm/slub.c:2899 [inline] kmem_cache_alloc+0x118/0x348 mm/slub.c:2904 vm_area_dup+0x9c/0x2b8 kernel/fork.c:356 __split_vma+0xba/0x560 mm/mmap.c:2742 split_vma+0xca/0x108 mm/mmap.c:2800 mlock_fixup+0x4ae/0x600 mm/mlock.c:550 apply_vma_lock_flags+0x2c6/0x398 mm/mlock.c:619 do_mlock+0x1aa/0x718 mm/mlock.c:711 __do_sys_mlock2 mm/mlock.c:738 [inline] __s390x_sys_mlock2+0x86/0xa8 mm/mlock.c:728 system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 Freed by task 9333: slab_free mm/slub.c:3142 [inline] kmem_cache_free+0x7c/0x4b8 mm/slub.c:3158 __vma_adjust+0x7b2/0x2508 mm/mmap.c:960 vma_merge+0x87e/0xce0 mm/mmap.c:1209 userfaultfd_release+0x412/0x6b8 fs/userfaultfd.c:868 __fput+0x22c/0x7a8 fs/file_table.c:281 task_work_run+0x200/0x320 kernel/task_work.c:151 tracehook_notify_resume include/linux/tracehook.h:188 [inline] do_notify_resume+0x100/0x148 arch/s390/kernel/signal.c:538 system_call+0xe6/0x28c arch/s390/kernel/entry.S:416 The buggy address belongs to the object at 00000000962d6948 which belongs to the cache vm_area_struct of size 200 The buggy address is located 64 bytes inside of 200-byte region [00000000962d6948, 00000000962d6a10) The buggy address belongs to the page: page:00000000313a09fe refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x962d6 flags: 0x3ffff00000000200(slab) raw: 3ffff00000000200 000040000257e080 0000000c0000000c 000000008020ba00 raw: 0000000000000000 000f001e00000000 ffffffff00000001 0000000096959501 page dumped because: kasan: bad access detected page->mem_cgroup:0000000096959501 Memory state around the buggy address: 00000000962d6880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000000962d6900: 00 fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb >00000000962d6980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ 00000000962d6a00: fb fb fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00000000962d6a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Fixes: 6b251fc96cf2c ("userfaultfd: call handle_userfault() for userfaultfd_missing() faults") Reported-by: Alexander Egorenkov Signed-off-by: Gerald Schaefer Signed-off-by: Andrew Morton Cc: Andrea Arcangeli Cc: Heiko Carstens Cc: [4.3+] Link: https://lkml.kernel.org/r/20201110190329.11920-1-gerald.schaefer@linux.ibm.com Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9474dbc150ed..ec2bb93f7431 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -710,7 +710,6 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) transparent_hugepage_use_zero_page()) { pgtable_t pgtable; struct page *zero_page; - bool set; vm_fault_t ret; pgtable = pte_alloc_one(vma->vm_mm); if (unlikely(!pgtable)) @@ -723,25 +722,25 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); ret = 0; - set = false; if (pmd_none(*vmf->pmd)) { ret = check_stable_address_space(vma->vm_mm); if (ret) { spin_unlock(vmf->ptl); + pte_free(vma->vm_mm, pgtable); } else if (userfaultfd_missing(vma)) { spin_unlock(vmf->ptl); + pte_free(vma->vm_mm, pgtable); ret = handle_userfault(vmf, VM_UFFD_MISSING); VM_BUG_ON(ret & VM_FAULT_FALLBACK); } else { set_huge_zero_page(pgtable, vma->vm_mm, vma, haddr, vmf->pmd, zero_page); spin_unlock(vmf->ptl); - set = true; } - } else + } else { spin_unlock(vmf->ptl); - if (!set) pte_free(vma->vm_mm, pgtable); + } return ret; } gfp = alloc_hugepage_direct_gfpmask(vma); From 488dac0c9237647e9b8f788b6a342595bfa40bda Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 21 Nov 2020 22:17:19 -0800 Subject: [PATCH 707/710] libfs: fix error cast of negative value in simple_attr_write() The attr->set() receive a value of u64, but simple_strtoll() is used for doing the conversion. It will lead to the error cast if user inputs a negative value. Use kstrtoull() instead of simple_strtoll() to convert a string got from the user to an unsigned value. The former will return '-EINVAL' if it gets a negetive value, but the latter can't handle the situation correctly. Make 'val' unsigned long long as what kstrtoull() takes, this will eliminate the compile warning on no 64-bit architectures. Fixes: f7b88631a897 ("fs/libfs.c: fix simple_attr_write() on 32bit machines") Signed-off-by: Yicong Yang Signed-off-by: Andrew Morton Cc: Al Viro Link: https://lkml.kernel.org/r/1605341356-11872-1-git-send-email-yangyicong@hisilicon.com Signed-off-by: Linus Torvalds --- fs/libfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index fc34361c1489..7124c2e8df2f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -959,7 +959,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { struct simple_attr *attr; - u64 val; + unsigned long long val; size_t size; ssize_t ret; @@ -977,7 +977,9 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, goto out; attr->set_buf[size] = '\0'; - val = simple_strtoll(attr->set_buf, NULL, 0); + ret = kstrtoull(attr->set_buf, 0, &val); + if (ret) + goto out; ret = attr->set(attr->data, val); if (ret == 0) ret = len; /* on success, claim we got the whole input */ From 66383800df9cbdbf3b0c34d5a51bf35bcdb72fd2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 21 Nov 2020 22:17:22 -0800 Subject: [PATCH 708/710] mm: fix madvise WILLNEED performance problem The calculation of the end page index was incorrect, leading to a regression of 70% when running stress-ng. With this fix, we instead see a performance improvement of 3%. Fixes: e6e88712e43b ("mm: optimise madvise WILLNEED") Reported-by: kernel test robot Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Tested-by: Xing Zhengjun Acked-by: Johannes Weiner Cc: William Kucharski Cc: Feng Tang Cc: "Chen, Rong A" Link: https://lkml.kernel.org/r/20201109134851.29692-1-willy@infradead.org Signed-off-by: Linus Torvalds --- mm/madvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index 7e773f949ef9..a8d8d48a57fe 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -226,7 +226,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma, struct address_space *mapping) { XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start)); - pgoff_t end_index = end / PAGE_SIZE; + pgoff_t end_index = linear_page_index(vma, end + PAGE_SIZE - 1); struct page *page; rcu_read_lock(); From a9e5c87ca7443d09fb530fffa4d96ce1c76dbe4d Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 22 Nov 2020 13:13:45 +0000 Subject: [PATCH 709/710] afs: Fix speculative status fetch going out of order wrt to modifications When doing a lookup in a directory, the afs filesystem uses a bulk status fetch to speculatively retrieve the statuses of up to 48 other vnodes found in the same directory and it will then either update extant inodes or create new ones - effectively doing 'lookup ahead'. To avoid the possibility of deadlocking itself, however, the filesystem doesn't lock all of those inodes; rather just the directory inode is locked (by the VFS). When the operation completes, afs_inode_init_from_status() or afs_apply_status() is called, depending on whether the inode already exists, to commit the new status. A case exists, however, where the speculative status fetch operation may straddle a modification operation on one of those vnodes. What can then happen is that the speculative bulk status RPC retrieves the old status, and whilst that is happening, the modification happens - which returns an updated status, then the modification status is committed, then we attempt to commit the speculative status. This results in something like the following being seen in dmesg: kAFS: vnode modified {100058:861} 8->9 YFS.InlineBulkStatus showing that for vnode 861 on volume 100058, we saw YFS.InlineBulkStatus say that the vnode had data version 8 when we'd already recorded version 9 due to a local modification. This was causing the cache to be invalidated for that vnode when it shouldn't have been. If it happens on a data file, this might lead to local changes being lost. Fix this by ignoring speculative status updates if the data version doesn't match the expected value. Note that it is possible to get a DV regression if a volume gets restored from a backup - but we should get a callback break in such a case that should trigger a recheck anyway. It might be worth checking the volume creation time in the volsync info and, if a change is observed in that (as would happen on a restore), invalidate all caches associated with the volume. Fixes: 5cf9dd55a0ec ("afs: Prospectively look up extra files when doing a single lookup") Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/dir.c | 1 + fs/afs/inode.c | 8 ++++++++ fs/afs/internal.h | 1 + 3 files changed, 10 insertions(+) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 1bb5b9d7f0a2..9068d5578a26 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -823,6 +823,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, vp->cb_break_before = afs_calc_vnode_cb_break(vnode); vp->vnode = vnode; vp->put_vnode = true; + vp->speculative = true; /* vnode not locked */ } } } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 0fe8844b4bee..b0d7b892090d 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -294,6 +294,13 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v op->flags &= ~AFS_OPERATION_DIR_CONFLICT; } } else if (vp->scb.have_status) { + if (vp->dv_before + vp->dv_delta != vp->scb.status.data_version && + vp->speculative) + /* Ignore the result of a speculative bulk status fetch + * if it splits around a modification op, thereby + * appearing to regress the data version. + */ + goto out; afs_apply_status(op, vp); if (vp->scb.have_cb) afs_apply_callback(op, vp); @@ -305,6 +312,7 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v } } +out: write_sequnlock(&vnode->cb_lock); if (vp->scb.have_status) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 14d5d75f4b6e..0d150a29e39e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -755,6 +755,7 @@ struct afs_vnode_param { bool update_ctime:1; /* Need to update the ctime */ bool set_size:1; /* Must update i_size */ bool op_unlinked:1; /* True if file was unlinked by op */ + bool speculative:1; /* T if speculative status fetch (no vnode lock) */ }; /* From 418baf2c28f3473039f2f7377760bd8f6897ae18 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Nov 2020 15:36:08 -0800 Subject: [PATCH 710/710] Linux 5.10-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e2c3f65c4721..ed081e3eb800 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Kleptomaniac Octopus # *DOCUMENTATION*