From eca7de7cdc382eb6e0d344c07b1449ed75f5b435 Mon Sep 17 00:00:00 2001 From: Wang Yong Date: Mon, 13 Feb 2023 14:08:08 +0800 Subject: [PATCH 01/50] delayacct: improve the average delay precision of getdelay tool to microsecond Improve the average delay precision of getdelay tool to microsecond. When using the getdelay tool, it is sometimes found that the average delay except CPU is not 0, but display is 0, because the precision is too low. For example, see delay average of SWAP below when using ZRAM. print delayacct stats ON PID 32915 CPU count real total virtual total delay total delay average 339202 2793871936 9233585504 7951112 0.000ms IO count delay total delay average 41 419296904 10ms SWAP count delay total delay average 242589 1045792384 0ms This wrong display is misleading, so improve the millisecond precision of the average delay to microsecond just like CPU. Then user would get more accurate information of delay time. Link: https://lkml.kernel.org/r/202302131408087983857@zte.com.cn Signed-off-by: Wang Yong Reviewed-by: Yang Yang Cc: David Hildenbrand Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 14 +++++------ .../zh_CN/accounting/delay-accounting.rst | 10 ++++---- tools/accounting/getdelays.c | 24 +++++++++---------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 7103b62ba6d7..79f537c9f160 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -109,17 +109,17 @@ Get sum of delays, since system boot, for all pids with tgid 5:: CPU count real total virtual total delay total delay average 8 7000000 6872122 3382277 0.423ms IO count delay total delay average - 0 0 0ms + 0 0 0.000ms SWAP count delay total delay average - 0 0 0ms + 0 0 0.000ms RECLAIM count delay total delay average - 0 0 0ms + 0 0 0.000ms THRASHING count delay total delay average - 0 0 
0ms + 0 0 0.000ms COMPACT count delay total delay average - 0 0 0ms - WPCOPY count delay total delay average - 0 0 0ms + 0 0 0.000ms + WPCOPY count delay total delay average + 0 0 0.000ms Get IO accounting for pid 1, it works only with -p:: diff --git a/Documentation/translations/zh_CN/accounting/delay-accounting.rst b/Documentation/translations/zh_CN/accounting/delay-accounting.rst index a01dc3d5b0db..7b8693ccf80a 100644 --- a/Documentation/translations/zh_CN/accounting/delay-accounting.rst +++ b/Documentation/translations/zh_CN/accounting/delay-accounting.rst @@ -92,15 +92,15 @@ getdelays命令的一般格式:: CPU count real total virtual total delay total delay average 8 7000000 6872122 3382277 0.423ms IO count delay total delay average - 0 0 0ms + 0 0 0.000ms SWAP count delay total delay average - 0 0 0ms + 0 0 0.000ms RECLAIM count delay total delay average - 0 0 0ms + 0 0 0.000ms THRASHING count delay total delay average - 0 0 0ms + 0 0 0.000ms COMPACT count delay total delay average - 0 0 0ms + 0 0 0.000ms WPCOPY count delay total delay average 0 0 0ms diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 938dec0dfaad..23a15d8f2bf4 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -198,17 +198,17 @@ static void print_delayacct(struct taskstats *t) printf("\n\nCPU %15s%15s%15s%15s%15s\n" " %15llu%15llu%15llu%15llu%15.3fms\n" "IO %15s%15s%15s\n" - " %15llu%15llu%15llums\n" + " %15llu%15llu%15.3fms\n" "SWAP %15s%15s%15s\n" - " %15llu%15llu%15llums\n" + " %15llu%15llu%15.3fms\n" "RECLAIM %12s%15s%15s\n" - " %15llu%15llu%15llums\n" + " %15llu%15llu%15.3fms\n" "THRASHING%12s%15s%15s\n" - " %15llu%15llu%15llums\n" + " %15llu%15llu%15.3fms\n" "COMPACT %12s%15s%15s\n" - " %15llu%15llu%15llums\n" + " %15llu%15llu%15.3fms\n" "WPCOPY %12s%15s%15s\n" - " %15llu%15llu%15llums\n", + " %15llu%15llu%15.3fms\n", "count", "real total", "virtual total", "delay total", "delay average", (unsigned long long)t->cpu_count, @@ -219,27 +219,27 
@@ static void print_delayacct(struct taskstats *t) "count", "delay total", "delay average", (unsigned long long)t->blkio_count, (unsigned long long)t->blkio_delay_total, - average_ms(t->blkio_delay_total, t->blkio_count), + average_ms((double)t->blkio_delay_total, t->blkio_count), "count", "delay total", "delay average", (unsigned long long)t->swapin_count, (unsigned long long)t->swapin_delay_total, - average_ms(t->swapin_delay_total, t->swapin_count), + average_ms((double)t->swapin_delay_total, t->swapin_count), "count", "delay total", "delay average", (unsigned long long)t->freepages_count, (unsigned long long)t->freepages_delay_total, - average_ms(t->freepages_delay_total, t->freepages_count), + average_ms((double)t->freepages_delay_total, t->freepages_count), "count", "delay total", "delay average", (unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_delay_total, - average_ms(t->thrashing_delay_total, t->thrashing_count), + average_ms((double)t->thrashing_delay_total, t->thrashing_count), "count", "delay total", "delay average", (unsigned long long)t->compact_count, (unsigned long long)t->compact_delay_total, - average_ms(t->compact_delay_total, t->compact_count), + average_ms((double)t->compact_delay_total, t->compact_count), "count", "delay total", "delay average", (unsigned long long)t->wpcopy_count, (unsigned long long)t->wpcopy_delay_total, - average_ms(t->wpcopy_delay_total, t->wpcopy_count)); + average_ms((double)t->wpcopy_delay_total, t->wpcopy_count)); } static void task_context_switch_counts(struct taskstats *t) From 58deeb4ef3b054498747d0929d94ac53ab90981f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 22 Feb 2023 19:42:58 -0800 Subject: [PATCH 02/50] ia64: mm/contig: fix section mismatch warning/error alloc_per_cpu_data() is called by find_memory(), which is marked as __init. Therefore alloc_per_cpu_data() can also be marked as __init to remedy this modpost problem. 
WARNING: modpost: vmlinux.o: section mismatch in reference: alloc_per_cpu_data (section: .text) -> memblock_alloc_try_nid (section: .init.text) Link: https://lkml.kernel.org/r/20230223034258.12917-1-rdunlap@infradead.org Fixes: 4b9ddc7cf272 ("[IA64] Fix section mismatch in contig.c version of per_cpu_init()") Signed-off-by: Randy Dunlap Cc: Christoph Hellwig Signed-off-by: Andrew Morton --- arch/ia64/mm/contig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 24901d809301..1e9eaa107eb7 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -77,7 +77,7 @@ skip: return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; } -static inline void +static inline __init void alloc_per_cpu_data(void) { size_t size = PERCPU_PAGE_SIZE * num_possible_cpus(); From 0de155752b152d6bcd96b5b5bf20af336abd183a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 22 Feb 2023 19:43:09 -0800 Subject: [PATCH 03/50] ia64: salinfo: placate defined-but-not-used warning When CONFIG_PROC_FS is not set, proc_salinfo_show() is not used. Mark the function as __maybe_unused to quieten the warning message. 
../arch/ia64/kernel/salinfo.c:584:12: warning: 'proc_salinfo_show' defined but not used [-Wunused-function] 584 | static int proc_salinfo_show(struct seq_file *m, void *v) | ^~~~~~~~~~~~~~~~~ Link: https://lkml.kernel.org/r/20230223034309.13375-1-rdunlap@infradead.org Fixes: 3f3942aca6da ("proc: introduce proc_create_single{,_data}") Signed-off-by: Randy Dunlap Cc: Christoph Hellwig Signed-off-by: Andrew Morton --- arch/ia64/kernel/salinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index bd3ba276e69c..03b632c56899 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -581,7 +581,7 @@ static int salinfo_cpu_pre_down(unsigned int cpu) * 'data' contains an integer that corresponds to the feature we're * testing */ -static int proc_salinfo_show(struct seq_file *m, void *v) +static int __maybe_unused proc_salinfo_show(struct seq_file *m, void *v) { unsigned long data = (unsigned long)v; seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n"); From b99b258899d7a59e60169f402e5bce6b8272ce97 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 31 Jan 2023 23:08:40 +0800 Subject: [PATCH 04/50] proc: remove mark_inode_dirty() in .setattr() procfs' .setattr() has updated i_uid, i_gid and i_mode into proc dirent, we don't need to call mark_inode_dirty() for delayed update, remove it. 
Link: https://lkml.kernel.org/r/20230131150840.34726-1-chao@kernel.org Signed-off-by: Chao Yu Reviewed-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/base.c | 1 - fs/proc/generic.c | 1 - fs/proc/proc_sysctl.c | 1 - 3 files changed, 3 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 5e0e0ccd47aa..e34e06091775 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -699,7 +699,6 @@ int proc_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return error; setattr_copy(&nop_mnt_idmap, inode, attr); - mark_inode_dirty(inode); return 0; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8379593fa4bb..42ae38ff6e7e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -127,7 +127,6 @@ static int proc_notify_change(struct mnt_idmap *idmap, return error; setattr_copy(&nop_mnt_idmap, inode, iattr); - mark_inode_dirty(inode); proc_set_user(de, inode->i_uid, inode->i_gid); de->mode = inode->i_mode; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5851eb5bc726..875771bf1f93 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -841,7 +841,6 @@ static int proc_sys_setattr(struct mnt_idmap *idmap, return error; setattr_copy(&nop_mnt_idmap, inode, attr); - mark_inode_dirty(inode); return 0; } From f9641a36d38daee3ccf2f3d832cbc9c3442b6078 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 30 Nov 2022 16:12:31 +0100 Subject: [PATCH 05/50] nfs: remove empty if statement from nfs3_prepare_get_acl Remove empty if statement from nfs3_prepare_get_acl and update comment to follow the one from the referred fs/posix_acl.c:get_acl(). No functional change intended. 
Link: https://lkml.kernel.org/r/20221130151231.3654-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Cc: Trond Myklebust Cc: Anna Schumaker Signed-off-by: Andrew Morton --- fs/nfs/nfs3acl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 1247f544a440..ccc4da54cc93 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -21,9 +21,8 @@ static void nfs3_prepare_get_acl(struct posix_acl **p) { struct posix_acl *sentinel = uncached_acl_sentinel(current); - if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) { - /* Not the first reader or sentinel already in place. */ - } + /* If the ACL isn't being read yet, set our sentinel. */ + cmpxchg(p, ACL_NOT_CACHED, sentinel); } static void nfs3_complete_get_acl(struct posix_acl **p, struct posix_acl *acl) From 7b32137bc027701cc9c57967af64fcdea0ef113b Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 27 Feb 2023 18:17:03 +0100 Subject: [PATCH 06/50] kcov: improve documentation Improve KCOV documentation: - Use KCOV instead of kcov, as the former is more widely-used. - Mention Clang in compiler requirements. - Use ``annotations`` for inline code. - Rework remote coverage collection documentation for better clarity. - Various smaller changes. 
[andreyknvl@google.com: v2] Link: https://lkml.kernel.org/r/583f41c49eef15210fa813e8229730d11427efa7.1677614637.git.andreyknvl@google.com [andreyknvl@google.com: fix ``annotation`` for KCOV_REMOTE_ENABLE] Link: https://lkml.kernel.org/r/72be5c215c275f35891229b90622ed859f196a46.1677684837.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/0b5efd70e31bba7912cf9a6c951f0e76a8df27df.1677517724.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Bagas Sanjaya Cc: Alexander Potapenko Cc: Marco Elver Signed-off-by: Andrew Morton --- Documentation/dev-tools/kcov.rst | 159 +++++++++++++++++++------------ 1 file changed, 97 insertions(+), 62 deletions(-) diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index d83c9ab49427..6611434e2dd2 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -1,42 +1,50 @@ -kcov: code coverage for fuzzing +KCOV: code coverage for fuzzing =============================== -kcov exposes kernel code coverage information in a form suitable for coverage- -guided fuzzing (randomized testing). Coverage data of a running kernel is -exported via the "kcov" debugfs file. Coverage collection is enabled on a task -basis, and thus it can capture precise coverage of a single system call. +KCOV collects and exposes kernel code coverage information in a form suitable +for coverage-guided fuzzing. Coverage data of a running kernel is exported via +the ``kcov`` debugfs file. Coverage collection is enabled on a task basis, and +thus KCOV can capture precise coverage of a single system call. -Note that kcov does not aim to collect as much coverage as possible. It aims -to collect more or less stable coverage that is function of syscall inputs. -To achieve this goal it does not collect coverage in soft/hard interrupts -and instrumentation of some inherently non-deterministic parts of kernel is -disabled (e.g. scheduler, locking). 
+Note that KCOV does not aim to collect as much coverage as possible. It aims +to collect more or less stable coverage that is a function of syscall inputs. +To achieve this goal, it does not collect coverage in soft/hard interrupts +(unless remote coverage collection is enabled, see below) and from some +inherently non-deterministic parts of the kernel (e.g. scheduler, locking). -kcov is also able to collect comparison operands from the instrumented code -(this feature currently requires that the kernel is compiled with clang). +Besides collecting code coverage, KCOV can also collect comparison operands. +See the "Comparison operands collection" section for details. + +Besides collecting coverage data from syscall handlers, KCOV can also collect +coverage for annotated parts of the kernel executing in background kernel +tasks or soft interrupts. See the "Remote coverage collection" section for +details. Prerequisites ------------- -Configure the kernel with:: +KCOV relies on compiler instrumentation and requires GCC 6.1.0 or later +or any Clang version supported by the kernel. + +Collecting comparison operands is supported with GCC 8+ or with Clang. + +To enable KCOV, configure the kernel with:: CONFIG_KCOV=y -CONFIG_KCOV requires gcc 6.1.0 or later. - -If the comparison operands need to be collected, set:: +To enable comparison operands collection, set:: CONFIG_KCOV_ENABLE_COMPARISONS=y -Profiling data will only become accessible once debugfs has been mounted:: +Coverage data only becomes accessible once debugfs has been mounted:: mount -t debugfs none /sys/kernel/debug Coverage collection ------------------- -The following program demonstrates coverage collection from within a test -program using kcov: +The following program demonstrates how to use KCOV to collect coverage for a +single syscall from within a test program: .. code-block:: c @@ -84,7 +92,7 @@ program using kcov: perror("ioctl"), exit(1); /* Reset coverage from the tail of the ioctl() call. 
*/ __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED); - /* That's the target syscal call. */ + /* Call the target syscall call. */ read(-1, NULL, 0); /* Read number of PCs collected. */ n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); @@ -103,7 +111,7 @@ program using kcov: return 0; } -After piping through addr2line output of the program looks as follows:: +After piping through ``addr2line`` the output of the program looks as follows:: SyS_read fs/read_write.c:562 @@ -121,12 +129,13 @@ After piping through addr2line output of the program looks as follows:: fs/read_write.c:562 If a program needs to collect coverage from several threads (independently), -it needs to open /sys/kernel/debug/kcov in each thread separately. +it needs to open ``/sys/kernel/debug/kcov`` in each thread separately. The interface is fine-grained to allow efficient forking of test processes. -That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode, -mmaps coverage buffer and then forks child processes in a loop. Child processes -only need to enable coverage (disable happens automatically on thread end). +That is, a parent process opens ``/sys/kernel/debug/kcov``, enables trace mode, +mmaps coverage buffer, and then forks child processes in a loop. The child +processes only need to enable coverage (it gets disabled automatically when +a thread exits). Comparison operands collection ------------------------------ @@ -205,52 +214,78 @@ Comparison operands collection is similar to coverage collection: return 0; } -Note that the kcov modes (coverage collection or comparison operands) are -mutually exclusive. +Note that the KCOV modes (collection of code coverage or comparison operands) +are mutually exclusive. Remote coverage collection -------------------------- -With KCOV_ENABLE coverage is collected only for syscalls that are issued -from the current process. 
With KCOV_REMOTE_ENABLE it's possible to collect -coverage for arbitrary parts of the kernel code, provided that those parts -are annotated with kcov_remote_start()/kcov_remote_stop(). +Besides collecting coverage data from handlers of syscalls issued from a +userspace process, KCOV can also collect coverage for parts of the kernel +executing in other contexts - so-called "remote" coverage. -This allows to collect coverage from two types of kernel background -threads: the global ones, that are spawned during kernel boot in a limited -number of instances (e.g. one USB hub_event() worker thread is spawned per -USB HCD); and the local ones, that are spawned when a user interacts with -some kernel interface (e.g. vhost workers); as well as from soft -interrupts. +Using KCOV to collect remote coverage requires: -To enable collecting coverage from a global background thread or from a -softirq, a unique global handle must be assigned and passed to the -corresponding kcov_remote_start() call. Then a userspace process can pass -a list of such handles to the KCOV_REMOTE_ENABLE ioctl in the handles -array field of the kcov_remote_arg struct. This will attach the used kcov -device to the code sections, that are referenced by those handles. +1. Modifying kernel code to annotate the code section from where coverage + should be collected with ``kcov_remote_start`` and ``kcov_remote_stop``. -Since there might be many local background threads spawned from different -userspace processes, we can't use a single global handle per annotation. -Instead, the userspace process passes a non-zero handle through the -common_handle field of the kcov_remote_arg struct. This common handle gets -saved to the kcov_handle field in the current task_struct and needs to be -passed to the newly spawned threads via custom annotations. Those threads -should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). +2. 
Using ``KCOV_REMOTE_ENABLE`` instead of ``KCOV_ENABLE`` in the userspace + process that collects coverage. -Internally kcov stores handles as u64 integers. The top byte of a handle -is used to denote the id of a subsystem that this handle belongs to, and -the lower 4 bytes are used to denote the id of a thread instance within -that subsystem. A reserved value 0 is used as a subsystem id for common -handles as they don't belong to a particular subsystem. The bytes 4-7 are -currently reserved and must be zero. In the future the number of bytes -used for the subsystem or handle ids might be increased. +Both ``kcov_remote_start`` and ``kcov_remote_stop`` annotations and the +``KCOV_REMOTE_ENABLE`` ioctl accept handles that identify particular coverage +collection sections. The way a handle is used depends on the context where the +matching code section executes. -When a particular userspace process collects coverage via a common -handle, kcov will collect coverage for each code section that is annotated -to use the common handle obtained as kcov_handle from the current -task_struct. However non common handles allow to collect coverage -selectively from different subsystems. +KCOV supports collecting remote coverage from the following contexts: + +1. Global kernel background tasks. These are the tasks that are spawned during + kernel boot in a limited number of instances (e.g. one USB ``hub_event`` + worker is spawned per one USB HCD). + +2. Local kernel background tasks. These are spawned when a userspace process + interacts with some kernel interface and are usually killed when the process + exits (e.g. vhost workers). + +3. Soft interrupts. + +For #1 and #3, a unique global handle must be chosen and passed to the +corresponding ``kcov_remote_start`` call. Then a userspace process must pass +this handle to ``KCOV_REMOTE_ENABLE`` in the ``handles`` array field of the +``kcov_remote_arg`` struct. 
This will attach the used KCOV device to the code +section referenced by this handle. Multiple global handles identifying +different code sections can be passed at once. + +For #2, the userspace process instead must pass a non-zero handle through the +``common_handle`` field of the ``kcov_remote_arg`` struct. This common handle +gets saved to the ``kcov_handle`` field in the current ``task_struct`` and +needs to be passed to the newly spawned local tasks via custom kernel code +modifications. Those tasks should in turn use the passed handle in their +``kcov_remote_start`` and ``kcov_remote_stop`` annotations. + +KCOV follows a predefined format for both global and common handles. Each +handle is a ``u64`` integer. Currently, only the one top and the lower 4 bytes +are used. Bytes 4-7 are reserved and must be zero. + +For global handles, the top byte of the handle denotes the id of a subsystem +this handle belongs to. For example, KCOV uses ``1`` as the USB subsystem id. +The lower 4 bytes of a global handle denote the id of a task instance within +that subsystem. For example, each ``hub_event`` worker uses the USB bus number +as the task instance id. + +For common handles, a reserved value ``0`` is used as a subsystem id, as such +handles don't belong to a particular subsystem. The lower 4 bytes of a common +handle identify a collective instance of all local tasks spawned by the +userspace process that passed a common handle to ``KCOV_REMOTE_ENABLE``. + +In practice, any value can be used for common handle instance id if coverage +is only collected from a single userspace process on the system. However, if +common handles are used by multiple processes, unique instance ids must be +used for each process. One option is to use the process id as the common +handle instance id. + +The following program demonstrates using KCOV to collect coverage from both +local tasks spawned by the process and the global task that handles USB bus #1: .. 
code-block:: c From 3ac39d208d485ef5f5518b96f23ec9ade077683f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Feb 2023 13:06:12 +0300 Subject: [PATCH 07/50] dca: delete unnecessary variable It's more readable to just pass NULL directly instead of using a variable for that. Link: https://lkml.kernel.org/r/Y/yAlDytLH0ZNLNz@kili Signed-off-by: Dan Carpenter Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- drivers/dca/dca-core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c index c40c2ebfdae9..ed3dac546dd6 100644 --- a/drivers/dca/dca-core.c +++ b/drivers/dca/dca-core.c @@ -294,9 +294,7 @@ EXPORT_SYMBOL_GPL(dca3_get_tag); */ u8 dca_get_tag(int cpu) { - struct device *dev = NULL; - - return dca_common_get_tag(dev, cpu); + return dca_common_get_tag(NULL, cpu); } EXPORT_SYMBOL_GPL(dca_get_tag); From 56fe487062b5561ceabb9f866327cd0b041f3a09 Mon Sep 17 00:00:00 2001 From: Glenn Washburn Date: Sun, 26 Feb 2023 22:05:59 -0600 Subject: [PATCH 08/50] scripts/gdb: correct indentation in get_current_task Patch series "scripts/gdb: Support getting current task struct in UML", v3. A running x86 UML kernel reports with architecture "i386:x86-64" as it is a sub-architecture. However, a difference with bare-metal x86 kernels is in how it manages tasks and the current task struct. To identify that the inferior is a UML kernel and not bare-metal, check for the existence of the UML specific symbol "cpu_tasks" which contains the current task struct. This patch (of 3): There is an extra space in a couple blocks in get_current_task. Though python does not care, let's make the spacing consistent. Also, format better an if expression, removing unneeded parenthesis. 
Link: https://lkml.kernel.org/r/cover.1677469905.git.development@efficientek.com Link: https://lkml.kernel.org/r/2e117b82240de6893f27cb6507242ce455ed7b5b.1677469905.git.development@efficientek.com Signed-off-by: Glenn Washburn Reviewed-by: Jan Kiszka Cc: Anton Ivanov Cc: Johannes Berg Cc: Kieran Bingham Cc: Richard Weinberger Signed-off-by: Andrew Morton --- scripts/gdb/linux/cpus.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py index 9ee99f9fae8d..e8d2a62ff119 100644 --- a/scripts/gdb/linux/cpus.py +++ b/scripts/gdb/linux/cpus.py @@ -163,16 +163,16 @@ def get_current_task(cpu): task_ptr_type = task_type.get_type().pointer() if utils.is_target_arch("x86"): - var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task") - return per_cpu(var_ptr, cpu).dereference() + var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task") + return per_cpu(var_ptr, cpu).dereference() elif utils.is_target_arch("aarch64"): - current_task_addr = gdb.parse_and_eval("$SP_EL0") - if((current_task_addr >> 63) != 0): - current_task = current_task_addr.cast(task_ptr_type) - return current_task.dereference() - else: - raise gdb.GdbError("Sorry, obtaining the current task is not allowed " - "while running in userspace(EL0)") + current_task_addr = gdb.parse_and_eval("$SP_EL0") + if (current_task_addr >> 63) != 0: + current_task = current_task_addr.cast(task_ptr_type) + return current_task.dereference() + else: + raise gdb.GdbError("Sorry, obtaining the current task is not allowed " + "while running in userspace(EL0)") else: raise gdb.GdbError("Sorry, obtaining the current task is not yet " "supported with this arch") From 6d51363d53db4f5f11a13509ef28e917b97eb2b3 Mon Sep 17 00:00:00 2001 From: Glenn Washburn Date: Sun, 26 Feb 2023 22:06:00 -0600 Subject: [PATCH 09/50] scripts/gdb: support getting current task struct in UML A running x86 UML kernel reports with architecture "i386:x86-64" as it is a 
sub-architecture. However, a difference with bare-metal x86 kernels is in how it manages tasks and the current task struct. To identify that the inferior is a UML kernel and not bare-metal, check for the existence of the UML specific symbol "cpu_tasks" which contains the current task struct. Link: https://lkml.kernel.org/r/b839d611e2906ccef2725c34d8e353fab35fe75e.1677469905.git.development@efficientek.com Signed-off-by: Glenn Washburn Reviewed-by: Jan Kiszka Cc: Anton Ivanov Cc: Johannes Berg Cc: Kieran Bingham Cc: Richard Weinberger Signed-off-by: Andrew Morton --- scripts/gdb/linux/cpus.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py index e8d2a62ff119..255dc18cb9da 100644 --- a/scripts/gdb/linux/cpus.py +++ b/scripts/gdb/linux/cpus.py @@ -163,8 +163,14 @@ def get_current_task(cpu): task_ptr_type = task_type.get_type().pointer() if utils.is_target_arch("x86"): - var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task") - return per_cpu(var_ptr, cpu).dereference() + if gdb.lookup_global_symbol("cpu_tasks"): + # This is a UML kernel, which stores the current task + # differently than other x86 sub architectures + var_ptr = gdb.parse_and_eval("(struct task_struct *)cpu_tasks[0].task") + return var_ptr.dereference() + else: + var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task") + return per_cpu(var_ptr, cpu).dereference() elif utils.is_target_arch("aarch64"): current_task_addr = gdb.parse_and_eval("$SP_EL0") if (current_task_addr >> 63) != 0: From 2a6772ebf05a9b6bc450e42bbb459f6327f68aca Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Feb 2023 18:34:38 +0300 Subject: [PATCH 10/50] mm: uninline kstrdup() gcc inlines kstrdup into kstrdup_const() but it can very efficiently tail call into it instead: $ ./scripts/bloat-o-meter ../vmlinux-000 ../obj/vmlinux add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-84 (-84) Function old new delta kstrdup_const 119 35 -84 Link: 
https://lkml.kernel.org/r/Y/4fDlbIhTLNLFHz@p183 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- mm/util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/util.c b/mm/util.c index b8ed9dbc7fd5..dd12b9531ac4 100644 --- a/mm/util.c +++ b/mm/util.c @@ -49,6 +49,7 @@ EXPORT_SYMBOL(kfree_const); * * Return: newly allocated copy of @s or %NULL in case of error */ +noinline char *kstrdup(const char *s, gfp_t gfp) { size_t len; From 70e79866ab36feaaed8ef26dacfbcbac6a0631c9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Feb 2023 15:14:17 +0300 Subject: [PATCH 11/50] ELF: fix all "Elf" typos ELF is acronym and therefore should be spelled in all caps. I left one exception at Documentation/arm/nwfpe/nwfpe.rst which looks like being written in the first person. Link: https://lkml.kernel.org/r/Y/3wGWQviIOkyLJW@p183 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- arch/mips/boot/tools/relocs.c | 2 +- arch/um/os-Linux/elf_aux.c | 2 +- arch/x86/tools/relocs.c | 2 +- drivers/remoteproc/remoteproc_coredump.c | 4 ++-- drivers/remoteproc/remoteproc_elf_loader.c | 4 ++-- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/proc/vmcore.c | 22 +++++++++++----------- lib/buildid.c | 2 +- sound/soc/codecs/rt5677.c | 2 +- tools/bpf/resolve_btfids/main.c | 2 +- tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/usdt.c | 2 +- tools/perf/util/symbol-elf.c | 2 +- 14 files changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/mips/boot/tools/relocs.c b/arch/mips/boot/tools/relocs.c index 02fc85f3e8ff..a88d66c46d7f 100644 --- a/arch/mips/boot/tools/relocs.c +++ b/arch/mips/boot/tools/relocs.c @@ -245,7 +245,7 @@ static void read_ehdr(FILE *fp) die("Unknown ELF version\n"); if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) - die("Bad Elf header size\n"); + die("Bad ELF header size\n"); if (ehdr.e_phentsize != sizeof(Elf_Phdr)) die("Bad program header entry\n"); diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 
77a9321379b7..344ac403fb5d 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -2,7 +2,7 @@ /* * arch/um/kernel/elf_aux.c * - * Scan the Elf auxiliary vector provided by the host to extract + * Scan the ELF auxiliary vector provided by the host to extract * information about vsyscall-page, etc. * * Copyright (C) 2004 Fujitsu Siemens Computers GmbH diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 2925074b9a58..d30949e25ebd 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -406,7 +406,7 @@ static void read_ehdr(FILE *fp) if (ehdr.e_version != EV_CURRENT) die("Unknown ELF version\n"); if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) - die("Bad Elf header size\n"); + die("Bad ELF header size\n"); if (ehdr.e_phentsize != sizeof(Elf_Phdr)) die("Bad program header entry\n"); if (ehdr.e_shentsize != sizeof(Elf_Shdr)) diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c index 4b093420d98a..bc0e1603a7a3 100644 --- a/drivers/remoteproc/remoteproc_coredump.c +++ b/drivers/remoteproc/remoteproc_coredump.c @@ -249,7 +249,7 @@ void rproc_coredump(struct rproc *rproc) return; if (class == ELFCLASSNONE) { - dev_err(&rproc->dev, "Elf class is not set\n"); + dev_err(&rproc->dev, "ELF class is not set\n"); return; } @@ -361,7 +361,7 @@ void rproc_coredump_using_sections(struct rproc *rproc) return; if (class == ELFCLASSNONE) { - dev_err(&rproc->dev, "Elf class is not set\n"); + dev_err(&rproc->dev, "ELF class is not set\n"); return; } diff --git a/drivers/remoteproc/remoteproc_elf_loader.c b/drivers/remoteproc/remoteproc_elf_loader.c index 5a412d7b6e0b..94177e416047 100644 --- a/drivers/remoteproc/remoteproc_elf_loader.c +++ b/drivers/remoteproc/remoteproc_elf_loader.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Remote Processor Framework Elf loader + * Remote Processor Framework ELF loader * * Copyright (C) 2011 Texas Instruments, Inc. 
* Copyright (C) 2011 Google, Inc. @@ -39,7 +39,7 @@ int rproc_elf_sanity_check(struct rproc *rproc, const struct firmware *fw) const char *name = rproc->firmware; struct device *dev = &rproc->dev; /* - * Elf files are beginning with the same structure. Thus, to simplify + * ELF files are beginning with the same structure. Thus, to simplify * header parsing, we can use the elf32_hdr one for both elf64 and * elf32. */ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8a884e795f6a..a5b054fdb331 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2058,7 +2058,7 @@ static int elf_core_dump(struct coredump_params *cprm) has_dumped = 1; - offset += sizeof(elf); /* Elf header */ + offset += sizeof(elf); /* ELF header */ offset += segs * sizeof(struct elf_phdr); /* Program headers */ /* Write notes phdr entry */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index a05eafcacfb2..05a1471d5283 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1540,7 +1540,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) fill_note(&auxv_note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv); thread_status_size += notesize(&auxv_note); - offset = sizeof(*elf); /* Elf header */ + offset = sizeof(*elf); /* ELF header */ offset += segs * sizeof(struct elf_phdr); /* Program headers */ /* Write notes phdr entry */ diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 12af614f33ce..03f5963914a1 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -339,7 +339,7 @@ static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos) return acc; } - /* Read Elf note segment */ + /* Read ELF note segment */ if (*fpos < elfcorebuf_sz + elfnotes_sz) { void *kaddr; @@ -1109,7 +1109,7 @@ static int __init process_ptload_program_headers_elf64(char *elfptr, ehdr_ptr = (Elf64_Ehdr *)elfptr; phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ - /* Skip Elf header, program headers and Elf note segment. 
*/ + /* Skip ELF header, program headers and ELF note segment. */ vmcore_off = elfsz + elfnotes_sz; for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { @@ -1152,7 +1152,7 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, ehdr_ptr = (Elf32_Ehdr *)elfptr; phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ - /* Skip Elf header, program headers and Elf note segment. */ + /* Skip ELF header, program headers and ELF note segment. */ vmcore_off = elfsz + elfnotes_sz; for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { @@ -1188,7 +1188,7 @@ static void set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz, loff_t vmcore_off; struct vmcore *m; - /* Skip Elf header, program headers and Elf note segment. */ + /* Skip ELF header, program headers and ELF note segment. */ vmcore_off = elfsz + elfnotes_sz; list_for_each_entry(m, vc_list, list) { @@ -1213,7 +1213,7 @@ static int __init parse_crash_elf64_headers(void) addr = elfcorehdr_addr; - /* Read Elf header */ + /* Read ELF header */ rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr); if (rc < 0) return rc; @@ -1269,7 +1269,7 @@ static int __init parse_crash_elf32_headers(void) addr = elfcorehdr_addr; - /* Read Elf header */ + /* Read ELF header */ rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr); if (rc < 0) return rc; @@ -1376,12 +1376,12 @@ static void vmcoredd_write_header(void *buf, struct vmcoredd_data *data, } /** - * vmcoredd_update_program_headers - Update all Elf program headers + * vmcoredd_update_program_headers - Update all ELF program headers * @elfptr: Pointer to elf header * @elfnotesz: Size of elf notes aligned to page size * @vmcoreddsz: Size of device dumps to be added to elf note header * - * Determine type of Elf header (Elf64 or Elf32) and update the elf note size. + * Determine type of ELF header (Elf64 or Elf32) and update the elf note size. * Also update the offsets of all the program headers after the elf note header. 
*/ static void vmcoredd_update_program_headers(char *elfptr, size_t elfnotesz, @@ -1439,10 +1439,10 @@ static void vmcoredd_update_program_headers(char *elfptr, size_t elfnotesz, /** * vmcoredd_update_size - Update the total size of the device dumps and update - * Elf header + * ELF header * @dump_size: Size of the current device dump to be added to total size * - * Update the total size of all the device dumps and update the Elf program + * Update the total size of all the device dumps and update the ELF program * headers. Calculate the new offsets for the vmcore list and update the * total vmcore size. */ @@ -1466,7 +1466,7 @@ static void vmcoredd_update_size(size_t dump_size) * @data: dump info. * * Allocate a buffer and invoke the calling driver's dump collect routine. - * Write Elf note at the beginning of the buffer to indicate vmcore device + * Write ELF note at the beginning of the buffer to indicate vmcore device * dump and add the dump to global list. */ int vmcore_add_device_dump(struct vmcoredd_data *data) diff --git a/lib/buildid.c b/lib/buildid.c index dfc62625cae4..e3a7acdeef0e 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -163,7 +163,7 @@ out: /** * build_id_parse_buf - Get build ID from a buffer - * @buf: Elf note section(s) to parse + * @buf: ELF note section(s) to parse * @buf_size: Size of @buf in bytes * @build_id: Build ID parsed from @buf, at least BUILD_ID_SIZE_MAX long * diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index c26395f42d8e..3bf019b3f700 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -829,7 +829,7 @@ static int rt5677_parse_and_load_dsp(struct rt5677_priv *rt5677, const u8 *buf, if (strncmp(elf_hdr->e_ident, ELFMAG, sizeof(ELFMAG) - 1)) dev_err(component->dev, "Wrong ELF header prefix\n"); if (elf_hdr->e_ehsize != sizeof(Elf32_Ehdr)) - dev_err(component->dev, "Wrong Elf header size\n"); + dev_err(component->dev, "Wrong ELF header size\n"); if (elf_hdr->e_machine != EM_XTENSA) 
dev_err(component->dev, "Wrong DSP code file\n"); diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 77058174082d..27a23196d58e 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* - * resolve_btfids scans Elf object for .BTF_ids section and resolves + * resolve_btfids scans ELF object for .BTF_ids section and resolves * its symbols with BTF ID values. * * Each symbol points to 4 bytes data and is expected to have diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 05c4db355f28..67b2f5ff185d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1334,7 +1334,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) goto errout; } - /* Elf is corrupted/truncated, avoid calling elf_strptr. */ + /* ELF is corrupted/truncated, avoid calling elf_strptr. */ if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 75b411fc2c77..3496df332e8e 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -771,7 +771,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * target->rel_ip = usdt_rel_ip; target->sema_off = usdt_sema_off; - /* notes.args references strings from Elf itself, so they can + /* notes.args references strings from ELF itself, so they can * be referenced safely until elf_end() call */ target->spec_str = note.args; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 41882ae8452e..554289fd6df9 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -213,7 +213,7 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, Elf_Scn *sec = NULL; size_t cnt = 1; - /* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ + /* ELF is corrupted/truncated, avoid calling elf_strptr. */ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) return NULL; From 4b3d049f1c567560191884d4bd8f6e99ab885e20 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 6 Mar 2023 13:32:53 -0800 Subject: [PATCH 12/50] scripts/link-vmlinux.sh: fix error message presentation This comes out as Try make KALLSYMS_EXTRA_PASS=1 as a workaround but we want quotes: Try "make KALLSYMS_EXTRA_PASS=1" as a workaround Link: https://lkml.kernel.org/r/202303042034.Cjc7JTd0-lkp@intel.com Cc: kernel test robot Signed-off-by: Andrew Morton --- scripts/link-vmlinux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 32e573943cf0..0512c313a590 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -291,7 +291,7 @@ fi if is_enabled CONFIG_KALLSYMS; then if ! cmp -s System.map ${kallsyms_vmlinux}.syms; then echo >&2 Inconsistent kallsyms data - echo >&2 Try "make KALLSYMS_EXTRA_PASS=1" as a workaround + echo >&2 'Try "make KALLSYMS_EXTRA_PASS=1" as a workaround' exit 1 fi fi From d99a4158c4483c7f47274937deb142cd8c461b77 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Wed, 4 Jan 2023 21:15:24 +0100 Subject: [PATCH 13/50] checkpatch: ignore ETHTOOL_LINK_MODE_ enum values Since commit 4104a20646 ("checkpatch: ignore generated CamelCase defines and enum values") enum values like ETHTOOL_LINK_MODE_Asym_Pause_BIT are ignored. But there are other enums like ETHTOOL_LINK_MODE_1000baseT_Full_BIT, which are not ignored because of the not matching '1000baseT' substring. Add regex to match all ETHTOOL_LINK_MODE enums. 
Link: https://lkml.kernel.org/r/20230104201524.28078-1-gerhard@engleder-embedded.com Signed-off-by: Gerhard Engleder Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Gerhard Engleder Cc: Joe Perches Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index bd44d12965c9..c7cd0750b41e 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5809,6 +5809,8 @@ sub process { $var !~ /^(?:[A-Z]+_){1,5}[A-Z]{1,3}[a-z]/ && #Ignore Page variants $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && +#Ignore ETHTOOL_LINK_MODE_ variants + $var !~ /^ETHTOOL_LINK_MODE_/ && #Ignore SI style variants like nS, mV and dB #(ie: max_uV, regulator_min_uA_show, RANGE_mA_VALUE) $var !~ /^(?:[a-z0-9_]*|[A-Z0-9_]*)?_?[a-z][A-Z](?:_[a-z0-9_]+|_[A-Z0-9_]+)?$/ && From 725e374050ab0b4aa3663807a597773db30f1ae8 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 8 Mar 2023 16:06:25 +0100 Subject: [PATCH 14/50] MAINTAINERS: remove the obsolete section EMBEDDED LINUX By now, many developers are working on Linux for embedded systems. There is no need to point out single developers. The linux-embedded mailing list has only little traffic, and most of it is just spam. Remove this obsolete section. 
Link: https://lkml.kernel.org/r/20230308150625.28732-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Acked-by: David Woodhouse Cc: Olivia Mackall Signed-off-by: Andrew Morton --- MAINTAINERS | 6 ------ 1 file changed, 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 90abe83c02f3..63cba614d233 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7525,12 +7525,6 @@ T: git git://linuxtv.org/media_tree.git F: Documentation/admin-guide/media/em28xx* F: drivers/media/usb/em28xx/ -EMBEDDED LINUX -M: Olivia Mackall -M: David Woodhouse -L: linux-embedded@vger.kernel.org -S: Maintained - EMMC CMDQ HOST CONTROLLER INTERFACE (CQHCI) DRIVER M: Adrian Hunter M: Ritesh Harjani From 882c5b261f404ebdfe69dcc055bfffc39f80c5a6 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 12 Mar 2023 12:46:45 -0400 Subject: [PATCH 15/50] kernel/hung_task.c: set some hung_task.c variables storage-class-specifier to static smatch reports several warnings kernel/hung_task.c:31:19: warning: symbol 'sysctl_hung_task_check_count' was not declared. Should it be static? kernel/hung_task.c:50:29: warning: symbol 'sysctl_hung_task_check_interval_secs' was not declared. Should it be static? kernel/hung_task.c:52:19: warning: symbol 'sysctl_hung_task_warnings' was not declared. Should it be static? kernel/hung_task.c:75:28: warning: symbol 'sysctl_hung_task_panic' was not declared. Should it be static? These variables are only used in hung_task.c, so they should be static Link: https://lkml.kernel.org/r/20230312164645.471259-1-trix@redhat.com Signed-off-by: Tom Rix Cc: Ben Dooks Cc: fuyuanli Cc: John Ogness Cc: Peter Zijlstra Cc: Petr Mladek Cc: Rafael J. 
Wysocki Cc: Rasmus Villemoes Signed-off-by: Andrew Morton --- kernel/hung_task.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 322813366c6c..9a24574988d2 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -28,7 +28,7 @@ /* * The number of tasks checked: */ -int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; +static int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; /* * Limit number of tasks checked in a batch. @@ -47,9 +47,9 @@ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_ /* * Zero (default value) means use sysctl_hung_task_timeout_secs: */ -unsigned long __read_mostly sysctl_hung_task_check_interval_secs; +static unsigned long __read_mostly sysctl_hung_task_check_interval_secs; -int __read_mostly sysctl_hung_task_warnings = 10; +static int __read_mostly sysctl_hung_task_warnings = 10; static int __read_mostly did_panic; static bool hung_task_show_lock; @@ -72,8 +72,8 @@ static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace; * Should we panic (and reboot, if panic_timeout= is set) when a * hung task is detected: */ -unsigned int __read_mostly sysctl_hung_task_panic = - IS_ENABLED(CONFIG_BOOTPARAM_HUNG_TASK_PANIC); +static unsigned int __read_mostly sysctl_hung_task_panic = + IS_ENABLED(CONFIG_BOOTPARAM_HUNG_TASK_PANIC); static int hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr) From f4708a82dc45fbf08bd3c68fc3eb32397e9121a1 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Tue, 14 Mar 2023 17:00:58 -0300 Subject: [PATCH 16/50] notifiers: add tracepoints to the notifiers infrastructure Currently there is no way to show the callback names for registered, unregistered or executed notifiers. This is very useful for debug purposes, hence add this functionality here in the form of notifiers' tracepoints, one per operation. 
[akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20230314200058.1326909-1-gpiccoli@igalia.com Signed-off-by: Guilherme G. Piccoli Cc: Arjan van de Ven Cc: Michael Kelley Cc: Steven Rostedt Cc: Xiaoming Ni Cc: Baoquan He Cc: Cong Wang Cc: Dmitry Osipenko Cc: Guilherme G. Piccoli Cc: Guilherme G. Piccoli Cc: Petr Mladek Cc: Rafael J. Wysocki Cc: Sebastian Andrzej Siewior Cc: Valentin Schneider Signed-off-by: Andrew Morton --- include/trace/events/notifier.h | 69 +++++++++++++++++++++++++++++++++ kernel/notifier.c | 6 +++ 2 files changed, 75 insertions(+) create mode 100644 include/trace/events/notifier.h diff --git a/include/trace/events/notifier.h b/include/trace/events/notifier.h new file mode 100644 index 000000000000..26b298a31950 --- /dev/null +++ b/include/trace/events/notifier.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM notifier + +#if !defined(_TRACE_NOTIFIERS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NOTIFIERS_H + +#include + +DECLARE_EVENT_CLASS(notifier_info, + + TP_PROTO(void *cb), + + TP_ARGS(cb), + + TP_STRUCT__entry( + __field(void *, cb) + ), + + TP_fast_assign( + __entry->cb = cb; + ), + + TP_printk("%ps", __entry->cb) +); + +/* + * notifier_register - called upon notifier callback registration + * + * @cb: callback pointer + * + */ +DEFINE_EVENT(notifier_info, notifier_register, + + TP_PROTO(void *cb), + + TP_ARGS(cb) +); + +/* + * notifier_unregister - called upon notifier callback unregistration + * + * @cb: callback pointer + * + */ +DEFINE_EVENT(notifier_info, notifier_unregister, + + TP_PROTO(void *cb), + + TP_ARGS(cb) +); + +/* + * notifier_run - called upon notifier callback execution + * + * @cb: callback pointer + * + */ +DEFINE_EVENT(notifier_info, notifier_run, + + TP_PROTO(void *cb), + + TP_ARGS(cb) +); + +#endif /* _TRACE_NOTIFIERS_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/notifier.c 
b/kernel/notifier.c index d353e4b5402d..b3ce28f39eb6 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -7,6 +7,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + /* * Notifier list for kernel code which wants to be called * at shutdown. This is used to stop any idling DMA operations @@ -37,6 +40,7 @@ static int notifier_chain_register(struct notifier_block **nl, } n->next = *nl; rcu_assign_pointer(*nl, n); + trace_notifier_register((void *)n->notifier_call); return 0; } @@ -46,6 +50,7 @@ static int notifier_chain_unregister(struct notifier_block **nl, while ((*nl) != NULL) { if ((*nl) == n) { rcu_assign_pointer(*nl, n->next); + trace_notifier_unregister((void *)n->notifier_call); return 0; } nl = &((*nl)->next); @@ -84,6 +89,7 @@ static int notifier_call_chain(struct notifier_block **nl, continue; } #endif + trace_notifier_run((void *)nb->notifier_call); ret = nb->notifier_call(nb, val, v); if (nr_calls) From 58c9b016e12855286370dfb704c08498edbc857a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 22 Mar 2023 17:57:02 +0100 Subject: [PATCH 17/50] epoll: use refcount to reduce ep_mutex contention We are observing huge contention on the epmutex during an http connection/rate test: 83.17% 0.25% nginx [kernel.kallsyms] [k] entry_SYSCALL_64_after_hwframe [...] |--66.96%--__fput |--60.04%--eventpoll_release_file |--58.41%--__mutex_lock.isra.6 |--56.56%--osq_lock The application is multi-threaded, creates a new epoll entry for each incoming connection, and does not delete it before the connection shutdown - that is, before the connection's fd close(). Many different threads compete frequently for the epmutex lock, affecting the overall performance. To reduce the contention this patch introduces explicit reference counting for the eventpoll struct. Each registered event acquires a reference, and references are released at ep_remove() time. 
The eventpoll struct is released by whoever - among EP file close() and the monitored file close() - drops its last reference. Additionally, this introduces a new 'dying' flag to prevent races between the EP file close() and the monitored file close(). eventpoll_release_file() marks, under f_lock spinlock, each epitem as dying before removing it, while EP file close() does not touch dying epitems. The above is needed as both close operations could run concurrently and drop the EP reference acquired via the epitem entry. Without the above flag, the monitored file close() could reach the EP struct via the epitem list while the epitem is still listed and then try to put it after its disposal. An alternative could be avoiding touching the references acquired via the epitems at EP file close() time, but that could leave the EP struct alive for potentially unlimited time after EP file close(), with nasty side effects. With all the above in place, we can drop the epmutex usage at disposal time. Overall this produces a significant performance improvement in the mentioned connection/rate scenario: the mutex operations disappear from the topmost offenders in the perf report, and the measured connections/rate grows by ~60%. To make the change more readable this additionally renames ep_free() to ep_clear_and_put(), and moves the actual memory cleanup into a separate ep_free() helper.
Link: https://lkml.kernel.org/r/4a57788dcaf28f5eb4f8dfddcc3a8b172a7357bb.1679504153.git.pabeni@redhat.com Signed-off-by: Paolo Abeni Co-developed-by: Eric Dumazet Signed-off-by: Eric Dumazet Tested-by: Xiumei Mu Acked-by: Soheil Hassas Yeganeh Reviewed-by: Davidlohr Bueso Cc: Alexander Viro Cc: Carlos Maiolino Cc: Christian Brauner Cc: Eric Biggers Cc: Jacob Keller Cc: Jens Axboe Signed-off-by: Andrew Morton --- fs/eventpoll.c | 197 +++++++++++++++++++++++++++++++------------------ 1 file changed, 124 insertions(+), 73 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 64659b110973..0ecdfd3043a3 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -57,13 +57,7 @@ * we need a lock that will allow us to sleep. This lock is a * mutex (ep->mtx). It is acquired during the event transfer loop, * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file(). - * Then we also need a global mutex to serialize eventpoll_release_file() - * and ep_free(). - * This mutex is acquired by ep_free() during the epoll file - * cleanup path and it is also acquired by eventpoll_release_file() - * if a file has been pushed inside an epoll set and it is then - * close()d without a previous call to epoll_ctl(EPOLL_CTL_DEL). - * It is also acquired when inserting an epoll fd onto another epoll + * The epmutex is acquired when inserting an epoll fd onto another epoll * fd. We do this so that we walk the epoll tree and ensure that this * insertion does not create a cycle of epoll file descriptors, which * could lead to deadlock. 
We need a global mutex to prevent two @@ -153,6 +147,13 @@ struct epitem { /* The file descriptor information this item refers to */ struct epoll_filefd ffd; + /* + * Protected by file->f_lock, true for to-be-released epitem already + * removed from the "struct file" items list; together with + * eventpoll->refcount orchestrates "struct eventpoll" disposal + */ + bool dying; + /* List containing poll wait queues */ struct eppoll_entry *pwqlist; @@ -217,6 +218,12 @@ struct eventpoll { u64 gen; struct hlist_head refs; + /* + * usage count, used together with epitem->dying to + * orchestrate the disposal of this struct + */ + refcount_t refcount; + #ifdef CONFIG_NET_RX_BUSY_POLL /* used to track busy poll napi_id */ unsigned int napi_id; @@ -240,9 +247,7 @@ struct ep_pqueue { /* Maximum number of epoll watched descriptors, per user */ static long max_user_watches __read_mostly; -/* - * This mutex is used to serialize ep_free() and eventpoll_release_file(). - */ +/* Used for cycles detection */ static DEFINE_MUTEX(epmutex); static u64 loop_check_gen = 0; @@ -557,8 +562,7 @@ static void ep_remove_wait_queue(struct eppoll_entry *pwq) /* * This function unregisters poll callbacks from the associated file - * descriptor. Must be called with "mtx" held (or "epmutex" if called from - * ep_free). + * descriptor. Must be called with "mtx" held. 
*/ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) { @@ -681,11 +685,40 @@ static void epi_rcu_free(struct rcu_head *head) kmem_cache_free(epi_cache, epi); } +static void ep_get(struct eventpoll *ep) +{ + refcount_inc(&ep->refcount); +} + +/* + * Returns true if the event poll can be disposed + */ +static bool ep_refcount_dec_and_test(struct eventpoll *ep) +{ + if (!refcount_dec_and_test(&ep->refcount)) + return false; + + WARN_ON_ONCE(!RB_EMPTY_ROOT(&ep->rbr.rb_root)); + return true; +} + +static void ep_free(struct eventpoll *ep) +{ + mutex_destroy(&ep->mtx); + free_uid(ep->user); + wakeup_source_unregister(ep->ws); + kfree(ep); +} + /* * Removes a "struct epitem" from the eventpoll RB tree and deallocates * all the associated resources. Must be called with "mtx" held. + * If the dying flag is set, do the removal only if force is true. + * This prevents ep_clear_and_put() from dropping all the ep references + * while running concurrently with eventpoll_release_file(). + * Returns true if the eventpoll can be disposed. 
*/ -static int ep_remove(struct eventpoll *ep, struct epitem *epi) +static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) { struct file *file = epi->ffd.file; struct epitems_head *to_free; @@ -700,6 +733,11 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) /* Remove the current item from the list of epoll hooks */ spin_lock(&file->f_lock); + if (epi->dying && !force) { + spin_unlock(&file->f_lock); + return false; + } + to_free = NULL; head = file->f_ep; if (head->first == &epi->fllink && !epi->fllink.next) { @@ -733,28 +771,28 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) call_rcu(&epi->rcu, epi_rcu_free); percpu_counter_dec(&ep->user->epoll_watches); - - return 0; + return ep_refcount_dec_and_test(ep); } -static void ep_free(struct eventpoll *ep) +/* + * ep_remove variant for callers owing an additional reference to the ep + */ +static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) { - struct rb_node *rbp; + WARN_ON_ONCE(__ep_remove(ep, epi, false)); +} + +static void ep_clear_and_put(struct eventpoll *ep) +{ + struct rb_node *rbp, *next; struct epitem *epi; + bool dispose; /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) ep_poll_safewake(ep, NULL, 0); - /* - * We need to lock this because we could be hit by - * eventpoll_release_file() while we're freeing the "struct eventpoll". - * We do not need to hold "ep->mtx" here because the epoll file - * is on the way to be removed and no one has references to it - * anymore. The only hit might come from eventpoll_release_file() but - * holding "epmutex" is sufficient here. - */ - mutex_lock(&epmutex); + mutex_lock(&ep->mtx); /* * Walks through the whole tree by unregistering poll callbacks. @@ -767,26 +805,25 @@ static void ep_free(struct eventpoll *ep) } /* - * Walks through the whole tree by freeing each "struct epitem". 
At this - * point we are sure no poll callbacks will be lingering around, and also by - * holding "epmutex" we can be sure that no file cleanup code will hit - * us during this operation. So we can avoid the lock on "ep->lock". - * We do not need to lock ep->mtx, either, we only do it to prevent - * a lockdep warning. + * Walks through the whole tree and try to free each "struct epitem". + * Note that ep_remove_safe() will not remove the epitem in case of a + * racing eventpoll_release_file(); the latter will do the removal. + * At this point we are sure no poll callbacks will be lingering around. + * Since we still own a reference to the eventpoll struct, the loop can't + * dispose it. */ - mutex_lock(&ep->mtx); - while ((rbp = rb_first_cached(&ep->rbr)) != NULL) { + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = next) { + next = rb_next(rbp); epi = rb_entry(rbp, struct epitem, rbn); - ep_remove(ep, epi); + ep_remove_safe(ep, epi); cond_resched(); } + + dispose = ep_refcount_dec_and_test(ep); mutex_unlock(&ep->mtx); - mutex_unlock(&epmutex); - mutex_destroy(&ep->mtx); - free_uid(ep->user); - wakeup_source_unregister(ep->ws); - kfree(ep); + if (dispose) + ep_free(ep); } static int ep_eventpoll_release(struct inode *inode, struct file *file) @@ -794,7 +831,7 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) struct eventpoll *ep = file->private_data; if (ep) - ep_free(ep); + ep_clear_and_put(ep); return 0; } @@ -906,33 +943,34 @@ void eventpoll_release_file(struct file *file) { struct eventpoll *ep; struct epitem *epi; - struct hlist_node *next; + bool dispose; /* - * We don't want to get "file->f_lock" because it is not - * necessary. It is not necessary because we're in the "struct file" - * cleanup path, and this means that no one is using this file anymore. - * So, for example, epoll_ctl() cannot hit here since if we reach this - * point, the file counter already went to zero and fget() would fail. 
- * The only hit might come from ep_free() but by holding the mutex - * will correctly serialize the operation. We do need to acquire - * "ep->mtx" after "epmutex" because ep_remove() requires it when called - * from anywhere but ep_free(). - * - * Besides, ep_remove() acquires the lock, so we can't hold it here. + * Use the 'dying' flag to prevent a concurrent ep_clear_and_put() from + * touching the epitems list before eventpoll_release_file() can access + * the ep->mtx. */ - mutex_lock(&epmutex); - if (unlikely(!file->f_ep)) { - mutex_unlock(&epmutex); - return; - } - hlist_for_each_entry_safe(epi, next, file->f_ep, fllink) { +again: + spin_lock(&file->f_lock); + if (file->f_ep && file->f_ep->first) { + epi = hlist_entry(file->f_ep->first, struct epitem, fllink); + epi->dying = true; + spin_unlock(&file->f_lock); + + /* + * ep access is safe as we still own a reference to the ep + * struct + */ ep = epi->ep; - mutex_lock_nested(&ep->mtx, 0); - ep_remove(ep, epi); + mutex_lock(&ep->mtx); + dispose = __ep_remove(ep, epi, true); mutex_unlock(&ep->mtx); + + if (dispose) + ep_free(ep); + goto again; } - mutex_unlock(&epmutex); + spin_unlock(&file->f_lock); } static int ep_alloc(struct eventpoll **pep) @@ -955,6 +993,7 @@ static int ep_alloc(struct eventpoll **pep) ep->rbr = RB_ROOT_CACHED; ep->ovflist = EP_UNACTIVE_PTR; ep->user = user; + refcount_set(&ep->refcount, 1); *pep = ep; @@ -1223,10 +1262,10 @@ out_unlock: */ list_del_init(&wait->entry); /* - * ->whead != NULL protects us from the race with ep_free() - * or ep_remove(), ep_remove_wait_queue() takes whead->lock - * held by the caller. Once we nullify it, nothing protects - * ep/epi or even wait. + * ->whead != NULL protects us from the race with + * ep_clear_and_put() or ep_remove(), ep_remove_wait_queue() + * takes whead->lock held by the caller. Once we nullify it, + * nothing protects ep/epi or even wait. 
*/ smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL); } @@ -1496,16 +1535,22 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, if (tep) mutex_unlock(&tep->mtx); + /* + * ep_remove_safe() calls in the later error paths can't lead to + * ep_free() as the ep file itself still holds an ep reference. + */ + ep_get(ep); + /* now check if we've created too many backpaths */ if (unlikely(full_check && reverse_path_check())) { - ep_remove(ep, epi); + ep_remove_safe(ep, epi); return -EINVAL; } if (epi->event.events & EPOLLWAKEUP) { error = ep_create_wakeup_source(epi); if (error) { - ep_remove(ep, epi); + ep_remove_safe(ep, epi); return error; } } @@ -1529,7 +1574,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, * high memory pressure. */ if (unlikely(!epq.epi)) { - ep_remove(ep, epi); + ep_remove_safe(ep, epi); return -ENOMEM; } @@ -2025,7 +2070,7 @@ static int do_epoll_create(int flags) out_free_fd: put_unused_fd(fd); out_free_ep: - ep_free(ep); + ep_clear_and_put(ep); return error; } @@ -2167,10 +2212,16 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, error = -EEXIST; break; case EPOLL_CTL_DEL: - if (epi) - error = ep_remove(ep, epi); - else + if (epi) { + /* + * The eventpoll itself is still alive: the refcount + * can't go to zero here. + */ + ep_remove_safe(ep, epi); + error = 0; + } else { error = -ENOENT; + } break; case EPOLL_CTL_MOD: if (epi) { From 890a3ee3ce416e2f1aed41718a8c1d42c82cf1b2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 23 Mar 2023 17:50:29 +0200 Subject: [PATCH 18/50] kernel.h: split the hexadecimal related helpers to hex.h For the sake of cleaning up the kernel.h split the hexadecimal related helpers to own header called 'hex.h'. 
Link: https://lkml.kernel.org/r/20230323155029.40000-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Rasmus Villemoes Signed-off-by: Andrew Morton --- include/linux/hex.h | 35 +++++++++++++++++++++++++++++++++++ include/linux/kernel.h | 29 +---------------------------- 2 files changed, 36 insertions(+), 28 deletions(-) create mode 100644 include/linux/hex.h diff --git a/include/linux/hex.h b/include/linux/hex.h new file mode 100644 index 000000000000..2618382e5b0c --- /dev/null +++ b/include/linux/hex.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_HEX_H +#define _LINUX_HEX_H + +#include + +extern const char hex_asc[]; +#define hex_asc_lo(x) hex_asc[((x) & 0x0f)] +#define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4] + +static inline char *hex_byte_pack(char *buf, u8 byte) +{ + *buf++ = hex_asc_hi(byte); + *buf++ = hex_asc_lo(byte); + return buf; +} + +extern const char hex_asc_upper[]; +#define hex_asc_upper_lo(x) hex_asc_upper[((x) & 0x0f)] +#define hex_asc_upper_hi(x) hex_asc_upper[((x) & 0xf0) >> 4] + +static inline char *hex_byte_pack_upper(char *buf, u8 byte) +{ + *buf++ = hex_asc_upper_hi(byte); + *buf++ = hex_asc_upper_lo(byte); + return buf; +} + +extern int hex_to_bin(unsigned char ch); +extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); +extern char *bin2hex(char *dst, const void *src, size_t count); + +bool mac_pton(const char *s, u8 *mac); + +#endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 40bce7495af8..0d91e0af0125 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -263,34 +264,6 @@ extern enum system_states { SYSTEM_SUSPEND, } system_state; -extern const char hex_asc[]; -#define hex_asc_lo(x) hex_asc[((x) & 0x0f)] -#define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4] - -static inline char *hex_byte_pack(char *buf, u8 byte) -{ - *buf++ = hex_asc_hi(byte); - *buf++ = 
hex_asc_lo(byte); - return buf; -} - -extern const char hex_asc_upper[]; -#define hex_asc_upper_lo(x) hex_asc_upper[((x) & 0x0f)] -#define hex_asc_upper_hi(x) hex_asc_upper[((x) & 0xf0) >> 4] - -static inline char *hex_byte_pack_upper(char *buf, u8 byte) -{ - *buf++ = hex_asc_upper_hi(byte); - *buf++ = hex_asc_upper_lo(byte); - return buf; -} - -extern int hex_to_bin(unsigned char ch); -extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); -extern char *bin2hex(char *dst, const void *src, size_t count); - -bool mac_pton(const char *s, u8 *mac); - /* * General tracing related utility functions - trace_printk(), * tracing_on/tracing_off and tracing_start()/tracing_stop From a74d9a3f4fc96520c8f85cd372a16a0b29430701 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Thu, 23 Mar 2023 19:37:09 +0800 Subject: [PATCH 19/50] rapidio/tsi721: remove redundant pci_clear_master Remove pci_clear_master to simplify the code, the bus-mastering is also cleared in do_pci_disable_device, like this: ./drivers/pci/pci.c:2197 static void do_pci_disable_device(struct pci_dev *dev) { u16 pci_command; pci_read_config_word(dev, PCI_COMMAND, &pci_command); if (pci_command & PCI_COMMAND_MASTER) { pci_command &= ~PCI_COMMAND_MASTER; pci_write_config_word(dev, PCI_COMMAND, pci_command); } pcibios_disable_device(dev); }. And dev->is_busmaster is set to 0 in pci_disable_device. 
Link: https://lkml.kernel.org/r/20230323113711.10523-1-cai.huoqing@linux.dev Signed-off-by: Cai Huoqing Cc: Alexandre Bounine Cc: Matt Porter Cc: Tetsuo Handa Signed-off-by: Andrew Morton --- drivers/rapidio/devices/tsi721.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 0a42d6a2af24..83323c3d10af 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -2924,7 +2924,6 @@ err_unmap_bars: iounmap(priv->odb_base); err_free_res: pci_release_regions(pdev); - pci_clear_master(pdev); err_disable_pdev: pci_disable_device(pdev); err_clean: @@ -2962,7 +2961,6 @@ static void tsi721_remove(struct pci_dev *pdev) pci_disable_msi(priv->pdev); #endif pci_release_regions(pdev); - pci_clear_master(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); kfree(priv); @@ -2977,7 +2975,6 @@ static void tsi721_shutdown(struct pci_dev *pdev) tsi721_disable_ints(priv); tsi721_dma_stop_all(priv); - pci_clear_master(pdev); pci_disable_device(pdev); } From 7982722ff7286596a1e2f343ec4219e309ddc82a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 16:44:15 -0600 Subject: [PATCH 20/50] x86/kexec: remove unnecessary arch_kexec_kernel_image_load() Patch series "kexec: Remove unnecessary arch hook", v2. There are no arch-specific things in arch_kexec_kernel_image_load(), so remove it and just use the generic version. This patch (of 2): The x86 implementation of arch_kexec_kernel_image_load() is functionally identical to the generic arch_kexec_kernel_image_load(): arch_kexec_kernel_image_load # x86 if (!image->fops || !image->fops->load) return ERR_PTR(-ENOEXEC); return image->fops->load(image, image->kernel_buf, ...) arch_kexec_kernel_image_load # generic kexec_image_load_default if (!image->fops || !image->fops->load) return ERR_PTR(-ENOEXEC); return image->fops->load(image, image->kernel_buf, ...) 
Remove the x86-specific version and use the generic arch_kexec_kernel_image_load(). No functional change intended. Link: https://lkml.kernel.org/r/20230307224416.907040-1-helgaas@kernel.org Link: https://lkml.kernel.org/r/20230307224416.907040-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Simon Horman Acked-by: Baoquan He Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: Eric Biederman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/include/asm/kexec.h | 3 --- arch/x86/kernel/machine_kexec_64.c | 11 ----------- include/linux/kexec.h | 2 -- 3 files changed, 16 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index a3760ca796aa..5b77bbc28f96 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -200,9 +200,6 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add -void *arch_kexec_kernel_image_load(struct kimage *image); -#define arch_kexec_kernel_image_load arch_kexec_kernel_image_load - int arch_kimage_file_post_load_cleanup(struct kimage *image); #define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 0611fd83858e..1a3e2c05a8a5 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -374,17 +374,6 @@ void machine_kexec(struct kimage *image) /* arch-dependent functionality related to kexec file-based syscall */ #ifdef CONFIG_KEXEC_FILE -void *arch_kexec_kernel_image_load(struct kimage *image) -{ - if (!image->fops || !image->fops->load) - return ERR_PTR(-ENOEXEC); - - return image->fops->load(image, image->kernel_buf, - image->kernel_buf_len, image->initrd_buf, - image->initrd_buf_len, image->cmdline_buf, - image->cmdline_buf_len); -} - /* * Apply purgatory relocations. 
* diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 6883c5922701..4746bc9d39c9 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -207,12 +207,10 @@ static inline int arch_kimage_file_post_load_cleanup(struct kimage *image) } #endif -#ifndef arch_kexec_kernel_image_load static inline void *arch_kexec_kernel_image_load(struct kimage *image) { return kexec_image_load_default(image); } -#endif #ifdef CONFIG_KEXEC_SIG #ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION From fb15abdca64503511bb32cb6ff70da306f24fa06 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 16:44:16 -0600 Subject: [PATCH 21/50] kexec: remove unnecessary arch_kexec_kernel_image_load() arch_kexec_kernel_image_load() only calls kexec_image_load_default(), and there are no arch-specific implementations. Remove the unnecessary arch_kexec_kernel_image_load() and make kexec_image_load_default() static. No functional change intended. Link: https://lkml.kernel.org/r/20230307224416.907040-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Simon Horman Acked-by: Baoquan He Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: Eric Biederman Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/kexec.h | 6 ------ kernel/kexec_file.c | 6 +++--- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 4746bc9d39c9..22b5cd24f581 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -190,7 +190,6 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, void *buf, unsigned int size, bool get_value); void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); -void *kexec_image_load_default(struct kimage *image); #ifndef arch_kexec_kernel_image_probe static inline int @@ -207,11 +206,6 @@ static inline int arch_kimage_file_post_load_cleanup(struct kimage *image) } #endif -static inline void *arch_kexec_kernel_image_load(struct kimage *image) -{ - return kexec_image_load_default(image); -} - #ifdef CONFIG_KEXEC_SIG #ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION int kexec_kernel_verify_pe_sig(const char *kernel, unsigned long kernel_len); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index f1a0e4e3fb5c..f989f5f1933b 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -65,7 +65,7 @@ int kexec_image_probe_default(struct kimage *image, void *buf, return ret; } -void *kexec_image_load_default(struct kimage *image) +static void *kexec_image_load_default(struct kimage *image) { if (!image->fops || !image->fops->load) return ERR_PTR(-ENOEXEC); @@ -249,8 +249,8 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, /* IMA needs to pass the measurement list to the next kernel. 
*/ ima_add_kexec_buffer(image); - /* Call arch image load handlers */ - ldata = arch_kexec_kernel_image_load(image); + /* Call image load handler */ + ldata = kexec_image_load_default(image); if (IS_ERR(ldata)) { ret = PTR_ERR(ldata); From 1d7adbc74c009057ed9dc3112f388e91a9c79acc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Mar 2023 15:52:45 -0700 Subject: [PATCH 22/50] scripts/gdb: bail early if there are no clocks Avoid generating an exception if there are no clocks registered: (gdb) lx-clk-summary enable prepare protect clock count count count rate ------------------------------------------------------------------------ Python Exception : No symbol "clk_root_list" in current context. Error occurred in Python: No symbol "clk_root_list" in current context. Link: https://lkml.kernel.org/r/20230323225246.3302977-1-f.fainelli@gmail.com Fixes: d1e9710b63d8 ("scripts/gdb: initial clk support: lx-clk-summary") Signed-off-by: Florian Fainelli Cc: Jan Kiszka Cc: Kieran Bingham Cc: Leonard Crestez Cc: Stephen Boyd Signed-off-by: Andrew Morton --- scripts/gdb/linux/clk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/gdb/linux/clk.py b/scripts/gdb/linux/clk.py index 061aecfa294e..7a01fdc3e844 100644 --- a/scripts/gdb/linux/clk.py +++ b/scripts/gdb/linux/clk.py @@ -41,6 +41,8 @@ are cached and potentially out of date""" self.show_subtree(child, level + 1) def invoke(self, arg, from_tty): + if utils.gdb_eval_or_none("clk_root_list") is None: + raise gdb.GdbError("No clocks registered") gdb.write(" enable prepare protect \n") gdb.write(" clock count count count rate \n") gdb.write("------------------------------------------------------------------------\n") From f19c3c2959e465209ade1a7a699e6cbf4359ce78 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Mar 2023 16:16:57 -0700 Subject: [PATCH 23/50] scripts/gdb: bail early if there are no generic PD Avoid generating an exception if there are no generic power domain(s) registered: (gdb) 
lx-genpd-summary domain status children /device runtime status ---------------------------------------------------------------------- Python Exception : No symbol "gpd_list" in current context. Error occurred in Python: No symbol "gpd_list" in current context. (gdb) quit [f.fainelli@gmail.com: correctly invoke gdb_eval_or_none] Link: https://lkml.kernel.org/r/20230327185746.3856407-1-f.fainelli@gmail.com Link: https://lkml.kernel.org/r/20230323231659.3319941-1-f.fainelli@gmail.com Fixes: 8207d4a88e1e ("scripts/gdb: add lx-genpd-summary command") Signed-off-by: Florian Fainelli Cc: Jan Kiszka Cc: Kieran Bingham Cc: Leonard Crestez Cc: Stephen Boyd Signed-off-by: Andrew Morton --- scripts/gdb/linux/genpd.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/gdb/linux/genpd.py b/scripts/gdb/linux/genpd.py index 39cd1abd8559..b53649c0a77a 100644 --- a/scripts/gdb/linux/genpd.py +++ b/scripts/gdb/linux/genpd.py @@ -5,7 +5,7 @@ import gdb import sys -from linux.utils import CachedType +from linux.utils import CachedType, gdb_eval_or_none from linux.lists import list_for_each_entry generic_pm_domain_type = CachedType('struct generic_pm_domain') @@ -70,6 +70,8 @@ Output is similar to /sys/kernel/debug/pm_genpd/pm_genpd_summary''' gdb.write(' %-50s %s\n' % (kobj_path, rtpm_status_str(dev))) def invoke(self, arg, from_tty): + if gdb_eval_or_none("&gpd_list") is None: + raise gdb.GdbError("No power domain(s) registered") gdb.write('domain status children\n'); gdb.write(' /device runtime status\n'); gdb.write('----------------------------------------------------------------------\n'); From ef55ef3e6400ede7d4020f5fd0bc7aeac4de1ceb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 27 Mar 2023 17:26:04 +0300 Subject: [PATCH 24/50] lib/test-string_helpers: replace UNESCAPE_ANY by UNESCAPE_ALL_MASK When we get a random number to generate a flag in the valid range of UNESCAPE flags, use UNESCAPE_ALL_MASK, It's more correct and prevents from missed 
updates of the test coverage in the future if any. Link: https://lkml.kernel.org/r/20230327142604.48213-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Rasmus Villemoes Signed-off-by: Andrew Morton --- lib/test-string_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index 41d3447bc3b4..9a68849a5d55 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -587,7 +587,7 @@ static int __init test_string_helpers_init(void) for (i = 0; i < UNESCAPE_ALL_MASK + 1; i++) test_string_unescape("unescape", i, false); test_string_unescape("unescape inplace", - get_random_u32_below(UNESCAPE_ANY + 1), true); + get_random_u32_below(UNESCAPE_ALL_MASK + 1), true); /* Without dictionary */ for (i = 0; i < ESCAPE_ALL_MASK + 1; i++) From 747cd84f677cedb27db10d4ada777d7ec20431ee Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 21 Mar 2023 14:18:43 +0800 Subject: [PATCH 25/50] scripts/gdb: fix lx-timerlist for struct timerqueue_head change commit 511885d7061e ("lib/timerqueue: Rely on rbtree semantics for next timer") changed struct timerqueue_head, and so print_active_timers() should be changed accordingly in the way it interprets the structure.
Link: https://lkml.kernel.org/r/TYCP286MB21463BD277330B26DDC18903C6819@TYCP286MB2146.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Peng Liu Reviewed-by: Jan Kiszka Cc: Kieran Bingham Cc: Florian Fainelli Signed-off-by: Andrew Morton --- scripts/gdb/linux/timerlist.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/gdb/linux/timerlist.py b/scripts/gdb/linux/timerlist.py index 071d0dd5a634..44e39dc3eb64 100644 --- a/scripts/gdb/linux/timerlist.py +++ b/scripts/gdb/linux/timerlist.py @@ -43,8 +43,7 @@ def print_timer(rb_node, idx): def print_active_timers(base): - curr = base['active']['next']['node'] - curr = curr.address.cast(rbtree.rb_node_type.get_type().pointer()) + curr = base['active']['rb_root']['rb_leftmost'] idx = 0 while curr: yield print_timer(curr, idx) From 7362042f3556528e9e9b1eb5ce8d7a3a6331476b Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 21 Mar 2023 14:19:29 +0800 Subject: [PATCH 26/50] scripts/gdb: fix lx-timerlist for Python3 Below incompatibilities between Python2 and Python3 made lx-timerlist fail to run under Python3. o xrange() is replaced by range() in Python3 o bytes and str are different types in Python3 o the return value of Inferior.read_memory() is memoryview object in Python3 akpm: cc stable so that older kernels are properly debuggable under newer Python. 
Link: https://lkml.kernel.org/r/TYCP286MB2146EE1180A4D5176CBA8AB2C6819@TYCP286MB2146.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Peng Liu Reviewed-by: Jan Kiszka Cc: Florian Fainelli Cc: Kieran Bingham Cc: Signed-off-by: Andrew Morton --- scripts/gdb/linux/timerlist.py | 4 +++- scripts/gdb/linux/utils.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/timerlist.py b/scripts/gdb/linux/timerlist.py index 44e39dc3eb64..8281da068c5b 100644 --- a/scripts/gdb/linux/timerlist.py +++ b/scripts/gdb/linux/timerlist.py @@ -72,7 +72,7 @@ def print_cpu(hrtimer_bases, cpu, max_clock_bases): ts = cpus.per_cpu(tick_sched_ptr, cpu) text = "cpu: {}\n".format(cpu) - for i in xrange(max_clock_bases): + for i in range(max_clock_bases): text += " clock {}:\n".format(i) text += print_base(cpu_base['clock_base'][i]) @@ -157,6 +157,8 @@ def pr_cpumask(mask): num_bytes = (nr_cpu_ids + 7) / 8 buf = utils.read_memoryview(inf, bits, num_bytes).tobytes() buf = binascii.b2a_hex(buf) + if type(buf) is not str: + buf=buf.decode() chunks = [] i = num_bytes diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py index 1553f68716cc..7f36aee32ac6 100644 --- a/scripts/gdb/linux/utils.py +++ b/scripts/gdb/linux/utils.py @@ -88,7 +88,10 @@ def get_target_endianness(): def read_memoryview(inf, start, length): - return memoryview(inf.read_memory(start, length)) + m = inf.read_memory(start, length) + if type(m) is memoryview: + return m + return memoryview(m) def read_u16(buffer, offset): From 8fc2a304f57cb304231a4b0564d5995b2dd04f63 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 21 Mar 2023 14:20:04 +0800 Subject: [PATCH 27/50] scripts/gdb: fix lx-timerlist for HRTIMER_MAX_CLOCK_BASES printing HRTIMER_MAX_CLOCK_BASES is of enum type hrtimer_base_type. To print it as an integer, HRTIMER_MAX_CLOCK_BASES should be converted first. 
Link: https://lkml.kernel.org/r/TYCP286MB214640FF0E7F04AC3926A39EC6819@TYCP286MB2146.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Peng Liu Reviewed-by: Jan Kiszka Cc: Florian Fainelli Cc: Kieran Bingham Signed-off-by: Andrew Morton --- scripts/gdb/linux/timerlist.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/gdb/linux/timerlist.py b/scripts/gdb/linux/timerlist.py index 8281da068c5b..249f0e804b24 100644 --- a/scripts/gdb/linux/timerlist.py +++ b/scripts/gdb/linux/timerlist.py @@ -188,7 +188,8 @@ class LxTimerList(gdb.Command): max_clock_bases = gdb.parse_and_eval("HRTIMER_MAX_CLOCK_BASES") text = "Timer List Version: gdb scripts\n" - text += "HRTIMER_MAX_CLOCK_BASES: {}\n".format(max_clock_bases) + text += "HRTIMER_MAX_CLOCK_BASES: {}\n".format( + max_clock_bases.type.fields()[max_clock_bases].enumval) text += "now at {} nsecs\n".format(ktime_get()) for cpu in cpus.each_online_cpu(): From 0d828200ad56505a827610af876ca0b138b943a6 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 3 Apr 2023 18:23:46 +0200 Subject: [PATCH 28/50] docs: process: allow Closes tags with links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since v6.3, checkpatch.pl now complains about the use of "Closes:" tags followed by a link [1]. It also complains if a "Reported-by:" tag is followed by a "Closes:" one [2]. As detailed in the first patch, this "Closes:" tag is used for a bit of time, mainly by DRM and MPTCP subsystems. It is used by some bug trackers to automate the closure of issues when a patch is accepted. It is even planned to use this tag with bugzilla.kernel.org [3]. The first patch updates the documentation to explain what is this "Closes:" tag and how/when to use it. The second patch modifies checkpatch.pl to stop complaining about it. The DRM maintainers and their mailing list have been added in Cc as they are probably interested by these two patches as well. 
[1] https://lore.kernel.org/all/3b036087d80b8c0e07a46a1dbaaf4ad0d018f8d5.1674217480.git.linux@leemhuis.info/ [2] https://lore.kernel.org/all/bb5dfd55ea2026303ab2296f4a6df3da7dd64006.1674217480.git.linux@leemhuis.info/ [3] https://lore.kernel.org/linux-doc/20230315181205.f3av7h6owqzzw64p@meerkat.local/ This patch (of 5): Making sure a bug tracker is up to date is not an easy task. For example, a first version of a patch fixing a tracked issue can be sent a long time after having created the issue. But also, it can take some time to have this patch accepted upstream in its final form. When it is done, someone -- probably not the person who accepted the patch -- has to remember about closing the corresponding issue. This task of closing and tracking the patch can be done automatically by bug trackers like GitLab [1], GitHub [2] and hopefully soon [3] bugzilla.kernel.org when the appropriated tag is used. The two first ones accept multiple tags but it is probably better to pick one. According to commit 76f381bb77a0 ("checkpatch: warn when unknown tags are used for links"), the "Closes" tag seems to have been used in the past by a few people and it is supported by popular bug trackers. Here is how it has been used in the past: $ git log --no-merges --format=email -P --grep='^Closes: http' | \ grep '^Closes: http' | cut -d/ -f3-5 | sort | uniq -c | sort -rn 391 gitlab.freedesktop.org/drm/intel 79 github.com/multipath-tcp/mptcp_net-next 8 gitlab.freedesktop.org/drm/msm 3 gitlab.freedesktop.org/drm/amd 2 gitlab.freedesktop.org/mesa/mesa 1 patchwork.freedesktop.org/series/73320 1 gitlab.freedesktop.org/lima/linux 1 gitlab.freedesktop.org/drm/nouveau 1 github.com/ClangBuiltLinux/linux 1 bugzilla.netfilter.org/show_bug.cgi?id=1579 1 bugzilla.netfilter.org/show_bug.cgi?id=1543 1 bugzilla.netfilter.org/show_bug.cgi?id=1436 1 bugzilla.netfilter.org/show_bug.cgi?id=1427 1 bugs.debian.org/625804 Likely here, the "Closes" tag was only properly used with GitLab and GitHub. 
We can also see that it has been used quite a few times (and is still used recently) and this is then not a "random tag that makes no sense" as was the case with "BugLink" recently [4]. It has also been misused but that was a long time ago, when it was common to use many different random tags. The checkpatch.pl script should then stop complaining about this "Closes" tag. As suggested by Thorsten [5], if this tag is accepted, it should first be described in the documentation. This is what is done here in this patch. To avoid confusion, the "Closes" tag should be used with any public bug report. No need to check if the underlying bug tracker supports automations. Having this tag with any kind of public bug report allows bots like regzbot to clearly identify patches fixing a specific bug and avoid false-positives, e.g. patches mentioning that they are related to an issue but not fixing it. As suggested by Thorsten [6] again, if we follow the same logic, the "Closes" tag should then be used after a "Reported-by" one. Note that thanks to this "Closes" tag, the mentioned bug trackers can also locate where a patch has been applied in different branches and repositories. If only the "Link" tag is used, the tracking can also be done but the ticket will not be closed and a manual operation will be needed. Also, these bug trackers have some safeguards: the closure is only done if a commit having the "Closes:" tag is applied in a specific branch. It will then not be closed if a random commit having the same tag is published elsewhere. Also in case of closure, a notification is sent to the owners.
Link: https://lkml.kernel.org/r/20230314-doc-checkpatch-closes-tag-v4-0-d26d1fa66f9f@tessares.net Link: https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#default-closing-pattern [1] Link: https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/using-keywords-in-issues-and-pull-requests [2] Link: https://lore.kernel.org/linux-doc/20230315181205.f3av7h6owqzzw64p@meerkat.local/ [3] Link: https://lore.kernel.org/all/CAHk-=wgs38ZrfPvy=nOwVkVzjpM3VFU1zobP37Fwd_h9iAD5JQ@mail.gmail.com/ [4] Link: https://lore.kernel.org/all/688cd6cb-90ab-6834-a6f5-97080e39ca8e@leemhuis.info/ [5] Link: https://lore.kernel.org/linux-doc/2194d19d-f195-1a1e-41fc-7827ae569351@leemhuis.info/ [6] Link: https://github.com/multipath-tcp/mptcp_net-next/issues/373 Link: https://lkml.kernel.org/r/20230314-doc-checkpatch-closes-tag-v4-1-d26d1fa66f9f@tessares.net Signed-off-by: Matthieu Baerts Suggested-by: Thorsten Leemhuis Acked-by: Konstantin Ryabitsev Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Bagas Sanjaya Cc: Daniel Vetter Cc: David Airlie Cc: Dwaipayan Ray Cc: Jonathan Corbet Cc: Kai Wasserbäch Cc: Linus Torvalds Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- Documentation/process/5.Posting.rst | 22 +++++++++++++---- Documentation/process/submitting-patches.rst | 26 ++++++++++++++------ 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/Documentation/process/5.Posting.rst b/Documentation/process/5.Posting.rst index 7a670a075ab6..de4edd42d5c0 100644 --- a/Documentation/process/5.Posting.rst +++ b/Documentation/process/5.Posting.rst @@ -207,8 +207,8 @@ the patch:: Fixes: 1f2e3d4c5b6a ("The first line of the commit specified by the first 12 characters of its SHA-1 ID") Another tag is used for linking web pages with additional backgrounds or -details, for example a report about a bug fixed by the patch or a document -with a specification implemented by the patch:: +details, for example an earlier discussion which leads to the 
patch or a +document with a specification implemented by the patch:: Link: https://example.com/somewhere.html optional-other-stuff @@ -217,7 +217,17 @@ latest public review posting of the patch; often this is automatically done by tools like b4 or a git hook like the one described in 'Documentation/maintainer/configure-git.rst'. -A third kind of tag is used to document who was involved in the development of +If the URL points to a public bug report being fixed by the patch, use the +"Closes:" tag instead:: + + Closes: https://example.com/issues/1234 optional-other-stuff + +Some bug trackers have the ability to close issues automatically when a +commit with such a tag is applied. Some bots monitoring mailing lists can +also track such tags and take certain actions. Private bug trackers and +invalid URLs are forbidden. + +Another kind of tag is used to document who was involved in the development of the patch. Each of these uses this format:: tag: Full Name optional-other-stuff @@ -251,8 +261,10 @@ The tags in common use are: - Reported-by: names a user who reported a problem which is fixed by this patch; this tag is used to give credit to the (often underappreciated) people who test our code and let us know when things do not work - correctly. Note, this tag should be followed by a Link: tag pointing to the - report, unless the report is not available on the web. + correctly. Note, this tag should be followed by a Closes: tag pointing to + the report, unless the report is not available on the web. The Link: tag + can be used instead of Closes: if the patch fixes a part of the issue(s) + being reported. - Cc: the named person received a copy of the patch and had the opportunity to comment on it. 
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 828997bc9ff9..12d58ddc2b8a 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -113,11 +113,9 @@ there is no collision with your six-character ID now, that condition may change five years from now. If related discussions or any other background information behind the change -can be found on the web, add 'Link:' tags pointing to it. In case your patch -fixes a bug, for example, add a tag with a URL referencing the report in the -mailing list archives or a bug tracker; if the patch is a result of some -earlier mailing list discussion or something documented on the web, point to -it. +can be found on the web, add 'Link:' tags pointing to it. If the patch is a +result of some earlier mailing list discussions or something documented on the +web, point to it. When linking to mailing list archives, preferably use the lore.kernel.org message archiver service. To create the link URL, use the contents of the @@ -134,6 +132,16 @@ resources. In addition to giving a URL to a mailing list archive or bug, summarize the relevant points of the discussion that led to the patch as submitted. +In case your patch fixes a bug, use the 'Closes:' tag with a URL referencing +the report in the mailing list archives or a public bug tracker. For example:: + + Closes: https://example.com/issues/1234 + +Some bug trackers have the ability to close issues automatically when a +commit with such a tag is applied. Some bots monitoring mailing lists can +also track such tags and take certain actions. Private bug trackers and +invalid URLs are forbidden. + If your patch fixes a bug in a specific commit, e.g. you found an issue using ``git bisect``, please use the 'Fixes:' tag with the first 12 characters of the SHA-1 ID, and the one line summary. 
Do not split the tag across multiple @@ -498,9 +506,11 @@ Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes: The Reported-by tag gives credit to people who find bugs and report them and it hopefully inspires them to help us again in the future. The tag is intended for bugs; please do not use it to credit feature requests. The tag should be -followed by a Link: tag pointing to the report, unless the report is not -available on the web. Please note that if the bug was reported in private, then -ask for permission first before using the Reported-by tag. +followed by a Closes: tag pointing to the report, unless the report is not +available on the web. The Link: tag can be used instead of Closes: if the patch +fixes a part of the issue(s) being reported. Please note that if the bug was +reported in private, then ask for permission first before using the Reported-by +tag. A Tested-by: tag indicates that the patch has been successfully tested (in some environment) by the person named. This tag informs maintainers that From c917a872cee480d1fc8461da9be93e27e7977877 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 3 Apr 2023 18:23:47 +0200 Subject: [PATCH 29/50] checkpatch: don't print the next line if not defined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When checking if "Reported-by" tag is followed by "Link:", there is no need to print the next line if there is no next line. While at it, also mention in this case that the "Link:" tag should be followed by a URL, similar to the next warning. By doing that, the code is now similar to what is done above when checking if the Co-developed-by tag is properly used. 
Link: https://lkml.kernel.org/r/20230314-doc-checkpatch-closes-tag-v4-2-d26d1fa66f9f@tessares.net Fixes: d7f1d71e5ef6 ("checkpatch: warn when Reported-by: is not followed by Link:") Signed-off-by: Matthieu Baerts Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Bagas Sanjaya Cc: Daniel Vetter Cc: David Airlie Cc: Dwaipayan Ray Cc: Jonathan Corbet Cc: Kai Wasserbäch Cc: Konstantin Ryabitsev Cc: Linus Torvalds Cc: Lukas Bulwahn Cc: Thorsten Leemhuis Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c7cd0750b41e..8e22eb45ab2d 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3162,7 +3162,7 @@ sub process { if ($sign_off =~ /^reported(?:|-and-tested)-by:$/i) { if (!defined $lines[$linenr]) { WARN("BAD_REPORTED_BY_LINK", - "Reported-by: should be immediately followed by Link: to the report\n" . $herecurr . $rawlines[$linenr] . "\n"); + "Reported-by: should be immediately followed by Link: with a URL to the report\n" . $herecurr . "\n"); } elsif ($rawlines[$linenr] !~ m{^link:\s*https?://}i) { WARN("BAD_REPORTED_BY_LINK", "Reported-by: should be immediately followed by Link: with a URL to the report\n" . $herecurr . $rawlines[$linenr] . "\n"); From f94e40ea272b9847833de1114677eb5ad8b12a0a Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 3 Apr 2023 18:23:48 +0200 Subject: [PATCH 30/50] checkpatch: use a list of "link" tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following commit will allow the use of a similar "link" tag. Because there is a possibility that other similar tags will be added in the future and to reduce the number of places where the code will be modified to allow this new tag, a list with all these "link" tags is now used. Two variables are created from it: one to search for such tags and one to print all tags in a warning message. 
Link: https://lkml.kernel.org/r/20230314-doc-checkpatch-closes-tag-v4-3-d26d1fa66f9f@tessares.net Signed-off-by: Matthieu Baerts Suggested-by: Joe Perches Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Bagas Sanjaya Cc: Daniel Vetter Cc: David Airlie Cc: Dwaipayan Ray Cc: Jonathan Corbet Cc: Kai Wasserbäch Cc: Konstantin Ryabitsev Cc: Linus Torvalds Cc: Lukas Bulwahn Cc: Thorsten Leemhuis Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 8e22eb45ab2d..209898b8f0b6 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -620,6 +620,22 @@ our $signature_tags = qr{(?xi: Cc: )}; +our @link_tags = qw(Link); + +#Create a search and print patterns for all these strings to be used directly below +our $link_tags_search = ""; +our $link_tags_print = ""; +foreach my $entry (@link_tags) { + if ($link_tags_search ne "") { + $link_tags_search .= '|'; + $link_tags_print .= ' or '; + } + $entry .= ':'; + $link_tags_search .= $entry; + $link_tags_print .= "'$entry'"; +} +$link_tags_search = "(?:${link_tags_search})"; + our $tracing_logging_tags = qr{(?xi: [=-]*> | <[=-]* | @@ -3250,8 +3266,8 @@ sub process { # file delta changes $line =~ /^\s*(?:[\w\.\-\+]*\/)++[\w\.\-\+]+:/ || # filename then : - $line =~ /^\s*(?:Fixes:|Link:|$signature_tags)/i || - # A Fixes: or Link: line or signature tag line + $line =~ /^\s*(?:Fixes:|$link_tags_search|$signature_tags)/i || + # A Fixes:, link or signature tag line $commit_log_possible_stack_dump)) { WARN("COMMIT_LOG_LONG_LINE", "Possible unwrapped commit description (prefer a maximum 75 chars per line)\n" . 
$herecurr); @@ -3266,13 +3282,13 @@ sub process { # Check for odd tags before a URI/URL if ($in_commit_log && - $line =~ /^\s*(\w+):\s*http/ && $1 ne 'Link') { + $line =~ /^\s*(\w+:)\s*http/ && $1 !~ /^$link_tags_search$/) { if ($1 =~ /^v(?:ersion)?\d+/i) { WARN("COMMIT_LOG_VERSIONING", "Patch version information should be after the --- line\n" . $herecurr); } else { WARN("COMMIT_LOG_USE_LINK", - "Unknown link reference '$1:', use 'Link:' instead\n" . $herecurr); + "Unknown link reference '$1', use $link_tags_print instead\n" . $herecurr); } } From 44c31888098a590b8ec5ba37009e5a983f7c4b46 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 3 Apr 2023 18:23:49 +0200 Subject: [PATCH 31/50] checkpatch: allow Closes tags with links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a follow-up of a previous patch modifying the documentation to allow using the "Closes:" tag, checkpatch.pl is updated accordingly. checkpatch.pl now no longer complains when the "Closes:" tag is used by itself: commit 76f381bb77a0 ("checkpatch: warn when unknown tags are used for links") ... 
or after the "Reported-by:" tag: commit d7f1d71e5ef6 ("checkpatch: warn when Reported-by: is not followed by Link:") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/373 Link: https://lkml.kernel.org/r/20230314-doc-checkpatch-closes-tag-v4-4-d26d1fa66f9f@tessares.net Signed-off-by: Matthieu Baerts Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Bagas Sanjaya Cc: Daniel Vetter Cc: David Airlie Cc: Dwaipayan Ray Cc: Jonathan Corbet Cc: Kai Wasserbäch Cc: Konstantin Ryabitsev Cc: Linus Torvalds Cc: Lukas Bulwahn Cc: Thorsten Leemhuis Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 209898b8f0b6..922428ee3be5 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -620,7 +620,7 @@ our $signature_tags = qr{(?xi: Cc: )}; -our @link_tags = qw(Link); +our @link_tags = qw(Link Closes); #Create a search and print patterns for all these strings to be used directly below our $link_tags_search = ""; @@ -3174,14 +3174,14 @@ sub process { } } -# check if Reported-by: is followed by a Link: +# check if Reported-by: is followed by a Closes: tag if ($sign_off =~ /^reported(?:|-and-tested)-by:$/i) { if (!defined $lines[$linenr]) { WARN("BAD_REPORTED_BY_LINK", - "Reported-by: should be immediately followed by Link: with a URL to the report\n" . $herecurr . "\n"); - } elsif ($rawlines[$linenr] !~ m{^link:\s*https?://}i) { + "Reported-by: should be immediately followed by Closes: with a URL to the report\n" . $herecurr . "\n"); + } elsif ($rawlines[$linenr] !~ m{^closes:\s*https?://}i) { WARN("BAD_REPORTED_BY_LINK", - "Reported-by: should be immediately followed by Link: with a URL to the report\n" . $herecurr . $rawlines[$linenr] . "\n"); + "Reported-by: should be immediately followed by Closes: with a URL to the report\n" . $herecurr . $rawlines[$linenr] . 
"\n"); } } } From d6ccdd678e459fdcfe675a45c76b3f1a75b9a8e8 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 3 Apr 2023 18:23:50 +0200 Subject: [PATCH 32/50] checkpatch: check for misuse of the link tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "Link:" and "Closes:" tags have to be used with public URLs. It is difficult to make sure the link is public but at least we can verify the tag is followed by 'http(s)://'. With that, we avoid such a tag that is not allowed [1]: Closes: Now that we check the "link" tags are followed by a URL, we can relax the check linked to "Reported-by being followed by a link tag" to only verify if a "link" tag is present after the "Reported-by" one. Link: https://lore.kernel.org/linux-doc/CAHk-=wh0v1EeDV3v8TzK81nDC40=XuTdY2MCr0xy3m3FiBV3+Q@mail.gmail.com/ [1] Link: https://lkml.kernel.org/r/20230314-doc-checkpatch-closes-tag-v4-5-d26d1fa66f9f@tessares.net Signed-off-by: Matthieu Baerts Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Bagas Sanjaya Cc: Daniel Vetter Cc: David Airlie Cc: Dwaipayan Ray Cc: Jonathan Corbet Cc: Kai Wasserbäch Cc: Konstantin Ryabitsev Cc: Linus Torvalds Cc: Lukas Bulwahn Cc: Thorsten Leemhuis Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 922428ee3be5..3e6f5a8614d3 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3179,7 +3179,7 @@ sub process { if (!defined $lines[$linenr]) { WARN("BAD_REPORTED_BY_LINK", "Reported-by: should be immediately followed by Closes: with a URL to the report\n" . $herecurr . "\n"); - } elsif ($rawlines[$linenr] !~ m{^closes:\s*https?://}i) { + } elsif ($rawlines[$linenr] !~ /^closes:\s*/i) { WARN("BAD_REPORTED_BY_LINK", "Reported-by: should be immediately followed by Closes: with a URL to the report\n" . $herecurr . $rawlines[$linenr] . 
"\n"); } @@ -3292,6 +3292,17 @@ sub process { } } +# Check for misuse of the link tags + if ($in_commit_log && + $line =~ /^\s*(\w+:)\s*(\S+)/) { + my $tag = $1; + my $value = $2; + if ($tag =~ /^$link_tags_search$/ && $value !~ m{^https?://}) { + WARN("COMMIT_LOG_WRONG_LINK", + "'$tag' should be followed by a public http(s) link\n" . $herecurr); + } + } + # Check for lines starting with a # if ($in_commit_log && $line =~ /^#/) { if (WARN("COMMIT_COMMENT_SYMBOL", From 1be2edb25c72c3fe1bf955694c5521b035fcb276 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 5 Apr 2023 16:34:52 +0200 Subject: [PATCH 33/50] proc/stat: remove arch_idle_time() The last (only) architecture specific arch_idle_time() implementation was removed with commit be76ea614460 ("s390/idle: remove arch_cpu_idle_time() and corresponding code"). Therefore remove the now dead code in fs/proc/stat.c as well. Link: https://lkml.kernel.org/r/20230405143452.2677172-1-hca@linux.ibm.com Signed-off-by: Heiko Carstens Cc: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/stat.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 4fb8729a68d4..da60956b2915 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -22,30 +22,6 @@ #define arch_irq_stat() 0 #endif -#ifdef arch_idle_time - -u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) -{ - u64 idle; - - idle = kcs->cpustat[CPUTIME_IDLE]; - if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) - idle += arch_idle_time(cpu); - return idle; -} - -static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) -{ - u64 iowait; - - iowait = kcs->cpustat[CPUTIME_IOWAIT]; - if (cpu_online(cpu) && nr_iowait_cpu(cpu)) - iowait += arch_idle_time(cpu); - return iowait; -} - -#else - u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) { u64 idle, idle_usecs = -1ULL; @@ -78,8 +54,6 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) return iowait; } -#endif - static void 
show_irq_gap(struct seq_file *p, unsigned int gap) { static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"; From b0687c1119b4e8c88a651b6e876b7eae28d076e3 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Tue, 4 Apr 2023 17:13:51 -0500 Subject: [PATCH 34/50] lib/rbtree: use '+' instead of '|' for setting color. This has a slight benefit for x86 and has no effect on other targets. The benefit to x86 is that it changes the codegen for setting a node to black from `mov %r0, %r1; or $RB_BLACK, %r1` to `lea RB_BLACK(%r0), %r1` which saves an instruction. In all other cases it just replaces ALU with ALU (or -> add) which performs the same on all machines I am aware of. Total instructions in rbtree.o: Before - 802 After - 782 so it saves about 20 `mov` instructions. Link: https://lkml.kernel.org/r/20230404221350.3806566-1-goldstein.w.n@gmail.com Signed-off-by: Noah Goldstein Cc: Michel Lespinasse Signed-off-by: Andrew Morton --- include/linux/rbtree_augmented.h | 4 ++-- lib/rbtree.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index d1c53e9d8c75..7ee7ed5de722 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -156,13 +156,13 @@ RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \ static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) { - rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; + rb->__rb_parent_color = rb_color(rb) + (unsigned long)p; } static inline void rb_set_parent_color(struct rb_node *rb, struct rb_node *p, int color) { - rb->__rb_parent_color = (unsigned long)p | color; + rb->__rb_parent_color = (unsigned long)p + color; } static inline void diff --git a/lib/rbtree.c b/lib/rbtree.c index c4ac5c2421f2..5114eda6309c 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -58,7 +58,7 @@ static inline void rb_set_black(struct rb_node *rb) { - rb->__rb_parent_color |= RB_BLACK; + rb->__rb_parent_color += RB_BLACK; } static 
inline struct rb_node *rb_red_parent(struct rb_node *red) From b7235d6bb516fed6d62d2c9e30e7123b6ce5124c Mon Sep 17 00:00:00 2001 From: Kieran Bingham Date: Tue, 4 Apr 2023 14:40:49 -0700 Subject: [PATCH 35/50] scripts/gdb: add a Radix Tree Parser Linux makes use of the Radix Tree data structure to store pointers indexed by integer values. This structure is utilised across many structures in the kernel including the IRQ descriptor tables, and several filesystems. This module provides a method to look up values from a structure given its head node. Usage: The function lx_radix_tree_lookup must be given a symbol of type struct radix_tree_root, and an index into that tree. The object returned is a generic integer value, and must be cast correctly to the type based on the storage in the data structure. For example, to print the irq descriptor in the sparse irq_desc_tree at index 18, try the following: (gdb) print (struct irq_desc)$lx_radix_tree_lookup(irq_desc_tree, 18) This script previously existed under commit e127a73d41ac471d7e3ba950cf128f42d6ee3448 ("scripts/gdb: add a Radix Tree Parser") and was later reverted with b447e02548a3304c47b78b5e2d75a4312a8f17e1 (Revert "scripts/gdb: add a Radix Tree Parser"). This version expects the XArray based radix tree implementation and has been verified using QEMU/x86 on Linux 6.3-rc5. 
[f.fainelli@gmail.com: revive and update for xarray implementation] [f.fainelli@gmail.com: guard against a NULL node in the while loop] Link: https://lkml.kernel.org/r/20230405222743.1191674-1-f.fainelli@gmail.com Link: https://lkml.kernel.org/r/20230404214049.1016811-1-f.fainelli@gmail.com Signed-off-by: Kieran Bingham Signed-off-by: Florian Fainelli Cc: Jan Kiszka Cc: Kieran Bingham Signed-off-by: Andrew Morton --- scripts/gdb/linux/constants.py.in | 8 +++ scripts/gdb/linux/radixtree.py | 90 +++++++++++++++++++++++++++++++ scripts/gdb/vmlinux-gdb.py | 1 + 3 files changed, 99 insertions(+) create mode 100644 scripts/gdb/linux/radixtree.py diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index 2efbec6b6b8d..6c886deb0b18 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -17,6 +17,7 @@ #include #include #include +#include #include /* We need to stringify expanded macros so that they can be parsed */ @@ -68,6 +69,13 @@ LX_VALUE(NR_CPUS) /* linux/of_fdt.h> */ LX_VALUE(OF_DT_HEADER) +/* linux/radix-tree.h */ +LX_GDBPARSED(RADIX_TREE_ENTRY_MASK) +LX_GDBPARSED(RADIX_TREE_INTERNAL_NODE) +LX_GDBPARSED(RADIX_TREE_MAP_SIZE) +LX_GDBPARSED(RADIX_TREE_MAP_SHIFT) +LX_GDBPARSED(RADIX_TREE_MAP_MASK) + /* Kernel Configs */ LX_CONFIG(CONFIG_GENERIC_CLOCKEVENTS) LX_CONFIG(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) diff --git a/scripts/gdb/linux/radixtree.py b/scripts/gdb/linux/radixtree.py new file mode 100644 index 000000000000..074543ac763d --- /dev/null +++ b/scripts/gdb/linux/radixtree.py @@ -0,0 +1,90 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Radix Tree Parser +# +# Copyright (c) 2016 Linaro Ltd +# Copyright (c) 2023 Broadcom +# +# Authors: +# Kieran Bingham +# Florian Fainelli + +import gdb + +from linux import utils +from linux import constants + +radix_tree_root_type = utils.CachedType("struct xarray") +radix_tree_node_type = utils.CachedType("struct xa_node") + +def is_internal_node(node): + long_type = 
utils.get_long_type() + return ((node.cast(long_type) & constants.LX_RADIX_TREE_ENTRY_MASK) == constants.LX_RADIX_TREE_INTERNAL_NODE) + +def entry_to_node(node): + long_type = utils.get_long_type() + node_type = node.type + indirect_ptr = node.cast(long_type) & ~constants.LX_RADIX_TREE_INTERNAL_NODE + return indirect_ptr.cast(radix_tree_node_type.get_type().pointer()) + +def node_maxindex(node): + return (constants.LX_RADIX_TREE_MAP_SIZE << node['shift']) - 1 + +def lookup(root, index): + if root.type == radix_tree_root_type.get_type().pointer(): + node = root.dereference() + elif root.type != radix_tree_root_type.get_type(): + raise gdb.GdbError("must be {} not {}" + .format(radix_tree_root_type.get_type(), root.type)) + + node = root['xa_head'] + if node == 0: + return None + + if not (is_internal_node(node)): + if (index > 0): + return None + return node + + node = entry_to_node(node) + maxindex = node_maxindex(node) + + if (index > maxindex): + return None + + shift = node['shift'] + constants.LX_RADIX_TREE_MAP_SHIFT + + while True: + offset = (index >> node['shift']) & constants.LX_RADIX_TREE_MAP_MASK + slot = node['slots'][offset] + + if slot == 0: + return None + + node = slot.cast(node.type.pointer()).dereference() + if node == 0: + return None + + shift -= constants.LX_RADIX_TREE_MAP_SHIFT + if (shift <= 0): + break + + return node + +class LxRadixTree(gdb.Function): + """ Lookup and return a node from a RadixTree. + +$lx_radix_tree_lookup(root_node [, index]): Return the node at the given index. 
+If index is omitted, the root node is dereference and returned.""" + + def __init__(self): + super(LxRadixTree, self).__init__("lx_radix_tree_lookup") + + def invoke(self, root, index=0): + result = lookup(root, index) + if result is None: + raise gdb.GdbError("No entry in tree at index {}".format(index)) + + return result + +LxRadixTree() diff --git a/scripts/gdb/vmlinux-gdb.py b/scripts/gdb/vmlinux-gdb.py index 3a5b44cd6bfe..4a5056f2c247 100644 --- a/scripts/gdb/vmlinux-gdb.py +++ b/scripts/gdb/vmlinux-gdb.py @@ -38,3 +38,4 @@ else: import linux.genpd import linux.device import linux.mm + import linux.radixtree From 8af055ae25bff48f57227f5e3d48a4306f3dd1c4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 6 Apr 2023 14:52:51 -0700 Subject: [PATCH 36/50] scripts/gdb: raise error with reduced debugging information If CONFIG_DEBUG_INFO_REDUCED is enabled in the kernel configuration, we will typically not be able to load vmlinux-gdb.py and will fail with: Traceback (most recent call last): File "/home/fainelli/work/buildroot/output/arm64/build/linux-custom/vmlinux-gdb.py", line 25, in <module> import linux.utils File "/home/fainelli/work/buildroot/output/arm64/build/linux-custom/scripts/gdb/linux/utils.py", line 131, in <module> atomic_long_counter_offset = atomic_long_type.get_type()['counter'].bitpos KeyError: 'counter' Rather than be left wondering what is happening only to find out that reduced debug information is the cause, raise an error. This was not typically a problem until e3c8d33e0d62 ("scripts/gdb: fix 'lx-dmesg' on 32 bits arch") but it has been since then. 
Link: https://lkml.kernel.org/r/20230406215252.1580538-1-f.fainelli@gmail.com Fixes: e3c8d33e0d62 ("scripts/gdb: fix 'lx-dmesg' on 32 bits arch") Signed-off-by: Florian Fainelli Cc: Antonio Borneo Cc: Jan Kiszka Cc: John Ogness Cc: Kieran Bingham Cc: Petr Mladek Signed-off-by: Andrew Morton --- scripts/gdb/linux/constants.py.in | 2 ++ scripts/gdb/vmlinux-gdb.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index 6c886deb0b18..e484e2e7e4d5 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -40,6 +40,8 @@ import gdb +LX_CONFIG(CONFIG_DEBUG_INFO_REDUCED) + /* linux/clk-provider.h */ if IS_BUILTIN(CONFIG_COMMON_CLK): LX_GDBPARSED(CLK_GET_RATE_NOCACHE) diff --git a/scripts/gdb/vmlinux-gdb.py b/scripts/gdb/vmlinux-gdb.py index 4a5056f2c247..2f57adcf3dff 100644 --- a/scripts/gdb/vmlinux-gdb.py +++ b/scripts/gdb/vmlinux-gdb.py @@ -22,6 +22,10 @@ except: gdb.write("NOTE: gdb 7.2 or later required for Linux helper scripts to " "work.\n") else: + import linux.constants + if linux.constants.LX_CONFIG_DEBUG_INFO_REDUCED: + raise gdb.GdbError("Reduced debug information will prevent GDB " + "from having complete types.\n") import linux.utils import linux.symbols import linux.modules @@ -32,7 +36,6 @@ else: import linux.lists import linux.rbtree import linux.proc - import linux.constants import linux.timerlist import linux.clk import linux.genpd From b0969d7687a7aaa82dcf2d1f245ef699387886da Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 6 Apr 2023 15:04:51 -0700 Subject: [PATCH 37/50] scripts/gdb: print interrupts This GDB script prints the interrupts in the system in the same way that /proc/interrupts does. This does include the architecture specific part done by arch_show_interrupts() for x86, ARM, ARM64 and MIPS. 
Example output from an ARM64 system: (gdb) lx-interruptlist CPU0 CPU1 CPU2 CPU3 10: 3167 1225 1276 2629 GICv2 30 Level arch_timer 13: 0 0 0 0 GICv2 36 Level arm-pmu 14: 0 0 0 0 GICv2 37 Level arm-pmu 15: 0 0 0 0 GICv2 38 Level arm-pmu 16: 0 0 0 0 GICv2 39 Level arm-pmu 28: 0 0 0 0 interrupt-controller@8410640 5 Edge brcmstb-gpio-wake 30: 125 0 0 0 GICv2 128 Level ttyS0 31: 0 0 0 0 interrupt-controller@8416000 0 Level mspi_done 32: 0 0 0 0 interrupt-controller@8410640 3 Edge brcmstb-waketimer 33: 0 0 0 0 interrupt-controller@8418580 8 Edge brcmstb-waketimer-rtc 34: 872 0 0 0 GICv2 230 Level brcm_scmi@0 35: 0 0 0 0 interrupt-controller@8410640 10 Edge 8d0f200.usb-phy 37: 0 0 0 0 GICv2 97 Level PCIe PME 42: 0 0 0 0 GICv2 145 Level xhci-hcd:usb1 43: 94 0 0 0 GICv2 71 Level mmc1 44: 0 0 0 0 GICv2 70 Level mmc0 IPI0: 23 666 154 98 Rescheduling interrupts IPI1: 247 1053 1701 634 Function call interrupts IPI2: 0 0 0 0 CPU stop interrupts IPI3: 0 0 0 0 CPU stop (for crash dump) interrupts IPI4: 0 0 0 0 Timer broadcast interrupts IPI5: 7 9 5 0 IRQ work interrupts IPI6: 0 0 0 0 CPU wake-up interrupts ERR: 0 Link: https://lkml.kernel.org/r/20230406220451.1583239-1-f.fainelli@gmail.com Signed-off-by: Florian Fainelli Cc: Jan Kiszka Cc: Kieran Bingham Signed-off-by: Andrew Morton --- scripts/gdb/linux/constants.py.in | 14 ++ scripts/gdb/linux/interrupts.py | 232 ++++++++++++++++++++++++++++++ scripts/gdb/vmlinux-gdb.py | 1 + 3 files changed, 247 insertions(+) create mode 100644 scripts/gdb/linux/interrupts.py diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index e484e2e7e4d5..36fd2b145853 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,10 @@ LX_VALUE(SB_NODIRATIME) /* linux/htimer.h */ LX_GDBPARSED(hrtimer_resolution) +/* linux/irq.h */ +LX_GDBPARSED(IRQD_LEVEL) +LX_GDBPARSED(IRQ_HIDDEN) + /* linux/mount.h */ 
LX_VALUE(MNT_NOSUID) LX_VALUE(MNT_NODEV) @@ -85,3 +90,12 @@ LX_CONFIG(CONFIG_HIGH_RES_TIMERS) LX_CONFIG(CONFIG_NR_CPUS) LX_CONFIG(CONFIG_OF) LX_CONFIG(CONFIG_TICK_ONESHOT) +LX_CONFIG(CONFIG_GENERIC_IRQ_SHOW_LEVEL) +LX_CONFIG(CONFIG_X86_LOCAL_APIC) +LX_CONFIG(CONFIG_SMP) +LX_CONFIG(CONFIG_X86_THERMAL_VECTOR) +LX_CONFIG(CONFIG_X86_MCE_THRESHOLD) +LX_CONFIG(CONFIG_X86_MCE_AMD) +LX_CONFIG(CONFIG_X86_MCE) +LX_CONFIG(CONFIG_X86_IO_APIC) +LX_CONFIG(CONFIG_HAVE_KVM) diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py new file mode 100644 index 000000000000..ef478e273791 --- /dev/null +++ b/scripts/gdb/linux/interrupts.py @@ -0,0 +1,232 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2023 Broadcom + +import gdb + +from linux import constants +from linux import cpus +from linux import utils +from linux import radixtree + +irq_desc_type = utils.CachedType("struct irq_desc") + +def irq_settings_is_hidden(desc): + return desc['status_use_accessors'] & constants.LX_IRQ_HIDDEN + +def irq_desc_is_chained(desc): + return desc['action'] and desc['action'] == gdb.parse_and_eval("&chained_action") + +def irqd_is_level(desc): + return desc['irq_data']['common']['state_use_accessors'] & constants.LX_IRQD_LEVEL + +def show_irq_desc(prec, irq): + text = "" + + desc = radixtree.lookup(gdb.parse_and_eval("&irq_desc_tree"), irq) + if desc is None: + return text + + desc = desc.cast(irq_desc_type.get_type()) + if desc is None: + return text + + if irq_settings_is_hidden(desc): + return text + + any_count = 0 + if desc['kstat_irqs']: + for cpu in cpus.each_online_cpu(): + any_count += cpus.per_cpu(desc['kstat_irqs'], cpu) + + if (desc['action'] == 0 or irq_desc_is_chained(desc)) and any_count == 0: + return text; + + text += "%*d: " % (prec, irq) + for cpu in cpus.each_online_cpu(): + if desc['kstat_irqs']: + count = cpus.per_cpu(desc['kstat_irqs'], cpu) + else: + count = 0 + text += "%10u" % (count) + + name = "None" + if desc['irq_data']['chip']: + chip = 
desc['irq_data']['chip'] + if chip['name']: + name = chip['name'].string() + else: + name = "-" + + text += " %8s" % (name) + + if desc['irq_data']['domain']: + text += " %*lu" % (prec, desc['irq_data']['hwirq']) + else: + text += " %*s" % (prec, "") + + if constants.LX_CONFIG_GENERIC_IRQ_SHOW_LEVEL: + text += " %-8s" % ("Level" if irqd_is_level(desc) else "Edge") + + if desc['name']: + text += "-%-8s" % (desc['name'].string()) + + """ Some toolchains may not be able to provide information about irqaction """ + try: + gdb.lookup_type("struct irqaction") + action = desc['action'] + if action is not None: + text += " %s" % (action['name'].string()) + while True: + action = action['next'] + if action is not None: + break + if action['name']: + text += ", %s" % (action['name'].string()) + except: + pass + + text += "\n" + + return text + +def show_irq_err_count(prec): + cnt = utils.gdb_eval_or_none("irq_err_count") + text = "" + if cnt is not None: + text += "%*s: %10u\n" % (prec, "ERR", cnt['counter']) + return text + +def x86_show_irqstat(prec, pfx, field, desc): + irq_stat = gdb.parse_and_eval("&irq_stat") + text = "%*s: " % (prec, pfx) + for cpu in cpus.each_online_cpu(): + stat = cpus.per_cpu(irq_stat, cpu) + text += "%10u " % (stat[field]) + text += " %s\n" % (desc) + return text + +def x86_show_mce(prec, var, pfx, desc): + pvar = gdb.parse_and_eval(var) + text = "%*s: " % (prec, pfx) + for cpu in cpus.each_online_cpu(): + text += "%10u " % (cpus.per_cpu(pvar, cpu)) + text += " %s\n" % (desc) + return text + +def x86_show_interupts(prec): + text = x86_show_irqstat(prec, "NMI", '__nmi_count', 'Non-maskable interrupts') + + if constants.LX_CONFIG_X86_LOCAL_APIC: + text += x86_show_irqstat(prec, "LOC", 'apic_timer_irqs', "Local timer interrupts") + text += x86_show_irqstat(prec, "SPU", 'irq_spurious_count', "Spurious interrupts") + text += x86_show_irqstat(prec, "PMI", 'apic_perf_irqs', "Performance monitoring interrupts") + text += x86_show_irqstat(prec, "IWI", 
'apic_irq_work_irqs', "IRQ work interrupts") + text += x86_show_irqstat(prec, "RTR", 'icr_read_retry_count', "APIC ICR read retries") + if utils.gdb_eval_or_none("x86_platform_ipi_callback") is not None: + text += x86_show_irqstat(prec, "PLT", 'x86_platform_ipis', "Platform interrupts") + + if constants.LX_CONFIG_SMP: + text += x86_show_irqstat(prec, "RES", 'irq_resched_count', "Rescheduling interrupts") + text += x86_show_irqstat(prec, "CAL", 'irq_call_count', "Function call interrupts") + text += x86_show_irqstat(prec, "TLB", 'irq_tlb_count', "TLB shootdowns") + + if constants.LX_CONFIG_X86_THERMAL_VECTOR: + text += x86_show_irqstat(prec, "TRM", 'irq_thermal_count', "Thermal events interrupts") + + if constants.LX_CONFIG_X86_MCE_THRESHOLD: + text += x86_show_irqstat(prec, "THR", 'irq_threshold_count', "Threshold APIC interrupts") + + if constants.LX_CONFIG_X86_MCE_AMD: + text += x86_show_irqstat(prec, "DFR", 'irq_deferred_error_count', "Deferred Error APIC interrupts") + + if constants.LX_CONFIG_X86_MCE: + text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions") + text == x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls") + + text += show_irq_err_count(prec) + + if constants.LX_CONFIG_X86_IO_APIC: + cnt = utils.gdb_eval_or_none("irq_mis_count") + if cnt is not None: + text += "%*s: %10u\n" % (prec, "MIS", cnt['counter']) + + if constants.LX_CONFIG_HAVE_KVM: + text += x86_show_irqstat(prec, "PIN", 'kvm_posted_intr_ipis', 'Posted-interrupt notification event') + text += x86_show_irqstat(prec, "NPI", 'kvm_posted_intr_nested_ipis', 'Nested posted-interrupt event') + text += x86_show_irqstat(prec, "PIW", 'kvm_posted_intr_wakeup_ipis', 'Posted-interrupt wakeup event') + + return text + +def arm_common_show_interrupts(prec): + text = "" + nr_ipi = utils.gdb_eval_or_none("nr_ipi") + ipi_desc = utils.gdb_eval_or_none("ipi_desc") + ipi_types = utils.gdb_eval_or_none("ipi_types") + if nr_ipi is None or ipi_desc is None or 
ipi_types is None: + return text + + if prec >= 4: + sep = " " + else: + sep = "" + + for ipi in range(nr_ipi): + text += "%*s%u:%s" % (prec - 1, "IPI", ipi, sep) + desc = ipi_desc[ipi].cast(irq_desc_type.get_type().pointer()) + if desc == 0: + continue + for cpu in cpus.each_online_cpu(): + text += "%10u" % (cpus.per_cpu(desc['kstat_irqs'], cpu)) + text += " %s" % (ipi_types[ipi].string()) + text += "\n" + return text + +def aarch64_show_interrupts(prec): + text = arm_common_show_interrupts(prec) + text += "%*s: %10lu\n" % (prec, "ERR", gdb.parse_and_eval("irq_err_count")) + return text + +def arch_show_interrupts(prec): + text = "" + if utils.is_target_arch("x86"): + text += x86_show_interupts(prec) + elif utils.is_target_arch("aarch64"): + text += aarch64_show_interrupts(prec) + elif utils.is_target_arch("arm"): + text += arm_common_show_interrupts(prec) + elif utils.is_target_arch("mips"): + text += show_irq_err_count(prec) + else: + raise gdb.GdbError("Unsupported architecture: {}".format(target_arch)) + + return text + +class LxInterruptList(gdb.Command): + """Print /proc/interrupts""" + + def __init__(self): + super(LxInterruptList, self).__init__("lx-interruptlist", gdb.COMMAND_DATA) + + def invoke(self, arg, from_tty): + nr_irqs = gdb.parse_and_eval("nr_irqs") + prec = 3 + j = 1000 + while prec < 10 and j <= nr_irqs: + prec += 1 + j *= 10 + + gdb.write("%*s" % (prec + 8, "")) + for cpu in cpus.each_online_cpu(): + gdb.write("CPU%-8d" % cpu) + gdb.write("\n") + + if utils.gdb_eval_or_none("&irq_desc_tree") is None: + return + + for irq in range(nr_irqs): + gdb.write(show_irq_desc(prec, irq)) + gdb.write(arch_show_interrupts(prec)) + + +LxInterruptList() diff --git a/scripts/gdb/vmlinux-gdb.py b/scripts/gdb/vmlinux-gdb.py index 2f57adcf3dff..2a72f91059b5 100644 --- a/scripts/gdb/vmlinux-gdb.py +++ b/scripts/gdb/vmlinux-gdb.py @@ -42,3 +42,4 @@ else: import linux.device import linux.mm import linux.radixtree + import linux.interrupts From 
29692fc92c5b6a2c7cfe6f588ef68272e3343647 Mon Sep 17 00:00:00 2001 From: Amjad Ouled-Ameur Date: Thu, 6 Apr 2023 15:12:17 -0700 Subject: [PATCH 38/50] scripts/gdb: timerlist: convert int chunks to str join() expects strings but integers are given. Convert chunks list to strings before passing it to join(). Link: https://lkml.kernel.org/r/20230406221217.1585486-4-f.fainelli@gmail.com Signed-off-by: Amjad Ouled-Ameur Signed-off-by: Florian Fainelli Tested-by: Florian Fainelli Cc: Jan Kiszka Signed-off-by: Andrew Morton --- scripts/gdb/linux/timerlist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gdb/linux/timerlist.py b/scripts/gdb/linux/timerlist.py index 249f0e804b24..64bc87191003 100644 --- a/scripts/gdb/linux/timerlist.py +++ b/scripts/gdb/linux/timerlist.py @@ -174,7 +174,7 @@ def pr_cpumask(mask): if 0 < extra <= 4: chunks[0] = chunks[0][0] # Cut off the first 0 - return "".join(chunks) + return "".join(str(chunks)) class LxTimerList(gdb.Command): From a3b2aeac9d154e5e15ddbf19de934c0c606b6acd Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Sat, 8 Apr 2023 17:28:35 +0800 Subject: [PATCH 39/50] delayacct: track delays from IRQ/SOFTIRQ Delay accounting does not track the delay of IRQ/SOFTIRQ, even though IRQ/SOFTIRQ can have an obvious impact on the productivity of some workloads, such as when workloads are running on a system which is busy handling network IRQ/SOFTIRQ. Getting the delay of IRQ/SOFTIRQ could help users to reduce such delay, for example by setting interrupt affinity or task affinity, or by using a kernel thread for NAPI. This is inspired by "sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure"[1]. Also fix some code indent problems of older code. 
And update tools/accounting/getdelays.c: / # ./getdelays -p 156 -di print delayacct stats ON printing IO accounting PID 156 CPU count real total virtual total delay total delay average 15 15836008 16218149 275700790 18.380ms IO count delay total delay average 0 0 0.000ms SWAP count delay total delay average 0 0 0.000ms RECLAIM count delay total delay average 0 0 0.000ms THRASHING count delay total delay average 0 0 0.000ms COMPACT count delay total delay average 0 0 0.000ms WPCOPY count delay total delay average 36 7586118 0.211ms IRQ count delay total delay average 42 929161 0.022ms [1] commit 52b1364ba0b1("sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure") Link: https://lkml.kernel.org/r/202304081728353557233@zte.com.cn Signed-off-by: Yang Yang Cc: Jiang Xuexin Cc: wangyong Cc: junhua huang Cc: Balbir Singh Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Juri Lelli Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 7 +++-- include/linux/delayacct.h | 15 ++++++++++ include/uapi/linux/taskstats.h | 6 +++- kernel/delayacct.c | 14 +++++++++ kernel/sched/core.c | 1 + tools/accounting/getdelays.c | 30 +++++++++++-------- 6 files changed, 58 insertions(+), 15 deletions(-) diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 79f537c9f160..f61c01fc376e 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -16,6 +16,7 @@ d) memory reclaim e) thrashing f) direct compact g) write-protect copy +h) IRQ/SOFTIRQ and makes these statistics available to userspace through the taskstats interface. @@ -49,7 +50,7 @@ this structure. See for a description of the fields pertaining to delay accounting. It will generally be in the form of counters returning the cumulative delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page -cache, direct compact, write-protect copy etc. 
+cache, direct compact, write-protect copy, IRQ/SOFTIRQ etc. Taking the difference of two successive readings of a given counter (say cpu_delay_total) for a task will give the delay @@ -118,7 +119,9 @@ Get sum of delays, since system boot, for all pids with tgid 5:: 0 0 0.000ms COMPACT count delay total delay average 0 0 0.000ms - WPCOPY count delay total delay average + WPCOPY count delay total delay average + 0 0 0.000ms + IRQ count delay total delay average 0 0 0.000ms Get IO accounting for pid 1, it works only with -p:: diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 0da97dba9ef8..6639f48dac36 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -48,10 +48,13 @@ struct task_delay_info { u64 wpcopy_start; u64 wpcopy_delay; /* wait for write-protect copy */ + u64 irq_delay; /* wait for IRQ/SOFTIRQ */ + u32 freepages_count; /* total count of memory reclaim */ u32 thrashing_count; /* total count of thrash waits */ u32 compact_count; /* total count of memory compact */ u32 wpcopy_count; /* total count of write-protect copy */ + u32 irq_count; /* total count of IRQ/SOFTIRQ */ }; #endif @@ -81,6 +84,7 @@ extern void __delayacct_compact_start(void); extern void __delayacct_compact_end(void); extern void __delayacct_wpcopy_start(void); extern void __delayacct_wpcopy_end(void); +extern void __delayacct_irq(struct task_struct *task, u32 delta); static inline void delayacct_tsk_init(struct task_struct *tsk) { @@ -215,6 +219,15 @@ static inline void delayacct_wpcopy_end(void) __delayacct_wpcopy_end(); } +static inline void delayacct_irq(struct task_struct *task, u32 delta) +{ + if (!static_branch_unlikely(&delayacct_key)) + return; + + if (task->delays) + __delayacct_irq(task, delta); +} + #else static inline void delayacct_init(void) {} @@ -253,6 +266,8 @@ static inline void delayacct_wpcopy_start(void) {} static inline void delayacct_wpcopy_end(void) {} +static inline void delayacct_irq(struct task_struct *task, u32 delta) +{} 
#endif /* CONFIG_TASK_DELAY_ACCT */ diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index a7f5b11a8f1b..b50b2eb257a0 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 13 +#define TASKSTATS_VERSION 14 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -198,6 +198,10 @@ struct taskstats { /* v13: Delay waiting for write-protect copy */ __u64 wpcopy_count; __u64 wpcopy_delay_total; + + /* v14: Delay waiting for IRQ/SOFTIRQ */ + __u64 irq_count; + __u64 irq_delay_total; }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index e39cb696cfbd..6f0c358e73d8 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -179,12 +179,15 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay; d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp; + tmp = d->irq_delay_total + tsk->delays->irq_delay; + d->irq_delay_total = (tmp < d->irq_delay_total) ? 
0 : tmp; d->blkio_count += tsk->delays->blkio_count; d->swapin_count += tsk->delays->swapin_count; d->freepages_count += tsk->delays->freepages_count; d->thrashing_count += tsk->delays->thrashing_count; d->compact_count += tsk->delays->compact_count; d->wpcopy_count += tsk->delays->wpcopy_count; + d->irq_count += tsk->delays->irq_count; raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return 0; @@ -274,3 +277,14 @@ void __delayacct_wpcopy_end(void) &current->delays->wpcopy_delay, &current->delays->wpcopy_count); } + +void __delayacct_irq(struct task_struct *task, u32 delta) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&task->delays->lock, flags); + task->delays->irq_delay += delta; + task->delays->irq_count++; + raw_spin_unlock_irqrestore(&task->delays->lock, flags); +} + diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0d18c3969f90..5473e831daf3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -704,6 +704,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) rq->prev_irq_time += irq_delta; delta -= irq_delta; psi_account_irqtime(rq->curr, irq_delta); + delayacct_irq(rq->curr, irq_delta); #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING if (static_key_false((&paravirt_steal_rq_enabled))) { diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 23a15d8f2bf4..1334214546d7 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -198,17 +198,19 @@ static void print_delayacct(struct taskstats *t) printf("\n\nCPU %15s%15s%15s%15s%15s\n" " %15llu%15llu%15llu%15llu%15.3fms\n" "IO %15s%15s%15s\n" - " %15llu%15llu%15.3fms\n" + " %15llu%15llu%15.3fms\n" "SWAP %15s%15s%15s\n" - " %15llu%15llu%15.3fms\n" + " %15llu%15llu%15.3fms\n" "RECLAIM %12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" + " %15llu%15llu%15.3fms\n" "THRASHING%12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" + " %15llu%15llu%15.3fms\n" "COMPACT %12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" + " %15llu%15llu%15.3fms\n" "WPCOPY %12s%15s%15s\n" - " 
%15llu%15llu%15.3fms\n", + " %15llu%15llu%15.3fms\n" + "IRQ %15s%15s%15s\n" + " %15llu%15llu%15.3fms\n", "count", "real total", "virtual total", "delay total", "delay average", (unsigned long long)t->cpu_count, @@ -219,27 +221,31 @@ static void print_delayacct(struct taskstats *t) "count", "delay total", "delay average", (unsigned long long)t->blkio_count, (unsigned long long)t->blkio_delay_total, - average_ms((double)t->blkio_delay_total, t->blkio_count), + average_ms((double)t->blkio_delay_total, t->blkio_count), "count", "delay total", "delay average", (unsigned long long)t->swapin_count, (unsigned long long)t->swapin_delay_total, - average_ms((double)t->swapin_delay_total, t->swapin_count), + average_ms((double)t->swapin_delay_total, t->swapin_count), "count", "delay total", "delay average", (unsigned long long)t->freepages_count, (unsigned long long)t->freepages_delay_total, - average_ms((double)t->freepages_delay_total, t->freepages_count), + average_ms((double)t->freepages_delay_total, t->freepages_count), "count", "delay total", "delay average", (unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_delay_total, - average_ms((double)t->thrashing_delay_total, t->thrashing_count), + average_ms((double)t->thrashing_delay_total, t->thrashing_count), "count", "delay total", "delay average", (unsigned long long)t->compact_count, (unsigned long long)t->compact_delay_total, - average_ms((double)t->compact_delay_total, t->compact_count), + average_ms((double)t->compact_delay_total, t->compact_count), "count", "delay total", "delay average", (unsigned long long)t->wpcopy_count, (unsigned long long)t->wpcopy_delay_total, - average_ms((double)t->wpcopy_delay_total, t->wpcopy_count)); + average_ms((double)t->wpcopy_delay_total, t->wpcopy_count), + "count", "delay total", "delay average", + (unsigned long long)t->irq_count, + (unsigned long long)t->irq_delay_total, + average_ms((double)t->irq_delay_total, t->irq_count)); } static void 
task_context_switch_counts(struct taskstats *t) From 31088f6f7906253ef4577f6a9b84e2d42447dba0 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Tue, 11 Apr 2023 10:27:47 +0100 Subject: [PATCH 40/50] uapi/linux/const.h: prefer ISO-friendly __typeof__ typeof is (still) a GNU extension, which means that it cannot be used when building ISO C (e.g. -std=c99). It should therefore be avoided in uapi headers in favour of the ISO-friendly __typeof__. Unfortunately this issue could not be detected by CONFIG_UAPI_HEADER_TEST=y as the __ALIGN_KERNEL() macro is not expanded in any uapi header. This matters from a userspace perspective, not a kernel one. uapi headers and their contents are expected to be usable in a variety of situations, and in particular when building ISO C applications (with -std=c99 or similar). This particular problem can be reproduced by trying to use the __ALIGN_KERNEL macro directly in application code, say: #include <linux/const.h> int align(int x, int a) { return __ALIGN_KERNEL(x, a); } and trying to build that with -std=c99. 
Link: https://lkml.kernel.org/r/20230411092747.3759032-1-kevin.brodsky@arm.com Fixes: a79ff731a1b2 ("netfilter: xtables: make XT_ALIGN() usable in exported headers by exporting __ALIGN_KERNEL()") Signed-off-by: Kevin Brodsky Reported-by: Ruben Ayrapetyan Tested-by: Ruben Ayrapetyan Reviewed-by: Petr Vorel Tested-by: Petr Vorel Reviewed-by: Masahiro Yamada Cc: Sam Ravnborg Signed-off-by: Andrew Morton --- include/uapi/linux/const.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index af2a44c08683..a429381e7ca5 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -28,7 +28,7 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) From f4efbdaf59e959507a1a931ec4afecdfb09db76e Mon Sep 17 00:00:00 2001 From: Glenn Washburn Date: Tue, 28 Feb 2023 18:53:34 -0600 Subject: [PATCH 41/50] scripts/gdb: create linux/vfs.py for VFS related GDB helpers Patch series "GDB VFS utils". I've created a couple GDB convenience functions that I found useful when debugging some VFS issues and figure others might find them useful. For instance, they are useful in setting conditional breakpoints on VFS functions where you only care if the dentry path is a certain value. I took the opportunity to create a new "vfs" python module to give VFS related utilities a home. This patch (of 2): This will allow for more VFS specific GDB helpers to be collected in one place. Move utils.dentry_name into the vfs modules. Also a local variable in proc.py was changed from vfs to mnt to prevent a naming collision with the new vfs module. 
[akpm@linux-foundation.org: add SPDX-License-Identifier] Link: https://lkml.kernel.org/r/cover.1677631565.git.development@efficientek.com Link: https://lkml.kernel.org/r/7bba4c065a8c2c47f1fc5b03a7278005b04db251.1677631565.git.development@efficientek.com Signed-off-by: Glenn Washburn Cc: Alexander Viro Cc: Antonio Borneo Cc: Jan Kiszka Cc: John Ogness Cc: Kieran Bingham Cc: Petr Mladek Signed-off-by: Andrew Morton --- scripts/gdb/linux/proc.py | 16 +++++++++------- scripts/gdb/linux/utils.py | 8 -------- scripts/gdb/linux/vfs.py | 22 ++++++++++++++++++++++ scripts/gdb/vmlinux-gdb.py | 1 + 4 files changed, 32 insertions(+), 15 deletions(-) create mode 100644 scripts/gdb/linux/vfs.py diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py index 09cd871925a5..43c687e7a69d 100644 --- a/scripts/gdb/linux/proc.py +++ b/scripts/gdb/linux/proc.py @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # gdb helper commands and functions for Linux kernel debugging # @@ -16,6 +17,7 @@ from linux import constants from linux import utils from linux import tasks from linux import lists +from linux import vfs from struct import * @@ -170,16 +172,16 @@ values of that process namespace""" gdb.write("{:^18} {:^15} {:>9} {} {} options\n".format( "mount", "super_block", "devname", "pathname", "fstype")) - for vfs in lists.list_for_each_entry(namespace['list'], + for mnt in lists.list_for_each_entry(namespace['list'], mount_ptr_type, "mnt_list"): - devname = vfs['mnt_devname'].string() + devname = mnt['mnt_devname'].string() devname = devname if devname else "none" pathname = "" - parent = vfs + parent = mnt while True: mntpoint = parent['mnt_mountpoint'] - pathname = utils.dentry_name(mntpoint) + pathname + pathname = vfs.dentry_name(mntpoint) + pathname if (parent == parent['mnt_parent']): break parent = parent['mnt_parent'] @@ -187,14 +189,14 @@ values of that process namespace""" if (pathname == ""): pathname = "/" - superblock = vfs['mnt']['mnt_sb'] + superblock = 
mnt['mnt']['mnt_sb'] fstype = superblock['s_type']['name'].string() s_flags = int(superblock['s_flags']) - m_flags = int(vfs['mnt']['mnt_flags']) + m_flags = int(mnt['mnt']['mnt_flags']) rd = "ro" if (s_flags & constants.LX_SB_RDONLY) else "rw" gdb.write("{} {} {} {} {} {}{}{} 0 0\n".format( - vfs.format_string(), superblock.format_string(), devname, + mnt.format_string(), superblock.format_string(), devname, pathname, fstype, rd, info_opts(FS_INFO, s_flags), info_opts(MNT_INFO, m_flags))) diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py index 7f36aee32ac6..9f44df13761e 100644 --- a/scripts/gdb/linux/utils.py +++ b/scripts/gdb/linux/utils.py @@ -196,11 +196,3 @@ def gdb_eval_or_none(expresssion): return gdb.parse_and_eval(expresssion) except gdb.error: return None - - -def dentry_name(d): - parent = d['d_parent'] - if parent == d or parent == 0: - return "" - p = dentry_name(d['d_parent']) + "/" - return p + d['d_iname'].string() diff --git a/scripts/gdb/linux/vfs.py b/scripts/gdb/linux/vfs.py new file mode 100644 index 000000000000..62d4f9ad7d79 --- /dev/null +++ b/scripts/gdb/linux/vfs.py @@ -0,0 +1,22 @@ +# +# gdb helper commands and functions for Linux kernel debugging +# +# VFS tools +# +# Copyright (c) 2023 Glenn Washburn +# Copyright (c) 2016 Linaro Ltd +# +# Authors: +# Glenn Washburn +# Kieran Bingham +# +# This work is licensed under the terms of the GNU GPL version 2. 
+# + + +def dentry_name(d): + parent = d['d_parent'] + if parent == d or parent == 0: + return "" + p = dentry_name(d['d_parent']) + "/" + return p + d['d_iname'].string() diff --git a/scripts/gdb/vmlinux-gdb.py b/scripts/gdb/vmlinux-gdb.py index 2a72f91059b5..2d32308c3f7a 100644 --- a/scripts/gdb/vmlinux-gdb.py +++ b/scripts/gdb/vmlinux-gdb.py @@ -40,6 +40,7 @@ else: import linux.clk import linux.genpd import linux.device + import linux.vfs import linux.mm import linux.radixtree import linux.interrupts From 5a10562bdeb58006de4b1cd5f671b7da26b20bb3 Mon Sep 17 00:00:00 2001 From: Glenn Washburn Date: Tue, 28 Feb 2023 18:53:35 -0600 Subject: [PATCH 42/50] scripts/gdb: add GDB convenience functions $lx_dentry_name() and $lx_i_dentry() $lx_dentry_name() generates a full VFS path from a given dentry pointer, and $lx_i_dentry() returns the dentry pointer associated with the given inode pointer, if there is one. Link: https://lkml.kernel.org/r/c9a5ad8efbfbd2cc6559e082734eed7628f43a16.1677631565.git.development@efficientek.com Signed-off-by: Glenn Washburn Cc: Alexander Viro Cc: Antonio Borneo Cc: Jan Kiszka Cc: John Ogness Cc: Kieran Bingham Cc: Petr Mladek Signed-off-by: Andrew Morton --- scripts/gdb/linux/vfs.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/scripts/gdb/linux/vfs.py b/scripts/gdb/linux/vfs.py index 62d4f9ad7d79..c77b9ce75f6d 100644 --- a/scripts/gdb/linux/vfs.py +++ b/scripts/gdb/linux/vfs.py @@ -13,6 +13,9 @@ # This work is licensed under the terms of the GNU GPL version 2. # +import gdb +from linux import utils + def dentry_name(d): parent = d['d_parent'] @@ -20,3 +23,37 @@ def dentry_name(d): return "" p = dentry_name(d['d_parent']) + "/" return p + d['d_iname'].string() + +class DentryName(gdb.Function): + """Return string of the full path of a dentry. 
+ +$lx_dentry_name(PTR): Given PTR to a dentry struct, return a string +of the full path of the dentry.""" + + def __init__(self): + super(DentryName, self).__init__("lx_dentry_name") + + def invoke(self, dentry_ptr): + return dentry_name(dentry_ptr) + +DentryName() + + +dentry_type = utils.CachedType("struct dentry") + +class InodeDentry(gdb.Function): + """Return dentry pointer for inode. + +$lx_i_dentry(PTR): Given PTR to an inode struct, return a pointer to +the associated dentry struct, if there is one.""" + + def __init__(self): + super(InodeDentry, self).__init__("lx_i_dentry") + + def invoke(self, inode_ptr): + d_u = inode_ptr["i_dentry"]["first"] + if d_u == 0: + return "" + return utils.container_of(d_u, dentry_type.get_type().pointer(), "d_u") + +InodeDentry() From d4cb626d6f3e86121e08ce1492836f37f91858dd Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 11 Apr 2023 16:41:59 -0700 Subject: [PATCH 43/50] epoll: rename global epmutex As of 4f04cbaf128 ("epoll: use refcount to reduce ep_mutex contention"), this lock is now specific to nesting cases - inserting an epoll fd onto another epoll fd. Rename the lock to be less generic. Link: https://lkml.kernel.org/r/20230411234159.20421-1-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Cc: Paolo Abeni Cc: Eric Dumazet Signed-off-by: Andrew Morton --- fs/eventpoll.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0ecdfd3043a3..98134f0af6a8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -43,7 +43,7 @@ * LOCKING: * There are three level of locking required by epoll : * - * 1) epmutex (mutex) + * 1) epnested_mutex (mutex) * 2) ep->mtx (mutex) * 3) ep->lock (rwlock) * @@ -57,8 +57,8 @@ * we need a lock that will allow us to sleep. This lock is a * mutex (ep->mtx). It is acquired during the event transfer loop, * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file(). 
- * The epmutex is acquired when inserting an epoll fd onto another epoll - * fd. We do this so that we walk the epoll tree and ensure that this + * The epnested_mutex is acquired when inserting an epoll fd onto another + * epoll fd. We do this so that we walk the epoll tree and ensure that this * insertion does not create a cycle of epoll file descriptors, which * could lead to deadlock. We need a global mutex to prevent two * simultaneous inserts (A into B and B into A) from racing and @@ -74,9 +74,9 @@ * of epoll file descriptors, we use the current recursion depth as * the lockdep subkey. * It is possible to drop the "ep->mtx" and to use the global - * mutex "epmutex" (together with "ep->lock") to have it working, + * mutex "epnested_mutex" (together with "ep->lock") to have it working, * but having "ep->mtx" will make the interface more scalable. - * Events that require holding "epmutex" are very rare, while for + * Events that require holding "epnested_mutex" are very rare, while for * normal operations the epoll private "ep->mtx" will guarantee * a better scalability. */ @@ -248,7 +248,7 @@ struct ep_pqueue { static long max_user_watches __read_mostly; /* Used for cycles detection */ -static DEFINE_MUTEX(epmutex); +static DEFINE_MUTEX(epnested_mutex); static u64 loop_check_gen = 0; @@ -263,7 +263,7 @@ static struct kmem_cache *pwq_cache __read_mostly; /* * List of files with newly added links, where we may need to limit the number - * of emanating paths. Protected by the epmutex. + * of emanating paths. Protected by the epnested_mutex. */ struct epitems_head { struct hlist_head epitems; @@ -1337,7 +1337,7 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) * is connected to n file sources. In this case each file source has 1 path * of length 1. Thus, the numbers below should be more than sufficient. These * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify - * and delete can't add additional paths. 
Protected by the epmutex. + * and delete can't add additional paths. Protected by the epnested_mutex. */ static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 }; static int path_count[PATH_ARR_SIZE]; @@ -2167,7 +2167,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, * We do not need to take the global 'epumutex' on EPOLL_CTL_ADD when * the epoll file descriptor is attaching directly to a wakeup source, * unless the epoll file descriptor is nested. The purpose of taking the - * 'epmutex' on add is to prevent complex toplogies such as loops and + * 'epnested_mutex' on add is to prevent complex toplogies such as loops and * deep wakeup paths from forming in parallel through multiple * EPOLL_CTL_ADD operations. */ @@ -2178,7 +2178,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, if (READ_ONCE(f.file->f_ep) || ep->gen == loop_check_gen || is_file_epoll(tf.file)) { mutex_unlock(&ep->mtx); - error = epoll_mutex_lock(&epmutex, 0, nonblock); + error = epoll_mutex_lock(&epnested_mutex, 0, nonblock); if (error) goto error_tgt_fput; loop_check_gen++; @@ -2239,7 +2239,7 @@ error_tgt_fput: if (full_check) { clear_tfile_check_list(); loop_check_gen++; - mutex_unlock(&epmutex); + mutex_unlock(&epnested_mutex); } fdput(tf); From a04bb4c24a4846232a81047784d5eff2848e45bd Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 4 Apr 2023 22:17:15 +0300 Subject: [PATCH 44/50] checkpatch: introduce proper bindings license check All headers from 'include/dt-bindings/' must be verified by checkpatch together with Documentation bindings, because all of them are part of the whole DT bindings system. The requirement is dual licensed and matching patterns: * Schemas: /GPL-2\.0(?:-only)? OR BSD-2-Clause/ * Headers: /GPL-2\.0(?:-only)? 
OR \S+/ Above patterns suggested by Rob at: https://lore.kernel.org/all/CAL_Jsq+-YJsBO+LuPJ=ZQ=eb-monrwzuCppvReH+af7hYZzNaQ@mail.gmail.com The issue was found during patch review: https://lore.kernel.org/all/20230313201259.19998-4-ddrokosov@sberdevices.ru/ Link: https://lkml.kernel.org/r/20230404191715.7319-1-ddrokosov@sberdevices.ru Signed-off-by: Dmitry Rokosov Reviewed-by: Rob Herring Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Joe Perches Cc: Krzysztof Kozlowski Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 3e6f5a8614d3..23bb211de4ce 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3763,7 +3763,7 @@ sub process { "'$spdx_license' is not supported in LICENSES/...\n" . $herecurr); } if ($realfile =~ m@^Documentation/devicetree/bindings/@ && - not $spdx_license =~ /GPL-2\.0.*BSD-2-Clause/) { + $spdx_license !~ /GPL-2\.0(?:-only)? OR BSD-2-Clause/) { my $msg_level = \&WARN; $msg_level = \&CHK if ($file); if (&{$msg_level}("SPDX_LICENSE_TAG", @@ -3773,6 +3773,11 @@ sub process { $fixed[$fixlinenr] =~ s/SPDX-License-Identifier: .*/SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)/; } } + if ($realfile =~ m@^include/dt-bindings/@ && + $spdx_license !~ /GPL-2\.0(?:-only)? OR \S+/) { + WARN("SPDX_LICENSE_TAG", + "DT binding headers should be licensed (GPL-2.0-only OR .*)\n" . $herecurr); + } } } } From 3647ebcfbfca384840231fe13fae665453238a61 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 16 Apr 2023 22:17:05 -0700 Subject: [PATCH 45/50] ia64: fix an addr to taddr in huge_pte_offset() I know nothing of ia64 htlbpage_to_page(), but guess that the p4d line should be using taddr rather than addr, like everywhere else. 
Link: https://lkml.kernel.org/r/732eae88-3beb-246-2c72-281de786740@google.com Fixes: c03ab9e32a2c ("ia64: add support for folded p4d page tables") Signed-off-by: Hugh Dickins Acked-by: Mike Rapoport (IBM) Cc: Ard Biesheuvel Cc: Signed-off-by: Andrew Morton --- arch/ia64/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 380d2f3966c9..9e8960e49962 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -58,7 +58,7 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz) pgd = pgd_offset(mm, taddr); if (pgd_present(*pgd)) { - p4d = p4d_offset(pgd, addr); + p4d = p4d_offset(pgd, taddr); if (p4d_present(*p4d)) { pud = pud_offset(p4d, taddr); if (pud_present(*pud)) { From 522dc4e5f51e3d51c4ff55ad1c725d12176b71ea Mon Sep 17 00:00:00 2001 From: Chunguang Wu Date: Sun, 16 Apr 2023 13:24:04 +0800 Subject: [PATCH 46/50] fs/proc: add Kthread flag to /proc/$pid/status The commands `ps -ef` and `top -c` mark kernel threads with '[' and ']', but sometimes the result is not correct. The task->flags in /proc/$pid/stat is good, but we need to remember that the value of PF_KTHREAD is 0x00200000 and convert decimal to hex. If we have no binary program or shell script that reads /proc/$pid/stat, we can know it directly by `cat /proc/$pid/status`. 
Link: https://lkml.kernel.org/r/20230416052404.2920-1-fullspring2018@gmail.com Signed-off-by: Chunguang Wu Reviewed-by: Randy Dunlap Cc: Alexey Dobriyan Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 2 ++ fs/proc/array.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 9d5fd9424e8b..8a563684586c 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -179,6 +179,7 @@ read the file /proc/PID/status:: Gid: 100 100 100 100 FDSize: 256 Groups: 100 14 16 + Kthread: 0 VmPeak: 5004 kB VmSize: 5004 kB VmLck: 0 kB @@ -256,6 +257,7 @@ It's slow but very precise. NSpid descendant namespace process ID hierarchy NSpgid descendant namespace process group ID hierarchy NSsid descendant namespace session ID hierarchy + Kthread kernel thread flag, 1 is yes, 0 is no VmPeak peak virtual memory size VmSize total program size VmLck locked memory size diff --git a/fs/proc/array.c b/fs/proc/array.c index 9b0315d34c58..425824ad85e1 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -219,6 +219,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, "\t", task_session_nr_ns(p, pid->numbers[g].ns)); #endif seq_putc(m, '\n'); + + seq_printf(m, "Kthread:\t%c\n", p->flags & PF_KTHREAD ? 
'1' : '0'); } void render_sigset_t(struct seq_file *m, const char *header, From 09d49eb90fc8d03e1dab62dd7060389040f1d32b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 17 Apr 2023 22:56:24 +0200 Subject: [PATCH 47/50] ocfs2: reduce ioctl stack usage On 32-bit architectures with KASAN_STACK enabled, the total stack usage of the ocfs2_ioctl function grows beyond the warning limit: fs/ocfs2/ioctl.c: In function 'ocfs2_ioctl': fs/ocfs2/ioctl.c:934:1: error: the frame size of 1448 bytes is larger than 1400 bytes [-Werror=frame-larger-than=] Move each of the variables into a basic block, and mark ocfs2_info_handle() as noinline_for_stack, in order to have the variable share stack slots. Link: https://lkml.kernel.org/r/20230417205631.1956027-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Joseph Qi Reviewed-by: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/ioctl.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 811a6ea374bb..b1550ba73f96 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -803,8 +803,8 @@ bail: * a better backward&forward compatibility, since a small piece of * request will be less likely to be broken if disk layout get changed. 
*/ -static int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, - int compat_flag) +static noinline_for_stack int +ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, int compat_flag) { int i, status = 0; u64 req_addr; @@ -840,27 +840,26 @@ bail: long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); - int new_clusters; - int status; - struct ocfs2_space_resv sr; - struct ocfs2_new_group_input input; - struct reflink_arguments args; - const char __user *old_path; - const char __user *new_path; - bool preserve; - struct ocfs2_info info; void __user *argp = (void __user *)arg; + int status; switch (cmd) { case OCFS2_IOC_RESVSP: case OCFS2_IOC_RESVSP64: case OCFS2_IOC_UNRESVSP: case OCFS2_IOC_UNRESVSP64: + { + struct ocfs2_space_resv sr; + if (copy_from_user(&sr, (int __user *) arg, sizeof(sr))) return -EFAULT; return ocfs2_change_file_space(filp, cmd, &sr); + } case OCFS2_IOC_GROUP_EXTEND: + { + int new_clusters; + if (!capable(CAP_SYS_RESOURCE)) return -EPERM; @@ -873,8 +872,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) status = ocfs2_group_extend(inode, new_clusters); mnt_drop_write_file(filp); return status; + } case OCFS2_IOC_GROUP_ADD: case OCFS2_IOC_GROUP_ADD64: + { + struct ocfs2_new_group_input input; + if (!capable(CAP_SYS_RESOURCE)) return -EPERM; @@ -887,7 +890,14 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) status = ocfs2_group_add(inode, &input); mnt_drop_write_file(filp); return status; + } case OCFS2_IOC_REFLINK: + { + struct reflink_arguments args; + const char __user *old_path; + const char __user *new_path; + bool preserve; + if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; old_path = (const char __user *)(unsigned long)args.old_path; @@ -895,11 +905,16 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) preserve = (args.preserve != 0); return 
ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); + } case OCFS2_IOC_INFO: + { + struct ocfs2_info info; + if (copy_from_user(&info, argp, sizeof(struct ocfs2_info))) return -EFAULT; return ocfs2_info_handle(inode, &info, 0); + } case FITRIM: { struct super_block *sb = inode->i_sb; From 22ba509dd4dad053ad88fc387b5a74c2a9296a01 Mon Sep 17 00:00:00 2001 From: Oleksandr Natalenko Date: Wed, 19 Apr 2023 15:47:34 +0200 Subject: [PATCH 48/50] mailmap: add entry for Oleksandr Map my corporate email to my personal one. Link: https://lkml.kernel.org/r/20230419134734.454630-1-oleksandr@natalenko.name Signed-off-by: Oleksandr Natalenko Cc: Jakub Kicinski Cc: Konrad Dybcio Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index e2af78f67f7c..0d8940d37bf4 100644 --- a/.mailmap +++ b/.mailmap @@ -354,6 +354,7 @@ Nicolas Pitre Nicolas Saenz Julienne Nicolas Saenz Julienne Niklas Söderlund +Oleksandr Natalenko Oleksij Rempel Oleksij Rempel Oleksij Rempel From 4f20b7471c57032860065591a17efd3325216bde Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 21 Apr 2023 16:54:24 +0200 Subject: [PATCH 49/50] libgcc: add forward declarations for generic library routines With W=1 on platforms that use the generic gcc library routines (csky/loongarch/mips/riscv/sh/xtensa): lib/ashldi3.c:9:19: warning: no previous prototype for '__ashldi3' [-Wmissing-prototypes] 9 | long long notrace __ashldi3(long long u, word_type b) | ^~~~~~~~~ CC lib/ashrdi3.o lib/ashrdi3.c:9:19: warning: no previous prototype for '__ashrdi3' [-Wmissing-prototypes] 9 | long long notrace __ashrdi3(long long u, word_type b) | ^~~~~~~~~ CC lib/cmpdi2.o lib/cmpdi2.c:9:19: warning: no previous prototype for '__cmpdi2' [-Wmissing-prototypes] 9 | word_type notrace __cmpdi2(long long a, long long b) | ^~~~~~~~ CC lib/lshrdi3.o lib/lshrdi3.c:9:19: warning: no previous prototype for '__lshrdi3' [-Wmissing-prototypes] 9 | long long notrace __lshrdi3(long 
long u, word_type b) | ^~~~~~~~~ CC lib/muldi3.o lib/muldi3.c:49:19: warning: no previous prototype for '__muldi3' [-Wmissing-prototypes] 49 | long long notrace __muldi3(long long u, long long v) | ^~~~~~~~ CC lib/ucmpdi2.o lib/ucmpdi2.c:8:19: warning: no previous prototype for '__ucmpdi2' [-Wmissing-prototypes] 8 | word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b) | ^~~~~~~~~ Fix this by adding forward declarations to the common libgcc header file. Link: https://lkml.kernel.org/r/5cdbe08296693dd53849f199c3933e16e97b33c1.1682088593.git.geert+renesas@glider.be Signed-off-by: Geert Uytterhoeven Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202303272214.RxzpA6bP-lkp@intel.com/ Acked-by: Arnd Bergmann Cc: Chris Zankel Cc: Max Filippov Signed-off-by: Andrew Morton --- include/linux/libgcc.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/libgcc.h b/include/linux/libgcc.h index b8dc75f0c830..fc388da6a027 100644 --- a/include/linux/libgcc.h +++ b/include/linux/libgcc.h @@ -27,4 +27,11 @@ typedef union { long long ll; } DWunion; +long long notrace __ashldi3(long long u, word_type b); +long long notrace __ashrdi3(long long u, word_type b); +word_type notrace __cmpdi2(long long a, long long b); +long long notrace __lshrdi3(long long u, word_type b); +long long notrace __muldi3(long long u, long long v); +word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b); + #endif /* __ASM_LIBGCC_H */ From d88f2f72ca89ead8743ee15e547274ba248e7c59 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 24 Apr 2023 17:01:32 +1000 Subject: [PATCH 50/50] mailmap: add entries for Paul Mackerras Link: https://lkml.kernel.org/r/ZEYpTAufVHTvsO1n@cleo Signed-off-by: Paul Mackerras Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 0d8940d37bf4..347d47e27da2 100644 --- a/.mailmap +++ b/.mailmap @@ -370,6 +370,8 @@ Paul E. McKenney Paul E. 
McKenney Paul E. McKenney Paul E. McKenney +Paul Mackerras +Paul Mackerras Peter A Jonsson Peter Oruba Peter Oruba