From 2a1b02bcba78f8498ab00d6142e1238d85b01591 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 May 2024 13:28:33 -1000 Subject: [PATCH 001/778] workqueue: Refactor worker ID formatting and make wq_worker_comm() use full ID string Currently, worker ID formatting is open coded in create_worker(), init_rescuer() and worker_thread() (for %WORKER_DIE case). The formatted ID is saved into task->comm and wq_worker_comm() uses it as the base name to append extra information to when generating the name to be shown to userspace. However, TASK_COMM_LEN is only 16 leading to badly truncated names for rescuers. For example, the rescuer for the inet_frag_wq workqueue becomes: $ ps -ef | grep '[k]worker/R-inet' root 483 2 0 Apr26 ? 00:00:00 [kworker/R-inet_] Even for non-rescue workers, it's easy to run over 15 characters on moderately large machines. Fit it by consolidating worker ID formatting into a new helper format_worker_id() and calling it from wq_worker_comm() to obtain the untruncated worker ID string. $ ps -ef | grep '[k]worker/R-inet' root 60 2 0 12:10 ? 00:00:00 [kworker/R-inet_frag_wq] Signed-off-by: Tejun Heo Reported-and-tested-by: Jan Engelhardt Suggested-by: Linus Torvalds --- kernel/workqueue.c | 51 ++++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 003474c9a77d..3fbaecfc88c2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -125,6 +125,7 @@ enum wq_internal_consts { HIGHPRI_NICE_LEVEL = MIN_NICE, WQ_NAME_LEN = 32, + WORKER_ID_LEN = 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */ }; /* @@ -2742,6 +2743,26 @@ static void worker_detach_from_pool(struct worker *worker) complete(detach_completion); } +static int format_worker_id(char *buf, size_t size, struct worker *worker, + struct worker_pool *pool) +{ + if (worker->rescue_wq) + return scnprintf(buf, size, "kworker/R-%s", + worker->rescue_wq->name); + + if (pool) { + if (pool->cpu >= 0) + return scnprintf(buf, size, "kworker/%d:%d%s", + pool->cpu, worker->id, + pool->attrs->nice < 0 ? "H" : ""); + else + return scnprintf(buf, size, "kworker/u%d:%d", + pool->id, worker->id); + } else { + return scnprintf(buf, size, "kworker/dying"); + } +} + /** * create_worker - create a new workqueue worker * @pool: pool the new worker will belong to @@ -2758,7 +2779,6 @@ static struct worker *create_worker(struct worker_pool *pool) { struct worker *worker; int id; - char id_buf[23]; /* ID is needed to determine kthread name */ id = ida_alloc(&pool->worker_ida, GFP_KERNEL); @@ -2777,17 +2797,14 @@ static struct worker *create_worker(struct worker_pool *pool) worker->id = id; if (!(pool->flags & POOL_BH)) { - if (pool->cpu >= 0) - snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id, - pool->attrs->nice < 0 ? "H" : ""); - else - snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id); + char id_buf[WORKER_ID_LEN]; + format_worker_id(id_buf, sizeof(id_buf), worker, pool); worker->task = kthread_create_on_node(worker_thread, worker, - pool->node, "kworker/%s", id_buf); + pool->node, "%s", id_buf); if (IS_ERR(worker->task)) { if (PTR_ERR(worker->task) == -EINTR) { - pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n", + pr_err("workqueue: Interrupted when creating a worker thread \"%s\"\n", id_buf); } else { pr_err_once("workqueue: Failed to create a worker thread: %pe", @@ -3350,7 +3367,6 @@ woke_up: raw_spin_unlock_irq(&pool->lock); set_pf_worker(false); - set_task_comm(worker->task, "kworker/dying"); ida_free(&pool->worker_ida, worker->id); worker_detach_from_pool(worker); WARN_ON_ONCE(!list_empty(&worker->entry)); @@ -5542,6 +5558,7 @@ static int wq_clamp_max_active(int max_active, unsigned int flags, static int init_rescuer(struct workqueue_struct *wq) { struct worker *rescuer; + char id_buf[WORKER_ID_LEN]; int ret; if (!(wq->flags & WQ_MEM_RECLAIM)) @@ -5555,7 +5572,9 @@ static int init_rescuer(struct workqueue_struct *wq) } rescuer->rescue_wq = wq; - rescuer->task = kthread_create(rescuer_thread, rescuer, "kworker/R-%s", wq->name); + format_worker_id(id_buf, sizeof(id_buf), rescuer, NULL); + + rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf); if (IS_ERR(rescuer->task)) { ret = PTR_ERR(rescuer->task); pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe", @@ -6384,19 +6403,15 @@ void show_freezable_workqueues(void) /* used to show worker information through /proc/PID/{comm,stat,status} */ void wq_worker_comm(char *buf, size_t size, struct task_struct *task) { - int off; - - /* always show the actual comm */ - off = strscpy(buf, task->comm, size); - if (off < 0) - return; - /* stabilize PF_WQ_WORKER and worker pool association */ mutex_lock(&wq_pool_attach_mutex); if (task->flags & PF_WQ_WORKER) { struct worker *worker = kthread_data(task); struct worker_pool *pool = worker->pool; + int off; + + off = format_worker_id(buf, size, worker, pool); if (pool) { raw_spin_lock_irq(&pool->lock); @@ -6415,6 +6430,8 @@ void wq_worker_comm(char *buf, size_t size, struct task_struct *task) } raw_spin_unlock_irq(&pool->lock); } + } else { + strscpy(buf, task->comm, size); } mutex_unlock(&wq_pool_attach_mutex); From aad11473f8f4be3df86461081ce35ec5b145ba68 Mon Sep 17 00:00:00 2001 From: Dmitry Mastykin Date: Wed, 22 May 2024 10:45:24 +0300 Subject: [PATCH 002/778] NFSv4: Fix memory leak in nfs4_set_security_label We leak nfs_fattr and nfs4_label every time we set a security xattr. Signed-off-by: Dmitry Mastykin Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c93c12063b3a..94c07875aa3f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6268,6 +6268,7 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) if (status == 0) nfs_setsecurity(inode, fattr); + nfs_free_fattr(fattr); return status; } #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ From 6cbe14f42be3b596e9590d48e12436982ba26e4b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 May 2024 11:27:32 -0400 Subject: [PATCH 003/778] MAINTAINERS: Change email address for Trond Myklebust Ensure that patches are sent to an email server that won't corrupt them. Signed-off-by: Trond Myklebust --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 28e20975c26f..481d76fbf16c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15549,7 +15549,7 @@ F: drivers/nfc/virtual_ncidev.c F: tools/testing/selftests/nci/ NFS, SUNRPC, AND LOCKD CLIENTS -M: Trond Myklebust +M: Trond Myklebust M: Anna Schumaker L: linux-nfs@vger.kernel.org S: Maintained From 134d0b3f2440cdddd12fc3444c9c0f62331ce6fc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 21 May 2024 15:58:40 +0300 Subject: [PATCH 004/778] nfs: propagate readlink errors in nfs_symlink_filler There is an inherent race where a symlink file may have been overriden (by a different client) between lookup and readlink, resulting in a spurious EIO error returned to userspace. Fix this by propagating back ESTALE errors such that the vfs will retry the lookup/get_link (similar to nfs4_file_open) at least once. Cc: Dan Aloni Signed-off-by: Sagi Grimberg Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 0e27a2e4e68b..13818129d268 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -41,7 +41,7 @@ static int nfs_symlink_filler(struct file *file, struct folio *folio) error: folio_set_error(folio); folio_unlock(folio); - return -EIO; + return error; } static const char *nfs_get_link(struct dentry *dentry, From a527c3ba41c4c61e2069bfce4091e5515f06a8dd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 24 May 2024 18:14:19 +0200 Subject: [PATCH 005/778] nfs: Avoid flushing many pages with NFS_FILE_SYNC When we are doing WB_SYNC_ALL writeback, nfs submits write requests with NFS_FILE_SYNC flag to the server (which then generally treats it as an O_SYNC write). This helps to reduce latency for single requests but when submitting more requests, additional fsyncs on the server side hurt latency. NFS generally avoids this additional overhead by not setting NFS_FILE_SYNC if desc->pg_moreio is set. However this logic doesn't always work. When we do random 4k writes to a huge file and then call fsync(2), each page writeback is going to be sent with NFS_FILE_SYNC because after preparing one page for writeback, we start writing back next, nfs_do_writepage() will call nfs_pageio_cond_complete() which finds the page is not contiguous with previously prepared IO and submits is *without* setting desc->pg_moreio. Hence NFS_FILE_SYNC is used resulting in poor performance. Fix the problem by setting desc->pg_moreio in nfs_pageio_cond_complete() before submitting outstanding IO. This improves throughput of fsync-after-random-writes on my test SSD from ~70MB/s to ~250MB/s. Signed-off-by: Jan Kara Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6efb5068c116..040b6b79c75e 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1545,6 +1545,11 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) continue; } else if (index == prev->wb_index + 1) continue; + /* + * We will submit more requests after these. Indicate + * this to the underlying layers. + */ + desc->pg_moreio = 1; nfs_pageio_complete(desc); break; } From 0c8c7c559740d2d8b66048162af6c4dba8f0c88c Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 23 May 2024 15:01:22 -0400 Subject: [PATCH 006/778] nfs: don't invalidate dentries on transient errors This is a slight variation on a patch previously proposed by Neil Brown that never got merged. Prior to commit 5ceb9d7fdaaf ("NFS: Refactor nfs_lookup_revalidate()"), any error from nfs_lookup_verify_inode() other than -ESTALE would result in nfs_lookup_revalidate() returning that error (-ESTALE is mapped to zero). Since that commit, all errors result in nfs_lookup_revalidate() returning zero, resulting in dentries being invalidated where they previously were not (particularly in the case of -ERESTARTSYS). Fix it by passing the actual error code to nfs_lookup_revalidate_done(), and leaving the decision on whether to map the error code to zero or one to nfs_lookup_revalidate_done(). A simple reproducer is to run the following python code in a subdirectory of an NFS mount (not in the root of the NFS mount): ---8<--- import os import multiprocessing import time if __name__=="__main__": multiprocessing.set_start_method("spawn") count = 0 while True: try: os.getcwd() pool = multiprocessing.Pool(10) pool.close() pool.terminate() count += 1 except Exception as e: print(f"Failed after {count} iterations") print(e) break ---8<--- Prior to commit 5ceb9d7fdaaf, the above code would run indefinitely. After commit 5ceb9d7fdaaf, it fails almost immediately with -ENOENT. Signed-off-by: Scott Mayhew Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 342930996226..788077a4feb9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1627,7 +1627,16 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry, switch (error) { case 1: break; - case 0: + case -ETIMEDOUT: + if (inode && (IS_ROOT(dentry) || + NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)) + error = 1; + break; + case -ESTALE: + case -ENOENT: + error = 0; + fallthrough; + default: /* * We can't d_drop the root of a disconnected tree: * its d_hash is on the s_anon list and d_drop() would hide @@ -1682,18 +1691,8 @@ static int nfs_lookup_revalidate_dentry(struct inode *dir, dir_verifier = nfs_save_change_attribute(dir); ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); - if (ret < 0) { - switch (ret) { - case -ESTALE: - case -ENOENT: - ret = 0; - break; - case -ETIMEDOUT: - if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) - ret = 1; - } + if (ret < 0) goto out; - } /* Request help from readdirplus */ nfs_lookup_advise_force_readdirplus(dir, flags); @@ -1737,7 +1736,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; - int error; + int error = 0; nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = d_inode(dentry); @@ -1782,7 +1781,7 @@ out_valid: out_bad: if (flags & LOOKUP_RCU) return -ECHILD; - return nfs_lookup_revalidate_done(dir, dentry, inode, 0); + return nfs_lookup_revalidate_done(dir, dentry, inode, error); } static int From 8a01ef749b0a632f0e1f4ead0f08b3310d99fcb1 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 3 May 2024 14:45:05 -0500 Subject: [PATCH 007/778] iio: adc: ad9467: fix scan type sign According to the IIO documentation, the sign in the scan type should be lower case. The ad9467 driver was incorrectly using upper case. Fix by changing to lower case. Fixes: 4606d0f4b05f ("iio: adc: ad9467: add support for AD9434 high-speed ADC") Fixes: ad6797120238 ("iio: adc: ad9467: add support AD9467 ADC") Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20240503-ad9467-fix-scan-type-sign-v1-1-c7a1a066ebb9@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad9467.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad9467.c b/drivers/iio/adc/ad9467.c index e85b763b9ffc..8f5b9c3f6e3d 100644 --- a/drivers/iio/adc/ad9467.c +++ b/drivers/iio/adc/ad9467.c @@ -243,11 +243,11 @@ static void __ad9467_get_scale(struct ad9467_state *st, int index, } static const struct iio_chan_spec ad9434_channels[] = { - AD9467_CHAN(0, 0, 12, 'S'), + AD9467_CHAN(0, 0, 12, 's'), }; static const struct iio_chan_spec ad9467_channels[] = { - AD9467_CHAN(0, 0, 16, 'S'), + AD9467_CHAN(0, 0, 16, 's'), }; static const struct ad9467_chip_info ad9467_chip_tbl = { From 72d0a20fabcf231c9b4b17b0cabdcde0949d05eb Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Fri, 3 May 2024 20:55:28 +0200 Subject: [PATCH 008/778] dt-bindings: iio: dac: fix ad354xr output range Fix output range, as per datasheet must be -2.5 to 7.5. Signed-off-by: Angelo Dureghello Fixes: b0a96c5f599e ("dt-bindings: iio: dac: Add adi,ad3552r.yaml") Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240503185528.2043127-1-adureghello@baylibre.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml index 96340a05754c..8265d709094d 100644 --- a/Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml +++ b/Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml @@ -139,7 +139,7 @@ allOf: Voltage output range of the channel as Required connections: Rfb1x for: 0 to 2.5 V; 0 to 3V; 0 to 5 V; - Rfb2x for: 0 to 10 V; 2.5 to 7.5V; -5 to 5 V; + Rfb2x for: 0 to 10 V; -2.5 to 7.5V; -5 to 5 V; oneOf: - items: - const: 0 From 0f0f6306617cb4b6231fc9d4ec68ab9a56dba7c0 Mon Sep 17 00:00:00 2001 From: Adam Rizkalla Date: Thu, 25 Apr 2024 01:22:49 -0500 Subject: [PATCH 009/778] iio: pressure: bmp280: Fix BMP580 temperature reading Fix overflow issue when storing BMP580 temperature reading and properly preserve sign of 24-bit data. Signed-off-by: Adam Rizkalla Tested-By: Vasileios Amoiridis Acked-by: Angel Iglesias Link: https://lore.kernel.org/r/Zin2udkXRD0+GrML@adam-asahi.lan Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 09f53d987c7d..221fa2c552ae 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -1394,12 +1394,12 @@ static int bmp580_read_temp(struct bmp280_data *data, int *val, int *val2) /* * Temperature is returned in Celsius degrees in fractional - * form down 2^16. We rescale by x1000 to return milli Celsius - * to respect IIO ABI. + * form down 2^16. We rescale by x1000 to return millidegrees + * Celsius to respect IIO ABI. */ - *val = raw_temp * 1000; - *val2 = 16; - return IIO_VAL_FRACTIONAL_LOG2; + raw_temp = sign_extend32(raw_temp, 23); + *val = ((s64)raw_temp * 1000) / (1 << 16); + return IIO_VAL_INT; } static int bmp580_read_press(struct bmp280_data *data, int *val, int *val2) From 279428df888319bf68f2686934897301a250bb84 Mon Sep 17 00:00:00 2001 From: Marc Ferland Date: Wed, 1 May 2024 11:05:54 -0400 Subject: [PATCH 010/778] iio: dac: ad5592r: fix temperature channel scaling value The scale value for the temperature channel is (assuming Vref=2.5 and the datasheet): 376.7897513 When calculating both val and val2 for the temperature scale we use (3767897513/25) and multiply it by Vref (here I assume 2500mV) to obtain: 2500 * (3767897513/25) ==> 376789751300 Finally we divide with remainder by 10^9 to get: val = 376 val2 = 789751300 However, we return IIO_VAL_INT_PLUS_MICRO (should have been NANO) as the scale type. So when converting the raw temperature value to the 'processed' temperature value we will get (assuming raw=810, offset=-753): processed = (raw + offset) * scale_val = (810 + -753) * 376 = 21432 processed += div((raw + offset) * scale_val2, 10^6) += div((810 + -753) * 789751300, 10^6) += 45015 ==> 66447 ==> 66.4 Celcius instead of the expected 21.5 Celsius. Fix this issue by changing IIO_VAL_INT_PLUS_MICRO to IIO_VAL_INT_PLUS_NANO. Fixes: 56ca9db862bf ("iio: dac: Add support for the AD5592R/AD5593R ADCs/DACs") Signed-off-by: Marc Ferland Link: https://lore.kernel.org/r/20240501150554.1871390-1-marc.ferland@sonatest.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5592r-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c index 076bc9ecfb49..4763402dbcd6 100644 --- a/drivers/iio/dac/ad5592r-base.c +++ b/drivers/iio/dac/ad5592r-base.c @@ -415,7 +415,7 @@ static int ad5592r_read_raw(struct iio_dev *iio_dev, s64 tmp = *val * (3767897513LL / 25LL); *val = div_s64_rem(tmp, 1000000000LL, val2); - return IIO_VAL_INT_PLUS_MICRO; + return IIO_VAL_INT_PLUS_NANO; } mutex_lock(&st->lock); From bedb2ccb566de5ca0c336ca3fd3588cea6d50414 Mon Sep 17 00:00:00 2001 From: Vasileios Amoiridis Date: Wed, 8 May 2024 17:54:07 +0200 Subject: [PATCH 011/778] iio: imu: bmi323: Fix trigger notification in case of error In case of error in the bmi323_trigger_handler() function, the function exits without calling the iio_trigger_notify_done() which is responsible for informing the attached trigger that the process is done and in case there is a .reenable(), to call it. Fixes: 8a636db3aa57 ("iio: imu: Add driver for BMI323 IMU") Signed-off-by: Vasileios Amoiridis Link: https://lore.kernel.org/r/20240508155407.139805-1-vassilisamir@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi323/bmi323_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/bmi323/bmi323_core.c b/drivers/iio/imu/bmi323/bmi323_core.c index 5d42ab9b176a..67d74a1a1b26 100644 --- a/drivers/iio/imu/bmi323/bmi323_core.c +++ b/drivers/iio/imu/bmi323/bmi323_core.c @@ -1391,7 +1391,7 @@ static irqreturn_t bmi323_trigger_handler(int irq, void *p) &data->buffer.channels, ARRAY_SIZE(data->buffer.channels)); if (ret) - return IRQ_NONE; + goto out; } else { for_each_set_bit(bit, indio_dev->active_scan_mask, BMI323_CHAN_MAX) { @@ -1400,13 +1400,14 @@ static irqreturn_t bmi323_trigger_handler(int irq, void *p) &data->buffer.channels[index++], BMI323_BYTES_PER_SAMPLE); if (ret) - return IRQ_NONE; + goto out; } } iio_push_to_buffers_with_timestamp(indio_dev, &data->buffer, iio_get_time_ns(indio_dev)); +out: iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; From a23c14b062d8800a2192077d83273bbfe6c7552d Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 13 May 2024 13:34:27 -0700 Subject: [PATCH 012/778] iio: temperature: mlx90635: Fix ERR_PTR dereference in mlx90635_probe() When devm_regmap_init_i2c() fails, regmap_ee could be error pointer, instead of checking for IS_ERR(regmap_ee), regmap is checked which looks like a copy paste error. Fixes: a1d1ba5e1c28 ("iio: temperature: mlx90635 MLX90635 IR Temperature sensor") Reviewed-by: Crt Mori Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20240513203427.3208696-1-harshit.m.mogalapalli@oracle.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/mlx90635.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/temperature/mlx90635.c b/drivers/iio/temperature/mlx90635.c index 1f5c962c1818..f7f88498ba0e 100644 --- a/drivers/iio/temperature/mlx90635.c +++ b/drivers/iio/temperature/mlx90635.c @@ -947,9 +947,9 @@ static int mlx90635_probe(struct i2c_client *client) "failed to allocate regmap\n"); regmap_ee = devm_regmap_init_i2c(client, &mlx90635_regmap_ee); - if (IS_ERR(regmap)) - return dev_err_probe(&client->dev, PTR_ERR(regmap), - "failed to allocate regmap\n"); + if (IS_ERR(regmap_ee)) + return dev_err_probe(&client->dev, PTR_ERR(regmap_ee), + "failed to allocate EEPROM regmap\n"); mlx90635 = iio_priv(indio_dev); i2c_set_clientdata(client, indio_dev); From ab6f0ab178137170a6b40f8f3d7a3806708a202c Mon Sep 17 00:00:00 2001 From: Dumitru Ceclan Date: Tue, 21 May 2024 11:45:39 +0300 Subject: [PATCH 013/778] iio: adc: ad7173: fix buffers enablement for ad7176-2 AD7176-2 does not feature input buffers and marks corespondent register bits as read only. Enable buffers only on supported models. Fixes: 76a1e6a42802 ("iio: adc: ad7173: add AD7173 driver") Reviewed-by: David Lechner Signed-off-by: Dumitru Ceclan Link: https://lore.kernel.org/r/20240521-ad7173-fixes-v1-1-8161cc7f3ad1@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index a7826bba0852..91acedaddb57 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -145,6 +145,7 @@ struct ad7173_device_info { unsigned int id; char *name; bool has_temp; + bool has_input_buf; bool has_int_ref; bool has_ref2; u8 num_gpios; @@ -212,6 +213,7 @@ static const struct ad7173_device_info ad7173_device_info[] = { .num_configs = 4, .num_gpios = 2, .has_temp = true, + .has_input_buf = true, .has_int_ref = true, .clock = 2 * HZ_PER_MHZ, .sinc5_data_rates = ad7173_sinc5_data_rates, @@ -224,6 +226,7 @@ static const struct ad7173_device_info ad7173_device_info[] = { .num_configs = 8, .num_gpios = 4, .has_temp = false, + .has_input_buf = true, .has_ref2 = true, .clock = 2 * HZ_PER_MHZ, .sinc5_data_rates = ad7173_sinc5_data_rates, @@ -237,6 +240,7 @@ static const struct ad7173_device_info ad7173_device_info[] = { .num_configs = 8, .num_gpios = 4, .has_temp = true, + .has_input_buf = true, .has_int_ref = true, .has_ref2 = true, .clock = 2 * HZ_PER_MHZ, @@ -251,6 +255,7 @@ static const struct ad7173_device_info ad7173_device_info[] = { .num_configs = 4, .num_gpios = 2, .has_temp = true, + .has_input_buf = true, .has_int_ref = true, .clock = 16 * HZ_PER_MHZ, .sinc5_data_rates = ad7175_sinc5_data_rates, @@ -263,6 +268,7 @@ static const struct ad7173_device_info ad7173_device_info[] = { .num_configs = 8, .num_gpios = 4, .has_temp = true, + .has_input_buf = true, .has_int_ref = true, .has_ref2 = true, .clock = 16 * HZ_PER_MHZ, @@ -277,6 +283,7 @@ static const struct ad7173_device_info ad7173_device_info[] = { .num_configs = 4, .num_gpios = 2, .has_temp = false, + .has_input_buf = false, .has_int_ref = true, .clock = 16 * HZ_PER_MHZ, .sinc5_data_rates = ad7175_sinc5_data_rates, @@ -289,6 +296,7 @@ static const struct ad7173_device_info ad7173_device_info[] = { .num_configs = 4, .num_gpios = 2, .has_temp = true, + .has_input_buf = true, .has_int_ref = true, .clock = 16 * HZ_PER_MHZ, .odr_start_value = AD7177_ODR_START_VALUE, @@ -932,7 +940,7 @@ static int ad7173_fw_parse_channel_config(struct iio_dev *indio_dev) AD7173_CH_ADDRESS(chan_arr[chan_index].channel, chan_arr[chan_index].channel2); chan_st_priv->cfg.bipolar = false; - chan_st_priv->cfg.input_buf = true; + chan_st_priv->cfg.input_buf = st->info->has_input_buf; chan_st_priv->cfg.ref_sel = AD7173_SETUP_REF_SEL_INT_REF; st->adc_mode |= AD7173_ADC_MODE_REF_EN; @@ -989,7 +997,7 @@ static int ad7173_fw_parse_channel_config(struct iio_dev *indio_dev) chan_st_priv->ain = AD7173_CH_ADDRESS(ain[0], ain[1]); chan_st_priv->chan_reg = chan_index; - chan_st_priv->cfg.input_buf = true; + chan_st_priv->cfg.input_buf = st->info->has_input_buf; chan_st_priv->cfg.odr = 0; chan_st_priv->cfg.bipolar = fwnode_property_read_bool(child, "bipolar"); From 3450ee7e800a8dc83290780c1b6ef66898e709d3 Mon Sep 17 00:00:00 2001 From: Dumitru Ceclan Date: Tue, 21 May 2024 11:45:40 +0300 Subject: [PATCH 014/778] iio: adc: ad7173: Add ad7173_device_info names Add missing names from the device info struct for 3 models to ensure consistency with the rest of the models. Fixes: 76a1e6a42802 ("iio: adc: ad7173: add AD7173 driver") Signed-off-by: Dumitru Ceclan Link: https://lore.kernel.org/r/20240521-ad7173-fixes-v1-2-8161cc7f3ad1@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index 91acedaddb57..4649af5f9d76 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -220,6 +220,7 @@ static const struct ad7173_device_info ad7173_device_info[] = { .num_sinc5_data_rates = ARRAY_SIZE(ad7173_sinc5_data_rates), }, [ID_AD7172_4] = { + .name = "ad7172-4", .id = AD7172_4_ID, .num_inputs = 9, .num_channels = 8, @@ -262,6 +263,7 @@ static const struct ad7173_device_info ad7173_device_info[] = { .num_sinc5_data_rates = ARRAY_SIZE(ad7175_sinc5_data_rates), }, [ID_AD7175_8] = { + .name = "ad7175-8", .id = AD7175_8_ID, .num_inputs = 17, .num_channels = 16, @@ -290,6 +292,7 @@ static const struct ad7173_device_info ad7173_device_info[] = { .num_sinc5_data_rates = ARRAY_SIZE(ad7175_sinc5_data_rates), }, [ID_AD7177_2] = { + .name = "ad7177-2", .id = AD7177_ID, .num_inputs = 5, .num_channels = 4, From f00dd8953094091a8e9c8cc00661d01c33b93615 Mon Sep 17 00:00:00 2001 From: Dumitru Ceclan Date: Tue, 21 May 2024 11:45:41 +0300 Subject: [PATCH 015/778] iio: adc: ad7173: Remove index from temp channel Temperature channel is unique per device, index is not needed. This is breaking userspace: Include fixes tag to be released within the same rc cycle. Fixes: 76a1e6a42802 ("iio: adc: ad7173: add AD7173 driver") Signed-off-by: Dumitru Ceclan Link: https://lore.kernel.org/r/20240521-ad7173-fixes-v1-3-8161cc7f3ad1@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index 4649af5f9d76..048127bf979a 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -815,7 +815,6 @@ static const struct iio_chan_spec ad7173_channel_template = { static const struct iio_chan_spec ad7173_temp_iio_channel_template = { .type = IIO_TEMP, - .indexed = 1, .channel = AD7173_AIN_TEMP_POS, .channel2 = AD7173_AIN_TEMP_NEG, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | From 95444b9eeb8c5c0330563931d70c61ca3b101548 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Fri, 24 May 2024 12:48:51 +0000 Subject: [PATCH 016/778] iio: invensense: fix odr switching to same value ODR switching happens in 2 steps, update to store the new value and then apply when the ODR change flag is received in the data. When switching to the same ODR value, the ODR change flag is never happening, and frequency switching is blocked waiting for the never coming apply. Fix the issue by preventing update to happen when switching to same ODR value. Fixes: 0ecc363ccea7 ("iio: make invensense timestamp module generic") Cc: stable@vger.kernel.org Signed-off-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20240524124851.567485-1-inv.git-commit@tdk.com Signed-off-by: Jonathan Cameron --- drivers/iio/common/inv_sensors/inv_sensors_timestamp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c b/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c index fa205f17bd90..f44458c380d9 100644 --- a/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c +++ b/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c @@ -60,11 +60,15 @@ EXPORT_SYMBOL_NS_GPL(inv_sensors_timestamp_init, IIO_INV_SENSORS_TIMESTAMP); int inv_sensors_timestamp_update_odr(struct inv_sensors_timestamp *ts, uint32_t period, bool fifo) { + uint32_t mult; + /* when FIFO is on, prevent odr change if one is already pending */ if (fifo && ts->new_mult != 0) return -EAGAIN; - ts->new_mult = period / ts->chip.clock_period; + mult = period / ts->chip.clock_period; + if (mult != ts->mult) + ts->new_mult = mult; return 0; } From a39741d38c048a48ae0d65226d9548005a088f5f Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 May 2024 08:54:21 +0300 Subject: [PATCH 017/778] pinctrl: renesas: rzg2l: Use spin_{lock,unlock}_irq{save,restore} On PREEMPT_RT kernels the spinlock_t maps to an rtmutex. Using raw_spin_lock_irqsave()/raw_spin_unlock_irqrestore() on &pctrl->lock.rlock breaks the PREEMPT_RT builds. To fix this use spin_lock_irqsave()/spin_unlock_irqrestore() on &pctrl->lock. Fixes: 02cd2d3be1c3 ("pinctrl: renesas: rzg2l: Configure the interrupt type on resume") Reported-by: Diederik de Haas Closes: https://lore.kernel.org/all/131999629.KQPSlr0Zke@bagend Signed-off-by: Claudiu Beznea Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240522055421.2842689-1-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/pinctrl-rzg2l.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index c3256bfde502..60be78da9f52 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -2071,11 +2071,11 @@ static void rzg2l_gpio_irq_restore(struct rzg2l_pinctrl *pctrl) * This has to be atomically executed to protect against a concurrent * interrupt. */ - raw_spin_lock_irqsave(&pctrl->lock.rlock, flags); + spin_lock_irqsave(&pctrl->lock, flags); ret = rzg2l_gpio_irq_set_type(data, irqd_get_trigger_type(data)); if (!ret && !irqd_irq_disabled(data)) rzg2l_gpio_irq_enable(data); - raw_spin_unlock_irqrestore(&pctrl->lock.rlock, flags); + spin_unlock_irqrestore(&pctrl->lock, flags); if (ret) dev_crit(pctrl->dev, "Failed to set IRQ type for virq=%u\n", virq); From bb3ca38ef7aa56dcfa7f6e81675c7a39d5ee9bf1 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Fri, 3 May 2024 08:43:11 -0700 Subject: [PATCH 018/778] hv_balloon: Use kernel macros to simplify open coded sequences Code sequences equivalent to ALIGN(), ALIGN_DOWN(), and umin() are currently open coded. Change these to use the kernel macro to improve code clarity. ALIGN() and ALIGN_DOWN() require the alignment value to be a power of 2, which is the case here. Reviewed-by: David Hildenbrand Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240503154312.142466-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240503154312.142466-1-mhklinux@outlook.com> --- drivers/hv/hv_balloon.c | 40 ++++++++-------------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index e000fa3b9f97..9f45b8a6762c 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -729,15 +729,8 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { has->ha_end_pfn += HA_CHUNK; - - if (total_pfn > HA_CHUNK) { - processed_pfn = HA_CHUNK; - total_pfn -= HA_CHUNK; - } else { - processed_pfn = total_pfn; - total_pfn = 0; - } - + processed_pfn = umin(total_pfn, HA_CHUNK); + total_pfn -= processed_pfn; has->covered_end_pfn += processed_pfn; } @@ -800,7 +793,7 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) { struct hv_hotadd_state *has; struct hv_hotadd_gap *gap; - unsigned long residual, new_inc; + unsigned long residual; int ret = 0; guard(spinlock_irqsave)(&dm_device.ha_lock); @@ -836,15 +829,9 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) * our current limit; extend it. */ if ((start_pfn + pfn_cnt) > has->end_pfn) { + /* Extend the region by multiples of HA_CHUNK */ residual = (start_pfn + pfn_cnt - has->end_pfn); - /* - * Extend the region by multiples of HA_CHUNK. - */ - new_inc = (residual / HA_CHUNK) * HA_CHUNK; - if (residual % HA_CHUNK) - new_inc += HA_CHUNK; - - has->end_pfn += new_inc; + has->end_pfn += ALIGN(residual, HA_CHUNK); } ret = 1; @@ -915,9 +902,7 @@ static unsigned long handle_pg_range(unsigned long pg_start, */ size = (has->end_pfn - has->ha_end_pfn); if (pfn_cnt <= size) { - size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK); - if (pfn_cnt % HA_CHUNK) - size += HA_CHUNK; + size = ALIGN(pfn_cnt, HA_CHUNK); } else { pfn_cnt = size; } @@ -1011,9 +996,6 @@ static void hot_add_req(struct work_struct *dummy) rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt; if ((rg_start == 0) && (!dm->host_specified_ha_region)) { - unsigned long region_size; - unsigned long region_start; - /* * The host has not specified the hot-add region. * Based on the hot-add page range being specified, @@ -1021,14 +1003,8 @@ static void hot_add_req(struct work_struct *dummy) * that need to be hot-added while ensuring the alignment * and size requirements of Linux as it relates to hot-add. */ - region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK; - if (pfn_cnt % HA_CHUNK) - region_size += HA_CHUNK; - - region_start = (pg_start / HA_CHUNK) * HA_CHUNK; - - rg_start = region_start; - rg_sz = region_size; + rg_start = ALIGN_DOWN(pg_start, HA_CHUNK); + rg_sz = ALIGN(pfn_cnt, HA_CHUNK); } if (do_hot_add) From 8852ebf1948d94ecaf4d1113032dda7e58e72b84 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Fri, 3 May 2024 08:43:12 -0700 Subject: [PATCH 019/778] hv_balloon: Enable hot-add for memblock sizes > 128 MiB The Hyper-V balloon driver supports hot-add of memory in addition to ballooning. Current code hot-adds in fixed size chunks of 128 MiB (fixed constant HA_CHUNK in the code). While this works in Hyper-V VMs with 64 GiB or less or memory where the Linux memblock size is 128 MiB, the hot-add fails for larger memblock sizes because add_memory() expects memory to be added in chunks that match the memblock size. Messages like the following are reported when Linux has a 256 MiB memblock size: [ 312.668859] Block size [0x10000000] unaligned hotplug range: start 0x310000000, size 0x8000000 [ 312.668880] hv_balloon: hot_add memory failed error is -22 [ 312.668984] hv_balloon: Memory hot add failed Larger memblock sizes are usually used in VMs with more than 64 GiB of memory, depending on the alignment of the VM's physical address space. Fix this problem by having the Hyper-V balloon driver determine the Linux memblock size, and process hot-add requests in that chunk size instead of a fixed 128 MiB. Also update the hot-add alignment requested of the Hyper-V host to match the memblock size. The code changes look significant, but in fact are just a simple text substitution of a new global variable for the previous HA_CHUNK constant. No algorithms are changed except to initialize the new global variable and to calculate the alignment value to pass to Hyper-V. Testing with memblock sizes of 256 MiB and 2 GiB shows correct operation. Reviewed-by: David Hildenbrand Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240503154312.142466-2-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240503154312.142466-2-mhklinux@outlook.com> --- drivers/hv/hv_balloon.c | 64 +++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 9f45b8a6762c..4370ad31b5b3 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -425,11 +426,11 @@ struct dm_info_msg { * The range start_pfn : end_pfn specifies the range * that the host has asked us to hot add. The range * start_pfn : ha_end_pfn specifies the range that we have - * currently hot added. We hot add in multiples of 128M - * chunks; it is possible that we may not be able to bring - * online all the pages in the region. The range + * currently hot added. We hot add in chunks equal to the + * memory block size; it is possible that we may not be able + * to bring online all the pages in the region. The range * covered_start_pfn:covered_end_pfn defines the pages that can - * be brough online. + * be brought online. */ struct hv_hotadd_state { @@ -505,8 +506,11 @@ enum hv_dm_state { static __u8 recv_buffer[HV_HYP_PAGE_SIZE]; static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE]; + +static unsigned long ha_pages_in_chunk; +#define HA_BYTES_IN_CHUNK (ha_pages_in_chunk << PAGE_SHIFT) + #define PAGES_IN_2M (2 * 1024 * 1024 / PAGE_SIZE) -#define HA_CHUNK (128 * 1024 * 1024 / PAGE_SIZE) struct hv_dynmem_device { struct hv_device *dev; @@ -724,21 +728,21 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, unsigned long processed_pfn; unsigned long total_pfn = pfn_count; - for (i = 0; i < (size/HA_CHUNK); i++) { - start_pfn = start + (i * HA_CHUNK); + for (i = 0; i < (size/ha_pages_in_chunk); i++) { + start_pfn = start + (i * ha_pages_in_chunk); scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { - has->ha_end_pfn += HA_CHUNK; - processed_pfn = umin(total_pfn, HA_CHUNK); + has->ha_end_pfn += ha_pages_in_chunk; + processed_pfn = umin(total_pfn, ha_pages_in_chunk); total_pfn -= processed_pfn; - has->covered_end_pfn += processed_pfn; + has->covered_end_pfn += processed_pfn; } reinit_completion(&dm_device.ol_waitevent); nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn)); ret = add_memory(nid, PFN_PHYS((start_pfn)), - (HA_CHUNK << PAGE_SHIFT), MHP_MERGE_RESOURCE); + HA_BYTES_IN_CHUNK, MHP_MERGE_RESOURCE); if (ret) { pr_err("hot_add memory failed error is %d\n", ret); @@ -753,7 +757,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, do_hot_add = false; } scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { - has->ha_end_pfn -= HA_CHUNK; + has->ha_end_pfn -= ha_pages_in_chunk; has->covered_end_pfn -= processed_pfn; } break; @@ -829,9 +833,9 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) * our current limit; extend it. */ if ((start_pfn + pfn_cnt) > has->end_pfn) { - /* Extend the region by multiples of HA_CHUNK */ + /* Extend the region by multiples of ha_pages_in_chunk */ residual = (start_pfn + pfn_cnt - has->end_pfn); - has->end_pfn += ALIGN(residual, HA_CHUNK); + has->end_pfn += ALIGN(residual, ha_pages_in_chunk); } ret = 1; @@ -897,12 +901,12 @@ static unsigned long handle_pg_range(unsigned long pg_start, * We have some residual hot add range * that needs to be hot added; hot add * it now. Hot add a multiple of - * HA_CHUNK that fully covers the pages + * ha_pages_in_chunk that fully covers the pages * we have. */ size = (has->end_pfn - has->ha_end_pfn); if (pfn_cnt <= size) { - size = ALIGN(pfn_cnt, HA_CHUNK); + size = ALIGN(pfn_cnt, ha_pages_in_chunk); } else { pfn_cnt = size; } @@ -1003,8 +1007,8 @@ static void hot_add_req(struct work_struct *dummy) * that need to be hot-added while ensuring the alignment * and size requirements of Linux as it relates to hot-add. */ - rg_start = ALIGN_DOWN(pg_start, HA_CHUNK); - rg_sz = ALIGN(pfn_cnt, HA_CHUNK); + rg_start = ALIGN_DOWN(pg_start, ha_pages_in_chunk); + rg_sz = ALIGN(pfn_cnt, ha_pages_in_chunk); } if (do_hot_add) @@ -1807,10 +1811,13 @@ static int balloon_connect_vsp(struct hv_device *dev) cap_msg.caps.cap_bits.hot_add = hot_add_enabled(); /* - * Specify our alignment requirements as it relates - * memory hot-add. Specify 128MB alignment. + * Specify our alignment requirements for memory hot-add. The value is + * the log base 2 of the number of megabytes in a chunk. For example, + * with 256 MiB chunks, the value is 8. The number of MiB in a chunk + * must be a power of 2. */ - cap_msg.caps.cap_bits.hot_add_alignment = 7; + cap_msg.caps.cap_bits.hot_add_alignment = + ilog2(HA_BYTES_IN_CHUNK / SZ_1M); /* * Currently the host does not use these @@ -1960,8 +1967,23 @@ static int balloon_probe(struct hv_device *dev, hot_add = false; #ifdef CONFIG_MEMORY_HOTPLUG + /* + * Hot-add must operate in chunks that are of size equal to the + * memory block size because that's what the core add_memory() + * interface requires. The Hyper-V interface requires that the memory + * block size be a power of 2, which is guaranteed by the check in + * memory_dev_init(). + */ + ha_pages_in_chunk = memory_block_size_bytes() / PAGE_SIZE; do_hot_add = hot_add; #else + /* + * Without MEMORY_HOTPLUG, the guest returns a failure status for all + * hot add requests from Hyper-V, and the chunk size is used only to + * specify alignment to Hyper-V as required by the host/guest protocol. + * Somewhat arbitrarily, use 128 MiB. + */ + ha_pages_in_chunk = SZ_128M / PAGE_SIZE; do_hot_add = false; #endif dm_device.dev = dev; From 207e03b00b47ccbd692941b183510026e1bd6ce9 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Sun, 5 May 2024 22:38:58 -0700 Subject: [PATCH 020/778] tools: hv: suppress the invalid warning for packed member alignment Packed struct vmbus_bufring is 4096 byte aligned and the reporting warning is for the first member of that struct which shouldn't add any offset to create alignment issue. Suppress the warning by adding -Wno-address-of-packed-member flag to gcc. Fixes: 45bab4d74651 ("tools: hv: Add vmbus_bufring") Reported-by: kernel test robot Closes: https://lore.kernel.org/all/202404121913.GhtSoKbW-lkp@intel.com/ Signed-off-by: Saurabh Sengar Link: https://lore.kernel.org/r/1714973938-4063-1-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1714973938-4063-1-git-send-email-ssengar@linux.microsoft.com> --- tools/hv/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/hv/Makefile b/tools/hv/Makefile index bb52871da341..2e60e2c212cd 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -17,6 +17,7 @@ endif MAKEFLAGS += -r override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include +override CFLAGS += -Wno-address-of-packed-member ALL_TARGETS := hv_kvp_daemon hv_vss_daemon ifneq ($(ARCH), aarch64) From 4c5a65fd10895708952106652b2ac2ca3b7bb9d9 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sat, 11 May 2024 06:38:17 -0700 Subject: [PATCH 021/778] Documentation: hyperv: Update spelling and fix typo Update spelling from "VMbus" to "VMBus" to match Hyper-V product documentation. Also correct typo: "SNP-SEV" should be "SEV-SNP". Signed-off-by: Michael Kelley Reviewed-by: Easwar Hariharan Reviewed-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240511133818.19649-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240511133818.19649-1-mhklinux@outlook.com> --- Documentation/virt/hyperv/overview.rst | 22 +++---- Documentation/virt/hyperv/vmbus.rst | 82 +++++++++++++------------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/Documentation/virt/hyperv/overview.rst b/Documentation/virt/hyperv/overview.rst index cd493332c88a..77408a89d1a4 100644 --- a/Documentation/virt/hyperv/overview.rst +++ b/Documentation/virt/hyperv/overview.rst @@ -40,7 +40,7 @@ Linux guests communicate with Hyper-V in four different ways: arm64, these synthetic registers must be accessed using explicit hypercalls. -* VMbus: VMbus is a higher-level software construct that is built on +* VMBus: VMBus is a higher-level software construct that is built on the other 3 mechanisms. It is a message passing interface between the Hyper-V host and the Linux guest. It uses memory that is shared between Hyper-V and the guest, along with various signaling @@ -54,8 +54,8 @@ x86/x64 architecture only. .. _Hyper-V Top Level Functional Spec (TLFS): https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/tlfs -VMbus is not documented. This documentation provides a high-level -overview of VMbus and how it works, but the details can be discerned +VMBus is not documented. This documentation provides a high-level +overview of VMBus and how it works, but the details can be discerned only from the code. Sharing Memory @@ -74,7 +74,7 @@ follows: physical address space. How Hyper-V is told about the GPA or list of GPAs varies. In some cases, a single GPA is written to a synthetic register. In other cases, a GPA or list of GPAs is sent - in a VMbus message. + in a VMBus message. * Hyper-V translates the GPAs into "real" physical memory addresses, and creates a virtual mapping that it can use to access the memory. @@ -133,9 +133,9 @@ only the CPUs actually present in the VM, so Linux does not report any hot-add CPUs. A Linux guest CPU may be taken offline using the normal Linux -mechanisms, provided no VMbus channel interrupts are assigned to -the CPU. See the section on VMbus Interrupts for more details -on how VMbus channel interrupts can be re-assigned to permit +mechanisms, provided no VMBus channel interrupts are assigned to +the CPU. See the section on VMBus Interrupts for more details +on how VMBus channel interrupts can be re-assigned to permit taking a CPU offline. 32-bit and 64-bit @@ -169,14 +169,14 @@ and functionality. Hyper-V indicates feature/function availability via flags in synthetic MSRs that Hyper-V provides to the guest, and the guest code tests these flags. -VMbus has its own protocol version that is negotiated during the -initial VMbus connection from the guest to Hyper-V. This version +VMBus has its own protocol version that is negotiated during the +initial VMBus connection from the guest to Hyper-V. This version number is also output to dmesg during boot. This version number is checked in a few places in the code to determine if specific functionality is present. -Furthermore, each synthetic device on VMbus also has a protocol -version that is separate from the VMbus protocol version. Device +Furthermore, each synthetic device on VMBus also has a protocol +version that is separate from the VMBus protocol version. Device drivers for these synthetic devices typically negotiate the device protocol version, and may test that protocol version to determine if specific device functionality is present. diff --git a/Documentation/virt/hyperv/vmbus.rst b/Documentation/virt/hyperv/vmbus.rst index d2012d9022c5..f0d83ebda626 100644 --- a/Documentation/virt/hyperv/vmbus.rst +++ b/Documentation/virt/hyperv/vmbus.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0 -VMbus +VMBus ===== -VMbus is a software construct provided by Hyper-V to guest VMs. It +VMBus is a software construct provided by Hyper-V to guest VMs. It consists of a control path and common facilities used by synthetic devices that Hyper-V presents to guest VMs. The control path is used to offer synthetic devices to the guest VM and, in some cases, @@ -12,9 +12,9 @@ and the synthetic device implementation that is part of Hyper-V, and signaling primitives to allow Hyper-V and the guest to interrupt each other. -VMbus is modeled in Linux as a bus, with the expected /sys/bus/vmbus -entry in a running Linux guest. The VMbus driver (drivers/hv/vmbus_drv.c) -establishes the VMbus control path with the Hyper-V host, then +VMBus is modeled in Linux as a bus, with the expected /sys/bus/vmbus +entry in a running Linux guest. The VMBus driver (drivers/hv/vmbus_drv.c) +establishes the VMBus control path with the Hyper-V host, then registers itself as a Linux bus driver. It implements the standard bus functions for adding and removing devices to/from the bus. @@ -49,9 +49,9 @@ synthetic NIC is referred to as "netvsc" and the Linux driver for the synthetic SCSI controller is "storvsc". These drivers contain functions with names like "storvsc_connect_to_vsp". -VMbus channels +VMBus channels -------------- -An instance of a synthetic device uses VMbus channels to communicate +An instance of a synthetic device uses VMBus channels to communicate between the VSP and the VSC. Channels are bi-directional and used for passing messages. Most synthetic devices use a single channel, but the synthetic SCSI controller and synthetic NIC may use multiple @@ -73,7 +73,7 @@ write indices and some control flags, followed by the memory for the actual ring. The size of the ring is determined by the VSC in the guest and is specific to each synthetic device. The list of GPAs making up the ring is communicated to the Hyper-V host over the -VMbus control path as a GPA Descriptor List (GPADL). See function +VMBus control path as a GPA Descriptor List (GPADL). See function vmbus_establish_gpadl(). Each ring buffer is mapped into contiguous Linux kernel virtual @@ -102,9 +102,9 @@ resources. For Windows Server 2019 and later, this limit is approximately 1280 Mbytes. For versions prior to Windows Server 2019, the limit is approximately 384 Mbytes. -VMbus messages +VMBus messages -------------- -All VMbus messages have a standard header that includes the message +All VMBus messages have a standard header that includes the message length, the offset of the message payload, some flags, and a transactionID. The portion of the message after the header is unique to each VSP/VSC pair. @@ -137,7 +137,7 @@ control message contains a list of GPAs that describe the data buffer. For example, the storvsc driver uses this approach to specify the data buffers to/from which disk I/O is done. -Three functions exist to send VMbus messages: +Three functions exist to send VMBus messages: 1. vmbus_sendpacket(): Control-only messages and messages with embedded data -- no GPAs @@ -154,20 +154,20 @@ Historically, Linux guests have trusted Hyper-V to send well-formed and valid messages, and Linux drivers for synthetic devices did not fully validate messages. With the introduction of processor technologies that fully encrypt guest memory and that allow the -guest to not trust the hypervisor (AMD SNP-SEV, Intel TDX), trusting +guest to not trust the hypervisor (AMD SEV-SNP, Intel TDX), trusting the Hyper-V host is no longer a valid assumption. The drivers for -VMbus synthetic devices are being updated to fully validate any +VMBus synthetic devices are being updated to fully validate any values read from memory that is shared with Hyper-V, which includes -messages from VMbus devices. To facilitate such validation, +messages from VMBus devices. To facilitate such validation, messages read by the guest from the "in" ring buffer are copied to a temporary buffer that is not shared with Hyper-V. Validation is performed in this temporary buffer without the risk of Hyper-V maliciously modifying the message after it is validated but before it is used. -VMbus interrupts +VMBus interrupts ---------------- -VMbus provides a mechanism for the guest to interrupt the host when +VMBus provides a mechanism for the guest to interrupt the host when the guest has queued new messages in a ring buffer. The host expects that the guest will send an interrupt only when an "out" ring buffer transitions from empty to non-empty. If the guest sends @@ -177,62 +177,62 @@ interrupts, the host may throttle that guest by suspending its execution for a few seconds to prevent a denial-of-service attack. Similarly, the host will interrupt the guest when it sends a new -message on the VMbus control path, or when a VMbus channel "in" ring +message on the VMBus control path, or when a VMBus channel "in" ring buffer transitions from empty to non-empty. Each CPU in the guest -may receive VMbus interrupts, so they are best modeled as per-CPU +may receive VMBus interrupts, so they are best modeled as per-CPU interrupts in Linux. This model works well on arm64 where a single -per-CPU IRQ is allocated for VMbus. Since x86/x64 lacks support for +per-CPU IRQ is allocated for VMBus. Since x86/x64 lacks support for per-CPU IRQs, an x86 interrupt vector is statically allocated (see HYPERVISOR_CALLBACK_VECTOR) across all CPUs and explicitly coded to -call the VMbus interrupt service routine. These interrupts are +call the VMBus interrupt service routine. These interrupts are visible in /proc/interrupts on the "HYP" line. -The guest CPU that a VMbus channel will interrupt is selected by the +The guest CPU that a VMBus channel will interrupt is selected by the guest when the channel is created, and the host is informed of that -selection. VMbus devices are broadly grouped into two categories: +selection. VMBus devices are broadly grouped into two categories: -1. "Slow" devices that need only one VMbus channel. The devices +1. "Slow" devices that need only one VMBus channel. The devices (such as keyboard, mouse, heartbeat, and timesync) generate - relatively few interrupts. Their VMbus channels are all + relatively few interrupts. Their VMBus channels are all assigned to interrupt the VMBUS_CONNECT_CPU, which is always CPU 0. -2. "High speed" devices that may use multiple VMbus channels for +2. "High speed" devices that may use multiple VMBus channels for higher parallelism and performance. These devices include the - synthetic SCSI controller and synthetic NIC. Their VMbus + synthetic SCSI controller and synthetic NIC. Their VMBus channels interrupts are assigned to CPUs that are spread out among the available CPUs in the VM so that interrupts on multiple channels can be processed in parallel. -The assignment of VMbus channel interrupts to CPUs is done in the +The assignment of VMBus channel interrupts to CPUs is done in the function init_vp_index(). This assignment is done outside of the normal Linux interrupt affinity mechanism, so the interrupts are neither "unmanaged" nor "managed" interrupts. -The CPU that a VMbus channel will interrupt can be seen in +The CPU that a VMBus channel will interrupt can be seen in /sys/bus/vmbus/devices// channels//cpu. When running on later versions of Hyper-V, the CPU can be changed by writing a new value to this sysfs entry. Because the interrupt assignment is done outside of the normal Linux affinity mechanism, there are no entries in /proc/irq corresponding to individual -VMbus channel interrupts. +VMBus channel interrupts. An online CPU in a Linux guest may not be taken offline if it has -VMbus channel interrupts assigned to it. Any such channel +VMBus channel interrupts assigned to it. Any such channel interrupts must first be manually reassigned to another CPU as described above. When no channel interrupts are assigned to the CPU, it can be taken offline. -When a guest CPU receives a VMbus interrupt from the host, the +When a guest CPU receives a VMBus interrupt from the host, the function vmbus_isr() handles the interrupt. It first checks for channel interrupts by calling vmbus_chan_sched(), which looks at a bitmap setup by the host to determine which channels have pending interrupts on this CPU. If multiple channels have pending interrupts for this CPU, they are processed sequentially. When all channel interrupts have been processed, vmbus_isr() checks for and -processes any message received on the VMbus control path. +processes any message received on the VMBus control path. -The VMbus channel interrupt handling code is designed to work +The VMBus channel interrupt handling code is designed to work correctly even if an interrupt is received on a CPU other than the CPU assigned to the channel. Specifically, the code does not use CPU-based exclusion for correctness. In normal operation, Hyper-V @@ -242,23 +242,23 @@ when Hyper-V will make the transition. The code must work correctly even if there is a time lag before Hyper-V starts interrupting the new CPU. See comments in target_cpu_store(). -VMbus device creation/deletion +VMBus device creation/deletion ------------------------------ Hyper-V and the Linux guest have a separate message-passing path that is used for synthetic device creation and deletion. This -path does not use a VMbus channel. See vmbus_post_msg() and +path does not use a VMBus channel. See vmbus_post_msg() and vmbus_on_msg_dpc(). The first step is for the guest to connect to the generic -Hyper-V VMbus mechanism. As part of establishing this connection, -the guest and Hyper-V agree on a VMbus protocol version they will +Hyper-V VMBus mechanism. As part of establishing this connection, +the guest and Hyper-V agree on a VMBus protocol version they will use. This negotiation allows newer Linux kernels to run on older Hyper-V versions, and vice versa. The guest then tells Hyper-V to "send offers". Hyper-V sends an offer message to the guest for each synthetic device that the VM -is configured to have. Each VMbus device type has a fixed GUID -known as the "class ID", and each VMbus device instance is also +is configured to have. Each VMBus device type has a fixed GUID +known as the "class ID", and each VMBus device instance is also identified by a GUID. The offer message from Hyper-V contains both GUIDs to uniquely (within the VM) identify the device. There is one offer message for each device instance, so a VM with @@ -275,7 +275,7 @@ type based on the class ID, and invokes the correct driver to set up the device. Driver/device matching is performed using the standard Linux mechanism. -The device driver probe function opens the primary VMbus channel to +The device driver probe function opens the primary VMBus channel to the corresponding VSP. It allocates guest memory for the channel ring buffers and shares the ring buffer with the Hyper-V host by giving the host a list of GPAs for the ring buffer memory. See @@ -285,7 +285,7 @@ Once the ring buffer is set up, the device driver and VSP exchange setup messages via the primary channel. These messages may include negotiating the device protocol version to be used between the Linux VSC and the VSP on the Hyper-V host. The setup messages may also -include creating additional VMbus channels, which are somewhat +include creating additional VMBus channels, which are somewhat mis-named as "sub-channels" since they are functionally equivalent to the primary channel once they are created. From a0b134032e6c5552635c7142ad7f181eba2f3256 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sat, 11 May 2024 06:38:18 -0700 Subject: [PATCH 022/778] Documentation: hyperv: Improve synic and interrupt handling description Current documentation does not describe how Linux handles the synthetic interrupt controller (synic) that Hyper-V provides to guest VMs, nor how VMBus or timer interrupts are handled. Add text describing the synic and reorganize existing text to make this more clear. Signed-off-by: Michael Kelley Reviewed-by: Easwar Hariharan Reviewed-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20240511133818.19649-2-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240511133818.19649-2-mhklinux@outlook.com> --- Documentation/virt/hyperv/clocks.rst | 21 +++++--- Documentation/virt/hyperv/vmbus.rst | 79 ++++++++++++++++++---------- 2 files changed, 66 insertions(+), 34 deletions(-) diff --git a/Documentation/virt/hyperv/clocks.rst b/Documentation/virt/hyperv/clocks.rst index a56f4837d443..176043265803 100644 --- a/Documentation/virt/hyperv/clocks.rst +++ b/Documentation/virt/hyperv/clocks.rst @@ -62,12 +62,21 @@ shared page with scale and offset values into user space. User space code performs the same algorithm of reading the TSC and applying the scale and offset to get the constant 10 MHz clock. -Linux clockevents are based on Hyper-V synthetic timer 0. While -Hyper-V offers 4 synthetic timers for each CPU, Linux only uses -timer 0. Interrupts from stimer0 are recorded on the "HVS" line in -/proc/interrupts. Clockevents based on the virtualized PIT and -local APIC timer also work, but the Hyper-V synthetic timer is -preferred. +Linux clockevents are based on Hyper-V synthetic timer 0 (stimer0). +While Hyper-V offers 4 synthetic timers for each CPU, Linux only uses +timer 0. In older versions of Hyper-V, an interrupt from stimer0 +results in a VMBus control message that is demultiplexed by +vmbus_isr() as described in the Documentation/virt/hyperv/vmbus.rst +documentation. In newer versions of Hyper-V, stimer0 interrupts can +be mapped to an architectural interrupt, which is referred to as +"Direct Mode". Linux prefers to use Direct Mode when available. Since +x86/x64 doesn't support per-CPU interrupts, Direct Mode statically +allocates an x86 interrupt vector (HYPERV_STIMER0_VECTOR) across all CPUs +and explicitly codes it to call the stimer0 interrupt handler. Hence +interrupts from stimer0 are recorded on the "HVS" line in /proc/interrupts +rather than being associated with a Linux IRQ. Clockevents based on the +virtualized PIT and local APIC timer also work, but Hyper-V stimer0 +is preferred. The driver for the Hyper-V synthetic system clock and timers is drivers/clocksource/hyperv_timer.c. diff --git a/Documentation/virt/hyperv/vmbus.rst b/Documentation/virt/hyperv/vmbus.rst index f0d83ebda626..1dcef6a7fda3 100644 --- a/Documentation/virt/hyperv/vmbus.rst +++ b/Documentation/virt/hyperv/vmbus.rst @@ -102,10 +102,10 @@ resources. For Windows Server 2019 and later, this limit is approximately 1280 Mbytes. For versions prior to Windows Server 2019, the limit is approximately 384 Mbytes. -VMBus messages --------------- -All VMBus messages have a standard header that includes the message -length, the offset of the message payload, some flags, and a +VMBus channel messages +---------------------- +All messages sent in a VMBus channel have a standard header that includes +the message length, the offset of the message payload, some flags, and a transactionID. The portion of the message after the header is unique to each VSP/VSC pair. @@ -137,7 +137,7 @@ control message contains a list of GPAs that describe the data buffer. For example, the storvsc driver uses this approach to specify the data buffers to/from which disk I/O is done. -Three functions exist to send VMBus messages: +Three functions exist to send VMBus channel messages: 1. vmbus_sendpacket(): Control-only messages and messages with embedded data -- no GPAs @@ -165,6 +165,37 @@ performed in this temporary buffer without the risk of Hyper-V maliciously modifying the message after it is validated but before it is used. +Synthetic Interrupt Controller (synic) +-------------------------------------- +Hyper-V provides each guest CPU with a synthetic interrupt controller +that is used by VMBus for host-guest communication. While each synic +defines 16 synthetic interrupts (SINT), Linux uses only one of the 16 +(VMBUS_MESSAGE_SINT). All interrupts related to communication between +the Hyper-V host and a guest CPU use that SINT. + +The SINT is mapped to a single per-CPU architectural interrupt (i.e, +an 8-bit x86/x64 interrupt vector, or an arm64 PPI INTID). Because +each CPU in the guest has a synic and may receive VMBus interrupts, +they are best modeled in Linux as per-CPU interrupts. This model works +well on arm64 where a single per-CPU Linux IRQ is allocated for +VMBUS_MESSAGE_SINT. This IRQ appears in /proc/interrupts as an IRQ labelled +"Hyper-V VMbus". Since x86/x64 lacks support for per-CPU IRQs, an x86 +interrupt vector is statically allocated (HYPERVISOR_CALLBACK_VECTOR) +across all CPUs and explicitly coded to call vmbus_isr(). In this case, +there's no Linux IRQ, and the interrupts are visible in aggregate in +/proc/interrupts on the "HYP" line. + +The synic provides the means to demultiplex the architectural interrupt into +one or more logical interrupts and route the logical interrupt to the proper +VMBus handler in Linux. This demultiplexing is done by vmbus_isr() and +related functions that access synic data structures. + +The synic is not modeled in Linux as an irq chip or irq domain, +and the demultiplexed logical interrupts are not Linux IRQs. As such, +they don't appear in /proc/interrupts or /proc/irq. The CPU +affinity for one of these logical interrupts is controlled via an +entry under /sys/bus/vmbus as described below. + VMBus interrupts ---------------- VMBus provides a mechanism for the guest to interrupt the host when @@ -176,16 +207,18 @@ unnecessary. If a guest sends an excessive number of unnecessary interrupts, the host may throttle that guest by suspending its execution for a few seconds to prevent a denial-of-service attack. -Similarly, the host will interrupt the guest when it sends a new -message on the VMBus control path, or when a VMBus channel "in" ring -buffer transitions from empty to non-empty. Each CPU in the guest -may receive VMBus interrupts, so they are best modeled as per-CPU -interrupts in Linux. This model works well on arm64 where a single -per-CPU IRQ is allocated for VMBus. Since x86/x64 lacks support for -per-CPU IRQs, an x86 interrupt vector is statically allocated (see -HYPERVISOR_CALLBACK_VECTOR) across all CPUs and explicitly coded to -call the VMBus interrupt service routine. These interrupts are -visible in /proc/interrupts on the "HYP" line. +Similarly, the host will interrupt the guest via the synic when +it sends a new message on the VMBus control path, or when a VMBus +channel "in" ring buffer transitions from empty to non-empty due to +the host inserting a new VMBus channel message. The control message stream +and each VMBus channel "in" ring buffer are separate logical interrupts +that are demultiplexed by vmbus_isr(). It demultiplexes by first checking +for channel interrupts by calling vmbus_chan_sched(), which looks at a synic +bitmap to determine which channels have pending interrupts on this CPU. +If multiple channels have pending interrupts for this CPU, they are +processed sequentially. When all channel interrupts have been processed, +vmbus_isr() checks for and processes any messages received on the VMBus +control path. The guest CPU that a VMBus channel will interrupt is selected by the guest when the channel is created, and the host is informed of that @@ -212,10 +245,9 @@ neither "unmanaged" nor "managed" interrupts. The CPU that a VMBus channel will interrupt can be seen in /sys/bus/vmbus/devices// channels//cpu. When running on later versions of Hyper-V, the CPU can be changed -by writing a new value to this sysfs entry. Because the interrupt -assignment is done outside of the normal Linux affinity mechanism, -there are no entries in /proc/irq corresponding to individual -VMBus channel interrupts. +by writing a new value to this sysfs entry. Because VMBus channel +interrupts are not Linux IRQs, there are no entries in /proc/interrupts +or /proc/irq corresponding to individual VMBus channel interrupts. An online CPU in a Linux guest may not be taken offline if it has VMBus channel interrupts assigned to it. Any such channel @@ -223,15 +255,6 @@ interrupts must first be manually reassigned to another CPU as described above. When no channel interrupts are assigned to the CPU, it can be taken offline. -When a guest CPU receives a VMBus interrupt from the host, the -function vmbus_isr() handles the interrupt. It first checks for -channel interrupts by calling vmbus_chan_sched(), which looks at a -bitmap setup by the host to determine which channels have pending -interrupts on this CPU. If multiple channels have pending -interrupts for this CPU, they are processed sequentially. When all -channel interrupts have been processed, vmbus_isr() checks for and -processes any message received on the VMBus control path. - The VMBus channel interrupt handling code is designed to work correctly even if an interrupt is received on a CPU other than the CPU assigned to the channel. Specifically, the code does not use From db03d39053a97d2f2a6baec025ebdacbab5886d2 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 27 May 2024 15:44:48 +0200 Subject: [PATCH 023/778] ovl: fix copy-up in tmpfile Move ovl_copy_up() call outside of ovl_want_write()/ovl_drop_write() region, since copy up may also call ovl_want_write() resulting in recursive locking on sb->s_writers. Reported-and-tested-by: syzbot+85e58cdf5b3136471d4b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000f6865106191c3e58@google.com/ Fixes: 9a87907de359 ("ovl: implement tmpfile") Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 116f542442dd..ab65e98a1def 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1314,10 +1314,6 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry, int flags = file->f_flags | OVL_OPEN_FLAGS; int err; - err = ovl_copy_up(dentry->d_parent); - if (err) - return err; - old_cred = ovl_override_creds(dentry->d_sb); err = ovl_setup_cred_for_create(dentry, inode, mode, old_cred); if (err) @@ -1360,6 +1356,10 @@ static int ovl_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (!OVL_FS(dentry->d_sb)->tmpfile) return -EOPNOTSUPP; + err = ovl_copy_up(dentry->d_parent); + if (err) + return err; + err = ovl_want_write(dentry); if (err) return err; From 0c07c273a5fe1a25d4f477fe7edf64b3e8b19b3d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 27 May 2024 14:15:22 +0200 Subject: [PATCH 024/778] debugfs: continue to ignore unknown mount options Wolfram reported that debugfs remained empty on some of his boards triggering the message "debugfs: Unknown parameter 'auto'". The root of the issue is that we ignored unknown mount options in the old mount api but we started rejecting unknown mount options in the new mount api. Continue to ignore unknown mount options to not regress userspace. Fixes: a20971c18752 ("vfs: Convert debugfs to use the new mount API") Link: https://lore.kernel.org/r/20240527100618.np2wqiw5mz7as3vk@ninjato Reported-by: Wolfram Sang Tested-by: Wolfram Sang Signed-off-by: Christian Brauner --- fs/debugfs/inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index dc51df0b118d..8fd928899a59 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -107,8 +107,16 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param int opt; opt = fs_parse(fc, debugfs_param_specs, param, &result); - if (opt < 0) + if (opt < 0) { + /* + * We might like to report bad mount options here; but + * traditionally debugfs has ignored all mount options + */ + if (opt == -ENOPARAM) + return 0; + return opt; + } switch (opt) { case Opt_uid: From db003a28e03f95f2bcb63f037a2078b8870b1ecd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 May 2024 14:33:20 +0200 Subject: [PATCH 025/778] netfs: fix kernel doc for nets_wait_for_outstanding_io() The @inode parameter wasn't documented leading to new doc build warnings. Fixes: f89ea63f1c65 ("netfs, 9p: Fix race between umount and async request completion") Link: https://lore.kernel.org/r/20240528133050.7e09d78e@canb.auug.org.au Signed-off-by: Christian Brauner --- include/linux/netfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 3ca3906bb8da..5d0288938cc2 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -521,7 +521,7 @@ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx) /** * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete - * @ctx: The netfs inode to wait on + * @inode: The netfs inode to wait on * * Wait for outstanding I/O requests of any type to complete. This is intended * to be called from inode eviction routines. This makes sure that any From a69ce592cbe0417664bc5a075205aa75c2ec1273 Mon Sep 17 00:00:00 2001 From: Jonathan Denose Date: Fri, 3 May 2024 16:12:07 +0000 Subject: [PATCH 026/778] Input: elantech - fix touchpad state on resume for Lenovo N24 The Lenovo N24 on resume becomes stuck in a state where it sends incorrect packets, causing elantech_packet_check_v4 to fail. The only way for the device to resume sending the correct packets is for it to be disabled and then re-enabled. This change adds a dmi check to trigger this behavior on resume. Signed-off-by: Jonathan Denose Link: https://lore.kernel.org/r/20240503155020.v2.1.Ifa0e25ebf968d8f307f58d678036944141ab17e6@changeid Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 4e38229404b4..b4723ea395eb 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1476,16 +1476,47 @@ static void elantech_disconnect(struct psmouse *psmouse) psmouse->private = NULL; } +/* + * Some hw_version 4 models fail to properly activate absolute mode on + * resume without going through disable/enable cycle. + */ +static const struct dmi_system_id elantech_needs_reenable[] = { +#if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + /* Lenovo N24 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "81AF"), + }, + }, +#endif + { } +}; + /* * Put the touchpad back into absolute mode when reconnecting */ static int elantech_reconnect(struct psmouse *psmouse) { + int err; + psmouse_reset(psmouse); if (elantech_detect(psmouse, 0)) return -1; + if (dmi_check_system(elantech_needs_reenable)) { + err = ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_DISABLE); + if (err) + psmouse_warn(psmouse, "failed to deactivate mouse on %s: %d\n", + psmouse->ps2dev.serio->phys, err); + + err = ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_ENABLE); + if (err) + psmouse_warn(psmouse, "failed to reactivate mouse on %s: %d\n", + psmouse->ps2dev.serio->phys, err); + } + if (elantech_set_absolute_mode(psmouse)) { psmouse_err(psmouse, "failed to put touchpad back into absolute mode.\n"); From ed7ee6a69f9289337af4835a908aa782263d4852 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 29 May 2024 10:39:01 +0200 Subject: [PATCH 027/778] statx: Update offset commentary for struct statx In commit 2a82bb02941f ("statx: stx_subvol"), a new member was added to struct statx, but the offset comment was not correct. Update it. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240529081725.3769290-1-john.g.garry@oracle.com Signed-off-by: Christian Brauner --- include/uapi/linux/stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 67626d535316..95770941ee2c 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -126,8 +126,8 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ + __u64 stx_subvol; /* Subvolume identifier */ __u64 __spare3[11]; /* Spare space for future expansion */ /* 0x100 */ }; From cc5ac966f26193ab185cc43d64d9f1ae998ccb6e Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:42:57 +0800 Subject: [PATCH 028/778] cachefiles: add output string to cachefiles_obj_[get|put]_ondemand_fd This lets us see the correct trace output. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-2-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jingbo Xu Signed-off-by: Christian Brauner --- include/trace/events/cachefiles.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index cf4b98b9a9ed..e3213af847cd 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -127,7 +127,9 @@ enum cachefiles_error_trace { EM(cachefiles_obj_see_lookup_cookie, "SEE lookup_cookie") \ EM(cachefiles_obj_see_lookup_failed, "SEE lookup_failed") \ EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ - E_(cachefiles_obj_see_withdrawal, "SEE withdrawal") + EM(cachefiles_obj_see_withdrawal, "SEE withdrawal") \ + EM(cachefiles_obj_get_ondemand_fd, "GET ondemand_fd") \ + E_(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") #define cachefiles_coherency_traces \ EM(cachefiles_coherency_check_aux, "BAD aux ") \ From 0fc75c5940fa634d84e64c93bfc388e1274ed013 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:42:58 +0800 Subject: [PATCH 029/778] cachefiles: remove requests from xarray during flushing requests Even with CACHEFILES_DEAD set, we can still read the requests, so in the following concurrency the request may be used after it has been freed: mount | daemon_thread1 | daemon_thread2 ------------------------------------------------------------ cachefiles_ondemand_init_object cachefiles_ondemand_send_req REQ_A = kzalloc(sizeof(*req) + data_len) wait_for_completion(&REQ_A->done) cachefiles_daemon_read cachefiles_ondemand_daemon_read // close dev fd cachefiles_flush_reqs complete(&REQ_A->done) kfree(REQ_A) xa_lock(&cache->reqs); cachefiles_ondemand_select_req req->msg.opcode != CACHEFILES_OP_READ // req use-after-free !!! xa_unlock(&cache->reqs); xa_destroy(&cache->reqs) Hence remove requests from cache->reqs when flushing them to avoid accessing freed requests. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-3-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jia Zhu Reviewed-by: Gao Xiang Reviewed-by: Jingbo Xu Signed-off-by: Christian Brauner --- fs/cachefiles/daemon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 6465e2574230..ccb7b707ea4b 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -159,6 +159,7 @@ static void cachefiles_flush_reqs(struct cachefiles_cache *cache) xa_for_each(xa, index, req) { req->error = -EIO; complete(&req->done); + __xa_erase(xa, index); } xa_unlock(xa); From de3e26f9e5b76fc628077578c001c4a51bf54d06 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:42:59 +0800 Subject: [PATCH 030/778] cachefiles: fix slab-use-after-free in cachefiles_ondemand_get_fd() We got the following issue in a fuzz test of randomly issuing the restore command: ================================================================== BUG: KASAN: slab-use-after-free in cachefiles_ondemand_daemon_read+0x609/0xab0 Write of size 4 at addr ffff888109164a80 by task ondemand-04-dae/4962 CPU: 11 PID: 4962 Comm: ondemand-04-dae Not tainted 6.8.0-rc7-dirty #542 Call Trace: kasan_report+0x94/0xc0 cachefiles_ondemand_daemon_read+0x609/0xab0 vfs_read+0x169/0xb50 ksys_read+0xf5/0x1e0 Allocated by task 626: __kmalloc+0x1df/0x4b0 cachefiles_ondemand_send_req+0x24d/0x690 cachefiles_create_tmpfile+0x249/0xb30 cachefiles_create_file+0x6f/0x140 cachefiles_look_up_object+0x29c/0xa60 cachefiles_lookup_cookie+0x37d/0xca0 fscache_cookie_state_machine+0x43c/0x1230 [...] Freed by task 626: kfree+0xf1/0x2c0 cachefiles_ondemand_send_req+0x568/0x690 cachefiles_create_tmpfile+0x249/0xb30 cachefiles_create_file+0x6f/0x140 cachefiles_look_up_object+0x29c/0xa60 cachefiles_lookup_cookie+0x37d/0xca0 fscache_cookie_state_machine+0x43c/0x1230 [...] ================================================================== Following is the process that triggers the issue: mount | daemon_thread1 | daemon_thread2 ------------------------------------------------------------ cachefiles_ondemand_init_object cachefiles_ondemand_send_req REQ_A = kzalloc(sizeof(*req) + data_len) wait_for_completion(&REQ_A->done) cachefiles_daemon_read cachefiles_ondemand_daemon_read REQ_A = cachefiles_ondemand_select_req cachefiles_ondemand_get_fd copy_to_user(_buffer, msg, n) process_open_req(REQ_A) ------ restore ------ cachefiles_ondemand_restore xas_for_each(&xas, req, ULONG_MAX) xas_set_mark(&xas, CACHEFILES_REQ_NEW); cachefiles_daemon_read cachefiles_ondemand_daemon_read REQ_A = cachefiles_ondemand_select_req write(devfd, ("copen %u,%llu", msg->msg_id, size)); cachefiles_ondemand_copen xa_erase(&cache->reqs, id) complete(&REQ_A->done) kfree(REQ_A) cachefiles_ondemand_get_fd(REQ_A) fd = get_unused_fd_flags file = anon_inode_getfile fd_install(fd, file) load = (void *)REQ_A->msg.data; load->fd = fd; // load UAF !!! This issue is caused by issuing a restore command when the daemon is still alive, which results in a request being processed multiple times thus triggering a UAF. So to avoid this problem, add an additional reference count to cachefiles_req, which is held while waiting and reading, and then released when the waiting and reading is over. Note that since there is only one reference count for waiting, we need to avoid the same request being completed multiple times, so we can only complete the request if it is successfully removed from the xarray. Fixes: e73fa11a356c ("cachefiles: add restore command to recover inflight ondemand read requests") Suggested-by: Hou Tao Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-4-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jia Zhu Reviewed-by: Jingbo Xu Signed-off-by: Christian Brauner --- fs/cachefiles/internal.h | 1 + fs/cachefiles/ondemand.c | 23 +++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index d33169f0018b..7745b8abc3aa 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -138,6 +138,7 @@ static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) struct cachefiles_req { struct cachefiles_object *object; struct completion done; + refcount_t ref; int error; struct cachefiles_msg msg; }; diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 4ba42f1fa3b4..c011fb24d238 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -4,6 +4,12 @@ #include #include "internal.h" +static inline void cachefiles_req_put(struct cachefiles_req *req) +{ + if (refcount_dec_and_test(&req->ref)) + kfree(req); +} + static int cachefiles_ondemand_fd_release(struct inode *inode, struct file *file) { @@ -330,6 +336,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, xas_clear_mark(&xas, CACHEFILES_REQ_NEW); cache->req_id_next = xas.xa_index + 1; + refcount_inc(&req->ref); xa_unlock(&cache->reqs); id = xas.xa_index; @@ -356,15 +363,22 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, complete(&req->done); } + cachefiles_req_put(req); return n; err_put_fd: if (msg->opcode == CACHEFILES_OP_OPEN) close_fd(((struct cachefiles_open *)msg->data)->fd); error: - xa_erase(&cache->reqs, id); - req->error = ret; - complete(&req->done); + xas_reset(&xas); + xas_lock(&xas); + if (xas_load(&xas) == req) { + req->error = ret; + complete(&req->done); + xas_store(&xas, NULL); + } + xas_unlock(&xas); + cachefiles_req_put(req); return ret; } @@ -395,6 +409,7 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, goto out; } + refcount_set(&req->ref, 1); req->object = object; init_completion(&req->done); req->msg.opcode = opcode; @@ -456,7 +471,7 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, wake_up_all(&cache->daemon_pollwq); wait_for_completion(&req->done); ret = req->error; - kfree(req); + cachefiles_req_put(req); return ret; out: /* Reset the object to close state in error handling path. From da4a827416066191aafeeccee50a8836a826ba10 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:43:00 +0800 Subject: [PATCH 031/778] cachefiles: fix slab-use-after-free in cachefiles_ondemand_daemon_read() We got the following issue in a fuzz test of randomly issuing the restore command: ================================================================== BUG: KASAN: slab-use-after-free in cachefiles_ondemand_daemon_read+0xb41/0xb60 Read of size 8 at addr ffff888122e84088 by task ondemand-04-dae/963 CPU: 13 PID: 963 Comm: ondemand-04-dae Not tainted 6.8.0-dirty #564 Call Trace: kasan_report+0x93/0xc0 cachefiles_ondemand_daemon_read+0xb41/0xb60 vfs_read+0x169/0xb50 ksys_read+0xf5/0x1e0 Allocated by task 116: kmem_cache_alloc+0x140/0x3a0 cachefiles_lookup_cookie+0x140/0xcd0 fscache_cookie_state_machine+0x43c/0x1230 [...] Freed by task 792: kmem_cache_free+0xfe/0x390 cachefiles_put_object+0x241/0x480 fscache_cookie_state_machine+0x5c8/0x1230 [...] ================================================================== Following is the process that triggers the issue: mount | daemon_thread1 | daemon_thread2 ------------------------------------------------------------ cachefiles_withdraw_cookie cachefiles_ondemand_clean_object(object) cachefiles_ondemand_send_req REQ_A = kzalloc(sizeof(*req) + data_len) wait_for_completion(&REQ_A->done) cachefiles_daemon_read cachefiles_ondemand_daemon_read REQ_A = cachefiles_ondemand_select_req msg->object_id = req->object->ondemand->ondemand_id ------ restore ------ cachefiles_ondemand_restore xas_for_each(&xas, req, ULONG_MAX) xas_set_mark(&xas, CACHEFILES_REQ_NEW) cachefiles_daemon_read cachefiles_ondemand_daemon_read REQ_A = cachefiles_ondemand_select_req copy_to_user(_buffer, msg, n) xa_erase(&cache->reqs, id) complete(&REQ_A->done) ------ close(fd) ------ cachefiles_ondemand_fd_release cachefiles_put_object cachefiles_put_object kmem_cache_free(cachefiles_object_jar, object) REQ_A->object->ondemand->ondemand_id // object UAF !!! When we see the request within xa_lock, req->object must not have been freed yet, so grab the reference count of object before xa_unlock to avoid the above issue. Fixes: 0a7e54c1959c ("cachefiles: resend an open request if the read request's object is closed") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-5-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jia Zhu Reviewed-by: Jingbo Xu Signed-off-by: Christian Brauner --- fs/cachefiles/ondemand.c | 3 +++ include/trace/events/cachefiles.h | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index c011fb24d238..3dd002108a87 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -337,6 +337,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, xas_clear_mark(&xas, CACHEFILES_REQ_NEW); cache->req_id_next = xas.xa_index + 1; refcount_inc(&req->ref); + cachefiles_grab_object(req->object, cachefiles_obj_get_read_req); xa_unlock(&cache->reqs); id = xas.xa_index; @@ -357,6 +358,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, goto err_put_fd; } + cachefiles_put_object(req->object, cachefiles_obj_put_read_req); /* CLOSE request has no reply */ if (msg->opcode == CACHEFILES_OP_CLOSE) { xa_erase(&cache->reqs, id); @@ -370,6 +372,7 @@ err_put_fd: if (msg->opcode == CACHEFILES_OP_OPEN) close_fd(((struct cachefiles_open *)msg->data)->fd); error: + cachefiles_put_object(req->object, cachefiles_obj_put_read_req); xas_reset(&xas); xas_lock(&xas); if (xas_load(&xas) == req) { diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index e3213af847cd..7d931db02b93 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -33,6 +33,8 @@ enum cachefiles_obj_ref_trace { cachefiles_obj_see_withdrawal, cachefiles_obj_get_ondemand_fd, cachefiles_obj_put_ondemand_fd, + cachefiles_obj_get_read_req, + cachefiles_obj_put_read_req, }; enum fscache_why_object_killed { @@ -129,7 +131,9 @@ enum cachefiles_error_trace { EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ EM(cachefiles_obj_see_withdrawal, "SEE withdrawal") \ EM(cachefiles_obj_get_ondemand_fd, "GET ondemand_fd") \ - E_(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") + EM(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") \ + EM(cachefiles_obj_get_read_req, "GET read_req") \ + E_(cachefiles_obj_put_read_req, "PUT read_req") #define cachefiles_coherency_traces \ EM(cachefiles_coherency_check_aux, "BAD aux ") \ From 3e6d704f02aa4c50c7bc5fe91a4401df249a137b Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:43:01 +0800 Subject: [PATCH 032/778] cachefiles: remove err_put_fd label in cachefiles_ondemand_daemon_read() The err_put_fd label is only used once, so remove it to make the code more readable. In addition, the logic for deleting error request and CLOSE request is merged to simplify the code. Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-6-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jia Zhu Reviewed-by: Gao Xiang Reviewed-by: Jingbo Xu Signed-off-by: Christian Brauner --- fs/cachefiles/ondemand.c | 45 ++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 3dd002108a87..bb94ef6a6f61 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -305,7 +305,6 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, { struct cachefiles_req *req; struct cachefiles_msg *msg; - unsigned long id = 0; size_t n; int ret = 0; XA_STATE(xas, &cache->reqs, cache->req_id_next); @@ -340,49 +339,37 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, cachefiles_grab_object(req->object, cachefiles_obj_get_read_req); xa_unlock(&cache->reqs); - id = xas.xa_index; - if (msg->opcode == CACHEFILES_OP_OPEN) { ret = cachefiles_ondemand_get_fd(req); if (ret) { cachefiles_ondemand_set_object_close(req->object); - goto error; + goto out; } } - msg->msg_id = id; + msg->msg_id = xas.xa_index; msg->object_id = req->object->ondemand->ondemand_id; if (copy_to_user(_buffer, msg, n) != 0) { ret = -EFAULT; - goto err_put_fd; + if (msg->opcode == CACHEFILES_OP_OPEN) + close_fd(((struct cachefiles_open *)msg->data)->fd); } - +out: cachefiles_put_object(req->object, cachefiles_obj_put_read_req); - /* CLOSE request has no reply */ - if (msg->opcode == CACHEFILES_OP_CLOSE) { - xa_erase(&cache->reqs, id); - complete(&req->done); + /* Remove error request and CLOSE request has no reply */ + if (ret || msg->opcode == CACHEFILES_OP_CLOSE) { + xas_reset(&xas); + xas_lock(&xas); + if (xas_load(&xas) == req) { + req->error = ret; + complete(&req->done); + xas_store(&xas, NULL); + } + xas_unlock(&xas); } - cachefiles_req_put(req); - return n; - -err_put_fd: - if (msg->opcode == CACHEFILES_OP_OPEN) - close_fd(((struct cachefiles_open *)msg->data)->fd); -error: - cachefiles_put_object(req->object, cachefiles_obj_put_read_req); - xas_reset(&xas); - xas_lock(&xas); - if (xas_load(&xas) == req) { - req->error = ret; - complete(&req->done); - xas_store(&xas, NULL); - } - xas_unlock(&xas); - cachefiles_req_put(req); - return ret; + return ret ? ret : n; } typedef int (*init_req_fn)(struct cachefiles_req *req, void *private); From a26dc49df37e996876f50a0210039b2d211fdd6f Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:43:02 +0800 Subject: [PATCH 033/778] cachefiles: add consistency check for copen/cread This prevents malicious processes from completing random copen/cread requests and crashing the system. Added checks are listed below: * Generic, copen can only complete open requests, and cread can only complete read requests. * For copen, ondemand_id must not be 0, because this indicates that the request has not been read by the daemon. * For cread, the object corresponding to fd and req should be the same. Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-7-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jingbo Xu Signed-off-by: Christian Brauner --- fs/cachefiles/ondemand.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index bb94ef6a6f61..898fab68332b 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -82,12 +82,12 @@ static loff_t cachefiles_ondemand_fd_llseek(struct file *filp, loff_t pos, } static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl, - unsigned long arg) + unsigned long id) { struct cachefiles_object *object = filp->private_data; struct cachefiles_cache *cache = object->volume->cache; struct cachefiles_req *req; - unsigned long id; + XA_STATE(xas, &cache->reqs, id); if (ioctl != CACHEFILES_IOC_READ_COMPLETE) return -EINVAL; @@ -95,10 +95,15 @@ static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl, if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) return -EOPNOTSUPP; - id = arg; - req = xa_erase(&cache->reqs, id); - if (!req) + xa_lock(&cache->reqs); + req = xas_load(&xas); + if (!req || req->msg.opcode != CACHEFILES_OP_READ || + req->object != object) { + xa_unlock(&cache->reqs); return -EINVAL; + } + xas_store(&xas, NULL); + xa_unlock(&cache->reqs); trace_cachefiles_ondemand_cread(object, id); complete(&req->done); @@ -126,6 +131,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) unsigned long id; long size; int ret; + XA_STATE(xas, &cache->reqs, 0); if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) return -EOPNOTSUPP; @@ -149,9 +155,16 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) if (ret) return ret; - req = xa_erase(&cache->reqs, id); - if (!req) + xa_lock(&cache->reqs); + xas.xa_index = id; + req = xas_load(&xas); + if (!req || req->msg.opcode != CACHEFILES_OP_OPEN || + !req->object->ondemand->ondemand_id) { + xa_unlock(&cache->reqs); return -EINVAL; + } + xas_store(&xas, NULL); + xa_unlock(&cache->reqs); /* fail OPEN request if copen format is invalid */ ret = kstrtol(psize, 0, &size); From 0a790040838c736495d5afd6b2d636f159f817f1 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:43:03 +0800 Subject: [PATCH 034/778] cachefiles: add spin_lock for cachefiles_ondemand_info The following concurrency may cause a read request to fail to be completed and result in a hung: t1 | t2 --------------------------------------------------------- cachefiles_ondemand_copen req = xa_erase(&cache->reqs, id) // Anon fd is maliciously closed. cachefiles_ondemand_fd_release xa_lock(&cache->reqs) cachefiles_ondemand_set_object_close(object) xa_unlock(&cache->reqs) cachefiles_ondemand_set_object_open // No one will ever close it again. cachefiles_ondemand_daemon_read cachefiles_ondemand_select_req // Get a read req but its fd is already closed. // The daemon can't issue a cread ioctl with an closed fd, then hung. So add spin_lock for cachefiles_ondemand_info to protect ondemand_id and state, thus we can avoid the above problem in cachefiles_ondemand_copen() by using ondemand_id to determine if fd has been closed. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-8-libaokun@huaweicloud.com Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/cachefiles/internal.h | 1 + fs/cachefiles/ondemand.c | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 7745b8abc3aa..45c8bed60538 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -55,6 +55,7 @@ struct cachefiles_ondemand_info { int ondemand_id; enum cachefiles_object_state state; struct cachefiles_object *object; + spinlock_t lock; }; /* diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 898fab68332b..d04ddc6576e3 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -16,13 +16,16 @@ static int cachefiles_ondemand_fd_release(struct inode *inode, struct cachefiles_object *object = file->private_data; struct cachefiles_cache *cache = object->volume->cache; struct cachefiles_ondemand_info *info = object->ondemand; - int object_id = info->ondemand_id; + int object_id; struct cachefiles_req *req; XA_STATE(xas, &cache->reqs, 0); xa_lock(&cache->reqs); + spin_lock(&info->lock); + object_id = info->ondemand_id; info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; cachefiles_ondemand_set_object_close(object); + spin_unlock(&info->lock); /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */ xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { @@ -127,6 +130,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) { struct cachefiles_req *req; struct fscache_cookie *cookie; + struct cachefiles_ondemand_info *info; char *pid, *psize; unsigned long id; long size; @@ -185,6 +189,33 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) goto out; } + info = req->object->ondemand; + spin_lock(&info->lock); + /* + * The anonymous fd was closed before copen ? Fail the request. + * + * t1 | t2 + * --------------------------------------------------------- + * cachefiles_ondemand_copen + * req = xa_erase(&cache->reqs, id) + * // Anon fd is maliciously closed. + * cachefiles_ondemand_fd_release + * xa_lock(&cache->reqs) + * cachefiles_ondemand_set_object_close(object) + * xa_unlock(&cache->reqs) + * cachefiles_ondemand_set_object_open + * // No one will ever close it again. + * cachefiles_ondemand_daemon_read + * cachefiles_ondemand_select_req + * + * Get a read req but its fd is already closed. The daemon can't + * issue a cread ioctl with an closed fd, then hung. + */ + if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) { + spin_unlock(&info->lock); + req->error = -EBADFD; + goto out; + } cookie = req->object->cookie; cookie->object_size = size; if (size) @@ -194,6 +225,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) trace_cachefiles_ondemand_copen(req->object, id, size); cachefiles_ondemand_set_object_open(req->object); + spin_unlock(&info->lock); wake_up_all(&cache->daemon_pollwq); out: @@ -596,6 +628,7 @@ int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, return -ENOMEM; object->ondemand->object = object; + spin_lock_init(&object->ondemand->lock); INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker); return 0; } From 4988e35e95fc938bdde0e15880fe72042fc86acf Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:43:04 +0800 Subject: [PATCH 035/778] cachefiles: never get a new anonymous fd if ondemand_id is valid Now every time the daemon reads an open request, it gets a new anonymous fd and ondemand_id. With the introduction of "restore", it is possible to read the same open request more than once, and therefore an object can have more than one anonymous fd. If the anonymous fd is not unique, the following concurrencies will result in an fd leak: t1 | t2 | t3 ------------------------------------------------------------ cachefiles_ondemand_init_object cachefiles_ondemand_send_req REQ_A = kzalloc(sizeof(*req) + data_len) wait_for_completion(&REQ_A->done) cachefiles_daemon_read cachefiles_ondemand_daemon_read REQ_A = cachefiles_ondemand_select_req cachefiles_ondemand_get_fd load->fd = fd0 ondemand_id = object_id0 ------ restore ------ cachefiles_ondemand_restore // restore REQ_A cachefiles_daemon_read cachefiles_ondemand_daemon_read REQ_A = cachefiles_ondemand_select_req cachefiles_ondemand_get_fd load->fd = fd1 ondemand_id = object_id1 process_open_req(REQ_A) write(devfd, ("copen %u,%llu", msg->msg_id, size)) cachefiles_ondemand_copen xa_erase(&cache->reqs, id) complete(&REQ_A->done) kfree(REQ_A) process_open_req(REQ_A) // copen fails due to no req // daemon close(fd1) cachefiles_ondemand_fd_release // set object closed -- umount -- cachefiles_withdraw_cookie cachefiles_ondemand_clean_object cachefiles_ondemand_init_close_req if (!cachefiles_ondemand_object_is_open(object)) return -ENOENT; // The fd0 is not closed until the daemon exits. However, the anonymous fd holds the reference count of the object and the object holds the reference count of the cookie. So even though the cookie has been relinquished, it will not be unhashed and freed until the daemon exits. In fscache_hash_cookie(), when the same cookie is found in the hash list, if the cookie is set with the FSCACHE_COOKIE_RELINQUISHED bit, then the new cookie waits for the old cookie to be unhashed, while the old cookie is waiting for the leaked fd to be closed, if the daemon does not exit in time it will trigger a hung task. To avoid this, allocate a new anonymous fd only if no anonymous fd has been allocated (ondemand_id == 0) or if the previously allocated anonymous fd has been closed (ondemand_id == -1). Moreover, returns an error if ondemand_id is valid, letting the daemon know that the current userland restore logic is abnormal and needs to be checked. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-9-libaokun@huaweicloud.com Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/cachefiles/ondemand.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index d04ddc6576e3..d2d4e27fca6f 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -14,11 +14,18 @@ static int cachefiles_ondemand_fd_release(struct inode *inode, struct file *file) { struct cachefiles_object *object = file->private_data; - struct cachefiles_cache *cache = object->volume->cache; - struct cachefiles_ondemand_info *info = object->ondemand; + struct cachefiles_cache *cache; + struct cachefiles_ondemand_info *info; int object_id; struct cachefiles_req *req; - XA_STATE(xas, &cache->reqs, 0); + XA_STATE(xas, NULL, 0); + + if (!object) + return 0; + + info = object->ondemand; + cache = object->volume->cache; + xas.xa = &cache->reqs; xa_lock(&cache->reqs); spin_lock(&info->lock); @@ -288,22 +295,39 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) goto err_put_fd; } + spin_lock(&object->ondemand->lock); + if (object->ondemand->ondemand_id > 0) { + spin_unlock(&object->ondemand->lock); + /* Pair with check in cachefiles_ondemand_fd_release(). */ + file->private_data = NULL; + ret = -EEXIST; + goto err_put_file; + } + file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; fd_install(fd, file); load = (void *)req->msg.data; load->fd = fd; object->ondemand->ondemand_id = object_id; + spin_unlock(&object->ondemand->lock); cachefiles_get_unbind_pincount(cache); trace_cachefiles_ondemand_open(object, &req->msg, load); return 0; +err_put_file: + fput(file); err_put_fd: put_unused_fd(fd); err_free_id: xa_erase(&cache->ondemand_ids, object_id); err: + spin_lock(&object->ondemand->lock); + /* Avoid marking an opened object as closed. */ + if (object->ondemand->ondemand_id <= 0) + cachefiles_ondemand_set_object_close(object); + spin_unlock(&object->ondemand->lock); cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); return ret; } @@ -386,10 +410,8 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, if (msg->opcode == CACHEFILES_OP_OPEN) { ret = cachefiles_ondemand_get_fd(req); - if (ret) { - cachefiles_ondemand_set_object_close(req->object); + if (ret) goto out; - } } msg->msg_id = xas.xa_index; From 4b4391e77a6bf24cba2ef1590e113d9b73b11039 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:43:05 +0800 Subject: [PATCH 036/778] cachefiles: defer exposing anon_fd until after copy_to_user() succeeds After installing the anonymous fd, we can now see it in userland and close it. However, at this point we may not have gotten the reference count of the cache, but we will put it during colse fd, so this may cause a cache UAF. So grab the cache reference count before fd_install(). In addition, by kernel convention, fd is taken over by the user land after fd_install(), and the kernel should not call close_fd() after that, i.e., it should call fd_install() after everything is ready, thus fd_install() is called after copy_to_user() succeeds. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Suggested-by: Hou Tao Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-10-libaokun@huaweicloud.com Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/cachefiles/ondemand.c | 53 +++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index d2d4e27fca6f..6f815e7c5086 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -4,6 +4,11 @@ #include #include "internal.h" +struct ondemand_anon_file { + struct file *file; + int fd; +}; + static inline void cachefiles_req_put(struct cachefiles_req *req) { if (refcount_dec_and_test(&req->ref)) @@ -263,14 +268,14 @@ int cachefiles_ondemand_restore(struct cachefiles_cache *cache, char *args) return 0; } -static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) +static int cachefiles_ondemand_get_fd(struct cachefiles_req *req, + struct ondemand_anon_file *anon_file) { struct cachefiles_object *object; struct cachefiles_cache *cache; struct cachefiles_open *load; - struct file *file; u32 object_id; - int ret, fd; + int ret; object = cachefiles_grab_object(req->object, cachefiles_obj_get_ondemand_fd); @@ -282,16 +287,16 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) if (ret < 0) goto err; - fd = get_unused_fd_flags(O_WRONLY); - if (fd < 0) { - ret = fd; + anon_file->fd = get_unused_fd_flags(O_WRONLY); + if (anon_file->fd < 0) { + ret = anon_file->fd; goto err_free_id; } - file = anon_inode_getfile("[cachefiles]", &cachefiles_ondemand_fd_fops, - object, O_WRONLY); - if (IS_ERR(file)) { - ret = PTR_ERR(file); + anon_file->file = anon_inode_getfile("[cachefiles]", + &cachefiles_ondemand_fd_fops, object, O_WRONLY); + if (IS_ERR(anon_file->file)) { + ret = PTR_ERR(anon_file->file); goto err_put_fd; } @@ -299,16 +304,15 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) if (object->ondemand->ondemand_id > 0) { spin_unlock(&object->ondemand->lock); /* Pair with check in cachefiles_ondemand_fd_release(). */ - file->private_data = NULL; + anon_file->file->private_data = NULL; ret = -EEXIST; goto err_put_file; } - file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; - fd_install(fd, file); + anon_file->file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; load = (void *)req->msg.data; - load->fd = fd; + load->fd = anon_file->fd; object->ondemand->ondemand_id = object_id; spin_unlock(&object->ondemand->lock); @@ -317,9 +321,11 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) return 0; err_put_file: - fput(file); + fput(anon_file->file); + anon_file->file = NULL; err_put_fd: - put_unused_fd(fd); + put_unused_fd(anon_file->fd); + anon_file->fd = ret; err_free_id: xa_erase(&cache->ondemand_ids, object_id); err: @@ -376,6 +382,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, struct cachefiles_msg *msg; size_t n; int ret = 0; + struct ondemand_anon_file anon_file; XA_STATE(xas, &cache->reqs, cache->req_id_next); xa_lock(&cache->reqs); @@ -409,7 +416,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, xa_unlock(&cache->reqs); if (msg->opcode == CACHEFILES_OP_OPEN) { - ret = cachefiles_ondemand_get_fd(req); + ret = cachefiles_ondemand_get_fd(req, &anon_file); if (ret) goto out; } @@ -417,10 +424,16 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, msg->msg_id = xas.xa_index; msg->object_id = req->object->ondemand->ondemand_id; - if (copy_to_user(_buffer, msg, n) != 0) { + if (copy_to_user(_buffer, msg, n) != 0) ret = -EFAULT; - if (msg->opcode == CACHEFILES_OP_OPEN) - close_fd(((struct cachefiles_open *)msg->data)->fd); + + if (msg->opcode == CACHEFILES_OP_OPEN) { + if (ret < 0) { + fput(anon_file.file); + put_unused_fd(anon_file.fd); + goto out; + } + fd_install(anon_file.fd, anon_file.file); } out: cachefiles_put_object(req->object, cachefiles_obj_put_read_req); From 4f8703fb3482f92edcfd31661857b16fec89c2c0 Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Wed, 22 May 2024 19:43:06 +0800 Subject: [PATCH 037/778] cachefiles: Set object to close if ondemand_id < 0 in copen If copen is maliciously called in the user mode, it may delete the request corresponding to the random id. And the request may have not been read yet. Note that when the object is set to reopen, the open request will be done with the still reopen state in above case. As a result, the request corresponding to this object is always skipped in select_req function, so the read request is never completed and blocks other process. Fix this issue by simply set object to close if its id < 0 in copen. Signed-off-by: Zizhi Wo Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-11-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jia Zhu Signed-off-by: Christian Brauner --- fs/cachefiles/ondemand.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 6f815e7c5086..922cab1a314b 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -182,6 +182,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) xas_store(&xas, NULL); xa_unlock(&cache->reqs); + info = req->object->ondemand; /* fail OPEN request if copen format is invalid */ ret = kstrtol(psize, 0, &size); if (ret) { @@ -201,7 +202,6 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) goto out; } - info = req->object->ondemand; spin_lock(&info->lock); /* * The anonymous fd was closed before copen ? Fail the request. @@ -241,6 +241,11 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) wake_up_all(&cache->daemon_pollwq); out: + spin_lock(&info->lock); + /* Need to set object close to avoid reopen status continuing */ + if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) + cachefiles_ondemand_set_object_close(req->object); + spin_unlock(&info->lock); complete(&req->done); return ret; } From 85e833cd7243bda7285492b0653c3abb1e2e757b Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:43:07 +0800 Subject: [PATCH 038/778] cachefiles: flush all requests after setting CACHEFILES_DEAD In ondemand mode, when the daemon is processing an open request, if the kernel flags the cache as CACHEFILES_DEAD, the cachefiles_daemon_write() will always return -EIO, so the daemon can't pass the copen to the kernel. Then the kernel process that is waiting for the copen triggers a hung_task. Since the DEAD state is irreversible, it can only be exited by closing /dev/cachefiles. Therefore, after calling cachefiles_io_error() to mark the cache as CACHEFILES_DEAD, if in ondemand mode, flush all requests to avoid the above hungtask. We may still be able to read some of the cached data before closing the fd of /dev/cachefiles. Note that this relies on the patch that adds reference counting to the req, otherwise it may UAF. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-12-libaokun@huaweicloud.com Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/cachefiles/daemon.c | 2 +- fs/cachefiles/internal.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index ccb7b707ea4b..06cdf1a8a16f 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -133,7 +133,7 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) return 0; } -static void cachefiles_flush_reqs(struct cachefiles_cache *cache) +void cachefiles_flush_reqs(struct cachefiles_cache *cache) { struct xarray *xa = &cache->reqs; struct cachefiles_req *req; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 45c8bed60538..6845a90cdfcc 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -188,6 +188,7 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, * daemon.c */ extern const struct file_operations cachefiles_daemon_fops; +extern void cachefiles_flush_reqs(struct cachefiles_cache *cache); extern void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache); extern void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache); @@ -426,6 +427,8 @@ do { \ pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \ fscache_io_error((___cache)->cache); \ set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ + if (cachefiles_in_ondemand_mode(___cache)) \ + cachefiles_flush_reqs(___cache); \ } while (0) #define cachefiles_io_error_obj(object, FMT, ...) \ From bc9dde6155464e906e630a0a5c17a4cab241ffbb Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:43:08 +0800 Subject: [PATCH 039/778] cachefiles: make on-demand read killable Replacing wait_for_completion() with wait_for_completion_killable() in cachefiles_ondemand_send_req() allows us to kill processes that might trigger a hunk_task if the daemon is abnormal. But now only CACHEFILES_OP_READ is killable, because OP_CLOSE and OP_OPEN is initiated from kworker context and the signal is prohibited in these kworker. Note that when the req in xas changes, i.e. xas_load(&xas) != req, it means that a process will complete the current request soon, so wait again for the request to be completed. In addition, add the cachefiles_ondemand_finish_req() helper function to simplify the code. Suggested-by: Hou Tao Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-13-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jia Zhu Signed-off-by: Christian Brauner --- fs/cachefiles/ondemand.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 922cab1a314b..58bd80956c5a 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -380,6 +380,20 @@ static struct cachefiles_req *cachefiles_ondemand_select_req(struct xa_state *xa return NULL; } +static inline bool cachefiles_ondemand_finish_req(struct cachefiles_req *req, + struct xa_state *xas, int err) +{ + if (unlikely(!xas || !req)) + return false; + + if (xa_cmpxchg(xas->xa, xas->xa_index, req, NULL, 0) != req) + return false; + + req->error = err; + complete(&req->done); + return true; +} + ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, char __user *_buffer, size_t buflen) { @@ -443,16 +457,8 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, out: cachefiles_put_object(req->object, cachefiles_obj_put_read_req); /* Remove error request and CLOSE request has no reply */ - if (ret || msg->opcode == CACHEFILES_OP_CLOSE) { - xas_reset(&xas); - xas_lock(&xas); - if (xas_load(&xas) == req) { - req->error = ret; - complete(&req->done); - xas_store(&xas, NULL); - } - xas_unlock(&xas); - } + if (ret || msg->opcode == CACHEFILES_OP_CLOSE) + cachefiles_ondemand_finish_req(req, &xas, ret); cachefiles_req_put(req); return ret ? ret : n; } @@ -544,8 +550,18 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, goto out; wake_up_all(&cache->daemon_pollwq); - wait_for_completion(&req->done); - ret = req->error; +wait: + ret = wait_for_completion_killable(&req->done); + if (!ret) { + ret = req->error; + } else { + ret = -EINTR; + if (!cachefiles_ondemand_finish_req(req, &xas, ret)) { + /* Someone will complete it soon. */ + cpu_relax(); + goto wait; + } + } cachefiles_req_put(req); return ret; out: From ed8c7fbdfe117abbef81f65428ba263118ef298a Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Thu, 30 May 2024 00:06:56 +0800 Subject: [PATCH 040/778] fs/file: fix the check in find_next_fd() The maximum possible return value of find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) is maxbit. This return value, multiplied by BITS_PER_LONG, gives the value of bitbit, which can never be greater than maxfd, it can only be equal to maxfd at most, so the following check 'if (bitbit > maxfd)' will never be true. Moreover, when bitbit equals maxfd, it indicates that there are no unused fds, and the function can directly return. Fix this check. Signed-off-by: Yuntao Wang Link: https://lore.kernel.org/r/20240529160656.209352-1-yuntao.wang@linux.dev Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/file.c b/fs/file.c index 8076aef9c210..a3b72aa64f11 100644 --- a/fs/file.c +++ b/fs/file.c @@ -486,12 +486,12 @@ struct files_struct init_files = { static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) { - unsigned int maxfd = fdt->max_fds; + unsigned int maxfd = fdt->max_fds; /* always multiple of BITS_PER_LONG */ unsigned int maxbit = maxfd / BITS_PER_LONG; unsigned int bitbit = start / BITS_PER_LONG; bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG; - if (bitbit > maxfd) + if (bitbit >= maxfd) return maxfd; if (bitbit > start) start = bitbit; From 056620da899527c14cf36e5019a0decaf4cf0f79 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 20 May 2024 01:56:58 -0700 Subject: [PATCH 041/778] RDMA/bnxt_re: Fix the max msix vectors macro bnxt_re no longer decide the number of MSI-x vectors used by itself. Its decided by bnxt_en now. So when bnxt_en changes this value, system crash is seen. Depend on the max value reported by bnxt_en instead of using the its own macros. Fixes: 303432211324 ("bnxt_en: Remove runtime interrupt vector allocation") Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1716195418-11767-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 9dca451ed522..6974922e5609 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -107,8 +107,6 @@ struct bnxt_re_gsi_context { struct bnxt_re_sqp_entries *sqp_tbl; }; -#define BNXT_RE_MIN_MSIX 2 -#define BNXT_RE_MAX_MSIX 9 #define BNXT_RE_AEQ_IDX 0 #define BNXT_RE_NQ_IDX 1 #define BNXT_RE_GEN_P5_MAX_VF 64 @@ -168,7 +166,7 @@ struct bnxt_re_dev { struct bnxt_qplib_rcfw rcfw; /* NQ */ - struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX]; + struct bnxt_qplib_nq nq[BNXT_MAX_ROCE_MSIX]; /* Device Resources */ struct bnxt_qplib_dev_attr dev_attr; From 03fa18a992d5626fd7bf3557a52e826bf8b326b3 Mon Sep 17 00:00:00 2001 From: Honggang LI Date: Thu, 16 May 2024 17:50:52 +0800 Subject: [PATCH 042/778] RDMA/rxe: Fix data copy for IB_SEND_INLINE For RDMA Send and Write with IB_SEND_INLINE, the memory buffers specified in sge list will be placed inline in the Send Request. The data should be copied by CPU from the virtual addresses of corresponding sge list DMA addresses. Cc: stable@kernel.org Fixes: 8d7c7c0eeb74 ("RDMA: Add ib_virt_dma_to_page()") Signed-off-by: Honggang LI Link: https://lore.kernel.org/r/20240516095052.542767-1-honggangli@163.com Reviewed-by: Zhu Yanjun Reviewed-by: Li Zhijian Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index c7d4d8ab5a09..de6238ee4379 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -812,7 +812,7 @@ static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe, int i; for (i = 0; i < ibwr->num_sge; i++, sge++) { - memcpy(p, ib_virt_dma_to_page(sge->addr), sge->length); + memcpy(p, ib_virt_dma_to_ptr(sge->addr), sge->length); p += sge->length; } } From 296f4ce81d08e73c22408c49f4938a85bd075e5c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 29 May 2024 09:11:36 +1000 Subject: [PATCH 043/778] NFS: abort nfs_atomic_open_v23 if name is too long. An attempt to open a file with a name longer than NFS3_MAXNAMLEN will trigger a WARN_ON_ONCE in encode_filename3() because nfs_atomic_open_v23() doesn't have the test on ->d_name.len that nfs_atomic_open() has. So add that test. Reported-by: James Clark Closes: https://lore.kernel.org/all/20240528105249.69200-1-james.clark@arm.com/ Fixes: 7c6c5249f061 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.") Signed-off-by: NeilBrown Reviewed-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 788077a4feb9..2b68a14982c8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2254,6 +2254,9 @@ int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry, */ int error = 0; + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) + return -ENAMETOOLONG; + if (open_flags & O_CREAT) { file->f_mode |= FMODE_CREATED; error = nfs_do_create(dir, dentry, mode, open_flags); From 28568c906c1bb5f7560e18082ed7d6295860f1c2 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 29 May 2024 15:44:35 -0400 Subject: [PATCH 044/778] NFSv4.1 enforce rootpath check in fs_location query In commit 4ca9f31a2be66 ("NFSv4.1 test and add 4.1 trunking transport"), we introduce the ability to query the NFS server for possible trunking locations of the existing filesystem. However, we never checked the returned file system path for these alternative locations. According to the RFC, the server can say that the filesystem currently known under "fs_root" of fs_location also resides under these server locations under the following "rootpath" pathname. The client cannot handle trunking a filesystem that reside under different location under different paths other than what the main path is. This patch enforces the check that fs_root path and rootpath path in fs_location reply is the same. Fixes: 4ca9f31a2be6 ("NFSv4.1 test and add 4.1 trunking transport") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 94c07875aa3f..a691fa10b3e9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4023,6 +4023,23 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location, } } +static bool _is_same_nfs4_pathname(struct nfs4_pathname *path1, + struct nfs4_pathname *path2) +{ + int i; + + if (path1->ncomponents != path2->ncomponents) + return false; + for (i = 0; i < path1->ncomponents; i++) { + if (path1->components[i].len != path2->components[i].len) + return false; + if (memcmp(path1->components[i].data, path2->components[i].data, + path1->components[i].len)) + return false; + } + return true; +} + static int _nfs4_discover_trunking(struct nfs_server *server, struct nfs_fh *fhandle) { @@ -4056,9 +4073,13 @@ static int _nfs4_discover_trunking(struct nfs_server *server, if (status) goto out_free_3; - for (i = 0; i < locations->nlocations; i++) + for (i = 0; i < locations->nlocations; i++) { + if (!_is_same_nfs4_pathname(&locations->fs_path, + &locations->locations[i].rootpath)) + continue; test_fs_location_for_trunking(&locations->locations[i], clp, server); + } out_free_3: kfree(locations->fattr); out_free_2: From 33c94d7e3cb84f6d130678d6d59ba475a6c489cf Mon Sep 17 00:00:00 2001 From: Chen Hanxiao Date: Thu, 23 May 2024 16:47:16 +0800 Subject: [PATCH 045/778] SUNRPC: return proper error from gss_wrap_req_priv don't return 0 if snd_buf->len really greater than snd_buf->buflen Signed-off-by: Chen Hanxiao Fixes: 0c77668ddb4e ("SUNRPC: Introduce trace points in rpc_auth_gss.ko") Reviewed-by: Benjamin Coddington Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index c7af0220f82f..369310909fc9 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1875,8 +1875,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); /* slack space should prevent this ever happening: */ - if (unlikely(snd_buf->len > snd_buf->buflen)) + if (unlikely(snd_buf->len > snd_buf->buflen)) { + status = -EIO; goto wrap_failed; + } /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) From 99bc9f2eb3f79a2b4296d9bf43153e1d10ca50d3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 28 May 2024 13:27:17 +1000 Subject: [PATCH 046/778] NFS: add barriers when testing for NFS_FSDATA_BLOCKED dentry->d_fsdata is set to NFS_FSDATA_BLOCKED while unlinking or renaming-over a file to ensure that no open succeeds while the NFS operation progressed on the server. Setting dentry->d_fsdata to NFS_FSDATA_BLOCKED is done under ->d_lock after checking the refcount is not elevated. Any attempt to open the file (through that name) will go through lookp_open() which will take ->d_lock while incrementing the refcount, we can be sure that once the new value is set, __nfs_lookup_revalidate() *will* see the new value and will block. We don't have any locking guarantee that when we set ->d_fsdata to NULL, the wait_var_event() in __nfs_lookup_revalidate() will notice. wait/wake primitives do NOT provide barriers to guarantee order. We must use smp_load_acquire() in wait_var_event() to ensure we look at an up-to-date value, and must use smp_store_release() before wake_up_var(). This patch adds those barrier functions and factors out block_revalidate() and unblock_revalidate() far clarity. There is also a hypothetical bug in that if memory allocation fails (which never happens in practice) we might leave ->d_fsdata locked. This patch adds the missing call to unblock_revalidate(). Reported-and-tested-by: Richard Kojedzinszky Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1071501 Fixes: 3c59366c207e ("NFS: don't unhash dentry during unlink/rename") Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2b68a14982c8..07a7be27182e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1803,9 +1803,10 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; } else { - /* Wait for unlink to complete */ + /* Wait for unlink to complete - see unblock_revalidate() */ wait_var_event(&dentry->d_fsdata, - dentry->d_fsdata != NFS_FSDATA_BLOCKED); + smp_load_acquire(&dentry->d_fsdata) + != NFS_FSDATA_BLOCKED); parent = dget_parent(dentry); ret = reval(d_inode(parent), dentry, flags); dput(parent); @@ -1818,6 +1819,29 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); } +static void block_revalidate(struct dentry *dentry) +{ + /* old devname - just in case */ + kfree(dentry->d_fsdata); + + /* Any new reference that could lead to an open + * will take ->d_lock in lookup_open() -> d_lookup(). + * Holding this lock ensures we cannot race with + * __nfs_lookup_revalidate() and removes and need + * for further barriers. + */ + lockdep_assert_held(&dentry->d_lock); + + dentry->d_fsdata = NFS_FSDATA_BLOCKED; +} + +static void unblock_revalidate(struct dentry *dentry) +{ + /* store_release ensures wait_var_event() sees the update */ + smp_store_release(&dentry->d_fsdata, NULL); + wake_up_var(&dentry->d_fsdata); +} + /* * A weaker form of d_revalidate for revalidating just the d_inode(dentry) * when we don't really care about the dentry name. This is called when a @@ -2551,15 +2575,12 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); goto out; } - /* old devname */ - kfree(dentry->d_fsdata); - dentry->d_fsdata = NFS_FSDATA_BLOCKED; + block_revalidate(dentry); spin_unlock(&dentry->d_lock); error = nfs_safe_remove(dentry); nfs_dentry_remove_handle_error(dir, dentry, error); - dentry->d_fsdata = NULL; - wake_up_var(&dentry->d_fsdata); + unblock_revalidate(dentry); out: trace_nfs_unlink_exit(dir, dentry, error); return error; @@ -2666,8 +2687,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) { struct dentry *new_dentry = data->new_dentry; - new_dentry->d_fsdata = NULL; - wake_up_var(&new_dentry->d_fsdata); + unblock_revalidate(new_dentry); } /* @@ -2729,11 +2749,6 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) || WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED)) goto out; - if (new_dentry->d_fsdata) { - /* old devname */ - kfree(new_dentry->d_fsdata); - new_dentry->d_fsdata = NULL; - } spin_lock(&new_dentry->d_lock); if (d_count(new_dentry) > 2) { @@ -2755,7 +2770,7 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, new_dentry = dentry; new_inode = NULL; } else { - new_dentry->d_fsdata = NFS_FSDATA_BLOCKED; + block_revalidate(new_dentry); must_unblock = true; spin_unlock(&new_dentry->d_lock); } @@ -2767,6 +2782,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, must_unblock ? nfs_unblock_rename : NULL); if (IS_ERR(task)) { + if (must_unblock) + unblock_revalidate(new_dentry); error = PTR_ERR(task); goto out; } From e0eec24e2e199873f43df99ec39773ad3af2bff7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 29 May 2024 09:39:10 +0200 Subject: [PATCH 047/778] memblock: make memblock_set_node() also warn about use of MAX_NUMNODES On an (old) x86 system with SRAT just covering space above 4Gb: ACPI: SRAT: Node 0 PXM 0 [mem 0x100000000-0xfffffffff] hotplug the commit referenced below leads to this NUMA configuration no longer being refused by a CONFIG_NUMA=y kernel (previously NUMA: nodes only cover 6144MB of your 8185MB e820 RAM. Not used. No NUMA configuration found Faking a node at [mem 0x0000000000000000-0x000000027fffffff] was seen in the log directly after the message quoted above), because of memblock_validate_numa_coverage() checking for NUMA_NO_NODE (only). This in turn led to memblock_alloc_range_nid()'s warning about MAX_NUMNODES triggering, followed by a NULL deref in memmap_init() when trying to access node 64's (NODE_SHIFT=6) node data. To compensate said change, make memblock_set_node() warn on and adjust a passed in value of MAX_NUMNODES, just like various other functions already do. Fixes: ff6c3d81f2e8 ("NUMA: optimize detection of memory with no node id assigned by firmware") Signed-off-by: Jan Beulich Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1c8a058c-5365-4f27-a9f1-3aeb7fb3e7b2@suse.com Signed-off-by: Mike Rapoport (IBM) --- mm/memblock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/memblock.c b/mm/memblock.c index d09136e040d3..08e9806b1cf9 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1339,6 +1339,10 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, int start_rgn, end_rgn; int i, ret; + if (WARN_ONCE(nid == MAX_NUMNODES, + "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) + nid = NUMA_NO_NODE; + ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); if (ret) return ret; From 985cfe501b74f214905ab4817acee0df24627268 Mon Sep 17 00:00:00 2001 From: Aapo Vienamo Date: Fri, 24 May 2024 18:53:17 +0300 Subject: [PATCH 048/778] thunderbolt: debugfs: Fix margin debugfs node creation condition The margin debugfs node controls the "Enable Margin Test" field of the lane margining operations. This field selects between either low or high voltage margin values for voltage margin test or left or right timing margin values for timing margin test. According to the USB4 specification, whether or not the "Enable Margin Test" control applies, depends on the values of the "Independent High/Low Voltage Margin" or "Independent Left/Right Timing Margin" capability fields for voltage and timing margin tests respectively. The pre-existing condition enabled the debugfs node also in the case where both low/high or left/right margins are returned, which is incorrect. This change only enables the debugfs node in question, if the specific required capability values are met. Signed-off-by: Aapo Vienamo Fixes: d0f1e0c2a699 ("thunderbolt: Add support for receiver lane margining") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 193e9dfc983b..70b52aac3d97 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -943,8 +943,9 @@ static void margining_port_init(struct tb_port *port) debugfs_create_file("run", 0600, dir, port, &margining_run_fops); debugfs_create_file("results", 0600, dir, port, &margining_results_fops); debugfs_create_file("test", 0600, dir, port, &margining_test_fops); - if (independent_voltage_margins(usb4) || - (supports_time(usb4) && independent_time_margins(usb4))) + if (independent_voltage_margins(usb4) == USB4_MARGIN_CAP_0_VOLTAGE_HL || + (supports_time(usb4) && + independent_time_margins(usb4) == USB4_MARGIN_CAP_1_TIME_LR)) debugfs_create_file("margin", 0600, dir, port, &margining_margin_fops); } From d339131bf02d4ed918415574082caf5e8af6e664 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Fri, 31 May 2024 12:27:16 +0100 Subject: [PATCH 049/778] ALSA: hda: cs35l56: Fix lifecycle of codec pointer The codec should be cleared when the amp driver is unbound and when resuming it should be tested to prevent loading firmware into the device and ALSA in a partially configured system state. Signed-off-by: Simon Trimmer Link: https://lore.kernel.org/r/20240531112716.25323-1-simont@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 11b0570ff56d..0923e2589f5f 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -735,6 +735,8 @@ static void cs35l56_hda_unbind(struct device *dev, struct device *master, void * if (comps[cs35l56->index].dev == dev) memset(&comps[cs35l56->index], 0, sizeof(*comps)); + cs35l56->codec = NULL; + dev_dbg(cs35l56->base.dev, "Unbound\n"); } @@ -840,6 +842,9 @@ static int cs35l56_hda_system_resume(struct device *dev) cs35l56->suspended = false; + if (!cs35l56->codec) + return 0; + ret = cs35l56_is_fw_reload_needed(&cs35l56->base); dev_dbg(cs35l56->base.dev, "fw_reload_needed: %d\n", ret); if (ret > 0) { From 6386682cdc8b41319c92fbbe421953e33a28840c Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Fri, 31 May 2024 13:08:20 +0100 Subject: [PATCH 050/778] ALSA: hda: cs35l41: Possible null pointer dereference in cs35l41_hda_unbind() The cs35l41_hda_unbind() function clears the hda_component entry matching it's index and then dereferences the codec pointer held in the first element of the hda_component array, this is an issue when the device index was 0. Instead use the codec pointer stashed in the cs35l41_hda structure as it will still be valid. Fixes: 7cf5ce66dfda ("ALSA: hda: cs35l41: Add device_link between HDA and cs35l41_hda") Signed-off-by: Simon Trimmer Link: https://lore.kernel.org/r/20240531120820.35367-1-simont@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l41_hda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index 6c49e5c6cd20..d54d4d60b03e 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -1495,7 +1495,7 @@ static void cs35l41_hda_unbind(struct device *dev, struct device *master, void * if (comps[cs35l41->index].dev == dev) { memset(&comps[cs35l41->index], 0, sizeof(*comps)); sleep_flags = lock_system_sleep(); - device_link_remove(&comps->codec->core.dev, cs35l41->dev); + device_link_remove(&cs35l41->codec->core.dev, cs35l41->dev); unlock_system_sleep(sleep_flags); } } From 55fac50ea46f46a22a92e2139b92afaa3822ad19 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 31 May 2024 14:37:17 +0200 Subject: [PATCH 051/778] ALSA: seq: ump: Fix missing System Reset message handling The conversion from System Reset event to UMP was missing. Add the entry for a conversion to a proper UMP System message. Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events") Link: https://lore.kernel.org/r/20240531123718.13420-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ump_convert.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index 171fb75267af..d81f776a4c3d 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -1075,6 +1075,8 @@ static const struct seq_ev_to_ump seq_ev_ump_encoders[] = { system_ev_to_ump_midi1, system_ev_to_ump_midi2 }, { SNDRV_SEQ_EVENT_SENSING, UMP_SYSTEM_STATUS_ACTIVE_SENSING, system_ev_to_ump_midi1, system_ev_to_ump_midi2 }, + { SNDRV_SEQ_EVENT_RESET, UMP_SYSTEM_STATUS_RESET, + system_ev_to_ump_midi1, system_ev_to_ump_midi2 }, }; static const struct seq_ev_to_ump *find_ump_encoder(int type) From b7c5e64fecfa88764791679cca4786ac65de739e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 29 May 2024 22:52:30 -0600 Subject: [PATCH 052/778] vfio: Create vfio_fs_type with inode per device By linking all the device fds we provide to userspace to an address space through a new pseudo fs, we can use tools like unmap_mapping_range() to zap all vmas associated with a device. Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240530045236.1005864-2-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/device_cdev.c | 7 ++++++ drivers/vfio/group.c | 7 ++++++ drivers/vfio/vfio_main.c | 44 ++++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 1 + 4 files changed, 59 insertions(+) diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c index e75da0a70d1f..bb1817bd4ff3 100644 --- a/drivers/vfio/device_cdev.c +++ b/drivers/vfio/device_cdev.c @@ -39,6 +39,13 @@ int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep) filep->private_data = df; + /* + * Use the pseudo fs inode on the device to link all mmaps + * to the same address space, allowing us to unmap all vmas + * associated to this device using unmap_mapping_range(). + */ + filep->f_mapping = device->inode->i_mapping; + return 0; err_put_registration: diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index 610a429c6191..ded364588d29 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -286,6 +286,13 @@ static struct file *vfio_device_open_file(struct vfio_device *device) */ filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); + /* + * Use the pseudo fs inode on the device to link all mmaps + * to the same address space, allowing us to unmap all vmas + * associated to this device using unmap_mapping_range(). + */ + filep->f_mapping = device->inode->i_mapping; + if (device->group->type == VFIO_NO_IOMMU) dev_warn(device->dev, "vfio-noiommu device opened by user " "(%s:%d)\n", current->comm, task_pid_nr(current)); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index e97d796a54fb..a5a62d9d963f 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -22,8 +22,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -43,9 +45,13 @@ #define DRIVER_AUTHOR "Alex Williamson " #define DRIVER_DESC "VFIO - User Level meta-driver" +#define VFIO_MAGIC 0x5646494f /* "VFIO" */ + static struct vfio { struct class *device_class; struct ida device_ida; + struct vfsmount *vfs_mount; + int fs_count; } vfio; #ifdef CONFIG_VFIO_NOIOMMU @@ -186,6 +192,8 @@ static void vfio_device_release(struct device *dev) if (device->ops->release) device->ops->release(device); + iput(device->inode); + simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); kvfree(device); } @@ -228,6 +236,34 @@ out_free: } EXPORT_SYMBOL_GPL(_vfio_alloc_device); +static int vfio_fs_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM; +} + +static struct file_system_type vfio_fs_type = { + .name = "vfio", + .owner = THIS_MODULE, + .init_fs_context = vfio_fs_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static struct inode *vfio_fs_inode_new(void) +{ + struct inode *inode; + int ret; + + ret = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count); + if (ret) + return ERR_PTR(ret); + + inode = alloc_anon_inode(vfio.vfs_mount->mnt_sb); + if (IS_ERR(inode)) + simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); + + return inode; +} + /* * Initialize a vfio_device so it can be registered to vfio core. */ @@ -246,6 +282,11 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev, init_completion(&device->comp); device->dev = dev; device->ops = ops; + device->inode = vfio_fs_inode_new(); + if (IS_ERR(device->inode)) { + ret = PTR_ERR(device->inode); + goto out_inode; + } if (ops->init) { ret = ops->init(device); @@ -260,6 +301,9 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev, return 0; out_uninit: + iput(device->inode); + simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); +out_inode: vfio_release_device_set(device); ida_free(&vfio.device_ida, device->index); return ret; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 8b1a29820409..000a6cab2d31 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -64,6 +64,7 @@ struct vfio_device { struct completion comp; struct iommufd_access *iommufd_access; void (*put_kvm)(struct kvm *kvm); + struct inode *inode; #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; u8 iommufd_attached:1; From aac6db75a9fc2c7a6f73e152df8f15101dda38e6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 29 May 2024 22:52:31 -0600 Subject: [PATCH 053/778] vfio/pci: Use unmap_mapping_range() With the vfio device fd tied to the address space of the pseudo fs inode, we can use the mm to track all vmas that might be mmap'ing device BARs, which removes our vma_list and all the complicated lock ordering necessary to manually zap each related vma. Note that we can no longer store the pfn in vm_pgoff if we want to use unmap_mapping_range() to zap a selective portion of the device fd corresponding to BAR mappings. This also converts our mmap fault handler to use vmf_insert_pfn() because we no longer have a vma_list to avoid the concurrency problem with io_remap_pfn_range(). The goal is to eventually use the vm_ops huge_fault handler to avoid the additional faulting overhead, but vmf_insert_pfn_{pmd,pud}() need to learn about pfnmaps first. Also, Jason notes that a race exists between unmap_mapping_range() and the fops mmap callback if we were to call io_remap_pfn_range() to populate the vma on mmap. Specifically, mmap_region() does call_mmap() before it does vma_link_file() which gives a window where the vma is populated but invisible to unmap_mapping_range(). Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240530045236.1005864-3-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 268 +++++++------------------------ include/linux/vfio_pci_core.h | 2 - 2 files changed, 57 insertions(+), 213 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 80cae87fff36..db31c27bf78b 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1610,100 +1610,20 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu } EXPORT_SYMBOL_GPL(vfio_pci_core_write); -/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */ -static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try) +static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev) { - struct vfio_pci_mmap_vma *mmap_vma, *tmp; + struct vfio_device *core_vdev = &vdev->vdev; + loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX); + loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX); + loff_t len = end - start; - /* - * Lock ordering: - * vma_lock is nested under mmap_lock for vm_ops callback paths. - * The memory_lock semaphore is used by both code paths calling - * into this function to zap vmas and the vm_ops.fault callback - * to protect the memory enable state of the device. - * - * When zapping vmas we need to maintain the mmap_lock => vma_lock - * ordering, which requires using vma_lock to walk vma_list to - * acquire an mm, then dropping vma_lock to get the mmap_lock and - * reacquiring vma_lock. This logic is derived from similar - * requirements in uverbs_user_mmap_disassociate(). - * - * mmap_lock must always be the top-level lock when it is taken. - * Therefore we can only hold the memory_lock write lock when - * vma_list is empty, as we'd need to take mmap_lock to clear - * entries. vma_list can only be guaranteed empty when holding - * vma_lock, thus memory_lock is nested under vma_lock. - * - * This enables the vm_ops.fault callback to acquire vma_lock, - * followed by memory_lock read lock, while already holding - * mmap_lock without risk of deadlock. - */ - while (1) { - struct mm_struct *mm = NULL; - - if (try) { - if (!mutex_trylock(&vdev->vma_lock)) - return 0; - } else { - mutex_lock(&vdev->vma_lock); - } - while (!list_empty(&vdev->vma_list)) { - mmap_vma = list_first_entry(&vdev->vma_list, - struct vfio_pci_mmap_vma, - vma_next); - mm = mmap_vma->vma->vm_mm; - if (mmget_not_zero(mm)) - break; - - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - mm = NULL; - } - if (!mm) - return 1; - mutex_unlock(&vdev->vma_lock); - - if (try) { - if (!mmap_read_trylock(mm)) { - mmput(mm); - return 0; - } - } else { - mmap_read_lock(mm); - } - if (try) { - if (!mutex_trylock(&vdev->vma_lock)) { - mmap_read_unlock(mm); - mmput(mm); - return 0; - } - } else { - mutex_lock(&vdev->vma_lock); - } - list_for_each_entry_safe(mmap_vma, tmp, - &vdev->vma_list, vma_next) { - struct vm_area_struct *vma = mmap_vma->vma; - - if (vma->vm_mm != mm) - continue; - - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - - zap_vma_ptes(vma, vma->vm_start, - vma->vm_end - vma->vm_start); - } - mutex_unlock(&vdev->vma_lock); - mmap_read_unlock(mm); - mmput(mm); - } + unmap_mapping_range(core_vdev->inode->i_mapping, start, len, true); } void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev) { - vfio_pci_zap_and_vma_lock(vdev, false); down_write(&vdev->memory_lock); - mutex_unlock(&vdev->vma_lock); + vfio_pci_zap_bars(vdev); } u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev) @@ -1725,99 +1645,41 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 c up_write(&vdev->memory_lock); } -/* Caller holds vma_lock */ -static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev, - struct vm_area_struct *vma) -{ - struct vfio_pci_mmap_vma *mmap_vma; - - mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT); - if (!mmap_vma) - return -ENOMEM; - - mmap_vma->vma = vma; - list_add(&mmap_vma->vma_next, &vdev->vma_list); - - return 0; -} - -/* - * Zap mmaps on open so that we can fault them in on access and therefore - * our vma_list only tracks mappings accessed since last zap. - */ -static void vfio_pci_mmap_open(struct vm_area_struct *vma) -{ - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); -} - -static void vfio_pci_mmap_close(struct vm_area_struct *vma) +static unsigned long vma_to_pfn(struct vm_area_struct *vma) { struct vfio_pci_core_device *vdev = vma->vm_private_data; - struct vfio_pci_mmap_vma *mmap_vma; + int index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + u64 pgoff; - mutex_lock(&vdev->vma_lock); - list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { - if (mmap_vma->vma == vma) { - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - break; - } - } - mutex_unlock(&vdev->vma_lock); + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + + return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff; } static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_core_device *vdev = vma->vm_private_data; - struct vfio_pci_mmap_vma *mmap_vma; - vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff; + vm_fault_t ret = VM_FAULT_SIGBUS; + + pfn = vma_to_pfn(vma); - mutex_lock(&vdev->vma_lock); down_read(&vdev->memory_lock); - /* - * Memory region cannot be accessed if the low power feature is engaged - * or memory access is disabled. - */ - if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) { - ret = VM_FAULT_SIGBUS; - goto up_out; - } + if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) + goto out_disabled; - /* - * We populate the whole vma on fault, so we need to test whether - * the vma has already been mapped, such as for concurrent faults - * to the same vma. io_remap_pfn_range() will trigger a BUG_ON if - * we ask it to fill the same range again. - */ - list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { - if (mmap_vma->vma == vma) - goto up_out; - } + ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff); - if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) { - ret = VM_FAULT_SIGBUS; - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); - goto up_out; - } - - if (__vfio_pci_add_vma(vdev, vma)) { - ret = VM_FAULT_OOM; - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); - } - -up_out: +out_disabled: up_read(&vdev->memory_lock); - mutex_unlock(&vdev->vma_lock); + return ret; } static const struct vm_operations_struct vfio_pci_mmap_ops = { - .open = vfio_pci_mmap_open, - .close = vfio_pci_mmap_close, .fault = vfio_pci_mmap_fault, }; @@ -1880,11 +1742,12 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma vma->vm_private_data = vdev; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff; + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); /* - * See remap_pfn_range(), called from vfio_pci_fault() but we can't - * change vm_flags within the fault handler. Set them now. + * Set vm_flags now, they should not be changed in the fault handler. + * We want the same flags and page protection (decrypted above) as + * io_remap_pfn_range() would set. * * VM_ALLOW_ANY_UNCACHED: The VMA flag is implemented for ARM64, * allowing KVM stage 2 device mapping attributes to use Normal-NC @@ -2202,8 +2065,6 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev) mutex_init(&vdev->ioeventfds_lock); INIT_LIST_HEAD(&vdev->dummy_resources_list); INIT_LIST_HEAD(&vdev->ioeventfds_list); - mutex_init(&vdev->vma_lock); - INIT_LIST_HEAD(&vdev->vma_list); INIT_LIST_HEAD(&vdev->sriov_pfs_item); init_rwsem(&vdev->memory_lock); xa_init(&vdev->ctx); @@ -2219,7 +2080,6 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev) mutex_destroy(&vdev->igate); mutex_destroy(&vdev->ioeventfds_lock); - mutex_destroy(&vdev->vma_lock); kfree(vdev->region); kfree(vdev->pm_save); } @@ -2497,26 +2357,15 @@ unwind: return ret; } -/* - * We need to get memory_lock for each device, but devices can share mmap_lock, - * therefore we need to zap and hold the vma_lock for each device, and only then - * get each memory_lock. - */ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, struct vfio_pci_group_info *groups, struct iommufd_ctx *iommufd_ctx) { - struct vfio_pci_core_device *cur_mem; - struct vfio_pci_core_device *cur_vma; - struct vfio_pci_core_device *cur; + struct vfio_pci_core_device *vdev; struct pci_dev *pdev; - bool is_mem = true; int ret; mutex_lock(&dev_set->lock); - cur_mem = list_first_entry(&dev_set->device_list, - struct vfio_pci_core_device, - vdev.dev_set_list); pdev = vfio_pci_dev_set_resettable(dev_set); if (!pdev) { @@ -2533,7 +2382,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, if (ret) goto err_unlock; - list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) { + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) { bool owned; /* @@ -2557,38 +2406,38 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, * Otherwise, reset is not allowed. */ if (iommufd_ctx) { - int devid = vfio_iommufd_get_dev_id(&cur_vma->vdev, + int devid = vfio_iommufd_get_dev_id(&vdev->vdev, iommufd_ctx); owned = (devid > 0 || devid == -ENOENT); } else { - owned = vfio_dev_in_groups(&cur_vma->vdev, groups); + owned = vfio_dev_in_groups(&vdev->vdev, groups); } if (!owned) { ret = -EINVAL; - goto err_undo; + break; } /* - * Locking multiple devices is prone to deadlock, runaway and - * unwind if we hit contention. + * Take the memory write lock for each device and zap BAR + * mappings to prevent the user accessing the device while in + * reset. Locking multiple devices is prone to deadlock, + * runaway and unwind if we hit contention. */ - if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) { + if (!down_write_trylock(&vdev->memory_lock)) { ret = -EBUSY; - goto err_undo; + break; } - } - cur_vma = NULL; - list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) { - if (!down_write_trylock(&cur_mem->memory_lock)) { - ret = -EBUSY; - goto err_undo; - } - mutex_unlock(&cur_mem->vma_lock); + vfio_pci_zap_bars(vdev); + } + + if (!list_entry_is_head(vdev, + &dev_set->device_list, vdev.dev_set_list)) { + vdev = list_prev_entry(vdev, vdev.dev_set_list); + goto err_undo; } - cur_mem = NULL; /* * The pci_reset_bus() will reset all the devices in the bus. @@ -2599,25 +2448,22 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, * cause the PCI config space reset without restoring the original * state (saved locally in 'vdev->pm_save'). */ - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) - vfio_pci_set_power_state(cur, PCI_D0); + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) + vfio_pci_set_power_state(vdev, PCI_D0); ret = pci_reset_bus(pdev); -err_undo: - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) { - if (cur == cur_mem) - is_mem = false; - if (cur == cur_vma) - break; - if (is_mem) - up_write(&cur->memory_lock); - else - mutex_unlock(&cur->vma_lock); - } + vdev = list_last_entry(&dev_set->device_list, + struct vfio_pci_core_device, vdev.dev_set_list); + +err_undo: + list_for_each_entry_from_reverse(vdev, &dev_set->device_list, + vdev.dev_set_list) + up_write(&vdev->memory_lock); + + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) + pm_runtime_put(&vdev->pdev->dev); - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) - pm_runtime_put(&cur->pdev->dev); err_unlock: mutex_unlock(&dev_set->lock); return ret; diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index a2c8b8bba711..f87067438ed4 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -93,8 +93,6 @@ struct vfio_pci_core_device { struct list_head sriov_pfs_item; struct vfio_pci_core_device *sriov_pf_core_dev; struct notifier_block nb; - struct mutex vma_lock; - struct list_head vma_list; struct rw_semaphore memory_lock; }; From 955af6355ddfe35140f9706a635838212a32513b Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Fri, 31 May 2024 15:43:07 -0700 Subject: [PATCH 054/778] Input: i8042 - add Ayaneo Kun to i8042 quirk table See the added comment for details. Also fix a typo in the quirk's define. Signed-off-by: Tobias Jakobi Link: https://lore.kernel.org/r/20240531190100.3874731-1-tjakobi@math.uni-bielefeld.de Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index dfc6c581873b..5b50475ec414 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -76,7 +76,7 @@ static inline void i8042_write_command(int val) #define SERIO_QUIRK_PROBE_DEFER BIT(5) #define SERIO_QUIRK_RESET_ALWAYS BIT(6) #define SERIO_QUIRK_RESET_NEVER BIT(7) -#define SERIO_QUIRK_DIECT BIT(8) +#define SERIO_QUIRK_DIRECT BIT(8) #define SERIO_QUIRK_DUMBKBD BIT(9) #define SERIO_QUIRK_NOLOOP BIT(10) #define SERIO_QUIRK_NOTIMEOUT BIT(11) @@ -1332,6 +1332,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + /* + * The Ayaneo Kun is a handheld device where some the buttons + * are handled by an AT keyboard. The keyboard is usually + * detected as raw, but sometimes, usually after a cold boot, + * it is detected as translated. Make sure that the keyboard + * is always in raw mode. + */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AYANEO"), + DMI_MATCH(DMI_BOARD_NAME, "KUN"), + }, + .driver_data = (void *)(SERIO_QUIRK_DIRECT) + }, { } }; @@ -1655,7 +1669,7 @@ static void __init i8042_check_quirks(void) if (quirks & SERIO_QUIRK_RESET_NEVER) i8042_reset = I8042_RESET_NEVER; } - if (quirks & SERIO_QUIRK_DIECT) + if (quirks & SERIO_QUIRK_DIRECT) i8042_direct = true; if (quirks & SERIO_QUIRK_DUMBKBD) i8042_dumbkbd = true; From d7bd473632d07f8a54655c270c0940cc3671c548 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Wed, 29 May 2024 15:47:17 +0000 Subject: [PATCH 055/778] iio: imu: inv_icm42600: stabilized timestamp in interrupt Use IRQF_ONESHOT flag to ensure the timestamp is not updated in the hard handler during the thread handler. And compute and use the effective watermark value that correspond to this first timestamp. This way we can ensure the timestamp is always corresponding to the value used by the timestamping mechanism. Otherwise, it is possible that between FIFO count read and FIFO processing the timestamp is overwritten in the hard handler. Fixes: ec74ae9fd37c ("iio: imu: inv_icm42600: add accurate timestamping") Cc: stable@vger.kernel.org Signed-off-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20240529154717.651863-1-inv.git-commit@tdk.com Signed-off-by: Jonathan Cameron --- .../imu/inv_icm42600/inv_icm42600_buffer.c | 19 +++++++++++++++++-- .../imu/inv_icm42600/inv_icm42600_buffer.h | 2 ++ .../iio/imu/inv_icm42600/inv_icm42600_core.c | 1 + 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c index 63b85ec88c13..a8cf74c84c3c 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c @@ -222,10 +222,15 @@ int inv_icm42600_buffer_update_watermark(struct inv_icm42600_state *st) latency_accel = period_accel * wm_accel; /* 0 value for watermark means that the sensor is turned off */ + if (wm_gyro == 0 && wm_accel == 0) + return 0; + if (latency_gyro == 0) { watermark = wm_accel; + st->fifo.watermark.eff_accel = wm_accel; } else if (latency_accel == 0) { watermark = wm_gyro; + st->fifo.watermark.eff_gyro = wm_gyro; } else { /* compute the smallest latency that is a multiple of both */ if (latency_gyro <= latency_accel) @@ -241,6 +246,13 @@ int inv_icm42600_buffer_update_watermark(struct inv_icm42600_state *st) watermark = latency / period; if (watermark < 1) watermark = 1; + /* update effective watermark */ + st->fifo.watermark.eff_gyro = latency / period_gyro; + if (st->fifo.watermark.eff_gyro < 1) + st->fifo.watermark.eff_gyro = 1; + st->fifo.watermark.eff_accel = latency / period_accel; + if (st->fifo.watermark.eff_accel < 1) + st->fifo.watermark.eff_accel = 1; } /* compute watermark value in bytes */ @@ -514,7 +526,7 @@ int inv_icm42600_buffer_fifo_parse(struct inv_icm42600_state *st) /* handle gyroscope timestamp and FIFO data parsing */ if (st->fifo.nb.gyro > 0) { ts = &gyro_st->ts; - inv_sensors_timestamp_interrupt(ts, st->fifo.nb.gyro, + inv_sensors_timestamp_interrupt(ts, st->fifo.watermark.eff_gyro, st->timestamp.gyro); ret = inv_icm42600_gyro_parse_fifo(st->indio_gyro); if (ret) @@ -524,7 +536,7 @@ int inv_icm42600_buffer_fifo_parse(struct inv_icm42600_state *st) /* handle accelerometer timestamp and FIFO data parsing */ if (st->fifo.nb.accel > 0) { ts = &accel_st->ts; - inv_sensors_timestamp_interrupt(ts, st->fifo.nb.accel, + inv_sensors_timestamp_interrupt(ts, st->fifo.watermark.eff_accel, st->timestamp.accel); ret = inv_icm42600_accel_parse_fifo(st->indio_accel); if (ret) @@ -577,6 +589,9 @@ int inv_icm42600_buffer_init(struct inv_icm42600_state *st) unsigned int val; int ret; + st->fifo.watermark.eff_gyro = 1; + st->fifo.watermark.eff_accel = 1; + /* * Default FIFO configuration (bits 7 to 5) * - use invalid value diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.h b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.h index 8b85ee333bf8..f6c85daf42b0 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.h +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.h @@ -32,6 +32,8 @@ struct inv_icm42600_fifo { struct { unsigned int gyro; unsigned int accel; + unsigned int eff_gyro; + unsigned int eff_accel; } watermark; size_t count; struct { diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c index 96116a68ab29..62fdae530334 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c @@ -537,6 +537,7 @@ static int inv_icm42600_irq_init(struct inv_icm42600_state *st, int irq, if (ret) return ret; + irq_type |= IRQF_ONESHOT; return devm_request_threaded_irq(dev, irq, inv_icm42600_irq_timestamp, inv_icm42600_irq_handler, irq_type, "inv_icm42600", st); From 245f3b149e6cc3ac6ee612cdb7042263bfc9e73c Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Mon, 27 May 2024 21:00:08 +0000 Subject: [PATCH 056/778] iio: imu: inv_icm42600: delete unneeded update watermark call Update watermark will be done inside the hwfifo_set_watermark callback just after the update_scan_mode. It is useless to do it here. Fixes: 7f85e42a6c54 ("iio: imu: inv_icm42600: add buffer support in iio devices") Cc: stable@vger.kernel.org Signed-off-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20240527210008.612932-1-inv.git-commit@tdk.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c | 4 ---- drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c | 4 ---- 2 files changed, 8 deletions(-) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c index 83d8504ebfff..4b2566693614 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c @@ -130,10 +130,6 @@ static int inv_icm42600_accel_update_scan_mode(struct iio_dev *indio_dev, /* update data FIFO write */ inv_sensors_timestamp_apply_odr(ts, 0, 0, 0); ret = inv_icm42600_buffer_set_fifo_en(st, fifo_en | st->fifo.en); - if (ret) - goto out_unlock; - - ret = inv_icm42600_buffer_update_watermark(st); out_unlock: mutex_unlock(&st->lock); diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c index e6f8de80128c..938af5b640b0 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c @@ -130,10 +130,6 @@ static int inv_icm42600_gyro_update_scan_mode(struct iio_dev *indio_dev, /* update data FIFO write */ inv_sensors_timestamp_apply_odr(ts, 0, 0, 0); ret = inv_icm42600_buffer_set_fifo_en(st, fifo_en | st->fifo.en); - if (ret) - goto out_unlock; - - ret = inv_icm42600_buffer_update_watermark(st); out_unlock: mutex_unlock(&st->lock); From 08f0fa5d6aa9488f752eb5410e32636f143b3d8e Mon Sep 17 00:00:00 2001 From: Joao Paulo Goncalves Date: Tue, 7 May 2024 11:35:55 -0300 Subject: [PATCH 057/778] arm64: dts: freescale: imx8mm-verdin: Fix GPU speed The GPU clock was reduced on iMX8MM SOC device tree to prevent boards that don't support GPU overdrive from being out of specification. However, this caused a regression in GPU speed for the Verdin iMX8MM, which does support GPU overdrive. This patch fixes this by enabling overdrive mode in the SOM dtsi. Fixes: 1f794d3eed53 ("arm64: dts: imx8mm: Reduce GPU to nominal speed") Signed-off-by: Joao Paulo Goncalves Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index 4768b05fd765..0d9abca58821 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -6,6 +6,7 @@ #include #include #include "imx8mm.dtsi" +#include "imx8mm-overdrive.dtsi" / { chosen { From 537a350d14321c8cca5efbf0a33a404fec3a9f9e Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 4 May 2024 21:57:30 +0200 Subject: [PATCH 058/778] batman-adv: Don't accept TT entries for out-of-spec VIDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The internal handling of VLAN IDs in batman-adv is only specified for following encodings: * VLAN is used - bit 15 is 1 - bit 11 - bit 0 is the VLAN ID (0-4095) - remaining bits are 0 * No VLAN is used - bit 15 is 0 - remaining bits are 0 batman-adv was only preparing new translation table entries (based on its soft interface information) using this encoding format. But the receive path was never checking if entries in the roam or TT TVLVs were also following this encoding. It was therefore possible to create more than the expected maximum of 4096 + 1 entries in the originator VLAN list. Simply by setting the "remaining bits" to "random" values in corresponding TVLV. Cc: stable@vger.kernel.org Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") Reported-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/originator.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ac74f6ead62d..8f6dd2c6ee41 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -131,6 +132,29 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, return vlan; } +/** + * batadv_vlan_id_valid() - check if vlan id is in valid batman-adv encoding + * @vid: the VLAN identifier + * + * Return: true when either no vlan is set or if VLAN is in correct range, + * false otherwise + */ +static bool batadv_vlan_id_valid(unsigned short vid) +{ + unsigned short non_vlan = vid & ~(BATADV_VLAN_HAS_TAG | VLAN_VID_MASK); + + if (vid == 0) + return true; + + if (!(vid & BATADV_VLAN_HAS_TAG)) + return false; + + if (non_vlan) + return false; + + return true; +} + /** * batadv_orig_node_vlan_new() - search and possibly create an orig_node_vlan * object @@ -149,6 +173,9 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, { struct batadv_orig_node_vlan *vlan; + if (!batadv_vlan_id_valid(vid)) + return NULL; + spin_lock_bh(&orig_node->vlan_list_lock); /* first look if an object for this vid already exists */ From e2d8ea0a066a6db51f31efd2710057271d685d2e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 29 Apr 2024 00:49:35 +0000 Subject: [PATCH 059/778] soundwire: fix usages of device_get_named_child_node() The documentation for device_get_named_child_node() mentions this important point: " The caller is responsible for calling fwnode_handle_put() on the returned fwnode pointer. " Add fwnode_handle_put() to avoid leaked references. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20240429004935.2400191-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 3 +++ drivers/soundwire/intel_auxdevice.c | 6 +++++- drivers/soundwire/mipi_disco.c | 30 +++++++++++++++++++++++------ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 20d94bcfc9b4..795e223f7e5c 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -571,6 +571,9 @@ static int sdw_master_read_amd_prop(struct sdw_bus *bus) amd_manager->wake_en_mask = wake_en_mask; fwnode_property_read_u32(link, "amd-sdw-power-mode", &power_mode_mask); amd_manager->power_mode_mask = power_mode_mask; + + fwnode_handle_put(link); + return 0; } diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 17cf27e6ea73..18517121cc89 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -155,8 +155,10 @@ static int sdw_master_read_intel_prop(struct sdw_bus *bus) SDW_MASTER_QUIRKS_CLEAR_INITIAL_PARITY; intel_prop = devm_kzalloc(bus->dev, sizeof(*intel_prop), GFP_KERNEL); - if (!intel_prop) + if (!intel_prop) { + fwnode_handle_put(link); return -ENOMEM; + } /* initialize with hardware defaults, in case the properties are not found */ intel_prop->doaise = 0x1; @@ -184,6 +186,8 @@ static int sdw_master_read_intel_prop(struct sdw_bus *bus) intel_prop->dodse, intel_prop->dods); + fwnode_handle_put(link); + return 0; } diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c index 55a9c51c84c1..e5d9df26d4dc 100644 --- a/drivers/soundwire/mipi_disco.c +++ b/drivers/soundwire/mipi_disco.c @@ -66,8 +66,10 @@ int sdw_master_read_prop(struct sdw_bus *bus) prop->clk_freq = devm_kcalloc(bus->dev, prop->num_clk_freq, sizeof(*prop->clk_freq), GFP_KERNEL); - if (!prop->clk_freq) + if (!prop->clk_freq) { + fwnode_handle_put(link); return -ENOMEM; + } fwnode_property_read_u32_array(link, "mipi-sdw-clock-frequencies-supported", @@ -92,8 +94,10 @@ int sdw_master_read_prop(struct sdw_bus *bus) prop->clk_gears = devm_kcalloc(bus->dev, prop->num_clk_gears, sizeof(*prop->clk_gears), GFP_KERNEL); - if (!prop->clk_gears) + if (!prop->clk_gears) { + fwnode_handle_put(link); return -ENOMEM; + } fwnode_property_read_u32_array(link, "mipi-sdw-supported-clock-gears", @@ -116,6 +120,8 @@ int sdw_master_read_prop(struct sdw_bus *bus) fwnode_property_read_u32(link, "mipi-sdw-command-error-threshold", &prop->err_threshold); + fwnode_handle_put(link); + return 0; } EXPORT_SYMBOL(sdw_master_read_prop); @@ -197,8 +203,10 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, dpn[i].num_words, sizeof(*dpn[i].words), GFP_KERNEL); - if (!dpn[i].words) + if (!dpn[i].words) { + fwnode_handle_put(node); return -ENOMEM; + } fwnode_property_read_u32_array(node, "mipi-sdw-port-wordlength-configs", @@ -236,8 +244,10 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, dpn[i].num_channels, sizeof(*dpn[i].channels), GFP_KERNEL); - if (!dpn[i].channels) + if (!dpn[i].channels) { + fwnode_handle_put(node); return -ENOMEM; + } fwnode_property_read_u32_array(node, "mipi-sdw-channel-number-list", @@ -251,8 +261,10 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, dpn[i].num_ch_combinations, sizeof(*dpn[i].ch_combinations), GFP_KERNEL); - if (!dpn[i].ch_combinations) + if (!dpn[i].ch_combinations) { + fwnode_handle_put(node); return -ENOMEM; + } fwnode_property_read_u32_array(node, "mipi-sdw-channel-combination-list", @@ -274,6 +286,8 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, /* TODO: Read audio mode */ + fwnode_handle_put(node); + i++; } @@ -348,10 +362,14 @@ int sdw_slave_read_prop(struct sdw_slave *slave) prop->dp0_prop = devm_kzalloc(&slave->dev, sizeof(*prop->dp0_prop), GFP_KERNEL); - if (!prop->dp0_prop) + if (!prop->dp0_prop) { + fwnode_handle_put(port); return -ENOMEM; + } sdw_slave_read_dp0(slave, port, prop->dp0_prop); + + fwnode_handle_put(port); } /* From 3d117494e2a88b9c1e8ad41bbbf2cf453a73620e Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 3 Jun 2024 14:23:44 +0800 Subject: [PATCH 060/778] cachefiles: remove unneeded include of close_fd() has been killed, let's get rid of unneeded as Al Viro pointed out [1]. [1] https://lore.kernel.org/r/20240603034055.GI1629371@ZenIV Suggested-by: Al Viro Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20240603062344.818290-1-hsiangkao@linux.alibaba.com Signed-off-by: Christian Brauner --- fs/cachefiles/ondemand.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 58bd80956c5a..bce005f2b456 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-or-later -#include #include #include #include "internal.h" From 5314e84c33e7ad61df5203df540626ac59f9dcd9 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Mon, 27 May 2024 10:20:35 +0300 Subject: [PATCH 061/778] phy: qcom-qmp: qserdes-txrx: Add missing registers offsets Currently, the x1e80100 uses pure V6 register offsets for DP part of the combo PHY. This hasn't been an issue because external DP is not yet enabled on any of the boards yet. But in order to enabled it, all these new V6 N4 register offsets are needed. So add them. Fixes: 762c3565f3c8 ("phy: qcom-qmp: qserdes-txrx: Add V6 N4 register offsets") Co-developed-by: Kuogee Hsieh Signed-off-by: Kuogee Hsieh Signed-off-by: Abel Vesa Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240527-x1e80100-phy-qualcomm-combo-fix-dp-v1-1-be8a0b882117@linaro.org Signed-off-by: Vinod Koul --- .../phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h index a814ad11af07..d37cc0d4fd36 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_n4.h @@ -6,11 +6,24 @@ #ifndef QCOM_PHY_QMP_QSERDES_TXRX_V6_N4_H_ #define QCOM_PHY_QMP_QSERDES_TXRX_V6_N4_H_ +#define QSERDES_V6_N4_TX_CLKBUF_ENABLE 0x08 +#define QSERDES_V6_N4_TX_TX_EMP_POST1_LVL 0x0c +#define QSERDES_V6_N4_TX_TX_DRV_LVL 0x14 +#define QSERDES_V6_N4_TX_RESET_TSYNC_EN 0x1c +#define QSERDES_V6_N4_TX_PRE_STALL_LDO_BOOST_EN 0x20 #define QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_TX 0x30 #define QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_RX 0x34 +#define QSERDES_V6_N4_TX_TRANSCEIVER_BIAS_EN 0x48 +#define QSERDES_V6_N4_TX_HIGHZ_DRVR_EN 0x4c +#define QSERDES_V6_N4_TX_TX_POL_INV 0x50 +#define QSERDES_V6_N4_TX_PARRATE_REC_DETECT_IDLE_EN 0x54 #define QSERDES_V6_N4_TX_LANE_MODE_1 0x78 #define QSERDES_V6_N4_TX_LANE_MODE_2 0x7c #define QSERDES_V6_N4_TX_LANE_MODE_3 0x80 +#define QSERDES_V6_N4_TX_TRAN_DRVR_EMP_EN 0xac +#define QSERDES_V6_N4_TX_TX_BAND 0xd8 +#define QSERDES_V6_N4_TX_INTERFACE_SELECT 0xe4 +#define QSERDES_V6_N4_TX_VMODE_CTRL1 0xb0 #define QSERDES_V6_N4_RX_UCDR_FO_GAIN_RATE2 0x8 #define QSERDES_V6_N4_RX_UCDR_SO_GAIN_RATE2 0x18 From 99bf89626335bbec71d8461f0faec88551440850 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Mon, 27 May 2024 10:20:36 +0300 Subject: [PATCH 062/778] phy: qcom-qmp: pcs: Add missing v6 N4 register offsets The new X1E80100 SoC bumps up the HW version of QMP phy to v6 N4 for combo USB and DP PHY. Currently, the X1E80100 uses the pure V6 PCS register offsets, which are different. Add the offsets so the mentioned platform can be fixed later on. Add the new PCS offsets in a dedicated header file. Fixes: d7b3579f84f7 ("phy: qcom-qmp-combo: Add x1e80100 USB/DP combo phys") Co-developed-by: Kuogee Hsieh Signed-off-by: Kuogee Hsieh Signed-off-by: Abel Vesa Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240527-x1e80100-phy-qualcomm-combo-fix-dp-v1-2-be8a0b882117@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h new file mode 100644 index 000000000000..b3024714dab4 --- /dev/null +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6-n4.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023, Linaro Limited + */ + +#ifndef QCOM_PHY_QMP_PCS_V6_N4_H_ +#define QCOM_PHY_QMP_PCS_V6_N4_H_ + +/* Only for QMP V6 N4 PHY - USB/PCIe PCS registers */ +#define QPHY_V6_N4_PCS_SW_RESET 0x000 +#define QPHY_V6_N4_PCS_PCS_STATUS1 0x014 +#define QPHY_V6_N4_PCS_POWER_DOWN_CONTROL 0x040 +#define QPHY_V6_N4_PCS_START_CONTROL 0x044 +#define QPHY_V6_N4_PCS_POWER_STATE_CONFIG1 0x090 +#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG1 0x0c4 +#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG2 0x0c8 +#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG3 0x0cc +#define QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG6 0x0d8 +#define QPHY_V6_N4_PCS_REFGEN_REQ_CONFIG1 0x0dc +#define QPHY_V6_N4_PCS_RX_SIGDET_LVL 0x188 +#define QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_L 0x190 +#define QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_H 0x194 +#define QPHY_V6_N4_PCS_RATE_SLEW_CNTRL1 0x198 +#define QPHY_V6_N4_PCS_RX_CONFIG 0x1b0 +#define QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG1 0x1c0 +#define QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG2 0x1c4 +#define QPHY_V6_N4_PCS_PCS_TX_RX_CONFIG 0x1d0 +#define QPHY_V6_N4_PCS_EQ_CONFIG1 0x1dc +#define QPHY_V6_N4_PCS_EQ_CONFIG2 0x1e0 +#define QPHY_V6_N4_PCS_EQ_CONFIG5 0x1ec + +#endif From 163c1a356a847ab4767200fd4a45b3f8e4ddc900 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Mon, 27 May 2024 10:20:37 +0300 Subject: [PATCH 063/778] phy: qcom: qmp-combo: Switch from V6 to V6 N4 register offsets Currently, none of the X1E80100 supported boards upstream have enabled DP. As for USB, the reason it is not broken when it's obvious that the offsets are wrong is because the only difference with respect to USB is the difference in register name. The V6 uses QPHY_V6_PCS_CDR_RESET_TIME while V6 N4 uses QPHY_V6_N4_PCS_RX_CONFIG. Now, in order for the DP to work, the DP serdes tables need to be added as they have different values for V6 N4 when compared to V6 ones, even though they use the same V6 offsets. While at it, switch swing and pre-emphasis tables to V6 as well. Fixes: d7b3579f84f7 ("phy: qcom-qmp-combo: Add x1e80100 USB/DP combo phys") Co-developed-by: Kuogee Hsieh Signed-off-by: Kuogee Hsieh Signed-off-by: Abel Vesa Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240527-x1e80100-phy-qualcomm-combo-fix-dp-v1-3-be8a0b882117@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 189 ++++++++++++++++++---- drivers/phy/qualcomm/phy-qcom-qmp.h | 2 + 2 files changed, 162 insertions(+), 29 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 7f999e8a433d..7b00945f7191 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -187,6 +187,31 @@ static const unsigned int qmp_v6_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_TX_TRANSCEIVER_BIAS_EN] = QSERDES_V6_TX_TRANSCEIVER_BIAS_EN, }; +static const unsigned int qmp_v6_n4_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { + [QPHY_SW_RESET] = QPHY_V6_N4_PCS_SW_RESET, + [QPHY_START_CTRL] = QPHY_V6_N4_PCS_START_CONTROL, + [QPHY_PCS_STATUS] = QPHY_V6_N4_PCS_PCS_STATUS1, + [QPHY_PCS_POWER_DOWN_CONTROL] = QPHY_V6_N4_PCS_POWER_DOWN_CONTROL, + + /* In PCS_USB */ + [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = QPHY_V6_PCS_USB3_AUTONOMOUS_MODE_CTRL, + [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = QPHY_V6_PCS_USB3_LFPS_RXTERM_IRQ_CLEAR, + + [QPHY_COM_RESETSM_CNTRL] = QSERDES_V6_COM_RESETSM_CNTRL, + [QPHY_COM_C_READY_STATUS] = QSERDES_V6_COM_C_READY_STATUS, + [QPHY_COM_CMN_STATUS] = QSERDES_V6_COM_CMN_STATUS, + [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN, + + [QPHY_DP_PHY_STATUS] = QSERDES_V6_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V6_DP_PHY_VCO_DIV, + + [QPHY_TX_TX_POL_INV] = QSERDES_V6_N4_TX_TX_POL_INV, + [QPHY_TX_TX_DRV_LVL] = QSERDES_V6_N4_TX_TX_DRV_LVL, + [QPHY_TX_TX_EMP_POST1_LVL] = QSERDES_V6_N4_TX_TX_EMP_POST1_LVL, + [QPHY_TX_HIGHZ_DRVR_EN] = QSERDES_V6_N4_TX_HIGHZ_DRVR_EN, + [QPHY_TX_TRANSCEIVER_BIAS_EN] = QSERDES_V6_N4_TX_TRANSCEIVER_BIAS_EN, +}; + static const struct qmp_phy_init_tbl qmp_v3_usb3_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_IVCO, 0x07), QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYSCLK_EN_SEL, 0x14), @@ -997,6 +1022,31 @@ static const struct qmp_phy_init_tbl qmp_v6_dp_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0x0f), }; +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SVS_MODE_CLK_SEL, 0x15), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0x3b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYS_CLK_CTRL, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CLK_ENABLE1, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_BUF_ENABLE, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CLK_SELECT, 0x30), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x36), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE0, 0xc0), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x12), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN0_MODE0, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_INTEGLOOP_GAIN1_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BG_TIMER, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CORE_CLK_DIV_MODE0, 0x14), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_CTRL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN, 0x17), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0x0f), +}; + static const struct qmp_phy_init_tbl qmp_v6_dp_tx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V6_TX_VMODE_CTRL1, 0x40), QMP_PHY_INIT_CFG(QSERDES_V6_TX_PRE_STALL_LDO_BOOST_EN, 0x30), @@ -1011,6 +1061,19 @@ static const struct qmp_phy_init_tbl qmp_v6_dp_tx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V6_TX_TX_BAND, 0x4), }; +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_tx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_VMODE_CTRL1, 0x40), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_PRE_STALL_LDO_BOOST_EN, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_INTERFACE_SELECT, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_CLKBUF_ENABLE, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RESET_TSYNC_EN, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_TRAN_DRVR_EMP_EN, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_PARRATE_REC_DETECT_IDLE_EN, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_TX, 0x11), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_RES_CODE_LANE_OFFSET_RX, 0x11), + QMP_PHY_INIT_CFG(QSERDES_V6_N4_TX_TX_BAND, 0x1), +}; + static const struct qmp_phy_init_tbl qmp_v6_dp_serdes_tbl_rbr[] = { QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x05), QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34), @@ -1059,6 +1122,74 @@ static const struct qmp_phy_init_tbl qmp_v6_dp_serdes_tbl_hbr3[] = { QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c), }; +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_rbr[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x0b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x37), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x04), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x71), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x01), +}; + +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_hbr[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x0b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x07), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x71), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x01), +}; + +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_hbr2[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x46), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x05), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x0e), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x97), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x10), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x02), +}; + +static const struct qmp_phy_init_tbl qmp_v6_n4_dp_serdes_tbl_hbr3[] = { + QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x08), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x0b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x17), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x15), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0x71), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x0c), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_ADJ_PER1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x6b), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0x92), + QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x01), +}; + static const struct qmp_phy_init_tbl sc8280xp_usb43dp_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_EN_CENTER, 0x01), QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31), @@ -1273,20 +1404,20 @@ static const struct qmp_phy_init_tbl x1e80100_usb43dp_rx_tbl[] = { }; static const struct qmp_phy_init_tbl x1e80100_usb43dp_pcs_tbl[] = { - QMP_PHY_INIT_CFG(QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG1, 0xc4), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG2, 0x89), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG3, 0x20), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_LOCK_DETECT_CONFIG6, 0x13), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_REFGEN_REQ_CONFIG1, 0x21), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_RX_SIGDET_LVL, 0x55), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_CDR_RESET_TIME, 0x0a), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_ALIGN_DETECT_CONFIG1, 0xd4), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_ALIGN_DETECT_CONFIG2, 0x30), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_PCS_TX_RX_CONFIG, 0x0c), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_EQ_CONFIG1, 0x4b), - QMP_PHY_INIT_CFG(QPHY_V6_PCS_EQ_CONFIG5, 0x10), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG1, 0xc4), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG2, 0x89), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG3, 0x20), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_LOCK_DETECT_CONFIG6, 0x13), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_REFGEN_REQ_CONFIG1, 0x21), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RX_SIGDET_LVL, 0x55), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_RX_CONFIG, 0x0a), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG1, 0xd4), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_ALIGN_DETECT_CONFIG2, 0x30), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_PCS_TX_RX_CONFIG, 0x0c), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_EQ_CONFIG1, 0x4b), + QMP_PHY_INIT_CFG(QPHY_V6_N4_PCS_EQ_CONFIG5, 0x10), }; static const struct qmp_phy_init_tbl x1e80100_usb43dp_pcs_usb_tbl[] = { @@ -1794,22 +1925,22 @@ static const struct qmp_phy_cfg x1e80100_usb3dpphy_cfg = { .pcs_usb_tbl = x1e80100_usb43dp_pcs_usb_tbl, .pcs_usb_tbl_num = ARRAY_SIZE(x1e80100_usb43dp_pcs_usb_tbl), - .dp_serdes_tbl = qmp_v6_dp_serdes_tbl, - .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl), - .dp_tx_tbl = qmp_v6_dp_tx_tbl, - .dp_tx_tbl_num = ARRAY_SIZE(qmp_v6_dp_tx_tbl), + .dp_serdes_tbl = qmp_v6_n4_dp_serdes_tbl, + .dp_serdes_tbl_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl), + .dp_tx_tbl = qmp_v6_n4_dp_tx_tbl, + .dp_tx_tbl_num = ARRAY_SIZE(qmp_v6_n4_dp_tx_tbl), - .serdes_tbl_rbr = qmp_v6_dp_serdes_tbl_rbr, - .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_rbr), - .serdes_tbl_hbr = qmp_v6_dp_serdes_tbl_hbr, - .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr), - .serdes_tbl_hbr2 = qmp_v6_dp_serdes_tbl_hbr2, - .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr2), - .serdes_tbl_hbr3 = qmp_v6_dp_serdes_tbl_hbr3, - .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v6_dp_serdes_tbl_hbr3), + .serdes_tbl_rbr = qmp_v6_n4_dp_serdes_tbl_rbr, + .serdes_tbl_rbr_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_rbr), + .serdes_tbl_hbr = qmp_v6_n4_dp_serdes_tbl_hbr, + .serdes_tbl_hbr_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_hbr), + .serdes_tbl_hbr2 = qmp_v6_n4_dp_serdes_tbl_hbr2, + .serdes_tbl_hbr2_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_hbr2), + .serdes_tbl_hbr3 = qmp_v6_n4_dp_serdes_tbl_hbr3, + .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v6_n4_dp_serdes_tbl_hbr3), - .swing_hbr_rbr = &qmp_dp_v5_voltage_swing_hbr_rbr, - .pre_emphasis_hbr_rbr = &qmp_dp_v5_pre_emphasis_hbr_rbr, + .swing_hbr_rbr = &qmp_dp_v6_voltage_swing_hbr_rbr, + .pre_emphasis_hbr_rbr = &qmp_dp_v6_pre_emphasis_hbr_rbr, .swing_hbr3_hbr2 = &qmp_dp_v5_voltage_swing_hbr3_hbr2, .pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2, @@ -1822,7 +1953,7 @@ static const struct qmp_phy_cfg x1e80100_usb3dpphy_cfg = { .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), .vreg_list = qmp_phy_vreg_l, .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), - .regs = qmp_v45_usb3phy_regs_layout, + .regs = qmp_v6_n4_usb3phy_regs_layout, }; static const struct qmp_phy_cfg sm6350_usb3dpphy_cfg = { diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h index d10b8f653c4b..d0f41e4aaa85 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp.h @@ -46,6 +46,8 @@ #include "phy-qcom-qmp-pcs-v6.h" +#include "phy-qcom-qmp-pcs-v6-n4.h" + #include "phy-qcom-qmp-pcs-v6_20.h" #include "phy-qcom-qmp-pcs-v7.h" From e0e8e4bce61cac674fdabd85d070e7bab1634a8b Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 3 Jun 2024 10:32:23 +0300 Subject: [PATCH 064/778] ASoC: SOF: Intel: hda-dai: skip tlv for dspless mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sof_ipc4_dma_config_tlv{} is for Audio DSP firmware only. Don't set it in dspless mode. Fixes: 17386cb1b48b ("ASoC: SOF: Intel: hda-dai: set dma_stream_channel_map device") Signed-off-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://msgid.link/r/20240603073224.14726-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-dai.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index ce675c22a5ab..a2b6dbcfa918 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -525,6 +525,9 @@ int sdw_hda_dai_hw_params(struct snd_pcm_substream *substream, return ret; } + if (sdev->dspless_mode_selected) + return 0; + ipc4_copier = widget_to_copier(w); dma_config_tlv = &ipc4_copier->dma_config_tlv[cpu_dai_id]; dma_config = &dma_config_tlv->dma_config; From 3b06e137089fc0beb5ffa6a869de9a93df984072 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 3 Jun 2024 10:32:24 +0300 Subject: [PATCH 065/778] ASoC: SOF: Intel: hda-dai: remove skip_tlv label MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We just return 0 after the skip_tlv label. No need to use a label. Signed-off-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://msgid.link/r/20240603073224.14726-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-dai.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index a2b6dbcfa918..c61d298ea6b3 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -379,7 +379,7 @@ static int non_hda_dai_hw_params_data(struct snd_pcm_substream *substream, sdev = widget_to_sdev(w); if (sdev->dspless_mode_selected) - goto skip_tlv; + return 0; /* get stream_id */ hext_stream = ops->get_hext_stream(sdev, cpu_dai, substream); @@ -423,7 +423,6 @@ static int non_hda_dai_hw_params_data(struct snd_pcm_substream *substream, dma_config->dma_stream_channel_map.device_count = 1; dma_config->dma_priv_config_size = 0; -skip_tlv: return 0; } From d3cb3516f2540e6c384eef96b4ffeb49425175ed Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 31 May 2024 01:30:54 +0300 Subject: [PATCH 066/778] MAINTAINERS: copy linux-arm-msm for sound/qcom changes Not having linux-arm-msm@ in cc for audio-related changes for Qualcomm platforms means that interested parties can easily miss the patches. Add corresponding L: entry so that linux-arm-msm ML gets CC'ed for audio patches too. Signed-off-by: Dmitry Baryshkov Acked-by: Srinivas Kandagatla Link: https://msgid.link/r/20240531-asoc-qcom-cc-lamsm-v1-1-f026ad618496@linaro.org Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8754ac2c259d..451c1aa5af3c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18212,6 +18212,7 @@ QCOM AUDIO (ASoC) DRIVERS M: Srinivas Kandagatla M: Banajit Goswami L: alsa-devel@alsa-project.org (moderated for non-subscribers) +L: linux-arm-msm@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/soc/qcom/qcom,apr* F: Documentation/devicetree/bindings/sound/qcom,* From a73a83021ae136ab6b0d08eb196d84b1d02814e9 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sun, 2 Jun 2024 09:30:13 -0700 Subject: [PATCH 067/778] ASoC: mxs: add missing MODULE_DESCRIPTION() macro make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in sound/soc/mxs/snd-soc-mxs-pcm.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Link: https://msgid.link/r/20240602-md-snd-soc-mxs-pcm-v1-1-1e663d11328d@quicinc.com Signed-off-by: Mark Brown --- sound/soc/mxs/mxs-pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/mxs/mxs-pcm.c b/sound/soc/mxs/mxs-pcm.c index df2e4be992d2..9bb08cadeb18 100644 --- a/sound/soc/mxs/mxs-pcm.c +++ b/sound/soc/mxs/mxs-pcm.c @@ -43,4 +43,5 @@ int mxs_pcm_platform_register(struct device *dev) } EXPORT_SYMBOL_GPL(mxs_pcm_platform_register); +MODULE_DESCRIPTION("MXS ASoC PCM driver"); MODULE_LICENSE("GPL"); From 7478e15bcc16cbc0fa1b8c431163bf651033c088 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sun, 2 Jun 2024 10:00:27 -0700 Subject: [PATCH 068/778] ASoC: fsl: add missing MODULE_DESCRIPTION() macro make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in sound/soc/fsl/imx-pcm-dma.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Link: https://msgid.link/r/20240602-md-snd-fsl-imx-pcm-dma-v1-1-e7efc33c6bf3@quicinc.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-pcm-dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/imx-pcm-dma.c b/sound/soc/fsl/imx-pcm-dma.c index 14e94270911c..4fa208d6a032 100644 --- a/sound/soc/fsl/imx-pcm-dma.c +++ b/sound/soc/fsl/imx-pcm-dma.c @@ -50,4 +50,5 @@ int imx_pcm_dma_init(struct platform_device *pdev) } EXPORT_SYMBOL_GPL(imx_pcm_dma_init); +MODULE_DESCRIPTION("Freescale i.MX PCM DMA interface"); MODULE_LICENSE("GPL"); From 968c974c08106fcf911d8d390d0f049af855d348 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Mon, 3 Jun 2024 10:47:16 +0000 Subject: [PATCH 069/778] ASoC: rt722-sdca-sdw: add silence detection register as volatile Including silence detection register as volatile. Signed-off-by: Jack Yu Link: https://msgid.link/r/c66a6bd6d220426793096b42baf85437@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt722-sdca-sdw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c index b33da2215ade..f73ee3bf90f5 100644 --- a/sound/soc/codecs/rt722-sdca-sdw.c +++ b/sound/soc/codecs/rt722-sdca-sdw.c @@ -68,6 +68,7 @@ static bool rt722_sdca_mbq_readable_register(struct device *dev, unsigned int re case 0x200007f: case 0x2000082 ... 0x200008e: case 0x2000090 ... 0x2000094: + case 0x3110000: case 0x5300000 ... 0x5300002: case 0x5400002: case 0x5600000 ... 0x5600007: @@ -125,6 +126,7 @@ static bool rt722_sdca_mbq_volatile_register(struct device *dev, unsigned int re case 0x2000067: case 0x2000084: case 0x2000086: + case 0x3110000: return true; default: return false; From 18befe4a28403f599115c5ae753cc7f5157af8b7 Mon Sep 17 00:00:00 2001 From: Dumitru Ceclan Date: Thu, 30 May 2024 15:07:52 +0300 Subject: [PATCH 070/778] iio: adc: ad7173: Clear append status bit The previous value of the append status bit was not cleared before setting the new value. This caused the bit to remain set after enabling buffered mode for multiple channels and not permit further buffered reads from a single channel after the fact. Fixes: 76a1e6a42802 ("iio: adc: ad7173: add AD7173 driver") Signed-off-by: Dumitru Ceclan Link: https://lore.kernel.org/r/20240530-ad7173-fixes-v3-4-b85f33079e18@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index 048127bf979a..e2f17292409c 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -543,6 +543,7 @@ static int ad7173_append_status(struct ad_sigma_delta *sd, bool append) unsigned int interface_mode = st->interface_mode; int ret; + interface_mode &= ~AD7173_INTERFACE_DATA_STAT; interface_mode |= AD7173_INTERFACE_DATA_STAT_EN(append); ret = ad_sd_write_reg(&st->sd, AD7173_REG_INTERFACE_MODE, 2, interface_mode); if (ret) From 182bc496dc63ca03986e3c393166477f4a4c2742 Mon Sep 17 00:00:00 2001 From: Dumitru Ceclan Date: Thu, 30 May 2024 15:07:53 +0300 Subject: [PATCH 071/778] iio: adc: ad7173: Fix sampling frequency setting This patch fixes two issues regarding the sampling frequency setting: -The attribute was set as per device, not per channel. As such, when setting the sampling frequency, the configuration was always done for the slot 0, and the correct configuration was applied on the next channel configuration call by the LRU mechanism. -The LRU implementation does not take into account external settings of the slot registers. When setting the sampling frequency directly to a slot register in write_raw(), there is no guarantee that other channels were not also using that slot and now incorrectly retain their config as live. Set the sampling frequency attribute as separate in the channel templates. Do not set the sampling directly to the slot register in write_raw(), just mark the config as not live and let the LRU mechanism handle it. As the reg variable is no longer used, remove it. Fixes: 76a1e6a42802 ("iio: adc: ad7173: add AD7173 driver") Signed-off-by: Dumitru Ceclan Link: https://lore.kernel.org/r/20240530-ad7173-fixes-v3-5-b85f33079e18@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index e2f17292409c..b26d4575e256 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -717,7 +717,7 @@ static int ad7173_write_raw(struct iio_dev *indio_dev, { struct ad7173_state *st = iio_priv(indio_dev); struct ad7173_channel_config *cfg; - unsigned int freq, i, reg; + unsigned int freq, i; int ret; ret = iio_device_claim_direct_mode(indio_dev); @@ -733,16 +733,7 @@ static int ad7173_write_raw(struct iio_dev *indio_dev, cfg = &st->channels[chan->address].cfg; cfg->odr = i; - - if (!cfg->live) - break; - - ret = ad_sd_read_reg(&st->sd, AD7173_REG_FILTER(cfg->cfg_slot), 2, ®); - if (ret) - break; - reg &= ~AD7173_FILTER_ODR0_MASK; - reg |= FIELD_PREP(AD7173_FILTER_ODR0_MASK, i); - ret = ad_sd_write_reg(&st->sd, AD7173_REG_FILTER(cfg->cfg_slot), 2, reg); + cfg->live = false; break; default: @@ -804,8 +795,7 @@ static const struct iio_chan_spec ad7173_channel_template = { .type = IIO_VOLTAGE, .indexed = 1, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_SCALE), - .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), + BIT(IIO_CHAN_INFO_SCALE) | BIT(IIO_CHAN_INFO_SAMP_FREQ), .scan_type = { .sign = 'u', .realbits = 24, @@ -819,8 +809,8 @@ static const struct iio_chan_spec ad7173_temp_iio_channel_template = { .channel = AD7173_AIN_TEMP_POS, .channel2 = AD7173_AIN_TEMP_NEG, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_SCALE) | BIT(IIO_CHAN_INFO_OFFSET), - .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), + BIT(IIO_CHAN_INFO_SCALE) | BIT(IIO_CHAN_INFO_OFFSET) | + BIT(IIO_CHAN_INFO_SAMP_FREQ), .scan_type = { .sign = 'u', .realbits = 24, From 8844ed0a6e063acf7173b231021b2d301e31ded9 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Mon, 27 May 2024 15:01:17 +0000 Subject: [PATCH 072/778] iio: imu: inv_mpu6050: stabilized timestamping in interrupt Use IRQ ONESHOT flag to ensure the timestamp is not updated in the hard handler during the thread handler. And use a fixed value of 1 sample that correspond to this first timestamp. This way we can ensure the timestamp is always corresponding to the value used by the timestamping mechanism. Otherwise, it is possible that between FIFO count read and FIFO processing the timestamp is overwritten in the hard handler. Fixes: 111e1abd0045 ("iio: imu: inv_mpu6050: use the common inv_sensors timestamp module") Cc: stable@vger.kernel.org Signed-off-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20240527150117.608792-1-inv.git-commit@tdk.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c | 4 ++-- drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index 0dc0f22a5582..3d3b27f28c9d 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -100,8 +100,8 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p) goto end_session; /* Each FIFO data contains all sensors, so same number for FIFO and sensor data */ fifo_period = NSEC_PER_SEC / INV_MPU6050_DIVIDER_TO_FIFO_RATE(st->chip_config.divider); - inv_sensors_timestamp_interrupt(&st->timestamp, nb, pf->timestamp); - inv_sensors_timestamp_apply_odr(&st->timestamp, fifo_period, nb, 0); + inv_sensors_timestamp_interrupt(&st->timestamp, 1, pf->timestamp); + inv_sensors_timestamp_apply_odr(&st->timestamp, fifo_period, 1, 0); /* clear internal data buffer for avoiding kernel data leak */ memset(data, 0, sizeof(data)); diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c index 1b603567ccc8..84273660ca2e 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c @@ -300,6 +300,7 @@ int inv_mpu6050_probe_trigger(struct iio_dev *indio_dev, int irq_type) if (!st->trig) return -ENOMEM; + irq_type |= IRQF_ONESHOT; ret = devm_request_threaded_irq(&indio_dev->dev, st->irq, &inv_mpu6050_interrupt_timestamp, &inv_mpu6050_interrupt_handle, From 78f0dfa64cbd05f381849377a32e0a2f1afe9215 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 30 May 2024 09:44:16 +0200 Subject: [PATCH 073/778] iio: inkern: fix channel read regression A recent "cleanup" broke IIO channel read outs and thereby thermal mitigation on the Lenovo ThinkPad X13s by returning zero instead of the expected IIO value type in iio_read_channel_processed_scale(): thermal thermal_zone12: failed to read out thermal zone (-22) Fixes: 3092bde731ca ("iio: inkern: move to the cleanup.h magic") Cc: Nuno Sa Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240530074416.13697-1-johan+linaro@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/inkern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 52d773261828..485e6fc44a04 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -721,7 +721,7 @@ int iio_read_channel_processed_scale(struct iio_channel *chan, int *val, return ret; *val *= scale; - return 0; + return ret; } else { ret = iio_channel_read(chan, val, NULL, IIO_CHAN_INFO_RAW); if (ret < 0) From 2b85b7fb1376481f7d4c2cf92e5da942f06b2547 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 3 Jun 2024 08:01:03 -0500 Subject: [PATCH 074/778] powerpc/crypto: Add generated P8 asm to .gitignore Looks like drivers/crypto/vmx/.gitignore should have been merged into arch/powerpc/crypto/.gitignore as part of commit 109303336a0c ("crypto: vmx - Move to arch/powerpc/crypto") so that all generated asm files are ignored. Signed-off-by: Nathan Lynch Fixes: 109303336a0c ("crypto: vmx - Move to arch/powerpc/crypto") Signed-off-by: Michael Ellerman Link: https://msgid.link/20240603-powerpc-crypto-ignore-p8-asm-v1-1-05843fec2bb7@linux.ibm.com --- arch/powerpc/crypto/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/crypto/.gitignore b/arch/powerpc/crypto/.gitignore index e1094f08f713..e9fe73aac8b6 100644 --- a/arch/powerpc/crypto/.gitignore +++ b/arch/powerpc/crypto/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only aesp10-ppc.S +aesp8-ppc.S ghashp10-ppc.S +ghashp8-ppc.S From c3552ab19aeb8101b751e7c7ad45deab9e1134e1 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Fri, 24 May 2024 17:15:42 +0200 Subject: [PATCH 075/778] staging: vchiq_debugfs: Fix NPD in vchiq_dump_state The commit 42a2f6664e18 ("staging: vc04_services: Move global g_state to vchiq_state") falsely assumed that the debugfs entry vchiq/state was created with vchiq_instance as data. This causes now a NULL pointer derefence while trying to dump the vchiq state. So fix this by passing vchiq_state as data, because this is the relevant part here. Fixes: 42a2f6664e18 ("staging: vc04_services: Move global g_state to vchiq_state") Signed-off-by: Stefan Wahren Reviewed-by: Umang Jain Link: https://lore.kernel.org/r/20240524151542.19415-1-wahrenst@gmx.net Signed-off-by: Greg Kroah-Hartman --- .../staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 2 +- .../vc04_services/interface/vchiq_arm/vchiq_debugfs.c | 8 ++++---- .../vc04_services/interface/vchiq_arm/vchiq_debugfs.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 297af1d80b12..69daeba974f2 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -1759,7 +1759,7 @@ static int vchiq_probe(struct platform_device *pdev) if (err) goto failed_platform_init; - vchiq_debugfs_init(); + vchiq_debugfs_init(&mgmt->state); dev_dbg(&pdev->dev, "arm: platform initialised - version %d (min %d)\n", VCHIQ_VERSION, VCHIQ_VERSION_MIN); diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c index 54e7bf029d9a..1f74d0bb33ba 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c @@ -42,9 +42,9 @@ static int debugfs_trace_show(struct seq_file *f, void *offset) static int vchiq_dump_show(struct seq_file *f, void *offset) { - struct vchiq_instance *instance = f->private; + struct vchiq_state *state = f->private; - vchiq_dump_state(f, instance->state); + vchiq_dump_state(f, state); return 0; } @@ -121,12 +121,12 @@ void vchiq_debugfs_remove_instance(struct vchiq_instance *instance) debugfs_remove_recursive(node->dentry); } -void vchiq_debugfs_init(void) +void vchiq_debugfs_init(struct vchiq_state *state) { vchiq_dbg_dir = debugfs_create_dir("vchiq", NULL); vchiq_dbg_clients = debugfs_create_dir("clients", vchiq_dbg_dir); - debugfs_create_file("state", S_IFREG | 0444, vchiq_dbg_dir, NULL, + debugfs_create_file("state", S_IFREG | 0444, vchiq_dbg_dir, state, &vchiq_dump_fops); } diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.h index e9bf055a4ca9..fabffd81b1ec 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.h @@ -10,7 +10,7 @@ struct vchiq_debugfs_node { struct dentry *dentry; }; -void vchiq_debugfs_init(void); +void vchiq_debugfs_init(struct vchiq_state *state); void vchiq_debugfs_deinit(void); From 65909a7e7aa8b25c9cc5f04c1fd5d6f0f1d76fcd Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Mon, 3 Jun 2024 17:16:07 -0700 Subject: [PATCH 076/778] ASoC: qcom: add missing MODULE_DESCRIPTION() macro make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in sound/soc/qcom/snd-soc-qcom-sdw.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Link: https://msgid.link/r/20240603-md-snd-soc-qcom-sdw-v1-1-101ea8bcdd38@quicinc.com Signed-off-by: Mark Brown --- sound/soc/qcom/sdw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/qcom/sdw.c b/sound/soc/qcom/sdw.c index eaa8bb016e50..f2eda2ff46c0 100644 --- a/sound/soc/qcom/sdw.c +++ b/sound/soc/qcom/sdw.c @@ -160,4 +160,5 @@ int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, return 0; } EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free); +MODULE_DESCRIPTION("Qualcomm ASoC SoundWire helper functions"); MODULE_LICENSE("GPL"); From b19ab7ee2c4c1ec5f27c18413c3ab63907f7d55c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 14 May 2024 17:04:29 +0300 Subject: [PATCH 077/778] tty: n_tty: Fix buffer offsets when lookahead is used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When lookahead has "consumed" some characters (la_count > 0), n_tty_receive_buf_standard() and n_tty_receive_buf_closing() for characters beyond the la_count are given wrong cp/fp offsets which leads to duplicating and losing some characters. If la_count > 0, correct buffer pointers and make count consistent too (the latter is not strictly necessary to fix the issue but seems more logical to adjust all variables immediately to keep state consistent). Reported-by: Vadym Krevs Fixes: 6bb6fa6908eb ("tty: Implement lookahead to process XON/XOFF timely") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218834 Tested-by: Vadym Krevs Cc: stable@vger.kernel.org Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240514140429.12087-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index f252d0b5a434..5e9ca4376d68 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1619,15 +1619,25 @@ static void __receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, else if (ldata->raw || (L_EXTPROC(tty) && !preops)) n_tty_receive_buf_raw(tty, cp, fp, count); else if (tty->closing && !L_EXTPROC(tty)) { - if (la_count > 0) + if (la_count > 0) { n_tty_receive_buf_closing(tty, cp, fp, la_count, true); - if (count > la_count) - n_tty_receive_buf_closing(tty, cp, fp, count - la_count, false); + cp += la_count; + if (fp) + fp += la_count; + count -= la_count; + } + if (count > 0) + n_tty_receive_buf_closing(tty, cp, fp, count, false); } else { - if (la_count > 0) + if (la_count > 0) { n_tty_receive_buf_standard(tty, cp, fp, la_count, true); - if (count > la_count) - n_tty_receive_buf_standard(tty, cp, fp, count - la_count, false); + cp += la_count; + if (fp) + fp += la_count; + count -= la_count; + } + if (count > 0) + n_tty_receive_buf_standard(tty, cp, fp, count, false); flush_echoes(tty); if (tty->ops->flush_chars) From 87d80bfbd577912462061b1a45c0ed9c7fcb872f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 May 2024 22:05:53 +0300 Subject: [PATCH 078/778] serial: 8250_dw: Don't use struct dw8250_data outside of 8250_dw The container of the struct dw8250_port_data is private to the actual driver. In particular, 8250_lpss and 8250_dw use different data types that are assigned to the UART port private_data. Hence, it must not be used outside the specific driver. Currently the only cpr_val is required by the common code, make it be available via struct dw8250_port_data. This fixes the UART breakage on Intel Galileo boards. Fixes: 593dea000bc1 ("serial: 8250: dw: Allow to use a fallback CPR value if not synthesized") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240514190730.2787071-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 9 +++++++-- drivers/tty/serial/8250/8250_dwlib.c | 3 +-- drivers/tty/serial/8250/8250_dwlib.h | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index ba9f4dc4e71d..667e16897bde 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -55,6 +55,7 @@ #define DW_UART_QUIRK_SKIP_SET_RATE BIT(2) #define DW_UART_QUIRK_IS_DMA_FC BIT(3) #define DW_UART_QUIRK_APMC0D08 BIT(4) +#define DW_UART_QUIRK_CPR_VALUE BIT(5) static inline struct dw8250_data *clk_to_dw8250_data(struct notifier_block *nb) { @@ -432,6 +433,10 @@ static void dw8250_prepare_rx_dma(struct uart_8250_port *p) static void dw8250_quirks(struct uart_port *p, struct dw8250_data *data) { unsigned int quirks = data->pdata ? data->pdata->quirks : 0; + u32 cpr_value = data->pdata ? data->pdata->cpr_value : 0; + + if (quirks & DW_UART_QUIRK_CPR_VALUE) + data->data.cpr_value = cpr_value; #ifdef CONFIG_64BIT if (quirks & DW_UART_QUIRK_OCTEON) { @@ -714,8 +719,8 @@ static const struct dw8250_platform_data dw8250_armada_38x_data = { static const struct dw8250_platform_data dw8250_renesas_rzn1_data = { .usr_reg = DW_UART_USR, - .cpr_val = 0x00012f32, - .quirks = DW_UART_QUIRK_IS_DMA_FC, + .cpr_value = 0x00012f32, + .quirks = DW_UART_QUIRK_CPR_VALUE | DW_UART_QUIRK_IS_DMA_FC, }; static const struct dw8250_platform_data dw8250_starfive_jh7100_data = { diff --git a/drivers/tty/serial/8250/8250_dwlib.c b/drivers/tty/serial/8250/8250_dwlib.c index 3e33ddf7bc80..5a2520943dfd 100644 --- a/drivers/tty/serial/8250/8250_dwlib.c +++ b/drivers/tty/serial/8250/8250_dwlib.c @@ -242,7 +242,6 @@ static const struct serial_rs485 dw8250_rs485_supported = { void dw8250_setup_port(struct uart_port *p) { struct dw8250_port_data *pd = p->private_data; - struct dw8250_data *data = to_dw8250_data(pd); struct uart_8250_port *up = up_to_u8250p(p); u32 reg, old_dlf; @@ -278,7 +277,7 @@ void dw8250_setup_port(struct uart_port *p) reg = dw8250_readl_ext(p, DW_UART_CPR); if (!reg) { - reg = data->pdata->cpr_val; + reg = pd->cpr_value; dev_dbg(p->dev, "CPR is not available, using 0x%08x instead\n", reg); } if (!reg) diff --git a/drivers/tty/serial/8250/8250_dwlib.h b/drivers/tty/serial/8250/8250_dwlib.h index f13e91f2cace..794a9014cdac 100644 --- a/drivers/tty/serial/8250/8250_dwlib.h +++ b/drivers/tty/serial/8250/8250_dwlib.h @@ -19,6 +19,7 @@ struct dw8250_port_data { struct uart_8250_dma dma; /* Hardware configuration */ + u32 cpr_value; u8 dlf_size; /* RS485 variables */ @@ -27,7 +28,7 @@ struct dw8250_port_data { struct dw8250_platform_data { u8 usr_reg; - u32 cpr_val; + u32 cpr_value; unsigned int quirks; }; From 2c94512055f362dd789e0f87b8566feeddec83c9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 May 2024 22:05:54 +0300 Subject: [PATCH 079/778] serial: 8250_dw: Revert "Move definitions to the shared header" This reverts commit d9666dfb314e1ffd6eb9c3c4243fe3e094c047a7. The container of the struct dw8250_port_data is private to the actual driver. In particular, 8250_lpss and 8250_dw use different data types that are assigned to the UART port private_data. Hence, it must not be used outside the specific driver. Fix the mistake made in the past by moving the respective definitions to the specific driver. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240514190730.2787071-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 27 +++++++++++++++++++++++ drivers/tty/serial/8250/8250_dwlib.h | 32 ---------------------------- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 667e16897bde..fb809e32c6ae 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -57,6 +57,33 @@ #define DW_UART_QUIRK_APMC0D08 BIT(4) #define DW_UART_QUIRK_CPR_VALUE BIT(5) +struct dw8250_platform_data { + u8 usr_reg; + u32 cpr_value; + unsigned int quirks; +}; + +struct dw8250_data { + struct dw8250_port_data data; + const struct dw8250_platform_data *pdata; + + int msr_mask_on; + int msr_mask_off; + struct clk *clk; + struct clk *pclk; + struct notifier_block clk_notifier; + struct work_struct clk_work; + struct reset_control *rst; + + unsigned int skip_autocfg:1; + unsigned int uart_16550_compatible:1; +}; + +static inline struct dw8250_data *to_dw8250_data(struct dw8250_port_data *data) +{ + return container_of(data, struct dw8250_data, data); +} + static inline struct dw8250_data *clk_to_dw8250_data(struct notifier_block *nb) { return container_of(nb, struct dw8250_data, clk_notifier); diff --git a/drivers/tty/serial/8250/8250_dwlib.h b/drivers/tty/serial/8250/8250_dwlib.h index 794a9014cdac..7dd2a8e7b780 100644 --- a/drivers/tty/serial/8250/8250_dwlib.h +++ b/drivers/tty/serial/8250/8250_dwlib.h @@ -2,15 +2,10 @@ /* Synopsys DesignWare 8250 library header file. */ #include -#include #include -#include #include "8250.h" -struct clk; -struct reset_control; - struct dw8250_port_data { /* Port properties */ int line; @@ -26,36 +21,9 @@ struct dw8250_port_data { bool hw_rs485_support; }; -struct dw8250_platform_data { - u8 usr_reg; - u32 cpr_value; - unsigned int quirks; -}; - -struct dw8250_data { - struct dw8250_port_data data; - const struct dw8250_platform_data *pdata; - - int msr_mask_on; - int msr_mask_off; - struct clk *clk; - struct clk *pclk; - struct notifier_block clk_notifier; - struct work_struct clk_work; - struct reset_control *rst; - - unsigned int skip_autocfg:1; - unsigned int uart_16550_compatible:1; -}; - void dw8250_do_set_termios(struct uart_port *p, struct ktermios *termios, const struct ktermios *old); void dw8250_setup_port(struct uart_port *p); -static inline struct dw8250_data *to_dw8250_data(struct dw8250_port_data *data) -{ - return container_of(data, struct dw8250_data, data); -} - static inline u32 dw8250_readl_ext(struct uart_port *p, int offset) { if (p->iotype == UPIO_MEM32BE) From 5208e7ced520a813b4f4774451fbac4e517e78b2 Mon Sep 17 00:00:00 2001 From: Doug Brown Date: Sun, 19 May 2024 12:19:30 -0700 Subject: [PATCH 080/778] serial: 8250_pxa: Configure tx_loadsz to match FIFO IRQ level The FIFO is 64 bytes, but the FCR is configured to fire the TX interrupt when the FIFO is half empty (bit 3 = 0). Thus, we should only write 32 bytes when a TX interrupt occurs. This fixes a problem observed on the PXA168 that dropped a bunch of TX bytes during large transmissions. Fixes: ab28f51c77cd ("serial: rewrite pxa2xx-uart to use 8250_core") Signed-off-by: Doug Brown Link: https://lore.kernel.org/r/20240519191929.122202-1-doug@schmorgal.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pxa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_pxa.c b/drivers/tty/serial/8250/8250_pxa.c index f1a51b00b1b9..ba96fa913e7f 100644 --- a/drivers/tty/serial/8250/8250_pxa.c +++ b/drivers/tty/serial/8250/8250_pxa.c @@ -125,6 +125,7 @@ static int serial_pxa_probe(struct platform_device *pdev) uart.port.iotype = UPIO_MEM32; uart.port.regshift = 2; uart.port.fifosize = 64; + uart.tx_loadsz = 32; uart.dl_write = serial_pxa_dl_write; ret = serial8250_register_8250_port(&uart); From ca84cd379b45e9b1775b9e026f069a3a886b409d Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 31 May 2024 08:09:18 -0700 Subject: [PATCH 081/778] serial: port: Don't block system suspend even if bytes are left to xmit Recently, suspend testing on sc7180-trogdor based devices has started to sometimes fail with messages like this: port a88000.serial:0.0: PM: calling pm_runtime_force_suspend+0x0/0xf8 @ 28934, parent: a88000.serial:0 port a88000.serial:0.0: PM: dpm_run_callback(): pm_runtime_force_suspend+0x0/0xf8 returns -16 port a88000.serial:0.0: PM: pm_runtime_force_suspend+0x0/0xf8 returned -16 after 33 usecs port a88000.serial:0.0: PM: failed to suspend: error -16 I could reproduce these problems by logging in via an agetty on the debug serial port (which was _not_ used for kernel console) and running: cat /var/log/messages ...and then (via an SSH session) forcing a few suspend/resume cycles. Tracing through the code and doing some printf()-based debugging shows that the -16 (-EBUSY) comes from the recently added serial_port_runtime_suspend(). The idea of the serial_port_runtime_suspend() function is to prevent the port from being _runtime_ suspended if it still has bytes left to transmit. Having bytes left to transmit isn't a reason to block _system_ suspend, though. If a serdev device in the kernel needs to block system suspend it should block its own suspend and it can use serdev_device_wait_until_sent() to ensure bytes are sent. The DEFINE_RUNTIME_DEV_PM_OPS() used by the serial_port code means that the system suspend function will be pm_runtime_force_suspend(). In pm_runtime_force_suspend() we can see that before calling the runtime suspend function we'll call pm_runtime_disable(). This should be a reliable way to detect that we're called from system suspend and that we shouldn't look for busyness. Fixes: 43066e32227e ("serial: port: Don't suspend if the port is still busy") Cc: stable@vger.kernel.org Reviewed-by: Tony Lindgren Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240531080914.v3.1.I2395e66cf70c6e67d774c56943825c289b9c13e4@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_port.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c index 91a338d3cb34..d35f1d24156c 100644 --- a/drivers/tty/serial/serial_port.c +++ b/drivers/tty/serial/serial_port.c @@ -64,6 +64,13 @@ static int serial_port_runtime_suspend(struct device *dev) if (port->flags & UPF_DEAD) return 0; + /* + * Nothing to do on pm_runtime_force_suspend(), see + * DEFINE_RUNTIME_DEV_PM_OPS. + */ + if (!pm_runtime_enabled(dev)) + return 0; + uart_port_lock_irqsave(port, &flags); if (!port_dev->tx_enabled) { uart_port_unlock_irqrestore(port, flags); From 4e534ff4b69c6960a165cab2c851b48f0a0da945 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 3 Jun 2024 11:26:00 -0400 Subject: [PATCH 082/778] serial: sc16is7xx: rename Kconfig CONFIG_SERIAL_SC16IS7XX_CORE Commit d49216438139 ("serial: sc16is7xx: split into core and I2C/SPI parts (core)") renamed SERIAL_SC16IS7XX_CORE by SERIAL_SC16IS7XX. This means that some configs should have been updated when I submitted the original patch, but unfortunately they were not. Geert mentioned for example: arch/mips/configs/cu1??0-neo_defconfig Rename SERIAL_SC16IS7XX to SERIAL_SC16IS7XX_CORE so that existing configs will still work correctly. Suggested-by: Geert Uytterhoeven Fixes: d49216438139 ("serial: sc16is7xx: split into core and I2C/SPI parts (core)") Signed-off-by: Hugo Villeneuve Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240603152601.3689319-2-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 2 +- drivers/tty/serial/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 4fdd7857ef4d..2a0da35c0ef8 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1023,7 +1023,7 @@ config SERIAL_SCCNXP_CONSOLE help Support for console on SCCNXP serial ports. -config SERIAL_SC16IS7XX_CORE +config SERIAL_SC16IS7XX tristate "NXP SC16IS7xx UART support" select SERIAL_CORE select SERIAL_SC16IS7XX_SPI if SPI_MASTER diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index faa45f2b8bb0..6ff74f0a9530 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -75,7 +75,7 @@ obj-$(CONFIG_SERIAL_SA1100) += sa1100.o obj-$(CONFIG_SERIAL_SAMSUNG) += samsung_tty.o obj-$(CONFIG_SERIAL_SB1250_DUART) += sb1250-duart.o obj-$(CONFIG_SERIAL_SCCNXP) += sccnxp.o -obj-$(CONFIG_SERIAL_SC16IS7XX_CORE) += sc16is7xx.o +obj-$(CONFIG_SERIAL_SC16IS7XX) += sc16is7xx.o obj-$(CONFIG_SERIAL_SC16IS7XX_SPI) += sc16is7xx_spi.o obj-$(CONFIG_SERIAL_SC16IS7XX_I2C) += sc16is7xx_i2c.o obj-$(CONFIG_SERIAL_SH_SCI) += sh-sci.o From 7a2e8e30ad89f498b206977396d028e10174390a Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 3 Jun 2024 11:26:01 -0400 Subject: [PATCH 083/778] serial: sc16is7xx: re-add Kconfig SPI or I2C dependency Commit d49216438139 ("serial: sc16is7xx: split into core and I2C/SPI parts (core)") removed Kconfig SPI_MASTER or I2C dependency for SERIAL_SC16IS7XX (core). This removal was done because I inadvertently misinterpreted some review comments. Because of that, the driver question now pops up if both I2C and SPI_MASTER are disabled. Re-add Kconfig SPI_MASTER or I2C dependency to fix the problem. Suggested-by: Geert Uytterhoeven Fixes: d49216438139 ("serial: sc16is7xx: split into core and I2C/SPI parts (core)") Signed-off-by: Hugo Villeneuve Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240603152601.3689319-3-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 2a0da35c0ef8..28e4beeabf8f 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1025,6 +1025,7 @@ config SERIAL_SCCNXP_CONSOLE config SERIAL_SC16IS7XX tristate "NXP SC16IS7xx UART support" + depends on SPI_MASTER || I2C select SERIAL_CORE select SERIAL_SC16IS7XX_SPI if SPI_MASTER select SERIAL_SC16IS7XX_I2C if I2C From ae01e52da244af5d650378ada1bfd2d946dc1b45 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 29 May 2024 00:05:53 +0900 Subject: [PATCH 084/778] serial: drop debugging WARN_ON_ONCE() from uart_write() syzbot is reporting lockdep warning upon int disc = 7; ioctl(open("/dev/ttyS3", O_RDONLY), TIOCSETD, &disc); sequence. Do like what commit 5f1149d2f4bf ("serial: drop debugging WARN_ON_ONCE() from uart_put_char()") does. Reported-by: syzbot+f78380e4eae53c64125c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f78380e4eae53c64125c Signed-off-by: Tetsuo Handa Acked-by: Jiri Slaby Link: https://lore.kernel.org/r/d775ae2d-a2ac-439e-8e2b-134749f60f30@I-love.SAKURA.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 2c1a0254d3f4..0c4d60976663 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -622,7 +622,7 @@ static ssize_t uart_write(struct tty_struct *tty, const u8 *buf, size_t count) return -EL3HLT; port = uart_port_lock(state, flags); - if (WARN_ON_ONCE(!state->port.xmit_buf)) { + if (!state->port.xmit_buf) { uart_port_unlock(port, flags); return 0; } From 8141b6da1763b9db009e5dcf873869bb31bcef45 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 3 Jun 2024 19:01:00 +0200 Subject: [PATCH 085/778] regulator: tps6594-regulator: Fix the number of irqs for TPS65224 and TPS6594 The number of irqs is computed to allocate the right amount of memory for the irq data. An array of struct tps6594_regulator_irq_data is allocated one time for all the irqs. Each irq uses one cell of the array. If the computed number of irqs is not correct, not allocated memory could be used. Fix the values used in the calculation for TPS6594 and TPS65224. Fixes: 00c826525fba (regulator: tps6594-regulator: Add TI TPS65224 PMIC regulators) Signed-off-by: Thomas Richard Tested-by: Nishanth Menon Link: https://msgid.link/r/20240603170100.2394402-1-thomas.richard@bootlin.com Signed-off-by: Mark Brown --- drivers/regulator/tps6594-regulator.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/regulator/tps6594-regulator.c b/drivers/regulator/tps6594-regulator.c index 4a859f4c0f83..ac53792e3fed 100644 --- a/drivers/regulator/tps6594-regulator.c +++ b/drivers/regulator/tps6594-regulator.c @@ -653,18 +653,14 @@ static int tps6594_regulator_probe(struct platform_device *pdev) } } - if (tps->chip_id == LP8764) { - nr_buck = ARRAY_SIZE(buck_regs); - nr_ldo = 0; - nr_types = REGS_INT_NB; - } else if (tps->chip_id == TPS65224) { + if (tps->chip_id == TPS65224) { nr_buck = ARRAY_SIZE(tps65224_buck_regs); nr_ldo = ARRAY_SIZE(tps65224_ldo_regs); - nr_types = REGS_INT_NB; + nr_types = TPS65224_REGS_INT_NB; } else { nr_buck = ARRAY_SIZE(buck_regs); - nr_ldo = ARRAY_SIZE(tps6594_ldo_regs); - nr_types = TPS65224_REGS_INT_NB; + nr_ldo = (tps->chip_id == LP8764) ? 0 : ARRAY_SIZE(tps6594_ldo_regs); + nr_types = REGS_INT_NB; } reg_irq_nb = nr_types * (nr_buck + nr_ldo); From 718d4a63c0a62d16af1d0425d515d7e76f35681e Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 17 May 2024 10:36:48 +0800 Subject: [PATCH 086/778] Revert "usb: chipidea: move ci_ulpi_init after the phy initialization" This reverts commit 22ffd399e6e7aa18ae0314278ed0b7f05f8ab679. People report this commit causes the driver defer probed, and never back to work[1][2]. [1] https://lore.kernel.org/lkml/20240407011913.GA168730@nchen-desktop/T/#mc2b93bc11a8b01ec7cd0d0bf6b0b03951d9ef751 [2] https://lore.kernel.org/lkml/20240407011913.GA168730@nchen-desktop/T/#me87d9a2a76c07619d83b3879ea14780da89fbbbf Cc: Michael Grzeschik Cc: Marek Szyprowski Cc: Wouter Franken Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/20240517023648.3459188-1-peter.chen@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 8 ++++---- drivers/usb/chipidea/ulpi.c | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index bada13f704b6..835bf2428dc6 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -1084,6 +1084,10 @@ static int ci_hdrc_probe(struct platform_device *pdev) return -ENODEV; } + ret = ci_ulpi_init(ci); + if (ret) + return ret; + if (ci->platdata->phy) { ci->phy = ci->platdata->phy; } else if (ci->platdata->usb_phy) { @@ -1138,10 +1142,6 @@ static int ci_hdrc_probe(struct platform_device *pdev) goto ulpi_exit; } - ret = ci_ulpi_init(ci); - if (ret) - return ret; - ci->hw_bank.phys = res->start; ci->irq = platform_get_irq(pdev, 0); diff --git a/drivers/usb/chipidea/ulpi.c b/drivers/usb/chipidea/ulpi.c index 89fb51e2c3de..dfec07e8ae1d 100644 --- a/drivers/usb/chipidea/ulpi.c +++ b/drivers/usb/chipidea/ulpi.c @@ -68,6 +68,11 @@ int ci_ulpi_init(struct ci_hdrc *ci) if (ci->platdata->phy_mode != USBPHY_INTERFACE_MODE_ULPI) return 0; + /* + * Set PORTSC correctly so we can read/write ULPI registers for + * identification purposes + */ + hw_phymode_configure(ci); ci->ulpi_ops.read = ci_ulpi_read; ci->ulpi_ops.write = ci_ulpi_write; From fc3568f142cc5fe071d83be02d1851717d1fbf66 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 3 Jun 2024 12:00:07 +0200 Subject: [PATCH 087/778] usb: typec: ucsi: glink: increase max ports for x1e80100 The new X Elite (x1e80100) platform has three ports so increase the maximum so that all ports can be registered. Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240603100007.10236-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_glink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index f7546bb488c3..985a880e86da 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -14,7 +14,7 @@ #include #include "ucsi.h" -#define PMIC_GLINK_MAX_PORTS 2 +#define PMIC_GLINK_MAX_PORTS 3 #define UCSI_BUF_SIZE 48 From 8475ffcfb381a77075562207ce08552414a80326 Mon Sep 17 00:00:00 2001 From: John Ernberg Date: Fri, 17 May 2024 11:43:52 +0000 Subject: [PATCH 088/778] USB: xen-hcd: Traverse host/ when CONFIG_USB_XEN_HCD is selected If no other USB HCDs are selected when compiling a small pure virutal machine, the Xen HCD driver cannot be built. Fix it by traversing down host/ if CONFIG_USB_XEN_HCD is selected. Fixes: 494ed3997d75 ("usb: Introduce Xen pvUSB frontend (xen hcd)") Cc: stable@vger.kernel.org # v5.17+ Signed-off-by: John Ernberg Link: https://lore.kernel.org/r/20240517114345.1190755-1-john.ernberg@actia.se Signed-off-by: Greg Kroah-Hartman --- drivers/usb/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile index 3a9a0dd4be70..949eca0adebe 100644 --- a/drivers/usb/Makefile +++ b/drivers/usb/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_USB_R8A66597_HCD) += host/ obj-$(CONFIG_USB_FSL_USB2) += host/ obj-$(CONFIG_USB_FOTG210_HCD) += host/ obj-$(CONFIG_USB_MAX3421_HCD) += host/ +obj-$(CONFIG_USB_XEN_HCD) += host/ obj-$(CONFIG_USB_C67X00_HCD) += c67x00/ From e7e921918d905544500ca7a95889f898121ba886 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Tue, 14 May 2024 15:01:31 -0700 Subject: [PATCH 089/778] usb: typec: tcpm: fix use-after-free case in tcpm_register_source_caps There could be a potential use-after-free case in tcpm_register_source_caps(). This could happen when: * new (say invalid) source caps are advertised * the existing source caps are unregistered * tcpm_register_source_caps() returns with an error as usb_power_delivery_register_capabilities() fails This causes port->partner_source_caps to hold on to the now freed source caps. Reset port->partner_source_caps value to NULL after unregistering existing source caps. Fixes: 230ecdf71a64 ("usb: typec: tcpm: unregister existing source caps before re-registration") Cc: stable@vger.kernel.org Signed-off-by: Amit Sunil Dhamne Reviewed-by: Ondrej Jirman Reviewed-by: Heikki Krogerus Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240514220134.2143181-1-amitsd@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 8a1af08f71b6..be4127ef84e9 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -3014,8 +3014,10 @@ static int tcpm_register_source_caps(struct tcpm_port *port) memcpy(caps.pdo, port->source_caps, sizeof(u32) * port->nr_source_caps); caps.role = TYPEC_SOURCE; - if (cap) + if (cap) { usb_power_delivery_unregister_capabilities(cap); + port->partner_source_caps = NULL; + } cap = usb_power_delivery_register_capabilities(port->partner_pd, &caps); if (IS_ERR(cap)) From fc8fb9eea94d8f476e15f3a4a7addeb16b3b99d6 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Mon, 20 May 2024 23:48:58 +0800 Subject: [PATCH 090/778] usb: typec: tcpm: Ignore received Hard Reset in TOGGLING state Similar to what fixed in Commit a6fe37f428c1 ("usb: typec: tcpm: Skip hard reset when in error recovery"), the handling of the received Hard Reset has to be skipped during TOGGLING state. [ 4086.021288] VBUS off [ 4086.021295] pending state change SNK_READY -> SNK_UNATTACHED @ 650 ms [rev2 NONE_AMS] [ 4086.022113] VBUS VSAFE0V [ 4086.022117] state change SNK_READY -> SNK_UNATTACHED [rev2 NONE_AMS] [ 4086.022447] VBUS off [ 4086.022450] state change SNK_UNATTACHED -> SNK_UNATTACHED [rev2 NONE_AMS] [ 4086.023060] VBUS VSAFE0V [ 4086.023064] state change SNK_UNATTACHED -> SNK_UNATTACHED [rev2 NONE_AMS] [ 4086.023070] disable BIST MODE TESTDATA [ 4086.023766] disable vbus discharge ret:0 [ 4086.023911] Setting usb_comm capable false [ 4086.028874] Setting voltage/current limit 0 mV 0 mA [ 4086.028888] polarity 0 [ 4086.030305] Requesting mux state 0, usb-role 0, orientation 0 [ 4086.033539] Start toggling [ 4086.038496] state change SNK_UNATTACHED -> TOGGLING [rev2 NONE_AMS] // This Hard Reset is unexpected [ 4086.038499] Received hard reset [ 4086.038501] state change TOGGLING -> HARD_RESET_START [rev2 HARD_RESET] Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)") Cc: stable@vger.kernel.org Signed-off-by: Kyle Tso Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240520154858.1072347-1-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index be4127ef84e9..5d4da962acc8 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -6174,6 +6174,7 @@ static void _tcpm_pd_hard_reset(struct tcpm_port *port) port->tcpc->set_bist_data(port->tcpc, false); switch (port->state) { + case TOGGLING: case ERROR_RECOVERY: case PORT_RESET: case PORT_RESET_WAIT_OFF: From e4228cfd092351c2d9b1a3048b2070287291ccbb Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 23 May 2024 14:44:59 -0500 Subject: [PATCH 091/778] dt-bindings: usb: realtek,rts5411: Add missing "additionalProperties" on child nodes All nodes need an explicit additionalProperties or unevaluatedProperties unless a $ref has one that's false. As that is not the case with usb-device.yaml, "additionalProperties" is needed here. Fixes: c44d9dab31d6 ("dt-bindings: usb: Add downstream facing ports to realtek binding") Signed-off-by: "Rob Herring (Arm)" Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20240523194500.2958192-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/realtek,rts5411.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml b/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml index 0874fc21f66f..6577a61cc075 100644 --- a/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml +++ b/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml @@ -65,6 +65,7 @@ patternProperties: description: The hard wired USB devices type: object $ref: /schemas/usb/usb-device.yaml + additionalProperties: true required: - peer-hub From f85d39dd7ed89ffdd622bc1de247ffba8d961504 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 27 May 2024 19:35:38 +0200 Subject: [PATCH 092/778] kcov, usb: disable interrupts in kcov_remote_start_usb_softirq After commit 8fea0c8fda30 ("usb: core: hcd: Convert from tasklet to BH workqueue"), usb_giveback_urb_bh() runs in the BH workqueue with interrupts enabled. Thus, the remote coverage collection section in usb_giveback_urb_bh()-> __usb_hcd_giveback_urb() might be interrupted, and the interrupt handler might invoke __usb_hcd_giveback_urb() again. This breaks KCOV, as it does not support nested remote coverage collection sections within the same context (neither in task nor in softirq). Update kcov_remote_start/stop_usb_softirq() to disable interrupts for the duration of the coverage collection section to avoid nested sections in the softirq context (in addition to such in the task context, which are already handled). Reported-by: Tetsuo Handa Closes: https://lore.kernel.org/linux-usb/0f4d1964-7397-485b-bc48-11c01e2fcbca@I-love.SAKURA.ne.jp/ Closes: https://syzkaller.appspot.com/bug?extid=0438378d6f157baae1a2 Suggested-by: Alan Stern Fixes: 8fea0c8fda30 ("usb: core: hcd: Convert from tasklet to BH workqueue") Cc: stable@vger.kernel.org Acked-by: Dmitry Vyukov Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/20240527173538.4989-1-andrey.konovalov@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 12 ++++++----- include/linux/kcov.h | 47 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index e3366f4d82b9..1ff7d901fede 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1623,6 +1623,7 @@ static void __usb_hcd_giveback_urb(struct urb *urb) struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus); struct usb_anchor *anchor = urb->anchor; int status = urb->unlinked; + unsigned long flags; urb->hcpriv = NULL; if (unlikely((urb->transfer_flags & URB_SHORT_NOT_OK) && @@ -1640,13 +1641,14 @@ static void __usb_hcd_giveback_urb(struct urb *urb) /* pass ownership to the completion handler */ urb->status = status; /* - * This function can be called in task context inside another remote - * coverage collection section, but kcov doesn't support that kind of - * recursion yet. Only collect coverage in softirq context for now. + * Only collect coverage in the softirq context and disable interrupts + * to avoid scenarios with nested remote coverage collection sections + * that KCOV does not support. + * See the comment next to kcov_remote_start_usb_softirq() for details. */ - kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); + flags = kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); urb->complete(urb); - kcov_remote_stop_softirq(); + kcov_remote_stop_softirq(flags); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b851ba415e03..1068a7318d89 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -55,21 +55,47 @@ static inline void kcov_remote_start_usb(u64 id) /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary - * work around for kcov's lack of nested remote coverage sections support in - * task context. Adding support for nested sections is tracked in: - * https://bugzilla.kernel.org/show_bug.cgi?id=210337 + * workaround for KCOV's lack of nested remote coverage sections support. + * + * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337. + * + * kcov_remote_start_usb_softirq(): + * + * 1. Only collects coverage when called in the softirq context. This allows + * avoiding nested remote coverage collection sections in the task context. + * For example, USB/IP calls usb_hcd_giveback_urb() in the task context + * within an existing remote coverage collection section. Thus, KCOV should + * not attempt to start collecting coverage within the coverage collection + * section in __usb_hcd_giveback_urb() in this case. + * + * 2. Disables interrupts for the duration of the coverage collection section. + * This allows avoiding nested remote coverage collection sections in the + * softirq context (a softirq might occur during the execution of a work in + * the BH workqueue, which runs with in_serving_softirq() > 0). + * For example, usb_giveback_urb_bh() runs in the BH workqueue with + * interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in + * the middle of its remote coverage collection section, and the interrupt + * handler might invoke __usb_hcd_giveback_urb() again. */ -static inline void kcov_remote_start_usb_softirq(u64 id) +static inline unsigned long kcov_remote_start_usb_softirq(u64 id) { - if (in_serving_softirq()) + unsigned long flags = 0; + + if (in_serving_softirq()) { + local_irq_save(flags); kcov_remote_start_usb(id); + } + + return flags; } -static inline void kcov_remote_stop_softirq(void) +static inline void kcov_remote_stop_softirq(unsigned long flags) { - if (in_serving_softirq()) + if (in_serving_softirq()) { kcov_remote_stop(); + local_irq_restore(flags); + } } #ifdef CONFIG_64BIT @@ -103,8 +129,11 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} -static inline void kcov_remote_start_usb_softirq(u64 id) {} -static inline void kcov_remote_stop_softirq(void) {} +static inline unsigned long kcov_remote_start_usb_softirq(u64 id) +{ + return 0; +} +static inline void kcov_remote_stop_softirq(unsigned long flags) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ From 8bdf8a42bca4f47646fd105a387ab6926948c7f1 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 31 May 2024 13:46:52 +0300 Subject: [PATCH 093/778] usb: typec: ucsi: Ack also failed Get Error commands It is possible that also the GET_ERROR command fails. If that happens, the command completion still needs to be acknowledged. Otherwise the interface will be stuck until it's reset. Reported-by: Ammy Yi Fixes: bdc62f2bae8f ("usb: typec: ucsi: Simplified registration and I/O API") Cc: stable@vger.kernel.org Signed-off-by: Heikki Krogerus Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240531104653.1303519-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index cb52e7b0a2c5..2cc7aedd490f 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -153,8 +153,13 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) } if (cci & UCSI_CCI_ERROR) { - if (cmd == UCSI_GET_ERROR_STATUS) + if (cmd == UCSI_GET_ERROR_STATUS) { + ret = ucsi_acknowledge(ucsi, false); + if (ret) + return ret; + return -EIO; + } return ucsi_read_error(ucsi); } From 16637fea001ab3c8df528a8995b3211906165a30 Mon Sep 17 00:00:00 2001 From: Shichao Lai Date: Sun, 26 May 2024 09:27:45 +0800 Subject: [PATCH 094/778] usb-storage: alauda: Check whether the media is initialized The member "uzonesize" of struct alauda_info will remain 0 if alauda_init_media() fails, potentially causing divide errors in alauda_read_data() and alauda_write_lba(). - Add a member "media_initialized" to struct alauda_info. - Change a condition in alauda_check_media() to ensure the first initialization. - Add an error check for the return value of alauda_init_media(). Fixes: e80b0fade09e ("[PATCH] USB Storage: add alauda support") Reported-by: xingwei lee Reported-by: yue sun Reviewed-by: Alan Stern Signed-off-by: Shichao Lai Link: https://lore.kernel.org/r/20240526012745.2852061-1-shichaorai@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/alauda.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c index 115f05a6201a..40d34cc28344 100644 --- a/drivers/usb/storage/alauda.c +++ b/drivers/usb/storage/alauda.c @@ -105,6 +105,8 @@ struct alauda_info { unsigned char sense_key; unsigned long sense_asc; /* additional sense code */ unsigned long sense_ascq; /* additional sense code qualifier */ + + bool media_initialized; }; #define short_pack(lsb,msb) ( ((u16)(lsb)) | ( ((u16)(msb))<<8 ) ) @@ -476,11 +478,12 @@ static int alauda_check_media(struct us_data *us) } /* Check for media change */ - if (status[0] & 0x08) { + if (status[0] & 0x08 || !info->media_initialized) { usb_stor_dbg(us, "Media change detected\n"); alauda_free_maps(&MEDIA_INFO(us)); - alauda_init_media(us); - + rc = alauda_init_media(us); + if (rc == USB_STOR_TRANSPORT_GOOD) + info->media_initialized = true; info->sense_key = UNIT_ATTENTION; info->sense_asc = 0x28; info->sense_ascq = 0x00; From 971187350602d03c4a27c0783ff412502b95720a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Jul 2023 14:17:19 +0100 Subject: [PATCH 095/778] driver core: remove devm_device_add_groups() There is no more in-kernel users of this function, and no driver should ever be using it, so remove it from the kernel. Acked-by: Dmitry Torokhov Acked-by: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230704131715.44454-8-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 45 ------------------------------------------ include/linux/device.h | 2 -- 2 files changed, 47 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 131d96c6090b..2e776fcc31b6 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2845,15 +2845,6 @@ static void devm_attr_group_remove(struct device *dev, void *res) sysfs_remove_group(&dev->kobj, group); } -static void devm_attr_groups_remove(struct device *dev, void *res) -{ - union device_attr_group_devres *devres = res; - const struct attribute_group **groups = devres->groups; - - dev_dbg(dev, "%s: removing groups %p\n", __func__, groups); - sysfs_remove_groups(&dev->kobj, groups); -} - /** * devm_device_add_group - given a device, create a managed attribute group * @dev: The device to create the group for @@ -2886,42 +2877,6 @@ int devm_device_add_group(struct device *dev, const struct attribute_group *grp) } EXPORT_SYMBOL_GPL(devm_device_add_group); -/** - * devm_device_add_groups - create a bunch of managed attribute groups - * @dev: The device to create the group for - * @groups: The attribute groups to create, NULL terminated - * - * This function creates a bunch of managed attribute groups. If an error - * occurs when creating a group, all previously created groups will be - * removed, unwinding everything back to the original state when this - * function was called. It will explicitly warn and error if any of the - * attribute files being created already exist. - * - * Returns 0 on success or error code from sysfs_create_group on failure. - */ -int devm_device_add_groups(struct device *dev, - const struct attribute_group **groups) -{ - union device_attr_group_devres *devres; - int error; - - devres = devres_alloc(devm_attr_groups_remove, - sizeof(*devres), GFP_KERNEL); - if (!devres) - return -ENOMEM; - - error = sysfs_create_groups(&dev->kobj, groups); - if (error) { - devres_free(devres); - return error; - } - - devres->groups = groups; - devres_add(dev, devres); - return 0; -} -EXPORT_SYMBOL_GPL(devm_device_add_groups); - static int device_add_attrs(struct device *dev) { const struct class *class = dev->class; diff --git a/include/linux/device.h b/include/linux/device.h index fc3bd7116ab9..ace039151cb8 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1220,8 +1220,6 @@ static inline void device_remove_group(struct device *dev, return device_remove_groups(dev, groups); } -int __must_check devm_device_add_groups(struct device *dev, - const struct attribute_group **groups); int __must_check devm_device_add_group(struct device *dev, const struct attribute_group *grp); From 44a45be57f85165761fdabf072f9a97aa026ff61 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 23 May 2024 13:00:00 +0200 Subject: [PATCH 096/778] sysfs: Unbreak the build around sysfs_bin_attr_simple_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Günter reports build breakage for m68k "m5208evb_defconfig" plus CONFIG_BLK_DEV_INITRD=y caused by commit 66bc1a173328 ("treewide: Use sysfs_bin_attr_simple_read() helper"). The defconfig disables CONFIG_SYSFS, so sysfs_bin_attr_simple_read() is not compiled into the kernel. But init/initramfs.c references that function in the initializer of a struct bin_attribute. Add an empty static inline to avoid the build breakage. Fixes: 66bc1a173328 ("treewide: Use sysfs_bin_attr_simple_read() helper") Reported-by: Guenter Roeck Closes: https://lore.kernel.org/r/e12b0027-b199-4de7-b83d-668171447ccc@roeck-us.net Signed-off-by: Lukas Wunner Tested-by: Guenter Roeck Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/05f4290439a58730738a15b0c99cd8576c4aa0d9.1716461752.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index a7d725fbf739..c4e64dc11206 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -750,6 +750,15 @@ static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) { return 0; } + +static inline ssize_t sysfs_bin_attr_simple_read(struct file *file, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return 0; +} #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, From 1db5322b7e6b58e1b304ce69a50e9dca798ca95b Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 30 May 2024 12:14:15 +0300 Subject: [PATCH 097/778] mei: demote client disconnect warning on suspend to debug Change level for the "not connected" client message in the write callback from error to debug. The MEI driver currently disconnects all clients upon system suspend. This behavior is by design and user-space applications with open connections before the suspend are expected to handle errors upon resume, by reopening their handles, reconnecting, and retrying their operations. However, the current driver implementation logs an error message every time a write operation is attempted on a disconnected client. Since this is a normal and expected flow after system resume logging this as an error can be misleading. Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240530091415.725247-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 79e6f3c1341f..40c3fe26f76d 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -329,7 +329,7 @@ static ssize_t mei_write(struct file *file, const char __user *ubuf, } if (!mei_cl_is_connected(cl)) { - cl_err(dev, cl, "is not connected"); + cl_dbg(dev, cl, "is not connected"); rets = -ENODEV; goto out; } From 283cb234ef95d94c61f59e1cd070cd9499b51292 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 4 Jun 2024 12:07:28 +0300 Subject: [PATCH 098/778] mei: me: release irq in mei_me_pci_resume error path The mei_me_pci_resume doesn't release irq on the error path, in case mei_start() fails. Cc: Fixes: 33ec08263147 ("mei: revamp mei reset state machine") Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240604090728.1027307-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pci-me.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 7f59dd38c32f..6589635f8ba3 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -385,8 +385,10 @@ static int mei_me_pci_resume(struct device *device) } err = mei_restart(dev); - if (err) + if (err) { + free_irq(pdev->irq, dev); return err; + } /* Start timer if stopped in suspend */ schedule_delayed_work(&dev->timer_work, HZ); From 9b5e045029d8bded4c6979874ed3abc347c1415c Mon Sep 17 00:00:00 2001 From: Wentong Wu Date: Mon, 27 May 2024 20:38:35 +0800 Subject: [PATCH 099/778] mei: vsc: Don't stop/restart mei device during system suspend/resume The dynamically created mei client device (mei csi) is used as one V4L2 sub device of the whole video pipeline, and the V4L2 connection graph is built by software node. The mei_stop() and mei_restart() will delete the old mei csi client device and create a new mei client device, which will cause the software node information saved in old mei csi device lost and the whole video pipeline will be broken. Removing mei_stop()/mei_restart() during system suspend/resume can fix the issue above and won't impact hardware actual power saving logic. Fixes: f6085a96c973 ("mei: vsc: Unregister interrupt handler for system suspend") Cc: stable@vger.kernel.org # for 6.8+ Reported-by: Hao Yao Signed-off-by: Wentong Wu Reviewed-by: Sakari Ailus Tested-by: Jason Chen Tested-by: Sakari Ailus Acked-by: Tomas Winkler Link: https://lore.kernel.org/r/20240527123835.522384-1-wentong.wu@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/platform-vsc.c | 39 +++++++++++++-------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/misc/mei/platform-vsc.c b/drivers/misc/mei/platform-vsc.c index b543e6b9f3cf..1ec65d87488a 100644 --- a/drivers/misc/mei/platform-vsc.c +++ b/drivers/misc/mei/platform-vsc.c @@ -399,41 +399,32 @@ static void mei_vsc_remove(struct platform_device *pdev) static int mei_vsc_suspend(struct device *dev) { - struct mei_device *mei_dev = dev_get_drvdata(dev); - struct mei_vsc_hw *hw = mei_dev_to_vsc_hw(mei_dev); + struct mei_device *mei_dev; + int ret = 0; - mei_stop(mei_dev); + mei_dev = dev_get_drvdata(dev); + if (!mei_dev) + return -ENODEV; - mei_disable_interrupts(mei_dev); + mutex_lock(&mei_dev->device_lock); - vsc_tp_free_irq(hw->tp); + if (!mei_write_is_idle(mei_dev)) + ret = -EAGAIN; - return 0; + mutex_unlock(&mei_dev->device_lock); + + return ret; } static int mei_vsc_resume(struct device *dev) { - struct mei_device *mei_dev = dev_get_drvdata(dev); - struct mei_vsc_hw *hw = mei_dev_to_vsc_hw(mei_dev); - int ret; + struct mei_device *mei_dev; - ret = vsc_tp_request_irq(hw->tp); - if (ret) - return ret; - - ret = mei_restart(mei_dev); - if (ret) - goto err_free; - - /* start timer if stopped in suspend */ - schedule_delayed_work(&mei_dev->timer_work, HZ); + mei_dev = dev_get_drvdata(dev); + if (!mei_dev) + return -ENODEV; return 0; - -err_free: - vsc_tp_free_irq(hw->tp); - - return ret; } static DEFINE_SIMPLE_DEV_PM_OPS(mei_vsc_pm_ops, mei_vsc_suspend, mei_vsc_resume); From af076156ec6d70332f1555754e99d4a3771ec297 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 3 Jun 2024 22:50:50 +0200 Subject: [PATCH 100/778] mei: vsc: Fix wrong invocation of ACPI SID method When using an initializer for a union only one of the union members must be initialized. The initializer for the acpi_object union variable passed as argument to the SID ACPI method was initializing both the type and the integer members of the union. Unfortunately rather then complaining about this gcc simply ignores the first initializer and only used the second integer.value = 1 initializer. Leaving type set to 0 which leads to the argument being skipped by acpi acpi_ns_evaluate() resulting in: ACPI Warning: \_SB.PC00.SPI1.SPFD.CVFD.SID: Insufficient arguments - Caller passed 0, method requires 1 (20240322/nsarguments-232) Fix this by initializing only the integer struct part of the union and initializing both members of the integer struct. Signed-off-by: Hans de Goede Reviewed-by: Sakari Ailus Fixes: 566f5ca97680 ("mei: Add transport driver for IVSC device") Reviewed-by: Wentong Wu Link: https://lore.kernel.org/r/20240603205050.505389-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/vsc-fw-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/vsc-fw-loader.c b/drivers/misc/mei/vsc-fw-loader.c index ffa4ccd96a10..596a9d695dfc 100644 --- a/drivers/misc/mei/vsc-fw-loader.c +++ b/drivers/misc/mei/vsc-fw-loader.c @@ -252,7 +252,7 @@ static int vsc_get_sensor_name(struct vsc_fw_loader *fw_loader, { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER }; union acpi_object obj = { - .type = ACPI_TYPE_INTEGER, + .integer.type = ACPI_TYPE_INTEGER, .integer.value = 1, }; struct acpi_object_list arg_list = { From 73fedc31fed38cb6039fd8a7efea1774143b68b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 13 May 2024 09:52:06 +0200 Subject: [PATCH 101/778] parport: amiga: Mark driver struct with __refdata to prevent section mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As described in the added code comment, a reference to .exit.text is ok for drivers registered via module_platform_driver_probe(). Make this explicit to prevent the following section mismatch warning WARNING: modpost: drivers/parport/parport_amiga: section mismatch in reference: amiga_parallel_driver+0x8 (section: .data) -> amiga_parallel_remove (section: .exit.text) that triggers on an allmodconfig W=1 build. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240513075206.2337310-2-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/parport/parport_amiga.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/parport/parport_amiga.c b/drivers/parport/parport_amiga.c index e6dc857aac3f..e06c7b2aac5c 100644 --- a/drivers/parport/parport_amiga.c +++ b/drivers/parport/parport_amiga.c @@ -229,7 +229,13 @@ static void __exit amiga_parallel_remove(struct platform_device *pdev) parport_put_port(port); } -static struct platform_driver amiga_parallel_driver = { +/* + * amiga_parallel_remove() lives in .exit.text. For drivers registered via + * module_platform_driver_probe() this is ok because they cannot get unbound at + * runtime. So mark the driver struct with __refdata to prevent modpost + * triggering a section mismatch warning. + */ +static struct platform_driver amiga_parallel_driver __refdata = { .remove_new = __exit_p(amiga_parallel_remove), .driver = { .name = "amiga-parallel", From 086c6cbcc563c81d55257f9b27e14faf1d0963d3 Mon Sep 17 00:00:00 2001 From: Yongzhi Liu Date: Thu, 23 May 2024 20:14:33 +0800 Subject: [PATCH 102/778] misc: microchip: pci1xxxx: fix double free in the error handling of gp_aux_bus_probe() When auxiliary_device_add() returns error and then calls auxiliary_device_uninit(), callback function gp_auxiliary_device_release() calls ida_free() and kfree(aux_device_wrapper) to free memory. We should't call them again in the error handling path. Fix this by skipping the redundant cleanup functions. Fixes: 393fc2f5948f ("misc: microchip: pci1xxxx: load auxiliary bus driver for the PIO function in the multi-function endpoint of pci1xxxx device.") Signed-off-by: Yongzhi Liu Link: https://lore.kernel.org/r/20240523121434.21855-3-hyperlyzcs@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c index 32af2b14ff34..de75d89ef53e 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c @@ -111,6 +111,7 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id err_aux_dev_add_1: auxiliary_device_uninit(&aux_bus->aux_device_wrapper[1]->aux_dev); + goto err_aux_dev_add_0; err_aux_dev_init_1: ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[1]->aux_dev.id); @@ -120,6 +121,7 @@ err_ida_alloc_1: err_aux_dev_add_0: auxiliary_device_uninit(&aux_bus->aux_device_wrapper[0]->aux_dev); + goto err_ret; err_aux_dev_init_0: ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[0]->aux_dev.id); @@ -127,6 +129,7 @@ err_aux_dev_init_0: err_ida_alloc_0: kfree(aux_bus->aux_device_wrapper[0]); +err_ret: return retval; } From 77427e3d5c353e3dd98c7c0af322f8d9e3131ace Mon Sep 17 00:00:00 2001 From: Yongzhi Liu Date: Thu, 23 May 2024 20:14:34 +0800 Subject: [PATCH 103/778] misc: microchip: pci1xxxx: Fix a memory leak in the error handling of gp_aux_bus_probe() There is a memory leak (forget to free allocated buffers) in a memory allocation failure path. Fix it to jump to the correct error handling code. Fixes: 393fc2f5948f ("misc: microchip: pci1xxxx: load auxiliary bus driver for the PIO function in the multi-function endpoint of pci1xxxx device.") Signed-off-by: Yongzhi Liu Reviewed-by: Kumaravel Thiagarajan Link: https://lore.kernel.org/r/20240523121434.21855-4-hyperlyzcs@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c index de75d89ef53e..34c9be437432 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c @@ -69,8 +69,10 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id aux_bus->aux_device_wrapper[1] = kzalloc(sizeof(*aux_bus->aux_device_wrapper[1]), GFP_KERNEL); - if (!aux_bus->aux_device_wrapper[1]) - return -ENOMEM; + if (!aux_bus->aux_device_wrapper[1]) { + retval = -ENOMEM; + goto err_aux_dev_add_0; + } retval = ida_alloc(&gp_client_ida, GFP_KERNEL); if (retval < 0) From 7c55b78818cfb732680c4a72ab270cc2d2ee3d0f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2024 12:06:34 +0200 Subject: [PATCH 104/778] jfs: xattr: fix buffer overflow for invalid xattr When an xattr size is not what is expected, it is printed out to the kernel log in hex format as a form of debugging. But when that xattr size is bigger than the expected size, printing it out can cause an access off the end of the buffer. Fix this all up by properly restricting the size of the debug hex dump in the kernel log. Reported-by: syzbot+9dfe490c8176301c1d06@syzkaller.appspotmail.com Cc: Dave Kleikamp Link: https://lore.kernel.org/r/2024051433-slider-cloning-98f9@gregkh Signed-off-by: Greg Kroah-Hartman --- fs/jfs/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 0fb7afac298e..9987055293b3 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -557,9 +557,11 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) size_check: if (EALIST_SIZE(ea_buf->xattr) != ea_size) { + int size = min_t(int, EALIST_SIZE(ea_buf->xattr), ea_size); + printk(KERN_ERR "ea_get: invalid extended attribute\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, - ea_buf->xattr, ea_size, 1); + ea_buf->xattr, size, 1); ea_release(inode, ea_buf); rc = -EIO; goto clean_up; From 616501eccb58615f8f352a29239ea6c6fc5e6546 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 27 May 2024 09:00:06 +0100 Subject: [PATCH 105/778] clkdev: don't fail clkdev_alloc() if over-sized Don't fail clkdev_alloc() if the strings are over-sized. In this case, the entry will not match during lookup, so its useless. However, since code fails if we return NULL leading to boot failure, return a dummy entry with the connection and device IDs set to "bad". Leave the warning so these problems can be found, and the useless wasteful clkdev registrations removed. Reported-by: Ron Economos Reported-by: Guenter Roeck Fixes: 8d532528ff6a ("clkdev: report over-sized strings when creating clkdev entries") Closes: https://lore.kernel.org/linux-clk/7eda7621-0dde-4153-89e4-172e4c095d01@roeck-us.net. Link: https://lore.kernel.org/r/28114882-f8d7-21bf-4536-a186e8d7a22a@w6rz.net Tested-by: Ron Economos Signed-off-by: Russell King (Oracle) --- drivers/clk/clkdev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index 56a12f1da472..476fb9b7adb5 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -204,8 +204,15 @@ fail: pr_err("%pV:%s: %s ID is greater than %zu\n", &vaf, con_id, failure, max_size); va_end(ap_copy); - kfree(cla); - return NULL; + + /* + * Don't fail in this case, but as the entry won't ever match just + * fill it with something that also won't match. + */ + strscpy(cla->con_id, "bad", sizeof(cla->con_id)); + strscpy(cla->dev_id, "bad", sizeof(cla->dev_id)); + + return &cla->cl; } static struct clk_lookup * From c0a40097f0bc81deafc15f9195d1fb54595cd6d0 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Mon, 13 May 2024 07:06:34 +0200 Subject: [PATCH 106/778] drivers: core: synchronize really_probe() and dev_uevent() Synchronize the dev->driver usage in really_probe() and dev_uevent(). These can run in different threads, what can result in the following race condition for dev->driver uninitialization: Thread #1: ========== really_probe() { ... probe_failed: ... device_unbind_cleanup(dev) { ... dev->driver = NULL; // <= Failed probe sets dev->driver to NULL ... } ... } Thread #2: ========== dev_uevent() { ... if (dev->driver) // If dev->driver is NULLed from really_probe() from here on, // after above check, the system crashes add_uevent_var(env, "DRIVER=%s", dev->driver->name); ... } really_probe() holds the lock, already. So nothing needs to be done there. dev_uevent() is called with lock held, often, too. But not always. What implies that we can't add any locking in dev_uevent() itself. So fix this race by adding the lock to the non-protected path. This is the path where above race is observed: dev_uevent+0x235/0x380 uevent_show+0x10c/0x1f0 <= Add lock here dev_attr_show+0x3a/0xa0 sysfs_kf_seq_show+0x17c/0x250 kernfs_seq_show+0x7c/0x90 seq_read_iter+0x2d7/0x940 kernfs_fop_read_iter+0xc6/0x310 vfs_read+0x5bc/0x6b0 ksys_read+0xeb/0x1b0 __x64_sys_read+0x42/0x50 x64_sys_call+0x27ad/0x2d30 do_syscall_64+0xcd/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Similar cases are reported by syzkaller in https://syzkaller.appspot.com/bug?extid=ffa8143439596313a85a But these are regarding the *initialization* of dev->driver dev->driver = drv; As this switches dev->driver to non-NULL these reports can be considered to be false-positives (which should be "fixed" by this commit, as well, though). The same issue was reported and tried to be fixed back in 2015 in https://lore.kernel.org/lkml/1421259054-2574-1-git-send-email-a.sangwan@samsung.com/ already. Fixes: 239378f16aa1 ("Driver core: add uevent vars for devices of a class") Cc: stable Cc: syzbot+ffa8143439596313a85a@syzkaller.appspotmail.com Cc: Ashish Sangwan Cc: Namjae Jeon Signed-off-by: Dirk Behme Link: https://lore.kernel.org/r/20240513050634.3964461-1-dirk.behme@de.bosch.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 2e776fcc31b6..2b4c0624b704 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2739,8 +2739,11 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; + /* Synchronize with really_probe() */ + device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); + device_unlock(dev); if (retval) goto out; From d9fef76e89498bf99cdb03f77b7091d7e95d7edd Mon Sep 17 00:00:00 2001 From: Julien Panis Date: Thu, 16 May 2024 12:44:37 +0200 Subject: [PATCH 107/778] thermal/drivers/mediatek/lvts_thermal: Remove filtered mode for mt8188 Filtered mode is not supported on mt8188 SoC and is the source of bad results. Move to immediate mode which provides good temperatures. Fixes: f4745f546e60 ("thermal/drivers/mediatek/lvts_thermal: Add MT8188 support") Reviewed-by: Nicolas Pitre Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Julien Panis Link: https://lore.kernel.org/r/20240516-mtk-thermal-mt8188-mode-fix-v2-1-40a317442c62@baylibre.com Signed-off-by: Daniel Lezcano --- drivers/thermal/mediatek/lvts_thermal.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index 0bb3a495b56e..82c355c466cf 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ b/drivers/thermal/mediatek/lvts_thermal.c @@ -1458,7 +1458,6 @@ static const struct lvts_ctrl_data mt8188_lvts_mcu_data_ctrl[] = { }, VALID_SENSOR_MAP(1, 1, 1, 1), .offset = 0x0, - .mode = LVTS_MSR_FILTERED_MODE, }, { .lvts_sensor = { @@ -1469,7 +1468,6 @@ static const struct lvts_ctrl_data mt8188_lvts_mcu_data_ctrl[] = { }, VALID_SENSOR_MAP(1, 1, 0, 0), .offset = 0x100, - .mode = LVTS_MSR_FILTERED_MODE, } }; @@ -1483,7 +1481,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = { }, VALID_SENSOR_MAP(0, 1, 0, 0), .offset = 0x0, - .mode = LVTS_MSR_FILTERED_MODE, }, { .lvts_sensor = { @@ -1496,7 +1493,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = { }, VALID_SENSOR_MAP(1, 1, 1, 0), .offset = 0x100, - .mode = LVTS_MSR_FILTERED_MODE, }, { .lvts_sensor = { @@ -1507,7 +1503,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = { }, VALID_SENSOR_MAP(1, 1, 0, 0), .offset = 0x200, - .mode = LVTS_MSR_FILTERED_MODE, }, { .lvts_sensor = { @@ -1518,7 +1513,6 @@ static const struct lvts_ctrl_data mt8188_lvts_ap_data_ctrl[] = { }, VALID_SENSOR_MAP(1, 1, 0, 0), .offset = 0x300, - .mode = LVTS_MSR_FILTERED_MODE, } }; From afe377286ad49e0b69071d2a767e2c6553f4094b Mon Sep 17 00:00:00 2001 From: Maciej Strozek Date: Tue, 4 Jun 2024 14:28:43 +0100 Subject: [PATCH 108/778] ASoC: cs42l43: Increase default type detect time and button delay Some problematic headsets have been discovered, to help with correctly identifying these, the detect time must be increased. Also improve the reliability of the impedance value from the button detect by slightly increasing the button detect delay. Fixes: 686b8f711b99 ("ASoC: cs42l43: Lower default type detect time") Signed-off-by: Maciej Strozek Signed-off-by: Charles Keepax Link: https://msgid.link/r/20240604132843.3309114-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index 901b9dbcf585..d9ab003e166b 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -121,7 +121,7 @@ int cs42l43_set_jack(struct snd_soc_component *component, priv->buttons[3] = 735; } - ret = cs42l43_find_index(priv, "cirrus,detect-us", 1000, &priv->detect_us, + ret = cs42l43_find_index(priv, "cirrus,detect-us", 50000, &priv->detect_us, cs42l43_accdet_us, ARRAY_SIZE(cs42l43_accdet_us)); if (ret < 0) goto error; @@ -433,7 +433,7 @@ irqreturn_t cs42l43_button_press(int irq, void *data) // Wait for 2 full cycles of comb filter to ensure good reading queue_delayed_work(system_wq, &priv->button_press_work, - msecs_to_jiffies(10)); + msecs_to_jiffies(20)); return IRQ_HANDLED; } From b7c40988808f8d7426dee1e4d96a4e204de4a8bc Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 4 Jun 2024 10:19:46 +0800 Subject: [PATCH 109/778] ASoC: codecs: ES8326: Solve headphone detection issue When switching between OMTP and CTIA headset, we can hear pop noise. To solve this issue, We modified the configuration for headphone detection Signed-off-by: Zhang Yi Link: https://msgid.link/r/20240604021946.2911-1-zhangyi@everest-semi.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 03b539ba540f..6a4e42e5e35b 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -857,12 +857,16 @@ static void es8326_jack_detect_handler(struct work_struct *work) * set auto-check mode, then restart jack_detect_work after 400ms. * Don't report jack status. */ - regmap_write(es8326->regmap, ES8326_INT_SOURCE, - (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); + regmap_write(es8326->regmap, ES8326_INT_SOURCE, 0x00); regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01); + regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x10, 0x00); es8326_enable_micbias(es8326->component); usleep_range(50000, 70000); regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); + regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x10, 0x10); + usleep_range(50000, 70000); + regmap_write(es8326->regmap, ES8326_INT_SOURCE, + (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f); regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08); queue_delayed_work(system_wq, &es8326->jack_detect_work, From 4eecb644b8b82f5279a348f6ebe77e3d6e5b1b05 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 4 Jun 2024 14:17:04 +0100 Subject: [PATCH 110/778] spi: cs42l43: Correct SPI root clock speed The root clock is actually 49.152MHz not 40MHz, as it is derived from the primary audio clock, update the driver to match. This error can cause the actual clock rate to be higher than the requested clock rate on the SPI bus. Fixes: ef75e767167a ("spi: cs42l43: Add SPI controller support") Signed-off-by: Charles Keepax Link: https://msgid.link/r/20240604131704.3227500-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi-cs42l43.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c index 9d747ea69926..902a0734cc36 100644 --- a/drivers/spi/spi-cs42l43.c +++ b/drivers/spi/spi-cs42l43.c @@ -26,7 +26,7 @@ #include #define CS42L43_FIFO_SIZE 16 -#define CS42L43_SPI_ROOT_HZ (40 * HZ_PER_MHZ) +#define CS42L43_SPI_ROOT_HZ 49152000 #define CS42L43_SPI_MAX_LENGTH 65532 enum cs42l43_spi_cmd { From ccd8d753f0fe8f16745fa2b6be5946349731d901 Mon Sep 17 00:00:00 2001 From: Alibek Omarov Date: Tue, 4 Jun 2024 21:47:52 +0300 Subject: [PATCH 111/778] ASoC: rockchip: i2s-tdm: Fix trcm mode by setting clock on right mclk When TRCM mode is enabled, I2S RX and TX clocks are synchronized through selected clock source. Without this fix BCLK and LRCK might get parented to an uninitialized MCLK and the DAI will receive data at wrong pace. However, unlike in original i2s-tdm driver, there is no need to manually synchronize mclk_rx and mclk_tx, as only one gets used anyway. Tested on a board with RK3568 SoC and Silergy SY24145S codec with enabled and disabled TRCM mode. Fixes: 9e2ab4b18ebd ("ASoC: rockchip: i2s-tdm: Fix inaccurate sampling rates") Signed-off-by: Alibek Omarov Reviewed-by: Luca Ceresoli Link: https://msgid.link/r/20240604184752.697313-1-a1ba.omarov@gmail.com Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_i2s_tdm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index 9fa020ef7eab..ee517d7b5b7b 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -655,8 +655,17 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream, int err; if (i2s_tdm->is_master_mode) { - struct clk *mclk = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? - i2s_tdm->mclk_tx : i2s_tdm->mclk_rx; + struct clk *mclk; + + if (i2s_tdm->clk_trcm == TRCM_TX) { + mclk = i2s_tdm->mclk_tx; + } else if (i2s_tdm->clk_trcm == TRCM_RX) { + mclk = i2s_tdm->mclk_rx; + } else if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + mclk = i2s_tdm->mclk_tx; + } else { + mclk = i2s_tdm->mclk_rx; + } err = clk_set_rate(mclk, DEFAULT_MCLK_FS * params_rate(params)); if (err) From 97d8613679eb53bd0c07d0fbd3d8471e46ba46c1 Mon Sep 17 00:00:00 2001 From: Hsin-Te Yuan Date: Fri, 31 May 2024 08:37:54 +0000 Subject: [PATCH 112/778] ASoC: mediatek: mt8183-da7219-max98357: Fix kcontrol name collision Since "Headphone Switch" kcontrol name has already been used by da7219, rename the control name from "Headphone" to "Headphones" to prevent the colision. Also, this change makes kcontrol name align with the one in mt8186-mt6366-da7219-max98357.c. Fixes: 9c7388baa2053 ("ASoC: mediatek: mt8183-da7219-max98357: Map missing jack kcontrols") Change-Id: I9ae69a4673cd04786b247cc514fdd20f878ef009 Signed-off-by: Hsin-Te Yuan Reviewed-by: Chen-Yu Tsai Link: https://msgid.link/r/20240531-da7219-v1-1-ac3343f3ae6a@chromium.org Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c index acaf81fd6c9b..f848e14b091a 100644 --- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c @@ -31,7 +31,7 @@ struct mt8183_da7219_max98357_priv { static struct snd_soc_jack_pin mt8183_da7219_max98357_jack_pins[] = { { - .pin = "Headphone", + .pin = "Headphones", .mask = SND_JACK_HEADPHONE, }, { @@ -626,7 +626,7 @@ static struct snd_soc_codec_conf mt6358_codec_conf[] = { }; static const struct snd_kcontrol_new mt8183_da7219_max98357_snd_controls[] = { - SOC_DAPM_PIN_SWITCH("Headphone"), + SOC_DAPM_PIN_SWITCH("Headphones"), SOC_DAPM_PIN_SWITCH("Headset Mic"), SOC_DAPM_PIN_SWITCH("Speakers"), SOC_DAPM_PIN_SWITCH("Line Out"), @@ -634,7 +634,7 @@ static const struct snd_kcontrol_new mt8183_da7219_max98357_snd_controls[] = { static const struct snd_soc_dapm_widget mt8183_da7219_max98357_dapm_widgets[] = { - SND_SOC_DAPM_HP("Headphone", NULL), + SND_SOC_DAPM_HP("Headphones", NULL), SND_SOC_DAPM_MIC("Headset Mic", NULL), SND_SOC_DAPM_SPK("Speakers", NULL), SND_SOC_DAPM_SPK("Line Out", NULL), @@ -680,7 +680,7 @@ static struct snd_soc_codec_conf mt8183_da7219_rt1015_codec_conf[] = { }; static const struct snd_kcontrol_new mt8183_da7219_rt1015_snd_controls[] = { - SOC_DAPM_PIN_SWITCH("Headphone"), + SOC_DAPM_PIN_SWITCH("Headphones"), SOC_DAPM_PIN_SWITCH("Headset Mic"), SOC_DAPM_PIN_SWITCH("Left Spk"), SOC_DAPM_PIN_SWITCH("Right Spk"), @@ -689,7 +689,7 @@ static const struct snd_kcontrol_new mt8183_da7219_rt1015_snd_controls[] = { static const struct snd_soc_dapm_widget mt8183_da7219_rt1015_dapm_widgets[] = { - SND_SOC_DAPM_HP("Headphone", NULL), + SND_SOC_DAPM_HP("Headphones", NULL), SND_SOC_DAPM_MIC("Headset Mic", NULL), SND_SOC_DAPM_SPK("Left Spk", NULL), SND_SOC_DAPM_SPK("Right Spk", NULL), From d21b3c60d6e3917f7388db6bcc455f01c99ee42b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 May 2024 16:41:02 +0100 Subject: [PATCH 113/778] KVM: selftests: Fix shift of 32 bit unsigned int more than 32 bits Currrentl a 32 bit 1u value is being shifted more than 32 bits causing overflow and incorrect checking of bits 32-63. Fix this by using the BIT_ULL macro for shifting bits. Detected by cppcheck: sev_init2_tests.c:108:34: error: Shifting 32-bit value by 63 bits is undefined behaviour [shiftTooManyBits] Fixes: dfc083a181ba ("selftests: kvm: add tests for KVM_SEV_INIT2") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240523154102.2236133-1-colin.i.king@gmail.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/sev_init2_tests.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c index 7a4a61be119b..3fb967f40c6a 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c @@ -105,11 +105,11 @@ void test_features(uint32_t vm_type, uint64_t supported_features) int i; for (i = 0; i < 64; i++) { - if (!(supported_features & (1u << i))) + if (!(supported_features & BIT_ULL(i))) test_init2_invalid(vm_type, &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }, "unknown feature"); - else if (KNOWN_FEATURES & (1u << i)) + else if (KNOWN_FEATURES & BIT_ULL(i)) test_init2(vm_type, &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }); } From 980b8bc01938c8bcc9742c1051f64b5f0ed178ac Mon Sep 17 00:00:00 2001 From: Tao Su Date: Mon, 13 May 2024 09:40:03 +0800 Subject: [PATCH 114/778] KVM: selftests: x86: Prioritize getting max_gfn from GuestPhysBits Use the max mappable GPA via GuestPhysBits advertised by KVM to calculate max_gfn. Currently some selftests (e.g. access_tracking_perf_test, dirty_log_test...) add RAM regions close to max_gfn, so guest may access GPA beyond its mappable range and cause infinite loop. Adjust max_gfn in vm_compute_max_gfn() since x86 selftests already overrides vm_compute_max_gfn() specifically to deal with goofy edge cases. Reported-by: Yi Lai Signed-off-by: Tao Su Tested-by: Yi Lai Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20240513014003.104593-1-tao1.su@linux.intel.com [sean: tweak name, add comment and sanity check] Signed-off-by: Sean Christopherson --- .../selftests/kvm/include/x86_64/processor.h | 1 + .../testing/selftests/kvm/lib/x86_64/processor.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 8eb57de0b587..c0c7c1fe93f9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -277,6 +277,7 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) #define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) #define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) +#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) #define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index c664e446136b..594b061aef52 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1247,9 +1247,20 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint8_t maxphyaddr; + uint8_t maxphyaddr, guest_maxphyaddr; - max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; + /* + * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR + * enumerates the max _mappable_ GPA, which can be less than the raw + * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU + * doesn't support 5-level TDP. + */ + guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR); + guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits; + TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits, + "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR"); + + max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1; /* Avoid reserved HyperTransport region on AMD processors. */ if (!host_cpu_is_amd) From 49f683b41f28918df3e51ddc0d928cb2e934ccdb Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 10 May 2024 02:23:52 -0700 Subject: [PATCH 115/778] KVM: Fix a data race on last_boosted_vcpu in kvm_vcpu_on_spin() Use {READ,WRITE}_ONCE() to access kvm->last_boosted_vcpu to ensure the loads and stores are atomic. In the extremely unlikely scenario the compiler tears the stores, it's theoretically possible for KVM to attempt to get a vCPU using an out-of-bounds index, e.g. if the write is split into multiple 8-bit stores, and is paired with a 32-bit load on a VM with 257 vCPUs: CPU0 CPU1 last_boosted_vcpu = 0xff; (last_boosted_vcpu = 0x100) last_boosted_vcpu[15:8] = 0x01; i = (last_boosted_vcpu = 0x1ff) last_boosted_vcpu[7:0] = 0x00; vcpu = kvm->vcpu_array[0x1ff]; As detected by KCSAN: BUG: KCSAN: data-race in kvm_vcpu_on_spin [kvm] / kvm_vcpu_on_spin [kvm] write to 0xffffc90025a92344 of 4 bytes by task 4340 on cpu 16: kvm_vcpu_on_spin (arch/x86/kvm/../../../virt/kvm/kvm_main.c:4112) kvm handle_pause (arch/x86/kvm/vmx/vmx.c:5929) kvm_intel vmx_handle_exit (arch/x86/kvm/vmx/vmx.c:? arch/x86/kvm/vmx/vmx.c:6606) kvm_intel vcpu_run (arch/x86/kvm/x86.c:11107 arch/x86/kvm/x86.c:11211) kvm kvm_arch_vcpu_ioctl_run (arch/x86/kvm/x86.c:?) kvm kvm_vcpu_ioctl (arch/x86/kvm/../../../virt/kvm/kvm_main.c:?) kvm __se_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:904 fs/ioctl.c:890) __x64_sys_ioctl (fs/ioctl.c:890) x64_sys_call (arch/x86/entry/syscall_64.c:33) do_syscall_64 (arch/x86/entry/common.c:?) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) read to 0xffffc90025a92344 of 4 bytes by task 4342 on cpu 4: kvm_vcpu_on_spin (arch/x86/kvm/../../../virt/kvm/kvm_main.c:4069) kvm handle_pause (arch/x86/kvm/vmx/vmx.c:5929) kvm_intel vmx_handle_exit (arch/x86/kvm/vmx/vmx.c:? arch/x86/kvm/vmx/vmx.c:6606) kvm_intel vcpu_run (arch/x86/kvm/x86.c:11107 arch/x86/kvm/x86.c:11211) kvm kvm_arch_vcpu_ioctl_run (arch/x86/kvm/x86.c:?) kvm kvm_vcpu_ioctl (arch/x86/kvm/../../../virt/kvm/kvm_main.c:?) kvm __se_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:904 fs/ioctl.c:890) __x64_sys_ioctl (fs/ioctl.c:890) x64_sys_call (arch/x86/entry/syscall_64.c:33) do_syscall_64 (arch/x86/entry/common.c:?) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) value changed: 0x00000012 -> 0x00000000 Fixes: 217ece6129f2 ("KVM: use yield_to instead of sleep in kvm_vcpu_on_spin") Cc: stable@vger.kernel.org Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240510092353.2261824-1-leitao@debian.org Signed-off-by: Sean Christopherson --- virt/kvm/kvm_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 14841acb8b95..843aa68cbcd0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4025,12 +4025,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) { struct kvm *kvm = me->kvm; struct kvm_vcpu *vcpu; - int last_boosted_vcpu = me->kvm->last_boosted_vcpu; + int last_boosted_vcpu; unsigned long i; int yielded = 0; int try = 3; int pass; + last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu); kvm_vcpu_set_in_spin_loop(me, true); /* * We boost the priority of a VCPU that is runnable but not @@ -4068,7 +4069,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) yielded = kvm_vcpu_yield_to(vcpu); if (yielded > 0) { - kvm->last_boosted_vcpu = i; + WRITE_ONCE(kvm->last_boosted_vcpu, i); break; } else if (yielded < 0) { try--; From 0841ea4a3b416554be401a91aa267b7de838de8b Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Mon, 3 Jun 2024 19:22:22 +0800 Subject: [PATCH 116/778] iomap: keep on increasing i_size in iomap_write_end() Commit '943bc0882ceb ("iomap: don't increase i_size if it's not a write operation")' breaks xfs with realtime device on generic/561, the problem is when unaligned truncate down a xfs realtime inode with rtextsize > 1 fs block, xfs only zero out the EOF block but doesn't zero out the tail blocks that aligned to rtextsize, so if we don't increase i_size in iomap_write_end(), it could expose stale data after we do an append write beyond the aligned EOF block. xfs should zero out the tail blocks when truncate down, but before we finish that, let's fix the issue by just revert the changes in iomap_write_end(). Fixes: 943bc0882ceb ("iomap: don't increase i_size if it's not a write operation") Reported-by: Chandan Babu R Link: https://lore.kernel.org/linux-xfs/0b92a215-9d9b-3788-4504-a520778953c2@huaweicloud.com Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240603112222.2109341-1-yi.zhang@huaweicloud.com Tested-by: Chandan Babu R Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 55 +++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index c5802a459334..bd70fcbc168e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -877,22 +877,37 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); + loff_t old_size = iter->inode->i_size; + size_t written; if (srcmap->type == IOMAP_INLINE) { iomap_write_end_inline(iter, folio, pos, copied); - return true; - } - - if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { - size_t bh_written; - - bh_written = block_write_end(NULL, iter->inode->i_mapping, pos, + written = copied; + } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { + written = block_write_end(NULL, iter->inode->i_mapping, pos, len, copied, &folio->page, NULL); - WARN_ON_ONCE(bh_written != copied && bh_written != 0); - return bh_written == copied; + WARN_ON_ONCE(written != copied && written != 0); + } else { + written = __iomap_write_end(iter->inode, pos, len, copied, + folio) ? copied : 0; } - return __iomap_write_end(iter->inode, pos, len, copied, folio); + /* + * Update the in-memory inode size after copying the data into the page + * cache. It's up to the file system to write the updated size to disk, + * preferably after I/O completion so that no stale data is exposed. + * Only once that's done can we unlock and release the folio. + */ + if (pos + written > old_size) { + i_size_write(iter->inode, pos + written); + iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; + } + __iomap_put_folio(iter, pos, written, folio); + + if (old_size < pos) + pagecache_isize_extended(iter->inode, old_size, pos); + + return written == copied; } static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) @@ -907,7 +922,6 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) do { struct folio *folio; - loff_t old_size; size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ @@ -959,23 +973,6 @@ retry: written = iomap_write_end(iter, pos, bytes, copied, folio) ? copied : 0; - /* - * Update the in-memory inode size after copying the data into - * the page cache. It's up to the file system to write the - * updated size to disk, preferably after I/O completion so that - * no stale data is exposed. Only once that's done can we - * unlock and release the folio. - */ - old_size = iter->inode->i_size; - if (pos + written > old_size) { - i_size_write(iter->inode, pos + written); - iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; - } - __iomap_put_folio(iter, pos, written, folio); - - if (old_size < pos) - pagecache_isize_extended(iter->inode, old_size, pos); - cond_resched(); if (unlikely(written == 0)) { /* @@ -1346,7 +1343,6 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) bytes = folio_size(folio) - offset; ret = iomap_write_end(iter, pos, bytes, bytes, folio); - __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; @@ -1412,7 +1408,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_mark_accessed(folio); ret = iomap_write_end(iter, pos, bytes, bytes, folio); - __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; From f5ceb1bbc98c69536d4673a97315e8427e67de1b Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Tue, 7 May 2024 14:25:42 +0530 Subject: [PATCH 117/778] iomap: Fix iomap_adjust_read_range for plen calculation If the extent spans the block that contains i_size, we need to handle both halves separately so that we properly zero data in the page cache for blocks that are entirely outside of i_size. But this is needed only when i_size is within the current folio under processing. "orig_pos + length > isize" can be true for all folios if the mapped extent length is greater than the folio size. That is making plen to break for every folio instead of only the last folio. So use orig_plen for checking if "orig_pos + orig_plen > isize". Signed-off-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/a32e5f9a4fcfdb99077300c4020ed7ae61d6e0f9.1715067055.git.ritesh.list@gmail.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara cc: Ojaswin Mujoo Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index bd70fcbc168e..d46558990279 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -241,6 +241,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio, unsigned block_size = (1 << block_bits); size_t poff = offset_in_folio(folio, *pos); size_t plen = min_t(loff_t, folio_size(folio) - poff, length); + size_t orig_plen = plen; unsigned first = poff >> block_bits; unsigned last = (poff + plen - 1) >> block_bits; @@ -277,7 +278,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio, * handle both halves separately so that we properly zero data in the * page cache for blocks that are entirely outside of i_size. */ - if (orig_pos <= isize && orig_pos + length > isize) { + if (orig_pos <= isize && orig_pos + orig_plen > isize) { unsigned end = offset_in_folio(folio, isize - 1) >> block_bits; if (first <= end && last > end) From 7926d51f73e0434a6250c2fd1a0555f98d9a62da Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 4 Jun 2024 22:25:21 -0400 Subject: [PATCH 118/778] scsi: sd: Use READ(16) when reading block zero on large capacity disks Commit 321da3dc1f3c ("scsi: sd: usb_storage: uas: Access media prior to querying device properties") triggered a read to LBA 0 before attempting to inquire about device characteristics. This was done because some protocol bridge devices will return generic values until an attached storage device's media has been accessed. Pierre Tomon reported that this change caused problems on a large capacity external drive connected via a bridge device. The bridge in question does not appear to implement the READ(10) command. Issue a READ(16) instead of READ(10) when a device has been identified as preferring 16-byte commands (use_16_for_rw heuristic). Link: https://bugzilla.kernel.org/show_bug.cgi?id=218890 Link: https://lore.kernel.org/r/70dd7ae0-b6b1-48e1-bb59-53b7c7f18274@rowland.harvard.edu Link: https://lore.kernel.org/r/20240605022521.3960956-1-martin.petersen@oracle.com Fixes: 321da3dc1f3c ("scsi: sd: usb_storage: uas: Access media prior to querying device properties") Cc: stable@vger.kernel.org Reported-by: Pierre Tomon Suggested-by: Alan Stern Tested-by: Pierre Tomon Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 332eb9dac22d..fbc11046bbf6 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3565,16 +3565,23 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, static void sd_read_block_zero(struct scsi_disk *sdkp) { - unsigned int buf_len = sdkp->device->sector_size; - char *buffer, cmd[10] = { }; + struct scsi_device *sdev = sdkp->device; + unsigned int buf_len = sdev->sector_size; + u8 *buffer, cmd[16] = { }; buffer = kmalloc(buf_len, GFP_KERNEL); if (!buffer) return; - cmd[0] = READ_10; - put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ - put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + if (sdev->use_16_for_rw) { + cmd[0] = READ_16; + put_unaligned_be64(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be32(1, &cmd[10]);/* Transfer 1 logical block */ + } else { + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + } scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, SD_TIMEOUT, sdkp->max_retries, NULL); From 4254dfeda82f20844299dca6c38cbffcfd499f41 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 5 Jun 2024 01:55:29 -0700 Subject: [PATCH 119/778] scsi: mpt3sas: Avoid test/set_bit() operating in non-allocated memory There is a potential out-of-bounds access when using test_bit() on a single word. The test_bit() and set_bit() functions operate on long values, and when testing or setting a single word, they can exceed the word boundary. KASAN detects this issue and produces a dump: BUG: KASAN: slab-out-of-bounds in _scsih_add_device.constprop.0 (./arch/x86/include/asm/bitops.h:60 ./include/asm-generic/bitops/instrumented-atomic.h:29 drivers/scsi/mpt3sas/mpt3sas_scsih.c:7331) mpt3sas Write of size 8 at addr ffff8881d26e3c60 by task kworker/u1536:2/2965 For full log, please look at [1]. Make the allocation at least the size of sizeof(unsigned long) so that set_bit() and test_bit() have sufficient room for read/write operations without overwriting unallocated memory. [1] Link: https://lore.kernel.org/all/ZkNcALr3W3KGYYJG@gmail.com/ Fixes: c696f7b83ede ("scsi: mpt3sas: Implement device_remove_in_progress check in IOCTL path") Cc: stable@vger.kernel.org Suggested-by: Keith Busch Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240605085530.499432-1-leitao@debian.org Reviewed-by: Keith Busch Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 258647fc6bdd..1092497563b2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -8512,6 +8512,12 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->pd_handles_sz = (ioc->facts.MaxDevHandle / 8); if (ioc->facts.MaxDevHandle % 8) ioc->pd_handles_sz++; + /* + * pd_handles_sz should have, at least, the minimal room for + * set_bit()/test_bit(), otherwise out-of-memory touch may occur. + */ + ioc->pd_handles_sz = ALIGN(ioc->pd_handles_sz, sizeof(unsigned long)); + ioc->pd_handles = kzalloc(ioc->pd_handles_sz, GFP_KERNEL); if (!ioc->pd_handles) { @@ -8529,6 +8535,13 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->pend_os_device_add_sz = (ioc->facts.MaxDevHandle / 8); if (ioc->facts.MaxDevHandle % 8) ioc->pend_os_device_add_sz++; + + /* + * pend_os_device_add_sz should have, at least, the minimal room for + * set_bit()/test_bit(), otherwise out-of-memory may occur. + */ + ioc->pend_os_device_add_sz = ALIGN(ioc->pend_os_device_add_sz, + sizeof(unsigned long)); ioc->pend_os_device_add = kzalloc(ioc->pend_os_device_add_sz, GFP_KERNEL); if (!ioc->pend_os_device_add) { @@ -8820,6 +8833,12 @@ _base_check_ioc_facts_changes(struct MPT3SAS_ADAPTER *ioc) if (ioc->facts.MaxDevHandle % 8) pd_handles_sz++; + /* + * pd_handles should have, at least, the minimal room for + * set_bit()/test_bit(), otherwise out-of-memory touch may + * occur. + */ + pd_handles_sz = ALIGN(pd_handles_sz, sizeof(unsigned long)); pd_handles = krealloc(ioc->pd_handles, pd_handles_sz, GFP_KERNEL); if (!pd_handles) { From 831bcbcead6668ebf20b64fdb27518f1362ace3a Mon Sep 17 00:00:00 2001 From: Aditya Nagesh Date: Fri, 31 May 2024 03:48:41 -0700 Subject: [PATCH 120/778] Drivers: hv: Cosmetic changes for hv.c and balloon.c Fix issues reported by checkpatch.pl script in hv.c and balloon.c - Remove unnecessary parentheses - Remove extra newlines - Remove extra spaces - Add spaces between comparison operators - Remove comparison with NULL in if statements No functional changes intended Signed-off-by: Aditya Nagesh Reviewed-by: Saurabh Sengar Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1717152521-6439-1-git-send-email-adityanagesh@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1717152521-6439-1-git-send-email-adityanagesh@linux.microsoft.com> --- drivers/hv/hv.c | 37 ++++++++-------- drivers/hv/hv_balloon.c | 98 +++++++++++++++-------------------------- 2 files changed, 53 insertions(+), 82 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index a8ad728354cb..e0d676c74f14 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -45,8 +45,8 @@ int hv_init(void) * This involves a hypercall. */ int hv_post_message(union hv_connection_id connection_id, - enum hv_message_type message_type, - void *payload, size_t payload_size) + enum hv_message_type message_type, + void *payload, size_t payload_size) { struct hv_input_post_message *aligned_msg; unsigned long flags; @@ -86,7 +86,7 @@ int hv_post_message(union hv_connection_id connection_id, status = HV_STATUS_INVALID_PARAMETER; } else { status = hv_do_hypercall(HVCALL_POST_MESSAGE, - aligned_msg, NULL); + aligned_msg, NULL); } local_irq_restore(flags); @@ -111,7 +111,7 @@ int hv_synic_alloc(void) hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask), GFP_KERNEL); - if (hv_context.hv_numa_map == NULL) { + if (!hv_context.hv_numa_map) { pr_err("Unable to allocate NUMA map\n"); goto err; } @@ -120,11 +120,11 @@ int hv_synic_alloc(void) hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); tasklet_init(&hv_cpu->msg_dpc, - vmbus_on_msg_dpc, (unsigned long) hv_cpu); + vmbus_on_msg_dpc, (unsigned long)hv_cpu); if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) { hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC); - if (hv_cpu->post_msg_page == NULL) { + if (!hv_cpu->post_msg_page) { pr_err("Unable to allocate post msg page\n"); goto err; } @@ -147,14 +147,14 @@ int hv_synic_alloc(void) if (!ms_hyperv.paravisor_present && !hv_root_partition) { hv_cpu->synic_message_page = (void *)get_zeroed_page(GFP_ATOMIC); - if (hv_cpu->synic_message_page == NULL) { + if (!hv_cpu->synic_message_page) { pr_err("Unable to allocate SYNIC message page\n"); goto err; } hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC); - if (hv_cpu->synic_event_page == NULL) { + if (!hv_cpu->synic_event_page) { pr_err("Unable to allocate SYNIC event page\n"); free_page((unsigned long)hv_cpu->synic_message_page); @@ -203,14 +203,13 @@ err: return ret; } - void hv_synic_free(void) { int cpu, ret; for_each_present_cpu(cpu) { - struct hv_per_cpu_context *hv_cpu - = per_cpu_ptr(hv_context.cpu_context, cpu); + struct hv_per_cpu_context *hv_cpu = + per_cpu_ptr(hv_context.cpu_context, cpu); /* It's better to leak the page if the encryption fails. */ if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) { @@ -262,8 +261,8 @@ void hv_synic_free(void) */ void hv_synic_enable_regs(unsigned int cpu) { - struct hv_per_cpu_context *hv_cpu - = per_cpu_ptr(hv_context.cpu_context, cpu); + struct hv_per_cpu_context *hv_cpu = + per_cpu_ptr(hv_context.cpu_context, cpu); union hv_synic_simp simp; union hv_synic_siefp siefp; union hv_synic_sint shared_sint; @@ -277,8 +276,8 @@ void hv_synic_enable_regs(unsigned int cpu) /* Mask out vTOM bit. ioremap_cache() maps decrypted */ u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; - hv_cpu->synic_message_page - = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); + hv_cpu->synic_message_page = + (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); if (!hv_cpu->synic_message_page) pr_err("Fail to map synic message page.\n"); } else { @@ -296,8 +295,8 @@ void hv_synic_enable_regs(unsigned int cpu) /* Mask out vTOM bit. ioremap_cache() maps decrypted */ u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; - hv_cpu->synic_event_page - = (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); + hv_cpu->synic_event_page = + (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); if (!hv_cpu->synic_event_page) pr_err("Fail to map synic event page.\n"); } else { @@ -348,8 +347,8 @@ int hv_synic_init(unsigned int cpu) */ void hv_synic_disable_regs(unsigned int cpu) { - struct hv_per_cpu_context *hv_cpu - = per_cpu_ptr(hv_context.cpu_context, cpu); + struct hv_per_cpu_context *hv_cpu = + per_cpu_ptr(hv_context.cpu_context, cpu); union hv_synic_sint shared_sint; union hv_synic_simp simp; union hv_synic_siefp siefp; diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 4370ad31b5b3..0e7427c2baf5 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -42,8 +42,6 @@ * Begin protocol definitions. */ - - /* * Protocol versions. The low word is the minor version, the high word the major * version. @@ -72,8 +70,6 @@ enum { DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10 }; - - /* * Message Types */ @@ -102,7 +98,6 @@ enum dm_message_type { DM_VERSION_1_MAX = 12 }; - /* * Structures defining the dynamic memory management * protocol. @@ -116,7 +111,6 @@ union dm_version { __u32 version; } __packed; - union dm_caps { struct { __u64 balloon:1; @@ -149,8 +143,6 @@ union dm_mem_page_range { __u64 page_range; } __packed; - - /* * The header for all dynamic memory messages: * @@ -175,7 +167,6 @@ struct dm_message { __u8 data[]; /* enclosed message */ } __packed; - /* * Specific message types supporting the dynamic memory protocol. */ @@ -272,7 +263,6 @@ struct dm_status { __u32 io_diff; } __packed; - /* * Message to ask the guest to allocate memory - balloon up message. * This message is sent from the host to the guest. The guest may not be @@ -287,14 +277,13 @@ struct dm_balloon { __u32 reservedz; } __packed; - /* * Balloon response message; this message is sent from the guest * to the host in response to the balloon message. * * reservedz: Reserved; must be set to zero. * more_pages: If FALSE, this is the last message of the transaction. - * if TRUE there will atleast one more message from the guest. + * if TRUE there will be at least one more message from the guest. * * range_count: The number of ranges in the range array. * @@ -315,7 +304,7 @@ struct dm_balloon_response { * to the guest to give guest more memory. * * more_pages: If FALSE, this is the last message of the transaction. - * if TRUE there will atleast one more message from the guest. + * if TRUE there will be at least one more message from the guest. * * reservedz: Reserved; must be set to zero. * @@ -343,7 +332,6 @@ struct dm_unballoon_response { struct dm_header hdr; } __packed; - /* * Hot add request message. Message sent from the host to the guest. * @@ -391,7 +379,6 @@ enum dm_info_type { MAX_INFO_TYPE }; - /* * Header for the information message. */ @@ -481,10 +468,10 @@ static unsigned long last_post_time; static int hv_hypercall_multi_failure; -module_param(hot_add, bool, (S_IRUGO | S_IWUSR)); +module_param(hot_add, bool, 0644); MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add"); -module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR)); +module_param(pressure_report_delay, uint, 0644); MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure"); static atomic_t trans_id = ATOMIC_INIT(0); @@ -503,7 +490,6 @@ enum hv_dm_state { DM_INIT_ERROR }; - static __u8 recv_buffer[HV_HYP_PAGE_SIZE]; static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE]; @@ -599,12 +585,12 @@ static inline bool has_pfn_is_backed(struct hv_hotadd_state *has, struct hv_hotadd_gap *gap; /* The page is not backed. */ - if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn)) + if (pfn < has->covered_start_pfn || pfn >= has->covered_end_pfn) return false; /* Check for gaps. */ list_for_each_entry(gap, &has->gap_list, list) { - if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn)) + if (pfn >= gap->start_pfn && pfn < gap->end_pfn) return false; } @@ -784,8 +770,8 @@ static void hv_online_page(struct page *pg, unsigned int order) guard(spinlock_irqsave)(&dm_device.ha_lock); list_for_each_entry(has, &dm_device.ha_region_list, list) { /* The page belongs to a different HAS. */ - if ((pfn < has->start_pfn) || - (pfn + (1UL << order) > has->end_pfn)) + if (pfn < has->start_pfn || + (pfn + (1UL << order) > has->end_pfn)) continue; hv_bring_pgs_online(has, pfn, 1UL << order); @@ -846,7 +832,7 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) } static unsigned long handle_pg_range(unsigned long pg_start, - unsigned long pg_count) + unsigned long pg_count) { unsigned long start_pfn = pg_start; unsigned long pfn_cnt = pg_count; @@ -857,7 +843,7 @@ static unsigned long handle_pg_range(unsigned long pg_start, unsigned long res = 0, flags; pr_debug("Hot adding %lu pages starting at pfn 0x%lx.\n", pg_count, - pg_start); + pg_start); spin_lock_irqsave(&dm_device.ha_lock, flags); list_for_each_entry(has, &dm_device.ha_region_list, list) { @@ -893,10 +879,9 @@ static unsigned long handle_pg_range(unsigned long pg_start, if (start_pfn > has->start_pfn && online_section_nr(pfn_to_section_nr(start_pfn))) hv_bring_pgs_online(has, start_pfn, pgs_ol); - } - if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) { + if (has->ha_end_pfn < has->end_pfn && pfn_cnt > 0) { /* * We have some residual hot add range * that needs to be hot added; hot add @@ -999,7 +984,7 @@ static void hot_add_req(struct work_struct *dummy) rg_start = dm->ha_wrk.ha_region_range.finfo.start_page; rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt; - if ((rg_start == 0) && (!dm->host_specified_ha_region)) { + if (rg_start == 0 && !dm->host_specified_ha_region) { /* * The host has not specified the hot-add region. * Based on the hot-add page range being specified, @@ -1013,7 +998,7 @@ static void hot_add_req(struct work_struct *dummy) if (do_hot_add) resp.page_count = process_hot_add(pg_start, pfn_cnt, - rg_start, rg_sz); + rg_start, rg_sz); dm->num_pages_added += resp.page_count; #endif @@ -1191,11 +1176,10 @@ static void post_status(struct hv_dynmem_device *dm) sizeof(struct dm_status), (unsigned long)NULL, VM_PKT_DATA_INBAND, 0); - } static void free_balloon_pages(struct hv_dynmem_device *dm, - union dm_mem_page_range *range_array) + union dm_mem_page_range *range_array) { int num_pages = range_array->finfo.page_cnt; __u64 start_frame = range_array->finfo.start_page; @@ -1211,8 +1195,6 @@ static void free_balloon_pages(struct hv_dynmem_device *dm, } } - - static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, unsigned int num_pages, struct dm_balloon_response *bl_resp, @@ -1258,7 +1240,6 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, page_to_pfn(pg); bl_resp->range_array[i].finfo.page_cnt = alloc_unit; bl_resp->hdr.size += sizeof(union dm_mem_page_range); - } return i * alloc_unit; @@ -1312,7 +1293,7 @@ static void balloon_up(struct work_struct *dummy) if (num_ballooned == 0 || num_ballooned == num_pages) { pr_debug("Ballooned %u out of %u requested pages.\n", - num_pages, dm_device.balloon_wrk.num_pages); + num_pages, dm_device.balloon_wrk.num_pages); bl_resp->more_pages = 0; done = true; @@ -1346,16 +1327,15 @@ static void balloon_up(struct work_struct *dummy) for (i = 0; i < bl_resp->range_count; i++) free_balloon_pages(&dm_device, - &bl_resp->range_array[i]); + &bl_resp->range_array[i]); done = true; } } - } static void balloon_down(struct hv_dynmem_device *dm, - struct dm_unballoon_request *req) + struct dm_unballoon_request *req) { union dm_mem_page_range *range_array = req->range_array; int range_count = req->range_count; @@ -1369,7 +1349,7 @@ static void balloon_down(struct hv_dynmem_device *dm, } pr_debug("Freed %u ballooned pages.\n", - prev_pages_ballooned - dm->num_pages_ballooned); + prev_pages_ballooned - dm->num_pages_ballooned); if (req->more_pages == 1) return; @@ -1394,8 +1374,7 @@ static int dm_thread_func(void *dm_dev) struct hv_dynmem_device *dm = dm_dev; while (!kthread_should_stop()) { - wait_for_completion_interruptible_timeout( - &dm_device.config_event, 1*HZ); + wait_for_completion_interruptible_timeout(&dm_device.config_event, 1 * HZ); /* * The host expects us to post information on the memory * pressure every second. @@ -1419,9 +1398,8 @@ static int dm_thread_func(void *dm_dev) return 0; } - static void version_resp(struct hv_dynmem_device *dm, - struct dm_version_response *vresp) + struct dm_version_response *vresp) { struct dm_version_request version_req; int ret; @@ -1482,7 +1460,7 @@ version_error: } static void cap_resp(struct hv_dynmem_device *dm, - struct dm_capabilities_resp_msg *cap_resp) + struct dm_capabilities_resp_msg *cap_resp) { if (!cap_resp->is_accepted) { pr_err("Capabilities not accepted by host\n"); @@ -1515,7 +1493,7 @@ static void balloon_onchannelcallback(void *context) switch (dm_hdr->type) { case DM_VERSION_RESPONSE: version_resp(dm, - (struct dm_version_response *)dm_msg); + (struct dm_version_response *)dm_msg); break; case DM_CAPABILITIES_RESPONSE: @@ -1545,7 +1523,7 @@ static void balloon_onchannelcallback(void *context) dm->state = DM_BALLOON_DOWN; balloon_down(dm, - (struct dm_unballoon_request *)recv_buffer); + (struct dm_unballoon_request *)recv_buffer); break; case DM_MEM_HOT_ADD_REQUEST: @@ -1583,17 +1561,15 @@ static void balloon_onchannelcallback(void *context) default: pr_warn_ratelimited("Unhandled message: type: %d\n", dm_hdr->type); - } } - } #define HV_LARGE_REPORTING_ORDER 9 #define HV_LARGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << \ HV_LARGE_REPORTING_ORDER) static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, - struct scatterlist *sgl, unsigned int nents) + struct scatterlist *sgl, unsigned int nents) { unsigned long flags; struct hv_memory_hint *hint; @@ -1628,7 +1604,7 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, */ /* page reporting for pages 2MB or higher */ - if (order >= HV_LARGE_REPORTING_ORDER ) { + if (order >= HV_LARGE_REPORTING_ORDER) { range->page.largepage = 1; range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; range->base_large_pfn = page_to_hvpfn( @@ -1642,23 +1618,21 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, range->page.additional_pages = (sg->length / HV_HYP_PAGE_SIZE) - 1; } - } status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0, hint, NULL); local_irq_restore(flags); if (!hv_result_success(status)) { - pr_err("Cold memory discard hypercall failed with status %llx\n", - status); + status); if (hv_hypercall_multi_failure > 0) hv_hypercall_multi_failure++; if (hv_result(status) == HV_STATUS_INVALID_PARAMETER) { pr_err("Underlying Hyper-V does not support order less than 9. Hypercall failed\n"); pr_err("Defaulting to page_reporting_order %d\n", - pageblock_order); + pageblock_order); page_reporting_order = pageblock_order; hv_hypercall_multi_failure++; return -EINVAL; @@ -1692,7 +1666,7 @@ static void enable_page_reporting(void) pr_err("Failed to enable cold memory discard: %d\n", ret); } else { pr_info("Cold memory discard hint enabled with order %d\n", - page_reporting_order); + page_reporting_order); } } @@ -1775,7 +1749,7 @@ static int balloon_connect_vsp(struct hv_device *dev) if (ret) goto out; - t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); + t = wait_for_completion_timeout(&dm_device.host_event, 5 * HZ); if (t == 0) { ret = -ETIMEDOUT; goto out; @@ -1833,7 +1807,7 @@ static int balloon_connect_vsp(struct hv_device *dev) if (ret) goto out; - t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); + t = wait_for_completion_timeout(&dm_device.host_event, 5 * HZ); if (t == 0) { ret = -ETIMEDOUT; goto out; @@ -1874,8 +1848,8 @@ static int hv_balloon_debug_show(struct seq_file *f, void *offset) char *sname; seq_printf(f, "%-22s: %u.%u\n", "host_version", - DYNMEM_MAJOR_VERSION(dm->version), - DYNMEM_MINOR_VERSION(dm->version)); + DYNMEM_MAJOR_VERSION(dm->version), + DYNMEM_MINOR_VERSION(dm->version)); seq_printf(f, "%-22s:", "capabilities"); if (ballooning_enabled()) @@ -1924,10 +1898,10 @@ static int hv_balloon_debug_show(struct seq_file *f, void *offset) seq_printf(f, "%-22s: %u\n", "pages_ballooned", dm->num_pages_ballooned); seq_printf(f, "%-22s: %lu\n", "total_pages_committed", - get_pages_committed(dm)); + get_pages_committed(dm)); seq_printf(f, "%-22s: %llu\n", "max_dynamic_page_count", - dm->max_dynamic_page_count); + dm->max_dynamic_page_count); return 0; } @@ -1937,7 +1911,7 @@ DEFINE_SHOW_ATTRIBUTE(hv_balloon_debug); static void hv_balloon_debugfs_init(struct hv_dynmem_device *b) { debugfs_create_file("hv-balloon", 0444, NULL, b, - &hv_balloon_debug_fops); + &hv_balloon_debug_fops); } static void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) @@ -2095,7 +2069,6 @@ static int balloon_suspend(struct hv_device *hv_dev) tasklet_enable(&hv_dev->channel->callback_event); return 0; - } static int balloon_resume(struct hv_device *dev) @@ -2154,7 +2127,6 @@ static struct hv_driver balloon_drv = { static int __init init_balloon_drv(void) { - return vmbus_driver_register(&balloon_drv); } From 0d92e4a7ffd5c42b9fa864692f82476c0bf8bcc8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 5 Jun 2024 18:56:37 +0100 Subject: [PATCH 121/778] KVM: arm64: Disassociate vcpus from redistributor region on teardown When tearing down a redistributor region, make sure we don't have any dangling pointer to that region stored in a vcpu. Fixes: e5a35635464b ("kvm: arm64: vgic-v3: Introduce vgic_v3_free_redist_region()") Reported-by: Alexander Potapenko Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240605175637.1635653-1-maz@kernel.org Cc: stable@vger.kernel.org --- arch/arm64/kvm/vgic/vgic-init.c | 2 +- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 15 +++++++++++++-- arch/arm64/kvm/vgic/vgic.h | 2 +- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 8f5b7a3e7009..7f68cf58b978 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -391,7 +391,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) - vgic_v3_free_redist_region(rdreg); + vgic_v3_free_redist_region(kvm, rdreg); INIT_LIST_HEAD(&dist->rd_regions); } else { dist->vgic_cpu_base = VGIC_ADDR_UNDEF; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index a3983a631b5a..9e50928f5d7d 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -919,8 +919,19 @@ free: return ret; } -void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg) +void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg) { + struct kvm_vcpu *vcpu; + unsigned long c; + + lockdep_assert_held(&kvm->arch.config_lock); + + /* Garbage collect the region */ + kvm_for_each_vcpu(c, vcpu, kvm) { + if (vcpu->arch.vgic_cpu.rdreg == rdreg) + vcpu->arch.vgic_cpu.rdreg = NULL; + } + list_del(&rdreg->list); kfree(rdreg); } @@ -945,7 +956,7 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) mutex_lock(&kvm->arch.config_lock); rdreg = vgic_v3_rdist_region_from_index(kvm, index); - vgic_v3_free_redist_region(rdreg); + vgic_v3_free_redist_region(kvm, rdreg); mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 6106ebd5ba42..03d356a12377 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -316,7 +316,7 @@ vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg) struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, u32 index); -void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg); +void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg); bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size); From 0fc670d07d5de36a54f061f457743c9cde1d8b46 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 3 Jun 2024 14:20:46 +0200 Subject: [PATCH 122/778] KVM: selftests: Fix RISC-V compilation Due to commit 2b7deea3ec7c ("Revert "kvm: selftests: move base kvm_util.h declarations to kvm_util_base.h"") kvm selftests now requires explicitly including ucall_common.h when needed. The commit added the directives everywhere they were needed at the time, but, by merge time, new places had been merged for RISC-V. Add those now to fix RISC-V's compilation. Fixes: dee7ea42a1eb ("Merge tag 'kvm-x86-selftests_utils-6.10' of https://github.com/kvm-x86/linux into HEAD") Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20240603122045.323064-2-ajones@ventanamicro.com Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/lib/riscv/ucall.c | 1 + tools/testing/selftests/kvm/riscv/ebreak_test.c | 1 + tools/testing/selftests/kvm/riscv/sbi_pmu_test.c | 1 + 3 files changed, 3 insertions(+) diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 14ee17151a59..b5035c63d516 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -9,6 +9,7 @@ #include "kvm_util.h" #include "processor.h" +#include "sbi.h" void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index 823c132069b4..0e0712854953 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -6,6 +6,7 @@ * */ #include "kvm_util.h" +#include "ucall_common.h" #define LABEL_ADDRESS(v) ((uint64_t)&(v)) diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index 69bb94e6b227..f299cbfd23ca 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -15,6 +15,7 @@ #include "processor.h" #include "sbi.h" #include "arch_timer.h" +#include "ucall_common.h" /* Maximum counters(firmware + hardware) */ #define RISCV_MAX_PMU_COUNTERS 64 From 2ed22161b19b11239aa742804549f63edd7c91e3 Mon Sep 17 00:00:00 2001 From: Andrei Simion Date: Tue, 4 Jun 2024 13:10:30 +0300 Subject: [PATCH 123/778] ASoC: atmel: atmel-classd: Re-add dai_link->platform to fix card init The removed dai_link->platform component cause a fail which is exposed at runtime. (ex: when a sound tool is used) This patch re-adds the dai_link->platform component to have a full card registered. Before this patch: :~$ aplay -l **** List of PLAYBACK Hardware Devices **** card 0: CLASSD [CLASSD], device 0: CLASSD PCM snd-soc-dummy-dai-0 [] Subdevices: 1/1 Subdevice #0: subdevice #0 :~$ speaker-test -t sine speaker-test 1.2.6 Playback device is default Stream parameters are 48000Hz, S16_LE, 1 channels Sine wave rate is 440.0000Hz Playback open error: -22,Invalid argument After this patch which restores the platform component: :~$ aplay -l **** List of PLAYBACK Hardware Devices **** card 0: CLASSD [CLASSD], device 0: CLASSD PCM snd-soc-dummy-dai-0 [CLASSD PCM snd-soc-dummy-dai-0] Subdevices: 1/1 Subdevice #0: subdevice #0 -> Resolve the playback error. Fixes: 2f650f87c03c ("ASoC: atmel: remove unnecessary dai_link->platform") Signed-off-by: Andrei Simion Acked-by: Kuninori Morimoto Link: https://msgid.link/r/20240604101030.237792-1-andrei.simion@microchip.com Signed-off-by: Mark Brown --- sound/soc/atmel/atmel-classd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c index 6aed1ee443b4..ba314b279919 100644 --- a/sound/soc/atmel/atmel-classd.c +++ b/sound/soc/atmel/atmel-classd.c @@ -473,19 +473,22 @@ static int atmel_classd_asoc_card_init(struct device *dev, if (!dai_link) return -ENOMEM; - comp = devm_kzalloc(dev, sizeof(*comp), GFP_KERNEL); + comp = devm_kzalloc(dev, 2 * sizeof(*comp), GFP_KERNEL); if (!comp) return -ENOMEM; - dai_link->cpus = comp; + dai_link->cpus = &comp[0]; dai_link->codecs = &snd_soc_dummy_dlc; + dai_link->platforms = &comp[1]; dai_link->num_cpus = 1; dai_link->num_codecs = 1; + dai_link->num_platforms = 1; dai_link->name = "CLASSD"; dai_link->stream_name = "CLASSD PCM"; dai_link->cpus->dai_name = dev_name(dev); + dai_link->platforms->name = dev_name(dev); card->dai_link = dai_link; card->num_links = 1; From 0ee14725471cea66e03e3cd4f4c582d759de502c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 6 Jun 2024 15:46:09 +0100 Subject: [PATCH 124/778] mm/util: Swap kmemdup_array() arguments GCC 14.1 complains about the argument usage of kmemdup_array(): drivers/soc/tegra/fuse/fuse-tegra.c:130:65: error: 'kmemdup_array' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] 130 | fuse->lookups = kmemdup_array(fuse->soc->lookups, sizeof(*fuse->lookups), | ^ drivers/soc/tegra/fuse/fuse-tegra.c:130:65: note: earlier argument should specify number of elements, later size of each element The annotation introduced by commit 7d78a7773355 ("string: Add additional __realloc_size() annotations for "dup" helpers") lets the compiler think that kmemdup_array() follows the same format as calloc(), with the number of elements preceding the size of one element. So we could simply swap the arguments to __realloc_size() to get rid of that warning, but it seems cleaner to instead have kmemdup_array() follow the same format as krealloc_array(), memdup_array_user(), calloc() etc. Fixes: 7d78a7773355 ("string: Add additional __realloc_size() annotations for "dup" helpers") Signed-off-by: Jean-Philippe Brucker Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240606144608.97817-2-jean-philippe@linaro.org Signed-off-by: Kees Cook --- drivers/soc/tegra/fuse/fuse-tegra.c | 4 ++-- include/linux/string.h | 2 +- lib/fortify_kunit.c | 2 +- mm/util.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index b6bfd6729df3..d27667283846 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -127,8 +127,8 @@ static void tegra_fuse_print_sku_info(struct tegra_sku_info *tegra_sku_info) static int tegra_fuse_add_lookups(struct tegra_fuse *fuse) { - fuse->lookups = kmemdup_array(fuse->soc->lookups, sizeof(*fuse->lookups), - fuse->soc->num_lookups, GFP_KERNEL); + fuse->lookups = kmemdup_array(fuse->soc->lookups, fuse->soc->num_lookups, + sizeof(*fuse->lookups), GFP_KERNEL); if (!fuse->lookups) return -ENOMEM; diff --git a/include/linux/string.h b/include/linux/string.h index 60168aa2af07..9edace076ddb 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -289,7 +289,7 @@ extern void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) __realloc_si extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); -extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp) +extern void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp) __realloc_size(2, 3); /* lib/argv_split.c */ diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c index f9cc467334ce..e17d520f532c 100644 --- a/lib/fortify_kunit.c +++ b/lib/fortify_kunit.c @@ -374,7 +374,7 @@ static const char * const test_strs[] = { for (i = 0; i < ARRAY_SIZE(test_strs); i++) { \ len = strlen(test_strs[i]); \ KUNIT_EXPECT_EQ(test, __builtin_constant_p(len), 0); \ - checker(len, kmemdup_array(test_strs[i], len, 1, gfp), \ + checker(len, kmemdup_array(test_strs[i], 1, len, gfp), \ kfree(p)); \ checker(len, kmemdup(test_strs[i], len, gfp), \ kfree(p)); \ diff --git a/mm/util.c b/mm/util.c index c9e519e6811f..6682097372ef 100644 --- a/mm/util.c +++ b/mm/util.c @@ -139,14 +139,14 @@ EXPORT_SYMBOL(kmemdup_noprof); * kmemdup_array - duplicate a given array. * * @src: array to duplicate. - * @element_size: size of each element of array. * @count: number of elements to duplicate from array. + * @element_size: size of each element of array. * @gfp: GFP mask to use. * * Return: duplicated array of @src or %NULL in case of error, * result is physically contiguous. Use kfree() to free. */ -void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp) +void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp) { return kmemdup(src, size_mul(element_size, count), gfp); } From f7d3b1ffc654b0435ac2c9c02a72fb2752bdb0fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Fri, 15 Mar 2024 13:54:10 +0100 Subject: [PATCH 125/778] yama: document function parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the unused function parameter of yama_relation_cleanup() to please kernel doc warnings. Signed-off-by: Christian Göttsche Reviewed-by: Paul Moore Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20240315125418.273104-2-cgzones@googlemail.com Signed-off-by: Kees Cook --- security/yama/yama_lsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index b6684a074a59..39944a859ff6 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -111,6 +111,7 @@ static void report_access(const char *access, struct task_struct *target, /** * yama_relation_cleanup - remove invalid entries from the relation list + * @work: unused * */ static void yama_relation_cleanup(struct work_struct *work) From 3ac36aa7307363b7247ccb6f6a804e11496b2b36 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 29 May 2024 09:42:05 +0200 Subject: [PATCH 126/778] x86/mm/numa: Use NUMA_NO_NODE when calling memblock_set_node() memblock_set_node() warns about using MAX_NUMNODES, see e0eec24e2e19 ("memblock: make memblock_set_node() also warn about use of MAX_NUMNODES") for details. Reported-by: Narasimhan V Signed-off-by: Jan Beulich Cc: stable@vger.kernel.org [bp: commit message] Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Mike Rapoport (IBM) Tested-by: Paul E. McKenney Link: https://lore.kernel.org/r/20240603141005.23261-1-bp@kernel.org Link: https://lore.kernel.org/r/abadb736-a239-49e4-ab42-ace7acdd4278@suse.com Signed-off-by: Mike Rapoport (IBM) --- arch/x86/mm/numa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index ce84ba86e69e..6ce10e3c6228 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -493,7 +493,7 @@ static void __init numa_clear_kernel_node_hotplug(void) for_each_reserved_mem_region(mb_region) { int nid = memblock_get_region_node(mb_region); - if (nid != MAX_NUMNODES) + if (nid != NUMA_NO_NODE) node_set(nid, reserved_nodemask); } @@ -614,9 +614,9 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(node_online_map); memset(&numa_meminfo, 0, sizeof(numa_meminfo)); WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory, - MAX_NUMNODES)); + NUMA_NO_NODE)); WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved, - MAX_NUMNODES)); + NUMA_NO_NODE)); /* In case that parsing SRAT failed. */ WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX)); numa_reset_distance(); From bb93148ca831c30248270b4d81d3ccca23faa1bf Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 15 May 2024 19:22:22 +0300 Subject: [PATCH 127/778] MAINTAINERS: update Xe driver maintainers Because I left Intel, I'm removing myself from the list of Xe driver maintainers. Signed-off-by: Oded Gabbay Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240515162222.12958-3-ogabbay@kernel.org Signed-off-by: Lucas De Marchi (cherry picked from commit 8de6625dafd202da733188f632701e9109188035) Signed-off-by: Lucas De Marchi --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d6c90161c7bf..69f553485b9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11035,7 +11035,6 @@ F: include/uapi/drm/i915_drm.h INTEL DRM XE DRIVER (Lunar Lake and newer) M: Lucas De Marchi -M: Oded Gabbay M: Thomas Hellström L: intel-xe@lists.freedesktop.org S: Supported From a9f9b30e1748252d158f78a0c0affdc949671dd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Sun, 2 Jun 2024 21:09:59 +0200 Subject: [PATCH 128/778] MAINTAINERS: Update Xe driver maintainers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Rodrigo Vivi as an Xe driver maintainer. v2: - Cc also Lucas De Marchi (Rodrigo vivi) - Remove a blank line in commit the commit message (Lucas De Marchi) Cc: David Airlie Cc: Daniel Vetter Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Thomas Hellström Acked-by: Rodrigo Vivi Acked-by: Lucas De Marchi Acked-by: Jani Nikula Acked-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240602190959.2981-1-thomas.hellstrom@linux.intel.com Signed-off-by: Lucas De Marchi --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 69f553485b9c..c517af00df84 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11036,6 +11036,7 @@ F: include/uapi/drm/i915_drm.h INTEL DRM XE DRIVER (Lunar Lake and newer) M: Lucas De Marchi M: Thomas Hellström +M: Rodrigo Vivi L: intel-xe@lists.freedesktop.org S: Supported W: https://drm.pages.freedesktop.org/intel-docs/ From e96b2933152fd87b6a41765b2f58b158fde855b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cs=C3=B3k=C3=A1s=2C=20Bence?= Date: Wed, 5 Jun 2024 10:42:51 +0200 Subject: [PATCH 129/778] net: sfp: Always call `sfp_sm_mod_remove()` on remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the module is in SFP_MOD_ERROR, `sfp_sm_mod_remove()` will not be run. As a consequence, `sfp_hwmon_remove()` is not getting run either, leaving a stale `hwmon` device behind. `sfp_sm_mod_remove()` itself checks `sfp->sm_mod_state` anyways, so this check was not really needed in the first place. Fixes: d2e816c0293f ("net: sfp: handle module remove outside state machine") Signed-off-by: "Csókás, Bence" Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240605084251.63502-1-csokas.bence@prolan.hu Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 3f9cbd797fd6..a5684ef5884b 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2429,8 +2429,7 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) /* Handle remove event globally, it resets this state machine */ if (event == SFP_E_REMOVE) { - if (sfp->sm_mod_state > SFP_MOD_PROBE) - sfp_sm_mod_remove(sfp); + sfp_sm_mod_remove(sfp); sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); return; } From b472b996a43404a912c5cb4f27050022fdbce10c Mon Sep 17 00:00:00 2001 From: Udit Kumar Date: Fri, 31 May 2024 22:27:25 +0530 Subject: [PATCH 130/778] dt-bindings: net: dp8386x: Add MIT license along with GPL-2.0 Modify license to include dual licensing as GPL-2.0-only OR MIT license for TI specific phy header files. This allows for Linux kernel files to be used in other Operating System ecosystems such as Zephyr or FreeBSD. While at this, update the GPL-2.0 to be GPL-2.0-only to be in sync with latest SPDX conventions (GPL-2.0 is deprecated). While at this, update the TI copyright year to sync with current year to indicate license change. Cc: Thomas Gleixner Cc: Trent Piepho Cc: Wadim Egorov Cc: Kip Broadhurst Signed-off-by: Udit Kumar Acked-by: Wadim Egorov Acked-by: Rob Herring Signed-off-by: David S. Miller --- include/dt-bindings/net/ti-dp83867.h | 4 ++-- include/dt-bindings/net/ti-dp83869.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h index 6fc4b445d3a1..b8a4f3ff4a3b 100644 --- a/include/dt-bindings/net/ti-dp83867.h +++ b/include/dt-bindings/net/ti-dp83867.h @@ -1,10 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ /* * Device Tree constants for the Texas Instruments DP83867 PHY * * Author: Dan Murphy * - * Copyright: (C) 2015 Texas Instruments, Inc. + * Copyright (C) 2015-2024 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef _DT_BINDINGS_TI_DP83867_H diff --git a/include/dt-bindings/net/ti-dp83869.h b/include/dt-bindings/net/ti-dp83869.h index 218b1a64e975..917114aad7d0 100644 --- a/include/dt-bindings/net/ti-dp83869.h +++ b/include/dt-bindings/net/ti-dp83869.h @@ -1,10 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ /* * Device Tree constants for the Texas Instruments DP83869 PHY * * Author: Dan Murphy * - * Copyright: (C) 2019 Texas Instruments, Inc. + * Copyright (C) 2015-2024 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef _DT_BINDINGS_TI_DP83869_H From 12cda920212a49fa22d9e8b9492ac4ea013310a4 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Wed, 5 Jun 2024 15:20:57 +0800 Subject: [PATCH 131/778] net: hns3: fix kernel crash problem in concurrent scenario When link status change, the nic driver need to notify the roce driver to handle this event, but at this time, the roce driver may uninit, then cause kernel crash. To fix the problem, when link status change, need to check whether the roce registered, and when uninit, need to wait link update finish. Fixes: 45e92b7e4e27 ("net: hns3: add calling roce callback function when link status change") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 43cc6ee4d87d..82574ce0194f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3086,9 +3086,7 @@ static void hclge_push_link_status(struct hclge_dev *hdev) static void hclge_update_link_status(struct hclge_dev *hdev) { - struct hnae3_handle *rhandle = &hdev->vport[0].roce; struct hnae3_handle *handle = &hdev->vport[0].nic; - struct hnae3_client *rclient = hdev->roce_client; struct hnae3_client *client = hdev->nic_client; int state; int ret; @@ -3112,8 +3110,15 @@ static void hclge_update_link_status(struct hclge_dev *hdev) client->ops->link_status_change(handle, state); hclge_config_mac_tnl_int(hdev, state); - if (rclient && rclient->ops->link_status_change) - rclient->ops->link_status_change(rhandle, state); + + if (test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state)) { + struct hnae3_handle *rhandle = &hdev->vport[0].roce; + struct hnae3_client *rclient = hdev->roce_client; + + if (rclient && rclient->ops->link_status_change) + rclient->ops->link_status_change(rhandle, + state); + } hclge_push_link_status(hdev); } @@ -11319,6 +11324,12 @@ clear_roce: return ret; } +static bool hclge_uninit_need_wait(struct hclge_dev *hdev) +{ + return test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || + test_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); +} + static void hclge_uninit_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { @@ -11327,7 +11338,7 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, if (hdev->roce_client) { clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + while (hclge_uninit_need_wait(hdev)) msleep(HCLGE_WAIT_RESET_DONE); hdev->roce_client->ops->uninit_instance(&vport->roce, 0); From 968fde83841a8c23558dfbd0a0c69d636db52b55 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 5 Jun 2024 15:20:58 +0800 Subject: [PATCH 132/778] net: hns3: add cond_resched() to hns3 ring buffer init process Currently hns3 ring buffer init process would hold cpu too long with big Tx/Rx ring depth. This could cause soft lockup. So this patch adds cond_resched() to the process. Then cpu can break to run other tasks instead of busy looping. Fixes: a723fb8efe29 ("net: hns3: refine for set ring parameters") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ++++ drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index ff71fb1eced9..a5fc0209d628 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3535,6 +3535,9 @@ static int hns3_alloc_ring_buffers(struct hns3_enet_ring *ring) ret = hns3_alloc_and_attach_buffer(ring, i); if (ret) goto out_buffer_fail; + + if (!(i % HNS3_RESCHED_BD_NUM)) + cond_resched(); } return 0; @@ -5107,6 +5110,7 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv) } u64_stats_init(&priv->ring[i].syncp); + cond_resched(); } return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index acd756b0c7c9..d36c4ed16d8d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -214,6 +214,8 @@ enum hns3_nic_state { #define HNS3_CQ_MODE_EQE 1U #define HNS3_CQ_MODE_CQE 0U +#define HNS3_RESCHED_BD_NUM 1024 + enum hns3_pkt_l2t_type { HNS3_L2_TYPE_UNICAST, HNS3_L2_TYPE_MULTICAST, From 1af89dedc8a58006d8e385b1e0d2cd24df8a3b69 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Jun 2024 20:27:19 +0200 Subject: [PATCH 133/778] thermal: core: Do not fail cdev registration because of invalid initial state It is reported that commit 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to thermal_debug_cdev_add()") causes the ACPI fan driver to fail probing on some systems which turns out to be due to the _FST control method returning an invalid value until _FSL is first evaluated for the given fan. If this happens, the .get_cur_state() cooling device callback returns an error and __thermal_cooling_device_register() fails as uses that callback after commit 31a0fa0019b0. Arguably, _FST should not return an invalid value even if it is evaluated before _FSL, so this may be regarded as a platform firmware issue, but at the same time it is not a good enough reason for failing the cooling device registration where the initial cooling device state is only needed to initialize a thermal debug facility. Accordingly, modify __thermal_cooling_device_register() to avoid calling thermal_debug_cdev_add() instead of returning an error if the initial .get_cur_state() callback invocation fails. Fixes: 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to thermal_debug_cdev_add()") Closes: https://lore.kernel.org/linux-acpi/20240530153727.843378-1-laura.nao@collabora.com Reported-by: Laura Nao Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Laura Nao --- drivers/thermal/thermal_core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 30567b499455..d70e76dd3c94 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -999,9 +999,17 @@ __thermal_cooling_device_register(struct device_node *np, if (ret) goto out_cdev_type; + /* + * The cooling device's current state is only needed for debug + * initialization below, so a failure to get it does not cause + * the entire cooling device initialization to fail. However, + * the debug will not work for the device if its initial state + * cannot be determined and drivers are responsible for ensuring + * that this will not happen. + */ ret = cdev->ops->get_cur_state(cdev, ¤t_state); if (ret) - goto out_cdev_type; + current_state = ULONG_MAX; thermal_cooling_device_setup_sysfs(cdev); @@ -1016,7 +1024,8 @@ __thermal_cooling_device_register(struct device_node *np, return ERR_PTR(ret); } - thermal_debug_cdev_add(cdev, current_state); + if (current_state <= cdev->max_state) + thermal_debug_cdev_add(cdev, current_state); /* Add 'this' new cdev to the global cdev list */ mutex_lock(&thermal_list_lock); From 7f18bd49cb6b6a3ab6d860fefccdc94f2a247db0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Jun 2024 20:27:30 +0200 Subject: [PATCH 134/778] thermal: ACPI: Invalidate trip points with temperature of 0 or below It is reported that commit 950210887670 ("thermal: core: Drop trips_disabled bitmask") causes the maximum frequency of CPUs to drop further down with every system sleep-wake cycle on Intel Core i7-4710HQ. This turns out to be due to a trip point whose temperature is equal to 0 degrees Celsius which is acted on every time the system wakes from sleep. Before commit 950210887670 this trip point would be disabled wia the trips_disabled bitmask, but now it is treated as a valid one. Since ACPI thermal control is generally about protection against overheating, trip points with temperature of 0 centigrade or below are not particularly useful there, so initialize them all as invalid which fixes the problem at hand. Fixes: 950210887670 ("thermal: core: Drop trips_disabled bitmask") Closes: https://lore.kernel.org/linux-pm/3f71747b-f852-4ee0-b384-cf46b2aefa3f@gmx.com Reported-by: Tibor Billes Tested-by: Tibor Billes Cc: 6.7+ # 6.7+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/thermal.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index d67881b50bca..a0cfc857fb55 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -168,11 +168,17 @@ static int acpi_thermal_get_polling_frequency(struct acpi_thermal *tz) static int acpi_thermal_temp(struct acpi_thermal *tz, int temp_deci_k) { + int temp; + if (temp_deci_k == THERMAL_TEMP_INVALID) return THERMAL_TEMP_INVALID; - return deci_kelvin_to_millicelsius_with_offset(temp_deci_k, + temp = deci_kelvin_to_millicelsius_with_offset(temp_deci_k, tz->kelvin_offset); + if (temp <= 0) + return THERMAL_TEMP_INVALID; + + return temp; } static bool acpi_thermal_trip_valid(struct acpi_thermal_trip *acpi_trip) From c44711b78608c98a3e6b49ce91678cd0917d5349 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Wed, 5 Jun 2024 13:11:35 +0300 Subject: [PATCH 135/778] liquidio: Adjust a NULL pointer handling path in lio_vf_rep_copy_packet In lio_vf_rep_copy_packet() pg_info->page is compared to a NULL value, but then it is unconditionally passed to skb_add_rx_frag() which looks strange and could lead to null pointer dereference. lio_vf_rep_copy_packet() call trace looks like: octeon_droq_process_packets octeon_droq_fast_process_packets octeon_droq_dispatch_pkt octeon_create_recv_info ...search in the dispatch_list... ->disp_fn(rdisp->rinfo, ...) lio_vf_rep_pkt_recv(struct octeon_recv_info *recv_info, ...) In this path there is no code which sets pg_info->page to NULL. So this check looks unneeded and doesn't solve potential problem. But I guess the author had reason to add a check and I have no such card and can't do real test. In addition, the code in the function liquidio_push_packet() in liquidio/lio_core.c does exactly the same. Based on this, I consider the most acceptable compromise solution to adjust this issue by moving skb_add_rx_frag() into conditional scope. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 1f233f327913 ("liquidio: switchdev support for LiquidIO NIC") Signed-off-by: Aleksandr Mishin Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index 96c6ea12279f..989b4ddae342 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -272,13 +272,12 @@ lio_vf_rep_copy_packet(struct octeon_device *oct, pg_info->page_offset; memcpy(skb->data, va, MIN_SKB_SIZE); skb_put(skb, MIN_SKB_SIZE); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + pg_info->page, + pg_info->page_offset + MIN_SKB_SIZE, + len - MIN_SKB_SIZE, + LIO_RXBUFFER_SZ); } - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - pg_info->page, - pg_info->page_offset + MIN_SKB_SIZE, - len - MIN_SKB_SIZE, - LIO_RXBUFFER_SZ); } else { struct octeon_skb_page_info *pg_info = ((struct octeon_skb_page_info *)(skb->cb)); From 60980cf5b8c8cc9182e5e9dbb62cbfd345c54074 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 7 Jun 2024 11:34:23 +0100 Subject: [PATCH 136/778] spi: cs42l43: Drop cs35l56 SPI speed down to 11MHz Some internals of the cs35l56 can only support SPI speeds of up to 11MHz. Whilst some use-cases could support higher rates, keep things simple by dropping the SPI speed down to this avoid any potential issues. Signed-off-by: Charles Keepax Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240607103423.4159834-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi-cs42l43.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c index 902a0734cc36..8b618ef0f711 100644 --- a/drivers/spi/spi-cs42l43.c +++ b/drivers/spi/spi-cs42l43.c @@ -54,7 +54,7 @@ static const struct software_node ampr = { static struct spi_board_info ampl_info = { .modalias = "cs35l56", - .max_speed_hz = 20 * HZ_PER_MHZ, + .max_speed_hz = 11 * HZ_PER_MHZ, .chip_select = 0, .mode = SPI_MODE_0, .swnode = &l, @@ -62,7 +62,7 @@ static struct spi_board_info ampl_info = { static struct spi_board_info ampr_info = { .modalias = "cs35l56", - .max_speed_hz = 20 * HZ_PER_MHZ, + .max_speed_hz = 11 * HZ_PER_MHZ, .chip_select = 1, .mode = SPI_MODE_0, .swnode = &r, From 0c76053e3fec801e86aca73f80072b3da4e72849 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 16 May 2024 11:33:43 +0300 Subject: [PATCH 137/778] drm: have config DRM_WERROR depend on !WERROR If WERROR is already enabled, there's no point in enabling DRM_WERROR or asking users about it. Reported-by: Linus Torvalds Closes: https://lore.kernel.org/r/CAHk-=whxT8D_0j=bjtrvj-O=VEOjn6GW8GK4j2V+BiDUntZKAQ@mail.gmail.com Fixes: f89632a9e5fa ("drm: Add CONFIG_DRM_WERROR") Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240516083343.1375687-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 026444eeb5c6..d0aa277fc3bf 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -450,6 +450,7 @@ config DRM_PRIVACY_SCREEN config DRM_WERROR bool "Compile the drm subsystem with warnings as errors" depends on DRM && EXPERT + depends on !WERROR default n help A kernel build should not cause any compiler warnings, and this From 231035f18d6b80e5c28732a20872398116a54ecd Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Thu, 6 Jun 2024 16:52:15 +0800 Subject: [PATCH 138/778] workqueue: Increase worker desc's length to 32 Commit 31c89007285d ("workqueue.c: Increase workqueue name length") increased WQ_NAME_LEN from 24 to 32, but forget to increase WORKER_DESC_LEN, which would cause truncation when setting kworker's desc from workqueue_struct's name, process_one_work() for example. Fixes: 31c89007285d ("workqueue.c: Increase workqueue name length") Signed-off-by: Wenchao Hao CC: Audra Mitchell Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fb3993894536..d9968bfc8eac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -95,7 +95,7 @@ enum wq_misc_consts { WORK_BUSY_RUNNING = 1 << 1, /* maximum string length for set_worker_desc() */ - WORKER_DESC_LEN = 24, + WORKER_DESC_LEN = 32, }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ From 462237d2d93fc9e9221d1cf9f773954d27da83c0 Mon Sep 17 00:00:00 2001 From: Louis Chauvet Date: Fri, 7 Jun 2024 10:34:38 +0200 Subject: [PATCH 139/778] dmaengine: xilinx: xdma: Fix data synchronisation in xdma_channel_isr() Requests the vchan lock before using xdma->stop_request. Fixes: 6a40fb824596 ("dmaengine: xilinx: xdma: Fix synchronization issue") Cc: stable@vger.kernel.org Signed-off-by: Louis Chauvet Link: https://lore.kernel.org/r/20240607-xdma-fixes-v2-1-0282319ce345@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index e143a7330816..718842fdaf98 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -885,11 +885,11 @@ static irqreturn_t xdma_channel_isr(int irq, void *dev_id) u32 st; bool repeat_tx; + spin_lock(&xchan->vchan.lock); + if (xchan->stop_requested) complete(&xchan->last_interrupt); - spin_lock(&xchan->vchan.lock); - /* get submitted request */ vd = vchan_next_desc(&xchan->vchan); if (!vd) From e79a10652bbd320649da705ca1ea0c04351af403 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 9 May 2024 13:45:02 -0500 Subject: [PATCH 140/778] ACPI: x86: Force StorageD3Enable on more products A Rembrandt-based HP thin client is reported to have problems where the NVME disk isn't present after resume from s2idle. This is because the NVME disk wasn't put into D3 at suspend, and that happened because the StorageD3Enable _DSD was missing in the BIOS. As AMD's architecture requires that the NVME is in D3 for s2idle, adjust the criteria for force_storage_d3 to match *all* Zen SoCs when the FADT advertises low power idle support. This will ensure that any future products with this BIOS deficiency don't need to be added to the allow list of overrides. Cc: All applicable Signed-off-by: Mario Limonciello Acked-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/utils.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index 7dca73417e2b..2fe0934dcd64 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -206,16 +206,16 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s } /* - * AMD systems from Renoir and Lucienne *require* that the NVME controller + * AMD systems from Renoir onwards *require* that the NVME controller * is put into D3 over a Modern Standby / suspend-to-idle cycle. * * This is "typically" accomplished using the `StorageD3Enable` * property in the _DSD that is checked via the `acpi_storage_d3` function - * but this property was introduced after many of these systems launched - * and most OEM systems don't have it in their BIOS. + * but some OEM systems still don't have it in their BIOS. * * The Microsoft documentation for StorageD3Enable mentioned that Windows has - * a hardcoded allowlist for D3 support, which was used for these platforms. + * a hardcoded allowlist for D3 support as well as a registry key to override + * the BIOS, which has been used for these cases. * * This allows quirking on Linux in a similar fashion. * @@ -228,19 +228,15 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s * https://bugzilla.kernel.org/show_bug.cgi?id=216773 * https://bugzilla.kernel.org/show_bug.cgi?id=217003 * 2) On at least one HP system StorageD3Enable is missing on the second NVME - disk in the system. + * disk in the system. + * 3) On at least one HP Rembrandt system StorageD3Enable is missing on the only + * NVME device. */ -static const struct x86_cpu_id storage_d3_cpu_ids[] = { - X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 24, NULL), /* Picasso */ - X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 96, NULL), /* Renoir */ - X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 104, NULL), /* Lucienne */ - X86_MATCH_VENDOR_FAM_MODEL(AMD, 25, 80, NULL), /* Cezanne */ - {} -}; - bool force_storage_d3(void) { - return x86_match_cpu(storage_d3_cpu_ids); + if (!cpu_feature_enabled(X86_FEATURE_ZEN)) + return false; + return acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0; } /* From 9f0fad0382124e7e23b3c730fa78818c22c89c0a Mon Sep 17 00:00:00 2001 From: John Keeping Date: Thu, 23 May 2024 09:56:24 +0100 Subject: [PATCH 141/778] Input: ili210x - fix ili251x_read_touch_data() return value The caller of this function treats all non-zero values as an error, so the return value of i2c_master_recv() cannot be returned directly. This fixes touch reporting when there are more than 6 active touches. Fixes: ef536abd3afd1 ("Input: ili210x - define and use chip operations structure") Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20240523085624.2295988-1-jkeeping@inmusicbrands.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ili210x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 31ffdc2a93f3..79bdb2b10949 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -261,8 +261,8 @@ static int ili251x_read_touch_data(struct i2c_client *client, u8 *data) if (!error && data[0] == 2) { error = i2c_master_recv(client, data + ILI251X_DATA_SIZE1, ILI251X_DATA_SIZE2); - if (error >= 0 && error != ILI251X_DATA_SIZE2) - error = -EIO; + if (error >= 0) + error = error == ILI251X_DATA_SIZE2 ? 0 : -EIO; } return error; From cee77149ebe9cd971ba238d87aa10e09bd98f1c9 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Fri, 7 Jun 2024 16:37:48 -0700 Subject: [PATCH 142/778] Input: xpad - add support for ASUS ROG RAIKIRI PRO Add the VID/PID for ASUS ROG RAIKIRI PRO to the list of known devices. Signed-off-by: Luke D. Jones Link: https://lore.kernel.org/r/20240607223722.1170776-1-luke@ljones.dev Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 70f0654c58b6..2b8370ecf42a 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -209,6 +209,7 @@ static const struct xpad_device { { 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 }, { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE }, + { 0x0b05, 0x1abb, "ASUS ROG RAIKIRI PRO", 0, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, From 89b898c627a49b978a4c323ea6856eacfc21f6ba Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 11 Mar 2024 12:28:00 -0400 Subject: [PATCH 143/778] iio: xilinx-ams: Don't include ams_ctrl_channels in scan_mask ams_enable_channel_sequence constructs a "scan_mask" for all the PS and PL channels. This works out fine, since scan_index for these channels is less than 64. However, it also includes the ams_ctrl_channels, where scan_index is greater than 64, triggering undefined behavior. Since we don't need these channels anyway, just exclude them. Fixes: d5c70627a794 ("iio: adc: Add Xilinx AMS driver") Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20240311162800.11074-1-sean.anderson@linux.dev Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-ams.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/xilinx-ams.c b/drivers/iio/adc/xilinx-ams.c index f0b71a1220e0..f52abf759260 100644 --- a/drivers/iio/adc/xilinx-ams.c +++ b/drivers/iio/adc/xilinx-ams.c @@ -414,8 +414,12 @@ static void ams_enable_channel_sequence(struct iio_dev *indio_dev) /* Run calibration of PS & PL as part of the sequence */ scan_mask = BIT(0) | BIT(AMS_PS_SEQ_MAX); - for (i = 0; i < indio_dev->num_channels; i++) - scan_mask |= BIT_ULL(indio_dev->channels[i].scan_index); + for (i = 0; i < indio_dev->num_channels; i++) { + const struct iio_chan_spec *chan = &indio_dev->channels[i]; + + if (chan->scan_index < AMS_CTRL_SEQ_BASE) + scan_mask |= BIT_ULL(chan->scan_index); + } if (ams->ps_base) { /* put sysmon in a soft reset to change the sequence */ From a2b86132955268b2a1703082fbc2d4832fc001b8 Mon Sep 17 00:00:00 2001 From: Fernando Yang Date: Mon, 3 Jun 2024 15:07:54 -0300 Subject: [PATCH 144/778] iio: adc: ad7266: Fix variable checking bug The ret variable was not checked after iio_device_release_direct_mode(), which could possibly cause errors Fixes: c70df20e3159 ("iio: adc: ad7266: claim direct mode during sensor read") Signed-off-by: Fernando Yang Link: https://lore.kernel.org/r/20240603180757.8560-1-hagisf@usp.br Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7266.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 353a97f9c086..13ea8a1073d2 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -157,6 +157,8 @@ static int ad7266_read_raw(struct iio_dev *indio_dev, ret = ad7266_read_single(st, val, chan->address); iio_device_release_direct_mode(indio_dev); + if (ret < 0) + return ret; *val = (*val >> 2) & 0xfff; if (chan->scan_type.sign == 's') *val = sign_extend32(*val, From a821d7111e3f7c8869961b606714a299bfe20014 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Wed, 5 Jun 2024 22:38:06 +0200 Subject: [PATCH 145/778] iio: accel: fxls8962af: select IIO_BUFFER & IIO_KFIFO_BUF Provide missing symbols to the module: ERROR: modpost: iio_push_to_buffers [drivers/iio/accel/fxls8962af-core.ko] undefined! ERROR: modpost: devm_iio_kfifo_buffer_setup_ext [drivers/iio/accel/fxls8962af-core.ko] undefined! Cc: stable@vger.kernel.org Fixes: 79e3a5bdd9ef ("iio: accel: fxls8962af: add hw buffered sampling") Signed-off-by: Alexander Sverdlin Reviewed-by: Sean Nyekjaer Link: https://lore.kernel.org/r/20240605203810.2908980-2-alexander.sverdlin@siemens.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index c2da5066e9a7..80b57d3ee3a7 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -330,6 +330,8 @@ config DMARD10 config FXLS8962AF tristate depends on I2C || !I2C # cannot be built-in for modular I2C + select IIO_BUFFER + select IIO_KFIFO_BUF config FXLS8962AF_I2C tristate "NXP FXLS8962AF/FXLS8964AF Accelerometer I2C Driver" From f67ac0061c7614c1548963d3ef1ee1606efd8636 Mon Sep 17 00:00:00 2001 From: Honggang LI Date: Thu, 23 May 2024 17:46:17 +0800 Subject: [PATCH 146/778] RDMA/rxe: Fix responder length checking for UD request packets According to the IBA specification: If a UD request packet is detected with an invalid length, the request shall be an invalid request and it shall be silently dropped by the responder. The responder then waits for a new request packet. commit 689c5421bfe0 ("RDMA/rxe: Fix incorrect responder length checking") defers responder length check for UD QPs in function `copy_data`. But it introduces a regression issue for UD QPs. When the packet size is too large to fit in the receive buffer. `copy_data` will return error code -EINVAL. Then `send_data_in` will return RESPST_ERR_MALFORMED_WQE. UD QP will transfer into ERROR state. Fixes: 689c5421bfe0 ("RDMA/rxe: Fix incorrect responder length checking") Signed-off-by: Honggang LI Link: https://lore.kernel.org/r/20240523094617.141148-1-honggangli@163.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_resp.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c6a7fa3054fa..6596a85723c9 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -344,6 +344,19 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, * receive buffer later. For rmda operations additional * length checks are performed in check_rkey. */ + if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) { + unsigned int payload = payload_size(pkt); + unsigned int recv_buffer_len = 0; + int i; + + for (i = 0; i < qp->resp.wqe->dma.num_sge; i++) + recv_buffer_len += qp->resp.wqe->dma.sge[i].length; + if (payload + 40 > recv_buffer_len) { + rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n"); + return RESPST_ERR_LENGTH; + } + } + if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))) { unsigned int mtu = qp->mtu; From 75183e461ce033605c3e85518a9f3d4e4ef848a3 Mon Sep 17 00:00:00 2001 From: Ke Sun Date: Sun, 9 Jun 2024 11:08:47 +0800 Subject: [PATCH 147/778] iio: dac: fix ad9739a random config compile error WARNING: unmet direct dependencies detected for REGMAP_SPI Depends on [n]: SPI [=n] Selected by [m]: - AD9739A [=m] && IIO [=m] && (SPI [=n] || COMPILE_TEST [=y]) ... ERROR: modpost: "spi_write_then_read" [drivers/base/regmap/regmap-spi.ko] undefined! ERROR: modpost: "spi_async" [drivers/base/regmap/regmap-spi.ko] undefined! ERROR: modpost: "spi_sync" [drivers/base/regmap/regmap-spi.ko] undefined! ERROR: modpost: "__spi_register_driver" [drivers/iio/dac/ad9739a.ko] undefined! Fixes: e77603d5468b ("iio: dac: support the ad9739a RF DAC") Reported-by: k2ci Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404250156.2PQRWmex-lkp@intel.com/ Signed-off-by: Ke Sun Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240609030847.2869455-1-sunke@kylinos.cn Signed-off-by: Jonathan Cameron --- drivers/iio/dac/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig index 3c2bf620f00f..ee0d9798d8b4 100644 --- a/drivers/iio/dac/Kconfig +++ b/drivers/iio/dac/Kconfig @@ -133,7 +133,7 @@ config AD5624R_SPI config AD9739A tristate "Analog Devices AD9739A RF DAC spi driver" - depends on SPI || COMPILE_TEST + depends on SPI select REGMAP_SPI select IIO_BACKEND help From 9547d6a4c65e975e40e203900322342ef7379c52 Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Wed, 5 Jun 2024 21:21:35 +0200 Subject: [PATCH 148/778] iio: humidity: hdc3020: fix hysteresis representation According to the ABI docs hysteresis values are represented as offsets to threshold values. Current implementation represents hysteresis values as absolute values which is wrong. Nevertheless the device stores them as absolute values and the datasheet refers to them as clear thresholds. Fix the reading and writing of hysteresis values by including thresholds into calculations. Hysteresis values that result in threshold clear values that are out of limits will be truncated. To check that the threshold clear values are correct, registers are read out using i2ctransfer and the corresponding temperature and relative humidity thresholds are calculated using the formulas in the datasheet. Fixes: 3ad0e7e5f0cb ("iio: humidity: hdc3020: add threshold events support") Signed-off-by: Dimitri Fedrau Reviewed-by: Javier Carrasco Link: https://lore.kernel.org/r/20240605192136.38146-1-dima.fedrau@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hdc3020.c | 323 +++++++++++++++++++++++++-------- 1 file changed, 248 insertions(+), 75 deletions(-) diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c index cdc4789213ba..a82dcc3da421 100644 --- a/drivers/iio/humidity/hdc3020.c +++ b/drivers/iio/humidity/hdc3020.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -66,8 +67,10 @@ #define HDC3020_CRC8_POLYNOMIAL 0x31 -#define HDC3020_MIN_TEMP -40 -#define HDC3020_MAX_TEMP 125 +#define HDC3020_MIN_TEMP_MICRO -39872968 +#define HDC3020_MAX_TEMP_MICRO 124875639 +#define HDC3020_MAX_TEMP_HYST_MICRO 164748607 +#define HDC3020_MAX_HUM_MICRO 99220264 struct hdc3020_data { struct i2c_client *client; @@ -368,6 +371,105 @@ static int hdc3020_write_raw(struct iio_dev *indio_dev, return -EINVAL; } +static int hdc3020_thresh_get_temp(u16 thresh) +{ + int temp; + + /* + * Get the temperature threshold from 9 LSBs, shift them to get + * the truncated temperature threshold representation and + * calculate the threshold according to the formula in the + * datasheet. Result is degree celsius scaled by 65535. + */ + temp = FIELD_GET(HDC3020_THRESH_TEMP_MASK, thresh) << + HDC3020_THRESH_TEMP_TRUNC_SHIFT; + + return -2949075 + (175 * temp); +} + +static int hdc3020_thresh_get_hum(u16 thresh) +{ + int hum; + + /* + * Get the humidity threshold from 7 MSBs, shift them to get the + * truncated humidity threshold representation and calculate the + * threshold according to the formula in the datasheet. Result is + * percent scaled by 65535. + */ + hum = FIELD_GET(HDC3020_THRESH_HUM_MASK, thresh) << + HDC3020_THRESH_HUM_TRUNC_SHIFT; + + return hum * 100; +} + +static u16 hdc3020_thresh_set_temp(int s_temp, u16 curr_thresh) +{ + u64 temp; + u16 thresh; + + /* + * Calculate temperature threshold, shift it down to get the + * truncated threshold representation in the 9LSBs while keeping + * the current humidity threshold in the 7 MSBs. + */ + temp = (u64)(s_temp + 45000000) * 65535ULL; + temp = div_u64(temp, 1000000 * 175) >> HDC3020_THRESH_TEMP_TRUNC_SHIFT; + thresh = FIELD_PREP(HDC3020_THRESH_TEMP_MASK, temp); + thresh |= (FIELD_GET(HDC3020_THRESH_HUM_MASK, curr_thresh) << + HDC3020_THRESH_HUM_TRUNC_SHIFT); + + return thresh; +} + +static u16 hdc3020_thresh_set_hum(int s_hum, u16 curr_thresh) +{ + u64 hum; + u16 thresh; + + /* + * Calculate humidity threshold, shift it down and up to get the + * truncated threshold representation in the 7MSBs while keeping + * the current temperature threshold in the 9 LSBs. + */ + hum = (u64)(s_hum) * 65535ULL; + hum = div_u64(hum, 1000000 * 100) >> HDC3020_THRESH_HUM_TRUNC_SHIFT; + thresh = FIELD_PREP(HDC3020_THRESH_HUM_MASK, hum); + thresh |= FIELD_GET(HDC3020_THRESH_TEMP_MASK, curr_thresh); + + return thresh; +} + +static +int hdc3020_thresh_clr(s64 s_thresh, s64 s_hyst, enum iio_event_direction dir) +{ + s64 s_clr; + + /* + * Include directions when calculation the clear value, + * since hysteresis is unsigned by definition and the + * clear value is an absolute value which is signed. + */ + if (dir == IIO_EV_DIR_RISING) + s_clr = s_thresh - s_hyst; + else + s_clr = s_thresh + s_hyst; + + /* Divide by 65535 to get units of micro */ + return div_s64(s_clr, 65535); +} + +static int _hdc3020_write_thresh(struct hdc3020_data *data, u16 reg, u16 val) +{ + u8 buf[5]; + + put_unaligned_be16(reg, buf); + put_unaligned_be16(val, buf + 2); + buf[4] = crc8(hdc3020_crc8_table, buf + 2, 2, CRC8_INIT_VALUE); + + return hdc3020_write_bytes(data, buf, 5); +} + static int hdc3020_write_thresh(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, enum iio_event_type type, @@ -376,67 +478,126 @@ static int hdc3020_write_thresh(struct iio_dev *indio_dev, int val, int val2) { struct hdc3020_data *data = iio_priv(indio_dev); - u8 buf[5]; - u64 tmp; - u16 reg; - int ret; + u16 reg, reg_val, reg_thresh_rd, reg_clr_rd, reg_thresh_wr, reg_clr_wr; + s64 s_thresh, s_hyst, s_clr; + int s_val, thresh, clr, ret; - /* Supported temperature range is from –40 to 125 degree celsius */ - if (val < HDC3020_MIN_TEMP || val > HDC3020_MAX_TEMP) - return -EINVAL; - - /* Select threshold register */ - if (info == IIO_EV_INFO_VALUE) { - if (dir == IIO_EV_DIR_RISING) - reg = HDC3020_S_T_RH_THRESH_HIGH; - else - reg = HDC3020_S_T_RH_THRESH_LOW; + /* Select threshold registers */ + if (dir == IIO_EV_DIR_RISING) { + reg_thresh_rd = HDC3020_R_T_RH_THRESH_HIGH; + reg_thresh_wr = HDC3020_S_T_RH_THRESH_HIGH; + reg_clr_rd = HDC3020_R_T_RH_THRESH_HIGH_CLR; + reg_clr_wr = HDC3020_S_T_RH_THRESH_HIGH_CLR; } else { - if (dir == IIO_EV_DIR_RISING) - reg = HDC3020_S_T_RH_THRESH_HIGH_CLR; - else - reg = HDC3020_S_T_RH_THRESH_LOW_CLR; + reg_thresh_rd = HDC3020_R_T_RH_THRESH_LOW; + reg_thresh_wr = HDC3020_S_T_RH_THRESH_LOW; + reg_clr_rd = HDC3020_R_T_RH_THRESH_LOW_CLR; + reg_clr_wr = HDC3020_S_T_RH_THRESH_LOW_CLR; } guard(mutex)(&data->lock); - ret = hdc3020_read_be16(data, reg); + ret = hdc3020_read_be16(data, reg_thresh_rd); if (ret < 0) return ret; + thresh = ret; + ret = hdc3020_read_be16(data, reg_clr_rd); + if (ret < 0) + return ret; + + clr = ret; + /* Scale value to include decimal part into calculations */ + s_val = (val < 0) ? (val * 1000000 - val2) : (val * 1000000 + val2); switch (chan->type) { case IIO_TEMP: - /* - * Calculate temperature threshold, shift it down to get the - * truncated threshold representation in the 9LSBs while keeping - * the current humidity threshold in the 7 MSBs. - */ - tmp = ((u64)(((val + 45) * MICRO) + val2)) * 65535ULL; - tmp = div_u64(tmp, MICRO * 175); - val = tmp >> HDC3020_THRESH_TEMP_TRUNC_SHIFT; - val = FIELD_PREP(HDC3020_THRESH_TEMP_MASK, val); - val |= (FIELD_GET(HDC3020_THRESH_HUM_MASK, ret) << - HDC3020_THRESH_HUM_TRUNC_SHIFT); + switch (info) { + case IIO_EV_INFO_VALUE: + s_val = max(s_val, HDC3020_MIN_TEMP_MICRO); + s_val = min(s_val, HDC3020_MAX_TEMP_MICRO); + reg = reg_thresh_wr; + reg_val = hdc3020_thresh_set_temp(s_val, thresh); + ret = _hdc3020_write_thresh(data, reg, reg_val); + if (ret < 0) + return ret; + + /* Calculate old hysteresis */ + s_thresh = (s64)hdc3020_thresh_get_temp(thresh) * 1000000; + s_clr = (s64)hdc3020_thresh_get_temp(clr) * 1000000; + s_hyst = div_s64(abs(s_thresh - s_clr), 65535); + /* Set new threshold */ + thresh = reg_val; + /* Set old hysteresis */ + s_val = s_hyst; + fallthrough; + case IIO_EV_INFO_HYSTERESIS: + /* + * Function hdc3020_thresh_get_temp returns temperature + * in degree celsius scaled by 65535. Scale by 1000000 + * to be able to subtract scaled hysteresis value. + */ + s_thresh = (s64)hdc3020_thresh_get_temp(thresh) * 1000000; + /* + * Units of s_val are in micro degree celsius, scale by + * 65535 to get same units as s_thresh. + */ + s_val = min(abs(s_val), HDC3020_MAX_TEMP_HYST_MICRO); + s_hyst = (s64)s_val * 65535; + s_clr = hdc3020_thresh_clr(s_thresh, s_hyst, dir); + s_clr = max(s_clr, HDC3020_MIN_TEMP_MICRO); + s_clr = min(s_clr, HDC3020_MAX_TEMP_MICRO); + reg = reg_clr_wr; + reg_val = hdc3020_thresh_set_temp(s_clr, clr); + break; + default: + return -EOPNOTSUPP; + } break; case IIO_HUMIDITYRELATIVE: - /* - * Calculate humidity threshold, shift it down and up to get the - * truncated threshold representation in the 7MSBs while keeping - * the current temperature threshold in the 9 LSBs. - */ - tmp = ((u64)((val * MICRO) + val2)) * 65535ULL; - tmp = div_u64(tmp, MICRO * 100); - val = tmp >> HDC3020_THRESH_HUM_TRUNC_SHIFT; - val = FIELD_PREP(HDC3020_THRESH_HUM_MASK, val); - val |= FIELD_GET(HDC3020_THRESH_TEMP_MASK, ret); + s_val = (s_val < 0) ? 0 : min(s_val, HDC3020_MAX_HUM_MICRO); + switch (info) { + case IIO_EV_INFO_VALUE: + reg = reg_thresh_wr; + reg_val = hdc3020_thresh_set_hum(s_val, thresh); + ret = _hdc3020_write_thresh(data, reg, reg_val); + if (ret < 0) + return ret; + + /* Calculate old hysteresis */ + s_thresh = (s64)hdc3020_thresh_get_hum(thresh) * 1000000; + s_clr = (s64)hdc3020_thresh_get_hum(clr) * 1000000; + s_hyst = div_s64(abs(s_thresh - s_clr), 65535); + /* Set new threshold */ + thresh = reg_val; + /* Try to set old hysteresis */ + s_val = min(abs(s_hyst), HDC3020_MAX_HUM_MICRO); + fallthrough; + case IIO_EV_INFO_HYSTERESIS: + /* + * Function hdc3020_thresh_get_hum returns relative + * humidity in percent scaled by 65535. Scale by 1000000 + * to be able to subtract scaled hysteresis value. + */ + s_thresh = (s64)hdc3020_thresh_get_hum(thresh) * 1000000; + /* + * Units of s_val are in micro percent, scale by 65535 + * to get same units as s_thresh. + */ + s_hyst = (s64)s_val * 65535; + s_clr = hdc3020_thresh_clr(s_thresh, s_hyst, dir); + s_clr = max(s_clr, 0); + s_clr = min(s_clr, HDC3020_MAX_HUM_MICRO); + reg = reg_clr_wr; + reg_val = hdc3020_thresh_set_hum(s_clr, clr); + break; + default: + return -EOPNOTSUPP; + } break; default: return -EOPNOTSUPP; } - put_unaligned_be16(reg, buf); - put_unaligned_be16(val, buf + 2); - buf[4] = crc8(hdc3020_crc8_table, buf + 2, 2, CRC8_INIT_VALUE); - return hdc3020_write_bytes(data, buf, 5); + return _hdc3020_write_thresh(data, reg, reg_val); } static int hdc3020_read_thresh(struct iio_dev *indio_dev, @@ -447,48 +608,60 @@ static int hdc3020_read_thresh(struct iio_dev *indio_dev, int *val, int *val2) { struct hdc3020_data *data = iio_priv(indio_dev); - u16 reg; - int ret; + u16 reg_thresh, reg_clr; + int thresh, clr, ret; - /* Select threshold register */ - if (info == IIO_EV_INFO_VALUE) { - if (dir == IIO_EV_DIR_RISING) - reg = HDC3020_R_T_RH_THRESH_HIGH; - else - reg = HDC3020_R_T_RH_THRESH_LOW; + /* Select threshold registers */ + if (dir == IIO_EV_DIR_RISING) { + reg_thresh = HDC3020_R_T_RH_THRESH_HIGH; + reg_clr = HDC3020_R_T_RH_THRESH_HIGH_CLR; } else { - if (dir == IIO_EV_DIR_RISING) - reg = HDC3020_R_T_RH_THRESH_HIGH_CLR; - else - reg = HDC3020_R_T_RH_THRESH_LOW_CLR; + reg_thresh = HDC3020_R_T_RH_THRESH_LOW; + reg_clr = HDC3020_R_T_RH_THRESH_LOW_CLR; } guard(mutex)(&data->lock); - ret = hdc3020_read_be16(data, reg); + ret = hdc3020_read_be16(data, reg_thresh); if (ret < 0) return ret; switch (chan->type) { case IIO_TEMP: - /* - * Get the temperature threshold from 9 LSBs, shift them to get - * the truncated temperature threshold representation and - * calculate the threshold according to the formula in the - * datasheet. - */ - *val = FIELD_GET(HDC3020_THRESH_TEMP_MASK, ret); - *val = *val << HDC3020_THRESH_TEMP_TRUNC_SHIFT; - *val = -2949075 + (175 * (*val)); + thresh = hdc3020_thresh_get_temp(ret); + switch (info) { + case IIO_EV_INFO_VALUE: + *val = thresh; + break; + case IIO_EV_INFO_HYSTERESIS: + ret = hdc3020_read_be16(data, reg_clr); + if (ret < 0) + return ret; + + clr = hdc3020_thresh_get_temp(ret); + *val = abs(thresh - clr); + break; + default: + return -EOPNOTSUPP; + } *val2 = 65535; return IIO_VAL_FRACTIONAL; case IIO_HUMIDITYRELATIVE: - /* - * Get the humidity threshold from 7 MSBs, shift them to get the - * truncated humidity threshold representation and calculate the - * threshold according to the formula in the datasheet. - */ - *val = FIELD_GET(HDC3020_THRESH_HUM_MASK, ret); - *val = (*val << HDC3020_THRESH_HUM_TRUNC_SHIFT) * 100; + thresh = hdc3020_thresh_get_hum(ret); + switch (info) { + case IIO_EV_INFO_VALUE: + *val = thresh; + break; + case IIO_EV_INFO_HYSTERESIS: + ret = hdc3020_read_be16(data, reg_clr); + if (ret < 0) + return ret; + + clr = hdc3020_thresh_get_hum(ret); + *val = abs(thresh - clr); + break; + default: + return -EOPNOTSUPP; + } *val2 = 65535; return IIO_VAL_FRACTIONAL; default: From ae1f7b93b52095be6776d0f34957b4f35dda44d9 Mon Sep 17 00:00:00 2001 From: Vasileios Amoiridis Date: Thu, 6 Jun 2024 23:22:53 +0200 Subject: [PATCH 149/778] iio: chemical: bme680: Fix pressure value output The IIO standard units are measured in kPa while the driver is using hPa. Apart from checking the userspace value itself, it is mentioned also in the Bosch API [1] that the pressure value is in Pascal. [1]: https://github.com/boschsensortec/BME68x_SensorAPI/blob/v4.4.8/bme68x_defs.h#L742 Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor") Signed-off-by: Vasileios Amoiridis Link: https://lore.kernel.org/r/20240606212313.207550-2-vassilisamir@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/bme680_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index ef5e0e46fd34..2c40c13fe97a 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -678,7 +678,7 @@ static int bme680_read_press(struct bme680_data *data, } *val = bme680_compensate_press(data, adc_press); - *val2 = 100; + *val2 = 1000; return IIO_VAL_FRACTIONAL; } From b47c0fee73a810c4503c4a94ea34858a1d865bba Mon Sep 17 00:00:00 2001 From: Vasileios Amoiridis Date: Thu, 6 Jun 2024 23:22:54 +0200 Subject: [PATCH 150/778] iio: chemical: bme680: Fix calibration data variable According to the BME68x Sensor API [1], the h6 calibration data variable should be an unsigned integer of size 8. [1]: https://github.com/boschsensortec/BME68x_SensorAPI/blob/v4.4.8/bme68x_defs.h#L789 Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor") Signed-off-by: Vasileios Amoiridis Link: https://lore.kernel.org/r/20240606212313.207550-3-vassilisamir@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/bme680_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index 2c40c13fe97a..812829841733 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -38,7 +38,7 @@ struct bme680_calib { s8 par_h3; s8 par_h4; s8 par_h5; - s8 par_h6; + u8 par_h6; s8 par_h7; s8 par_gh1; s16 par_gh2; From fdd478c3ae98c3f13628e110dce9b6cfb0d9b3c8 Mon Sep 17 00:00:00 2001 From: Vasileios Amoiridis Date: Thu, 6 Jun 2024 23:22:55 +0200 Subject: [PATCH 151/778] iio: chemical: bme680: Fix overflows in compensate() functions There are cases in the compensate functions of the driver that there could be overflows of variables due to bit shifting ops. These implications were initially discussed here [1] and they were mentioned in log message of Commit 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor"). [1]: https://lore.kernel.org/linux-iio/20180728114028.3c1bbe81@archlinux/ Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor") Signed-off-by: Vasileios Amoiridis Link: https://lore.kernel.org/r/20240606212313.207550-4-vassilisamir@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/bme680_core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index 812829841733..5db48f6d646c 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -342,10 +342,10 @@ static s16 bme680_compensate_temp(struct bme680_data *data, if (!calib->par_t2) bme680_read_calib(data, calib); - var1 = (adc_temp >> 3) - (calib->par_t1 << 1); + var1 = (adc_temp >> 3) - ((s32)calib->par_t1 << 1); var2 = (var1 * calib->par_t2) >> 11; var3 = ((var1 >> 1) * (var1 >> 1)) >> 12; - var3 = (var3 * (calib->par_t3 << 4)) >> 14; + var3 = (var3 * ((s32)calib->par_t3 << 4)) >> 14; data->t_fine = var2 + var3; calc_temp = (data->t_fine * 5 + 128) >> 8; @@ -368,9 +368,9 @@ static u32 bme680_compensate_press(struct bme680_data *data, var1 = (data->t_fine >> 1) - 64000; var2 = ((((var1 >> 2) * (var1 >> 2)) >> 11) * calib->par_p6) >> 2; var2 = var2 + (var1 * calib->par_p5 << 1); - var2 = (var2 >> 2) + (calib->par_p4 << 16); + var2 = (var2 >> 2) + ((s32)calib->par_p4 << 16); var1 = (((((var1 >> 2) * (var1 >> 2)) >> 13) * - (calib->par_p3 << 5)) >> 3) + + ((s32)calib->par_p3 << 5)) >> 3) + ((calib->par_p2 * var1) >> 1); var1 = var1 >> 18; var1 = ((32768 + var1) * calib->par_p1) >> 15; @@ -388,7 +388,7 @@ static u32 bme680_compensate_press(struct bme680_data *data, var3 = ((press_comp >> 8) * (press_comp >> 8) * (press_comp >> 8) * calib->par_p10) >> 17; - press_comp += (var1 + var2 + var3 + (calib->par_p7 << 7)) >> 4; + press_comp += (var1 + var2 + var3 + ((s32)calib->par_p7 << 7)) >> 4; return press_comp; } @@ -414,7 +414,7 @@ static u32 bme680_compensate_humid(struct bme680_data *data, (((temp_scaled * ((temp_scaled * calib->par_h5) / 100)) >> 6) / 100) + (1 << 14))) >> 10; var3 = var1 * var2; - var4 = calib->par_h6 << 7; + var4 = (s32)calib->par_h6 << 7; var4 = (var4 + ((temp_scaled * calib->par_h7) / 100)) >> 4; var5 = ((var3 >> 14) * (var3 >> 14)) >> 10; var6 = (var4 * var5) >> 1; From 4241665e6ea063a9c1d734de790121a71db763fc Mon Sep 17 00:00:00 2001 From: Vasileios Amoiridis Date: Thu, 6 Jun 2024 23:22:56 +0200 Subject: [PATCH 152/778] iio: chemical: bme680: Fix sensor data read operation A read operation is happening as follows: a) Set sensor to forced mode b) Sensor measures values and update data registers and sleeps again c) Read data registers In the current implementation the read operation happens immediately after the sensor is set to forced mode so the sensor does not have the time to update properly the registers. This leads to the following 2 problems: 1) The first ever value which is read by the register is always wrong 2) Every read operation, puts the register into forced mode and reads the data that were calculated in the previous conversion. This behaviour was tested in 2 ways: 1) The internal meas_status_0 register was read before and after every read operation in order to verify that the data were ready even before the register was set to forced mode and also to check that after the forced mode was set the new data were not yet ready. 2) Physically changing the temperature and measuring the temperature This commit adds the waiting time in between the set of the forced mode and the read of the data. The function is taken from the Bosch BME68x Sensor API [1]. [1]: https://github.com/boschsensortec/BME68x_SensorAPI/blob/v4.4.8/bme68x.c#L490 Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor") Signed-off-by: Vasileios Amoiridis Link: https://lore.kernel.org/r/20240606212313.207550-5-vassilisamir@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/bme680.h | 2 ++ drivers/iio/chemical/bme680_core.c | 46 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/drivers/iio/chemical/bme680.h b/drivers/iio/chemical/bme680.h index 4edc5d21cb9f..f959252a4fe6 100644 --- a/drivers/iio/chemical/bme680.h +++ b/drivers/iio/chemical/bme680.h @@ -54,7 +54,9 @@ #define BME680_NB_CONV_MASK GENMASK(3, 0) #define BME680_REG_MEAS_STAT_0 0x1D +#define BME680_NEW_DATA_BIT BIT(7) #define BME680_GAS_MEAS_BIT BIT(6) +#define BME680_MEAS_BIT BIT(5) /* Calibration Parameters */ #define BME680_T2_LSB_REG 0x8A diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index 5db48f6d646c..500f56834b01 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -10,6 +10,7 @@ */ #include #include +#include #include #include #include @@ -532,6 +533,43 @@ static u8 bme680_oversampling_to_reg(u8 val) return ilog2(val) + 1; } +/* + * Taken from Bosch BME680 API: + * https://github.com/boschsensortec/BME68x_SensorAPI/blob/v4.4.8/bme68x.c#L490 + */ +static int bme680_wait_for_eoc(struct bme680_data *data) +{ + struct device *dev = regmap_get_device(data->regmap); + unsigned int check; + int ret; + /* + * (Sum of oversampling ratios * time per oversampling) + + * TPH measurement + gas measurement + wait transition from forced mode + * + heater duration + */ + int wait_eoc_us = ((data->oversampling_temp + data->oversampling_press + + data->oversampling_humid) * 1936) + (477 * 4) + + (477 * 5) + 1000 + (data->heater_dur * 1000); + + usleep_range(wait_eoc_us, wait_eoc_us + 100); + + ret = regmap_read(data->regmap, BME680_REG_MEAS_STAT_0, &check); + if (ret) { + dev_err(dev, "failed to read measurement status register.\n"); + return ret; + } + if (check & BME680_MEAS_BIT) { + dev_err(dev, "Device measurement cycle incomplete.\n"); + return -EBUSY; + } + if (!(check & BME680_NEW_DATA_BIT)) { + dev_err(dev, "No new data available from the device.\n"); + return -ENODATA; + } + + return 0; +} + static int bme680_chip_config(struct bme680_data *data) { struct device *dev = regmap_get_device(data->regmap); @@ -622,6 +660,10 @@ static int bme680_read_temp(struct bme680_data *data, int *val) if (ret < 0) return ret; + ret = bme680_wait_for_eoc(data); + if (ret) + return ret; + ret = regmap_bulk_read(data->regmap, BME680_REG_TEMP_MSB, &tmp, 3); if (ret < 0) { @@ -738,6 +780,10 @@ static int bme680_read_gas(struct bme680_data *data, if (ret < 0) return ret; + ret = bme680_wait_for_eoc(data); + if (ret) + return ret; + ret = regmap_read(data->regmap, BME680_REG_MEAS_STAT_0, &check); if (check & BME680_GAS_MEAS_BIT) { dev_err(dev, "gas measurement incomplete\n"); From 0579f27249047006a818e463ee66a6c314d04cea Mon Sep 17 00:00:00 2001 From: Sagar Cheluvegowda Date: Wed, 5 Jun 2024 11:57:18 -0700 Subject: [PATCH 153/778] net: stmmac: dwmac-qcom-ethqos: Configure host DMA width Commit 070246e4674b ("net: stmmac: Fix for mismatched host/device DMA address width") added support in the stmmac driver for platform drivers to indicate the host DMA width, but left it up to authors of the specific platforms to indicate if their width differed from the addr64 register read from the MAC itself. Qualcomm's EMAC4 integration supports only up to 36 bit width (as opposed to the addr64 register indicating 40 bit width). Let's indicate that in the platform driver to avoid a scenario where the driver will allocate descriptors of size that is supported by the CPU which in our case is 36 bit, but as the addr64 register is still capable of 40 bits the device will use two descriptors as one address. Fixes: 8c4d92e82d50 ("net: stmmac: dwmac-qcom-ethqos: add support for emac4 on sa8775p platforms") Signed-off-by: Sagar Cheluvegowda Reviewed-by: Simon Horman Reviewed-by: Andrew Halaney Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index e254b21fdb59..65d7370b47d5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -93,6 +93,7 @@ struct ethqos_emac_driver_data { bool has_emac_ge_3; const char *link_clk_name; bool has_integrated_pcs; + u32 dma_addr_width; struct dwmac4_addrs dwmac4_addrs; }; @@ -276,6 +277,7 @@ static const struct ethqos_emac_driver_data emac_v4_0_0_data = { .has_emac_ge_3 = true, .link_clk_name = "phyaux", .has_integrated_pcs = true, + .dma_addr_width = 36, .dwmac4_addrs = { .dma_chan = 0x00008100, .dma_chan_offset = 0x1000, @@ -845,6 +847,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->flags |= STMMAC_FLAG_RX_CLK_RUNS_IN_LPI; if (data->has_integrated_pcs) plat_dat->flags |= STMMAC_FLAG_HAS_INTEGRATED_PCS; + if (data->dma_addr_width) + plat_dat->host_dma_width = data->dma_addr_width; if (ethqos->serdes_phy) { plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; From 799d4b392417ed6889030a5b2335ccb6dcf030ab Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 25 Apr 2024 11:48:51 +0200 Subject: [PATCH 154/778] drm/exynos: hdmi: report safe 640x480 mode as a fallback when no EDID found When reading EDID fails and driver reports no modes available, the DRM core adds an artificial 1024x786 mode to the connector. Unfortunately some variants of the Exynos HDMI (like the one in Exynos4 SoCs) are not able to drive such mode, so report a safe 640x480 mode instead of nothing in case of the EDID reading failure. This fixes the following issue observed on Trats2 board since commit 13d5b040363c ("drm/exynos: do not return negative values from .get_modes()"): [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops) exynos-drm exynos-drm: bound 12c10000.mixer (ops mixer_component_ops) exynos-dsi 11c80000.dsi: [drm:samsung_dsim_host_attach] Attached s6e8aa0 device (lanes:4 bpp:24 mode-flags:0x10b) exynos-drm exynos-drm: bound 11c80000.dsi (ops exynos_dsi_component_ops) exynos-drm exynos-drm: bound 12d00000.hdmi (ops hdmi_component_ops) [drm] Initialized exynos 1.1.0 20180330 for exynos-drm on minor 1 exynos-hdmi 12d00000.hdmi: [drm:hdmiphy_enable.part.0] *ERROR* PLL could not reach steady state panel-samsung-s6e8aa0 11c80000.dsi.0: ID: 0xa2, 0x20, 0x8c exynos-mixer 12c10000.mixer: timeout waiting for VSYNC ------------[ cut here ]------------ WARNING: CPU: 1 PID: 11 at drivers/gpu/drm/drm_atomic_helper.c:1682 drm_atomic_helper_wait_for_vblanks.part.0+0x2b0/0x2b8 [CRTC:70:crtc-1] vblank wait timed out Modules linked in: CPU: 1 PID: 11 Comm: kworker/u16:0 Not tainted 6.9.0-rc5-next-20240424 #14913 Hardware name: Samsung Exynos (Flattened Device Tree) Workqueue: events_unbound deferred_probe_work_func Call trace: unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x68/0x88 dump_stack_lvl from __warn+0x7c/0x1c4 __warn from warn_slowpath_fmt+0x11c/0x1a8 warn_slowpath_fmt from drm_atomic_helper_wait_for_vblanks.part.0+0x2b0/0x2b8 drm_atomic_helper_wait_for_vblanks.part.0 from drm_atomic_helper_commit_tail_rpm+0x7c/0x8c drm_atomic_helper_commit_tail_rpm from commit_tail+0x9c/0x184 commit_tail from drm_atomic_helper_commit+0x168/0x190 drm_atomic_helper_commit from drm_atomic_commit+0xb4/0xe0 drm_atomic_commit from drm_client_modeset_commit_atomic+0x23c/0x27c drm_client_modeset_commit_atomic from drm_client_modeset_commit_locked+0x60/0x1cc drm_client_modeset_commit_locked from drm_client_modeset_commit+0x24/0x40 drm_client_modeset_commit from __drm_fb_helper_restore_fbdev_mode_unlocked+0x9c/0xc4 __drm_fb_helper_restore_fbdev_mode_unlocked from drm_fb_helper_set_par+0x2c/0x3c drm_fb_helper_set_par from fbcon_init+0x3d8/0x550 fbcon_init from visual_init+0xc0/0x108 visual_init from do_bind_con_driver+0x1b8/0x3a4 do_bind_con_driver from do_take_over_console+0x140/0x1ec do_take_over_console from do_fbcon_takeover+0x70/0xd0 do_fbcon_takeover from fbcon_fb_registered+0x19c/0x1ac fbcon_fb_registered from register_framebuffer+0x190/0x21c register_framebuffer from __drm_fb_helper_initial_config_and_unlock+0x350/0x574 __drm_fb_helper_initial_config_and_unlock from exynos_drm_fbdev_client_hotplug+0x6c/0xb0 exynos_drm_fbdev_client_hotplug from drm_client_register+0x58/0x94 drm_client_register from exynos_drm_bind+0x160/0x190 exynos_drm_bind from try_to_bring_up_aggregate_device+0x200/0x2d8 try_to_bring_up_aggregate_device from __component_add+0xb0/0x170 __component_add from mixer_probe+0x74/0xcc mixer_probe from platform_probe+0x5c/0xb8 platform_probe from really_probe+0xe0/0x3d8 really_probe from __driver_probe_device+0x9c/0x1e4 __driver_probe_device from driver_probe_device+0x30/0xc0 driver_probe_device from __device_attach_driver+0xa8/0x120 __device_attach_driver from bus_for_each_drv+0x80/0xcc bus_for_each_drv from __device_attach+0xac/0x1fc __device_attach from bus_probe_device+0x8c/0x90 bus_probe_device from deferred_probe_work_func+0x98/0xe0 deferred_probe_work_func from process_one_work+0x240/0x6d0 process_one_work from worker_thread+0x1a0/0x3f4 worker_thread from kthread+0x104/0x138 kthread from ret_from_fork+0x14/0x28 Exception stack(0xf0895fb0 to 0xf0895ff8) ... irq event stamp: 82357 hardirqs last enabled at (82363): [] vprintk_emit+0x308/0x33c hardirqs last disabled at (82368): [] vprintk_emit+0x2bc/0x33c softirqs last enabled at (81614): [] __do_softirq+0x320/0x500 softirqs last disabled at (81609): [] __irq_exit_rcu+0x130/0x184 ---[ end trace 0000000000000000 ]--- exynos-drm exynos-drm: [drm] *ERROR* flip_done timed out exynos-drm exynos-drm: [drm] *ERROR* [CRTC:70:crtc-1] commit wait timed out exynos-drm exynos-drm: [drm] *ERROR* flip_done timed out exynos-drm exynos-drm: [drm] *ERROR* [CONNECTOR:74:HDMI-A-1] commit wait timed out exynos-drm exynos-drm: [drm] *ERROR* flip_done timed out exynos-drm exynos-drm: [drm] *ERROR* [PLANE:56:plane-5] commit wait timed out exynos-mixer 12c10000.mixer: timeout waiting for VSYNC Cc: stable@vger.kernel.org Fixes: 13d5b040363c ("drm/exynos: do not return negative values from .get_modes()") Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_hdmi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index e968824a4c72..1e26cd4f8347 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -887,11 +887,11 @@ static int hdmi_get_modes(struct drm_connector *connector) int ret; if (!hdata->ddc_adpt) - return 0; + goto no_edid; edid = drm_get_edid(connector, hdata->ddc_adpt); if (!edid) - return 0; + goto no_edid; hdata->dvi_mode = !connector->display_info.is_hdmi; DRM_DEV_DEBUG_KMS(hdata->dev, "%s : width[%d] x height[%d]\n", @@ -906,6 +906,9 @@ static int hdmi_get_modes(struct drm_connector *connector) kfree(edid); return ret; + +no_edid: + return drm_add_modes_noedid(connector, 640, 480); } static int hdmi_find_phy_conf(struct hdmi_context *hdata, u32 pixel_clock) From 1f3512cdf8299f9edaea9046d53ea324a7730bab Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Jun 2024 15:11:29 +0200 Subject: [PATCH 155/778] drm/exynos: dp: drop driver owner initialization Core in platform_driver_register() already sets the .owner, so driver does not need to. Whatever is set here will be anyway overwritten by main driver calling platform_driver_register(). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_dp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c index f48c4343f469..3e6d4c6aa877 100644 --- a/drivers/gpu/drm/exynos/exynos_dp.c +++ b/drivers/gpu/drm/exynos/exynos_dp.c @@ -285,7 +285,6 @@ struct platform_driver dp_driver = { .remove_new = exynos_dp_remove, .driver = { .name = "exynos-dp", - .owner = THIS_MODULE, .pm = pm_ptr(&exynos_dp_pm_ops), .of_match_table = exynos_dp_match, }, From fe8a08973a0dea9757394c5adbdc3c0a03b0b432 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 7 Jun 2024 16:32:59 -0500 Subject: [PATCH 156/778] RAS/AMD/ATL: Fix MI300 bank hash Apply the SID bits to the correct offset in the Bank value. Do this in the temporary value so they don't need to be masked off later. Fixes: 87a612375307 ("RAS/AMD/ATL: Add MI300 DRAM to normalized address translation support") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20240607-mi300-dram-xl-fix-v1-1-2f11547a178c@amd.com --- drivers/ras/amd/atl/umc.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 59b6169093f7..5cb92330dc67 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -189,16 +189,11 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) /* Calculate hash for PC bit. */ if (addr_hash.pc.xor_enable) { - /* Bits SID[1:0] act as Bank[6:5] for PC hash, so apply them here. */ - bank |= sid << 5; - temp = bitwise_xor_bits(col & addr_hash.pc.col_xor); temp ^= bitwise_xor_bits(row & addr_hash.pc.row_xor); - temp ^= bitwise_xor_bits(bank & addr_hash.bank_xor); + /* Bits SID[1:0] act as Bank[5:4] for PC hash, so apply them here. */ + temp ^= bitwise_xor_bits((bank | sid << NUM_BANK_BITS) & addr_hash.bank_xor); pc ^= temp; - - /* Drop SID bits for the sake of debug printing later. */ - bank &= 0x1F; } /* Reconstruct the normalized address starting with NA[4:0] = 0 */ From 38e3825631b1f314b21e3ade00b5a4d737eb054e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 30 May 2024 13:01:51 +0300 Subject: [PATCH 157/778] drm/exynos/vidi: fix memory leak in .get_modes() The duplicated EDID is never freed. Fix it. Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_vidi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index fab135308b70..11a720fef32b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -309,6 +309,7 @@ static int vidi_get_modes(struct drm_connector *connector) struct vidi_context *ctx = ctx_from_connector(connector); struct edid *edid; int edid_len; + int count; /* * the edid data comes from user side and it would be set @@ -328,7 +329,11 @@ static int vidi_get_modes(struct drm_connector *connector) drm_connector_update_edid_property(connector, edid); - return drm_add_edid_modes(connector, edid); + count = drm_add_edid_modes(connector, edid); + + kfree(edid); + + return count; } static const struct drm_connector_helper_funcs vidi_connector_helper_funcs = { From 58f880711f2ba53fd5e959875aff5b3bf6d5c32e Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Thu, 6 Jun 2024 11:11:57 -0700 Subject: [PATCH 158/778] xfs: make sure sb_fdblocks is non-negative A user with a completely full filesystem experienced an unexpected shutdown when the filesystem tried to write the superblock during runtime. kernel shows the following dmesg: [ 8.176281] XFS (dm-4): Metadata corruption detected at xfs_sb_write_verify+0x60/0x120 [xfs], xfs_sb block 0x0 [ 8.177417] XFS (dm-4): Unmount and run xfs_repair [ 8.178016] XFS (dm-4): First 128 bytes of corrupted metadata buffer: [ 8.178703] 00000000: 58 46 53 42 00 00 10 00 00 00 00 00 01 90 00 00 XFSB............ [ 8.179487] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [ 8.180312] 00000020: cf 12 dc 89 ca 26 45 29 92 e6 e3 8d 3b b8 a2 c3 .....&E)....;... [ 8.181150] 00000030: 00 00 00 00 01 00 00 06 00 00 00 00 00 00 00 80 ................ [ 8.182003] 00000040: 00 00 00 00 00 00 00 81 00 00 00 00 00 00 00 82 ................ [ 8.182004] 00000050: 00 00 00 01 00 64 00 00 00 00 00 04 00 00 00 00 .....d.......... [ 8.182004] 00000060: 00 00 64 00 b4 a5 02 00 02 00 00 08 00 00 00 00 ..d............. [ 8.182005] 00000070: 00 00 00 00 00 00 00 00 0c 09 09 03 17 00 00 19 ................ [ 8.182008] XFS (dm-4): Corruption of in-memory data detected. Shutting down filesystem [ 8.182010] XFS (dm-4): Please unmount the filesystem and rectify the problem(s) When xfs_log_sb writes super block to disk, b_fdblocks is fetched from m_fdblocks without any lock. As m_fdblocks can experience a positive -> negative -> positive changing when the FS reaches fullness (see xfs_mod_fdblocks). So there is a chance that sb_fdblocks is negative, and because sb_fdblocks is type of unsigned long long, it reads super big. And sb_fdblocks being bigger than sb_dblocks is a problem during log recovery, xfs_validate_sb_write() complains. Fix: As sb_fdblocks will be re-calculated during mount when lazysbcount is enabled, We just need to make xfs_validate_sb_write() happy -- make sure sb_fdblocks is not nenative. This patch also takes care of other percpu counters in xfs_log_sb. Signed-off-by: Wengang Wang Reviewed-by: Darrick J. Wong Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_sb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 09e4bf949bf8..6b56f0f6d4c1 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1038,11 +1038,12 @@ xfs_log_sb( * and hence we don't need have to update it here. */ if (xfs_has_lazysbcount(mp)) { - mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); + mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount); mp->m_sb.sb_ifree = min_t(uint64_t, - percpu_counter_sum(&mp->m_ifree), + percpu_counter_sum_positive(&mp->m_ifree), mp->m_sb.sb_icount); - mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); + mp->m_sb.sb_fdblocks = + percpu_counter_sum_positive(&mp->m_fdblocks); } xfs_sb_to_disk(bp->b_addr, &mp->m_sb); From f74fb5df429ebc6a614dc5aa9e44d7194d402e5a Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Sun, 10 Mar 2024 23:04:00 +0100 Subject: [PATCH 159/778] drm: panel-orientation-quirks: Add quirk for Aya Neo KUN Similar to the other Aya Neo devices this one features again a portrait screen, here with a native resolution of 1600x2560. Signed-off-by: Tobias Jakobi Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20240310220401.895591-1-tjakobi@math.uni-bielefeld.de --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index aa93129c3397..2166208a961d 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -202,6 +202,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BOARD_NAME, "NEXT"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* AYA NEO KUN */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AYANEO"), + DMI_MATCH(DMI_BOARD_NAME, "KUN"), + }, + .driver_data = (void *)&lcd1600x2560_rightside_up, }, { /* Chuwi HiBook (CWI514) */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), From 5add2f7288468f35a374620dabf126c13baaea9c Mon Sep 17 00:00:00 2001 From: David Wei Date: Thu, 6 Jun 2024 07:59:08 -0700 Subject: [PATCH 160/778] netdevsim: fix backwards compatibility in nsim_get_iflink() The default ndo_get_iflink() implementation returns the current ifindex of the netdev. But the overridden nsim_get_iflink() returns 0 if the current nsim is not linked, breaking backwards compatibility for userspace that depend on this behaviour. Fix the problem by returning the current ifindex if not linked to a peer. Fixes: 8debcf5832c3 ("netdevsim: add ndo_get_iflink() implementation") Reported-by: Yu Watanabe Suggested-by: Yu Watanabe Signed-off-by: David Wei Signed-off-by: David S. Miller --- drivers/net/netdevsim/netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index c22897bf5509..017a6102be0a 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -324,7 +324,8 @@ static int nsim_get_iflink(const struct net_device *dev) rcu_read_lock(); peer = rcu_dereference(nsim->peer); - iflink = peer ? READ_ONCE(peer->netdev->ifindex) : 0; + iflink = peer ? READ_ONCE(peer->netdev->ifindex) : + READ_ONCE(dev->ifindex); rcu_read_unlock(); return iflink; From e3cf20e5c68df604315ab30bdbe15dc8a5da556b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Jun 2024 22:32:34 +0100 Subject: [PATCH 161/778] ARM: 9405/1: ftrace: Don't assume stack frames are contiguous in memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The frame pointer unwinder relies on a standard layout of the stack frame, consisting of (in downward order) Calling frame: PC <---------+ LR | SP | FP | .. locals .. | Callee frame: | PC | LR | SP | FP ----------+ where after storing its previous value on the stack, FP is made to point at the location of PC in the callee stack frame, using the canonical prologue: mov ip, sp stmdb sp!, {fp, ip, lr, pc} sub fp, ip, #4 The ftrace code assumes that this activation record is pushed first, and that any stack space for locals is allocated below this. Strict adherence to this would imply that the caller's value of SP at the time of the function call can always be obtained by adding 4 to FP (which points to PC in the callee frame). However, recent versions of GCC appear to deviate from this rule, and so the only reliable way to obtain the caller's value of SP is to read it from the activation record. Since this involves a read from memory rather than simple arithmetic, we need to use the uaccess API here which protects against inadvertent data aborts resulting from attempts to dereference bogus FP values. The plain uaccess API is ftrace instrumented itself, so to avoid unbounded recursion, use the __get_kernel_nofault() primitive directly. Closes: https://lore.kernel.org/all/alp44tukzo6mvcwl4ke4ehhmojrqnv6xfcdeuliybxfjfvgd3e@gpjvwj33cc76 Closes: https://lore.kernel.org/all/d870c149-4363-43de-b0ea-7125dec5608e@broadcom.com/ Reported-by: Uwe Kleine-König Reported-by: Justin Chen Tested-by: Thorsten Scherer Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/ftrace.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index a0b6d1e3812f..e61591f33a6c 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -232,11 +232,24 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long old; if (unlikely(atomic_read(¤t->tracing_graph_pause))) +err_out: return; if (IS_ENABLED(CONFIG_UNWINDER_FRAME_POINTER)) { - /* FP points one word below parent's top of stack */ - frame_pointer += 4; + /* + * Usually, the stack frames are contiguous in memory but cases + * have been observed where the next stack frame does not live + * at 'frame_pointer + 4' as this code used to assume. + * + * Instead, dereference the field in the stack frame that + * stores the SP of the calling frame: to avoid unbounded + * recursion, this cannot involve any ftrace instrumented + * functions, so use the __get_kernel_nofault() primitive + * directly. + */ + __get_kernel_nofault(&frame_pointer, + (unsigned long *)(frame_pointer - 8), + unsigned long, err_out); } else { struct stackframe frame = { .fp = frame_pointer, From b880018edd3a577e50366338194dee9b899947e0 Mon Sep 17 00:00:00 2001 From: Amjad Ouled-Ameur Date: Mon, 10 Jun 2024 11:20:56 +0100 Subject: [PATCH 162/778] drm/komeda: check for error-valued pointer komeda_pipeline_get_state() may return an error-valued pointer, thus check the pointer for negative or null value before dereferencing. Fixes: 502932a03fce ("drm/komeda: Add the initial scaler support for CORE") Signed-off-by: Amjad Ouled-Ameur Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240610102056.40406-1-amjad.ouled-ameur@arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c index f3e744172673..f4e76b46ca32 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c @@ -259,7 +259,7 @@ komeda_component_get_avail_scaler(struct komeda_component *c, u32 avail_scalers; pipe_st = komeda_pipeline_get_state(c->pipeline, state); - if (!pipe_st) + if (IS_ERR_OR_NULL(pipe_st)) return NULL; avail_scalers = (pipe_st->active_comps & KOMEDA_PIPELINE_SCALERS) ^ From ce62600c4dbee8d43b02277669dd91785a9b81d9 Mon Sep 17 00:00:00 2001 From: Adam Miotk Date: Mon, 10 Jun 2024 11:27:39 +0100 Subject: [PATCH 163/778] drm/bridge/panel: Fix runtime warning on panel bridge release Device managed panel bridge wrappers are created by calling to drm_panel_bridge_add_typed() and registering a release handler for clean-up when the device gets unbound. Since the memory for this bridge is also managed and linked to the panel device, the release function should not try to free that memory. Moreover, the call to devm_kfree() inside drm_panel_bridge_remove() will fail in this case and emit a warning because the panel bridge resource is no longer on the device resources list (it has been removed from there before the call to release handlers). Fixes: 67022227ffb1 ("drm/bridge: Add a devm_ allocator for panel bridge.") Signed-off-by: Adam Miotk Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240610102739.139852-1-adam.miotk@arm.com --- drivers/gpu/drm/bridge/panel.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index 32506524d9a2..fe5fb08c9fc4 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -360,9 +360,12 @@ EXPORT_SYMBOL(drm_panel_bridge_set_orientation); static void devm_drm_panel_bridge_release(struct device *dev, void *res) { - struct drm_bridge **bridge = res; + struct drm_bridge *bridge = *(struct drm_bridge **)res; - drm_panel_bridge_remove(*bridge); + if (!bridge) + return; + + drm_bridge_remove(bridge); } /** From 97ab304ecd95c0b1703ff8c8c3956dc6e2afe8e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Mon, 3 Jun 2024 12:28:15 +0200 Subject: [PATCH 164/778] ASoC: topology: Fix references to freed memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most users after parsing a topology file, release memory used by it, so having pointer references directly into topology file contents is wrong. Use devm_kmemdup(), to allocate memory as needed. Reported-by: Jason Montleon Link: https://github.com/thesofproject/avs-topology-xml/issues/22#issuecomment-2127892605 Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240603102818.36165-2-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 90ca37e008b3..75d9395a18ed 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1060,15 +1060,32 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg, break; } - route->source = elem->source; - route->sink = elem->sink; + route->source = devm_kmemdup(tplg->dev, elem->source, + min(strlen(elem->source), + SNDRV_CTL_ELEM_ID_NAME_MAXLEN), + GFP_KERNEL); + route->sink = devm_kmemdup(tplg->dev, elem->sink, + min(strlen(elem->sink), SNDRV_CTL_ELEM_ID_NAME_MAXLEN), + GFP_KERNEL); + if (!route->source || !route->sink) { + ret = -ENOMEM; + break; + } /* set to NULL atm for tplg users */ route->connected = NULL; - if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == 0) + if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == 0) { route->control = NULL; - else - route->control = elem->control; + } else { + route->control = devm_kmemdup(tplg->dev, elem->control, + min(strlen(elem->control), + SNDRV_CTL_ELEM_ID_NAME_MAXLEN), + GFP_KERNEL); + if (!route->control) { + ret = -ENOMEM; + break; + } + } /* add route dobj to dobj_list */ route->dobj.type = SND_SOC_DOBJ_GRAPH; From fd660b1bd015e5aa9a558ee04088f2431010548d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Mon, 3 Jun 2024 12:28:16 +0200 Subject: [PATCH 165/778] ASoC: Intel: avs: Fix route override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of overriding existing memory strings that may be too short, just allocate needed memory and point the route at it. Reported-by: Jason Montleon Link: https://github.com/thesofproject/avs-topology-xml/issues/22#issuecomment-2127892605 Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240603102818.36165-3-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/topology.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c index 02bae207f6ec..b6c5d94a1554 100644 --- a/sound/soc/intel/avs/topology.c +++ b/sound/soc/intel/avs/topology.c @@ -1545,8 +1545,8 @@ static int avs_route_load(struct snd_soc_component *comp, int index, { struct snd_soc_acpi_mach *mach = dev_get_platdata(comp->card->dev); size_t len = SNDRV_CTL_ELEM_ID_NAME_MAXLEN; - char buf[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; int ssp_port, tdm_slot; + char *buf; /* See parse_link_formatted_string() for dynamic naming when(s). */ if (!avs_mach_singular_ssp(mach)) @@ -1557,13 +1557,24 @@ static int avs_route_load(struct snd_soc_component *comp, int index, return 0; tdm_slot = avs_mach_ssp_tdm(mach, ssp_port); + buf = devm_kzalloc(comp->card->dev, len, GFP_KERNEL); + if (!buf) + return -ENOMEM; avs_ssp_sprint(buf, len, route->source, ssp_port, tdm_slot); - strscpy((char *)route->source, buf, len); + route->source = buf; + + buf = devm_kzalloc(comp->card->dev, len, GFP_KERNEL); + if (!buf) + return -ENOMEM; avs_ssp_sprint(buf, len, route->sink, ssp_port, tdm_slot); - strscpy((char *)route->sink, buf, len); + route->sink = buf; + if (route->control) { + buf = devm_kzalloc(comp->card->dev, len, GFP_KERNEL); + if (!buf) + return -ENOMEM; avs_ssp_sprint(buf, len, route->control, ssp_port, tdm_slot); - strscpy((char *)route->control, buf, len); + route->control = buf; } return 0; From daf0b99d4720c9f05bdb81c73b2efdb43fa9def3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Mon, 3 Jun 2024 12:28:17 +0200 Subject: [PATCH 166/778] ASoC: topology: Do not assign fields that are already set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The routes are allocated with kzalloc(), so all fields are zeroed by default, skip unnecessary assignments. Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240603102818.36165-4-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 75d9395a18ed..1db540aaad45 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1072,11 +1072,7 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg, break; } - /* set to NULL atm for tplg users */ - route->connected = NULL; - if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == 0) { - route->control = NULL; - } else { + if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) != 0) { route->control = devm_kmemdup(tplg->dev, elem->control, min(strlen(elem->control), SNDRV_CTL_ELEM_ID_NAME_MAXLEN), From e0e7bc2cbee93778c4ad7d9a792d425ffb5af6f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Mon, 3 Jun 2024 12:28:18 +0200 Subject: [PATCH 167/778] ASoC: topology: Clean up route loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using very long macro name, assign it to shorter variable and use it instead. While doing that, we can reduce multiple if checks using this define to one. Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240603102818.36165-5-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 1db540aaad45..2ac442644ed4 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1021,6 +1021,7 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg, struct snd_soc_tplg_hdr *hdr) { struct snd_soc_dapm_context *dapm = &tplg->comp->dapm; + const size_t maxlen = SNDRV_CTL_ELEM_ID_NAME_MAXLEN; struct snd_soc_tplg_dapm_graph_elem *elem; struct snd_soc_dapm_route *route; int count, i; @@ -1044,38 +1045,27 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg, tplg->pos += sizeof(struct snd_soc_tplg_dapm_graph_elem); /* validate routes */ - if (strnlen(elem->source, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == - SNDRV_CTL_ELEM_ID_NAME_MAXLEN) { - ret = -EINVAL; - break; - } - if (strnlen(elem->sink, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == - SNDRV_CTL_ELEM_ID_NAME_MAXLEN) { - ret = -EINVAL; - break; - } - if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == - SNDRV_CTL_ELEM_ID_NAME_MAXLEN) { + if ((strnlen(elem->source, maxlen) == maxlen) || + (strnlen(elem->sink, maxlen) == maxlen) || + (strnlen(elem->control, maxlen) == maxlen)) { ret = -EINVAL; break; } route->source = devm_kmemdup(tplg->dev, elem->source, - min(strlen(elem->source), - SNDRV_CTL_ELEM_ID_NAME_MAXLEN), + min(strlen(elem->source), maxlen), GFP_KERNEL); route->sink = devm_kmemdup(tplg->dev, elem->sink, - min(strlen(elem->sink), SNDRV_CTL_ELEM_ID_NAME_MAXLEN), + min(strlen(elem->sink), maxlen), GFP_KERNEL); if (!route->source || !route->sink) { ret = -ENOMEM; break; } - if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) != 0) { + if (strnlen(elem->control, maxlen) != 0) { route->control = devm_kmemdup(tplg->dev, elem->control, - min(strlen(elem->control), - SNDRV_CTL_ELEM_ID_NAME_MAXLEN), + min(strlen(elem->control), maxlen), GFP_KERNEL); if (!route->control) { ret = -ENOMEM; From e3209a1827646daaab744aa6a5767b1f57fb5385 Mon Sep 17 00:00:00 2001 From: Thomas GENTY Date: Sat, 8 Jun 2024 19:02:51 +0200 Subject: [PATCH 168/778] bytcr_rt5640 : inverse jack detect for Archos 101 cesium When headphones are plugged in, they appear absent; when they are removed, they appear present. Add a specific entry in bytcr_rt5640 for this device Signed-off-by: Thomas GENTY Reviewed-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20240608170251.99936-1-tomlohave@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index b41a1147f1c3..a64d1989e28a 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -610,6 +610,17 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ARCHOS 101 CESIUM"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_JD_NOT_INV | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"), From d37fe4255abe8e7b419b90c5847e8ec2b8debb08 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2024 15:46:51 +0000 Subject: [PATCH 169/778] tcp: fix race in tcp_v6_syn_recv_sock() tcp_v6_syn_recv_sock() calls ip6_dst_store() before inet_sk(newsk)->pinet6 has been set up. This means ip6_dst_store() writes over the parent (listener) np->dst_cookie. This is racy because multiple threads could share the same parent and their final np->dst_cookie could be wrong. Move ip6_dst_store() call after inet_sk(newsk)->pinet6 has been changed and after the copy of parent ipv6_pinfo. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8c577b651bfc..729faf8bd366 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1439,7 +1439,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ newsk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(newsk, dst, NULL, NULL); inet6_sk_rx_dst_set(newsk, skb); inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); @@ -1450,6 +1449,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + ip6_dst_store(newsk, dst, NULL, NULL); + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; From d029edefed39647c797c2710aedd9d31f84c069e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 6 Jun 2024 19:13:03 +0300 Subject: [PATCH 170/778] net dsa: qca8k: fix usages of device_get_named_child_node() The documentation for device_get_named_child_node() mentions this important point: " The caller is responsible for calling fwnode_handle_put() on the returned fwnode pointer. " Add fwnode_handle_put() to avoid leaked references. Fixes: 1e264f9d2918 ("net: dsa: qca8k: add LEDs basic support") Reviewed-by: Simon Horman Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-leds.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-leds.c b/drivers/net/dsa/qca/qca8k-leds.c index 811ebeeff4ed..43ac68052baf 100644 --- a/drivers/net/dsa/qca/qca8k-leds.c +++ b/drivers/net/dsa/qca/qca8k-leds.c @@ -431,8 +431,11 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d", priv->internal_mdio_bus->id, port_num); - if (!init_data.devicename) + if (!init_data.devicename) { + fwnode_handle_put(led); + fwnode_handle_put(leds); return -ENOMEM; + } ret = devm_led_classdev_register_ext(priv->dev, &port_led->cdev, &init_data); if (ret) @@ -441,6 +444,7 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p kfree(init_data.devicename); } + fwnode_handle_put(leds); return 0; } @@ -471,9 +475,13 @@ qca8k_setup_led_ctrl(struct qca8k_priv *priv) * the correct port for LED setup. */ ret = qca8k_parse_port_leds(priv, port, qca8k_port_to_phy(port_num)); - if (ret) + if (ret) { + fwnode_handle_put(port); + fwnode_handle_put(ports); return ret; + } } + fwnode_handle_put(ports); return 0; } From c6ae073f5903f6c6439d0ac855836a4da5c0a701 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 6 Jun 2024 23:32:48 +0300 Subject: [PATCH 171/778] geneve: Fix incorrect inner network header offset when innerprotoinherit is set When innerprotoinherit is set, the tunneled packets do not have an inner Ethernet header. Change 'maclen' to not always assume the header length is ETH_HLEN, as there might not be a MAC header. This resolves issues with drivers (e.g. mlx5, in mlx5e_tx_tunnel_accel()) who rely on the skb inner network header offset to be correct, and use it for TX offloads. Fixes: d8a6213d70ac ("geneve: fix header validation in geneve[6]_xmit_skb") Signed-off-by: Gal Pressman Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller --- drivers/net/geneve.c | 10 ++++++---- include/net/ip_tunnels.h | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 51495cb4b9be..838e85ddec67 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -815,6 +815,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { + bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); const struct ip_tunnel_key *key = &info->key; @@ -826,7 +827,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb)) + if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs4) @@ -908,7 +909,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, } err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr), - geneve->cfg.inner_proto_inherit); + inner_proto_inherit); if (unlikely(err)) return err; @@ -925,6 +926,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { + bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); const struct ip_tunnel_key *key = &info->key; @@ -935,7 +937,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb)) + if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs6) @@ -997,7 +999,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = ttl ? : ip6_dst_hoplimit(dst); } err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr), - geneve->cfg.inner_proto_inherit); + inner_proto_inherit); if (unlikely(err)) return err; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9a6a08ec7713..1db2417b8ff5 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -461,9 +461,10 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) /* Variant of pskb_inet_may_pull(). */ -static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, + bool inner_proto_inherit) { - int nhlen = 0, maclen = ETH_HLEN; + int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; __be16 type = skb->protocol; /* Essentially this is skb_protocol(skb, true) From 791b4089e326271424b78f2fae778b20e53d071b Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 6 Jun 2024 23:32:49 +0300 Subject: [PATCH 172/778] net/mlx5e: Fix features validation check for tunneled UDP (non-VXLAN) packets Move the vxlan_features_check() call to after we verified the packet is a tunneled VXLAN packet. Without this, tunneled UDP non-VXLAN packets (for ex. GENENVE) might wrongly not get offloaded. In some cases, it worked by chance as GENEVE header is the same size as VXLAN, but it is obviously incorrect. Fixes: e3cfc7e6b7bd ("net/mlx5e: TX, Add geneve tunnel stateless offload support") Signed-off-by: Gal Pressman Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c53c99dde558..a605eae56685 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4875,7 +4875,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, /* Verify if UDP port is being offloaded by HW */ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) - return features; + return vxlan_features_check(skb, features); #if IS_ENABLED(CONFIG_GENEVE) /* Support Geneve offload for default UDP port */ @@ -4901,7 +4901,6 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb, struct mlx5e_priv *priv = netdev_priv(netdev); features = vlan_features_check(skb, features); - features = vxlan_features_check(skb, features); /* Validate if the tunneled packet is being offloaded by HW */ if (skb->encapsulation && From 86fbd9f63a6b42b8f158361334f5a25762aea358 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 15 May 2024 10:32:01 -0400 Subject: [PATCH 173/778] Bluetooth: hci_sync: Fix not using correct handle When setting up an advertisement the code shall always attempt to use the handle set by the instance since it may not be equal to the instance ID. Fixes: e77f43d531af ("Bluetooth: hci_core: Fix not handling hdev->le_num_of_adv_sets=1") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 16daa79b7981..a8a7d2b36870 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1194,7 +1194,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) cp.own_addr_type = own_addr_type; cp.channel_map = hdev->le_adv_channel_map; - cp.handle = instance; + cp.handle = adv ? adv->handle : instance; if (flags & MGMT_ADV_FLAG_SEC_2M) { cp.primary_phy = HCI_ADV_PHY_1M; From 806a5198c05987b748b50f3d0c0cfb3d417381a4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 20 May 2024 16:03:07 -0400 Subject: [PATCH 174/778] Bluetooth: L2CAP: Fix rejecting L2CAP_CONN_PARAM_UPDATE_REQ This removes the bogus check for max > hcon->le_conn_max_interval since the later is just the initial maximum conn interval not the maximum the stack could support which is really 3200=4000ms. In order to pass GAP/CONN/CPUP/BV-05-C one shall probably enter values of the following fields in IXIT that would cause hci_check_conn_params to fail: TSPX_conn_update_int_min TSPX_conn_update_int_max TSPX_conn_update_peripheral_latency TSPX_conn_update_supervision_timeout Link: https://github.com/bluez/bluez/issues/847 Fixes: e4b019515f95 ("Bluetooth: Enforce validation on max value of connection interval") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 36 ++++++++++++++++++++++++++++---- net/bluetooth/l2cap_core.c | 8 +------ 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9231396fe96f..c43716edf205 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2113,18 +2113,46 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, { u16 max_latency; - if (min > max || min < 6 || max > 3200) + if (min > max) { + BT_WARN("min %d > max %d", min, max); return -EINVAL; + } - if (to_multiplier < 10 || to_multiplier > 3200) + if (min < 6) { + BT_WARN("min %d < 6", min); return -EINVAL; + } - if (max >= to_multiplier * 8) + if (max > 3200) { + BT_WARN("max %d > 3200", max); return -EINVAL; + } + + if (to_multiplier < 10) { + BT_WARN("to_multiplier %d < 10", to_multiplier); + return -EINVAL; + } + + if (to_multiplier > 3200) { + BT_WARN("to_multiplier %d > 3200", to_multiplier); + return -EINVAL; + } + + if (max >= to_multiplier * 8) { + BT_WARN("max %d >= to_multiplier %d * 8", max, to_multiplier); + return -EINVAL; + } max_latency = (to_multiplier * 4 / max) - 1; - if (latency > 499 || latency > max_latency) + if (latency > 499) { + BT_WARN("latency %d > 499", latency); return -EINVAL; + } + + if (latency > max_latency) { + BT_WARN("latency %d > max_latency %d", latency, max_latency); + return -EINVAL; + } return 0; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5b509b767557..c49e0d4b3c0d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4647,13 +4647,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - if (max > hcon->le_conn_max_interval) { - BT_DBG("requested connection interval exceeds current bounds."); - err = -EINVAL; - } else { - err = hci_check_conn_params(min, max, latency, to_multiplier); - } - + err = hci_check_conn_params(min, max, latency, to_multiplier); if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else From c695439d198d30e10553a3b98360c5efe77b6903 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 9 Jun 2024 18:06:20 +0300 Subject: [PATCH 175/778] Bluetooth: fix connection setup in l2cap_connect The amp_id argument of l2cap_connect() was removed in commit 84a4bb6548a2 ("Bluetooth: HCI: Remove HCI_AMP support") It was always called with amp_id == 0, i.e. AMP_ID_BREDR == 0x00 (ie. non-AMP controller). In the above commit, the code path for amp_id != 0 was preserved, although it should have used the amp_id == 0 one. Restore the previous behavior of the non-AMP code path, to fix problems with L2CAP connections. Fixes: 84a4bb6548a2 ("Bluetooth: HCI: Remove HCI_AMP support") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c49e0d4b3c0d..aed025734d04 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4011,8 +4011,8 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, status = L2CAP_CS_AUTHOR_PEND; chan->ops->defer(chan); } else { - l2cap_state_change(chan, BT_CONNECT2); - result = L2CAP_CR_PEND; + l2cap_state_change(chan, BT_CONFIG); + result = L2CAP_CR_SUCCESS; status = L2CAP_CS_NO_INFO; } } else { From 161f73c2c7d061a78390388811e3a6d11e99ce9d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Jun 2024 11:08:20 -0400 Subject: [PATCH 176/778] bcachefs: Split out btree_write_submit_wq Split the workqueues for btree read completions and btree write submissions; we don't want concurrency control on btree read completions, but we do want concurrency control on write submissions, else blocking in submit_bio() will cause a ton of kworkers to be allocated. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 3 ++- fs/bcachefs/btree_io.c | 8 ++++---- fs/bcachefs/super.c | 10 +++++++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 2a538eb2af11..2992a644d822 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -790,7 +790,8 @@ struct bch_fs { /* BTREE CACHE */ struct bio_set btree_bio; - struct workqueue_struct *io_complete_wq; + struct workqueue_struct *btree_read_complete_wq; + struct workqueue_struct *btree_write_submit_wq; struct btree_root btree_roots_known[BTREE_ID_NR]; DARRAY(struct btree_root) btree_roots_extra; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 829c1b91477d..7bca15c604f5 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1389,7 +1389,7 @@ static void btree_node_read_endio(struct bio *bio) bch2_latency_acct(ca, rb->start_time, READ); } - queue_work(c->io_complete_wq, &rb->work); + queue_work(c->btree_read_complete_wq, &rb->work); } struct btree_node_read_all { @@ -1656,7 +1656,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool btree_node_read_all_replicas_done(&ra->cl.work); } else { continue_at(&ra->cl, btree_node_read_all_replicas_done, - c->io_complete_wq); + c->btree_read_complete_wq); } return 0; @@ -1737,7 +1737,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, if (sync) btree_node_read_work(&rb->work); else - queue_work(c->io_complete_wq, &rb->work); + queue_work(c->btree_read_complete_wq, &rb->work); } } @@ -2229,7 +2229,7 @@ do_write: atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes); INIT_WORK(&wbio->work, btree_write_submit); - queue_work(c->io_complete_wq, &wbio->work); + queue_work(c->btree_write_submit_wq, &wbio->work); return; err: set_btree_node_noevict(b); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index df2bea38e83f..65e239d32915 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -582,8 +582,10 @@ static void __bch2_fs_free(struct bch_fs *c) if (c->write_ref_wq) destroy_workqueue(c->write_ref_wq); - if (c->io_complete_wq) - destroy_workqueue(c->io_complete_wq); + if (c->btree_write_submit_wq) + destroy_workqueue(c->btree_write_submit_wq); + if (c->btree_read_complete_wq) + destroy_workqueue(c->btree_read_complete_wq); if (c->copygc_wq) destroy_workqueue(c->copygc_wq); if (c->btree_io_complete_wq) @@ -878,8 +880,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) || !(c->copygc_wq = alloc_workqueue("bcachefs_copygc", WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || - !(c->io_complete_wq = alloc_workqueue("bcachefs_io", + !(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete", WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) || + !(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit", + WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) || !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref", WQ_FREEZABLE, 0)) || #ifndef BCH_WRITE_REF_DEBUG From 1c8cc24eef4a0e824f75e38f82766e4baede24ca Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Jun 2024 21:16:29 -0400 Subject: [PATCH 177/778] bcachefs: Fix incorrect error handling found_btree_node_is_readable() error handling here is slightly odd, which is why we were accidently calling evict() on an error pointer Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_node_scan.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 45cb8149d374..2cb0442f6cc9 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -72,10 +72,11 @@ static bool found_btree_node_is_readable(struct btree_trans *trans, struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false); bool ret = !IS_ERR_OR_NULL(b); - if (ret) { - f->sectors_written = b->written; - six_unlock_read(&b->c.lock); - } + if (!ret) + return ret; + + f->sectors_written = b->written; + six_unlock_read(&b->c.lock); /* * We might update this node's range; if that happens, we need the node From 04f635ede85b2e7457f3029b9179079a8ac42ff4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Jun 2024 14:02:11 -0400 Subject: [PATCH 178/778] bcachefs: Delete incorrect BTREE_ID_NR assertion for forwards compat we now explicitly allow mounting and using filesystems with unknown btrees, and we have to walk them for fsck. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index d3bcb4e4e230..53b63be537e5 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -221,11 +221,8 @@ static void bch2_btree_path_verify(struct btree_trans *trans, struct btree_path *path) { struct bch_fs *c = trans->c; - unsigned i; - EBUG_ON(path->btree_id >= BTREE_ID_NR); - - for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) { + for (unsigned i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) { if (!path->l[i].b) { BUG_ON(!path->cached && bch2_btree_id_root(c, path->btree_id)->b->c.level > i); @@ -251,8 +248,6 @@ static void bch2_btree_iter_verify(struct btree_iter *iter) { struct btree_trans *trans = iter->trans; - BUG_ON(iter->btree_id >= BTREE_ID_NR); - BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached); BUG_ON((iter->flags & BTREE_ITER_is_extents) && From dab1870439a1176969c5bf06247e088ad0a3551d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Jun 2024 21:45:24 -0400 Subject: [PATCH 179/778] bcachefs: fix stack frame size in fsck.c fsck.c always runs top of the stack so we're not too concerned here; noinline_for_stack is sufficient Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index fd277bd58ed3..921bcdb3e5e4 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1677,6 +1677,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) trans_was_restarted(trans, restart_count); } +noinline_for_stack static int check_dirent_inode_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d, @@ -1773,6 +1774,7 @@ out_noiter: return ret; } +noinline_for_stack static int check_dirent_target(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d, @@ -1847,6 +1849,7 @@ found: return ret; } +noinline_for_stack static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d) { From 26447d224a7f48f669bf95a98fa29c8f50da4d63 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Mon, 3 Jun 2024 21:23:35 +0800 Subject: [PATCH 180/778] bcachefs: fix the display format for show-super There are three keys displayed in non-uniform format. Let's fix them. [Before] ``` Label: testbcachefs Version: 1.9: (unknown version) Version upgrade complete: 0.0: (unknown version) ``` [After] ``` Label: testbcachefs Version: 1.9: (unknown version) Version upgrade complete: 0.0: (unknown version) ``` Fixes: 7423330e30ab ("bcachefs: prt_printf() now respects \r\n\t") Signed-off-by: Hongbo Li Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index d73a0222f709..055478d21e9e 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1310,15 +1310,15 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, prt_printf(out, "Device index:\t%u\n", sb->dev_idx); - prt_str(out, "Label:\t"); + prt_printf(out, "Label:\t"); prt_printf(out, "%.*s", (int) sizeof(sb->label), sb->label); prt_newline(out); - prt_str(out, "Version:\t"); + prt_printf(out, "Version:\t"); bch2_version_to_text(out, le16_to_cpu(sb->version)); prt_newline(out); - prt_str(out, "Version upgrade complete:\t"); + prt_printf(out, "Version upgrade complete:\t"); bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb)); prt_newline(out); From 5ae67abcdfdfa49de84be00320ffe8a669ef674f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Jun 2024 18:56:59 -0400 Subject: [PATCH 181/778] bcachefs: Enable automatic shrinking for rhashtables Since the key cache shrinker walks the rhashtable, a mostly empty rhashtable leads to really nasty reclaim performance issues. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 9 +++++---- fs/bcachefs/btree_key_cache.c | 9 +++++---- fs/bcachefs/io_read.c | 7 ++++--- fs/bcachefs/movinggc.c | 7 ++++--- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 9e4ed75d3675..4f5e411771ba 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -91,10 +91,11 @@ static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, } static const struct rhashtable_params bch_btree_cache_params = { - .head_offset = offsetof(struct btree, hash), - .key_offset = offsetof(struct btree, hash_val), - .key_len = sizeof(u64), - .obj_cmpfn = bch2_btree_cache_cmp_fn, + .head_offset = offsetof(struct btree, hash), + .key_offset = offsetof(struct btree, hash_val), + .key_len = sizeof(u64), + .obj_cmpfn = bch2_btree_cache_cmp_fn, + .automatic_shrinking = true, }; static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 34056aaece00..fb731d52b1ec 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -32,10 +32,11 @@ static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg, } static const struct rhashtable_params bch2_btree_key_cache_params = { - .head_offset = offsetof(struct bkey_cached, hash), - .key_offset = offsetof(struct bkey_cached, key), - .key_len = sizeof(struct bkey_cached_key), - .obj_cmpfn = bch2_btree_key_cache_cmp_fn, + .head_offset = offsetof(struct bkey_cached, hash), + .key_offset = offsetof(struct bkey_cached, key), + .key_len = sizeof(struct bkey_cached_key), + .obj_cmpfn = bch2_btree_key_cache_cmp_fn, + .automatic_shrinking = true, }; __flatten diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index f57486794484..862b79f86b91 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -84,9 +84,10 @@ struct promote_op { }; static const struct rhashtable_params bch_promote_params = { - .head_offset = offsetof(struct promote_op, hash), - .key_offset = offsetof(struct promote_op, pos), - .key_len = sizeof(struct bpos), + .head_offset = offsetof(struct promote_op, hash), + .key_offset = offsetof(struct promote_op, pos), + .key_len = sizeof(struct bpos), + .automatic_shrinking = true, }; static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 10bfb31c151b..eb49dd045eff 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -35,9 +35,10 @@ struct buckets_in_flight { }; static const struct rhashtable_params bch_move_bucket_params = { - .head_offset = offsetof(struct move_bucket_in_flight, hash), - .key_offset = offsetof(struct move_bucket_in_flight, bucket.k), - .key_len = sizeof(struct move_bucket_key), + .head_offset = offsetof(struct move_bucket_in_flight, hash), + .key_offset = offsetof(struct move_bucket_in_flight, bucket.k), + .key_len = sizeof(struct move_bucket_key), + .automatic_shrinking = true, }; static struct move_bucket_in_flight * From bc65e98e68dac2c0b588e67ea75ee8674c208fc7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Jun 2024 19:12:11 -0400 Subject: [PATCH 182/778] bcachefs: increase key cache shrinker batch size Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index fb731d52b1ec..eaf012ddca08 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -1026,9 +1026,10 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) if (!shrink) return -BCH_ERR_ENOMEM_fs_btree_cache_init; bc->shrink = shrink; - shrink->seeks = 0; shrink->count_objects = bch2_btree_key_cache_count; shrink->scan_objects = bch2_btree_key_cache_scan; + shrink->batch = 1 << 14; + shrink->seeks = 0; shrink->private_data = c; shrinker_register(shrink); return 0; From 9ac3e660cac3e29cfc817b6a23735b70f12bd16a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Jun 2024 19:30:41 -0400 Subject: [PATCH 183/778] bcachefs: set sb->s_shrinker->seeks = 0 inodes and dentries are still present in the btree node cache, in much more compact form Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index cd388f1702dc..f6f1dbc1fe15 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1967,6 +1967,7 @@ got_sb: sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); sb->s_uuid = c->sb.user_uuid; + sb->s_shrink->seeks = 0; c->vfs_sb = sb; strscpy(sb->s_id, c->name, sizeof(sb->s_id)); From 2760bfe38826f65b1806f1cc62744404b5917dea Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Jun 2024 20:01:34 -0400 Subject: [PATCH 184/778] bcachefs: Fix reporting of freed objects from key cache shrinker We count objects as freed when we move them to the srcu-pending lists because we're doing the equivalent of a kfree_srcu(); the only difference is managing the pending list ourself means we can allocate from the pending list. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index eaf012ddca08..2ad3d27c91e6 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -841,7 +841,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, six_lock_exit(&ck->c.lock); kmem_cache_free(bch2_key_cache, ck); atomic_long_dec(&bc->nr_freed); - freed++; bc->nr_freed_nonpcpu--; bc->freed++; } @@ -855,7 +854,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, six_lock_exit(&ck->c.lock); kmem_cache_free(bch2_key_cache, ck); atomic_long_dec(&bc->nr_freed); - freed++; bc->nr_freed_pcpu--; bc->freed++; } @@ -877,23 +875,22 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { bc->skipped_dirty++; - goto next; } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) { clear_bit(BKEY_CACHED_ACCESSED, &ck->flags); bc->skipped_accessed++; - goto next; - } else if (bkey_cached_lock_for_evict(ck)) { + } else if (!bkey_cached_lock_for_evict(ck)) { + bc->skipped_lock_fail++; + } else { bkey_cached_evict(bc, ck); bkey_cached_free(bc, ck); bc->moved_to_freelist++; - } else { - bc->skipped_lock_fail++; + freed++; } scanned++; if (scanned >= nr) break; -next: + pos = next; } From bf2b356afdcafa18db1b409f7039059d1fd6f25f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 7 Jun 2024 14:25:18 -0400 Subject: [PATCH 185/778] bcachefs: Leave a buffer in the btree key cache to avoid lock thrashing Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 2ad3d27c91e6..2d3c0d45c37f 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -915,6 +915,14 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink, long nr = atomic_long_read(&bc->nr_keys) - atomic_long_read(&bc->nr_dirty); + /* + * Avoid hammering our shrinker too much if it's nearly empty - the + * shrinker code doesn't take into account how big our cache is, if it's + * mostly empty but the system is under memory pressure it causes nasty + * lock contention: + */ + nr -= 128; + return max(0L, nr); } From f9035b0ce60cfaf8abd7e1cd5c55690c739aaaf6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Jun 2024 21:59:12 -0400 Subject: [PATCH 186/778] bcachefs: Fix refcount leak in check_fix_ptrs() fsck_err() does a goto fsck_err on error; factor out check_fix_ptr() so that our error label can drop our device ref. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 263 ++++++++++++++++++++++-------------------- 1 file changed, 140 insertions(+), 123 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index ed97712d0db1..75a54ed977d7 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -465,6 +465,143 @@ int bch2_update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64 return bch2_update_replicas_list(trans, &r.e, sectors); } +static int bch2_check_fix_ptr(struct btree_trans *trans, + struct bkey_s_c k, + struct extent_ptr_decoded p, + const union bch_extent_entry *entry, + bool *do_update) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + int ret = 0; + + struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); + if (!ca) { + if (fsck_err(c, ptr_to_invalid_device, + "pointer to missing device %u\n" + "while marking %s", + p.ptr.dev, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + *do_update = true; + return 0; + } + + struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); + enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); + + if (fsck_err_on(!g->gen_valid, + c, ptr_to_missing_alloc_key, + "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" + "while marking %s", + p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), + bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), + p.ptr.gen, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + if (!p.ptr.cached) { + g->gen_valid = true; + g->gen = p.ptr.gen; + } else { + *do_update = true; + } + } + + if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, + c, ptr_gen_newer_than_bucket_gen, + "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" + "while marking %s", + p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), + bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), + p.ptr.gen, g->gen, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + if (!p.ptr.cached && + (g->data_type != BCH_DATA_btree || + data_type == BCH_DATA_btree)) { + g->gen_valid = true; + g->gen = p.ptr.gen; + g->data_type = 0; + g->dirty_sectors = 0; + g->cached_sectors = 0; + } else { + *do_update = true; + } + } + + if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, + c, ptr_gen_newer_than_bucket_gen, + "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" + "while marking %s", + p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, + bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), + p.ptr.gen, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + *do_update = true; + + if (fsck_err_on(!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0, + c, stale_dirty_ptr, + "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" + "while marking %s", + p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), + bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), + p.ptr.gen, g->gen, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + *do_update = true; + + if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) + goto out; + + if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type), + c, ptr_bucket_data_type_mismatch, + "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" + "while marking %s", + p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, + bch2_data_type_str(g->data_type), + bch2_data_type_str(data_type), + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + if (data_type == BCH_DATA_btree) { + g->gen_valid = true; + g->gen = p.ptr.gen; + g->data_type = data_type; + g->dirty_sectors = 0; + g->cached_sectors = 0; + } else { + *do_update = true; + } + } + + if (p.has_ec) { + struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); + + if (fsck_err_on(!m || !m->alive, c, + ptr_to_missing_stripe, + "pointer to nonexistent stripe %llu\n" + "while marking %s", + (u64) p.ec.idx, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + *do_update = true; + + if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), c, + ptr_to_incorrect_stripe, + "pointer does not match stripe %llu\n" + "while marking %s", + (u64) p.ec.idx, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + *do_update = true; + } +out: +fsck_err: + bch2_dev_put(ca); + printbuf_exit(&buf); + return ret; +} + int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_s_c k, enum btree_iter_update_trigger_flags flags) @@ -480,128 +617,9 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, percpu_down_read(&c->mark_lock); bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { - struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); - if (!ca) { - if (fsck_err(c, ptr_to_invalid_device, - "pointer to missing device %u\n" - "while marking %s", - p.ptr.dev, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - do_update = true; - continue; - } - - struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); - enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry_c); - - if (fsck_err_on(!g->gen_valid, - c, ptr_to_missing_alloc_key, - "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" - "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), - bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), - p.ptr.gen, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - if (!p.ptr.cached) { - g->gen_valid = true; - g->gen = p.ptr.gen; - } else { - do_update = true; - } - } - - if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, - c, ptr_gen_newer_than_bucket_gen, - "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" - "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), - bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), - p.ptr.gen, g->gen, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - if (!p.ptr.cached && - (g->data_type != BCH_DATA_btree || - data_type == BCH_DATA_btree)) { - g->gen_valid = true; - g->gen = p.ptr.gen; - g->data_type = 0; - g->dirty_sectors = 0; - g->cached_sectors = 0; - } else { - do_update = true; - } - } - - if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, - c, ptr_gen_newer_than_bucket_gen, - "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" - "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, - bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), - p.ptr.gen, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - do_update = true; - - if (fsck_err_on(!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0, - c, stale_dirty_ptr, - "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" - "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), - bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), - p.ptr.gen, g->gen, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - do_update = true; - - if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) - goto next; - - if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type), - c, ptr_bucket_data_type_mismatch, - "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" - "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, - bch2_data_type_str(g->data_type), - bch2_data_type_str(data_type), - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - if (data_type == BCH_DATA_btree) { - g->gen_valid = true; - g->gen = p.ptr.gen; - g->data_type = data_type; - g->dirty_sectors = 0; - g->cached_sectors = 0; - } else { - do_update = true; - } - } - - if (p.has_ec) { - struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); - - if (fsck_err_on(!m || !m->alive, c, - ptr_to_missing_stripe, - "pointer to nonexistent stripe %llu\n" - "while marking %s", - (u64) p.ec.idx, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - do_update = true; - - if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), c, - ptr_to_incorrect_stripe, - "pointer does not match stripe %llu\n" - "while marking %s", - (u64) p.ec.idx, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - do_update = true; - } -next: - bch2_dev_put(ca); + ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); + if (ret) + goto err; } if (do_update) { @@ -716,7 +734,6 @@ found: bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); } err: -fsck_err: percpu_up_read(&c->mark_lock); printbuf_exit(&buf); return ret; From e0cb5722e112811d32d600ef750f9b39e6f684ca Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 7 Jun 2024 21:02:06 -0400 Subject: [PATCH 187/778] bcachefs: Fix snapshot_create_lock lock ordering ====================================================== WARNING: possible circular locking dependency detected 6.10.0-rc2-ktest-00018-gebd1d148b278 #144 Not tainted ------------------------------------------------------ fio/1345 is trying to acquire lock: ffff88813e200ab8 (&c->snapshot_create_lock){++++}-{3:3}, at: bch2_truncate+0x76/0xf0 but task is already holding lock: ffff888105a1fa38 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at: do_truncate+0x7b/0xc0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}: down_write+0x3d/0xd0 bch2_write_iter+0x1c0/0x10f0 vfs_write+0x24a/0x560 __x64_sys_pwrite64+0x77/0xb0 x64_sys_call+0x17e5/0x1ab0 do_syscall_64+0x68/0x130 entry_SYSCALL_64_after_hwframe+0x4b/0x53 -> #1 (sb_writers#10){.+.+}-{0:0}: mnt_want_write+0x4a/0x1d0 filename_create+0x69/0x1a0 user_path_create+0x38/0x50 bch2_fs_file_ioctl+0x315/0xbf0 __x64_sys_ioctl+0x297/0xaf0 x64_sys_call+0x10cb/0x1ab0 do_syscall_64+0x68/0x130 entry_SYSCALL_64_after_hwframe+0x4b/0x53 -> #0 (&c->snapshot_create_lock){++++}-{3:3}: __lock_acquire+0x1445/0x25b0 lock_acquire+0xbd/0x2b0 down_read+0x40/0x180 bch2_truncate+0x76/0xf0 bchfs_truncate+0x240/0x3f0 bch2_setattr+0x7b/0xb0 notify_change+0x322/0x4b0 do_truncate+0x8b/0xc0 do_ftruncate+0x110/0x270 __x64_sys_ftruncate+0x43/0x80 x64_sys_call+0x1373/0x1ab0 do_syscall_64+0x68/0x130 entry_SYSCALL_64_after_hwframe+0x4b/0x53 other info that might help us debug this: Chain exists of: &c->snapshot_create_lock --> sb_writers#10 --> &sb->s_type->i_mutex_key#13 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sb->s_type->i_mutex_key#13); lock(sb_writers#10); lock(&sb->s_type->i_mutex_key#13); rlock(&c->snapshot_create_lock); *** DEADLOCK *** Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 205a323ffc6d..3551a737181b 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -308,8 +308,8 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) return ret; } -static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, - struct bch_ioctl_subvolume arg) +static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, + struct bch_ioctl_subvolume arg) { struct inode *dir; struct bch_inode_info *inode; @@ -406,9 +406,12 @@ retry: !arg.src_ptr) snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol; + down_write(&c->snapshot_create_lock); inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), dst_dentry, arg.mode|S_IFDIR, 0, snapshot_src, create_flags); + up_write(&c->snapshot_create_lock); + error = PTR_ERR_OR_ZERO(inode); if (error) goto err3; @@ -429,16 +432,6 @@ err1: return error; } -static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, - struct bch_ioctl_subvolume arg) -{ - down_write(&c->snapshot_create_lock); - long ret = __bch2_ioctl_subvolume_create(c, filp, arg); - up_write(&c->snapshot_create_lock); - - return ret; -} - static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, struct bch_ioctl_subvolume arg) { From 9c4acd19bbff5db4629c193366f82960e38d1c6f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Jun 2024 14:50:06 -0400 Subject: [PATCH 188/778] bcachefs: Replace bucket_valid() asserts in bucket lookup with proper checks The bucket_gens array and gc_buckets array known their own size; we should be using those members, and returning an error. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 2 ++ fs/bcachefs/buckets.c | 2 ++ fs/bcachefs/buckets.h | 6 ++++-- fs/bcachefs/buckets_types.h | 2 ++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index dc97991bcd6a..130a0131cd73 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -990,6 +990,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c) buckets->first_bucket = ca->mi.first_bucket; buckets->nbuckets = ca->mi.nbuckets; + buckets->nbuckets_minus_first = + buckets->nbuckets - buckets->first_bucket; rcu_assign_pointer(ca->buckets_gc, buckets); } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 75a54ed977d7..99a7824d0de2 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1612,6 +1612,8 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) bucket_gens->first_bucket = ca->mi.first_bucket; bucket_gens->nbuckets = nbuckets; + bucket_gens->nbuckets_minus_first = + bucket_gens->nbuckets - bucket_gens->first_bucket; if (resize) { down_write(&c->gc_lock); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 617ffde2fb7a..e1a5e3082bbf 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -93,7 +93,8 @@ static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b) { struct bucket_array *buckets = gc_bucket_array(ca); - BUG_ON(!bucket_valid(ca, b)); + if (b - buckets->first_bucket >= buckets->nbuckets_minus_first) + return NULL; return buckets->b + b; } @@ -110,7 +111,8 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) { struct bucket_gens *gens = bucket_gens(ca); - BUG_ON(!bucket_valid(ca, b)); + if (b - gens->first_bucket >= gens->nbuckets_minus_first) + return NULL; return gens->b + b; } diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 6a31740222a7..f636e17c4caf 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -22,6 +22,7 @@ struct bucket_array { struct rcu_head rcu; u16 first_bucket; size_t nbuckets; + size_t nbuckets_minus_first; struct bucket b[]; }; @@ -29,6 +30,7 @@ struct bucket_gens { struct rcu_head rcu; u16 first_bucket; size_t nbuckets; + size_t nbuckets_minus_first; u8 b[]; }; From 9432e90df1b8a544f220fd455b2fa39eed8a535d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Jun 2024 15:06:22 -0400 Subject: [PATCH 189/778] bcachefs: Check for invalid bucket from bucket_gen(), gc_bucket() Turn more asserts into proper recoverable error paths. Reported-by: syzbot+246b47da27f8e7e7d6fb@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 22 +++++++++++++-- fs/bcachefs/btree_gc.c | 15 ++++++---- fs/bcachefs/buckets.c | 50 ++++++++++++++++++++++++---------- fs/bcachefs/buckets.h | 11 +++++--- fs/bcachefs/ec.c | 26 ++++++++++++++---- fs/bcachefs/extents.c | 9 ++++-- fs/bcachefs/io_read.c | 32 ++++++++++++++++------ fs/bcachefs/io_write.c | 19 ++++++++++--- 8 files changed, 136 insertions(+), 48 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 346cd91f91f9..c4b6601f5b74 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -741,6 +741,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, enum btree_iter_update_trigger_flags flags) { struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; int ret = 0; struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p); @@ -860,8 +861,14 @@ int bch2_trigger_alloc(struct btree_trans *trans, } percpu_down_read(&c->mark_lock); - if (new_a->gen != old_a->gen) - *bucket_gen(ca, new.k->p.offset) = new_a->gen; + if (new_a->gen != old_a->gen) { + u8 *gen = bucket_gen(ca, new.k->p.offset); + if (unlikely(!gen)) { + percpu_up_read(&c->mark_lock); + goto invalid_bucket; + } + *gen = new_a->gen; + } bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false); percpu_up_read(&c->mark_lock); @@ -895,6 +902,11 @@ int bch2_trigger_alloc(struct btree_trans *trans, percpu_down_read(&c->mark_lock); struct bucket *g = gc_bucket(ca, new.k->p.offset); + if (unlikely(!g)) { + percpu_up_read(&c->mark_lock); + goto invalid_bucket; + } + g->gen_valid = 1; bucket_lock(g); @@ -910,8 +922,14 @@ int bch2_trigger_alloc(struct btree_trans *trans, percpu_up_read(&c->mark_lock); } err: + printbuf_exit(&buf); bch2_dev_put(ca); return ret; +invalid_bucket: + bch2_fs_inconsistent(c, "reference to invalid bucket\n %s", + (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf)); + ret = -EIO; + goto err; } /* diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 130a0131cd73..0e477a926579 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -874,6 +874,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans, const struct bch_alloc_v4 *old; int ret; + if (!bucket_valid(ca, k.k->p.offset)) + return 0; + old = bch2_alloc_to_v4(k, &old_convert); gc = new = *old; @@ -1005,12 +1008,14 @@ static int bch2_gc_alloc_start(struct bch_fs *c) continue; } - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); + if (bucket_valid(ca, k.k->p.offset)) { + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); - struct bucket *g = gc_bucket(ca, k.k->p.offset); - g->gen_valid = 1; - g->gen = a->gen; + struct bucket *g = gc_bucket(ca, k.k->p.offset); + g->gen_valid = 1; + g->gen = a->gen; + } 0; }))); bch2_dev_put(ca); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 99a7824d0de2..743d57eba760 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -488,6 +488,17 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, } struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); + if (!g) { + if (fsck_err(c, ptr_to_invalid_device, + "pointer to invalid bucket on device %u\n" + "while marking %s", + p.ptr.dev, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + *do_update = true; + goto out; + } + enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); if (fsck_err_on(!g->gen_valid, @@ -577,8 +588,8 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, if (p.has_ec) { struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); - if (fsck_err_on(!m || !m->alive, c, - ptr_to_missing_stripe, + if (fsck_err_on(!m || !m->alive, + c, ptr_to_missing_stripe, "pointer to nonexistent stripe %llu\n" "while marking %s", (u64) p.ec.idx, @@ -586,8 +597,8 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, bch2_bkey_val_to_text(&buf, c, k), buf.buf))) *do_update = true; - if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), c, - ptr_to_incorrect_stripe, + if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), + c, ptr_to_incorrect_stripe, "pointer does not match stripe %llu\n" "while marking %s", (u64) p.ec.idx, @@ -1004,6 +1015,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, enum btree_iter_update_trigger_flags flags) { bool insert = !(flags & BTREE_TRIGGER_overwrite); + struct printbuf buf = PRINTBUF; int ret = 0; struct bch_fs *c = trans->c; @@ -1036,6 +1048,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans, if (flags & BTREE_TRIGGER_gc) { percpu_down_read(&c->mark_lock); struct bucket *g = gc_bucket(ca, bucket.offset); + if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", + p.ptr.dev, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = -EIO; + goto err_unlock; + } + bucket_lock(g); struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; ret = __mark_pointer(trans, ca, k, &p.ptr, *sectors, bp.data_type, &new); @@ -1044,10 +1063,12 @@ static int bch2_trigger_pointer(struct btree_trans *trans, bch2_dev_usage_update(c, ca, &old, &new, 0, true); } bucket_unlock(g); +err_unlock: percpu_up_read(&c->mark_lock); } err: bch2_dev_put(ca); + printbuf_exit(&buf); return ret; } @@ -1335,10 +1356,11 @@ static int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b, enum bch_data_type data_type, unsigned sectors, enum btree_iter_update_trigger_flags flags) { - int ret = 0; - percpu_down_read(&c->mark_lock); struct bucket *g = gc_bucket(ca, b); + if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", + ca->dev_idx, bch2_data_type_str(data_type))) + goto err_unlock; bucket_lock(g); struct bch_alloc_v4 old = bucket_m_to_alloc(*g); @@ -1347,29 +1369,27 @@ static int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, g->data_type != data_type, c, "different types of data in same bucket: %s, %s", bch2_data_type_str(g->data_type), - bch2_data_type_str(data_type))) { - ret = -EIO; + bch2_data_type_str(data_type))) goto err; - } if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c, "bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size", ca->dev_idx, b, g->gen, bch2_data_type_str(g->data_type ?: data_type), - g->dirty_sectors, sectors)) { - ret = -EIO; + g->dirty_sectors, sectors)) goto err; - } g->data_type = data_type; g->dirty_sectors += sectors; struct bch_alloc_v4 new = bucket_m_to_alloc(*g); + bch2_dev_usage_update(c, ca, &old, &new, 0, true); + percpu_up_read(&c->mark_lock); + return 0; err: bucket_unlock(g); - if (!ret) - bch2_dev_usage_update(c, ca, &old, &new, 0, true); +err_unlock: percpu_up_read(&c->mark_lock); - return ret; + return -EIO; } int bch2_trans_mark_metadata_bucket(struct btree_trans *trans, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index e1a5e3082bbf..80ee0be9793e 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -172,19 +172,22 @@ static inline int gen_after(u8 a, u8 b) return r > 0 ? r : 0; } -static inline u8 dev_ptr_stale_rcu(struct bch_dev *ca, const struct bch_extent_ptr *ptr) +static inline int dev_ptr_stale_rcu(struct bch_dev *ca, const struct bch_extent_ptr *ptr) { - return gen_after(*bucket_gen(ca, PTR_BUCKET_NR(ca, ptr)), ptr->gen); + u8 *gen = bucket_gen(ca, PTR_BUCKET_NR(ca, ptr)); + if (!gen) + return -1; + return gen_after(*gen, ptr->gen); } /** * dev_ptr_stale() - check if a pointer points into a bucket that has been * invalidated. */ -static inline u8 dev_ptr_stale(struct bch_dev *ca, const struct bch_extent_ptr *ptr) +static inline int dev_ptr_stale(struct bch_dev *ca, const struct bch_extent_ptr *ptr) { rcu_read_lock(); - u8 ret = dev_ptr_stale_rcu(ca, ptr); + int ret = dev_ptr_stale_rcu(ca, ptr); rcu_read_unlock(); return ret; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index d8b9beca3776..83e279d41829 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -268,6 +268,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, { struct bch_fs *c = trans->c; const struct bch_extent_ptr *ptr = s.v->ptrs + ptr_idx; + struct printbuf buf = PRINTBUF; int ret = 0; struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev); @@ -289,6 +290,13 @@ static int mark_stripe_bucket(struct btree_trans *trans, if (flags & BTREE_TRIGGER_gc) { percpu_down_read(&c->mark_lock); struct bucket *g = gc_bucket(ca, bucket.offset); + if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", + ptr->dev, + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { + ret = -EIO; + goto err_unlock; + } + bucket_lock(g); struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); @@ -297,10 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans, bch2_dev_usage_update(c, ca, &old, &new, 0, true); } bucket_unlock(g); +err_unlock: percpu_up_read(&c->mark_lock); } err: bch2_dev_put(ca); + printbuf_exit(&buf); return ret; } @@ -714,10 +724,12 @@ static void ec_block_endio(struct bio *bio) bch2_blk_status_to_str(bio->bi_status))) clear_bit(ec_bio->idx, ec_bio->buf->valid); - if (dev_ptr_stale(ca, ptr)) { + int stale = dev_ptr_stale(ca, ptr); + if (stale) { bch_err_ratelimited(ca->fs, - "error %s stripe: stale pointer after io", - bio_data_dir(bio) == READ ? "reading from" : "writing to"); + "error %s stripe: stale/invalid pointer (%i) after io", + bio_data_dir(bio) == READ ? "reading from" : "writing to", + stale); clear_bit(ec_bio->idx, ec_bio->buf->valid); } @@ -743,10 +755,12 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, return; } - if (dev_ptr_stale(ca, ptr)) { + int stale = dev_ptr_stale(ca, ptr); + if (stale) { bch_err_ratelimited(c, - "error %s stripe: stale pointer", - rw == READ ? "reading from" : "writing to"); + "error %s stripe: stale pointer (%i)", + rw == READ ? "reading from" : "writing to", + stale); clear_bit(idx, buf->valid); return; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 469037929685..410b8bd81b5a 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -137,7 +137,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); - if (p.ptr.cached && (!ca || dev_ptr_stale(ca, &p.ptr))) + if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr))) continue; f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; @@ -999,7 +999,7 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) bch2_bkey_drop_ptrs(k, ptr, ptr->cached && (ca = bch2_dev_rcu(c, ptr->dev)) && - dev_ptr_stale_rcu(ca, ptr)); + dev_ptr_stale_rcu(ca, ptr) > 0); rcu_read_unlock(); return bkey_deleted(k.k); @@ -1024,8 +1024,11 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc prt_str(out, " cached"); if (ptr->unwritten) prt_str(out, " unwritten"); - if (bucket_valid(ca, b) && dev_ptr_stale_rcu(ca, ptr)) + int stale = dev_ptr_stale_rcu(ca, ptr); + if (stale > 0) prt_printf(out, " stale"); + else if (stale) + prt_printf(out, " invalid"); } rcu_read_unlock(); --out->atomic; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 862b79f86b91..c97fa7002b06 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -777,18 +777,32 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, PTR_BUCKET_POS(ca, &ptr), BTREE_ITER_cached); - prt_printf(&buf, "Attempting to read from stale dirty pointer:\n"); - printbuf_indent_add(&buf, 2); + u8 *gen = bucket_gen(ca, iter.pos.offset); + if (gen) { - bch2_bkey_val_to_text(&buf, c, k); - prt_newline(&buf); + prt_printf(&buf, "Attempting to read from stale dirty pointer:\n"); + printbuf_indent_add(&buf, 2); - prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset)); - - ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); - if (!ret) { - prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, k); + prt_newline(&buf); + + prt_printf(&buf, "memory gen: %u", *gen); + + ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); + if (!ret) { + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, c, k); + } + } else { + prt_printf(&buf, "Attempting to read from invalid bucket %llu:%llu:\n", + iter.pos.inode, iter.pos.offset); + printbuf_indent_add(&buf, 2); + + prt_printf(&buf, "first bucket %u nbuckets %llu\n", + ca->mi.first_bucket, ca->mi.nbuckets); + + bch2_bkey_val_to_text(&buf, c, k); + prt_newline(&buf); } bch2_fs_inconsistent(c, "%s", buf.buf); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 9401d13e31bb..05e0cbef420b 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1220,7 +1220,7 @@ static void bch2_nocow_write(struct bch_write_op *op) DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets; u32 snapshot; struct bucket_to_lock *stale_at; - int ret; + int stale, ret; if (op->flags & BCH_WRITE_MOVE) return; @@ -1299,7 +1299,8 @@ retry: BUCKET_NOCOW_LOCK_UPDATE); rcu_read_lock(); - bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen); + u8 *gen = bucket_gen(ca, i->b.offset); + stale = !gen ? -1 : gen_after(*gen, i->gen); rcu_read_unlock(); if (unlikely(stale)) { @@ -1380,8 +1381,18 @@ err_bucket_stale: break; } - /* We can retry this: */ - ret = -BCH_ERR_transaction_restart; + struct printbuf buf = PRINTBUF; + if (bch2_fs_inconsistent_on(stale < 0, c, + "pointer to invalid bucket in nocow path on device %llu\n %s", + stale_at->b.inode, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = -EIO; + } else { + /* We can retry this: */ + ret = -BCH_ERR_transaction_restart; + } + printbuf_exit(&buf); + goto err_get_ioref; } From b79922009214e6ab23c07db32a5606a45710f86e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 8 Jun 2024 17:36:24 -0400 Subject: [PATCH 190/778] bcachefs: Add missing synchronize_srcu_expedited() call when shutting down We use the polling interface to srcu for tracking pending frees; when shutting down we don't need to wait for an srcu barrier to free them, but SRCU still gets confused if we shutdown with an outstanding grace period. Reported-by: syzbot+6a038377f0a594d7d44e@syzkaller.appspotmail.com Reported-by: syzbot+0ece6edfd05ed20e32d9@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 53b63be537e5..3694c600a3ad 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -3401,8 +3401,10 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) bch2_time_stats_exit(&s->lock_hold_times); } - if (c->btree_trans_barrier_initialized) + if (c->btree_trans_barrier_initialized) { + synchronize_srcu_expedited(&c->btree_trans_barrier); cleanup_srcu_struct(&c->btree_trans_barrier); + } mempool_exit(&c->btree_trans_mem_pool); mempool_exit(&c->btree_trans_pool); } From 31849bf07e0fb3e7d050c086b77ebdb6cec89167 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 10 Jun 2024 23:34:01 +0530 Subject: [PATCH 191/778] drm/amdgpu: Fix the BO release clear memory warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This happens when the amdgpu_bo_release_notify running before amdgpu_ttm_set_buffer_funcs_status set the buffer funcs to enabled. check the buffer funcs enablement before calling the fill buffer memory. v2:(Christian) - Apply it only for GEM buffers and since GEM buffers are only allocated/freed while the driver is loaded we never run into the issue to clear with buffer funcs disabled. v3:(Mario) - drop the stable tag as this will presumably go into a -fixes PR for 6.10 Log snip: *ERROR* Trying to clear memory with ring turned off. RIP: 0010:amdgpu_bo_release_notify+0x201/0x220 [amdgpu] Fixes: a68c7eaa7a8f ("drm/amdgpu: Enable clear page functionality") Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Tested-by: Mikhail Gavrilov Tested-by: Richard Gong Suggested-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240610180401.9540-1-Arunpravin.PaneerSelvam@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 67c234bcf89f..3adaa4670103 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -108,6 +108,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, memset(&bp, 0, sizeof(bp)); *obj = NULL; + flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; bp.size = size; bp.byte_align = alignment; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 8d8c39be6129..c556c8b653fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -604,8 +604,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; - bo->tbo.bdev = &adev->mman.bdev; if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | AMDGPU_GEM_DOMAIN_GDS)) From 07c54cc5988f19c9642fd463c2dbdac7fc52f777 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 May 2024 14:20:19 +0200 Subject: [PATCH 192/778] tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device() After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot crash when the boot CPU is nohz_full") the kernel no longer crashes, but there is another problem. In this case tick_setup_device() calls tick_take_do_timer_from_boot() to update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled) in smp_call_function_single(). Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new comment explains why this is safe (thanks Thomas!). Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full") Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240528122019.GA28794@redhat.com Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com --- kernel/time/tick-common.c | 42 +++++++++++++-------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index d88b13076b79..a47bcf71defc 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -178,26 +178,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) } } -#ifdef CONFIG_NO_HZ_FULL -static void giveup_do_timer(void *info) -{ - int cpu = *(unsigned int *)info; - - WARN_ON(tick_do_timer_cpu != smp_processor_id()); - - tick_do_timer_cpu = cpu; -} - -static void tick_take_do_timer_from_boot(void) -{ - int cpu = smp_processor_id(); - int from = tick_do_timer_boot_cpu; - - if (from >= 0 && from != cpu) - smp_call_function_single(from, giveup_do_timer, &cpu, 1); -} -#endif - /* * Setup the tick device */ @@ -221,19 +201,25 @@ static void tick_setup_device(struct tick_device *td, tick_next_period = ktime_get(); #ifdef CONFIG_NO_HZ_FULL /* - * The boot CPU may be nohz_full, in which case set - * tick_do_timer_boot_cpu so the first housekeeping - * secondary that comes up will take do_timer from - * us. + * The boot CPU may be nohz_full, in which case the + * first housekeeping secondary will take do_timer() + * from it. */ if (tick_nohz_full_cpu(cpu)) tick_do_timer_boot_cpu = cpu; - } else if (tick_do_timer_boot_cpu != -1 && - !tick_nohz_full_cpu(cpu)) { - tick_take_do_timer_from_boot(); + } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) { tick_do_timer_boot_cpu = -1; - WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu); + /* + * The boot CPU will stay in periodic (NOHZ disabled) + * mode until clocksource_done_booting() called after + * smp_init() selects a high resolution clocksource and + * timekeeping_notify() kicks the NOHZ stuff alive. + * + * So this WRITE_ONCE can only race with the READ_ONCE + * check in tick_periodic() but this race is harmless. + */ + WRITE_ONCE(tick_do_timer_cpu, cpu); #endif } From 9dd5134c61580ba4c219296c37e08ff64c109a74 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Jun 2024 11:23:05 -0700 Subject: [PATCH 193/778] kunit/overflow: Adjust for __counted_by with DEFINE_RAW_FLEX() When a flexible array structure has a __counted_by annotation, its use with DEFINE_RAW_FLEX() will result in the count being zero-initialized. This is expected since one doesn't want to use RAW with a counted_by struct. Adjust the tests to check for the condition and for compiler support. Reported-by: Christian Schrefl Closes: https://lore.kernel.org/all/0bfc6b38-8bc5-4971-b6fb-dc642a73fbfe@gmail.com/ Suggested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240610182301.work.272-kees@kernel.org Tested-by: Christian Schrefl Reviewed-by: Christian Schrefl Signed-off-by: Kees Cook --- lib/overflow_kunit.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index 4ef31b0bb74d..d305b0c054bb 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -1178,14 +1178,28 @@ struct foo { s16 array[] __counted_by(counter); }; +struct bar { + int a; + u32 counter; + s16 array[]; +}; + static void DEFINE_FLEX_test(struct kunit *test) { - DEFINE_RAW_FLEX(struct foo, two, array, 2); + /* Using _RAW_ on a __counted_by struct will initialize "counter" to zero */ + DEFINE_RAW_FLEX(struct foo, two_but_zero, array, 2); +#if __has_attribute(__counted_by__) + int expected_raw_size = sizeof(struct foo); +#else + int expected_raw_size = sizeof(struct foo) + 2 * sizeof(s16); +#endif + /* Without annotation, it will always be on-stack size. */ + DEFINE_RAW_FLEX(struct bar, two, array, 2); DEFINE_FLEX(struct foo, eight, array, counter, 8); DEFINE_FLEX(struct foo, empty, array, counter, 0); - KUNIT_EXPECT_EQ(test, __struct_size(two), - sizeof(struct foo) + sizeof(s16) + sizeof(s16)); + KUNIT_EXPECT_EQ(test, __struct_size(two_but_zero), expected_raw_size); + KUNIT_EXPECT_EQ(test, __struct_size(two), sizeof(struct bar) + 2 * sizeof(s16)); KUNIT_EXPECT_EQ(test, __struct_size(eight), 24); KUNIT_EXPECT_EQ(test, __struct_size(empty), sizeof(struct foo)); } From 3f60497c658d2072714d097a177612d34b34aa3d Mon Sep 17 00:00:00 2001 From: Biju Das Date: Mon, 10 Jun 2024 20:55:32 +0100 Subject: [PATCH 194/778] regulator: core: Fix modpost error "regulator_get_regmap" undefined Fix the modpost error "regulator_get_regmap" undefined by adding export symbol. Fixes: 04eca28cde52 ("regulator: Add helpers for low-level register access") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202406110117.mk5UR3VZ-lkp@intel.com Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20240610195532.175942-1-biju.das.jz@bp.renesas.com Signed-off-by: Mark Brown --- drivers/regulator/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 5794f4e9dd52..844e9587a880 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3347,6 +3347,7 @@ struct regmap *regulator_get_regmap(struct regulator *regulator) return map ? map : ERR_PTR(-EOPNOTSUPP); } +EXPORT_SYMBOL_GPL(regulator_get_regmap); /** * regulator_get_hardware_vsel_register - get the HW voltage selector register From 7124a8982b621e1a8af81c17f44b90587cdd161c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Jun 2024 20:48:41 -0400 Subject: [PATCH 195/778] bcachefs: Add missing bch_inode_info.ei_flags init Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index f6f1dbc1fe15..77126992dba8 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -227,7 +227,9 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c) mutex_init(&inode->ei_update_lock); two_state_lock_init(&inode->ei_pagecache_lock); INIT_LIST_HEAD(&inode->ei_vfs_inode_list); + inode->ei_flags = 0; mutex_init(&inode->ei_quota_lock); + memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); inode->v.i_state = 0; if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) { From 44180feaccf266d9b0b28cc4ceaac019817deb5c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Jun 2024 17:53:32 +0200 Subject: [PATCH 196/778] net/sched: initialize noop_qdisc owner When the noop_qdisc owner isn't initialized, then it will be 0, so packets will erroneously be regarded as having been subject to recursion as long as only CPU 0 queues them. For non-SMP, that's all packets, of course. This causes a change in what's reported to userspace, normally noop_qdisc would drop packets silently, but with this change the syscall returns -ENOBUFS if RECVERR is also set on the socket. Fix this by initializing the owner field to -1, just like it would be for dynamically allocated qdiscs by qdisc_alloc(). Fixes: 0f022d32c3ec ("net/sched: Fix mirred deadlock on device recursion") Signed-off-by: Johannes Berg Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240607175340.786bfb938803.I493bf8422e36be4454c08880a8d3703cea8e421a@changeid Signed-off-by: Jakub Kicinski --- net/sched/sch_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2a637a17061b..e22ff003d52e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -676,6 +676,7 @@ struct Qdisc noop_qdisc = { .qlen = 0, .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock), }, + .owner = -1, }; EXPORT_SYMBOL(noop_qdisc); From 8031b58c3a9b1db3ef68b3bd749fbee2e1e1aaa3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Jun 2024 17:01:48 +0200 Subject: [PATCH 197/778] mptcp: ensure snd_una is properly initialized on connect This is strictly related to commit fb7a0d334894 ("mptcp: ensure snd_nxt is properly initialized on connect"). It turns out that syzkaller can trigger the retransmit after fallback and before processing any other incoming packet - so that snd_una is still left uninitialized. Address the issue explicitly initializing snd_una together with snd_nxt and write_seq. Suggested-by: Mat Martineau Fixes: 8fd738049ac3 ("mptcp: fallback in case of simultaneous connect") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/485 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240607-upstream-net-20240607-misc-fixes-v1-1-1ab9ddfa3d00@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 96b113854bd3..bb7dca8aa2d9 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3740,6 +3740,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) WRITE_ONCE(msk->write_seq, subflow->idsn); WRITE_ONCE(msk->snd_nxt, subflow->idsn); + WRITE_ONCE(msk->snd_una, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); From 6a09788c1a66e3d8b04b3b3e7618cc817bb60ae9 Mon Sep 17 00:00:00 2001 From: YonglongLi Date: Fri, 7 Jun 2024 17:01:49 +0200 Subject: [PATCH 198/778] mptcp: pm: inc RmAddr MIB counter once per RM_ADDR ID The RmAddr MIB counter is supposed to be incremented once when a valid RM_ADDR has been received. Before this patch, it could have been incremented as many times as the number of subflows connected to the linked address ID, so it could have been 0, 1 or more than 1. The "RmSubflow" is incremented after a local operation. In this case, it is normal to tied it with the number of subflows that have been actually removed. The "remove invalid addresses" MP Join subtest has been modified to validate this case. A broadcast IP address is now used instead: the client will not be able to create a subflow to this address. The consequence is that when receiving the RM_ADDR with the ID attached to this broadcast IP address, no subflow linked to this ID will be found. Fixes: 7a7e52e38a40 ("mptcp: add RM_ADDR related mibs") Cc: stable@vger.kernel.org Co-developed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: YonglongLi Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240607-upstream-net-20240607-misc-fixes-v1-2-1ab9ddfa3d00@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 5 ++++- tools/testing/selftests/net/mptcp/mptcp_join.sh | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 7f53e022e27e..766a8409fa67 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -814,10 +814,13 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); removed = true; - __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (rm_type == MPTCP_MIB_RMSUBFLOW) + __MPTCP_INC_STATS(sock_net(sk), rm_type); } if (rm_type == MPTCP_MIB_RMSUBFLOW) __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); + else if (rm_type == MPTCP_MIB_RMADDR) + __MPTCP_INC_STATS(sock_net(sk), rm_type); if (!removed) continue; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2b66c5fa71eb..aea314d140c9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2250,7 +2250,8 @@ remove_tests() pm_nl_set_limits $ns1 3 3 pm_nl_add_endpoint $ns1 10.0.12.1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 From 40eec1795cc27b076d49236649a29507c7ed8c2d Mon Sep 17 00:00:00 2001 From: YonglongLi Date: Fri, 7 Jun 2024 17:01:50 +0200 Subject: [PATCH 199/778] mptcp: pm: update add_addr counters after connect The creation of new subflows can fail for different reasons. If no subflow have been created using the received ADD_ADDR, the related counters should not be updated, otherwise they will never be decremented for events related to this ID later on. For the moment, the number of accepted ADD_ADDR is only decremented upon the reception of a related RM_ADDR, and only if the remote address ID is currently being used by at least one subflow. In other words, if no subflow can be created with the received address, the counter will not be decremented. In this case, it is then important not to increment pm.add_addr_accepted counter, and not to modify pm.accept_addr bit. Note that this patch does not modify the behaviour in case of failures later on, e.g. if the MP Join is dropped or rejected. The "remove invalid addresses" MP Join subtest has been modified to validate this case. The broadcast IP address is added before the "valid" address that will be used to successfully create a subflow, and the limit is decreased by one: without this patch, it was not possible to create the last subflow, because: - the broadcast address would have been accepted even if it was not usable: the creation of a subflow to this address results in an error, - the limit of 2 accepted ADD_ADDR would have then been reached. Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Co-developed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: YonglongLi Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240607-upstream-net-20240607-misc-fixes-v1-3-1ab9ddfa3d00@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 16 ++++++++++------ tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 766a8409fa67..ea9e5817b9e9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -677,6 +677,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) unsigned int add_addr_accept_max; struct mptcp_addr_info remote; unsigned int subflows_max; + bool sf_created = false; int i, nr; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); @@ -704,15 +705,18 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) if (nr == 0) return; - msk->pm.add_addr_accepted++; - if (msk->pm.add_addr_accepted >= add_addr_accept_max || - msk->pm.subflows >= subflows_max) - WRITE_ONCE(msk->pm.accept_addr, false); - spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &addrs[i], &remote); + if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0) + sf_created = true; spin_lock_bh(&msk->pm.lock); + + if (sf_created) { + msk->pm.add_addr_accepted++; + if (msk->pm.add_addr_accepted >= add_addr_accept_max || + msk->pm.subflows >= subflows_max) + WRITE_ONCE(msk->pm.accept_addr, false); + } } void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index aea314d140c9..108aeeb84ef1 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2249,10 +2249,10 @@ remove_tests() if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 pm_nl_add_endpoint $ns1 10.0.12.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 flags signal - pm_nl_set_limits $ns2 3 3 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 From 74acb250e103f42be372177628f9272b6e888c49 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 7 Jun 2024 17:01:51 +0200 Subject: [PATCH 200/778] mailmap: map Geliang's new email address Just like my other email addresses, map my new one to kernel.org account too. My new email address uses "last name, first name" format, which is different from my other email addresses. This mailmap is also used to indicate that it is actually the same person. Suggested-by: Mat Martineau Suggested-by: Matthieu Baerts Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240607-upstream-net-20240607-misc-fixes-v1-4-1ab9ddfa3d00@kernel.org Signed-off-by: Jakub Kicinski --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index efd9fa867a8e..9af91bd3584b 100644 --- a/.mailmap +++ b/.mailmap @@ -217,6 +217,7 @@ Geliang Tang Geliang Tang Geliang Tang Geliang Tang +Geliang Tang Georgi Djakov Gerald Schaefer Gerald Schaefer From 36534d3c54537bf098224a32dc31397793d4594d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Jun 2024 12:56:52 +0000 Subject: [PATCH 201/778] tcp: use signed arithmetic in tcp_rtx_probe0_timed_out() Due to timer wheel implementation, a timer will usually fire after its schedule. For instance, for HZ=1000, a timeout between 512ms and 4s has a granularity of 64ms. For this range of values, the extra delay could be up to 63ms. For TCP, this means that tp->rcv_tstamp may be after inet_csk(sk)->icsk_timeout whenever the timer interrupt finally triggers, if one packet came during the extra delay. We need to make sure tcp_rtx_probe0_timed_out() handles this case. Fixes: e89688e3e978 ("net: tcp: fix unexcepted socket die when snd_wnd is 0") Signed-off-by: Eric Dumazet Cc: Menglong Dong Acked-by: Neal Cardwell Reviewed-by: Jason Xing Link: https://lore.kernel.org/r/20240607125652.1472540-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_timer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 83fe7f62f7f1..5bfd76a31af6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -485,8 +485,12 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, { const struct tcp_sock *tp = tcp_sk(sk); const int timeout = TCP_RTO_MAX * 2; - u32 rcv_delta; + s32 rcv_delta; + /* Note: timer interrupt might have been delayed by at least one jiffy, + * and tp->rcv_tstamp might very well have been written recently. + * rcv_delta can thus be negative. + */ rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; From b96a225377b6602299a03d2ce3c289b68cd41bb7 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Fri, 7 Jun 2024 15:09:32 -0700 Subject: [PATCH 202/778] drm/nouveau: don't attempt to schedule hpd_work on headless cards If the card doesn't have display hardware, hpd_work and hpd_lock are left uninitialized which causes BUG when attempting to schedule hpd_work on runtime PM resume. Fix it by adding headless flag to DRM and skip any hpd if it's set. Fixes: ae1aadb1eb8d ("nouveau: don't fail driver load if no display hw present.") Link: https://gitlab.freedesktop.org/drm/nouveau/-/issues/337 Signed-off-by: Vasily Khoruzhick Reviewed-by: Ben Skeggs Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240607221032.25918-1-anarsoul@gmail.com --- drivers/gpu/drm/nouveau/dispnv04/disp.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +- drivers/gpu/drm/nouveau/nouveau_display.c | 6 +++++- drivers/gpu/drm/nouveau/nouveau_drv.h | 1 + 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index 13705c5f1497..4b7497a8755c 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -68,7 +68,7 @@ nv04_display_fini(struct drm_device *dev, bool runtime, bool suspend) if (nv_two_heads(dev)) NVWriteCRTC(dev, 1, NV_PCRTC_INTR_EN_0, 0); - if (!runtime) + if (!runtime && !drm->headless) cancel_work_sync(&drm->hpd_work); if (!suspend) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 88728a0b2c25..674dc567e179 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2680,7 +2680,7 @@ nv50_display_fini(struct drm_device *dev, bool runtime, bool suspend) nv50_mstm_fini(nouveau_encoder(encoder)); } - if (!runtime) + if (!runtime && !drm->headless) cancel_work_sync(&drm->hpd_work); } diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index aed5d5b51b43..d4725a968827 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -450,6 +450,9 @@ nouveau_display_hpd_resume(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); + if (drm->headless) + return; + spin_lock_irq(&drm->hpd_lock); drm->hpd_pending = ~0; spin_unlock_irq(&drm->hpd_lock); @@ -635,7 +638,7 @@ nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime) } drm_connector_list_iter_end(&conn_iter); - if (!runtime) + if (!runtime && !drm->headless) cancel_work_sync(&drm->hpd_work); drm_kms_helper_poll_disable(dev); @@ -729,6 +732,7 @@ nouveau_display_create(struct drm_device *dev) /* no display hw */ if (ret == -ENODEV) { ret = 0; + drm->headless = true; goto disp_create_err; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index e239c6bf4afa..25fca98a20bc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -276,6 +276,7 @@ struct nouveau_drm { /* modesetting */ struct nvbios vbios; struct nouveau_display *display; + bool headless; struct work_struct hpd_work; spinlock_t hpd_lock; u32 hpd_pending; From ae9daffd9028f2500c9ac1517e46d4f2b57efb80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 8 May 2024 15:07:00 +0300 Subject: [PATCH 203/778] MIPS: Routerboard 532: Fix vendor retry check code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit read_config_dword() contains strange condition checking ret for a number of values. The ret variable, however, is always zero because config_access() never returns anything else. Thus, the retry is always taken until number of tries is exceeded. The code looks like it wants to check *val instead of ret to see if the read gave an error response. Fixes: 73b4390fb234 ("[MIPS] Routerboard 532: Support for base system") Signed-off-by: Ilpo Järvinen Signed-off-by: Thomas Bogendoerfer --- arch/mips/pci/ops-rc32434.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/pci/ops-rc32434.c b/arch/mips/pci/ops-rc32434.c index 874ed6df9768..34b9323bdabb 100644 --- a/arch/mips/pci/ops-rc32434.c +++ b/arch/mips/pci/ops-rc32434.c @@ -112,8 +112,8 @@ retry: * gives them time to settle */ if (where == PCI_VENDOR_ID) { - if (ret == 0xffffffff || ret == 0x00000000 || - ret == 0x0000ffff || ret == 0xffff0000) { + if (*val == 0xffffffff || *val == 0x00000000 || + *val == 0x0000ffff || *val == 0xffff0000) { if (delay > 4) return 0; delay *= 2; From 277a0363120276645ae598d8d5fea7265e076ae9 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Fri, 7 Jun 2024 11:04:00 +0200 Subject: [PATCH 204/778] MIPS: pci: lantiq: restore reset gpio polarity Commit 90c2d2eb7ab5 ("MIPS: pci: lantiq: switch to using gpiod API") not only switched to the gpiod API, but also inverted / changed the polarity of the GPIO. According to the PCI specification, the RST# pin is an active-low signal. However, most of the device trees that have been widely used for a long time (mainly in the openWrt project) define this GPIO as active-high and the old driver code inverted the signal internally. Apparently there are actually boards where the reset gpio must be operated inverted. For this reason, we cannot use the GPIOD_OUT_LOW/HIGH flag for initialization. Instead, we must explicitly set the gpio to value 1 in order to take into account any "GPIO_ACTIVE_LOW" flag that may have been set. In order to remain compatible with all these existing device trees, we should therefore keep the logic as it was before the commit. Fixes: 90c2d2eb7ab5 ("MIPS: pci: lantiq: switch to using gpiod API") Cc: stable@vger.kernel.org Signed-off-by: Martin Schiller Signed-off-by: Thomas Bogendoerfer --- arch/mips/pci/pci-lantiq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c index 68a8cefed420..0844db34022e 100644 --- a/arch/mips/pci/pci-lantiq.c +++ b/arch/mips/pci/pci-lantiq.c @@ -124,14 +124,14 @@ static int ltq_pci_startup(struct platform_device *pdev) clk_disable(clk_external); /* setup reset gpio used by pci */ - reset_gpio = devm_gpiod_get_optional(&pdev->dev, "reset", - GPIOD_OUT_LOW); + reset_gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS); error = PTR_ERR_OR_ZERO(reset_gpio); if (error) { dev_err(&pdev->dev, "failed to request gpio: %d\n", error); return error; } gpiod_set_consumer_name(reset_gpio, "pci_reset"); + gpiod_direction_output(reset_gpio, 1); /* enable auto-switching between PCI and EBU */ ltq_pci_w32(0xa, PCI_CR_CLK_CTRL); @@ -194,10 +194,10 @@ static int ltq_pci_startup(struct platform_device *pdev) /* toggle reset pin */ if (reset_gpio) { - gpiod_set_value_cansleep(reset_gpio, 1); + gpiod_set_value_cansleep(reset_gpio, 0); wmb(); mdelay(1); - gpiod_set_value_cansleep(reset_gpio, 0); + gpiod_set_value_cansleep(reset_gpio, 1); } return 0; } From ce5cdd3b05216b704a704f466fb4c2dff3778caf Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 11 Jun 2024 13:35:33 +0200 Subject: [PATCH 205/778] mips: bmips: BCM6358: make sure CBR is correctly set It was discovered that some device have CBR address set to 0 causing kernel panic when arch_sync_dma_for_cpu_all is called. This was notice in situation where the system is booted from TP1 and BMIPS_GET_CBR() returns 0 instead of a valid address and !!(read_c0_brcm_cmt_local() & (1 << 31)); not failing. The current check whether RAC flush should be disabled or not are not enough hence lets check if CBR is a valid address or not. Fixes: ab327f8acdf8 ("mips: bmips: BCM6358: disable RAC flush for TP1") Signed-off-by: Christian Marangi Acked-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- arch/mips/bmips/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index ec180ab92eaa..66a8ba19c287 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -110,7 +110,8 @@ static void bcm6358_quirks(void) * RAC flush causes kernel panics on BCM6358 when booting from TP1 * because the bootloader is not initializing it properly. */ - bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31)); + bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31)) || + !!BMIPS_GET_CBR(); } static void bcm6368_quirks(void) From b01b8151efe47a432f3f73623a6c1438727e7880 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 9 Jun 2024 10:21:39 +0200 Subject: [PATCH 206/778] s390: Update defconfigs Signed-off-by: Heiko Carstens Acked-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 43 ++++++++++++++++++++++------ arch/s390/configs/defconfig | 40 ++++++++++++++++++++++---- arch/s390/configs/zfcpdump_defconfig | 5 +--- 3 files changed, 69 insertions(+), 19 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 145342e46ea8..8c4adece8911 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -43,7 +43,6 @@ CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y -CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 @@ -51,6 +50,7 @@ CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y +# CONFIG_EXPOLINE_EXTERN is not set CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m @@ -76,6 +76,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -100,7 +101,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 @@ -119,6 +119,7 @@ CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m CONFIG_SMC_DIAG=m +CONFIG_SMC_LO=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -133,7 +134,6 @@ CONFIG_IP_MROUTE=y CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y CONFIG_NET_IPVTI=m CONFIG_INET_AH=m CONFIG_INET_ESP=m @@ -167,6 +167,7 @@ CONFIG_BRIDGE_NETFILTER=m CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y @@ -183,17 +184,39 @@ CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_CT_NETLINK_HELPER=m +CONFIG_NETFILTER_NETLINK_GLUE_CT=y CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y +CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m +CONFIG_NFT_FLOW_OFFLOAD=m +CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m +CONFIG_NFT_MASQ=m +CONFIG_NFT_REDIR=m CONFIG_NFT_NAT=m +CONFIG_NFT_TUNNEL=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +CONFIG_NFT_XFRM=m +CONFIG_NFT_SOCKET=m +CONFIG_NFT_OSF=m +CONFIG_NFT_TPROXY=m +CONFIG_NFT_SYNPROXY=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m +CONFIG_NFT_REJECT_NETDEV=m +CONFIG_NF_FLOW_TABLE_INET=m +CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_FLOW_TABLE_PROCFS=y CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -206,8 +229,10 @@ CONFIG_NETFILTER_XT_TARGET_HMARK=m CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NETMAP=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_REDIRECT=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m @@ -216,6 +241,7 @@ CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CGROUP=m CONFIG_NETFILTER_XT_MATCH_CLUSTER=m CONFIG_NETFILTER_XT_MATCH_COMMENT=m CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m @@ -230,6 +256,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_IPVS=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m @@ -247,6 +274,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m CONFIG_NETFILTER_XT_MATCH_RATEEST=m CONFIG_NETFILTER_XT_MATCH_REALM=m CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m CONFIG_NETFILTER_XT_MATCH_STATE=m CONFIG_NETFILTER_XT_MATCH_STATISTIC=m CONFIG_NETFILTER_XT_MATCH_STRING=m @@ -302,7 +330,6 @@ CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_FIB_IPV6=m @@ -373,7 +400,6 @@ CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m @@ -462,6 +488,7 @@ CONFIG_DM_VERITY=m CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_DM_SWITCH=m CONFIG_DM_INTEGRITY=m +CONFIG_DM_VDO=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m @@ -574,7 +601,6 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m -# CONFIG_DRM_DEBUG_MODESET_LOCK is not set CONFIG_FB=y # CONFIG_FB_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y @@ -645,7 +671,6 @@ CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_EXFAT_FS=m CONFIG_NTFS_FS=m -CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y @@ -663,6 +688,7 @@ CONFIG_SQUASHFS_XZ=y CONFIG_SQUASHFS_ZSTD=y CONFIG_ROMFS_FS=m CONFIG_NFS_FS=m +CONFIG_NFS_V2=m CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -879,6 +905,5 @@ CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y -CONFIG_STRING_SELFTEST=y CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index dc237896f99d..6dd11d3b6aaa 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -41,7 +41,6 @@ CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y -CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 @@ -49,6 +48,7 @@ CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y +# CONFIG_EXPOLINE_EXTERN is not set CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m @@ -71,6 +71,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -110,6 +111,7 @@ CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m CONFIG_SMC_DIAG=m +CONFIG_SMC_LO=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -124,7 +126,6 @@ CONFIG_IP_MROUTE=y CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y CONFIG_NET_IPVTI=m CONFIG_INET_AH=m CONFIG_INET_ESP=m @@ -158,6 +159,7 @@ CONFIG_BRIDGE_NETFILTER=m CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y @@ -174,17 +176,39 @@ CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_CT_NETLINK_HELPER=m +CONFIG_NETFILTER_NETLINK_GLUE_CT=y CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y +CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m +CONFIG_NFT_FLOW_OFFLOAD=m +CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m +CONFIG_NFT_MASQ=m +CONFIG_NFT_REDIR=m CONFIG_NFT_NAT=m +CONFIG_NFT_TUNNEL=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +CONFIG_NFT_XFRM=m +CONFIG_NFT_SOCKET=m +CONFIG_NFT_OSF=m +CONFIG_NFT_TPROXY=m +CONFIG_NFT_SYNPROXY=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m +CONFIG_NFT_FIB_NETDEV=m +CONFIG_NFT_REJECT_NETDEV=m +CONFIG_NF_FLOW_TABLE_INET=m +CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_FLOW_TABLE_PROCFS=y CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -197,8 +221,10 @@ CONFIG_NETFILTER_XT_TARGET_HMARK=m CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NETMAP=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_REDIRECT=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m @@ -207,6 +233,7 @@ CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CGROUP=m CONFIG_NETFILTER_XT_MATCH_CLUSTER=m CONFIG_NETFILTER_XT_MATCH_COMMENT=m CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m @@ -221,6 +248,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_IPVS=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m @@ -238,6 +266,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m CONFIG_NETFILTER_XT_MATCH_RATEEST=m CONFIG_NETFILTER_XT_MATCH_REALM=m CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m CONFIG_NETFILTER_XT_MATCH_STATE=m CONFIG_NETFILTER_XT_MATCH_STATISTIC=m CONFIG_NETFILTER_XT_MATCH_STRING=m @@ -293,7 +322,6 @@ CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_FIB_IPV6=m @@ -363,7 +391,6 @@ CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m @@ -452,6 +479,7 @@ CONFIG_DM_VERITY=m CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y CONFIG_DM_SWITCH=m CONFIG_DM_INTEGRITY=m +CONFIG_DM_VDO=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m @@ -630,7 +658,6 @@ CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_EXFAT_FS=m CONFIG_NTFS_FS=m -CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y @@ -649,6 +676,7 @@ CONFIG_SQUASHFS_XZ=y CONFIG_SQUASHFS_ZSTD=y CONFIG_ROMFS_FS=m CONFIG_NFS_FS=m +CONFIG_NFS_V2=m CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index c51f3ec4eb28..8c2b61363bab 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -9,25 +9,22 @@ CONFIG_BPF_SYSCALL=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_KEXEC=y -CONFIG_CRASH_DUMP=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=2 CONFIG_HZ_100=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set +# CONFIG_AP is not set # CONFIG_PFAULT is not set # CONFIG_S390_HYPFS is not set # CONFIG_VIRTUALIZATION is not set # CONFIG_S390_GUEST is not set # CONFIG_SECCOMP is not set -# CONFIG_GCC_PLUGINS is not set # CONFIG_BLOCK_LEGACY_AUTOLOAD is not set CONFIG_PARTITION_ADVANCED=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_SWAP is not set # CONFIG_COMPAT_BRK is not set -# CONFIG_COMPACTION is not set -# CONFIG_MIGRATION is not set CONFIG_NET=y # CONFIG_IUCV is not set # CONFIG_PCPU_DEV_REFCNT is not set From d8073dc6bc04a061660b31e49a990478a73f1883 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 7 Jun 2024 14:19:48 +0200 Subject: [PATCH 207/778] s390/mm: Allow large pages only for aligned physical addresses Do not allow creation of large pages against physical addresses, which itself are not aligned on the correct boundary. Failure to do so might lead to referencing wrong memory as result of the way DAT works. Fixes: c98d2ecae08f ("s390/mm: Uncouple physical vs virtual address spaces") Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/boot/vmem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 96d48b7112d4..1f7dbb4b66b7 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -267,15 +267,21 @@ static bool large_allowed(enum populate_mode mode) static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end, enum populate_mode mode) { + unsigned long size = end - addr; + return machine.has_edat2 && large_allowed(mode) && - IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE; + IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) && + IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE); } static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end, enum populate_mode mode) { + unsigned long size = end - addr; + return machine.has_edat1 && large_allowed(mode) && - IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE; + IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) && + IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE); } static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, From 693d41f7c938f92d881e6a51525e6c132a186afd Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 7 Jun 2024 14:19:50 +0200 Subject: [PATCH 208/778] s390/mm: Restore mapping of kernel image using large pages Since physical and virtual kernel address spaces are uncoupled the kernel image is not mapped using large segment pages anymore, which is a regression. Put the kernel image at the same large segment page offset in physical memory as in virtual memory. Such approach preserves the existing number of bits of entropy used for randomization of the kernel location in virtual memory when KASLR is on. As result, the kernel is mapped using large segment pages. Fixes: c98d2ecae08f ("s390/mm: Uncouple physical vs virtual address spaces") Reported-by: Heiko Carstens Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 27 ++++++++++++++++++++++++--- arch/s390/boot/vmem.c | 2 +- arch/s390/boot/vmlinux.lds.S | 1 + 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 182aac6a0f77..48ef5fe5c08a 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -384,7 +384,7 @@ static void fixup_vmlinux_info(void) void startup_kernel(void) { unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size; - unsigned long nokaslr_offset_phys = mem_safe_offset(); + unsigned long nokaslr_offset_phys, kaslr_large_page_offset; unsigned long amode31_lma = 0; unsigned long max_physmem_end; unsigned long asce_limit; @@ -393,6 +393,12 @@ void startup_kernel(void) fixup_vmlinux_info(); setup_lpp(); + + /* + * Non-randomized kernel physical start address must be _SEGMENT_SIZE + * aligned (see blow). + */ + nokaslr_offset_phys = ALIGN(mem_safe_offset(), _SEGMENT_SIZE); safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size); /* @@ -425,10 +431,25 @@ void startup_kernel(void) save_ipl_cert_comp_list(); rescue_initrd(safe_addr, ident_map_size); - if (kaslr_enabled()) - __kaslr_offset_phys = randomize_within_range(kernel_size, THREAD_SIZE, 0, ident_map_size); + /* + * __kaslr_offset_phys must be _SEGMENT_SIZE aligned, so the lower + * 20 bits (the offset within a large page) are zero. Copy the last + * 20 bits of __kaslr_offset, which is THREAD_SIZE aligned, to + * __kaslr_offset_phys. + * + * With this the last 20 bits of __kaslr_offset_phys and __kaslr_offset + * are identical, which is required to allow for large mappings of the + * kernel image. + */ + kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK; + if (kaslr_enabled()) { + unsigned long end = ident_map_size - kaslr_large_page_offset; + + __kaslr_offset_phys = randomize_within_range(kernel_size, _SEGMENT_SIZE, 0, end); + } if (!__kaslr_offset_phys) __kaslr_offset_phys = nokaslr_offset_phys; + __kaslr_offset_phys |= kaslr_large_page_offset; kaslr_adjust_vmlinux_info(__kaslr_offset_phys); physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size); deploy_kernel((void *)__kaslr_offset_phys); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 1f7dbb4b66b7..40cfce2687c4 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -261,7 +261,7 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m static bool large_allowed(enum populate_mode mode) { - return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY); + return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL); } static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end, diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index 1fe5a1d3ff60..a750711d44c8 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -109,6 +109,7 @@ SECTIONS #ifdef CONFIG_KERNEL_UNCOMPRESSED . = ALIGN(PAGE_SIZE); . += AMODE31_SIZE; /* .amode31 section */ + . = ALIGN(1 << 20); /* _SEGMENT_SIZE */ #else . = ALIGN(8); #endif From 2049aad5d3a6921f80121029afe6fbcfb2727861 Mon Sep 17 00:00:00 2001 From: Amer Al Shanawany Date: Wed, 17 Apr 2024 20:49:13 +0200 Subject: [PATCH 209/778] selftests: filesystems: fix warn_unused_result build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following warnings by adding return check and error messages. statmount_test.c: In function ‘cleanup_namespace’: statmount_test.c:128:9: warning: ignoring return value of ‘fchdir’ declared with attribute ‘warn_unused_result’ [-Wunused-result] 128 | fchdir(orig_root); | ^~~~~~~~~~~~~~~~~ statmount_test.c:129:9: warning: ignoring return value of ‘chroot’ declared with attribute ‘warn_unused_result’ [-Wunused-result] 129 | chroot("."); | ^~~~~~~~~~~ Signed-off-by: Amer Al Shanawany Reviewed-by: Muhammad Usama Anjum Signed-off-by: Shuah Khan --- .../selftests/filesystems/statmount/statmount_test.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index e6d7c4f1c85b..e8c019d72cbf 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -125,8 +125,16 @@ static uint32_t old_root_id, old_parent_id; static void cleanup_namespace(void) { - fchdir(orig_root); - chroot("."); + int ret; + + ret = fchdir(orig_root); + if (ret == -1) + ksft_perror("fchdir to original root"); + + ret = chroot("."); + if (ret == -1) + ksft_perror("chroot to original root"); + umount2(root_mntpoint, MNT_DETACH); rmdir(root_mntpoint); } From 04e1f99afe8bec27ad2d2726897ac8185bf0532c Mon Sep 17 00:00:00 2001 From: Amer Al Shanawany Date: Tue, 11 Jun 2024 17:16:08 +0200 Subject: [PATCH 210/778] selftests: seccomp: fix format-zero-length warnings fix the following errors by using string format specifier and an empty parameter: seccomp_benchmark.c:197:24: warning: zero-length gnu_printf format string [-Wformat-zero-length] 197 | ksft_print_msg(""); | ^~ seccomp_benchmark.c:202:24: warning: zero-length gnu_printf format string [-Wformat-zero-length] 202 | ksft_print_msg(""); | ^~ seccomp_benchmark.c:204:24: warning: zero-length gnu_printf format string [-Wformat-zero-length] 204 | ksft_print_msg(""); | ^~ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312260235.Uj5ug8K9-lkp@intel.com/ Suggested-by: Kees Cook Signed-off-by: Amer Al Shanawany Signed-off-by: Shuah Khan --- tools/testing/selftests/seccomp/seccomp_benchmark.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index b83099160fbc..94886c82ae60 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -194,14 +194,14 @@ int main(int argc, char *argv[]) ksft_set_plan(7); ksft_print_msg("Running on:\n"); - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("uname -a"); ksft_print_msg("Current BPF sysctl settings:\n"); /* Avoid using "sysctl" which may not be installed. */ - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("grep -H . /proc/sys/net/core/bpf_jit_enable"); - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("grep -H . /proc/sys/net/core/bpf_jit_harden"); affinity(); From e3215deca4520773cd2b155bed164c12365149a7 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 3 Jun 2024 09:24:44 +0800 Subject: [PATCH 211/778] dmaengine: idxd: Fix possible Use-After-Free in irq_process_work_list Use list_for_each_entry_safe() to allow iterating through the list and deleting the entry in the iteration process. The descriptor is freed via idxd_desc_complete() and there's a slight chance may cause issue for the list iterator when the descriptor is reused by another thread without it being deleted from the list. Fixes: 16e19e11228b ("dmaengine: idxd: Fix list corruption in description completion") Signed-off-by: Li RongQing Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20240603012444.11902-1-lirongqing@baidu.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 8dc029c86551..fc049c9c9892 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -611,11 +611,13 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry) spin_unlock(&irq_entry->list_lock); - list_for_each_entry(desc, &flist, list) { + list_for_each_entry_safe(desc, n, &flist, list) { /* * Check against the original status as ABORT is software defined * and 0xff, which DSA_COMP_STATUS_MASK can mask out. */ + list_del(&desc->list); + if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { idxd_desc_complete(desc, IDXD_COMPLETE_ABORT, true); continue; From e8343410ddf08fc36a9b9cc7c51a4e53a262d4c6 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Tue, 11 Jun 2024 18:02:55 +0530 Subject: [PATCH 212/778] ALSA: dmaengine: Synchronize dma channel after drop() Sometimes the stream may be stopped due to XRUN events, in which case the userspace can call snd_pcm_drop() and snd_pcm_prepare() to stop and start the stream again. In these cases, we must wait for the DMA channel to synchronize before marking the stream as prepared for playback, as the DMA channel gets stopped by drop() without any synchronization. Make sure the ALSA core synchronizes the DMA channel by adding a sync_stop() hook. Reviewed-by: Peter Ujfalusi Signed-off-by: Jai Luthra Link: https://lore.kernel.org/r/20240611-asoc_next-v3-1-fcfd84b12164@ti.com Signed-off-by: Mark Brown --- include/sound/dmaengine_pcm.h | 1 + sound/core/pcm_dmaengine.c | 10 ++++++++++ sound/soc/soc-generic-dmaengine-pcm.c | 8 ++++++++ 3 files changed, 19 insertions(+) diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index c11aaf8079fb..f6baa9a01868 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -36,6 +36,7 @@ snd_pcm_uframes_t snd_dmaengine_pcm_pointer_no_residue(struct snd_pcm_substream int snd_dmaengine_pcm_open(struct snd_pcm_substream *substream, struct dma_chan *chan); int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream); +int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream); int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream, dma_filter_fn filter_fn, void *filter_data); diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c index 12aa1cef11a1..ed07fa5693d2 100644 --- a/sound/core/pcm_dmaengine.c +++ b/sound/core/pcm_dmaengine.c @@ -349,6 +349,16 @@ int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream, } EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_open_request_chan); +int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream) +{ + struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream); + + dmaengine_synchronize(prtd->dma_chan); + + return 0; +} +EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_sync_stop); + /** * snd_dmaengine_pcm_close - Close a dmaengine based PCM substream * @substream: PCM substream diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index ea3bc9318412..a63e942fdc0b 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -318,6 +318,12 @@ static int dmaengine_copy(struct snd_soc_component *component, return 0; } +static int dmaengine_pcm_sync_stop(struct snd_soc_component *component, + struct snd_pcm_substream *substream) +{ + return snd_dmaengine_pcm_sync_stop(substream); +} + static const struct snd_soc_component_driver dmaengine_pcm_component = { .name = SND_DMAENGINE_PCM_DRV_NAME, .probe_order = SND_SOC_COMP_ORDER_LATE, @@ -327,6 +333,7 @@ static const struct snd_soc_component_driver dmaengine_pcm_component = { .trigger = dmaengine_pcm_trigger, .pointer = dmaengine_pcm_pointer, .pcm_construct = dmaengine_pcm_new, + .sync_stop = dmaengine_pcm_sync_stop, }; static const struct snd_soc_component_driver dmaengine_pcm_component_process = { @@ -339,6 +346,7 @@ static const struct snd_soc_component_driver dmaengine_pcm_component_process = { .pointer = dmaengine_pcm_pointer, .copy = dmaengine_copy, .pcm_construct = dmaengine_pcm_new, + .sync_stop = dmaengine_pcm_sync_stop, }; static const char * const dmaengine_pcm_dma_channel_names[] = { From c5dcf8ab10606e76c1d8a0ec77f27d84a392e874 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Tue, 11 Jun 2024 18:02:56 +0530 Subject: [PATCH 213/778] ASoC: ti: davinci-mcasp: Set min period size using FIFO config The minimum period size was enforced to 64 as older devices integrating McASP with EDMA used an internal FIFO of 64 samples. With UDMA based platforms this internal McASP FIFO is optional, as the DMA engine internally does some buffering which is already accounted for when registering the platform. So we should read the actual FIFO configuration (txnumevt/rxnumevt) instead of hardcoding frames.min to 64. Acked-by: Peter Ujfalusi Signed-off-by: Jai Luthra Link: https://lore.kernel.org/r/20240611-asoc_next-v3-2-fcfd84b12164@ti.com Signed-off-by: Mark Brown --- sound/soc/ti/davinci-mcasp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index 1e760c315521..2b1ed91a736c 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -1472,10 +1472,11 @@ static int davinci_mcasp_hw_rule_min_periodsize( { struct snd_interval *period_size = hw_param_interval(params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE); + u8 numevt = *((u8 *)rule->private); struct snd_interval frames; snd_interval_any(&frames); - frames.min = 64; + frames.min = numevt; frames.integer = 1; return snd_interval_refine(period_size, &frames); @@ -1490,6 +1491,7 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, u32 max_channels = 0; int i, dir, ret; int tdm_slots = mcasp->tdm_slots; + u8 *numevt; /* Do not allow more then one stream per direction */ if (mcasp->substreams[substream->stream]) @@ -1589,9 +1591,12 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, return ret; } + numevt = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? + &mcasp->txnumevt : + &mcasp->rxnumevt; snd_pcm_hw_rule_add(substream->runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, - davinci_mcasp_hw_rule_min_periodsize, NULL, + davinci_mcasp_hw_rule_min_periodsize, numevt, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1); return 0; From 524d3f126362b6033e92cbe107ae2158d7fbff94 Mon Sep 17 00:00:00 2001 From: Primoz Fiser Date: Mon, 10 Jun 2024 14:58:47 +0200 Subject: [PATCH 214/778] ASoC: ti: omap-hdmi: Fix too long driver name Set driver name to "HDMI". This simplifies the code and gets rid of the following error messages: ASoC: driver name too long 'HDMI 58040000.encoder' -> 'HDMI_58040000_e' Signed-off-by: Primoz Fiser Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20240610125847.773394-1-primoz.fiser@norik.com Signed-off-by: Mark Brown --- sound/soc/ti/omap-hdmi.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sound/soc/ti/omap-hdmi.c b/sound/soc/ti/omap-hdmi.c index 639bc83f4263..cf43ac19c4a6 100644 --- a/sound/soc/ti/omap-hdmi.c +++ b/sound/soc/ti/omap-hdmi.c @@ -354,11 +354,7 @@ static int omap_hdmi_audio_probe(struct platform_device *pdev) if (!card) return -ENOMEM; - card->name = devm_kasprintf(dev, GFP_KERNEL, - "HDMI %s", dev_name(ad->dssdev)); - if (!card->name) - return -ENOMEM; - + card->name = "HDMI"; card->owner = THIS_MODULE; card->dai_link = devm_kzalloc(dev, sizeof(*(card->dai_link)), GFP_KERNEL); From c4ab9da85b9df3692f861512fe6c9812f38b7471 Mon Sep 17 00:00:00 2001 From: Davide Ornaghi Date: Wed, 5 Jun 2024 13:03:45 +0200 Subject: [PATCH 215/778] netfilter: nft_inner: validate mandatory meta and payload Check for mandatory netlink attributes in payload and meta expression when used embedded from the inner expression, otherwise NULL pointer dereference is possible from userspace. Fixes: a150d122b6bd ("netfilter: nft_meta: add inner match support") Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") Signed-off-by: Davide Ornaghi Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 3 +++ net/netfilter/nft_payload.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index ba0d3683a45d..9139ce38ea7b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -839,6 +839,9 @@ static int nft_meta_inner_init(const struct nft_ctx *ctx, struct nft_meta *priv = nft_expr_priv(expr); unsigned int len; + if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG]) + return -EINVAL; + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_PROTOCOL: diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 0c43d748e23a..50429cbd42da 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -650,6 +650,10 @@ static int nft_payload_inner_init(const struct nft_ctx *ctx, struct nft_payload *priv = nft_expr_priv(expr); u32 base; + if (!tb[NFTA_PAYLOAD_BASE] || !tb[NFTA_PAYLOAD_OFFSET] || + !tb[NFTA_PAYLOAD_LEN] || !tb[NFTA_PAYLOAD_DREG]) + return -EINVAL; + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); switch (base) { case NFT_PAYLOAD_TUN_HEADER: From 4e7aaa6b82d63e8ddcbfb56b4fd3d014ca586f10 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 4 Jun 2024 15:58:03 +0200 Subject: [PATCH 216/778] netfilter: ipset: Fix race between namespace cleanup and gc in the list:set type Lion Ackermann reported that there is a race condition between namespace cleanup in ipset and the garbage collection of the list:set type. The namespace cleanup can destroy the list:set type of sets while the gc of the set type is waiting to run in rcu cleanup. The latter uses data from the destroyed set which thus leads use after free. The patch contains the following parts: - When destroying all sets, first remove the garbage collectors, then wait if needed and then destroy the sets. - Fix the badly ordered "wait then remove gc" for the destroy a single set case. - Fix the missing rcu locking in the list:set type in the userspace test case. - Use proper RCU list handlings in the list:set type. The patch depends on c1193d9bbbd3 (netfilter: ipset: Add list flush to cancel_gc). Fixes: 97f7cf1cd80e (netfilter: ipset: fix performance regression in swap operation) Reported-by: Lion Ackermann Tested-by: Lion Ackermann Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 93 +++++++++++++++------------ net/netfilter/ipset/ip_set_list_set.c | 30 ++++----- 2 files changed, 66 insertions(+), 57 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 3184cc6be4c9..c7ae4d9bf3d2 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1172,23 +1172,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { .len = IPSET_MAXNAMELEN - 1 }, }; -static void -ip_set_destroy_set(struct ip_set *set) -{ - pr_debug("set: %s\n", set->name); - - /* Must call it without holding any lock */ - set->variant->destroy(set); - module_put(set->type->me); - kfree(set); -} +/* In order to return quickly when destroying a single set, it is split + * into two stages: + * - Cancel garbage collector + * - Destroy the set itself via call_rcu() + */ static void ip_set_destroy_set_rcu(struct rcu_head *head) { struct ip_set *set = container_of(head, struct ip_set, rcu); - ip_set_destroy_set(set); + set->variant->destroy(set); + module_put(set->type->me); + kfree(set); +} + +static void +_destroy_all_sets(struct ip_set_net *inst) +{ + struct ip_set *set; + ip_set_id_t i; + bool need_wait = false; + + /* First cancel gc's: set:list sets are flushed as well */ + for (i = 0; i < inst->ip_set_max; i++) { + set = ip_set(inst, i); + if (set) { + set->variant->cancel_gc(set); + if (set->type->features & IPSET_TYPE_NAME) + need_wait = true; + } + } + /* Must wait for flush to be really finished */ + if (need_wait) + rcu_barrier(); + for (i = 0; i < inst->ip_set_max; i++) { + set = ip_set(inst, i); + if (set) { + ip_set(inst, i) = NULL; + set->variant->destroy(set); + module_put(set->type->me); + kfree(set); + } + } } static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, @@ -1202,11 +1229,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Commands are serialized and references are * protected by the ip_set_ref_lock. * External systems (i.e. xt_set) must call - * ip_set_put|get_nfnl_* functions, that way we + * ip_set_nfnl_get_* functions, that way we * can safely check references here. * * list:set timer can only decrement the reference @@ -1214,8 +1240,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * without holding the lock. */ if (!attr[IPSET_ATTR_SETNAME]) { - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); @@ -1226,15 +1250,7 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, } inst->is_destroyed = true; read_unlock_bh(&ip_set_ref_lock); - for (i = 0; i < inst->ip_set_max; i++) { - s = ip_set(inst, i); - if (s) { - ip_set(inst, i) = NULL; - /* Must cancel garbage collectors */ - s->variant->cancel_gc(s); - ip_set_destroy_set(s); - } - } + _destroy_all_sets(inst); /* Modified by ip_set_destroy() only, which is serialized */ inst->is_destroyed = false; } else { @@ -1255,12 +1271,12 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); if (features & IPSET_TYPE_NAME) { /* Must wait for flush to be really finished */ rcu_barrier(); } - /* Must cancel garbage collectors */ - s->variant->cancel_gc(s); call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; @@ -2364,31 +2380,26 @@ ip_set_net_init(struct net *net) return 0; } +static void __net_exit +ip_set_net_pre_exit(struct net *net) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + inst->is_deleted = true; /* flag for ip_set_nfnl_put */ +} + static void __net_exit ip_set_net_exit(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); - struct ip_set *set = NULL; - ip_set_id_t i; - - inst->is_deleted = true; /* flag for ip_set_nfnl_put */ - - nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < inst->ip_set_max; i++) { - set = ip_set(inst, i); - if (set) { - ip_set(inst, i) = NULL; - set->variant->cancel_gc(set); - ip_set_destroy_set(set); - } - } - nfnl_unlock(NFNL_SUBSYS_IPSET); + _destroy_all_sets(inst); kvfree(rcu_dereference_protected(inst->ip_set_list, 1)); } static struct pernet_operations ip_set_net_ops = { .init = ip_set_net_init, + .pre_exit = ip_set_net_pre_exit, .exit = ip_set_net_exit, .id = &ip_set_net_id, .size = sizeof(struct ip_set_net), diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 54e2a1dd7f5f..bfae7066936b 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, struct set_elem *e; int ret; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, struct set_elem *e; int ret; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct list_set *map = set->data; struct set_adt_elem *d = value; struct set_elem *e, *next, *prev = NULL; - int ret; + int ret = 0; - list_for_each_entry(e, &map->members, list) { + rcu_read_lock(); + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -201,6 +202,7 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (d->before == 0) { ret = 1; + goto out; } else if (d->before > 0) { next = list_next_entry(e, list); ret = !list_is_last(&e->list, &map->members) && @@ -208,9 +210,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, } else { ret = prev && prev->id == d->refid; } - return ret; + goto out; } - return 0; +out: + rcu_read_unlock(); + return ret; } static void @@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Find where to add the new entry */ n = prev = next = NULL; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct list_set *map = set->data; struct set_adt_elem *d = value; - struct set_elem *e, *next, *prev = NULL; + struct set_elem *e, *n, *next, *prev = NULL; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_safe(e, n, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -424,14 +428,8 @@ static void list_set_destroy(struct ip_set *set) { struct list_set *map = set->data; - struct set_elem *e, *n; - list_for_each_entry_safe(e, n, &map->members, list) { - list_del(&e->list); - ip_set_put_byindex(map->net, e->id); - ip_set_ext_destroy(set, e); - kfree(e); - } + WARN_ON_ONCE(!list_empty(&map->members)); kfree(map); set->data = NULL; From 6f8f132cc7bac2ac76911e47d5baa378aafda4cb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Jun 2024 12:23:31 +0200 Subject: [PATCH 217/778] netfilter: Use flowlabel flow key when re-routing mangled packets 'ip6 dscp set $v' in an nftables outpute route chain has no effect. While nftables does detect the dscp change and calls the reroute hook. But ip6_route_me_harder never sets the dscp/flowlabel: flowlabel/dsfield routing rules are ignored and no reroute takes place. Thanks to Yi Chen for an excellent reproducer script that I used to validate this change. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Yi Chen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 53d255838e6a..5d989d803009 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -36,6 +36,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, .saddr = iph->saddr, + .flowlabel = ip6_flowinfo(iph), }; int err; From ba27e9d2207784da748b19170a2e56bd7770bd81 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Sun, 2 Jun 2024 07:03:19 +0530 Subject: [PATCH 218/778] dmaengine: ti: k3-udma-glue: Fix of_k3_udma_glue_parse_chn_by_id() The of_k3_udma_glue_parse_chn_by_id() helper function erroneously invokes "of_node_put()" on the "udmax_np" device-node passed to it, without having incremented its reference count at any point. Fix it. Fixes: 81a1f90f20af ("dmaengine: ti: k3-udma-glue: Add function to parse channel by ID") Signed-off-by: Siddharth Vadapalli Acked-by: Peter Ujfalusi Acked-by: Peter Ujfalusi@gmail.com Link: https://lore.kernel.org/r/20240602013319.2975894-1-s-vadapalli@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma-glue.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c index c9b93055dc9d..f0a399cf45b2 100644 --- a/drivers/dma/ti/k3-udma-glue.c +++ b/drivers/dma/ti/k3-udma-glue.c @@ -200,12 +200,9 @@ of_k3_udma_glue_parse_chn_by_id(struct device_node *udmax_np, struct k3_udma_glu ret = of_k3_udma_glue_parse(udmax_np, common); if (ret) - goto out_put_spec; + return ret; ret = of_k3_udma_glue_parse_chn_common(common, thread_id, tx_chn); - -out_put_spec: - of_node_put(udmax_np); return ret; } From 1b11b4ef6bd68591dcaf8423c7d05e794e6aec6f Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 28 May 2024 09:09:23 +0300 Subject: [PATCH 219/778] dmaengine: ioatdma: Fix leaking on version mismatch Fix leaking ioatdma_device if I/OAT version is less than IOAT_VER_3_0. Fixes: bf453a0a18b2 ("dmaengine: ioat: Support in-use unbind") Signed-off-by: Nikita Shubin Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240528-ioatdma-fixes-v2-1-a9f2fbe26ab1@yadro.com Signed-off-by: Vinod Koul --- drivers/dma/ioat/init.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 9c364e92cb82..e76e507ae898 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1350,6 +1350,7 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) void __iomem * const *iomap; struct device *dev = &pdev->dev; struct ioatdma_device *device; + u8 version; int err; err = pcim_enable_device(pdev); @@ -1363,6 +1364,10 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!iomap) return -ENOMEM; + version = readb(iomap[IOAT_MMIO_BAR] + IOAT_VER_OFFSET); + if (version < IOAT_VER_3_0) + return -ENODEV; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) return err; @@ -1373,16 +1378,14 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); pci_set_drvdata(pdev, device); - device->version = readb(device->reg_base + IOAT_VER_OFFSET); + device->version = version; if (device->version >= IOAT_VER_3_4) ioat_dca_enabled = 0; - if (device->version >= IOAT_VER_3_0) { - if (is_skx_ioat(pdev)) - device->version = IOAT_VER_3_2; - err = ioat3_dma_probe(device, ioat_dca_enabled); - } else - return -ENODEV; + if (is_skx_ioat(pdev)) + device->version = IOAT_VER_3_2; + + err = ioat3_dma_probe(device, ioat_dca_enabled); if (err) { dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); return -ENODEV; From f0dc9fda2e0ee9e01496c2f5aca3a831131fad79 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 28 May 2024 09:09:24 +0300 Subject: [PATCH 220/778] dmaengine: ioatdma: Fix error path in ioat3_dma_probe() Make sure we are disabling interrupts and destroying DMA pool if pcie_capability_read/write_word() call failed. Fixes: 511deae0261c ("dmaengine: ioatdma: disable relaxed ordering for ioatdma") Signed-off-by: Nikita Shubin Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240528-ioatdma-fixes-v2-2-a9f2fbe26ab1@yadro.com Signed-off-by: Vinod Koul --- drivers/dma/ioat/init.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index e76e507ae898..26964b7c8cf1 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -534,18 +534,6 @@ err_out: return err; } -static int ioat_register(struct ioatdma_device *ioat_dma) -{ - int err = dma_async_device_register(&ioat_dma->dma_dev); - - if (err) { - ioat_disable_interrupts(ioat_dma); - dma_pool_destroy(ioat_dma->completion_pool); - } - - return err; -} - static void ioat_dma_remove(struct ioatdma_device *ioat_dma) { struct dma_device *dma = &ioat_dma->dma_dev; @@ -1181,9 +1169,9 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); } - err = ioat_register(ioat_dma); + err = dma_async_device_register(&ioat_dma->dma_dev); if (err) - return err; + goto err_disable_interrupts; ioat_kobject_add(ioat_dma, &ioat_ktype); @@ -1192,20 +1180,29 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) /* disable relaxed ordering */ err = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &val16); - if (err) - return pcibios_err_to_errno(err); + if (err) { + err = pcibios_err_to_errno(err); + goto err_disable_interrupts; + } /* clear relaxed ordering enable */ val16 &= ~PCI_EXP_DEVCTL_RELAX_EN; err = pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, val16); - if (err) - return pcibios_err_to_errno(err); + if (err) { + err = pcibios_err_to_errno(err); + goto err_disable_interrupts; + } if (ioat_dma->cap & IOAT_CAP_DPS) writeb(ioat_pending_level + 1, ioat_dma->reg_base + IOAT_PREFETCH_LIMIT_OFFSET); return 0; + +err_disable_interrupts: + ioat_disable_interrupts(ioat_dma); + dma_pool_destroy(ioat_dma->completion_pool); + return err; } static void ioat_shutdown(struct pci_dev *pdev) From 29b7cd255f3628e0d65be33a939d8b5bba10aa62 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 28 May 2024 09:09:25 +0300 Subject: [PATCH 221/778] dmaengine: ioatdma: Fix kmemleak in ioat_pci_probe() If probing fails we end up with leaking ioatdma_device and each allocated channel. Following kmemleak easy to reproduce by injecting an error in ioat_alloc_chan_resources() when doing ioat_dma_self_test(). unreferenced object 0xffff888014ad5800 (size 1024): [..] [] kmemleak_alloc+0x4a/0x80 [] kmalloc_trace+0x270/0x2f0 [] ioat_pci_probe+0xc1/0x1c0 [ioatdma] [..] repeated for each ioatdma channel: unreferenced object 0xffff8880148e5c00 (size 512): [..] [] kmemleak_alloc+0x4a/0x80 [] kmalloc_trace+0x270/0x2f0 [] ioat_enumerate_channels+0x101/0x2d0 [ioatdma] [] ioat3_dma_probe+0x4d6/0x970 [ioatdma] [] ioat_pci_probe+0x181/0x1c0 [ioatdma] [..] Fixes: bf453a0a18b2 ("dmaengine: ioat: Support in-use unbind") Signed-off-by: Nikita Shubin Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240528-ioatdma-fixes-v2-3-a9f2fbe26ab1@yadro.com Signed-off-by: Vinod Koul --- drivers/dma/ioat/init.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 26964b7c8cf1..cf688b0c8444 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1347,6 +1347,7 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) void __iomem * const *iomap; struct device *dev = &pdev->dev; struct ioatdma_device *device; + unsigned int i; u8 version; int err; @@ -1384,6 +1385,9 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = ioat3_dma_probe(device, ioat_dca_enabled); if (err) { + for (i = 0; i < IOAT_MAX_CHANS; i++) + kfree(device->idx[i]); + kfree(device); dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); return -ENODEV; } From fa555b5026d0bf1ba7c9e645ff75e2725a982631 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 May 2024 13:54:22 +0200 Subject: [PATCH 222/778] dmaengine: fsl-edma: avoid linking both modules Kbuild does not support having a source file compiled multiple times and linked into distinct modules, or built-in and modular at the same time. For fs-edma, there are two common components that are linked into the fsl-edma.ko for Arm and PowerPC, plus the mcf-edma.ko module on Coldfire. This violates the rule for compile-testing: scripts/Makefile.build:236: drivers/dma/Makefile: fsl-edma-common.o is added to multiple modules: fsl-edma mcf-edma scripts/Makefile.build:236: drivers/dma/Makefile: fsl-edma-trace.o is added to multiple modules: fsl-edma mcf-edma I tried splitting out the common parts into a separate modules, but that adds back the complexity that a cleanup patch removed, and it gets harder with the addition of the tracepoints. As a minimal workaround, address it at the Kconfig level, by disallowing the broken configurations. Link: https://lore.kernel.org/lkml/20240110232255.1099757-1-arnd@kernel.org/ Fixes: 66aac8ea0a6c ("dmaengine: fsl-edma: clean up EXPORT_SYMBOL_GPL in fsl-edma-common.c") Signed-off-by: Arnd Bergmann Acked-by: Peng Fan Link: https://lore.kernel.org/r/20240528115440.2965975-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 002a5ec80620..9fc99cfbef08 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -394,7 +394,7 @@ config LS2X_APB_DMA config MCF_EDMA tristate "Freescale eDMA engine support, ColdFire mcf5441x SoCs" - depends on M5441x || COMPILE_TEST + depends on M5441x || (COMPILE_TEST && FSL_EDMA=n) select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help From 1345a13f18370ad9e5bc98995959a27f9bd71464 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 21 May 2024 10:30:02 +0200 Subject: [PATCH 223/778] dt-bindings: dma: fsl-edma: fix dma-channels constraints dma-channels is a number, not a list. Apply proper constraints on the actual number. Fixes: 6eb439dff645 ("dt-bindings: fsl-dma: fsl-edma: add edma3 compatible string") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Reviewed-by: Peng Fan Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240521083002.23262-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/fsl,edma.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/fsl,edma.yaml b/Documentation/devicetree/bindings/dma/fsl,edma.yaml index acfb4b2ee7a9..d54140f18d34 100644 --- a/Documentation/devicetree/bindings/dma/fsl,edma.yaml +++ b/Documentation/devicetree/bindings/dma/fsl,edma.yaml @@ -59,8 +59,8 @@ properties: - 3 dma-channels: - minItems: 1 - maxItems: 64 + minimum: 1 + maximum: 64 clocks: minItems: 1 From 5422145d0b749ad554ada772133b9b20f9fb0ec8 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 14 May 2024 13:52:31 +0300 Subject: [PATCH 224/778] dmaengine: ioatdma: Fix missing kmem_cache_destroy() Fix missing kmem_cache_destroy() for ioat_sed_cache in ioat_exit_module(). Noticed via: ``` modprobe ioatdma rmmod ioatdma modprobe ioatdma debugfs: Directory 'ioat_sed_ent' with parent 'slab' already present! ``` Fixes: c0f28ce66ecf ("dmaengine: ioatdma: move all the init routines") Signed-off-by: Nikita Shubin Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20240514-ioatdma_fixes-v1-1-2776a0913254@yadro.com Signed-off-by: Vinod Koul --- drivers/dma/ioat/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index cf688b0c8444..e8f45a7fded4 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1449,6 +1449,7 @@ module_init(ioat_init_module); static void __exit ioat_exit_module(void) { pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat_sed_cache); kmem_cache_destroy(ioat_cache); } module_exit(ioat_exit_module); From d66e50beb91114f387bd798a371384b2a245e8cc Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Tue, 11 Jun 2024 18:53:17 +0100 Subject: [PATCH 225/778] KVM: arm64: FFA: Release hyp rx buffer According to the FF-A spec (Buffer states and ownership), after a producer has written into a buffer, it is "full" and now owned by the consumer. The producer won't be able to use that buffer, until the consumer hands it over with an invocation such as RX_RELEASE. It is clear in the following paragraph (Transfer of buffer ownership), that MEM_RETRIEVE_RESP is transferring the ownership from producer (in our case SPM) to consumer (hypervisor). RX_RELEASE is therefore mandatory here. It is less clear though what is happening with MEM_FRAG_TX. But this invocation, as a response to MEM_FRAG_RX writes into the same hypervisor RX buffer (see paragraph "Transmission of transaction descriptor in fragments"). Also this is matching the TF-A implementation where the RX buffer is marked "full" during a MEM_FRAG_RX. Release the RX hypervisor buffer in those two cases. This will unblock later invocations using this buffer which would otherwise fail. (RETRIEVE_REQ, MEM_FRAG_RX and PARTITION_INFO_GET). Signed-off-by: Vincent Donnefort Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20240611175317.1220842-1-vdonnefort@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/ffa.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 02746f9d0980..efb053af331c 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -177,6 +177,14 @@ static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len) res); } +static void ffa_rx_release(struct arm_smccc_res *res) +{ + arm_smccc_1_1_smc(FFA_RX_RELEASE, + 0, 0, + 0, 0, 0, 0, 0, + res); +} + static void do_ffa_rxtx_map(struct arm_smccc_res *res, struct kvm_cpu_context *ctxt) { @@ -543,16 +551,19 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, if (WARN_ON(offset > len || fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) { ret = FFA_RET_ABORTED; + ffa_rx_release(res); goto out_unlock; } if (len > ffa_desc_buf.len) { ret = FFA_RET_NO_MEMORY; + ffa_rx_release(res); goto out_unlock; } buf = ffa_desc_buf.buf; memcpy(buf, hyp_buffers.rx, fraglen); + ffa_rx_release(res); for (fragoff = fraglen; fragoff < len; fragoff += fraglen) { ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff); @@ -563,6 +574,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, fraglen = res->a3; memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen); + ffa_rx_release(res); } ffa_mem_reclaim(res, handle_lo, handle_hi, flags); From b6846826982b9f2f2ad0e79540521b517469ee92 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Jun 2024 16:51:23 +0200 Subject: [PATCH 226/778] thermal: gov_step_wise: Restore passive polling management Consider a thermal zone with one passive trip point, a cooling device with 3 states (0, 1, 2) bound to it, passive polling enabled (nonzero passive_delay_jiffies) and no regular polling (polling_delay_jiffies equal to 0) that is managed by the Step-Wise governor. Suppose that the initial state of the cooling device is 0 and the zone temperature is below the trip point to start with. When the trip point is crossed, tz->passive is incremented by the thermal core and the governor's .manage() callback is invoked. It sets 'throttle' to 'true' for the trip in question and get_target_state() returns 1 for the instance corresponding to the cooling device (say that 'upper' and 'lower' are set to 2 and 0 for it, respectively), so its state changes to 1. Passive polling is still active for the zone, so next time the temperature is updated, the governor's .manage() callback will be invoked again. If the temperature is still rising, it will change the state of the cooling device to 2. Now suppose that next time the zone temperature is updated, it falls below the trip point, so tz->passive is decremented for the zone (say it becomes 0 then) and the governor's .manage() callbacks runs. It finds that the temperature trend for the zone is 'falling' and 'throttle' will be set to 'false' for the trip in question, so the cooling device's state will be changed to 1. However, because tz->polling is 0 for the zone, the governor's .manage() callback may not be invoked again for a long time and the cooling device's state will not be reset back to 0. This can happen because commit 042a3d80f118 ("thermal: core: Move passive polling management to the core") removed passive polling management from the Step-Wise governor. Before that change, thermal_zone_trip_update() would bump up tz->passive when changing the target state for a thermal instance from "no target" to a specific value and it would drop tz->passive when changing it back to "no target" which would cause passive polling to be active for the zone until the governor has reset the states of all cooling devices. In particular, in the example above tz->passive would be incremented when changing the state of the cooling device from 0 to 1 and then it would be still nonzero when the state of the cooling device was changed from 2 to 1. To prevent this problem from occurring, restore the passive polling management in the Step-Wise governor by partially reverting the commit in question and update the comment in the restored code to explain its role more clearly. Fixes: 042a3d80f118 ("thermal: core: Move passive polling management to the core") Closes: https://lore.kernel.org/linux-pm/ZmVfcEOxmjUHZTSX@hovoldconsulting.com Reported-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Rafael J. Wysocki --- drivers/thermal/gov_step_wise.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c index e0fdc497bfcc..65974fe8be0d 100644 --- a/drivers/thermal/gov_step_wise.c +++ b/drivers/thermal/gov_step_wise.c @@ -93,6 +93,23 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, if (instance->initialized && old_target == instance->target) continue; + if (trip->type == THERMAL_TRIP_PASSIVE) { + /* + * If the target state for this thermal instance + * changes from THERMAL_NO_TARGET to something else, + * ensure that the zone temperature will be updated + * (assuming enabled passive cooling) until it becomes + * THERMAL_NO_TARGET again, or the cooling device may + * not be reset to its initial state. + */ + if (old_target == THERMAL_NO_TARGET && + instance->target != THERMAL_NO_TARGET) + tz->passive++; + else if (old_target != THERMAL_NO_TARGET && + instance->target == THERMAL_NO_TARGET) + tz->passive--; + } + instance->initialized = true; mutex_lock(&instance->cdev->lock); From 442b15a2d7a3f01534cb80585b84d7b60e4e2219 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 31 May 2024 18:45:33 -0700 Subject: [PATCH 227/778] selftests/openat2: fix clang build failures: -static-libasan, LOCAL_HDRS When building with clang via: make LLVM=1 -C tools/testing/selftests two distinct failures occur: 1) gcc requires -static-libasan in order to ensure that Address Sanitizer's library is the first one loaded. However, this leads to build failures on clang, when building via: make LLVM=1 -C tools/testing/selftests However, clang already does the right thing by default: it statically links the Address Sanitizer if -fsanitize is specified. Therefore, fix this by simply omitting -static-libasan for clang builds. And leave behind a comment, because the whole reason for static linking might not be obvious. 2) clang won't accept invocations of this form, but gcc will: $(CC) file1.c header2.h Fix this by using selftests/lib.mk facilities for tracking local header file dependencies: add them to LOCAL_HDRS, leaving only the .c files to be passed to the compiler. Reviewed-by: Ryan Roberts Signed-off-by: John Hubbard Reviewed-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Signed-off-by: Shuah Khan --- tools/testing/selftests/openat2/Makefile | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 254d676a2689..185dc76ebb5f 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -1,8 +1,18 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test +# gcc requires -static-libasan in order to ensure that Address Sanitizer's +# library is the first one loaded. However, clang already statically links the +# Address Sanitizer if -fsanitize is specified. Therefore, simply omit +# -static-libasan for clang builds. +ifeq ($(LLVM),) + CFLAGS += -static-libasan +endif + +LOCAL_HDRS += helpers.h + include ../lib.mk -$(TEST_GEN_PROGS): helpers.c helpers.h +$(TEST_GEN_PROGS): helpers.c From ed3994ac847e0d6605f248e7f6776b1d4f445f4b Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 31 May 2024 18:45:34 -0700 Subject: [PATCH 228/778] selftests/fchmodat2: fix clang build failure due to -static-libasan gcc requires -static-libasan in order to ensure that Address Sanitizer's library is the first one loaded. However, this leads to build failures on clang, when building via: make LLVM=1 -C tools/testing/selftests However, clang already does the right thing by default: it statically links the Address Sanitizer if -fsanitize is specified. Therefore, simply omit -static-libasan for clang builds. And leave behind a comment, because the whole reason for static linking might not be obvious. Cc: Ryan Roberts Signed-off-by: John Hubbard Reviewed-by: Nathan Chancellor Signed-off-by: Shuah Khan --- tools/testing/selftests/fchmodat2/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile index 71ec34bf1501..4373cea79b79 100644 --- a/tools/testing/selftests/fchmodat2/Makefile +++ b/tools/testing/selftests/fchmodat2/Makefile @@ -1,6 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES) + +# gcc requires -static-libasan in order to ensure that Address Sanitizer's +# library is the first one loaded. However, clang already statically links the +# Address Sanitizer if -fsanitize is specified. Therefore, simply omit +# -static-libasan for clang builds. +ifeq ($(LLVM),) + CFLAGS += -static-libasan +endif + TEST_GEN_PROGS := fchmodat2_test include ../lib.mk From 017ed5e70c88bf2f1e0952bab6f53a53a028e561 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 18 May 2024 00:26:17 +0100 Subject: [PATCH 229/778] drm/nouveau: remove unused struct 'init_exec' 'init_exec' is unused since commit cb75d97e9c77 ("drm/nouveau: implement devinit subdev, and new init table parser") Remove it. Signed-off-by: Dr. David Alan Gilbert Acked-by: Danilo Krummrich Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240517232617.230767-1-linux@treblig.org --- drivers/gpu/drm/nouveau/nouveau_bios.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index 79cfab53f80e..8c3c1f1e01c5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -43,11 +43,6 @@ #define BIOSLOG(sip, fmt, arg...) NV_DEBUG(sip->dev, fmt, ##arg) #define LOG_OLD_VALUE(x) -struct init_exec { - bool execute; - bool repeat; -}; - static bool nv_cksum(const uint8_t *data, unsigned int length) { /* From a126eca844353360ebafa9088d22865cb8e022e3 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 19 May 2024 23:07:35 +0200 Subject: [PATCH 230/778] rust: avoid unused import warning in `rusttest` When compiling for the `rusttest` target, the `core::ptr` import is unused since its only use happens in the `reserve()` method which is not compiled in that target: warning: unused import: `core::ptr` --> rust/kernel/alloc/vec_ext.rs:7:5 | 7 | use core::ptr; | ^^^^^^^^^ | = note: `#[warn(unused_imports)]` on by default Thus clean it. Fixes: 97ab3e8eec0c ("rust: alloc: fix dangling pointer in VecExt::reserve()") Reviewed-by: Alice Ryhl Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/r/20240519210735.587323-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/vec_ext.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rust/kernel/alloc/vec_ext.rs b/rust/kernel/alloc/vec_ext.rs index e9a81052728a..1297a4be32e8 100644 --- a/rust/kernel/alloc/vec_ext.rs +++ b/rust/kernel/alloc/vec_ext.rs @@ -4,7 +4,6 @@ use super::{AllocError, Flags}; use alloc::vec::Vec; -use core::ptr; /// Extensions to [`Vec`]. pub trait VecExt: Sized { @@ -141,7 +140,11 @@ impl VecExt for Vec { // `krealloc_aligned`. A `Vec`'s `ptr` value is not guaranteed to be NULL and might be // dangling after being created with `Vec::new`. Instead, we can rely on `Vec`'s capacity // to be zero if no memory has been allocated yet. - let ptr = if cap == 0 { ptr::null_mut() } else { old_ptr }; + let ptr = if cap == 0 { + core::ptr::null_mut() + } else { + old_ptr + }; // SAFETY: `ptr` is valid because it's either NULL or comes from a previous call to // `krealloc_aligned`. We also verified that the type is not a ZST. From f2736b9c791a126ecb9cfc1aef1c7b4152b66e2d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 11 Jun 2024 18:59:08 -0400 Subject: [PATCH 231/778] bcachefs: Fix rcu_read_lock() leak in drop_extra_replicas Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 0d807c2ce9c6..1a0072eef109 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -202,9 +202,8 @@ restart_drop_conflicting_replicas: bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i)); /* Now, drop excess replicas: */ -restart_drop_extra_replicas: - rcu_read_lock(); +restart_drop_extra_replicas: bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) { unsigned ptr_durability = bch2_extent_ptr_durability(c, &p); From 8c860ed825cb85f6672cd7b10a8f33e3498a7c81 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Jun 2024 14:02:27 -0700 Subject: [PATCH 232/778] x86/uaccess: Fix missed zeroing of ia32 u64 get_user() range checking When reworking the range checking for get_user(), the get_user_8() case on 32-bit wasn't zeroing the high register. (The jump to bad_get_user_8 was accidentally dropped.) Restore the correct error handling destination (and rename the jump to using the expected ".L" prefix). While here, switch to using a named argument ("size") for the call template ("%c4" to "%c[size]") as already used in the other call templates in this file. Found after moving the usercopy selftests to KUnit: # usercopy_test_invalid: EXPECTATION FAILED at lib/usercopy_kunit.c:278 Expected val_u64 == 0, but val_u64 == -60129542144 (0xfffffff200000000) Closes: https://lore.kernel.org/all/CABVgOSn=tb=Lj9SxHuT4_9MTjjKVxsq-ikdXC4kGHO4CfKVmGQ@mail.gmail.com Fixes: b19b74bc99b1 ("x86/mm: Rework address range check in get_user() and put_user()") Reported-by: David Gow Signed-off-by: Kees Cook Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Reviewed-by: Qiuxu Zhuo Tested-by: David Gow Link: https://lore.kernel.org/all/20240610210213.work.143-kees%40kernel.org --- arch/x86/include/asm/uaccess.h | 4 ++-- arch/x86/lib/getuser.S | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 0f9bab92a43d..3a7755c1a441 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -78,10 +78,10 @@ extern int __get_user_bad(void); int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ __chk_user_ptr(ptr); \ - asm volatile("call __" #fn "_%c4" \ + asm volatile("call __" #fn "_%c[size]" \ : "=a" (__ret_gu), "=r" (__val_gu), \ ASM_CALL_CONSTRAINT \ - : "0" (ptr), "i" (sizeof(*(ptr)))); \ + : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \ instrument_get_user(__val_gu); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 10d5ed8b5990..a1cb3a4e6742 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -44,7 +44,11 @@ or %rdx, %rax .else cmp $TASK_SIZE_MAX-\size+1, %eax +.if \size != 8 jae .Lbad_get_user +.else + jae .Lbad_get_user_8 +.endif sbb %edx, %edx /* array_index_mask_nospec() */ and %edx, %eax .endif @@ -154,7 +158,7 @@ SYM_CODE_END(__get_user_handle_exception) #ifdef CONFIG_X86_32 SYM_CODE_START_LOCAL(__get_user_8_handle_exception) ASM_CLAC -bad_get_user_8: +.Lbad_get_user_8: xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX From 3572bd5689b0812b161b40279e39ca5b66d73e88 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 11 Jun 2024 22:30:37 +0900 Subject: [PATCH 233/778] tracing: Build event generation tests only as modules The kprobes and synth event generation test modules add events and lock (get a reference) those event file reference in module init function, and unlock and delete it in module exit function. This is because those are designed for playing as modules. If we make those modules as built-in, those events are left locked in the kernel, and never be removed. This causes kprobe event self-test failure as below. [ 97.349708] ------------[ cut here ]------------ [ 97.353453] WARNING: CPU: 3 PID: 1 at kernel/trace/trace_kprobe.c:2133 kprobe_trace_self_tests_init+0x3f1/0x480 [ 97.357106] Modules linked in: [ 97.358488] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 6.9.0-g699646734ab5-dirty #14 [ 97.361556] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 97.363880] RIP: 0010:kprobe_trace_self_tests_init+0x3f1/0x480 [ 97.365538] Code: a8 24 08 82 e9 ae fd ff ff 90 0f 0b 90 48 c7 c7 e5 aa 0b 82 e9 ee fc ff ff 90 0f 0b 90 48 c7 c7 2d 61 06 82 e9 8e fd ff ff 90 <0f> 0b 90 48 c7 c7 33 0b 0c 82 89 c6 e8 6e 03 1f ff 41 ff c7 e9 90 [ 97.370429] RSP: 0000:ffffc90000013b50 EFLAGS: 00010286 [ 97.371852] RAX: 00000000fffffff0 RBX: ffff888005919c00 RCX: 0000000000000000 [ 97.373829] RDX: ffff888003f40000 RSI: ffffffff8236a598 RDI: ffff888003f40a68 [ 97.375715] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [ 97.377675] R10: ffffffff811c9ae5 R11: ffffffff8120c4e0 R12: 0000000000000000 [ 97.379591] R13: 0000000000000001 R14: 0000000000000015 R15: 0000000000000000 [ 97.381536] FS: 0000000000000000(0000) GS:ffff88807dcc0000(0000) knlGS:0000000000000000 [ 97.383813] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 97.385449] CR2: 0000000000000000 CR3: 0000000002244000 CR4: 00000000000006b0 [ 97.387347] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 97.389277] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 97.391196] Call Trace: [ 97.391967] [ 97.392647] ? __warn+0xcc/0x180 [ 97.393640] ? kprobe_trace_self_tests_init+0x3f1/0x480 [ 97.395181] ? report_bug+0xbd/0x150 [ 97.396234] ? handle_bug+0x3e/0x60 [ 97.397311] ? exc_invalid_op+0x1a/0x50 [ 97.398434] ? asm_exc_invalid_op+0x1a/0x20 [ 97.399652] ? trace_kprobe_is_busy+0x20/0x20 [ 97.400904] ? tracing_reset_all_online_cpus+0x15/0x90 [ 97.402304] ? kprobe_trace_self_tests_init+0x3f1/0x480 [ 97.403773] ? init_kprobe_trace+0x50/0x50 [ 97.404972] do_one_initcall+0x112/0x240 [ 97.406113] do_initcall_level+0x95/0xb0 [ 97.407286] ? kernel_init+0x1a/0x1a0 [ 97.408401] do_initcalls+0x3f/0x70 [ 97.409452] kernel_init_freeable+0x16f/0x1e0 [ 97.410662] ? rest_init+0x1f0/0x1f0 [ 97.411738] kernel_init+0x1a/0x1a0 [ 97.412788] ret_from_fork+0x39/0x50 [ 97.413817] ? rest_init+0x1f0/0x1f0 [ 97.414844] ret_from_fork_asm+0x11/0x20 [ 97.416285] [ 97.417134] irq event stamp: 13437323 [ 97.418376] hardirqs last enabled at (13437337): [] console_unlock+0x11c/0x150 [ 97.421285] hardirqs last disabled at (13437370): [] console_unlock+0x101/0x150 [ 97.423838] softirqs last enabled at (13437366): [] handle_softirqs+0x23f/0x2a0 [ 97.426450] softirqs last disabled at (13437393): [] __irq_exit_rcu+0x66/0xd0 [ 97.428850] ---[ end trace 0000000000000000 ]--- And also, since we can not cleanup dynamic_event file, ftracetest are failed too. To avoid these issues, build these tests only as modules. Link: https://lore.kernel.org/all/171811263754.85078.5877446624311852525.stgit@devnote2/ Fixes: 9fe41efaca08 ("tracing: Add synth event generation test module") Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- kernel/trace/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 166ad5444eea..721c3b221048 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -1136,7 +1136,7 @@ config PREEMPTIRQ_DELAY_TEST config SYNTH_EVENT_GEN_TEST tristate "Test module for in-kernel synthetic event generation" - depends on SYNTH_EVENTS + depends on SYNTH_EVENTS && m help This option creates a test module to check the base functionality of in-kernel synthetic event definition and @@ -1149,7 +1149,7 @@ config SYNTH_EVENT_GEN_TEST config KPROBE_EVENT_GEN_TEST tristate "Test module for in-kernel kprobe event generation" - depends on KPROBE_EVENTS + depends on KPROBE_EVENTS && m help This option creates a test module to check the base functionality of in-kernel kprobe event definition. From 72d95924ee35c8cd16ef52f912483ee938a34d49 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 10 Jun 2024 18:47:07 +0000 Subject: [PATCH 234/778] parisc: Try to fix random segmentation faults in package builds PA-RISC systems with PA8800 and PA8900 processors have had problems with random segmentation faults for many years. Systems with earlier processors are much more stable. Systems with PA8800 and PA8900 processors have a large L2 cache which needs per page flushing for decent performance when a large range is flushed. The combined cache in these systems is also more sensitive to non-equivalent aliases than the caches in earlier systems. The majority of random segmentation faults that I have looked at appear to be memory corruption in memory allocated using mmap and malloc. My first attempt at fixing the random faults didn't work. On reviewing the cache code, I realized that there were two issues which the existing code didn't handle correctly. Both relate to cache move-in. Another issue is that the present bit in PTEs is racy. 1) PA-RISC caches have a mind of their own and they can speculatively load data and instructions for a page as long as there is a entry in the TLB for the page which allows move-in. TLBs are local to each CPU. Thus, the TLB entry for a page must be purged before flushing the page. This is particularly important on SMP systems. In some of the flush routines, the flush routine would be called and then the TLB entry would be purged. This was because the flush routine needed the TLB entry to do the flush. 2) My initial approach to trying the fix the random faults was to try and use flush_cache_page_if_present for all flush operations. This actually made things worse and led to a couple of hardware lockups. It finally dawned on me that some lines weren't being flushed because the pte check code was racy. This resulted in random inequivalent mappings to physical pages. The __flush_cache_page tmpalias flush sets up its own TLB entry and it doesn't need the existing TLB entry. As long as we can find the pte pointer for the vm page, we can get the pfn and physical address of the page. We can also purge the TLB entry for the page before doing the flush. Further, __flush_cache_page uses a special TLB entry that inhibits cache move-in. When switching page mappings, we need to ensure that lines are removed from the cache. It is not sufficient to just flush the lines to memory as they may come back. This made it clear that we needed to implement all the required flush operations using tmpalias routines. This includes flushes for user and kernel pages. After modifying the code to use tmpalias flushes, it became clear that the random segmentation faults were not fully resolved. The frequency of faults was worse on systems with a 64 MB L2 (PA8900) and systems with more CPUs (rp4440). The warning that I added to flush_cache_page_if_present to detect pages that couldn't be flushed triggered frequently on some systems. Helge and I looked at the pages that couldn't be flushed and found that the PTE was either cleared or for a swap page. Ignoring pages that were swapped out seemed okay but pages with cleared PTEs seemed problematic. I looked at routines related to pte_clear and noticed ptep_clear_flush. The default implementation just flushes the TLB entry. However, it was obvious that on parisc we need to flush the cache page as well. If we don't flush the cache page, stale lines will be left in the cache and cause random corruption. Once a PTE is cleared, there is no way to find the physical address associated with the PTE and flush the associated page at a later time. I implemented an updated change with a parisc specific version of ptep_clear_flush. It fixed the random data corruption on Helge's rp4440 and rp3440, as well as on my c8000. At this point, I realized that I could restore the code where we only flush in flush_cache_page_if_present if the page has been accessed. However, for this, we also need to flush the cache when the accessed bit is cleared in ptep_clear_flush_young to keep things synchronized. The default implementation only flushes the TLB entry. Other changes in this version are: 1) Implement parisc specific version of ptep_get. It's identical to default but needed in arch/parisc/include/asm/pgtable.h. 2) Revise parisc implementation of ptep_test_and_clear_young to use ptep_get (READ_ONCE). 3) Drop parisc implementation of ptep_get_and_clear. We can use default. 4) Revise flush_kernel_vmap_range and invalidate_kernel_vmap_range to use full data cache flush. 5) Move flush_cache_vmap and flush_cache_vunmap to cache.c. Handle VM_IOREMAP case in flush_cache_vmap. At this time, I don't know whether it is better to always flush when the PTE present bit is set or when both the accessed and present bits are set. The later saves flushing pages that haven't been accessed, but we need to flush in ptep_clear_flush_young. It also needs a page table lookup to find the PTE pointer. The lpa instruction only needs a page table lookup when the PTE entry isn't in the TLB. We don't atomically handle setting and clearing the _PAGE_ACCESSED bit. If we miss an update, we may miss a flush and the cache may get corrupted. Whether the current code is effectively atomic depends on process control. When CONFIG_FLUSH_PAGE_ACCESSED is set to zero, the page will eventually be flushed when the PTE is cleared or in flush_cache_page_if_present. The _PAGE_ACCESSED bit is not used, so the problem is avoided. The flush method can be selected using the CONFIG_FLUSH_PAGE_ACCESSED define in cache.c. The default is 0. I didn't see a large difference in performance. Signed-off-by: John David Anglin Cc: # v6.6+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/cacheflush.h | 15 +- arch/parisc/include/asm/pgtable.h | 27 +- arch/parisc/kernel/cache.c | 421 +++++++++++++++++---------- 3 files changed, 279 insertions(+), 184 deletions(-) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index ba4c05bc24d6..8394718870e1 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -31,18 +31,17 @@ void flush_cache_all_local(void); void flush_cache_all(void); void flush_cache_mm(struct mm_struct *mm); -void flush_kernel_dcache_page_addr(const void *addr); - #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); +/* The only way to flush a vmap range is to flush whole cache */ #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 void flush_kernel_vmap_range(void *vaddr, int size); void invalidate_kernel_vmap_range(void *vaddr, int size); -#define flush_cache_vmap(start, end) flush_cache_all() +void flush_cache_vmap(unsigned long start, unsigned long end); #define flush_cache_vmap_early(start, end) do { } while (0) -#define flush_cache_vunmap(start, end) flush_cache_all() +void flush_cache_vunmap(unsigned long start, unsigned long end); void flush_dcache_folio(struct folio *folio); #define flush_dcache_folio flush_dcache_folio @@ -77,17 +76,11 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); -/* defined in pacache.S exported in cache.c used by flush_anon_page */ -void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); - #define ARCH_HAS_FLUSH_ANON_PAGE void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr); #define ARCH_HAS_FLUSH_ON_KUNMAP -static inline void kunmap_flush_on_unmap(const void *addr) -{ - flush_kernel_dcache_page_addr(addr); -} +void kunmap_flush_on_unmap(const void *addr); #endif /* _PARISC_CACHEFLUSH_H */ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 974accac05cd..babf65751e81 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -448,14 +448,17 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return pte; } +static inline pte_t ptep_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} +#define ptep_get ptep_get + static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; - if (!pte_young(*ptep)) - return 0; - - pte = *ptep; + pte = ptep_get(ptep); if (!pte_young(pte)) { return 0; } @@ -463,17 +466,10 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned return 1; } +int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); +pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); + struct mm_struct; -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - pte_t old_pte; - - old_pte = *ptep; - set_pte(ptep, __pte(0)); - - return old_pte; -} - static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { set_pte(ptep, pte_wrprotect(*ptep)); @@ -511,7 +507,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH #define __HAVE_ARCH_PTEP_SET_WRPROTECT #define __HAVE_ARCH_PTE_SAME diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 422f3e1e6d9c..483bfafd930c 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -31,20 +32,31 @@ #include #include +#define PTR_PAGE_ALIGN_DOWN(addr) PTR_ALIGN_DOWN(addr, PAGE_SIZE) + +/* + * When nonzero, use _PAGE_ACCESSED bit to try to reduce the number + * of page flushes done flush_cache_page_if_present. There are some + * pros and cons in using this option. It may increase the risk of + * random segmentation faults. + */ +#define CONFIG_FLUSH_PAGE_ACCESSED 0 + int split_tlb __ro_after_init; int dcache_stride __ro_after_init; int icache_stride __ro_after_init; EXPORT_SYMBOL(dcache_stride); +/* Internal implementation in arch/parisc/kernel/pacache.S */ void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); EXPORT_SYMBOL(flush_dcache_page_asm); void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr); - -/* Internal implementation in arch/parisc/kernel/pacache.S */ void flush_data_cache_local(void *); /* flushes local data-cache only */ void flush_instruction_cache_local(void); /* flushes local code-cache only */ +static void flush_kernel_dcache_page_addr(const void *addr); + /* On some machines (i.e., ones with the Merced bus), there can be * only a single PxTLB broadcast at a time; this must be guaranteed * by software. We need a spinlock around all TLB flushes to ensure @@ -321,6 +333,18 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, { if (!static_branch_likely(&parisc_has_cache)) return; + + /* + * The TLB is the engine of coherence on parisc. The CPU is + * entitled to speculate any page with a TLB mapping, so here + * we kill the mapping then flush the page along a special flush + * only alias mapping. This guarantees that the page is no-longer + * in the cache for any process and nor may it be speculatively + * read in (until the user or kernel specifically accesses it, + * of course). + */ + flush_tlb_page(vma, vmaddr); + preempt_disable(); flush_dcache_page_asm(physaddr, vmaddr); if (vma->vm_flags & VM_EXEC) @@ -328,46 +352,44 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, preempt_enable(); } -static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr) +static void flush_kernel_dcache_page_addr(const void *addr) { - unsigned long flags, space, pgd, prot; -#ifdef CONFIG_TLB_PTLOCK - unsigned long pgd_lock; -#endif + unsigned long vaddr = (unsigned long)addr; + unsigned long flags; - vmaddr &= PAGE_MASK; + /* Purge TLB entry to remove translation on all CPUs */ + purge_tlb_start(flags); + pdtlb(SR_KERNEL, addr); + purge_tlb_end(flags); + /* Use tmpalias flush to prevent data cache move-in */ preempt_disable(); - - /* Set context for flush */ - local_irq_save(flags); - prot = mfctl(8); - space = mfsp(SR_USER); - pgd = mfctl(25); -#ifdef CONFIG_TLB_PTLOCK - pgd_lock = mfctl(28); -#endif - switch_mm_irqs_off(NULL, vma->vm_mm, NULL); - local_irq_restore(flags); - - flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE); - flush_tlb_page(vma, vmaddr); - - /* Restore previous context */ - local_irq_save(flags); -#ifdef CONFIG_TLB_PTLOCK - mtctl(pgd_lock, 28); -#endif - mtctl(pgd, 25); - mtsp(space, SR_USER); - mtctl(prot, 8); - local_irq_restore(flags); - + flush_dcache_page_asm(__pa(vaddr), vaddr); preempt_enable(); } +static void flush_kernel_icache_page_addr(const void *addr) +{ + unsigned long vaddr = (unsigned long)addr; + unsigned long flags; + + /* Purge TLB entry to remove translation on all CPUs */ + purge_tlb_start(flags); + pdtlb(SR_KERNEL, addr); + purge_tlb_end(flags); + + /* Use tmpalias flush to prevent instruction cache move-in */ + preempt_disable(); + flush_icache_page_asm(__pa(vaddr), vaddr); + preempt_enable(); +} + +void kunmap_flush_on_unmap(const void *addr) +{ + flush_kernel_dcache_page_addr(addr); +} +EXPORT_SYMBOL(kunmap_flush_on_unmap); + void flush_icache_pages(struct vm_area_struct *vma, struct page *page, unsigned int nr) { @@ -375,13 +397,16 @@ void flush_icache_pages(struct vm_area_struct *vma, struct page *page, for (;;) { flush_kernel_dcache_page_addr(kaddr); - flush_kernel_icache_page(kaddr); + flush_kernel_icache_page_addr(kaddr); if (--nr == 0) break; kaddr += PAGE_SIZE; } } +/* + * Walk page directory for MM to find PTEP pointer for address ADDR. + */ static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr) { pte_t *ptep = NULL; @@ -410,6 +435,41 @@ static inline bool pte_needs_flush(pte_t pte) == (_PAGE_PRESENT | _PAGE_ACCESSED); } +/* + * Return user physical address. Returns 0 if page is not present. + */ +static inline unsigned long get_upa(struct mm_struct *mm, unsigned long addr) +{ + unsigned long flags, space, pgd, prot, pa; +#ifdef CONFIG_TLB_PTLOCK + unsigned long pgd_lock; +#endif + + /* Save context */ + local_irq_save(flags); + prot = mfctl(8); + space = mfsp(SR_USER); + pgd = mfctl(25); +#ifdef CONFIG_TLB_PTLOCK + pgd_lock = mfctl(28); +#endif + + /* Set context for lpa_user */ + switch_mm_irqs_off(NULL, mm, NULL); + pa = lpa_user(addr); + + /* Restore previous context */ +#ifdef CONFIG_TLB_PTLOCK + mtctl(pgd_lock, 28); +#endif + mtctl(pgd, 25); + mtsp(space, SR_USER); + mtctl(prot, 8); + local_irq_restore(flags); + + return pa; +} + void flush_dcache_folio(struct folio *folio) { struct address_space *mapping = folio_flush_mapping(folio); @@ -458,50 +518,23 @@ void flush_dcache_folio(struct folio *folio) if (addr + nr * PAGE_SIZE > vma->vm_end) nr = (vma->vm_end - addr) / PAGE_SIZE; - if (parisc_requires_coherency()) { - for (i = 0; i < nr; i++) { - pte_t *ptep = get_ptep(vma->vm_mm, - addr + i * PAGE_SIZE); - if (!ptep) - continue; - if (pte_needs_flush(*ptep)) - flush_user_cache_page(vma, - addr + i * PAGE_SIZE); - /* Optimise accesses to the same table? */ - pte_unmap(ptep); - } - } else { - /* - * The TLB is the engine of coherence on parisc: - * The CPU is entitled to speculate any page - * with a TLB mapping, so here we kill the - * mapping then flush the page along a special - * flush only alias mapping. This guarantees that - * the page is no-longer in the cache for any - * process and nor may it be speculatively read - * in (until the user or kernel specifically - * accesses it, of course) - */ - for (i = 0; i < nr; i++) - flush_tlb_page(vma, addr + i * PAGE_SIZE); - if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1)) + if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1)) != (addr & (SHM_COLOUR - 1))) { - for (i = 0; i < nr; i++) - __flush_cache_page(vma, - addr + i * PAGE_SIZE, - (pfn + i) * PAGE_SIZE); - /* - * Software is allowed to have any number - * of private mappings to a page. - */ - if (!(vma->vm_flags & VM_SHARED)) - continue; - if (old_addr) - pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", - old_addr, addr, vma->vm_file); - if (nr == folio_nr_pages(folio)) - old_addr = addr; - } + for (i = 0; i < nr; i++) + __flush_cache_page(vma, + addr + i * PAGE_SIZE, + (pfn + i) * PAGE_SIZE); + /* + * Software is allowed to have any number + * of private mappings to a page. + */ + if (!(vma->vm_flags & VM_SHARED)) + continue; + if (old_addr) + pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", + old_addr, addr, vma->vm_file); + if (nr == folio_nr_pages(folio)) + old_addr = addr; } WARN_ON(++count == 4096); } @@ -591,35 +624,28 @@ extern void purge_kernel_dcache_page_asm(unsigned long); extern void clear_user_page_asm(void *, unsigned long); extern void copy_user_page_asm(void *, void *, unsigned long); -void flush_kernel_dcache_page_addr(const void *addr) -{ - unsigned long flags; - - flush_kernel_dcache_page_asm(addr); - purge_tlb_start(flags); - pdtlb(SR_KERNEL, addr); - purge_tlb_end(flags); -} -EXPORT_SYMBOL(flush_kernel_dcache_page_addr); - static void flush_cache_page_if_present(struct vm_area_struct *vma, - unsigned long vmaddr, unsigned long pfn) + unsigned long vmaddr) { +#if CONFIG_FLUSH_PAGE_ACCESSED bool needs_flush = false; - pte_t *ptep; + pte_t *ptep, pte; - /* - * The pte check is racy and sometimes the flush will trigger - * a non-access TLB miss. Hopefully, the page has already been - * flushed. - */ ptep = get_ptep(vma->vm_mm, vmaddr); if (ptep) { - needs_flush = pte_needs_flush(*ptep); + pte = ptep_get(ptep); + needs_flush = pte_needs_flush(pte); pte_unmap(ptep); } if (needs_flush) - flush_cache_page(vma, vmaddr, pfn); + __flush_cache_page(vma, vmaddr, PFN_PHYS(pte_pfn(pte))); +#else + struct mm_struct *mm = vma->vm_mm; + unsigned long physaddr = get_upa(mm, vmaddr); + + if (physaddr) + __flush_cache_page(vma, vmaddr, PAGE_ALIGN_DOWN(physaddr)); +#endif } void copy_user_highpage(struct page *to, struct page *from, @@ -629,7 +655,7 @@ void copy_user_highpage(struct page *to, struct page *from, kfrom = kmap_local_page(from); kto = kmap_local_page(to); - flush_cache_page_if_present(vma, vaddr, page_to_pfn(from)); + __flush_cache_page(vma, vaddr, PFN_PHYS(page_to_pfn(from))); copy_page_asm(kto, kfrom); kunmap_local(kto); kunmap_local(kfrom); @@ -638,16 +664,17 @@ void copy_user_highpage(struct page *to, struct page *from, void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long user_vaddr, void *dst, void *src, int len) { - flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page)); + __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page))); memcpy(dst, src, len); - flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); + flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(dst)); } void copy_from_user_page(struct vm_area_struct *vma, struct page *page, unsigned long user_vaddr, void *dst, void *src, int len) { - flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page)); + __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page))); memcpy(dst, src, len); + flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(src)); } /* __flush_tlb_range() @@ -681,32 +708,10 @@ int __flush_tlb_range(unsigned long sid, unsigned long start, static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long addr, pfn; - pte_t *ptep; + unsigned long addr; - for (addr = start; addr < end; addr += PAGE_SIZE) { - bool needs_flush = false; - /* - * The vma can contain pages that aren't present. Although - * the pte search is expensive, we need the pte to find the - * page pfn and to check whether the page should be flushed. - */ - ptep = get_ptep(vma->vm_mm, addr); - if (ptep) { - needs_flush = pte_needs_flush(*ptep); - pfn = pte_pfn(*ptep); - pte_unmap(ptep); - } - if (needs_flush) { - if (parisc_requires_coherency()) { - flush_user_cache_page(vma, addr); - } else { - if (WARN_ON(!pfn_valid(pfn))) - return; - __flush_cache_page(vma, addr, PFN_PHYS(pfn)); - } - } - } + for (addr = start; addr < end; addr += PAGE_SIZE) + flush_cache_page_if_present(vma, addr); } static inline unsigned long mm_total_size(struct mm_struct *mm) @@ -757,21 +762,19 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) return; flush_tlb_range(vma, start, end); - flush_cache_all(); + if (vma->vm_flags & VM_EXEC) + flush_cache_all(); + else + flush_data_cache(); return; } - flush_cache_pages(vma, start, end); + flush_cache_pages(vma, start & PAGE_MASK, end); } void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { - if (WARN_ON(!pfn_valid(pfn))) - return; - if (parisc_requires_coherency()) - flush_user_cache_page(vma, vmaddr); - else - __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); + __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) @@ -779,34 +782,133 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned lon if (!PageAnon(page)) return; - if (parisc_requires_coherency()) { - if (vma->vm_flags & VM_SHARED) - flush_data_cache(); - else - flush_user_cache_page(vma, vmaddr); + __flush_cache_page(vma, vmaddr, PFN_PHYS(page_to_pfn(page))); +} + +int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) +{ + pte_t pte = ptep_get(ptep); + + if (!pte_young(pte)) + return 0; + set_pte(ptep, pte_mkold(pte)); +#if CONFIG_FLUSH_PAGE_ACCESSED + __flush_cache_page(vma, addr, PFN_PHYS(pte_pfn(pte))); +#endif + return 1; +} + +/* + * After a PTE is cleared, we have no way to flush the cache for + * the physical page. On PA8800 and PA8900 processors, these lines + * can cause random cache corruption. Thus, we must flush the cache + * as well as the TLB when clearing a PTE that's valid. + */ +pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) +{ + struct mm_struct *mm = (vma)->vm_mm; + pte_t pte = ptep_get_and_clear(mm, addr, ptep); + unsigned long pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) + __flush_cache_page(vma, addr, PFN_PHYS(pfn)); + else if (pte_accessible(mm, pte)) + flush_tlb_page(vma, addr); + + return pte; +} + +/* + * The physical address for pages in the ioremap case can be obtained + * from the vm_struct struct. I wasn't able to successfully handle the + * vmalloc and vmap cases. We have an array of struct page pointers in + * the uninitialized vmalloc case but the flush failed using page_to_pfn. + */ +void flush_cache_vmap(unsigned long start, unsigned long end) +{ + unsigned long addr, physaddr; + struct vm_struct *vm; + + /* Prevent cache move-in */ + flush_tlb_kernel_range(start, end); + + if (end - start >= parisc_cache_flush_threshold) { + flush_cache_all(); return; } - flush_tlb_page(vma, vmaddr); - preempt_disable(); - flush_dcache_page_asm(page_to_phys(page), vmaddr); - preempt_enable(); -} + if (WARN_ON_ONCE(!is_vmalloc_addr((void *)start))) { + flush_cache_all(); + return; + } + vm = find_vm_area((void *)start); + if (WARN_ON_ONCE(!vm)) { + flush_cache_all(); + return; + } + + /* The physical addresses of IOREMAP regions are contiguous */ + if (vm->flags & VM_IOREMAP) { + physaddr = vm->phys_addr; + for (addr = start; addr < end; addr += PAGE_SIZE) { + preempt_disable(); + flush_dcache_page_asm(physaddr, start); + flush_icache_page_asm(physaddr, start); + preempt_enable(); + physaddr += PAGE_SIZE; + } + return; + } + + flush_cache_all(); +} +EXPORT_SYMBOL(flush_cache_vmap); + +/* + * The vm_struct has been retired and the page table is set up. The + * last page in the range is a guard page. Its physical address can't + * be determined using lpa, so there is no way to flush the range + * using flush_dcache_page_asm. + */ +void flush_cache_vunmap(unsigned long start, unsigned long end) +{ + /* Prevent cache move-in */ + flush_tlb_kernel_range(start, end); + flush_data_cache(); +} +EXPORT_SYMBOL(flush_cache_vunmap); + +/* + * On systems with PA8800/PA8900 processors, there is no way to flush + * a vmap range other than using the architected loop to flush the + * entire cache. The page directory is not set up, so we can't use + * fdc, etc. FDCE/FICE don't work to flush a portion of the cache. + * L2 is physically indexed but FDCE/FICE instructions in virtual + * mode output their virtual address on the core bus, not their + * real address. As a result, the L2 cache index formed from the + * virtual address will most likely not be the same as the L2 index + * formed from the real address. + */ void flush_kernel_vmap_range(void *vaddr, int size) { unsigned long start = (unsigned long)vaddr; unsigned long end = start + size; - if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && - (unsigned long)size >= parisc_cache_flush_threshold) { - flush_tlb_kernel_range(start, end); - flush_data_cache(); + flush_tlb_kernel_range(start, end); + + if (!static_branch_likely(&parisc_has_dcache)) + return; + + /* If interrupts are disabled, we can only do local flush */ + if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) { + flush_data_cache_local(NULL); return; } - flush_kernel_dcache_range_asm(start, end); - flush_tlb_kernel_range(start, end); + flush_data_cache(); } EXPORT_SYMBOL(flush_kernel_vmap_range); @@ -818,15 +920,18 @@ void invalidate_kernel_vmap_range(void *vaddr, int size) /* Ensure DMA is complete */ asm_syncdma(); - if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && - (unsigned long)size >= parisc_cache_flush_threshold) { - flush_tlb_kernel_range(start, end); - flush_data_cache(); + flush_tlb_kernel_range(start, end); + + if (!static_branch_likely(&parisc_has_dcache)) + return; + + /* If interrupts are disabled, we can only do local flush */ + if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) { + flush_data_cache_local(NULL); return; } - purge_kernel_dcache_range_asm(start, end); - flush_tlb_kernel_range(start, end); + flush_data_cache(); } EXPORT_SYMBOL(invalidate_kernel_vmap_range); From 52912ca87e2b810e5acdcdc452593d30c9187d8f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 7 Jun 2024 10:25:07 +0900 Subject: [PATCH 235/778] scsi: core: Disable CDL by default For SCSI devices supporting the Command Duration Limits feature set, the user can enable/disable this feature use through the sysfs device attribute "cdl_enable". This attribute modification triggers a call to scsi_cdl_enable() to enable and disable the feature for ATA devices and set the scsi device cdl_enable field to the user provided bool value. For SCSI devices supporting CDL, the feature set is always enabled and scsi_cdl_enable() is reduced to setting the cdl_enable field. However, for ATA devices, a drive may spin-up with the CDL feature enabled by default. But the SCSI device cdl_enable field is always initialized to false (CDL disabled), regardless of the actual device CDL feature state. For ATA devices managed by libata (or libsas), libata-core always disables the CDL feature set when the device is attached, thus syncing the state of the CDL feature on the device and of the SCSI device cdl_enable field. However, for ATA devices connected to a SAS HBA, the CDL feature is not disabled on scan for ATA devices that have this feature enabled by default, leading to an inconsistent state of the feature on the device with the SCSI device cdl_enable field. Avoid this inconsistency by adding a call to scsi_cdl_enable() in scsi_cdl_check() to make sure that the device-side state of the CDL feature set always matches the scsi device cdl_enable field state. This implies that CDL will always be disabled for ATA devices connected to SAS HBAs, which is consistent with libata/libsas initialization of the device. Reported-by: Scott McCoy Fixes: 1b22cfb14142 ("scsi: core: Allow enabling and disabling command duration limits") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240607012507.111488-1-dlemoal@kernel.org Reviewed-by: Niklas Cassel Reviewed-by: Igor Pylypiv Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index f0464db3f9de..ee69bd35889d 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -673,6 +673,13 @@ void scsi_cdl_check(struct scsi_device *sdev) sdev->use_10_for_rw = 0; sdev->cdl_supported = 1; + + /* + * If the device supports CDL, make sure that the current drive + * feature status is consistent with the user controlled + * cdl_enable state. + */ + scsi_cdl_enable(sdev, sdev->cdl_enable); } else { sdev->cdl_supported = 0; } From 77691af484e28af7a692e511b9ed5ca63012ec6e Mon Sep 17 00:00:00 2001 From: Ziqi Chen Date: Fri, 7 Jun 2024 18:06:23 +0800 Subject: [PATCH 236/778] scsi: ufs: core: Quiesce request queues before checking pending cmds In ufshcd_clock_scaling_prepare(), after SCSI layer is blocked, ufshcd_pending_cmds() is called to check whether there are pending transactions or not. And only if there are no pending transactions can we proceed to kickstart the clock scaling sequence. ufshcd_pending_cmds() traverses over all SCSI devices and calls sbitmap_weight() on their budget_map. sbitmap_weight() can be broken down to three steps: 1. Calculate the nr outstanding bits set in the 'word' bitmap. 2. Calculate the nr outstanding bits set in the 'cleared' bitmap. 3. Subtract the result from step 1 by the result from step 2. This can lead to a race condition as outlined below: Assume there is one pending transaction in the request queue of one SCSI device, say sda, and the budget token of this request is 0, the 'word' is 0x1 and the 'cleared' is 0x0. 1. When step 1 executes, it gets the result as 1. 2. Before step 2 executes, block layer tries to dispatch a new request to sda. Since the SCSI layer is blocked, the request cannot pass through SCSI but the block layer would do budget_get() and budget_put() to sda's budget map regardless, so the 'word' has become 0x3 and 'cleared' has become 0x2 (assume the new request got budget token 1). 3. When step 2 executes, it gets the result as 1. 4. When step 3 executes, it gets the result as 0, meaning there is no pending transactions, which is wrong. Thread A Thread B ufshcd_pending_cmds() __blk_mq_sched_dispatch_requests() | | sbitmap_weight(word) | | scsi_mq_get_budget() | | | scsi_mq_put_budget() | | sbitmap_weight(cleared) ... When this race condition happens, the clock scaling sequence is started with transactions still in flight, leading to subsequent hibernate enter failure, broken link, task abort and back to back error recovery. Fix this race condition by quiescing the request queues before calling ufshcd_pending_cmds() so that block layer won't touch the budget map when ufshcd_pending_cmds() is working on it. In addition, remove the SCSI layer blocking/unblocking to reduce redundancies and latencies. Fixes: 8d077ede48c1 ("scsi: ufs: Optimize the command queueing code") Co-developed-by: Can Guo Signed-off-by: Can Guo Signed-off-by: Ziqi Chen Link: https://lore.kernel.org/r/1717754818-39863-1-git-send-email-quic_ziqichen@quicinc.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0cf07194bbe8..e5e9da61f15d 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1366,7 +1366,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) * make sure that there are no outstanding requests when * clock scaling is in progress */ - ufshcd_scsi_block_requests(hba); + blk_mq_quiesce_tagset(&hba->host->tag_set); mutex_lock(&hba->wb_mutex); down_write(&hba->clk_scaling_lock); @@ -1375,7 +1375,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) ret = -EBUSY; up_write(&hba->clk_scaling_lock); mutex_unlock(&hba->wb_mutex); - ufshcd_scsi_unblock_requests(hba); + blk_mq_unquiesce_tagset(&hba->host->tag_set); goto out; } @@ -1396,7 +1396,7 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc mutex_unlock(&hba->wb_mutex); - ufshcd_scsi_unblock_requests(hba); + blk_mq_unquiesce_tagset(&hba->host->tag_set); ufshcd_release(hba); } From 90e6f08915ec6efe46570420412a65050ec826b2 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 11 Jun 2024 17:34:35 +0900 Subject: [PATCH 237/778] scsi: mpi3mr: Fix ATA NCQ priority support The function mpi3mr_qcmd() of the mpi3mr driver is able to indicate to the HBA if a read or write command directed at an ATA device should be translated to an NCQ read/write command with the high prioiryt bit set when the request uses the RT priority class and the user has enabled NCQ priority through sysfs. However, unlike the mpt3sas driver, the mpi3mr driver does not define the sas_ncq_prio_supported and sas_ncq_prio_enable sysfs attributes, so the ncq_prio_enable field of struct mpi3mr_sdev_priv_data is never actually set and NCQ Priority cannot ever be used. Fix this by defining these missing atributes to allow a user to check if an ATA device supports NCQ priority and to enable/disable the use of NCQ priority. To do this, lift the function scsih_ncq_prio_supp() out of the mpt3sas driver and make it the generic SCSI SAS transport function sas_ata_ncq_prio_supported(). Nothing in that function is hardware specific, so this function can be used in both the mpt3sas driver and the mpi3mr driver. Reported-by: Scott McCoy Fixes: 023ab2a9b4ed ("scsi: mpi3mr: Add support for queue command processing") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240611083435.92961-1-dlemoal@kernel.org Reviewed-by: Niklas Cassel Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_app.c | 62 ++++++++++++++++++++++++++++ drivers/scsi/mpt3sas/mpt3sas_base.h | 3 -- drivers/scsi/mpt3sas/mpt3sas_ctl.c | 4 +- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 23 ----------- drivers/scsi/scsi_transport_sas.c | 23 +++++++++++ include/scsi/scsi_transport_sas.h | 2 + 6 files changed, 89 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 1638109a68a0..cd261b48eb46 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -2163,10 +2163,72 @@ persistent_id_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(persistent_id); +/** + * sas_ncq_prio_supported_show - Indicate if device supports NCQ priority + * @dev: pointer to embedded device + * @attr: sas_ncq_prio_supported attribute descriptor + * @buf: the buffer returned + * + * A sysfs 'read-only' sdev attribute, only works with SATA devices + */ +static ssize_t +sas_ncq_prio_supported_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(dev); + + return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev)); +} +static DEVICE_ATTR_RO(sas_ncq_prio_supported); + +/** + * sas_ncq_prio_enable_show - send prioritized io commands to device + * @dev: pointer to embedded device + * @attr: sas_ncq_prio_enable attribute descriptor + * @buf: the buffer returned + * + * A sysfs 'read/write' sdev attribute, only works with SATA devices + */ +static ssize_t +sas_ncq_prio_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; + + if (!sdev_priv_data) + return 0; + + return sysfs_emit(buf, "%d\n", sdev_priv_data->ncq_prio_enable); +} + +static ssize_t +sas_ncq_prio_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; + bool ncq_prio_enable = 0; + + if (kstrtobool(buf, &ncq_prio_enable)) + return -EINVAL; + + if (!sas_ata_ncq_prio_supported(sdev)) + return -EINVAL; + + sdev_priv_data->ncq_prio_enable = ncq_prio_enable; + + return strlen(buf); +} +static DEVICE_ATTR_RW(sas_ncq_prio_enable); + static struct attribute *mpi3mr_dev_attrs[] = { &dev_attr_sas_address.attr, &dev_attr_device_handle.attr, &dev_attr_persistent_id.attr, + &dev_attr_sas_ncq_prio_supported.attr, + &dev_attr_sas_ncq_prio_enable.attr, NULL, }; diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index bf100a4ebfc3..fe1e96fda284 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -2048,9 +2048,6 @@ void mpt3sas_setup_direct_io(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, struct _raid_device *raid_device, Mpi25SCSIIORequest_t *mpi_request); -/* NCQ Prio Handling Check */ -bool scsih_ncq_prio_supp(struct scsi_device *sdev); - void mpt3sas_setup_debugfs(struct MPT3SAS_ADAPTER *ioc); void mpt3sas_destroy_debugfs(struct MPT3SAS_ADAPTER *ioc); void mpt3sas_init_debugfs(void); diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 1c9fd26195b8..87784c96249a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -4088,7 +4088,7 @@ sas_ncq_prio_supported_show(struct device *dev, { struct scsi_device *sdev = to_scsi_device(dev); - return sysfs_emit(buf, "%d\n", scsih_ncq_prio_supp(sdev)); + return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev)); } static DEVICE_ATTR_RO(sas_ncq_prio_supported); @@ -4123,7 +4123,7 @@ sas_ncq_prio_enable_store(struct device *dev, if (kstrtobool(buf, &ncq_prio_enable)) return -EINVAL; - if (!scsih_ncq_prio_supp(sdev)) + if (!sas_ata_ncq_prio_supported(sdev)) return -EINVAL; sas_device_priv_data->ncq_prio_enable = ncq_prio_enable; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 12d08d8ba538..870ec2cb4af4 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -12571,29 +12571,6 @@ scsih_pci_mmio_enabled(struct pci_dev *pdev) return PCI_ERS_RESULT_RECOVERED; } -/** - * scsih_ncq_prio_supp - Check for NCQ command priority support - * @sdev: scsi device struct - * - * This is called when a user indicates they would like to enable - * ncq command priorities. This works only on SATA devices. - */ -bool scsih_ncq_prio_supp(struct scsi_device *sdev) -{ - struct scsi_vpd *vpd; - bool ncq_prio_supp = false; - - rcu_read_lock(); - vpd = rcu_dereference(sdev->vpd_pg89); - if (!vpd || vpd->len < 214) - goto out; - - ncq_prio_supp = (vpd->data[213] >> 4) & 1; -out: - rcu_read_unlock(); - - return ncq_prio_supp; -} /* * The pci device ids are defined in mpi/mpi2_cnfg.h. */ diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 424a89513814..4e33f1661e4c 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -416,6 +416,29 @@ unsigned int sas_is_tlr_enabled(struct scsi_device *sdev) } EXPORT_SYMBOL_GPL(sas_is_tlr_enabled); +/** + * sas_ata_ncq_prio_supported - Check for ATA NCQ command priority support + * @sdev: SCSI device + * + * Check if an ATA device supports NCQ priority using VPD page 89h (ATA + * Information). Since this VPD page is implemented only for ATA devices, + * this function always returns false for SCSI devices. + */ +bool sas_ata_ncq_prio_supported(struct scsi_device *sdev) +{ + struct scsi_vpd *vpd; + bool ncq_prio_supported = false; + + rcu_read_lock(); + vpd = rcu_dereference(sdev->vpd_pg89); + if (vpd && vpd->len >= 214) + ncq_prio_supported = (vpd->data[213] >> 4) & 1; + rcu_read_unlock(); + + return ncq_prio_supported; +} +EXPORT_SYMBOL_GPL(sas_ata_ncq_prio_supported); + /* * SAS Phy attributes */ diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 0e75b9277c8c..e3b6ce3cbf88 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -200,6 +200,8 @@ unsigned int sas_is_tlr_enabled(struct scsi_device *); void sas_disable_tlr(struct scsi_device *); void sas_enable_tlr(struct scsi_device *); +bool sas_ata_ncq_prio_supported(struct scsi_device *sdev); + extern struct sas_rphy *sas_end_device_alloc(struct sas_port *); extern struct sas_rphy *sas_expander_alloc(struct sas_port *, enum sas_device_type); void sas_rphy_free(struct sas_rphy *); From 144ba8580bcb82b2686c3d1a043299d844b9a682 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 10 Jun 2024 10:34:26 +0200 Subject: [PATCH 238/778] net: pse-pd: Use EOPNOTSUPP error code instead of ENOTSUPP ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP as reported by checkpatch script. Fixes: 18ff0bcda6d1 ("ethtool: add interface to interact with Ethernet Power Equipment") Reviewed-by: Andrew Lunn Acked-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240610083426.740660-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/pse-pd/pse.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 6d07c95dabb9..6eec24ffa866 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -167,14 +167,14 @@ static inline int pse_ethtool_get_status(struct pse_control *psec, struct netlink_ext_ack *extack, struct pse_control_status *status) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline bool pse_has_podl(struct pse_control *psec) From 1b9f756344416e02b41439bf2324b26aa25e141c Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Mon, 10 Jun 2024 15:57:18 -0700 Subject: [PATCH 239/778] gve: ignore nonrelevant GSO type bits when processing TSO headers TSO currently fails when the skb's gso_type field has more than one bit set. TSO packets can be passed from userspace using PF_PACKET, TUNTAP and a few others, using virtio_net_hdr (e.g., PACKET_VNET_HDR). This includes virtualization, such as QEMU, a real use-case. The gso_type and gso_size fields as passed from userspace in virtio_net_hdr are not trusted blindly by the kernel. It adds gso_type |= SKB_GSO_DODGY to force the packet to enter the software GSO stack for verification. This issue might similarly come up when the CWR bit is set in the TCP header for congestion control, causing the SKB_GSO_TCP_ECN gso_type bit to be set. Fixes: a57e5de476be ("gve: DQO: Add TX path") Signed-off-by: Joshua Washington Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Reviewed-by: Willem de Bruijn Suggested-by: Eric Dumazet Acked-by: Andrei Vagin v2 - Remove unnecessary comments, remove line break between fixes tag and signoffs. v3 - Add back unrelated empty line removal. Link: https://lore.kernel.org/r/20240610225729.2985343-1-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index fe1b26a4d736..0b3cca3fc792 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -555,28 +555,18 @@ static int gve_prep_tso(struct sk_buff *skb) if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO)) return -1; + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + return -EINVAL; + /* Needed because we will modify header. */ err = skb_cow_head(skb, 0); if (err < 0) return err; tcp = tcp_hdr(skb); - - /* Remove payload length from checksum. */ paylen = skb->len - skb_transport_offset(skb); - - switch (skb_shinfo(skb)->gso_type) { - case SKB_GSO_TCPV4: - case SKB_GSO_TCPV6: - csum_replace_by_diff(&tcp->check, - (__force __wsum)htonl(paylen)); - - /* Compute length of segmentation header. */ - header_len = skb_tcp_all_headers(skb); - break; - default: - return -EINVAL; - } + csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); + header_len = skb_tcp_all_headers(skb); if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO)) return -EINVAL; From be27b896529787e23a35ae4befb6337ce73fcca0 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Sat, 8 Jun 2024 22:35:24 +0800 Subject: [PATCH 240/778] net: stmmac: replace priv->speed with the portTransmitRate from the tc-cbs parameters The current cbs parameter depends on speed after uplinking, which is not needed and will report a configuration error if the port is not initially connected. The UAPI exposed by tc-cbs requires userspace to recalculate the send slope anyway, because the formula depends on port_transmit_rate (see man tc-cbs), which is not an invariant from tc's perspective. Therefore, we use offload->sendslope and offload->idleslope to derive the original port_transmit_rate from the CBS formula. Fixes: 1f705bc61aee ("net: stmmac: Add support for CBS QDISC") Signed-off-by: Xiaolei Wang Reviewed-by: Wojciech Drewek Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20240608143524.2065736-1-xiaolei.wang@windriver.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 222540b55480..1562fbdd0a04 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -343,10 +343,11 @@ static int tc_setup_cbs(struct stmmac_priv *priv, struct tc_cbs_qopt_offload *qopt) { u32 tx_queues_count = priv->plat->tx_queues_to_use; + s64 port_transmit_rate_kbps; u32 queue = qopt->queue; - u32 ptr, speed_div; u32 mode_to_use; u64 value; + u32 ptr; int ret; /* Queue 0 is not AVB capable */ @@ -355,30 +356,26 @@ static int tc_setup_cbs(struct stmmac_priv *priv, if (!priv->dma_cap.av) return -EOPNOTSUPP; + port_transmit_rate_kbps = qopt->idleslope - qopt->sendslope; + /* Port Transmit Rate and Speed Divider */ - switch (priv->speed) { + switch (div_s64(port_transmit_rate_kbps, 1000)) { case SPEED_10000: - ptr = 32; - speed_div = 10000000; - break; case SPEED_5000: ptr = 32; - speed_div = 5000000; break; case SPEED_2500: - ptr = 8; - speed_div = 2500000; - break; case SPEED_1000: ptr = 8; - speed_div = 1000000; break; case SPEED_100: ptr = 4; - speed_div = 100000; break; default: - return -EOPNOTSUPP; + netdev_err(priv->dev, + "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n", + port_transmit_rate_kbps); + return -EINVAL; } mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use; @@ -398,10 +395,10 @@ static int tc_setup_cbs(struct stmmac_priv *priv, } /* Final adjustments for HW */ - value = div_s64(qopt->idleslope * 1024ll * ptr, speed_div); + value = div_s64(qopt->idleslope * 1024ll * ptr, port_transmit_rate_kbps); priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0); - value = div_s64(-qopt->sendslope * 1024ll * ptr, speed_div); + value = div_s64(-qopt->sendslope * 1024ll * ptr, port_transmit_rate_kbps); priv->plat->tx_queues_cfg[queue].send_slope = value & GENMASK(31, 0); value = qopt->hicredit * 1024ll * 8; From 1cdeca6a7264021e20157de0baf7880ff0ced822 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 10 Jun 2024 23:06:19 +0900 Subject: [PATCH 241/778] ksmbd: move leading slash check to smb2_get_name() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the directory name in the root of the share starts with character like 镜(0x955c) or Ṝ(0x1e5c), it (and anything inside) cannot be accessed. The leading slash check must be checked after converting unicode to nls string. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index b6c5a8ea3887..f79d06d2d655 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -630,6 +630,12 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) return name; } + if (*name == '\\') { + pr_err("not allow directory name included leading slash\n"); + kfree(name); + return ERR_PTR(-EINVAL); + } + ksmbd_conv_path_to_unix(name); ksmbd_strip_last_slash(name); return name; @@ -2842,20 +2848,11 @@ int smb2_open(struct ksmbd_work *work) } if (req->NameLength) { - if ((req->CreateOptions & FILE_DIRECTORY_FILE_LE) && - *(char *)req->Buffer == '\\') { - pr_err("not allow directory name included leading slash\n"); - rc = -EINVAL; - goto err_out2; - } - name = smb2_get_name((char *)req + le16_to_cpu(req->NameOffset), le16_to_cpu(req->NameLength), work->conn->local_nls); if (IS_ERR(name)) { rc = PTR_ERR(name); - if (rc != -ENOMEM) - rc = -ENOENT; name = NULL; goto err_out2; } From 2bfc4214c69c62da13a9da8e3c3db5539da2ccd3 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 11 Jun 2024 23:27:27 +0900 Subject: [PATCH 242/778] ksmbd: fix missing use of get_write in in smb2_set_ea() Fix an issue where get_write is not used in smb2_set_ea(). Fixes: 6fc0a265e1b9 ("ksmbd: fix potential circular locking issue in smb2_set_ea()") Cc: stable@vger.kernel.org Reported-by: Wang Zhaolong Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 7 ++++--- fs/smb/server/vfs.c | 17 +++++++++++------ fs/smb/server/vfs.h | 3 ++- fs/smb/server/vfs_cache.c | 3 ++- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index f79d06d2d655..e7e07891781b 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2367,7 +2367,8 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, if (rc > 0) { rc = ksmbd_vfs_remove_xattr(idmap, path, - attr_name); + attr_name, + get_write); if (rc < 0) { ksmbd_debug(SMB, @@ -2382,7 +2383,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, } else { rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value, le16_to_cpu(eabuf->EaValueLength), - 0, true); + 0, get_write); if (rc < 0) { ksmbd_debug(SMB, "ksmbd_vfs_setxattr is failed(%d)\n", @@ -2474,7 +2475,7 @@ static int smb2_remove_smb_xattrs(const struct path *path) !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) { err = ksmbd_vfs_remove_xattr(idmap, path, - name); + name, true); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", name); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 51b1b0bed616..9e859ba010cf 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1058,16 +1058,21 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, } int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, - const struct path *path, char *attr_name) + const struct path *path, char *attr_name, + bool get_write) { int err; - err = mnt_want_write(path->mnt); - if (err) - return err; + if (get_write == true) { + err = mnt_want_write(path->mnt); + if (err) + return err; + } err = vfs_removexattr(idmap, path->dentry, attr_name); - mnt_drop_write(path->mnt); + + if (get_write == true) + mnt_drop_write(path->mnt); return err; } @@ -1380,7 +1385,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path) ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) { - err = ksmbd_vfs_remove_xattr(idmap, path, name); + err = ksmbd_vfs_remove_xattr(idmap, path, name, true); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", name); } diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index cfe1c8092f23..cb76f4b5bafe 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -114,7 +114,8 @@ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, size_t *xattr_stream_name_size, int s_type); int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, - const struct path *path, char *attr_name); + const struct path *path, char *attr_name, + bool get_write); int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, unsigned int flags, struct path *parent_path, struct path *path, bool caseless); diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 6cb599cd287e..8b2e37c8716e 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -254,7 +254,8 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) ci->m_flags &= ~S_DEL_ON_CLS_STREAM; err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp), &filp->f_path, - fp->stream.name); + fp->stream.name, + true); if (err) pr_err("remove xattr failed : %s\n", fp->stream.name); From f0260589b439e2637ad54a2b25f00a516ef28a57 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 11 Jun 2024 15:06:07 +0300 Subject: [PATCH 243/778] xhci: Set correct transferred length for cancelled bulk transfers The transferred length is set incorrectly for cancelled bulk transfer TDs in case the bulk transfer ring stops on the last transfer block with a 'Stop - Length Invalid' completion code. length essentially ends up being set to the requested length: urb->actual_length = urb->transfer_buffer_length Length for 'Stop - Length Invalid' cases should be the sum of all TRB transfer block lengths up to the one the ring stopped on, _excluding_ the one stopped on. Fix this by always summing up TRB lengths for 'Stop - Length Invalid' bulk cases. This issue was discovered by Alan Stern while debugging https://bugzilla.kernel.org/show_bug.cgi?id=218890, but does not solve that bug. Issue is older than 4.10 kernel but fix won't apply to those due to major reworks in that area. Tested-by: Pierre Tomon Cc: stable@vger.kernel.org # v4.10+ Cc: Alan Stern Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240611120610.3264502-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 9e90d2952760..1db61bb2b9b5 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2524,9 +2524,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, goto finish_td; case COMP_STOPPED_LENGTH_INVALID: /* stopped on ep trb with invalid length, exclude it */ - ep_trb_len = 0; - remaining = 0; - break; + td->urb->actual_length = sum_trb_lengths(xhci, ep_ring, ep_trb); + goto finish_td; case COMP_USB_TRANSACTION_ERROR: if (xhci->quirks & XHCI_NO_SOFT_RETRY || (ep->err_count++ > MAX_SOFT_RETRY) || From 17bd54555c2aaecfdb38e2734149f684a73fa584 Mon Sep 17 00:00:00 2001 From: Kuangyi Chiang Date: Tue, 11 Jun 2024 15:06:08 +0300 Subject: [PATCH 244/778] xhci: Apply reset resume quirk to Etron EJ188 xHCI host As described in commit c877b3b2ad5c ("xhci: Add reset on resume quirk for asrock p67 host"), EJ188 have the same issue as EJ168, where completely dies on resume. So apply XHCI_RESET_ON_RESUME quirk to EJ188 as well. Cc: stable@vger.kernel.org Signed-off-by: Kuangyi Chiang Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240611120610.3264502-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index c040d816e626..dd8256e4e02a 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -36,6 +36,7 @@ #define PCI_VENDOR_ID_ETRON 0x1b6f #define PCI_DEVICE_ID_EJ168 0x7023 +#define PCI_DEVICE_ID_EJ188 0x7052 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI 0x8c31 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 @@ -395,6 +396,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_RESET_ON_RESUME; xhci->quirks |= XHCI_BROKEN_STREAMS; } + if (pdev->vendor == PCI_VENDOR_ID_ETRON && + pdev->device == PCI_DEVICE_ID_EJ188) + xhci->quirks |= XHCI_RESET_ON_RESUME; + if (pdev->vendor == PCI_VENDOR_ID_RENESAS && pdev->device == 0x0014) { xhci->quirks |= XHCI_ZERO_64B_REGS; From 91f7a1524a92c70ffe264db8bdfa075f15bbbeb9 Mon Sep 17 00:00:00 2001 From: Kuangyi Chiang Date: Tue, 11 Jun 2024 15:06:09 +0300 Subject: [PATCH 245/778] xhci: Apply broken streams quirk to Etron EJ188 xHCI host As described in commit 8f873c1ff4ca ("xhci: Blacklist using streams on the Etron EJ168 controller"), EJ188 have the same issue as EJ168, where Streams do not work reliable on EJ188. So apply XHCI_BROKEN_STREAMS quirk to EJ188 as well. Cc: stable@vger.kernel.org Signed-off-by: Kuangyi Chiang Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240611120610.3264502-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index dd8256e4e02a..05881153883e 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -397,8 +397,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_BROKEN_STREAMS; } if (pdev->vendor == PCI_VENDOR_ID_ETRON && - pdev->device == PCI_DEVICE_ID_EJ188) + pdev->device == PCI_DEVICE_ID_EJ188) { xhci->quirks |= XHCI_RESET_ON_RESUME; + xhci->quirks |= XHCI_BROKEN_STREAMS; + } if (pdev->vendor == PCI_VENDOR_ID_RENESAS && pdev->device == 0x0014) { From 5ceac4402f5d975e5a01c806438eb4e554771577 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 11 Jun 2024 15:06:10 +0300 Subject: [PATCH 246/778] xhci: Handle TD clearing for multiple streams case When multiple streams are in use, multiple TDs might be in flight when an endpoint is stopped. We need to issue a Set TR Dequeue Pointer for each, to ensure everything is reset properly and the caches cleared. Change the logic so that any N>1 TDs found active for different streams are deferred until after the first one is processed, calling xhci_invalidate_cancelled_tds() again from xhci_handle_cmd_set_deq() to queue another command until we are done with all of them. Also change the error/"should never happen" paths to ensure we at least clear any affected TDs, even if we can't issue a command to clear the hardware cache, and complain loudly with an xhci_warn() if this ever happens. This problem case dates back to commit e9df17eb1408 ("USB: xhci: Correct assumptions about number of rings per endpoint.") early on in the XHCI driver's life, when stream support was first added. It was then identified but not fixed nor made into a warning in commit 674f8438c121 ("xhci: split handling halted endpoints into two steps"), which added a FIXME comment for the problem case (without materially changing the behavior as far as I can tell, though the new logic made the problem more obvious). Then later, in commit 94f339147fc3 ("xhci: Fix failure to give back some cached cancelled URBs."), it was acknowledged again. [Mathias: commit 94f339147fc3 ("xhci: Fix failure to give back some cached cancelled URBs.") was a targeted regression fix to the previously mentioned patch. Users reported issues with usb stuck after unmounting/disconnecting UAS devices. This rolled back the TD clearing of multiple streams to its original state.] Apparently the commit author was aware of the problem (yet still chose to submit it): It was still mentioned as a FIXME, an xhci_dbg() was added to log the problem condition, and the remaining issue was mentioned in the commit description. The choice of making the log type xhci_dbg() for what is, at this point, a completely unhandled and known broken condition is puzzling and unfortunate, as it guarantees that no actual users would see the log in production, thereby making it nigh undebuggable (indeed, even if you turn on DEBUG, the message doesn't really hint at there being a problem at all). It took me *months* of random xHC crashes to finally find a reliable repro and be able to do a deep dive debug session, which could all have been avoided had this unhandled, broken condition been actually reported with a warning, as it should have been as a bug intentionally left in unfixed (never mind that it shouldn't have been left in at all). > Another fix to solve clearing the caches of all stream rings with > cancelled TDs is needed, but not as urgent. 3 years after that statement and 14 years after the original bug was introduced, I think it's finally time to fix it. And maybe next time let's not leave bugs unfixed (that are actually worse than the original bug), and let's actually get people to review kernel commits please. Fixes xHC crashes and IOMMU faults with UAS devices when handling errors/faults. Easiest repro is to use `hdparm` to mark an early sector (e.g. 1024) on a disk as bad, then `cat /dev/sdX > /dev/null` in a loop. At least in the case of JMicron controllers, the read errors end up having to cancel two TDs (for two queued requests to different streams) and the one that didn't get cleared properly ends up faulting the xHC entirely when it tries to access DMA pages that have since been unmapped, referred to by the stale TDs. This normally happens quickly (after two or three loops). After this fix, I left the `cat` in a loop running overnight and experienced no xHC failures, with all read errors recovered properly. Repro'd and tested on an Apple M1 Mac Mini (dwc3 host). On systems without an IOMMU, this bug would instead silently corrupt freed memory, making this a security bug (even on systems with IOMMUs this could silently corrupt memory belonging to other USB devices on the same controller, so it's still a security bug). Given that the kernel autoprobes partition tables, I'm pretty sure a malicious USB device pretending to be a UAS device and reporting an error with the right timing could deliberately trigger a UAF and write to freed memory, with no user action. [Mathias: Commit message and code comment edit, original at:] https://lore.kernel.org/linux-usb/20240524-xhci-streams-v1-1-6b1f13819bea@marcan.st/ Fixes: e9df17eb1408 ("USB: xhci: Correct assumptions about number of rings per endpoint.") Fixes: 94f339147fc3 ("xhci: Fix failure to give back some cached cancelled URBs.") Fixes: 674f8438c121 ("xhci: split handling halted endpoints into two steps") Cc: stable@vger.kernel.org Cc: security@kernel.org Reviewed-by: Neal Gompa Signed-off-by: Hector Martin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240611120610.3264502-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 54 ++++++++++++++++++++++++++++-------- drivers/usb/host/xhci.h | 1 + 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 1db61bb2b9b5..fd0cde3d1569 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1031,13 +1031,27 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) break; case TD_DIRTY: /* TD is cached, clear it */ case TD_HALTED: + case TD_CLEARING_CACHE_DEFERRED: + if (cached_td) { + if (cached_td->urb->stream_id != td->urb->stream_id) { + /* Multiple streams case, defer move dq */ + xhci_dbg(xhci, + "Move dq deferred: stream %u URB %p\n", + td->urb->stream_id, td->urb); + td->cancel_status = TD_CLEARING_CACHE_DEFERRED; + break; + } + + /* Should never happen, but clear the TD if it does */ + xhci_warn(xhci, + "Found multiple active URBs %p and %p in stream %u?\n", + td->urb, cached_td->urb, + td->urb->stream_id); + td_to_noop(xhci, ring, cached_td, false); + cached_td->cancel_status = TD_CLEARED; + } + td->cancel_status = TD_CLEARING_CACHE; - if (cached_td) - /* FIXME stream case, several stopped rings */ - xhci_dbg(xhci, - "Move dq past stream %u URB %p instead of stream %u URB %p\n", - td->urb->stream_id, td->urb, - cached_td->urb->stream_id, cached_td->urb); cached_td = td; break; } @@ -1057,10 +1071,16 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) if (err) { /* Failed to move past cached td, just set cached TDs to no-op */ list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list, cancelled_td_list) { - if (td->cancel_status != TD_CLEARING_CACHE) + /* + * Deferred TDs need to have the deq pointer set after the above command + * completes, so if that failed we just give up on all of them (and + * complain loudly since this could cause issues due to caching). + */ + if (td->cancel_status != TD_CLEARING_CACHE && + td->cancel_status != TD_CLEARING_CACHE_DEFERRED) continue; - xhci_dbg(xhci, "Failed to clear cancelled cached URB %p, mark clear anyway\n", - td->urb); + xhci_warn(xhci, "Failed to clear cancelled cached URB %p, mark clear anyway\n", + td->urb); td_to_noop(xhci, ring, td, false); td->cancel_status = TD_CLEARED; } @@ -1346,6 +1366,7 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, struct xhci_ep_ctx *ep_ctx; struct xhci_slot_ctx *slot_ctx; struct xhci_td *td, *tmp_td; + bool deferred = false; ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3])); stream_id = TRB_TO_STREAM_ID(le32_to_cpu(trb->generic.field[2])); @@ -1432,6 +1453,8 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, xhci_dbg(ep->xhci, "%s: Giveback cancelled URB %p TD\n", __func__, td->urb); xhci_td_cleanup(ep->xhci, td, ep_ring, td->status); + } else if (td->cancel_status == TD_CLEARING_CACHE_DEFERRED) { + deferred = true; } else { xhci_dbg(ep->xhci, "%s: Keep cancelled URB %p TD as cancel_status is %d\n", __func__, td->urb, td->cancel_status); @@ -1441,8 +1464,17 @@ cleanup: ep->ep_state &= ~SET_DEQ_PENDING; ep->queued_deq_seg = NULL; ep->queued_deq_ptr = NULL; - /* Restart any rings with pending URBs */ - ring_doorbell_for_active_rings(xhci, slot_id, ep_index); + + if (deferred) { + /* We have more streams to clear */ + xhci_dbg(ep->xhci, "%s: Pending TDs to clear, continuing with invalidation\n", + __func__); + xhci_invalidate_cancelled_tds(ep); + } else { + /* Restart any rings with pending URBs */ + xhci_dbg(ep->xhci, "%s: All TDs cleared, ring doorbell\n", __func__); + ring_doorbell_for_active_rings(xhci, slot_id, ep_index); + } } static void xhci_handle_cmd_reset_ep(struct xhci_hcd *xhci, int slot_id, diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 30415158ed3c..78d014c4d884 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1276,6 +1276,7 @@ enum xhci_cancelled_td_status { TD_DIRTY = 0, TD_HALTED, TD_CLEARING_CACHE, + TD_CLEARING_CACHE_DEFERRED, TD_CLEARED, }; From 0320ca14c6fb68ad19aa72e55a1a21c061b2946b Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 12 Jun 2024 09:23:13 +0200 Subject: [PATCH 247/778] drm: renesas: shmobile: Call drm_atomic_helper_shutdown() at shutdown time Based on grepping through the source code, this driver appears to be missing a call to drm_atomic_helper_shutdown() at system shutdown time. This is important because drm_atomic_helper_shutdown() will cause panels to get disabled cleanly which may be important for their power sequencing. Future changes will remove any custom powering off in individual panel drivers so the DRM drivers need to start getting this right. The fact that we should call drm_atomic_helper_shutdown() in the case of OS shutdown comes straight out of the kernel doc "driver instance overview" in drm_drv.c. [geert: shmob_drm_remove() already calls drm_atomic_helper_shutdown] Suggested-by: Maxime Ripard Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20230901164111.RFT.15.Iaf638a1d4c8b3c307a6192efabb4cbb06b195f15@changeid [geert: s/drm_helper_force_disable_all/drm_atomic_helper_shutdown/] Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Reviewed-by: Sui Jingfeng Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/17c6a5a668e5975f871b77fb1fca6711a0799d9e.1718176895.git.geert+renesas@glider.be --- drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c index e83c3e52251d..0250d5f00bf1 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c @@ -171,6 +171,13 @@ static void shmob_drm_remove(struct platform_device *pdev) drm_kms_helper_poll_fini(ddev); } +static void shmob_drm_shutdown(struct platform_device *pdev) +{ + struct shmob_drm_device *sdev = platform_get_drvdata(pdev); + + drm_atomic_helper_shutdown(&sdev->ddev); +} + static int shmob_drm_probe(struct platform_device *pdev) { struct shmob_drm_platform_data *pdata = pdev->dev.platform_data; @@ -273,6 +280,7 @@ static const struct of_device_id shmob_drm_of_table[] __maybe_unused = { static struct platform_driver shmob_drm_platform_driver = { .probe = shmob_drm_probe, .remove_new = shmob_drm_remove, + .shutdown = shmob_drm_shutdown, .driver = { .name = "shmob-drm", .of_match_table = of_match_ptr(shmob_drm_of_table), From c38896ca6318c2df20bbe6c8e3f633e071fda910 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 11 Jun 2024 10:27:44 -0700 Subject: [PATCH 248/778] drm/mediatek: Call drm_atomic_helper_shutdown() at shutdown time Based on grepping through the source code this driver appears to be missing a call to drm_atomic_helper_shutdown() at system shutdown time. Among other things, this means that if a panel is in use that it won't be cleanly powered off at system shutdown time. The fact that we should call drm_atomic_helper_shutdown() in the case of OS shutdown/restart comes straight out of the kernel doc "driver instance overview" in drm_drv.c. This driver users the component model and shutdown happens in the base driver. The "drvdata" for this driver will always be valid if shutdown() is called and as of commit 2a073968289d ("drm/atomic-helper: drm_atomic_helper_shutdown(NULL) should be a noop") we don't need to confirm that "drm" is non-NULL. Suggested-by: Maxime Ripard Reviewed-by: Maxime Ripard Reviewed-by: Fei Shao Tested-by: Fei Shao Signed-off-by: Douglas Anderson Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240611102744.v2.1.I2b014f90afc4729b6ecc7b5ddd1f6dedcea4625b@changeid --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index b5f605751b0a..de811e2265da 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -952,6 +952,13 @@ static void mtk_drm_remove(struct platform_device *pdev) of_node_put(private->comp_node[i]); } +static void mtk_drm_shutdown(struct platform_device *pdev) +{ + struct mtk_drm_private *private = platform_get_drvdata(pdev); + + drm_atomic_helper_shutdown(private->drm); +} + static int mtk_drm_sys_prepare(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); @@ -983,6 +990,7 @@ static const struct dev_pm_ops mtk_drm_pm_ops = { static struct platform_driver mtk_drm_platform_driver = { .probe = mtk_drm_probe, .remove_new = mtk_drm_remove, + .shutdown = mtk_drm_shutdown, .driver = { .name = "mediatek-drm", .pm = &mtk_drm_pm_ops, From 0941772342d59e48733131ac3a202fa1a4d832e9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 Jun 2024 18:58:16 +0200 Subject: [PATCH 249/778] wifi: cfg80211: wext: set ssids=NULL for passive scans In nl80211, we always set the ssids of a scan request to NULL when n_ssids==0 (passive scan). Drivers have relied on this behaviour in the past, so we fixed it in 6 GHz scan requests as well, and added a warning so we'd have assurance the API would always be called that way. syzbot found that wext doesn't ensure that, so we reach the check and trigger the warning. Fix the wext code to set the ssids pointer to NULL when there are none. Reported-by: syzbot+cd6135193ba6bb9ad158@syzkaller.appspotmail.com Fixes: f7a8b10bfd61 ("wifi: cfg80211: fix 6 GHz scan request building") Signed-off-by: Johannes Berg --- net/wireless/scan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2f2a3163968a..d7485e26f4fc 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -3493,8 +3493,10 @@ int cfg80211_wext_siwscan(struct net_device *dev, memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len); creq->ssids[0].ssid_len = wreq->essid_len; } - if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) + if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) { + creq->ssids = NULL; creq->n_ssids = 0; + } } for (i = 0; i < NUM_NL80211_BANDS; i++) From 6ef09cdc5ba0f93826c09d810c141a8d103a80fc Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 31 May 2024 06:20:10 +0300 Subject: [PATCH 250/778] wifi: cfg80211: wext: add extra SIOCSIWSCAN data check In 'cfg80211_wext_siwscan()', add extra check whether number of channels passed via 'ioctl(sock, SIOCSIWSCAN, ...)' doesn't exceed IW_MAX_FREQUENCIES and reject invalid request with -EINVAL otherwise. Reported-by: syzbot+253cd2d2491df77c93ac@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=253cd2d2491df77c93ac Signed-off-by: Dmitry Antipov Link: https://msgid.link/20240531032010.451295-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- net/wireless/scan.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d7485e26f4fc..0222ede0feb6 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -3416,10 +3416,14 @@ int cfg80211_wext_siwscan(struct net_device *dev, wiphy = &rdev->wiphy; /* Determine number of channels, needed to allocate creq */ - if (wreq && wreq->num_channels) + if (wreq && wreq->num_channels) { + /* Passed from userspace so should be checked */ + if (unlikely(wreq->num_channels > IW_MAX_FREQUENCIES)) + return -EINVAL; n_channels = wreq->num_channels; - else + } else { n_channels = ieee80211_get_num_supported_channels(wiphy); + } creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) + n_channels * sizeof(void *), From d792011b6c282bfb787eb2893538e5e336d5e982 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 5 Jun 2024 14:05:04 +0300 Subject: [PATCH 251/778] wifi: iwlwifi: mvm: unlock mvm mutex Unlock the mvm mutex before returning from a function with the mutex locked. Fixes: a1efeb823084 ("wifi: iwlwifi: mvm: Block EMLSR when a p2p/softAP vif is active") Signed-off-by: Shaul Triebitz Signed-off-by: Miri Korenblit Link: https://msgid.link/20240605140327.96cb956db4af.Ib468cbad38959910977b5581f6111ab0afae9880@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 8ee4498f4245..31bc80cdcb7d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -1238,6 +1238,7 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif) if (te_data->id >= SESSION_PROTECT_CONF_MAX_ID) { IWL_DEBUG_TE(mvm, "No remain on channel event\n"); + mutex_unlock(&mvm->mutex); return; } @@ -1253,6 +1254,7 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif) te_data = iwl_mvm_get_roc_te(mvm); if (!te_data) { IWL_WARN(mvm, "No remain on channel event\n"); + mutex_unlock(&mvm->mutex); return; } From 4c2bed6042fb6aca1d1d4f291f85461b1d5ac08c Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 5 Jun 2024 14:05:05 +0300 Subject: [PATCH 252/778] wifi: iwlwifi: mvm: fix ROC version check For using the ROC command, check that the ROC version is *greater or equal* to 3, rather than *equal* to 3. The ROC version was added to the TLV starting from version 3. Fixes: 67ac248e4db0 ("wifi: iwlwifi: mvm: implement ROC version 3") Signed-off-by: Shaul Triebitz Signed-off-by: Miri Korenblit Link: https://msgid.link/20240605140327.93d86cd188ad.Iceadef5a2f3cfa4a127e94a0405eba8342ec89c1@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index de9f0b446545..18ce060df9b5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4795,7 +4795,7 @@ static int iwl_mvm_roc_station(struct iwl_mvm *mvm, if (fw_ver == IWL_FW_CMD_VER_UNKNOWN) { ret = iwl_mvm_send_aux_roc_cmd(mvm, channel, vif, duration); - } else if (fw_ver == 3) { + } else if (fw_ver >= 3) { ret = iwl_mvm_roc_add_cmd(mvm, channel, vif, duration, ROC_ACTIVITY_HOTSPOT); } else { From fcc356020a0171106c9ba524ba05a6792668451e Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Wed, 5 Jun 2024 14:07:38 +0300 Subject: [PATCH 253/778] wifi: iwlwifi: scan: correctly check if PSC listen period is needed The flags variable is incorrectly checked while it is still cleared and has not been assigned any value yet. Fix it. Fixes: a615323f7f90 ("wifi: iwlwifi: mvm: always apply 6 GHz probe limitations") Signed-off-by: Ayala Beker Reviewed-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240605140556.291c33f9a283.Id651fe69828aebce177b49b2316c5780906f1b37@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index b5f664ae5a17..e975f5ff17b5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1830,7 +1830,7 @@ iwl_mvm_umac_scan_cfg_channels_v7_6g(struct iwl_mvm *mvm, */ if (!iwl_mvm_is_scan_fragmented(params->type)) { if (!cfg80211_channel_is_psc(params->channels[i]) || - flags & IWL_UHB_CHAN_CFG_FLAG_PSC_CHAN_NO_LISTEN) { + psc_no_listen) { if (unsolicited_probe_on_chan) { max_s_ssids = 2; max_bssids = 6; From 7d09e17c0415fe6d946044c7e70bce31cda952ec Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Sat, 18 May 2024 18:07:33 +0200 Subject: [PATCH 254/778] wifi: mac80211: Recalc offload when monitor stop When a monitor interface is started, ieee80211_recalc_offload() is called and 802.11 encapsulation offloading support get disabled so monitor interface could get native wifi frames directly. But when this interface is stopped there is no need to keep the 802.11 encpasulation offloading off. This call ieee80211_recalc_offload() when monitor interface is stopped so 802.11 encapsulation offloading gets re-activated if possible. Fixes: 6aea26ce5a4c ("mac80211: rework tx encapsulation offload API") Signed-off-by: Remi Pommarel Link: https://msgid.link/840baab454f83718e6e16fd836ac597d924e85b9.1716048326.git.repk@triplefau.lt Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index dc42902e2693..0c54554bf761 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -686,6 +686,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do ieee80211_del_virtual_monitor(local); ieee80211_recalc_idle(local); + ieee80211_recalc_offload(local); if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) break; From 350cbb5d2f676bff22c49e5e81764c3b8da342a9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Jun 2024 16:53:06 +0200 Subject: [PATCH 255/778] cpufreq: intel_pstate: Check turbo_is_disabled() in store_no_turbo() After recent changes in intel_pstate, global.turbo_disabled is only set at the initialization time and never changed. However, it turns out that on some systems the "turbo disabled" bit in MSR_IA32_MISC_ENABLE, the initial state of which is reflected by global.turbo_disabled, can be flipped later and there should be a way to take that into account (other than checking that MSR every time the driver runs which is costly and useless overhead on the vast majority of systems). For this purpose, notice that before the changes in question, store_no_turbo() contained a turbo_is_disabled() check that was used for updating global.turbo_disabled if the "turbo disabled" bit in MSR_IA32_MISC_ENABLE had been flipped and that functionality can be restored. Then, users will be able to reset global.turbo_disabled by writing 0 to no_turbo which used to work before on systems with flipping "turbo disabled" bit. This guarantees the driver state to remain in sync, but READ_ONCE() annotations need to be added in two places where global.turbo_disabled is accessed locklessly, so modify the driver to make that happen. Fixes: 0940f1a8011f ("cpufreq: intel_pstate: Do not update global.turbo_disabled after initialization") Closes: https://lore.kernel.org/linux-pm/bf3ebf1571a4788e97daf861eb493c12d42639a3.camel@xry111.site Suggested-by: Srinivas Pandruvada Reported-by: Xi Ruoyao Tested-by: Xi Ruoyao Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 65d3f79104bd..15de5e3d96fd 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1302,12 +1302,17 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, no_turbo = !!clamp_t(int, input, 0, 1); - if (no_turbo == global.no_turbo) - goto unlock_driver; - - if (global.turbo_disabled) { - pr_notice_once("Turbo disabled by BIOS or unavailable on processor\n"); + WRITE_ONCE(global.turbo_disabled, turbo_is_disabled()); + if (global.turbo_disabled && !no_turbo) { + pr_notice("Turbo disabled by BIOS or unavailable on processor\n"); count = -EPERM; + if (global.no_turbo) + goto unlock_driver; + else + no_turbo = 1; + } + + if (no_turbo == global.no_turbo) { goto unlock_driver; } @@ -1762,7 +1767,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (READ_ONCE(global.no_turbo) && !global.turbo_disabled) + if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled)) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -1927,7 +1932,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (READ_ONCE(global.no_turbo) && !global.turbo_disabled) + if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled)) val |= (u64)1 << 32; return val; From 4cac29b846f38d5f0654cdfff5c5bfc37305081c Mon Sep 17 00:00:00 2001 From: Kalle Niemi Date: Wed, 12 Jun 2024 14:42:34 +0300 Subject: [PATCH 256/778] regulator: bd71815: fix ramp values Ramp values are inverted. This caused wrong values written to register when ramp values were defined in device tree. Invert values in table to fix this. Signed-off-by: Kalle Niemi Fixes: 1aad39001e85 ("regulator: Support ROHM BD71815 regulators") Reviewed-by: Matti Vaittinen Link: https://lore.kernel.org/r/ZmmJXtuVJU6RgQAH@latitude5580 Signed-off-by: Mark Brown --- drivers/regulator/bd71815-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/bd71815-regulator.c b/drivers/regulator/bd71815-regulator.c index 26192d55a685..79fbb45297f6 100644 --- a/drivers/regulator/bd71815-regulator.c +++ b/drivers/regulator/bd71815-regulator.c @@ -256,7 +256,7 @@ static int buck12_set_hw_dvs_levels(struct device_node *np, * 10: 2.50mV/usec 10mV 4uS * 11: 1.25mV/usec 10mV 8uS */ -static const unsigned int bd7181x_ramp_table[] = { 1250, 2500, 5000, 10000 }; +static const unsigned int bd7181x_ramp_table[] = { 10000, 5000, 2500, 1250 }; static int bd7181x_led_set_current_limit(struct regulator_dev *rdev, int min_uA, int max_uA) From d6d5645e5fc1233a7ba950de4a72981c394a2557 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 31 May 2024 11:19:14 +0200 Subject: [PATCH 257/778] i2c: at91: Fix the functionality flags of the slave-only interface When an I2C adapter acts only as a slave, it should not claim to support I2C master capabilities. Fixes: 9d3ca54b550c ("i2c: at91: added slave mode support") Signed-off-by: Jean Delvare Cc: Juergen Fitschen Cc: Ludovic Desroches Cc: Codrin Ciubotariu Cc: Andi Shyti Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Claudiu Beznea Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-at91-slave.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-at91-slave.c b/drivers/i2c/busses/i2c-at91-slave.c index d6eeea5166c0..131a67d9d4a6 100644 --- a/drivers/i2c/busses/i2c-at91-slave.c +++ b/drivers/i2c/busses/i2c-at91-slave.c @@ -106,8 +106,7 @@ static int at91_unreg_slave(struct i2c_client *slave) static u32 at91_twi_func(struct i2c_adapter *adapter) { - return I2C_FUNC_SLAVE | I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL - | I2C_FUNC_SMBUS_READ_BLOCK_DATA; + return I2C_FUNC_SLAVE; } static const struct i2c_algorithm at91_twi_algorithm_slave = { From cbf3fb5b29e99e3689d63a88c3cddbffa1b8de99 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 31 May 2024 11:17:48 +0200 Subject: [PATCH 258/778] i2c: designware: Fix the functionality flags of the slave-only interface When an I2C adapter acts only as a slave, it should not claim to support I2C master capabilities. Fixes: 5b6d721b266a ("i2c: designware: enable SLAVE in platform module") Signed-off-by: Jean Delvare Cc: Luis Oliveira Cc: Jarkko Nikula Cc: Andy Shevchenko Cc: Mika Westerberg Cc: Jan Dabros Cc: Andi Shyti Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Tested-by: Jarkko Nikula Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-designware-slave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 2e079cf20bb5..78e2c47e3d7d 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -220,7 +220,7 @@ static const struct i2c_algorithm i2c_dw_algo = { void i2c_dw_configure_slave(struct dw_i2c_dev *dev) { - dev->functionality = I2C_FUNC_SLAVE | DW_IC_DEFAULT_FUNCTIONALITY; + dev->functionality = I2C_FUNC_SLAVE; dev->slave_cfg = DW_IC_CON_RX_FIFO_FULL_HLD_CTRL | DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED; From cea2a26553ace13ee36b56dc09ad548b5e6907df Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 11 Jun 2024 17:58:57 +0300 Subject: [PATCH 259/778] mailmap: Add my outdated addresses to the map file There is a couple of outdated addresses that are still visible in the Git history, add them to .mailmap. While at it, replace one in the comment. Signed-off-by: Andy Shevchenko Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ drivers/media/pci/saa7134/saa7134-cards.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index efd9fa867a8e..b6e9fe0916ce 100644 --- a/.mailmap +++ b/.mailmap @@ -72,6 +72,8 @@ Andrey Ryabinin Andrzej Hajda André Almeida Andy Adamson +Andy Shevchenko +Andy Shevchenko Anilkumar Kolli Anirudh Ghayal Antoine Tenart diff --git a/drivers/media/pci/saa7134/saa7134-cards.c b/drivers/media/pci/saa7134/saa7134-cards.c index 1280696f65f2..e80fb4ebfda6 100644 --- a/drivers/media/pci/saa7134/saa7134-cards.c +++ b/drivers/media/pci/saa7134/saa7134-cards.c @@ -5152,7 +5152,7 @@ struct saa7134_board saa7134_boards[] = { }, }, [SAA7134_BOARD_AVERMEDIA_STUDIO_507UA] = { - /* Andy Shevchenko */ + /* Andy Shevchenko */ .name = "Avermedia AVerTV Studio 507UA", .audio_clock = 0x00187de7, .tuner_type = TUNER_PHILIPS_FM1216ME_MK3, /* Should be MK5 */ From 9b1ebce6a1fded90d4a1c6c57dc6262dac4c4c14 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Tue, 11 Jun 2024 15:37:00 +0800 Subject: [PATCH 260/778] block: sed-opal: avoid possible wrong address reference in read_sed_opal_key() Clang static checker (scan-build) warning: block/sed-opal.c:line 317, column 3 Value stored to 'ret' is never read. Fix this problem by returning the error code when keyring_search() failed. Otherwise, 'key' will have a wrong value when 'kerf' stores the error code. Fixes: 3bfeb6125664 ("block: sed-opal: keyring support for SED keys") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20240611073659.429582-1-suhui@nfschina.com Signed-off-by: Jens Axboe --- block/sed-opal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/sed-opal.c b/block/sed-opal.c index 14fe0fef811c..598fd3e7fcc8 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -314,7 +314,7 @@ static int read_sed_opal_key(const char *key_name, u_char *buffer, int buflen) &key_type_user, key_name, true); if (IS_ERR(kref)) - ret = PTR_ERR(kref); + return PTR_ERR(kref); key = key_ref_to_ptr(kref); down_read(&key->sem); From 1933192a91be0a570663a3c6310c46e4ce3b2baa Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 7 Jun 2024 09:21:26 +0900 Subject: [PATCH 261/778] block: Optimize disk zone resource cleanup For zoned block devices using zone write plugging, an rcu_barrier() call is needed in disk_free_zone_resources() to synchronize freeing of zone write plugs and the destrution of the mempool used to allocate the plugs. The barrier call does slow down a little teardown of zoned block devices but should not affect teardown of regular block devices or zoned block devices that do not use zone write plugging (e.g. zoned DM devices that do not require zone append emulation). Modify disk_free_zone_resources() to return early if we do not have a mempool to start with, that is, if the device does not use zone write plugging. This avoids the costly rcu_barrier() and speeds up disk teardown. Reported-by: Mikulas Patocka Fixes: dd291d77cc90 ("block: Introduce zone write plugging") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Tested-by: Mikulas Patocka Reviewed-by: Niklas Cassel Link: https://lore.kernel.org/r/20240607002126.104227-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 52abebf56027..08d7dfe8bd93 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1552,6 +1552,9 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk) void disk_free_zone_resources(struct gendisk *disk) { + if (!disk->zone_wplugs_pool) + return; + cancel_work_sync(&disk->zone_wplugs_work); if (disk->zone_wplugs_wq) { From d0321c812d89c5910d8da8e4b10c891c6b96ff70 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sat, 8 Jun 2024 22:31:15 +0800 Subject: [PATCH 262/778] block: fix request.queuelist usage in flush Friedrich Weber reported a kernel crash problem and bisected to commit 81ada09cc25e ("blk-flush: reuse rq queuelist in flush state machine"). The root cause is that we use "list_move_tail(&rq->queuelist, pending)" in the PREFLUSH/POSTFLUSH sequences. But rq->queuelist.next == xxx since it's popped out from plug->cached_rq in __blk_mq_alloc_requests_batch(). We don't initialize its queuelist just for this first request, although the queuelist of all later popped requests will be initialized. Fix it by changing to use "list_add_tail(&rq->queuelist, pending)" so rq->queuelist doesn't need to be initialized. It should be ok since rq can't be on any list when PREFLUSH or POSTFLUSH, has no move actually. Please note the commit 81ada09cc25e ("blk-flush: reuse rq queuelist in flush state machine") also has another requirement that no drivers would touch rq->queuelist after blk_mq_end_request() since we will reuse it to add rq to the post-flush pending list in POSTFLUSH. If this is not true, we will have to revert that commit IMHO. This updated version adds "list_del_init(&rq->queuelist)" in flush rq callback since the dm layer may submit request of a weird invalid format (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH), which causes double list_add if without this "list_del_init(&rq->queuelist)". The weird invalid format problem should be fixed in dm layer. Reported-by: Friedrich Weber Closes: https://lore.kernel.org/lkml/14b89dfb-505c-49f7-aebb-01c54451db40@proxmox.com/ Closes: https://lore.kernel.org/lkml/c9d03ff7-27c5-4ebd-b3f6-5a90d96f35ba@proxmox.com/ Fixes: 81ada09cc25e ("blk-flush: reuse rq queuelist in flush state machine") Cc: Christoph Hellwig Cc: ming.lei@redhat.com Cc: bvanassche@acm.org Tested-by: Friedrich Weber Signed-off-by: Chengming Zhou Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240608143115.972486-1-chengming.zhou@linux.dev Signed-off-by: Jens Axboe --- block/blk-flush.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index c17cf8ed8113..cca4f9131f79 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -185,7 +185,7 @@ static void blk_flush_complete_seq(struct request *rq, /* queue for flush */ if (list_empty(pending)) fq->flush_pending_since = jiffies; - list_move_tail(&rq->queuelist, pending); + list_add_tail(&rq->queuelist, pending); break; case REQ_FSEQ_DATA: @@ -263,6 +263,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq, unsigned int seq = blk_flush_cur_seq(rq); BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); + list_del_init(&rq->queuelist); blk_flush_complete_seq(rq, fq, seq, error); } From e038ee6189842e9662d2fc59d09dbcf48350cf99 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 10 Jun 2024 16:41:44 +0530 Subject: [PATCH 263/778] block: unmap and free user mapped integrity via submitter The user mapped intergity is copied back and unpinned by bio_integrity_free which is a low-level routine. Do it via the submitter rather than doing it in the low-level block layer code, to split the submitter side from the consumer side of the bio. Signed-off-by: Anuj Gupta Signed-off-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20240610111144.14647-1-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- block/bio-integrity.c | 26 ++++++++++++++++++++++++-- drivers/nvme/host/ioctl.c | 15 +++++++++++---- include/linux/bio.h | 4 ++++ 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 2e3e8e04961e..8b528e12136f 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -144,16 +144,38 @@ void bio_integrity_free(struct bio *bio) struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_set *bs = bio->bi_pool; + if (bip->bip_flags & BIP_INTEGRITY_USER) + return; if (bip->bip_flags & BIP_BLOCK_INTEGRITY) kfree(bvec_virt(bip->bip_vec)); - else if (bip->bip_flags & BIP_INTEGRITY_USER) - bio_integrity_unmap_user(bip); __bio_integrity_free(bs, bip); bio->bi_integrity = NULL; bio->bi_opf &= ~REQ_INTEGRITY; } +/** + * bio_integrity_unmap_free_user - Unmap and free bio user integrity payload + * @bio: bio containing bip to be unmapped and freed + * + * Description: Used to unmap and free the user mapped integrity portion of a + * bio. Submitter attaching the user integrity buffer is responsible for + * unmapping and freeing it during completion. + */ +void bio_integrity_unmap_free_user(struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct bio_set *bs = bio->bi_pool; + + if (WARN_ON_ONCE(!(bip->bip_flags & BIP_INTEGRITY_USER))) + return; + bio_integrity_unmap_user(bip); + __bio_integrity_free(bs, bip); + bio->bi_integrity = NULL; + bio->bi_opf &= ~REQ_INTEGRITY; +} +EXPORT_SYMBOL(bio_integrity_unmap_free_user); + /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 9d9d2a127c4e..8b69427a4476 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -111,6 +111,13 @@ static struct request *nvme_alloc_user_request(struct request_queue *q, return req; } +static void nvme_unmap_bio(struct bio *bio) +{ + if (bio_integrity(bio)) + bio_integrity_unmap_free_user(bio); + blk_rq_unmap_user(bio); +} + static int nvme_map_user_request(struct request *req, u64 ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags) @@ -157,7 +164,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, out_unmap: if (bio) - blk_rq_unmap_user(bio); + nvme_unmap_bio(bio); out: blk_mq_free_request(req); return ret; @@ -195,7 +202,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); if (bio) - blk_rq_unmap_user(bio); + nvme_unmap_bio(bio); blk_mq_free_request(req); if (effects) @@ -406,7 +413,7 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); if (pdu->bio) - blk_rq_unmap_user(pdu->bio); + nvme_unmap_bio(pdu->bio); io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags); } @@ -432,7 +439,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, */ if (blk_rq_is_poll(req)) { if (pdu->bio) - blk_rq_unmap_user(pdu->bio); + nvme_unmap_bio(pdu->bio); io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); } else { io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); diff --git a/include/linux/bio.h b/include/linux/bio.h index d5379548d684..818e93612947 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -731,6 +731,7 @@ static inline bool bioset_initialized(struct bio_set *bs) bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed); +void bio_integrity_unmap_free_user(struct bio *bio); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_prep(struct bio *); @@ -807,6 +808,9 @@ static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, { return -EINVAL; } +static inline void bio_integrity_unmap_free_user(struct bio *bio) +{ +} #endif /* CONFIG_BLK_DEV_INTEGRITY */ From 72cacd06e47d86d89b0e7179fbc9eb3a0f39cd93 Mon Sep 17 00:00:00 2001 From: Julien Panis Date: Tue, 4 Jun 2024 18:46:58 +0200 Subject: [PATCH 264/778] thermal/drivers/mediatek/lvts_thermal: Return error in case of invalid efuse data This patch prevents from registering thermal entries and letting the driver misbehave if efuse data is invalid. A device is not properly calibrated if the golden temperature is zero. Fixes: f5f633b18234 ("thermal/drivers/mediatek: Add the Low Voltage Thermal Sensor driver") Signed-off-by: Julien Panis Reviewed-by: Nicolas Pitre Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240604-mtk-thermal-calib-check-v2-1-8f258254051d@baylibre.com Signed-off-by: Daniel Lezcano --- drivers/thermal/mediatek/lvts_thermal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index 82c355c466cf..819ed0110f3e 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ b/drivers/thermal/mediatek/lvts_thermal.c @@ -769,7 +769,11 @@ static int lvts_golden_temp_init(struct device *dev, u8 *calib, */ gt = (((u32 *)calib)[0] >> lvts_data->gt_calib_bit_offset) & 0xff; - if (gt && gt < LVTS_GOLDEN_TEMP_MAX) + /* A zero value for gt means that device has invalid efuse data */ + if (!gt) + return -ENODATA; + + if (gt < LVTS_GOLDEN_TEMP_MAX) golden_temp = gt; golden_temp_offset = golden_temp * 500 + lvts_data->temp_offset; From 6f2a43e3d14f6e31a3b041a1043195d02c54d615 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 12 Jun 2024 15:12:03 +0300 Subject: [PATCH 265/778] ASoC: SOF: sof-audio: Skip unprepare for in-use widgets on error rollback If the ipc_prepare() callback fails for a module instance, on error rewind we must skip the ipc_unprepare() call for ones that has positive use count. The positive use count means that the module instance is in active use, it cannot be unprepared. The issue affects capture direction paths with branches (single dai with multiple PCMs), the affected widgets are in the shared part of the paths. Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20240612121203.15468-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c index b3ac040811e7..ef9318947d74 100644 --- a/sound/soc/sof/sof-audio.c +++ b/sound/soc/sof/sof-audio.c @@ -485,7 +485,7 @@ sink_prepare: if (ret < 0) { /* unprepare the source widget */ if (widget_ops[widget->id].ipc_unprepare && - swidget && swidget->prepared) { + swidget && swidget->prepared && swidget->use_count == 0) { widget_ops[widget->id].ipc_unprepare(swidget); swidget->prepared = false; } From f3b198e4788fcc8d03ed0c8bd5e3856c6a5760c5 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Wed, 12 Jun 2024 09:01:07 +0000 Subject: [PATCH 266/778] ASoC: rt722-sdca-sdw: add debounce time for type detection Add debounce time in headset type detection for better performance. Signed-off-by: Jack Yu Link: https://lore.kernel.org/r/7e502e9a9dd94122a1b60deb5ceb60fb@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt722-sdca-sdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c index f73ee3bf90f5..87354bb1564e 100644 --- a/sound/soc/codecs/rt722-sdca-sdw.c +++ b/sound/soc/codecs/rt722-sdca-sdw.c @@ -352,7 +352,7 @@ static int rt722_sdca_interrupt_callback(struct sdw_slave *slave, if (status->sdca_cascade && !rt722->disable_irq) mod_delayed_work(system_power_efficient_wq, - &rt722->jack_detect_work, msecs_to_jiffies(30)); + &rt722->jack_detect_work, msecs_to_jiffies(280)); mutex_unlock(&rt722->disable_irq_lock); From e5d574ab37f5f2e7937405613d9b1a724811e5ad Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Tue, 11 Jun 2024 18:02:08 +0800 Subject: [PATCH 267/778] nvme: avoid double free special payload If a discard request needs to be retried, and that retry may fail before a new special payload is added, a double free will result. Clear the RQF_SPECIAL_LOAD when the request is cleaned. Signed-off-by: Chunguang Xu Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f5d150c62955..c40930d10bd3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -998,6 +998,7 @@ void nvme_cleanup_cmd(struct request *req) clear_bit_unlock(0, &ctrl->discard_page_busy); else kfree(bvec_virt(&req->special_vec)); + req->rq_flags &= ~RQF_SPECIAL_PAYLOAD; } } EXPORT_SYMBOL_GPL(nvme_cleanup_cmd); From d76584e53f4244dbc154bec447c3852600acc914 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Jun 2024 16:02:40 +0200 Subject: [PATCH 268/778] nvmet-passthru: propagate status from id override functions The id override functions return a status which is not propagated to the caller. Fixes: c1fef73f793b ("nvmet: add passthru code to process commands") Signed-off-by: Daniel Wagner Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/passthru.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index bb4a69d538fd..f003782d4ecf 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -226,13 +226,13 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) req->cmd->common.opcode == nvme_admin_identify) { switch (req->cmd->identify.cns) { case NVME_ID_CNS_CTRL: - nvmet_passthru_override_id_ctrl(req); + status = nvmet_passthru_override_id_ctrl(req); break; case NVME_ID_CNS_NS: - nvmet_passthru_override_id_ns(req); + status = nvmet_passthru_override_id_ns(req); break; case NVME_ID_CNS_NS_DESC_LIST: - nvmet_passthru_override_id_descs(req); + status = nvmet_passthru_override_id_descs(req); break; } } else if (status < 0) From cd0c1b8e045a8d2785342b385cb2684d9b48e426 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Jun 2024 16:11:59 +0200 Subject: [PATCH 269/778] nvmet: always initialize cqe.result The spec doesn't mandate that the first two double words (aka results) for the command queue entry need to be set to 0 when they are not used (not specified). Though, the target implemention returns 0 for TCP and FC but not for RDMA. Let's make RDMA behave the same and thus explicitly initializing the result field. This prevents leaking any data from the stack. Signed-off-by: Daniel Wagner Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 1 + drivers/nvme/target/fabrics-cmd-auth.c | 3 --- drivers/nvme/target/fabrics-cmd.c | 6 ------ 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 06f0c587f343..4ff460ba2826 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -957,6 +957,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->metadata_sg_cnt = 0; req->transfer_len = 0; req->metadata_len = 0; + req->cqe->result.u64 = 0; req->cqe->status = 0; req->cqe->sq_head = 0; req->ns = NULL; diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index d61b8c6ff3b2..cb34d644ed08 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -333,7 +333,6 @@ done: pr_debug("%s: ctrl %d qid %d nvme status %x error loc %d\n", __func__, ctrl->cntlid, req->sq->qid, status, req->error_loc); - req->cqe->result.u64 = 0; if (req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2 && req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) { unsigned long auth_expire_secs = ctrl->kato ? ctrl->kato : 120; @@ -516,8 +515,6 @@ void nvmet_execute_auth_receive(struct nvmet_req *req) status = nvmet_copy_to_sgl(req, 0, d, al); kfree(d); done: - req->cqe->result.u64 = 0; - if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2) nvmet_auth_sq_free(req->sq); else if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE1) { diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 042b379cbb36..69d77d34bec1 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -226,9 +226,6 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) if (status) goto out; - /* zero out initial completion result, assign values as needed */ - req->cqe->result.u32 = 0; - if (c->recfmt != 0) { pr_warn("invalid connect version (%d).\n", le16_to_cpu(c->recfmt)); @@ -305,9 +302,6 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) if (status) goto out; - /* zero out initial completion result, assign values as needed */ - req->cqe->result.u32 = 0; - if (c->recfmt != 0) { pr_warn("invalid connect version (%d).\n", le16_to_cpu(c->recfmt)); From 6bfff3582416b2f809e6b08c6e9d57b18086bdbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Wed, 12 Jun 2024 15:33:57 +0200 Subject: [PATCH 270/778] Revert "batman-adv: prefer kfree_rcu() over call_rcu() with free-only callbacks" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 537db05da2ca8ccc1243c9dd1d0c148b84aa0432. This change seems to result in a memory leak / RCU race and the following kernel splat when the batman-adv kernel module is unloaded: ``` [ 112.208633] ============================================================================= [ 112.210359] BUG batadv_tl_cache (Tainted: G OE ): Objects remaining in batadv_tl_cache on __kmem_cache_shutdown() [ 112.211943] ----------------------------------------------------------------------------- [ 112.212517] Slab 0xffffe8afc0216d00 objects=16 used=1 fp=0xffff93f4085b4340 flags=0xfffffc0000a00(workingset|slab|node=0|zone=1|lastcpupid=0x1fffff) [ 112.212517] CPU: 1 PID: 776 Comm: rmmod Tainted: G OE 6.8.12-amd64 #1 Debian 6.8.12-1 [ 112.212517] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 112.212517] Call Trace: [ 112.212517] [ 112.212517] dump_stack_lvl+0x64/0x80 [ 112.212517] slab_err+0xe6/0x120 [ 112.212517] __kmem_cache_shutdown+0x160/0x2e0 [ 112.212517] kmem_cache_destroy+0x55/0x160 [ 112.220849] batadv_tt_cache_destroy+0x15/0x60 [batman_adv] [ 112.220849] __do_sys_delete_module+0x1d5/0x320 [ 112.220849] do_syscall_64+0x83/0x190 [ 112.220849] ? do_syscall_64+0x8f/0x190 [ 112.220849] ? exc_page_fault+0x7f/0x180 [ 112.220849] entry_SYSCALL_64_after_hwframe+0x78/0x80 [ 112.224478] RIP: 0033:0x7f2ac8434977 [ 112.224478] Code: 73 01 c3 48 8b 0d a9 94 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 79 94 0c 00 f7 d8 64 89 01 48 [ 112.224478] RSP: 002b:00007ffe0adf6138 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 112.224478] RAX: ffffffffffffffda RBX: 000055db9018e770 RCX: 00007f2ac8434977 [ 112.224478] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000055db9018e7d8 [ 112.224478] RBP: 0000000000000000 R08: 1999999999999999 R09: 0000000000000000 [ 112.224478] R10: 00007f2ac84a6ac0 R11: 0000000000000206 R12: 00007ffe0adf6390 [ 112.224478] R13: 000055db9018e770 R14: 000055db9018d2a0 R15: 0000000000000000 [ 112.233961] [ 112.233961] Disabling lock debugging due to kernel taint [ 112.233961] Object 0xffff93f4085b4140 @offset=320 [ 112.233961] Allocated in batadv_tt_local_add+0x297/0xa20 [batman_adv] age=15835 cpu=1 pid=755 [ 112.233961] batadv_tt_local_add+0x297/0xa20 [batman_adv] [ 112.233961] batadv_interface_set_mac_addr+0xf6/0x120 [batman_adv] [ 112.233961] dev_set_mac_address+0xde/0x140 [ 112.233961] dev_set_mac_address_user+0x30/0x50 [ 112.233961] do_setlink+0x261/0x12d0 [ 112.233961] rtnl_setlink+0x11f/0x1d0 [ 112.233961] rtnetlink_rcv_msg+0x152/0x3c0 [ 112.241772] netlink_rcv_skb+0x5b/0x110 [ 112.241772] netlink_unicast+0x1a6/0x290 [ 112.241772] netlink_sendmsg+0x223/0x490 [ 112.241772] __sys_sendto+0x1df/0x1f0 [ 112.241772] __x64_sys_sendto+0x24/0x30 [ 112.241772] do_syscall_64+0x83/0x190 [ 112.241772] entry_SYSCALL_64_after_hwframe+0x78/0x80 [ 112.245994] ------------[ cut here ]------------ [ 112.246650] kmem_cache_destroy batadv_tl_cache: Slab cache still has objects when called from batadv_tt_cache_destroy+0x15/0x60 [batman_adv] [ 112.246668] WARNING: CPU: 1 PID: 776 at mm/slab_common.c:493 kmem_cache_destroy+0x14d/0x160 [ 112.249584] Modules linked in: veth batman_adv(OE-) cfg80211 rfkill bridge stp llc libcrc32c crc32c_generic crc16 rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver binfmt_misc pcspkr button joydev evdev serio_raw loop dm_mod efi_pstore nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vsock vmw_vmci qemu_fw_cfg ip_tables x_tables autofs4 nfsv3 nfs_acl nfs lockd grace sunrpc 9pnet_rdma rdma_cm iw_cm ib_cm ib_core configfs 9p netfs ata_generic ata_piix libata psmouse scsi_mod 9pnet_virtio i2c_piix4 9pnet e1000 scsi_common floppy crypto_simd cryptd [ 112.256555] CPU: 1 PID: 776 Comm: rmmod Tainted: G B OE 6.8.12-amd64 #1 Debian 6.8.12-1 [ 112.258457] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 112.260410] RIP: 0010:kmem_cache_destroy+0x14d/0x160 [ 112.261687] Code: 00 eb be 5b 5d 41 5c 41 5d c3 cc cc cc cc 48 8b 53 60 48 8b 4c 24 20 48 c7 c6 60 d5 e3 98 48 c7 c7 b8 ec 2d 99 e8 43 0d d8 ff <0f> 0b e9 e2 fe ff ff c3 cc cc cc cc 0f 1f 80 00 00 00 00 90 90 90 [ 112.265219] RSP: 0018:ffffb3b2806e7e48 EFLAGS: 00010282 [ 112.266044] RAX: 0000000000000000 RBX: ffff93f4270a2640 RCX: 0000000000000027 [ 112.267157] RDX: ffff93f43c521708 RSI: 0000000000000001 RDI: ffff93f43c521700 [ 112.268268] RBP: 000055db9018e7d8 R08: 0000000000000000 R09: ffffb3b2806e7cd8 [ 112.269418] R10: ffffb3b2806e7cd0 R11: 0000000000000003 R12: 0000000080012d00 [ 112.270572] R13: ffffb3b2806e7f58 R14: 0000000000000000 R15: 0000000000000000 [ 112.271699] FS: 00007f2ac8308440(0000) GS:ffff93f43c500000(0000) knlGS:0000000000000000 [ 112.273001] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 112.273923] CR2: 00005584ef830110 CR3: 000000000787c000 CR4: 00000000000006f0 [ 112.275050] Call Trace: [ 112.275464] [ 112.275810] ? kmem_cache_destroy+0x14d/0x160 [ 112.276518] ? __warn+0x81/0x130 [ 112.277043] ? kmem_cache_destroy+0x14d/0x160 [ 112.277730] ? report_bug+0x171/0x1a0 [ 112.278315] ? prb_read_valid+0x1b/0x30 [ 112.278919] ? handle_bug+0x3c/0x80 [ 112.279467] ? exc_invalid_op+0x17/0x70 [ 112.280071] ? asm_exc_invalid_op+0x1a/0x20 [ 112.280741] ? kmem_cache_destroy+0x14d/0x160 [ 112.281603] ? kmem_cache_destroy+0x14d/0x160 [ 112.282489] batadv_tt_cache_destroy+0x15/0x60 [batman_adv] [ 112.283373] __do_sys_delete_module+0x1d5/0x320 [ 112.284080] do_syscall_64+0x83/0x190 [ 112.284696] ? do_syscall_64+0x8f/0x190 [ 112.285315] ? exc_page_fault+0x7f/0x180 [ 112.285970] entry_SYSCALL_64_after_hwframe+0x78/0x80 [ 112.286768] RIP: 0033:0x7f2ac8434977 [ 112.287355] Code: 73 01 c3 48 8b 0d a9 94 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 79 94 0c 00 f7 d8 64 89 01 48 [ 112.290282] RSP: 002b:00007ffe0adf6138 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 112.291465] RAX: ffffffffffffffda RBX: 000055db9018e770 RCX: 00007f2ac8434977 [ 112.292595] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000055db9018e7d8 [ 112.293724] RBP: 0000000000000000 R08: 1999999999999999 R09: 0000000000000000 [ 112.294863] R10: 00007f2ac84a6ac0 R11: 0000000000000206 R12: 00007ffe0adf6390 [ 112.295982] R13: 000055db9018e770 R14: 000055db9018d2a0 R15: 0000000000000000 [ 112.297103] [ 112.297465] ---[ end trace 0000000000000000 ]--- ``` So far, after some debugging, the actual cause for this could not immediately be found within the batman-adv code. Therefore reverting this for now until the underlying issue can be found and better understood. Some additional debugging information and discussions can be found on our Redmine bugtracker, linked below. Link: https://www.open-mesh.org/issues/428 Fixes: 537db05da2ca ("batman-adv: prefer kfree_rcu() over call_rcu() with free-only callbacks") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 47 ++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b21ff3c36b07..2243cec18ecc 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -208,6 +208,20 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, return tt_global_entry; } +/** + * batadv_tt_local_entry_free_rcu() - free the tt_local_entry + * @rcu: rcu pointer of the tt_local_entry + */ +static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_local_entry *tt_local_entry; + + tt_local_entry = container_of(rcu, struct batadv_tt_local_entry, + common.rcu); + + kmem_cache_free(batadv_tl_cache, tt_local_entry); +} + /** * batadv_tt_local_entry_release() - release tt_local_entry from lists and queue * for free after rcu grace period @@ -222,7 +236,7 @@ static void batadv_tt_local_entry_release(struct kref *ref) batadv_softif_vlan_put(tt_local_entry->vlan); - kfree_rcu(tt_local_entry, common.rcu); + call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu); } /** @@ -240,6 +254,20 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry) batadv_tt_local_entry_release); } +/** + * batadv_tt_global_entry_free_rcu() - free the tt_global_entry + * @rcu: rcu pointer of the tt_global_entry + */ +static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_global_entry *tt_global_entry; + + tt_global_entry = container_of(rcu, struct batadv_tt_global_entry, + common.rcu); + + kmem_cache_free(batadv_tg_cache, tt_global_entry); +} + /** * batadv_tt_global_entry_release() - release tt_global_entry from lists and * queue for free after rcu grace period @@ -254,7 +282,7 @@ void batadv_tt_global_entry_release(struct kref *ref) batadv_tt_global_del_orig_list(tt_global_entry); - kfree_rcu(tt_global_entry, common.rcu); + call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu); } /** @@ -379,6 +407,19 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, batadv_tt_global_size_mod(orig_node, vid, -1); } +/** + * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry + * @rcu: rcu pointer of the orig_entry + */ +static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_orig_list_entry *orig_entry; + + orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); + + kmem_cache_free(batadv_tt_orig_cache, orig_entry); +} + /** * batadv_tt_orig_list_entry_release() - release tt orig entry from lists and * queue for free after rcu grace period @@ -392,7 +433,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref) refcount); batadv_orig_node_put(orig_entry->orig_node); - kfree_rcu(orig_entry, rcu); + call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } /** From 54559642b96116b45e4b5ca7fd9f7835b8561272 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 12 Jun 2024 13:56:38 +0100 Subject: [PATCH 271/778] io_uring/rsrc: don't lock while !TASK_RUNNING There is a report of io_rsrc_ref_quiesce() locking a mutex while not TASK_RUNNING, which is due to forgetting restoring the state back after io_run_task_work_sig() and attempts to break out of the waiting loop. do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait+0xa4/0x380 kernel/sched/wait.c:237 WARNING: CPU: 2 PID: 397056 at kernel/sched/core.c:10099 __might_sleep+0x114/0x160 kernel/sched/core.c:10099 RIP: 0010:__might_sleep+0x114/0x160 kernel/sched/core.c:10099 Call Trace: __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0xb4/0x940 kernel/locking/mutex.c:752 io_rsrc_ref_quiesce+0x590/0x940 io_uring/rsrc.c:253 io_sqe_buffers_unregister+0xa2/0x340 io_uring/rsrc.c:799 __io_uring_register io_uring/register.c:424 [inline] __do_sys_io_uring_register+0x5b9/0x2400 io_uring/register.c:613 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd8/0x270 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x6f/0x77 Reported-by: Li Shi Fixes: 4ea15b56f0810 ("io_uring/rsrc: use wq for quiescing") Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/77966bc104e25b0534995d5dbb152332bc8f31c0.1718196953.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 65417c9553b1..edb9c5baf2e2 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -249,6 +249,7 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, ret = io_run_task_work_sig(ctx); if (ret < 0) { + __set_current_state(TASK_RUNNING); mutex_lock(&ctx->uring_lock); if (list_empty(&ctx->rsrc_ref_list)) ret = 0; From 0057222c45140830a7bf55e92fb67f84a2814f67 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 18 Apr 2024 01:07:33 +0100 Subject: [PATCH 272/778] regulator: axp20x: AXP717: fix LDO supply rails and off-by-ones The X-Powers AXP717 PMIC has separate input supply pins for each group of LDOs, so they are not all using the same DCDC1 input, as described currently. Replace the "supply" member of each LDO description with the respective group supply name, so that the supply dependencies can be correctly described in the devicetree. Also fix two off-by-ones in the regulator macros, after some double checking the numbers against the datasheet. This uncovered a bug in the datasheet: add a comment to document this. Fixes: d2ac3df75c3a ("regulator: axp20x: add support for the AXP717") Signed-off-by: Andre Przywara Reviewed-by: John Watts Link: https://lore.kernel.org/r/20240418000736.24338-3-andre.przywara@arm.com Signed-off-by: Mark Brown --- drivers/regulator/axp20x-regulator.c | 33 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index 34fcdd82b2ea..f3c447ecdc3b 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -140,7 +140,7 @@ #define AXP717_DCDC1_NUM_VOLTAGES 88 #define AXP717_DCDC2_NUM_VOLTAGES 107 -#define AXP717_DCDC3_NUM_VOLTAGES 104 +#define AXP717_DCDC3_NUM_VOLTAGES 103 #define AXP717_DCDC_V_OUT_MASK GENMASK(6, 0) #define AXP717_LDO_V_OUT_MASK GENMASK(4, 0) @@ -763,10 +763,15 @@ static const struct linear_range axp717_dcdc1_ranges[] = { REGULATOR_LINEAR_RANGE(1220000, 71, 87, 20000), }; +/* + * The manual says that the last voltage is 3.4V, encoded as 0b1101011 (107), + * but every other method proves that this is wrong, so it's really 106 that + * programs the final 3.4V. + */ static const struct linear_range axp717_dcdc2_ranges[] = { REGULATOR_LINEAR_RANGE(500000, 0, 70, 10000), REGULATOR_LINEAR_RANGE(1220000, 71, 87, 20000), - REGULATOR_LINEAR_RANGE(1600000, 88, 107, 100000), + REGULATOR_LINEAR_RANGE(1600000, 88, 106, 100000), }; static const struct linear_range axp717_dcdc3_ranges[] = { @@ -790,40 +795,40 @@ static const struct regulator_desc axp717_regulators[] = { AXP_DESC(AXP717, DCDC4, "dcdc4", "vin4", 1000, 3700, 100, AXP717_DCDC4_CONTROL, AXP717_DCDC_V_OUT_MASK, AXP717_DCDC_OUTPUT_CONTROL, BIT(3)), - AXP_DESC(AXP717, ALDO1, "aldo1", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, ALDO1, "aldo1", "aldoin", 500, 3500, 100, AXP717_ALDO1_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(0)), - AXP_DESC(AXP717, ALDO2, "aldo2", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, ALDO2, "aldo2", "aldoin", 500, 3500, 100, AXP717_ALDO2_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(1)), - AXP_DESC(AXP717, ALDO3, "aldo3", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, ALDO3, "aldo3", "aldoin", 500, 3500, 100, AXP717_ALDO3_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(2)), - AXP_DESC(AXP717, ALDO4, "aldo4", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, ALDO4, "aldo4", "aldoin", 500, 3500, 100, AXP717_ALDO4_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(3)), - AXP_DESC(AXP717, BLDO1, "bldo1", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, BLDO1, "bldo1", "bldoin", 500, 3500, 100, AXP717_BLDO1_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(4)), - AXP_DESC(AXP717, BLDO2, "bldo2", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, BLDO2, "bldo2", "bldoin", 500, 3500, 100, AXP717_BLDO2_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(5)), - AXP_DESC(AXP717, BLDO3, "bldo3", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, BLDO3, "bldo3", "bldoin", 500, 3500, 100, AXP717_BLDO3_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(6)), - AXP_DESC(AXP717, BLDO4, "bldo4", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, BLDO4, "bldo4", "bldoin", 500, 3500, 100, AXP717_BLDO4_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(7)), - AXP_DESC(AXP717, CLDO1, "cldo1", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, CLDO1, "cldo1", "cldoin", 500, 3500, 100, AXP717_CLDO1_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO1_OUTPUT_CONTROL, BIT(0)), - AXP_DESC(AXP717, CLDO2, "cldo2", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, CLDO2, "cldo2", "cldoin", 500, 3500, 100, AXP717_CLDO2_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO1_OUTPUT_CONTROL, BIT(1)), - AXP_DESC(AXP717, CLDO3, "cldo3", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, CLDO3, "cldo3", "cldoin", 500, 3500, 100, AXP717_CLDO3_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO1_OUTPUT_CONTROL, BIT(2)), - AXP_DESC(AXP717, CLDO4, "cldo4", "vin1", 500, 3500, 100, + AXP_DESC(AXP717, CLDO4, "cldo4", "cldoin", 500, 3500, 100, AXP717_CLDO4_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO1_OUTPUT_CONTROL, BIT(3)), AXP_DESC(AXP717, CPUSLDO, "cpusldo", "vin1", 500, 1400, 50, From 957df9af723ccc570da835978cf7fe7863632842 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Jun 2024 16:15:31 -0600 Subject: [PATCH 273/778] nbd: Remove __force casts Make it again possible for sparse to verify that blk_status_t and Unix error codes are used in the proper context by making nbd_send_cmd() return a blk_status_t instead of an integer. No functionality has been changed. Signed-off-by: Christoph Hellwig [ bvanassche: added description and made two small formatting changes ] Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20240604221531.327131-1-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 51 +++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 22a79a62cc4e..b87aa80a46dd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -589,10 +589,11 @@ static inline int was_interrupted(int result) } /* - * Returns BLK_STS_RESOURCE if the caller should retry after a delay. Returns - * -EAGAIN if the caller should requeue @cmd. Returns -EIO if sending failed. + * Returns BLK_STS_RESOURCE if the caller should retry after a delay. + * Returns BLK_STS_IOERR if sending failed. */ -static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) +static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, + int index) { struct request *req = blk_mq_rq_from_pdu(cmd); struct nbd_config *config = nbd->config; @@ -614,13 +615,13 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) type = req_to_nbd_cmd_type(req); if (type == U32_MAX) - return -EIO; + return BLK_STS_IOERR; if (rq_data_dir(req) == WRITE && (config->flags & NBD_FLAG_READ_ONLY)) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Write on read-only\n"); - return -EIO; + return BLK_STS_IOERR; } if (req->cmd_flags & REQ_FUA) @@ -674,11 +675,11 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) nsock->sent = sent; } set_bit(NBD_CMD_REQUEUED, &cmd->flags); - return (__force int)BLK_STS_RESOURCE; + return BLK_STS_RESOURCE; } dev_err_ratelimited(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); - return -EAGAIN; + goto requeue; } send_pages: if (type != NBD_CMD_WRITE) @@ -715,12 +716,12 @@ send_pages: nsock->pending = req; nsock->sent = sent; set_bit(NBD_CMD_REQUEUED, &cmd->flags); - return (__force int)BLK_STS_RESOURCE; + return BLK_STS_RESOURCE; } dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", result); - return -EAGAIN; + goto requeue; } /* * The completion might already have come in, @@ -737,7 +738,16 @@ out: trace_nbd_payload_sent(req, handle); nsock->pending = NULL; nsock->sent = 0; - return 0; + __set_bit(NBD_CMD_INFLIGHT, &cmd->flags); + return BLK_STS_OK; + +requeue: + /* retry on a different socket */ + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Request send failed, requeueing\n"); + nbd_mark_nsock_dead(nbd, nsock, 1); + nbd_requeue_cmd(cmd); + return BLK_STS_OK; } static int nbd_read_reply(struct nbd_device *nbd, struct socket *sock, @@ -1018,7 +1028,7 @@ static blk_status_t nbd_handle_cmd(struct nbd_cmd *cmd, int index) struct nbd_device *nbd = cmd->nbd; struct nbd_config *config; struct nbd_sock *nsock; - int ret; + blk_status_t ret; lockdep_assert_held(&cmd->lock); @@ -1072,28 +1082,11 @@ again: ret = BLK_STS_OK; goto out; } - /* - * Some failures are related to the link going down, so anything that - * returns EAGAIN can be retried on a different socket. - */ ret = nbd_send_cmd(nbd, cmd, index); - /* - * Access to this flag is protected by cmd->lock, thus it's safe to set - * the flag after nbd_send_cmd() succeed to send request to server. - */ - if (!ret) - __set_bit(NBD_CMD_INFLIGHT, &cmd->flags); - else if (ret == -EAGAIN) { - dev_err_ratelimited(disk_to_dev(nbd->disk), - "Request send failed, requeueing\n"); - nbd_mark_nsock_dead(nbd, nsock, 1); - nbd_requeue_cmd(cmd); - ret = BLK_STS_OK; - } out: mutex_unlock(&nsock->tx_lock); nbd_config_put(nbd); - return ret < 0 ? BLK_STS_IOERR : (__force blk_status_t)ret; + return ret; } static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, From d71a989cf5d961989c273093cdff2550acdde314 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 6 Jun 2024 21:52:07 -0600 Subject: [PATCH 274/778] vfio/pci: Insert full vma on mmap'd MMIO fault In order to improve performance of typical scenarios we can try to insert the entire vma on fault. This accelerates typical cases, such as when the MMIO region is DMA mapped by QEMU. The vfio_iommu_type1 driver will fault in the entire DMA mapped range through fixup_user_fault(). In synthetic testing, this improves the time required to walk a PCI BAR mapping from userspace by roughly 1/3rd. This is likely an interim solution until vmf_insert_pfn_{pmd,pud}() gain support for pfnmaps. Suggested-by: Yan Zhao Link: https://lore.kernel.org/all/Zl6XdUkt%2FzMMGOLF@yzhao56-desk.sh.intel.com/ Reviewed-by: Yan Zhao Link: https://lore.kernel.org/r/20240607035213.2054226-1-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index db31c27bf78b..987c7921affa 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1662,6 +1662,7 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct vfio_pci_core_device *vdev = vma->vm_private_data; unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff; + unsigned long addr = vma->vm_start; vm_fault_t ret = VM_FAULT_SIGBUS; pfn = vma_to_pfn(vma); @@ -1669,11 +1670,25 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) down_read(&vdev->memory_lock); if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) - goto out_disabled; + goto out_unlock; ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff); + if (ret & VM_FAULT_ERROR) + goto out_unlock; -out_disabled: + /* + * Pre-fault the remainder of the vma, abort further insertions and + * supress error if fault is encountered during pre-fault. + */ + for (; addr < vma->vm_end; addr += PAGE_SIZE, pfn++) { + if (addr == vmf->address) + continue; + + if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR) + break; + } + +out_unlock: up_read(&vdev->memory_lock); return ret; From 14a20e5b4ad998793c5f43b0330d9e1388446cf3 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Fri, 7 Jun 2024 13:28:28 +0200 Subject: [PATCH 275/778] net/ipv6: Fix the RT cache flush via sysctl using a previous delay The net.ipv6.route.flush system parameter takes a value which specifies a delay used during the flush operation for aging exception routes. The written value is however not used in the currently requested flush and instead utilized only in the next one. A problem is that ipv6_sysctl_rtcache_flush() first reads the old value of net->ipv6.sysctl.flush_delay into a local delay variable and then calls proc_dointvec() which actually updates the sysctl based on the provided input. Fix the problem by switching the order of the two operations. Fixes: 4990509f19e8 ("[NETNS][IPV6]: Make sysctls route per namespace.") Signed-off-by: Petr Pavlu Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607112828.30285-1-petr.pavlu@suse.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f083d9faba6b..952c2bf11709 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6343,12 +6343,12 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, if (!write) return -EINVAL; - net = (struct net *)ctl->extra1; - delay = net->ipv6.sysctl.flush_delay; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (ret) return ret; + net = (struct net *)ctl->extra1; + delay = net->ipv6.sysctl.flush_delay; fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } From 26ba7c3f139f843bf46ed0779e30d84641767959 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 12 Jun 2024 15:53:29 -0700 Subject: [PATCH 276/778] MAINTAINERS: mailmap: Update Stanislav's email address Moving to personal address for upstream work. Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20240612225334.41869-1-sdf@google.com Signed-off-by: Alexei Starovoitov --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 9af91bd3584b..689801733975 100644 --- a/.mailmap +++ b/.mailmap @@ -606,6 +606,7 @@ Simon Kelley Sricharan Ramabadhran Srinivas Ramana Sriram R +Stanislav Fomichev Stefan Wahren Stéphane Witzmann Stephen Hemminger diff --git a/MAINTAINERS b/MAINTAINERS index aacccb376c28..4582af09f2da 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3980,7 +3980,7 @@ R: Song Liu R: Yonghong Song R: John Fastabend R: KP Singh -R: Stanislav Fomichev +R: Stanislav Fomichev R: Hao Luo R: Jiri Olsa L: bpf@vger.kernel.org From 36c92936e868601fa1f43da6758cf55805043509 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 9 Jun 2024 13:36:53 +0300 Subject: [PATCH 277/778] net: bridge: mst: pass vlan group directly to br_mst_vlan_set_state Pass the already obtained vlan group pointer to br_mst_vlan_set_state() instead of dereferencing it again. Each caller has already correctly dereferenced it for their context. This change is required for the following suspicious RCU dereference fix. No functional changes intended. Fixes: 3a7c1661ae13 ("net: bridge: mst: fix vlan use-after-free") Reported-by: syzbot+9bbe2de1bc9d470eb5fe@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9bbe2de1bc9d470eb5fe Signed-off-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240609103654.914987-2-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_mst.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 3c66141d34d6..1de72816b0fb 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -73,11 +73,10 @@ int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state) } EXPORT_SYMBOL_GPL(br_mst_get_state); -static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v, +static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *v, u8 state) { - struct net_bridge_vlan_group *vg = nbp_vlan_group(p); - if (br_vlan_get_state(v) == state) return; @@ -121,7 +120,7 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, if (v->brvlan->msti != msti) continue; - br_mst_vlan_set_state(p, v, state); + br_mst_vlan_set_state(vg, v, state); } out: @@ -140,13 +139,13 @@ static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti) * it. */ if (v != pv && v->brvlan->msti == msti) { - br_mst_vlan_set_state(pv->port, pv, v->state); + br_mst_vlan_set_state(vg, pv, v->state); return; } } /* Otherwise, start out in a new MSTI with all ports disabled. */ - return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED); + return br_mst_vlan_set_state(vg, pv, BR_STATE_DISABLED); } int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti) From 546ceb1dfdac866648ec959cbc71d9525bd73462 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 9 Jun 2024 13:36:54 +0300 Subject: [PATCH 278/778] net: bridge: mst: fix suspicious rcu usage in br_mst_set_state I converted br_mst_set_state to RCU to avoid a vlan use-after-free but forgot to change the vlan group dereference helper. Switch to vlan group RCU deref helper to fix the suspicious rcu usage warning. Fixes: 3a7c1661ae13 ("net: bridge: mst: fix vlan use-after-free") Reported-by: syzbot+9bbe2de1bc9d470eb5fe@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9bbe2de1bc9d470eb5fe Signed-off-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240609103654.914987-3-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_mst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 1de72816b0fb..1820f09ff59c 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -102,7 +102,7 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, int err = 0; rcu_read_lock(); - vg = nbp_vlan_group(p); + vg = nbp_vlan_group_rcu(p); if (!vg) goto out; From 6e5aee08bd2517397c9572243a816664f2ead547 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 13 Jun 2024 10:17:09 +0200 Subject: [PATCH 279/778] Revert "MIPS: pci: lantiq: restore reset gpio polarity" This reverts commit 277a0363120276645ae598d8d5fea7265e076ae9. While fixing old boards with broken DTs, this change will break newer ones with correct gpio polarity annotation. Signed-off-by: Thomas Bogendoerfer --- arch/mips/pci/pci-lantiq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c index 0844db34022e..68a8cefed420 100644 --- a/arch/mips/pci/pci-lantiq.c +++ b/arch/mips/pci/pci-lantiq.c @@ -124,14 +124,14 @@ static int ltq_pci_startup(struct platform_device *pdev) clk_disable(clk_external); /* setup reset gpio used by pci */ - reset_gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS); + reset_gpio = devm_gpiod_get_optional(&pdev->dev, "reset", + GPIOD_OUT_LOW); error = PTR_ERR_OR_ZERO(reset_gpio); if (error) { dev_err(&pdev->dev, "failed to request gpio: %d\n", error); return error; } gpiod_set_consumer_name(reset_gpio, "pci_reset"); - gpiod_direction_output(reset_gpio, 1); /* enable auto-switching between PCI and EBU */ ltq_pci_w32(0xa, PCI_CR_CLK_CTRL); @@ -194,10 +194,10 @@ static int ltq_pci_startup(struct platform_device *pdev) /* toggle reset pin */ if (reset_gpio) { - gpiod_set_value_cansleep(reset_gpio, 0); + gpiod_set_value_cansleep(reset_gpio, 1); wmb(); mdelay(1); - gpiod_set_value_cansleep(reset_gpio, 1); + gpiod_set_value_cansleep(reset_gpio, 0); } return 0; } From b2747f108b8034271fd5289bd8f3a7003e0775a3 Mon Sep 17 00:00:00 2001 From: Benjamin Segall Date: Wed, 12 Jun 2024 12:44:44 -0700 Subject: [PATCH 280/778] x86/boot: Don't add the EFI stub to targets, again This is a re-commit of da05b143a308 ("x86/boot: Don't add the EFI stub to targets") after the tagged patch incorrectly reverted it. vmlinux-objs-y is added to targets, with an assumption that they are all relative to $(obj); adding a $(objtree)/drivers/... path causes the build to incorrectly create a useless arch/x86/boot/compressed/drivers/... directory tree. Fix this just by using a different make variable for the EFI stub. Fixes: cb8bda8ad443 ("x86/boot/compressed: Rename efi_thunk_64.S to efi-mixed.S") Signed-off-by: Ben Segall Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Ard Biesheuvel Cc: stable@vger.kernel.org # v6.1+ Link: https://lore.kernel.org/r/xm267ceukksz.fsf@bsegall.svl.corp.google.com --- arch/x86/boot/compressed/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 243ee86cb1b1..f2051644de94 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -105,9 +105,9 @@ vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o -vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a -$(obj)/vmlinux: $(vmlinux-objs-y) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S From 0298f51652be47b79780833e0b63194e1231fa34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Thu, 13 Jun 2024 11:01:26 +0200 Subject: [PATCH 281/778] ASoC: topology: Fix route memory corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was reported that recent fix for memory corruption during topology load, causes corruption in other cases. Instead of being overeager with checking topology, assume that it is properly formatted and just duplicate strings. Reported-by: Pierre-Louis Bossart Closes: https://lore.kernel.org/linux-sound/171812236450.201359.3019210915105428447.b4-ty@kernel.org/T/#m8c4bd5abf453960fde6f826c4b7f84881da63e9d Suggested-by: Péter Ujfalusi Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240613090126.841189-1-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 2ac442644ed4..6951ff7bc61e 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1052,21 +1052,15 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg, break; } - route->source = devm_kmemdup(tplg->dev, elem->source, - min(strlen(elem->source), maxlen), - GFP_KERNEL); - route->sink = devm_kmemdup(tplg->dev, elem->sink, - min(strlen(elem->sink), maxlen), - GFP_KERNEL); + route->source = devm_kstrdup(tplg->dev, elem->source, GFP_KERNEL); + route->sink = devm_kstrdup(tplg->dev, elem->sink, GFP_KERNEL); if (!route->source || !route->sink) { ret = -ENOMEM; break; } if (strnlen(elem->control, maxlen) != 0) { - route->control = devm_kmemdup(tplg->dev, elem->control, - min(strlen(elem->control), maxlen), - GFP_KERNEL); + route->control = devm_kstrdup(tplg->dev, elem->control, GFP_KERNEL); if (!route->control) { ret = -ENOMEM; break; From 12243a8115f8583a6bcada7717c01fb164e23c89 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Thu, 30 May 2024 13:36:03 -0500 Subject: [PATCH 282/778] iommu/amd: Fix panic accessing amd_iommu_enable_faulting This fixes a bug introduced by commit d74169ceb0d2 ("iommu/vt-d: Allocate DMAR fault interrupts locally"). The panic happens when amd_iommu_enable_faulting is called from CPUHP_AP_ONLINE_DYN context. Fixes: d74169ceb0d2 ("iommu/vt-d: Allocate DMAR fault interrupts locally") Signed-off-by: Dimitri Sivanich Tested-by: Yi Zhang Reviewed-by: Jerry Snitselaar Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/ZljHE/R4KLzGU6vx@hpe.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 27e293727095..161248067776 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3362,7 +3362,7 @@ int amd_iommu_reenable(int mode) return 0; } -int __init amd_iommu_enable_faulting(unsigned int cpu) +int amd_iommu_enable_faulting(unsigned int cpu) { /* We enable MSI later when PCI is initialized */ return 0; From 0e6b6dedf16800df0ff73ffe2bb5066514db29c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Jun 2024 16:15:55 +0200 Subject: [PATCH 283/778] ACPI: EC: Evaluate orphan _REG under EC device After starting to install the EC address space handler at the ACPI namespace root, if there is an "orphan" _REG method in the EC device's scope, it will not be evaluated any more. This breaks EC operation regions on some systems, like Asus gu605. To address this, use a wrapper around an existing ACPICA function to look for an "orphan" _REG method in the EC device scope and evaluate it if present. Fixes: 60fa6ae6e6d0 ("ACPI: EC: Install address space handler at the namespace root") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218945 Reported-by: VitaliiT Tested-by: VitaliiT Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acevents.h | 4 +++ drivers/acpi/acpica/evregion.c | 6 +--- drivers/acpi/acpica/evxfregn.c | 54 ++++++++++++++++++++++++++++++++++ drivers/acpi/ec.c | 3 ++ include/acpi/acpixf.h | 4 +++ 5 files changed, 66 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index ddd072cbc738..2133085deda7 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -191,6 +191,10 @@ void acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, acpi_adr_space_type space_id, u32 function); +void +acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *node, + acpi_adr_space_type space_id); + acpi_status acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function); diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index 18fdf2bc2d49..dc6004daf624 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -20,10 +20,6 @@ extern u8 acpi_gbl_default_address_spaces[]; /* Local prototypes */ -static void -acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, - acpi_adr_space_type space_id); - static acpi_status acpi_ev_reg_run(acpi_handle obj_handle, u32 level, void *context, void **return_value); @@ -818,7 +814,7 @@ acpi_ev_reg_run(acpi_handle obj_handle, * ******************************************************************************/ -static void +void acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, acpi_adr_space_type space_id) { diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 3197e6303c5b..624361a5f34d 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -306,3 +306,57 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) } ACPI_EXPORT_SYMBOL(acpi_execute_reg_methods) + +/******************************************************************************* + * + * FUNCTION: acpi_execute_orphan_reg_method + * + * PARAMETERS: device - Handle for the device + * space_id - The address space ID + * + * RETURN: Status + * + * DESCRIPTION: Execute an "orphan" _REG method that appears under an ACPI + * device. This is a _REG method that has no corresponding region + * within the device's scope. + * + ******************************************************************************/ +acpi_status +acpi_execute_orphan_reg_method(acpi_handle device, acpi_adr_space_type space_id) +{ + struct acpi_namespace_node *node; + acpi_status status; + + ACPI_FUNCTION_TRACE(acpi_execute_orphan_reg_method); + + /* Parameter validation */ + + if (!device) { + return_ACPI_STATUS(AE_BAD_PARAMETER); + } + + status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Convert and validate the device handle */ + + node = acpi_ns_validate_handle(device); + if (node) { + + /* + * If an "orphan" _REG method is present in the device's scope + * for the given address space ID, run it. + */ + + acpi_ev_execute_orphan_reg_method(node, space_id); + } else { + status = AE_BAD_PARAMETER; + } + + (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); + return_ACPI_STATUS(status); +} + +ACPI_EXPORT_SYMBOL(acpi_execute_orphan_reg_method) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 68dd17f96f63..299ec653388c 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1507,6 +1507,9 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { acpi_execute_reg_methods(scope_handle, ACPI_ADR_SPACE_EC); + if (scope_handle != ec->handle) + acpi_execute_orphan_reg_method(ec->handle, ACPI_ADR_SPACE_EC); + set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); } diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 94d0fc3bd412..80dc36f9d527 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -662,6 +662,10 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id)) +ACPI_EXTERNAL_RETURN_STATUS(acpi_status + acpi_execute_orphan_reg_method(acpi_handle device, + acpi_adr_space_type + space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, From 7c877115da4196fa108dcfefd49f5a9b67b8d8ca Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Thu, 6 Jun 2024 15:38:41 +0530 Subject: [PATCH 284/778] drm/xe/xe_gt_idle: use GT forcewake domain assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rc6 registers used in disable_c6 function belong to the GT forcewake domain. Hence change the forcewake assertion to check GT forcewake domain. v2: add fixes tag (Himal) Fixes: 975e4a3795d4 ("drm/xe: Manually setup C6 when skip_guc_pc is set") Signed-off-by: Riana Tauro Reviewed-by: Rodrigo Vivi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240606100842.956072-2-riana.tauro@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 21b708554648177a0078962c31629bce31ef5d83) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 8fc0f3f6ecc5..c699f47f4700 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -199,7 +199,7 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt) void xe_gt_idle_disable_c6(struct xe_gt *gt) { xe_device_assert_mem_access(gt_to_xe(gt)); - xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); xe_mmio_write32(gt, PG_ENABLE, 0); xe_mmio_write32(gt, RC_CONTROL, 0); From cd554e1e118a6aa1c919309cd28398b003f69c1f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 7 Jun 2024 17:31:55 +0200 Subject: [PATCH 285/778] drm/xe/pf: Assert LMEM provisioning is done only on DGFX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Local Memory (aka VRAM) is only available on DGFX platforms. We shouldn't attempt to provision VFs with LMEM or attempt to update the LMTT on non-DGFX platforms. Add missing asserts that would enforce that and fix release code that could crash on iGFX due to uninitialized LMTT. Fixes: 0698ff57bf32 ("drm/xe/pf: Update the LMTT when freeing VF GT config") Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240607153155.1592-1-michal.wajdeczko@intel.com (cherry picked from commit b321cb83a375bcc18cd0a4b62bdeaf6905cca769) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 476d613333a9..6c2cfc54442c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1274,6 +1274,9 @@ static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) struct xe_tile *tile; unsigned int tid; + xe_assert(xe, IS_DGFX(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + for_each_tile(tile, xe, tid) { lmtt = &tile->sriov.pf.lmtt; xe_lmtt_drop_pages(lmtt, vfid); @@ -1292,6 +1295,9 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) unsigned int tid; int err; + xe_assert(xe, IS_DGFX(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + total = 0; for_each_tile(tile, xe, tid) total += pf_get_vf_config_lmem(tile->primary_gt, vfid); @@ -1337,6 +1343,7 @@ fail: static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) { + xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -1355,6 +1362,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) int err; xe_gt_assert(gt, vfid); + xe_gt_assert(gt, IS_DGFX(xe)); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); size = round_up(size, pf_get_lmem_alignment(gt)); @@ -1745,11 +1753,14 @@ static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *c static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct xe_device *xe = gt_to_xe(gt); if (!xe_gt_is_media_type(gt)) { pf_release_vf_config_ggtt(gt, config); - pf_release_vf_config_lmem(gt, config); - pf_update_vf_lmtt(gt_to_xe(gt), vfid); + if (IS_DGFX(xe)) { + pf_release_vf_config_lmem(gt, config); + pf_update_vf_lmtt(xe, vfid); + } } pf_release_config_ctxs(gt, config); pf_release_config_dbs(gt, config); From b5e3a9b83f352a737b77a01734a6661d1130ed49 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 5 Jun 2024 09:29:48 +0200 Subject: [PATCH 286/778] drm/xe: flush engine buffers before signalling user fence on all engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests show that user fence signalling requires kind of write barrier, otherwise not all writes performed by the workload will be available to userspace. It is already done for render and compute, we need it also for the rest: video, gsc, copy. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Andrzej Hajda Reviewed-by: Thomas Hellström Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240605-fix_user_fence_posted-v3-2-06e7932f784a@intel.com (cherry picked from commit 3ad7d18c5dad75ed38098c7cc3bc9594b4701399) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_ring_ops.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index d42b3f33bd7a..aca7a9af6e84 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -80,6 +80,16 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) return i; } +static int emit_flush_dw(u32 *dw, int i) +{ + dw[i++] = MI_FLUSH_DW | MI_FLUSH_IMM_DW; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + + return i; +} + static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb, u32 *dw, int i) { @@ -234,10 +244,12 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); - if (job->user_fence.used) + if (job->user_fence.used) { + i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, job->user_fence.value, dw, i); + } i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); @@ -293,10 +305,12 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); - if (job->user_fence.used) + if (job->user_fence.used) { + i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, job->user_fence.value, dw, i); + } i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); From 2470b141bfae2b9695b5b6823e3b978b22d33dde Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Thu, 6 Jun 2024 15:38:42 +0530 Subject: [PATCH 287/778] drm/xe: move disable_c6 call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit disable c6 called in guc_pc_fini_hw is unreachable. GuC PC init returns earlier if skip_guc_pc is true and never registers the finish call thus making disable_c6 unreachable. move this call to gt idle. v2: rebase v3: add fixes tag (Himal) Fixes: 975e4a3795d4 ("drm/xe: Manually setup C6 when skip_guc_pc is set") Signed-off-by: Riana Tauro Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240606100842.956072-3-riana.tauro@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 6800e63cf97bae62bca56d8e691544540d945f53) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_idle.c | 7 +++++++ drivers/gpu/drm/xe/xe_guc_pc.c | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index c699f47f4700..944770fb2daf 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -147,6 +147,13 @@ static const struct attribute *gt_idle_attrs[] = { static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg) { struct kobject *kobj = arg; + struct xe_gt *gt = kobj_to_gt(kobj->parent); + + if (gt_to_xe(gt)->info.skip_guc_pc) { + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); + xe_gt_idle_disable_c6(gt); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + } sysfs_remove_files(kobj, gt_idle_attrs); kobject_put(kobj); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 509649d0e65e..23382ced4ea7 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -895,12 +895,6 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) static void xe_guc_pc_fini(struct drm_device *drm, void *arg) { struct xe_guc_pc *pc = arg; - struct xe_device *xe = pc_to_xe(pc); - - if (xe->info.skip_guc_pc) { - xe_gt_idle_disable_c6(pc_to_gt(pc)); - return; - } XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); From ea5f8c4cffcd8a6b62b3a3bd5008275218c9d02a Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Wed, 5 Jun 2024 17:22:41 +0800 Subject: [PATCH 288/778] ALSA: hda/realtek: fix mute/micmute LEDs don't work for ProBook 445/465 G11. HP ProBook 445/465 G11 needs ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make mic-mute/audio-mute working. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20240605092243.41963-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index aa76d1c88589..54a52c148070 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10194,6 +10194,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c7b, "HP ProBook 445 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c7c, "HP ProBook 445 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c7d, "HP ProBook 465 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c7e, "HP ProBook 465 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c89, "HP ProBook 460 G11", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c8a, "HP EliteBook 630", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c8c, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), From 86a433862912f52597263aa224a9ed82bcd533bf Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Wed, 5 Jun 2024 12:39:23 -0300 Subject: [PATCH 289/778] ALSA: hda/realtek: Limit mic boost on N14AP7 The internal mic boost on the N14AP7 is too high. Fix this by applying the ALC269_FIXUP_LIMIT_INT_MIC_BOOST fixup to the machine to limit the gain. Signed-off-by: Edson Juliano Drosdeck Cc: Link: https://lore.kernel.org/r/20240605153923.2837-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 54a52c148070..da54300279af 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10585,6 +10585,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), + SND_PCI_QUIRK(0x1c6c, 0x122a, "Positivo N14AP7", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1c6c, 0x1251, "Positivo N14KP6-TG", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS), SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP), From e799bdf51d54bebaf939fdb655aad424e624c1b1 Mon Sep 17 00:00:00 2001 From: "Dustin L. Howett" Date: Wed, 5 Jun 2024 12:01:32 -0500 Subject: [PATCH 290/778] ALSA: hda/realtek: Remove Framework Laptop 16 from quirks The Framework Laptop 16 does not have a combination headphone/headset 3.5mm jack; however, applying the pincfg from the Laptop 13 (nid=0x19) erroneously informs hda that the node is present. Fixes: 8804fa04a492 ("ALSA: hda/realtek: Add Framework laptop 16 to quirks") Signed-off-by: Dustin L. Howett Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240605-alsa-hda-realtek-remove-framework-laptop-16-from-quirks-v1-1-11d47fe8ec4d@howett.net Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index da54300279af..8408e0b0730a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10610,7 +10610,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0xf111, 0x0005, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 From 82f3daed2d3590fa286a02301573a183dd902a0f Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Thu, 6 Jun 2024 14:03:48 +0100 Subject: [PATCH 291/778] ALSA: hda: cs35l41: Support Lenovo Thinkbook 16P Gen 5 This laptop does not contain _DSD so needs to be supported using the configuration table. Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240606130351.333495-2-sbinding@opensource.cirrus.com --- sound/pci/hda/cs35l41_hda_property.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 6a7a6d486916..046a94250683 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -128,6 +128,8 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "17AA38B5", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B6", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B7", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, + { "17AA38F9", 2, EXTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, 0, 2, -1, 0, 0, 0 }, + { "17AA38FA", 2, EXTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, 0, 2, -1, 0, 0, 0 }, {} }; @@ -529,6 +531,8 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "17AA38B5", generic_dsd_config }, { "CSC3551", "17AA38B6", generic_dsd_config }, { "CSC3551", "17AA38B7", generic_dsd_config }, + { "CSC3551", "17AA38F9", generic_dsd_config }, + { "CSC3551", "17AA38FA", generic_dsd_config }, {} }; From b32f92d1af3789038f03c2899e3be0d00b43faf2 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Thu, 6 Jun 2024 14:03:49 +0100 Subject: [PATCH 292/778] ALSA: hda: cs35l41: Support Lenovo Thinkbook 13x Gen 4 This laptop does not contain _DSD so needs to be supported using the configuration table. Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240606130351.333495-3-sbinding@opensource.cirrus.com --- sound/pci/hda/cs35l41_hda_property.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 046a94250683..51998d1c72ff 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -128,6 +128,8 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "17AA38B5", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B6", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B7", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, + { "17AA38C7", 4, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, CS35L41_LEFT, CS35L41_RIGHT }, 0, 2, -1, 1000, 4500, 24 }, + { "17AA38C8", 4, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, CS35L41_LEFT, CS35L41_RIGHT }, 0, 2, -1, 1000, 4500, 24 }, { "17AA38F9", 2, EXTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, 0, 2, -1, 0, 0, 0 }, { "17AA38FA", 2, EXTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, 0, 2, -1, 0, 0, 0 }, {} @@ -531,6 +533,8 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "17AA38B5", generic_dsd_config }, { "CSC3551", "17AA38B6", generic_dsd_config }, { "CSC3551", "17AA38B7", generic_dsd_config }, + { "CSC3551", "17AA38C7", generic_dsd_config }, + { "CSC3551", "17AA38C8", generic_dsd_config }, { "CSC3551", "17AA38F9", generic_dsd_config }, { "CSC3551", "17AA38FA", generic_dsd_config }, {} From 75f2ea939b5c694b36aad8ef823a2f9bcf7b3d7d Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Thu, 6 Jun 2024 14:03:50 +0100 Subject: [PATCH 293/778] ALSA: hda/realtek: Support Lenovo Thinkbook 16P Gen 5 Add support for this laptop, which uses CS35L41 HDA amps. The laptop does not contain valid _DSD for these amps, so requires entries into the CS35L41 configuration table to function correctly. [ fixed to lower hex numbers in quirk entries -- tiwai ] Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240606130351.333495-4-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8408e0b0730a..2320f04eca5a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10548,6 +10548,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38cd, "Y790 VECO DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38d2, "Lenovo Yoga 9 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x38d7, "Lenovo Yoga 9 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN), + SND_PCI_QUIRK(0x17aa, 0x38f9, "Thinkbook 16P Gen5", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x38fa, "Thinkbook 16P Gen5", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), From 4ecb16d9250e6fcf8818572bf317b6adae16515b Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Thu, 6 Jun 2024 14:03:51 +0100 Subject: [PATCH 294/778] ALSA: hda/realtek: Support Lenovo Thinkbook 13x Gen 4 Add support for this laptop, which uses CS35L41 HDA amps. The laptop does not contain valid _DSD for these amps, so requires entries into the CS35L41 configuration table to function correctly. Signed-off-by: Stefan Binding Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240606130351.333495-5-sbinding@opensource.cirrus.com --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2320f04eca5a..79736c8782a3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10544,6 +10544,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38be, "Yoga S980-14.5 proX YC Dual", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38bf, "Yoga S980-14.5 proX LX Dual", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38c3, "Y980 DUAL", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x17aa, 0x38c7, "Thinkbook 13x Gen 4", ALC287_FIXUP_CS35L41_I2C_4), + SND_PCI_QUIRK(0x17aa, 0x38c8, "Thinkbook 13x Gen 4", ALC287_FIXUP_CS35L41_I2C_4), SND_PCI_QUIRK(0x17aa, 0x38cb, "Y790 YG DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38cd, "Y790 VECO DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38d2, "Lenovo Yoga 9 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN), From 2646b43910c0e6d7f4ad535919b44b88f98c688d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 7 Jun 2024 09:00:21 +0300 Subject: [PATCH 295/778] ALSA/hda: intel-dsp-config: Document AVS as dsp_driver option dsp_driver=4 will force the AVS driver stack to be used, it is better to docuement this. Fixes: 1affc44ea5dd ("ASoC: Intel: avs: PCI driver implementation") Signed-off-by: Peter Ujfalusi Reviewed-by: Cezary Rojewski Link: https://lore.kernel.org/r/20240607060021.11503-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-dsp-config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 537863447358..478d2b50c571 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -18,7 +18,7 @@ static int dsp_driver; module_param(dsp_driver, int, 0444); -MODULE_PARM_DESC(dsp_driver, "Force the DSP driver for Intel DSP (0=auto, 1=legacy, 2=SST, 3=SOF)"); +MODULE_PARM_DESC(dsp_driver, "Force the DSP driver for Intel DSP (0=auto, 1=legacy, 2=SST, 3=SOF, 4=AVS)"); #define FLAG_SST BIT(0) #define FLAG_SOF BIT(1) From 8eef5c3cea65f248c99cd9dcb3f84c6509b78162 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 11 Jun 2024 09:24:55 -0700 Subject: [PATCH 296/778] Revert "igc: fix a log entry using uninitialized netdev" This reverts commit 86167183a17e03ec77198897975e9fdfbd53cb0b. igc_ptp_init() needs to be called before igc_reset(), otherwise kernel crash could be observed. Following the corresponding discussion [1] and [2] revert this commit. Link: https://lore.kernel.org/all/8fb634f8-7330-4cf4-a8ce-485af9c0a61a@intel.com/ [1] Link: https://lore.kernel.org/all/87o78rmkhu.fsf@intel.com/ [2] Fixes: 86167183a17e ("igc: fix a log entry using uninitialized netdev") Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240611162456.961631-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 305e05294a26..87b655b839c1 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7032,6 +7032,8 @@ static int igc_probe(struct pci_dev *pdev, device_set_wakeup_enable(&adapter->pdev->dev, adapter->flags & IGC_FLAG_WOL_SUPPORTED); + igc_ptp_init(adapter); + igc_tsn_clear_schedule(adapter); /* reset the hardware with the new settings */ @@ -7053,9 +7055,6 @@ static int igc_probe(struct pci_dev *pdev, /* Check if Media Autosense is enabled */ adapter->ei = *ei; - /* do hw tstamp init after resetting */ - igc_ptp_init(adapter); - /* print pcie link status and MAC address */ pcie_print_link_status(pdev); netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr); From 79f18a41dd056115d685f3b0a419c7cd40055e13 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 12 Jun 2024 06:04:46 +0000 Subject: [PATCH 297/778] ionic: fix use after netif_napi_del() When queues are started, netif_napi_add() and napi_enable() are called. If there are 4 queues and only 3 queues are used for the current configuration, only 3 queues' napi should be registered and enabled. The ionic_qcq_enable() checks whether the .poll pointer is not NULL for enabling only the using queue' napi. Unused queues' napi will not be registered by netif_napi_add(), so the .poll pointer indicates NULL. But it couldn't distinguish whether the napi was unregistered or not because netif_napi_del() doesn't reset the .poll pointer to NULL. So, ionic_qcq_enable() calls napi_enable() for the queue, which was unregistered by netif_napi_del(). Reproducer: ethtool -L rx 1 tx 1 combined 0 ethtool -L rx 0 tx 0 combined 1 ethtool -L rx 0 tx 0 combined 4 Splat looks like: kernel BUG at net/core/dev.c:6666! Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 1057 Comm: kworker/3:3 Not tainted 6.10.0-rc2+ #16 Workqueue: events ionic_lif_deferred_work [ionic] RIP: 0010:napi_enable+0x3b/0x40 Code: 48 89 c2 48 83 e2 f6 80 b9 61 09 00 00 00 74 0d 48 83 bf 60 01 00 00 00 74 03 80 ce 01 f0 4f RSP: 0018:ffffb6ed83227d48 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff97560cda0828 RCX: 0000000000000029 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff97560cda0a28 RBP: ffffb6ed83227d50 R08: 0000000000000400 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 R13: ffff97560ce3c1a0 R14: 0000000000000000 R15: ffff975613ba0a20 FS: 0000000000000000(0000) GS:ffff975d5f780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8f734ee200 CR3: 0000000103e50000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? die+0x33/0x90 ? do_trap+0xd9/0x100 ? napi_enable+0x3b/0x40 ? do_error_trap+0x83/0xb0 ? napi_enable+0x3b/0x40 ? napi_enable+0x3b/0x40 ? exc_invalid_op+0x4e/0x70 ? napi_enable+0x3b/0x40 ? asm_exc_invalid_op+0x16/0x20 ? napi_enable+0x3b/0x40 ionic_qcq_enable+0xb7/0x180 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_start_queues+0xc4/0x290 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_link_status_check+0x11c/0x170 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_lif_deferred_work+0x129/0x280 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] process_one_work+0x145/0x360 worker_thread+0x2bb/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0xcc/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2d/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Taehee Yoo Reviewed-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20240612060446.1754392-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 24870da3f484..1934e9d6d9e4 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -304,10 +304,8 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) if (ret) return ret; - if (qcq->napi.poll) - napi_enable(&qcq->napi); - if (qcq->flags & IONIC_QCQ_F_INTR) { + napi_enable(&qcq->napi); irq_set_affinity_hint(qcq->intr.vector, &qcq->intr.affinity_mask); ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, From 6f4d93b78ade0a4c2cafd587f7b429ce95abb02e Mon Sep 17 00:00:00 2001 From: Ziwei Xiao Date: Wed, 12 Jun 2024 00:16:54 +0000 Subject: [PATCH 298/778] gve: Clear napi->skb before dev_kfree_skb_any() gve_rx_free_skb incorrectly leaves napi->skb referencing an skb after it is freed with dev_kfree_skb_any(). This can result in a subsequent call to napi_get_frags returning a dangling pointer. Fix this by clearing napi->skb before the skb is freed. Fixes: 9b8dd5e5ea48 ("gve: DQO: Add RX path") Cc: stable@vger.kernel.org Reported-by: Shailend Chand Signed-off-by: Ziwei Xiao Reviewed-by: Harshitha Ramamurthy Reviewed-by: Shailend Chand Reviewed-by: Praveen Kaligineedi Link: https://lore.kernel.org/r/20240612001654.923887-1-ziweixiao@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index c1c912de59c7..1154c1d8f66f 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -647,11 +647,13 @@ static void gve_rx_skb_hash(struct sk_buff *skb, skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type); } -static void gve_rx_free_skb(struct gve_rx_ring *rx) +static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx) { if (!rx->ctx.skb_head) return; + if (rx->ctx.skb_head == napi->skb) + napi->skb = NULL; dev_kfree_skb_any(rx->ctx.skb_head); rx->ctx.skb_head = NULL; rx->ctx.skb_tail = NULL; @@ -950,7 +952,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num); if (err < 0) { - gve_rx_free_skb(rx); + gve_rx_free_skb(napi, rx); u64_stats_update_begin(&rx->statss); if (err == -ENOMEM) rx->rx_skb_alloc_fail++; @@ -993,7 +995,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) /* gve_rx_complete_skb() will consume skb if successful */ if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) { - gve_rx_free_skb(rx); + gve_rx_free_skb(napi, rx); u64_stats_update_begin(&rx->statss); rx->rx_desc_err_dropped_pkt++; u64_stats_update_end(&rx->statss); From 7da9dfdd5a3dbfd3d2450d9c6a3d1d699d625c43 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Jun 2024 09:27:38 +0200 Subject: [PATCH 299/778] .editorconfig: remove trim_trailing_whitespace option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some editors (like the vim variants), when seeing "trim_whitespace" decide to do just that for all of the whitespace in the file you are saving, even if it is not on a line that you have modified. This plays havoc with diffs and is NOT something that should be intended. As the "only trim whitespace on modified lines" is not part of the editorconfig standard yet, just delete these lines from the .editorconfig file so that we don't end up with diffs that are automatically rejected by maintainers for containing things they shouldn't. Cc: Danny Lin Cc: Íñigo Huguet Cc: Mickaël Salaün Cc: Masahiro Yamada Fixes: 5a602de99797 ("Add .editorconfig file for basic formatting") Acked-by: Vincent Mailhol Link: https://lore.kernel.org/r/2024061137-jawless-dipped-e789@gregkh Signed-off-by: Greg Kroah-Hartman --- .editorconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/.editorconfig b/.editorconfig index 854773350cc5..29a30ccfc07b 100644 --- a/.editorconfig +++ b/.editorconfig @@ -5,7 +5,6 @@ root = true [{*.{awk,c,dts,dtsi,dtso,h,mk,s,S},Kconfig,Makefile,Makefile.*}] charset = utf-8 end_of_line = lf -trim_trailing_whitespace = true insert_final_newline = true indent_style = tab indent_size = 8 @@ -13,7 +12,6 @@ indent_size = 8 [*.{json,py,rs}] charset = utf-8 end_of_line = lf -trim_trailing_whitespace = true insert_final_newline = true indent_style = space indent_size = 4 @@ -26,7 +24,6 @@ indent_size = 8 [*.yaml] charset = utf-8 end_of_line = lf -trim_trailing_whitespace = unset insert_final_newline = true indent_style = space indent_size = 2 From 7d9df38c9c037ab84502ce7eeae9f1e1e7e72603 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 12 Jun 2024 16:17:36 -0700 Subject: [PATCH 300/778] bnxt_en: Cap the size of HWRM_PORT_PHY_QCFG forwarded response Firmware interface 1.10.2.118 has increased the size of HWRM_PORT_PHY_QCFG response beyond the maximum size that can be forwarded. When the VF's link state is not the default auto state, the PF will need to forward the response back to the VF to indicate the forced state. This regression may cause the VF to fail to initialize. Fix it by capping the HWRM_PORT_PHY_QCFG response to the maximum 96 bytes. The SPEEDS2_SUPPORTED flag needs to be cleared because the new speeds2 fields are beyond the legacy structure. Also modify bnxt_hwrm_fwd_resp() to print a warning if the message size exceeds 96 bytes to make this failure more obvious. Fixes: 84a911db8305 ("bnxt_en: Update firmware interface to 1.10.2.118") Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240612231736.57823-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 51 +++++++++++++++++++ .../net/ethernet/broadcom/bnxt/bnxt_sriov.c | 12 ++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 656ab81c0272..bbc7edccd5a4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1434,6 +1434,57 @@ struct bnxt_l2_filter { atomic_t refcnt; }; +/* Compat version of hwrm_port_phy_qcfg_output capped at 96 bytes. The + * first 95 bytes are identical to hwrm_port_phy_qcfg_output in bnxt_hsi.h. + * The last valid byte in the compat version is different. + */ +struct hwrm_port_phy_qcfg_output_compat { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 link; + u8 active_fec_signal_mode; + __le16 link_speed; + u8 duplex_cfg; + u8 pause; + __le16 support_speeds; + __le16 force_link_speed; + u8 auto_mode; + u8 auto_pause; + __le16 auto_link_speed; + __le16 auto_link_speed_mask; + u8 wirespeed; + u8 lpbk; + u8 force_pause; + u8 module_status; + __le32 preemphasis; + u8 phy_maj; + u8 phy_min; + u8 phy_bld; + u8 phy_type; + u8 media_type; + u8 xcvr_pkg_type; + u8 eee_config_phy_addr; + u8 parallel_detect; + __le16 link_partner_adv_speeds; + u8 link_partner_adv_auto_mode; + u8 link_partner_adv_pause; + __le16 adv_eee_link_speed_mask; + __le16 link_partner_adv_eee_link_speed_mask; + __le32 xcvr_identifier_type_tx_lpi_timer; + __le16 fec_cfg; + u8 duplex_state; + u8 option_flags; + char phy_vendor_name[16]; + char phy_vendor_partnumber[16]; + __le16 support_pam4_speeds; + __le16 force_pam4_link_speed; + __le16 auto_pam4_link_speed_mask; + u8 link_partner_pam4_adv_speeds; + u8 valid; +}; + struct bnxt_link_info { u8 phy_type; u8 media_type; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 175192ebaa77..22898d3d088b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -950,8 +950,11 @@ static int bnxt_hwrm_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf, struct hwrm_fwd_resp_input *req; int rc; - if (BNXT_FWD_RESP_SIZE_ERR(msg_size)) + if (BNXT_FWD_RESP_SIZE_ERR(msg_size)) { + netdev_warn_once(bp->dev, "HWRM fwd response too big (%d bytes)\n", + msg_size); return -EINVAL; + } rc = hwrm_req_init(bp, req, HWRM_FWD_RESP); if (!rc) { @@ -1085,7 +1088,7 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf) rc = bnxt_hwrm_exec_fwd_resp( bp, vf, sizeof(struct hwrm_port_phy_qcfg_input)); } else { - struct hwrm_port_phy_qcfg_output phy_qcfg_resp = {0}; + struct hwrm_port_phy_qcfg_output_compat phy_qcfg_resp = {}; struct hwrm_port_phy_qcfg_input *phy_qcfg_req; phy_qcfg_req = @@ -1096,6 +1099,11 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf) mutex_unlock(&bp->link_lock); phy_qcfg_resp.resp_len = cpu_to_le16(sizeof(phy_qcfg_resp)); phy_qcfg_resp.seq_id = phy_qcfg_req->seq_id; + /* New SPEEDS2 fields are beyond the legacy structure, so + * clear the SPEEDS2_SUPPORTED flag. + */ + phy_qcfg_resp.option_flags &= + ~PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED; phy_qcfg_resp.valid = 1; if (vf->flags & BNXT_VF_LINK_UP) { From a6736a0addd60fccc3a3508461d72314cc609772 Mon Sep 17 00:00:00 2001 From: Rao Shoaib Date: Tue, 11 Jun 2024 01:46:39 -0700 Subject: [PATCH 301/778] af_unix: Read with MSG_PEEK loops if the first unread byte is OOB Read with MSG_PEEK flag loops if the first byte to read is an OOB byte. commit 22dd70eb2c3d ("af_unix: Don't peek OOB data without MSG_OOB.") addresses the loop issue but does not address the issue that no data beyond OOB byte can be read. >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c1.send(b'a', MSG_OOB) 1 >>> c1.send(b'b') 1 >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) b'b' >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c2.setsockopt(SOL_SOCKET, SO_OOBINLINE, 1) >>> c1.send(b'a', MSG_OOB) 1 >>> c1.send(b'b') 1 >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) b'a' >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) b'a' >>> c2.recv(1, MSG_DONTWAIT) b'a' >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) b'b' >>> Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Rao Shoaib Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240611084639.2248934-1-Rao.Shoaib@oracle.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 80846279de9f..5e695a9a609c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2625,18 +2625,18 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, if (skb == u->oob_skb) { if (copied) { skb = NULL; - } else if (sock_flag(sk, SOCK_URGINLINE)) { - if (!(flags & MSG_PEEK)) { + } else if (!(flags & MSG_PEEK)) { + if (sock_flag(sk, SOCK_URGINLINE)) { WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); + } else { + __skb_unlink(skb, &sk->sk_receive_queue); + WRITE_ONCE(u->oob_skb, NULL); + unlinked_skb = skb; + skb = skb_peek(&sk->sk_receive_queue); } - } else if (flags & MSG_PEEK) { - skb = NULL; - } else { - __skb_unlink(skb, &sk->sk_receive_queue); - WRITE_ONCE(u->oob_skb, NULL); - unlinked_skb = skb; - skb = skb_peek(&sk->sk_receive_queue); + } else if (!sock_flag(sk, SOCK_URGINLINE)) { + skb = skb_peek_next(skb, &sk->sk_receive_queue); } } From a9b9741854a9fe9df948af49ca5514e0ed0429df Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Tue, 11 Jun 2024 11:25:46 +0300 Subject: [PATCH 302/778] bnxt_en: Adjust logging of firmware messages in case of released token in __hwrm_send() In case of token is released due to token->state == BNXT_HWRM_DEFERRED, released token (set to NULL) is used in log messages. This issue is expected to be prevented by HWRM_ERR_CODE_PF_UNAVAILABLE error code. But this error code is returned by recent firmware. So some firmware may not return it. This may lead to NULL pointer dereference. Adjust this issue by adding token pointer check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8fa4219dba8e ("bnxt_en: add dynamic debug support for HWRM messages") Suggested-by: Michael Chan Signed-off-by: Aleksandr Mishin Reviewed-by: Wojciech Drewek Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20240611082547.12178-1-amishin@t-argos.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c index 1df3d56cc4b5..d2fd2d04ed47 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c @@ -680,7 +680,7 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx) req_type); else if (rc && rc != HWRM_ERR_CODE_PF_UNAVAILABLE) hwrm_err(bp, ctx, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n", - req_type, token->seq_id, rc); + req_type, le16_to_cpu(ctx->req->seq_id), rc); rc = __hwrm_to_stderr(rc); exit: if (token) From bc69ad74867dba1377abe14356c94a946d9837a3 Mon Sep 17 00:00:00 2001 From: En-Wei Wu Date: Thu, 30 May 2024 22:21:31 +0800 Subject: [PATCH 303/778] ice: avoid IRQ collision to fix init failure on ACPI S3 resume A bug in https://bugzilla.kernel.org/show_bug.cgi?id=218906 describes that irdma would break and report hardware initialization failed after suspend/resume with Intel E810 NIC (tested on 6.9.0-rc5). The problem is caused due to the collision between the irq numbers requested in irdma and the irq numbers requested in other drivers after suspend/resume. The irq numbers used by irdma are derived from ice's ice_pf->msix_entries which stores mappings between MSI-X index and Linux interrupt number. It's supposed to be cleaned up when suspend and rebuilt in resume but it's not, causing irdma using the old irq numbers stored in the old ice_pf->msix_entries to request_irq() when resume. And eventually collide with other drivers. This patch fixes this problem. On suspend, we call ice_deinit_rdma() to clean up the ice_pf->msix_entries (and free the MSI-X vectors used by irdma if we've dynamically allocated them). On resume, we call ice_init_rdma() to rebuild the ice_pf->msix_entries (and allocate the MSI-X vectors if we would like to dynamically allocate them). Fixes: f9f5301e7e2d ("ice: Register auxiliary device to provide RDMA") Tested-by: Cyrus Lien Signed-off-by: En-Wei Wu Reviewed-by: Wojciech Drewek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1b61ca3a6eb6..45d850514f4c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5564,7 +5564,7 @@ static int ice_suspend(struct device *dev) */ disabled = ice_service_task_stop(pf); - ice_unplug_aux_dev(pf); + ice_deinit_rdma(pf); /* Already suspended?, then there is nothing to do */ if (test_and_set_bit(ICE_SUSPENDED, pf->state)) { @@ -5644,6 +5644,11 @@ static int ice_resume(struct device *dev) if (ret) dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret); + ret = ice_init_rdma(pf); + if (ret) + dev_err(dev, "Reinitialize RDMA during resume failed: %d\n", + ret); + clear_bit(ICE_DOWN, pf->state); /* Now perform PF reset and rebuild */ reset_type = ICE_RESET_PFR; From aeccadb24d9dacdde673a0f68f0a9135c6be4993 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Thu, 30 May 2024 13:06:17 -0400 Subject: [PATCH 304/778] ice: fix 200G link speed message log Commit 24407a01e57c ("ice: Add 200G speed/phy type use") added support for 200G PHY speeds, but did not include 200G link speed message support. As a result the driver incorrectly reports Unknown for 200G link speed. Fix this by adding 200G support to ice_print_link_msg(). Fixes: 24407a01e57c ("ice: Add 200G speed/phy type use") Reviewed-by: Michal Swiatkowski Signed-off-by: Paul Greenwalt Reviewed-by: Jesse Brandeburg Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 45d850514f4c..1766230abfff 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -805,6 +805,9 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) } switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_200GB: + speed = "200 G"; + break; case ICE_AQ_LINK_SPEED_100GB: speed = "100 G"; break; From a27f6ac9d404ea84196639dcc456f969ef813c0f Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Tue, 4 Jun 2024 14:55:14 +0200 Subject: [PATCH 305/778] ice: implement AQ download pkg retry ice_aqc_opc_download_pkg (0x0C40) AQ sporadically returns error due to FW issue. Fix this by retrying five times before moving to Safe Mode. Sleep for 20 ms before retrying. This was tested with the 4.40 firmware. Fixes: c76488109616 ("ice: Implement Dynamic Device Personalization (DDP) download") Reviewed-by: Michal Swiatkowski Signed-off-by: Wojciech Drewek Reviewed-by: Brett Creeley Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ddp.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index ce5034ed2b24..f182179529b7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -1339,6 +1339,7 @@ ice_dwnld_cfg_bufs_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 start, for (i = 0; i < count; i++) { bool last = false; + int try_cnt = 0; int status; bh = (struct ice_buf_hdr *)(bufs + start + i); @@ -1346,8 +1347,26 @@ ice_dwnld_cfg_bufs_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 start, if (indicate_last) last = ice_is_last_download_buffer(bh, i, count); - status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, last, - &offset, &info, NULL); + while (1) { + status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, + last, &offset, &info, + NULL); + if (hw->adminq.sq_last_status != ICE_AQ_RC_ENOSEC && + hw->adminq.sq_last_status != ICE_AQ_RC_EBADSIG) + break; + + try_cnt++; + + if (try_cnt == 5) + break; + + msleep(20); + } + + if (try_cnt) + dev_dbg(ice_hw_to_dev(hw), + "ice_aq_download_pkg number of retries: %d\n", + try_cnt); /* Save AQ status from download package */ if (status) { From 92424801261d1564a0bb759da3cf3ccd69fdf5a2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 13 Jun 2024 13:53:08 +0200 Subject: [PATCH 306/778] bpf: Fix reg_set_min_max corruption of fake_reg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Juan reported that after doing some changes to buzzer [0] and implementing a new fuzzing strategy guided by coverage, they noticed the following in one of the probes: [...] 13: (79) r6 = *(u64 *)(r0 +0) ; R0=map_value(ks=4,vs=8) R6_w=scalar() 14: (b7) r0 = 0 ; R0_w=0 15: (b4) w0 = -1 ; R0_w=0xffffffff 16: (74) w0 >>= 1 ; R0_w=0x7fffffff 17: (5c) w6 &= w0 ; R0_w=0x7fffffff R6_w=scalar(smin=smin32=0,smax=umax=umax32=0x7fffffff,var_off=(0x0; 0x7fffffff)) 18: (44) w6 |= 2 ; R6_w=scalar(smin=umin=smin32=umin32=2,smax=umax=umax32=0x7fffffff,var_off=(0x2; 0x7ffffffd)) 19: (56) if w6 != 0x7ffffffd goto pc+1 REG INVARIANTS VIOLATION (true_reg2): range bounds violation u64=[0x7fffffff, 0x7ffffffd] s64=[0x7fffffff, 0x7ffffffd] u32=[0x7fffffff, 0x7ffffffd] s32=[0x7fffffff, 0x7ffffffd] var_off=(0x7fffffff, 0x0) REG INVARIANTS VIOLATION (false_reg1): range bounds violation u64=[0x7fffffff, 0x7ffffffd] s64=[0x7fffffff, 0x7ffffffd] u32=[0x7fffffff, 0x7ffffffd] s32=[0x7fffffff, 0x7ffffffd] var_off=(0x7fffffff, 0x0) REG INVARIANTS VIOLATION (false_reg2): const tnum out of sync with range bounds u64=[0x0, 0xffffffffffffffff] s64=[0x8000000000000000, 0x7fffffffffffffff] u32=[0x0, 0xffffffff] s32=[0x80000000, 0x7fffffff] var_off=(0x7fffffff, 0x0) 19: R6_w=0x7fffffff 20: (95) exit from 19 to 21: R0=0x7fffffff R6=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x7ffffffe,var_off=(0x2; 0x7ffffffd)) R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 21: R0=0x7fffffff R6=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x7ffffffe,var_off=(0x2; 0x7ffffffd)) R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 21: (14) w6 -= 2147483632 ; R6_w=scalar(smin=umin=umin32=2,smax=umax=0xffffffff,smin32=0x80000012,smax32=14,var_off=(0x2; 0xfffffffd)) 22: (76) if w6 s>= 0xe goto pc+1 ; R6_w=scalar(smin=umin=umin32=2,smax=umax=0xffffffff,smin32=0x80000012,smax32=13,var_off=(0x2; 0xfffffffd)) 23: (95) exit from 22 to 24: R0=0x7fffffff R6_w=14 R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 24: R0=0x7fffffff R6_w=14 R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 24: (14) w6 -= 14 ; R6_w=0 [...] What can be seen here is a register invariant violation on line 19. After the binary-or in line 18, the verifier knows that bit 2 is set but knows nothing about the rest of the content which was loaded from a map value, meaning, range is [2,0x7fffffff] with var_off=(0x2; 0x7ffffffd). When in line 19 the verifier analyzes the branch, it splits the register states in reg_set_min_max() into the registers of the true branch (true_reg1, true_reg2) and the registers of the false branch (false_reg1, false_reg2). Since the test is w6 != 0x7ffffffd, the src_reg is a known constant. Internally, the verifier creates a "fake" register initialized as scalar to the value of 0x7ffffffd, and then passes it onto reg_set_min_max(). Now, for line 19, it is mathematically impossible to take the false branch of this program, yet the verifier analyzes it. It is impossible because the second bit of r6 will be set due to the prior or operation and the constant in the condition has that bit unset (hex(fd) == binary(1111 1101). When the verifier first analyzes the false / fall-through branch, it will compute an intersection between the var_off of r6 and of the constant. This is because the verifier creates a "fake" register initialized to the value of the constant. The intersection result later refines both registers in regs_refine_cond_op(): [...] t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off)); reg1->var_off = tnum_with_subreg(reg1->var_off, t); reg2->var_off = tnum_with_subreg(reg2->var_off, t); [...] Since the verifier is analyzing the false branch of the conditional jump, reg1 is equal to false_reg1 and reg2 is equal to false_reg2, i.e. the reg2 is the "fake" register that was meant to hold a constant value. The resulting var_off of the intersection says that both registers now hold a known value of var_off=(0x7fffffff, 0x0) or in other words: this operation manages to make the verifier think that the "constant" value that was passed in the jump operation now holds a different value. Normally this would not be an issue since it should not influence the true branch, however, false_reg2 and true_reg2 are pointers to the same "fake" register. Meaning, the false branch can influence the results of the true branch. In line 24, the verifier assumes R6_w=0, but the actual runtime value in this case is 1. The fix is simply not passing in the same "fake" register location as inputs to reg_set_min_max(), but instead making a copy. Moving the fake_reg into the env also reduces stack consumption by 120 bytes. With this, the verifier successfully rejects invalid accesses from the test program. [0] https://github.com/google/buzzer Fixes: 67420501e868 ("bpf: generalize reg_set_min_max() to handle non-const register comparisons") Reported-by: Juan José López Jaimez Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/r/20240613115310.25383-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 ++ kernel/bpf/verifier.c | 14 ++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 50aa87f8d77f..e4070fb02b11 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -746,6 +746,8 @@ struct bpf_verifier_env { /* Same as scratched_regs but for stack slots */ u64 scratched_stack_slots; u64 prev_log_pos, prev_insn_print_pos; + /* buffer used to temporary hold constants as scalar registers */ + struct bpf_reg_state fake_reg[2]; /* buffer used to generate temporary string representations, * e.g., in reg_type_str() to generate reg_type string */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 36ef8e96787e..f455548ba46c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15113,7 +15113,6 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; struct bpf_reg_state *eq_branch_regs; - struct bpf_reg_state fake_reg = {}; u8 opcode = BPF_OP(insn->code); bool is_jmp32; int pred = -1; @@ -15179,7 +15178,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } - src_reg = &fake_reg; + src_reg = &env->fake_reg[0]; + memset(src_reg, 0, sizeof(*src_reg)); src_reg->type = SCALAR_VALUE; __mark_reg_known(src_reg, insn->imm); } @@ -15239,10 +15239,16 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, &other_branch_regs[insn->src_reg], dst_reg, src_reg, opcode, is_jmp32); } else /* BPF_SRC(insn->code) == BPF_K */ { + /* reg_set_min_max() can mangle the fake_reg. Make a copy + * so that these are two different memory locations. The + * src_reg is not used beyond here in context of K. + */ + memcpy(&env->fake_reg[1], &env->fake_reg[0], + sizeof(env->fake_reg[0])); err = reg_set_min_max(env, &other_branch_regs[insn->dst_reg], - src_reg /* fake one */, - dst_reg, src_reg /* same fake one */, + &env->fake_reg[0], + dst_reg, &env->fake_reg[1], opcode, is_jmp32); } if (err) From e73cd1cfc2177654e562b04f514be5f0f0b96da2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 13 Jun 2024 13:53:09 +0200 Subject: [PATCH 307/778] bpf: Reduce stack consumption in check_stack_write_fixed_off The fake_reg moved into env->fake_reg given it consumes a lot of stack space (120 bytes). Migrate the fake_reg in check_stack_write_fixed_off() as well now that we have it. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20240613115310.25383-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f455548ba46c..e5a0ba3bc38d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4549,11 +4549,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, state->stack[spi].spilled_ptr.id = 0; } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && env->bpf_capable) { - struct bpf_reg_state fake_reg = {}; + struct bpf_reg_state *tmp_reg = &env->fake_reg[0]; - __mark_reg_known(&fake_reg, insn->imm); - fake_reg.type = SCALAR_VALUE; - save_register_state(env, state, spi, &fake_reg, size); + memset(tmp_reg, 0, sizeof(*tmp_reg)); + __mark_reg_known(tmp_reg, insn->imm); + tmp_reg->type = SCALAR_VALUE; + save_register_state(env, state, spi, tmp_reg, size); } else if (reg && is_spillable_regtype(reg->type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { From ceb65eb60026e03e1028a99f0ec94f22065e722a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 13 Jun 2024 13:53:10 +0200 Subject: [PATCH 308/778] selftests/bpf: Add test coverage for reg_set_min_max handling Add a test case for the jmp32/k fix to ensure selftests have coverage. Before fix: # ./vmtest.sh -- ./test_progs -t verifier_or_jmp32_k [...] ./test_progs -t verifier_or_jmp32_k tester_init:PASS:tester_log_buf 0 nsec process_subtest:PASS:obj_open_mem 0 nsec process_subtest:PASS:specs_alloc 0 nsec run_subtest:PASS:obj_open_mem 0 nsec run_subtest:FAIL:unexpected_load_success unexpected success: 0 #492/1 verifier_or_jmp32_k/or_jmp32_k: bit ops + branch on unknown value:FAIL #492 verifier_or_jmp32_k:FAIL Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED After fix: # ./vmtest.sh -- ./test_progs -t verifier_or_jmp32_k [...] ./test_progs -t verifier_or_jmp32_k #492/1 verifier_or_jmp32_k/or_jmp32_k: bit ops + branch on unknown value:OK #492 verifier_or_jmp32_k:OK Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/r/20240613115310.25383-3-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_or_jmp32_k.c | 41 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 1c9c4ec1be11..98ef39efa77e 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -53,6 +53,7 @@ #include "verifier_movsx.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" +#include "verifier_or_jmp32_k.skel.h" #include "verifier_precision.skel.h" #include "verifier_prevent_map_lookup.skel.h" #include "verifier_raw_stack.skel.h" @@ -170,6 +171,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_movsx(void) { RUN(verifier_movsx); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); } +void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); } void test_verifier_precision(void) { RUN(verifier_precision); } void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } diff --git a/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c new file mode 100644 index 000000000000..f37713a265ac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" + +SEC("socket") +__description("or_jmp32_k: bit ops + branch on unknown value") +__failure +__msg("R0 invalid mem access 'scalar'") +__naked void or_jmp32_k(void) +{ + asm volatile (" \ + r0 = 0xffffffff; \ + r0 /= 1; \ + r1 = 0; \ + w1 = -1; \ + w1 >>= 1; \ + w0 &= w1; \ + w0 |= 2; \ + if w0 != 0x7ffffffd goto l1; \ + r0 = 1; \ + exit; \ +l3: \ + r0 = 5; \ + *(u64*)(r0 - 8) = r0; \ + exit; \ +l2: \ + w0 -= 0xe; \ + if w0 == 1 goto l3; \ + r0 = 4; \ + exit; \ +l1: \ + w0 -= 0x7ffffff0; \ + if w0 s>= 0xe goto l2; \ + r0 = 3; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; From b99a95bc56c52a428befbce12d9451fd7a0f3bc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Thu, 13 Jun 2024 10:31:46 -0700 Subject: [PATCH 309/778] bpf: fix UML x86_64 compile failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pcpu_hot (defined in arch/x86) is not available on user mode linux (ARCH=um) Cc: Andrii Nakryiko Cc: John Fastabend Cc: Alexei Starovoitov Fixes: 1ae6921009e5 ("bpf: inline bpf_get_smp_processor_id() helper") Signed-off-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20240613173146.2524647-1-maze@google.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e5a0ba3bc38d..010cfee7ffe9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20320,7 +20320,7 @@ patch_map_ops_generic: goto next_insn; } -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) /* Implement bpf_get_smp_processor_id() inline. */ if (insn->imm == BPF_FUNC_get_smp_processor_id && prog->jit_requested && bpf_jit_supports_percpu_insn()) { From 9a95c5bfbf02a0a7f5983280fe284a0ff0836c34 Mon Sep 17 00:00:00 2001 From: GUO Zihua Date: Tue, 7 May 2024 01:25:41 +0000 Subject: [PATCH 310/778] ima: Avoid blocking in RCU read-side critical section A panic happens in ima_match_policy: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 PGD 42f873067 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 5 PID: 1286325 Comm: kubeletmonit.sh Kdump: loaded Tainted: P Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 RIP: 0010:ima_match_policy+0x84/0x450 Code: 49 89 fc 41 89 cf 31 ed 89 44 24 14 eb 1c 44 39 7b 18 74 26 41 83 ff 05 74 20 48 8b 1b 48 3b 1d f2 b9 f4 00 0f 84 9c 01 00 00 <44> 85 73 10 74 ea 44 8b 6b 14 41 f6 c5 01 75 d4 41 f6 c5 02 74 0f RSP: 0018:ff71570009e07a80 EFLAGS: 00010207 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000200 RDX: ffffffffad8dc7c0 RSI: 0000000024924925 RDI: ff3e27850dea2000 RBP: 0000000000000000 R08: 0000000000000000 R09: ffffffffabfce739 R10: ff3e27810cc42400 R11: 0000000000000000 R12: ff3e2781825ef970 R13: 00000000ff3e2785 R14: 000000000000000c R15: 0000000000000001 FS: 00007f5195b51740(0000) GS:ff3e278b12d40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000000626d24002 CR4: 0000000000361ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ima_get_action+0x22/0x30 process_measurement+0xb0/0x830 ? page_add_file_rmap+0x15/0x170 ? alloc_set_pte+0x269/0x4c0 ? prep_new_page+0x81/0x140 ? simple_xattr_get+0x75/0xa0 ? selinux_file_open+0x9d/0xf0 ima_file_check+0x64/0x90 path_openat+0x571/0x1720 do_filp_open+0x9b/0x110 ? page_counter_try_charge+0x57/0xc0 ? files_cgroup_alloc_fd+0x38/0x60 ? __alloc_fd+0xd4/0x250 ? do_sys_open+0x1bd/0x250 do_sys_open+0x1bd/0x250 do_syscall_64+0x5d/0x1d0 entry_SYSCALL_64_after_hwframe+0x65/0xca Commit c7423dbdbc9e ("ima: Handle -ESTALE returned by ima_filter_rule_match()") introduced call to ima_lsm_copy_rule within a RCU read-side critical section which contains kmalloc with GFP_KERNEL. This implies a possible sleep and violates limitations of RCU read-side critical sections on non-PREEMPT systems. Sleeping within RCU read-side critical section might cause synchronize_rcu() returning early and break RCU protection, allowing a UAF to happen. The root cause of this issue could be described as follows: | Thread A | Thread B | | |ima_match_policy | | | rcu_read_lock | |ima_lsm_update_rule | | | synchronize_rcu | | | | kmalloc(GFP_KERNEL)| | | sleep | ==> synchronize_rcu returns early | kfree(entry) | | | | entry = entry->next| ==> UAF happens and entry now becomes NULL (or could be anything). | | entry->action | ==> Accessing entry might cause panic. To fix this issue, we are converting all kmalloc that is called within RCU read-side critical section to use GFP_ATOMIC. Fixes: c7423dbdbc9e ("ima: Handle -ESTALE returned by ima_filter_rule_match()") Cc: stable@vger.kernel.org Signed-off-by: GUO Zihua Acked-by: John Johansen Reviewed-by: Mimi Zohar Reviewed-by: Casey Schaufler [PM: fixed missing comment, long lines, !CONFIG_IMA_LSM_RULES case] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 5 +++-- kernel/auditfilter.c | 5 +++-- security/apparmor/audit.c | 6 +++--- security/apparmor/include/audit.h | 2 +- security/integrity/ima/ima.h | 2 +- security/integrity/ima/ima_policy.c | 15 +++++++++------ security/security.c | 6 ++++-- security/selinux/include/audit.h | 4 +++- security/selinux/ss/services.c | 5 +++-- security/smack/smack_lsm.c | 4 +++- 11 files changed, 34 insertions(+), 22 deletions(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index f804b76cde44..44488b1ab9a9 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -413,7 +413,7 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, #ifdef CONFIG_AUDIT LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule) LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule) LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) diff --git a/include/linux/security.h b/include/linux/security.h index 21cf70346b33..de3af33e6ff5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2048,7 +2048,8 @@ static inline void security_key_post_create_or_update(struct key *keyring, #ifdef CONFIG_AUDIT #ifdef CONFIG_SECURITY -int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule); +int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, + gfp_t gfp); int security_audit_rule_known(struct audit_krule *krule); int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule); void security_audit_rule_free(void *lsmrule); @@ -2056,7 +2057,7 @@ void security_audit_rule_free(void *lsmrule); #else static inline int security_audit_rule_init(u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) { return 0; } diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index be8c680121e4..d6ef4f4f9cba 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -529,7 +529,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, entry->rule.buflen += f_val; f->lsm_str = str; err = security_audit_rule_init(f->type, f->op, str, - (void **)&f->lsm_rule); + (void **)&f->lsm_rule, + GFP_KERNEL); /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (err == -EINVAL) { @@ -799,7 +800,7 @@ static inline int audit_dupe_lsm_field(struct audit_field *df, /* our own (refreshed) copy of lsm_rule */ ret = security_audit_rule_init(df->type, df->op, df->lsm_str, - (void **)&df->lsm_rule); + (void **)&df->lsm_rule, GFP_KERNEL); /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (ret == -EINVAL) { diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 45beb1c5f747..6b5181c668b5 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -217,7 +217,7 @@ void aa_audit_rule_free(void *vrule) } } -int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) +int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp) { struct aa_audit_rule *rule; @@ -230,14 +230,14 @@ int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) return -EINVAL; } - rule = kzalloc(sizeof(struct aa_audit_rule), GFP_KERNEL); + rule = kzalloc(sizeof(struct aa_audit_rule), gfp); if (!rule) return -ENOMEM; /* Currently rules are treated as coming from the root ns */ rule->label = aa_label_parse(&root_ns->unconfined->label, rulestr, - GFP_KERNEL, true, false); + gfp, true, false); if (IS_ERR(rule->label)) { int err = PTR_ERR(rule->label); aa_audit_rule_free(rule); diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index acbb03b9bd25..0c8cc86b417b 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -200,7 +200,7 @@ static inline int complain_error(int error) } void aa_audit_rule_free(void *vrule); -int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule); +int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp); int aa_audit_rule_known(struct audit_krule *rule); int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule); diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 3e568126cd48..c51e24d24d1e 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -546,7 +546,7 @@ static inline void ima_free_modsig(struct modsig *modsig) #else static inline int ima_filter_rule_init(u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) { return -EINVAL; } diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index c0556907c2e6..09da8e639239 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -401,7 +401,8 @@ static void ima_free_rule(struct ima_rule_entry *entry) kfree(entry); } -static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) +static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry, + gfp_t gfp) { struct ima_rule_entry *nentry; int i; @@ -410,7 +411,7 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) * Immutable elements are copied over as pointers and data; only * lsm rules can change */ - nentry = kmemdup(entry, sizeof(*nentry), GFP_KERNEL); + nentry = kmemdup(entry, sizeof(*nentry), gfp); if (!nentry) return NULL; @@ -425,7 +426,8 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) ima_filter_rule_init(nentry->lsm[i].type, Audit_equal, nentry->lsm[i].args_p, - &nentry->lsm[i].rule); + &nentry->lsm[i].rule, + gfp); if (!nentry->lsm[i].rule) pr_warn("rule for LSM \'%s\' is undefined\n", nentry->lsm[i].args_p); @@ -438,7 +440,7 @@ static int ima_lsm_update_rule(struct ima_rule_entry *entry) int i; struct ima_rule_entry *nentry; - nentry = ima_lsm_copy_rule(entry); + nentry = ima_lsm_copy_rule(entry, GFP_KERNEL); if (!nentry) return -ENOMEM; @@ -664,7 +666,7 @@ retry: } if (rc == -ESTALE && !rule_reinitialized) { - lsm_rule = ima_lsm_copy_rule(rule); + lsm_rule = ima_lsm_copy_rule(rule, GFP_ATOMIC); if (lsm_rule) { rule_reinitialized = true; goto retry; @@ -1140,7 +1142,8 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry, entry->lsm[lsm_rule].type = audit_type; result = ima_filter_rule_init(entry->lsm[lsm_rule].type, Audit_equal, entry->lsm[lsm_rule].args_p, - &entry->lsm[lsm_rule].rule); + &entry->lsm[lsm_rule].rule, + GFP_KERNEL); if (!entry->lsm[lsm_rule].rule) { pr_warn("rule for LSM \'%s\' is undefined\n", entry->lsm[lsm_rule].args_p); diff --git a/security/security.c b/security/security.c index e5da848c50b9..e5ca08789f74 100644 --- a/security/security.c +++ b/security/security.c @@ -5332,15 +5332,17 @@ void security_key_post_create_or_update(struct key *keyring, struct key *key, * @op: rule operator * @rulestr: rule context * @lsmrule: receive buffer for audit rule struct + * @gfp: GFP flag used for kmalloc * * Allocate and initialize an LSM audit rule structure. * * Return: Return 0 if @lsmrule has been successfully set, -EINVAL in case of * an invalid rule. */ -int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule) +int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, + gfp_t gfp) { - return call_int_hook(audit_rule_init, field, op, rulestr, lsmrule); + return call_int_hook(audit_rule_init, field, op, rulestr, lsmrule, gfp); } /** diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h index 52aca71210b4..29c7d4c86f6d 100644 --- a/security/selinux/include/audit.h +++ b/security/selinux/include/audit.h @@ -21,12 +21,14 @@ * @op: the operator the rule uses * @rulestr: the text "target" of the rule * @rule: pointer to the new rule structure returned via this + * @gfp: GFP flag used for kmalloc * * Returns 0 if successful, -errno if not. On success, the rule structure * will be allocated internally. The caller must free this structure with * selinux_audit_rule_free() after use. */ -int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **rule); +int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **rule, + gfp_t gfp); /** * selinux_audit_rule_free - free an selinux audit rule structure. diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index f20e1968b7f7..e33e55384b75 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -3507,7 +3507,8 @@ void selinux_audit_rule_free(void *vrule) } } -int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) +int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, + gfp_t gfp) { struct selinux_state *state = &selinux_state; struct selinux_policy *policy; @@ -3548,7 +3549,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) return -EINVAL; } - tmprule = kzalloc(sizeof(struct selinux_audit_rule), GFP_KERNEL); + tmprule = kzalloc(sizeof(struct selinux_audit_rule), gfp); if (!tmprule) return -ENOMEM; context_init(&tmprule->au_ctxt); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 70ba2841e181..f5cbec1e6a92 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4693,11 +4693,13 @@ static int smack_post_notification(const struct cred *w_cred, * @op: required testing operator (=, !=, >, <, ...) * @rulestr: smack label to be audited * @vrule: pointer to save our own audit rule representation + * @gfp: type of the memory for the allocation * * Prepare to audit cases where (@field @op @rulestr) is true. * The label to be audited is created if necessay. */ -static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) +static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, + gfp_t gfp) { struct smack_known *skp; char **rule = (char **)vrule; From 4eb4e85c4f818491efc67e9373aa16b123c3f522 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 7 Jun 2024 12:50:14 -0700 Subject: [PATCH 311/778] btrfs: retry block group reclaim without infinite loop If inc_block_group_ro systematically fails (e.g. due to ETXTBUSY from swap) or btrfs_relocate_chunk systematically fails (from lack of space), then this worker becomes an infinite loop. At the very least, this strands the cleaner thread, but can also result in hung tasks/RCU stalls on PREEMPT_NONE kernels and if the reclaim_bgs_lock mutex is not contended. I believe the best long term fix is to manage reclaim via work queue, where we queue up a relocation on the triggering condition and re-queue on failure. In the meantime, this is an easy fix to apply to avoid the immediate pain. Fixes: 7e2718099438 ("btrfs: reinsert BGs failed to reclaim") CC: stable@vger.kernel.org # 6.6+ Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 1e09aeea69c2..1a66be33bb04 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1785,6 +1785,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) container_of(work, struct btrfs_fs_info, reclaim_bgs_work); struct btrfs_block_group *bg; struct btrfs_space_info *space_info; + LIST_HEAD(retry_list); if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) return; @@ -1921,8 +1922,11 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) } next: - if (ret) - btrfs_mark_bg_to_reclaim(bg); + if (ret) { + /* Refcount held by the reclaim_bgs list after splice. */ + btrfs_get_block_group(bg); + list_add_tail(&bg->bg_list, &retry_list); + } btrfs_put_block_group(bg); mutex_unlock(&fs_info->reclaim_bgs_lock); @@ -1942,6 +1946,9 @@ next: spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); end: + spin_lock(&fs_info->unused_bgs_lock); + list_splice_tail(&retry_list, &fs_info->reclaim_bgs); + spin_unlock(&fs_info->unused_bgs_lock); btrfs_exclop_finish(fs_info); sb_end_write(fs_info->sb); } From cebae292e0c32a228e8f2219c270a7237be24a6a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 7 Jun 2024 13:27:48 +0200 Subject: [PATCH 312/778] btrfs: zoned: allocate dummy checksums for zoned NODATASUM writes Shin'ichiro reported that when he's running fstests' test-case btrfs/167 on emulated zoned devices, he's seeing the following NULL pointer dereference in 'btrfs_zone_finish_endio()': Oops: general protection fault, probably for non-canonical address 0xdffffc0000000011: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000088-0x000000000000008f] CPU: 4 PID: 2332440 Comm: kworker/u80:15 Tainted: G W 6.10.0-rc2-kts+ #4 Hardware name: Supermicro Super Server/X11SPi-TF, BIOS 3.3 02/21/2020 Workqueue: btrfs-endio-write btrfs_work_helper [btrfs] RIP: 0010:btrfs_zone_finish_endio.part.0+0x34/0x160 [btrfs] RSP: 0018:ffff88867f107a90 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff893e5534 RDX: 0000000000000011 RSI: 0000000000000004 RDI: 0000000000000088 RBP: 0000000000000002 R08: 0000000000000001 R09: ffffed1081696028 R10: ffff88840b4b0143 R11: ffff88834dfff600 R12: ffff88840b4b0000 R13: 0000000000020000 R14: 0000000000000000 R15: ffff888530ad5210 FS: 0000000000000000(0000) GS:ffff888e3f800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f87223fff38 CR3: 00000007a7c6a002 CR4: 00000000007706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __die_body.cold+0x19/0x27 ? die_addr+0x46/0x70 ? exc_general_protection+0x14f/0x250 ? asm_exc_general_protection+0x26/0x30 ? do_raw_read_unlock+0x44/0x70 ? btrfs_zone_finish_endio.part.0+0x34/0x160 [btrfs] btrfs_finish_one_ordered+0x5d9/0x19a0 [btrfs] ? __pfx_lock_release+0x10/0x10 ? do_raw_write_lock+0x90/0x260 ? __pfx_do_raw_write_lock+0x10/0x10 ? __pfx_btrfs_finish_one_ordered+0x10/0x10 [btrfs] ? _raw_write_unlock+0x23/0x40 ? btrfs_finish_ordered_zoned+0x5a9/0x850 [btrfs] ? lock_acquire+0x435/0x500 btrfs_work_helper+0x1b1/0xa70 [btrfs] ? __schedule+0x10a8/0x60b0 ? __pfx___might_resched+0x10/0x10 process_one_work+0x862/0x1410 ? __pfx_lock_acquire+0x10/0x10 ? __pfx_process_one_work+0x10/0x10 ? assign_work+0x16c/0x240 worker_thread+0x5e6/0x1010 ? __pfx_worker_thread+0x10/0x10 kthread+0x2c3/0x3a0 ? trace_irq_enable.constprop.0+0xce/0x110 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x70 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Enabling CONFIG_BTRFS_ASSERT revealed the following assertion to trigger: assertion failed: !list_empty(&ordered->list), in fs/btrfs/zoned.c:1815 This indicates, that we're missing the checksums list on the ordered_extent. As btrfs/167 is doing a NOCOW write this is to be expected. Further analysis with drgn confirmed the assumption: >>> inode = prog.crashed_thread().stack_trace()[11]['ordered'].inode >>> btrfs_inode = drgn.container_of(inode, "struct btrfs_inode", \ "vfs_inode") >>> print(btrfs_inode.flags) (u32)1 As zoned emulation mode simulates conventional zones on regular devices, we cannot use zone-append for writing. But we're only attaching dummy checksums if we're doing a zone-append write. So for NOCOW zoned data writes on conventional zones, also attach a dummy checksum. Reported-by: Shinichiro Kawasaki Fixes: cbfce4c7fbde ("btrfs: optimize the logical to physical mapping for zoned writes") CC: Naohiro Aota # 6.6+ Tested-by: Shin'ichiro Kawasaki Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/bio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 477f350a8bd0..e3a57196b0ee 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -741,7 +741,9 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) ret = btrfs_bio_csum(bbio); if (ret) goto fail_put_bio; - } else if (use_append) { + } else if (use_append || + (btrfs_is_zoned(fs_info) && inode && + inode->flags & BTRFS_INODE_NODATASUM)) { ret = btrfs_alloc_dummy_sum(bbio); if (ret) goto fail_put_bio; From ff0ffe5b7c3c12c6e0cca16652905963ae817b44 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 13 Jun 2024 09:36:50 -0700 Subject: [PATCH 313/778] nvme: fix namespace removal list This function wants to move a subset of a list from one element to the tail into another list. It also needs to use the srcu synchronize instead of the regular rcu version. Do this one element at a time because that's the only to do it. Fixes: be647e2c76b27f4 ("nvme: use srcu for iterating namespace list") Reported-by: Venkat Rao Bagalkote Tested-by: Venkat Rao Bagalkote Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c40930d10bd3..782090ce0bc1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3960,12 +3960,13 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, mutex_lock(&ctrl->namespaces_lock); list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { - if (ns->head->ns_id > nsid) - list_splice_init_rcu(&ns->list, &rm_list, - synchronize_rcu); + if (ns->head->ns_id > nsid) { + list_del_rcu(&ns->list); + synchronize_srcu(&ctrl->srcu); + list_add_tail_rcu(&ns->list, &rm_list); + } } mutex_unlock(&ctrl->namespaces_lock); - synchronize_srcu(&ctrl->srcu); list_for_each_entry_safe(ns, next, &rm_list, list) nvme_ns_remove(ns); From 4467c09bc7a66a17ffd84d6262d48279b26106ea Mon Sep 17 00:00:00 2001 From: Aryan Srivastava Date: Thu, 13 Jun 2024 14:49:00 +1200 Subject: [PATCH 314/778] net: mvpp2: use slab_build_skb for oversized frames Setting frag_size to 0 to indicate kmalloc has been deprecated, use slab_build_skb directly. Fixes: ce098da1497c ("skbuff: Introduce slab_build_skb()") Signed-off-by: Aryan Srivastava Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20240613024900.3842238-1-aryan.srivastava@alliedtelesis.co.nz Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index e91486c48de3..671368d2c77e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4014,7 +4014,10 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, } } - skb = build_skb(data, frag_size); + if (frag_size) + skb = build_skb(data, frag_size); + else + skb = slab_build_skb(data); if (!skb) { netdev_warn(port->dev, "skb build failed\n"); goto err_drop_frame; From 135c6eb27a85c8b261a2cc1f5093abcda6ee9010 Mon Sep 17 00:00:00 2001 From: Joel Slebodnick Date: Thu, 13 Jun 2024 14:27:28 -0400 Subject: [PATCH 315/778] scsi: ufs: core: Free memory allocated for model before reinit Under the conditions that a device is to be reinitialized within ufshcd_probe_hba(), the device must first be fully reset. Resetting the device should include freeing U8 model (member of dev_info) but does not, and this causes a memory leak. ufs_put_device_desc() is responsible for freeing model. unreferenced object 0xffff3f63008bee60 (size 32): comm "kworker/u33:1", pid 60, jiffies 4294892642 hex dump (first 32 bytes): 54 48 47 4a 46 47 54 30 54 32 35 42 41 5a 5a 41 THGJFGT0T25BAZZA 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc ed7ff1a9): [] kmemleak_alloc+0x34/0x40 [] __kmalloc_noprof+0x1e4/0x2fc [] ufshcd_read_string_desc+0x94/0x190 [] ufshcd_device_init+0x480/0xdf8 [] ufshcd_probe_hba+0x3c/0x404 [] ufshcd_async_scan+0x40/0x370 [] async_run_entry_fn+0x34/0xe0 [] process_one_work+0x154/0x298 [] worker_thread+0x2f8/0x408 [] kthread+0x114/0x118 [] ret_from_fork+0x10/0x20 Fixes: 96a7141da332 ("scsi: ufs: core: Add support for reinitializing the UFS device") Cc: Reviewed-by: Andrew Halaney Reviewed-by: Bart Van Assche Signed-off-by: Joel Slebodnick Link: https://lore.kernel.org/r/20240613200202.2524194-1-jslebodn@redhat.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index e5e9da61f15d..1b65e6ae4137 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8787,6 +8787,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params) (hba->quirks & UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH)) { /* Reset the device and controller before doing reinit */ ufshcd_device_reset(hba); + ufs_put_device_desc(hba); ufshcd_hba_stop(hba); ufshcd_vops_reinit_notify(hba); ret = ufshcd_hba_enable(hba); From 633aeefafc9c2a07a76a62be6aac1d73c3e3defa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 13 Jun 2024 14:18:26 -0700 Subject: [PATCH 316/778] scsi: core: Introduce the BLIST_SKIP_IO_HINTS flag Prepare for skipping the IO Advice Hints Grouping mode page for USB storage devices. Cc: Alan Stern Cc: Joao Machado Cc: Andy Shevchenko Cc: Christian Heusel Cc: stable@vger.kernel.org Fixes: 4f53138fffc2 ("scsi: sd: Translate data lifetime information") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240613211828.2077477-2-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 ++++ include/scsi/scsi_devinfo.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fbc11046bbf6..fe82baa924f8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -3118,6 +3119,9 @@ static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer) struct scsi_mode_data data; int res; + if (sdp->sdev_bflags & BLIST_SKIP_IO_HINTS) + return; + res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a, /*subpage=*/0x05, buffer, SD_BUF_SIZE, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index 6b548dc2c496..1d79a3b536ce 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -69,8 +69,10 @@ #define BLIST_RETRY_ITF ((__force blist_flags_t)(1ULL << 32)) /* Always retry ABORTED_COMMAND with ASC 0xc1 */ #define BLIST_RETRY_ASC_C1 ((__force blist_flags_t)(1ULL << 33)) +/* Do not query the IO Advice Hints Grouping mode page */ +#define BLIST_SKIP_IO_HINTS ((__force blist_flags_t)(1ULL << 34)) -#define __BLIST_LAST_USED BLIST_RETRY_ASC_C1 +#define __BLIST_LAST_USED BLIST_SKIP_IO_HINTS #define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \ (__force blist_flags_t) \ From 57619f3cdeb5ae9f4252833b0ed600e9f81da722 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 13 Jun 2024 14:18:27 -0700 Subject: [PATCH 317/778] scsi: usb: uas: Do not query the IO Advice Hints Grouping mode page for USB/UAS devices Recently it was reported that the following USB storage devices are unusable with Linux kernel 6.9: * Kingston DataTraveler G2 * Garmin FR35 This is because attempting to read the IO Advice Hints Grouping mode page causes these devices to reset. Hence do not read the IO Advice Hints Grouping mode page from USB/UAS storage devices. Acked-by: Alan Stern Cc: stable@vger.kernel.org Fixes: 4f53138fffc2 ("scsi: sd: Translate data lifetime information") Reported-by: Joao Machado Closes: https://lore.kernel.org/linux-scsi/20240130214911.1863909-1-bvanassche@acm.org/T/#mf4e3410d8f210454d7e4c3d1fb5c0f41e651b85f Tested-by: Andy Shevchenko Bisected-by: Christian Heusel Reported-by: Andy Shevchenko Closes: https://lore.kernel.org/linux-scsi/CACLx9VdpUanftfPo2jVAqXdcWe8Y43MsDeZmMPooTzVaVJAh2w@mail.gmail.com/ Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240613211828.2077477-3-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/usb/storage/scsiglue.c | 6 ++++++ drivers/usb/storage/uas.c | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index b31464740f6c..8c8b5e6041cc 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -79,6 +79,12 @@ static int slave_alloc (struct scsi_device *sdev) if (us->protocol == USB_PR_BULK && us->max_lun > 0) sdev->sdev_bflags |= BLIST_FORCELUN; + /* + * Some USB storage devices reset if the IO advice hints grouping mode + * page is queried. Hence skip that mode page. + */ + sdev->sdev_bflags |= BLIST_SKIP_IO_HINTS; + return 0; } diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index a48870a87a29..b610a2de4ae5 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -820,6 +821,12 @@ static int uas_slave_alloc(struct scsi_device *sdev) struct uas_dev_info *devinfo = (struct uas_dev_info *)sdev->host->hostdata; + /* + * Some USB storage devices reset if the IO advice hints grouping mode + * page is queried. Hence skip that mode page. + */ + sdev->sdev_bflags |= BLIST_SKIP_IO_HINTS; + sdev->hostdata = devinfo; return 0; } From f4a1254f2a076afb0edd473589bf40f9b4d36b41 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 14 Jun 2024 01:04:29 +0100 Subject: [PATCH 318/778] io_uring: fix cancellation overwriting req->flags Only the current owner of a request is allowed to write into req->flags. Hence, the cancellation path should never touch it. Add a new field instead of the flag, move it into the 3rd cache line because it should always be initialised. poll_refs can move further as polling is an involved process anyway. It's a minimal patch, in the future we can and should find a better place for it and remove now unused REQ_F_CANCEL_SEQ. Fixes: 521223d7c229f ("io_uring/cancel: don't default to setting req->work.cancel_seq") Cc: stable@vger.kernel.org Reported-by: Li Shi Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6827b129f8f0ad76fa9d1f0a773de938b240ffab.1718323430.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 ++- io_uring/cancel.h | 4 ++-- io_uring/io_uring.c | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 7a6b190c7da7..b48570eaa449 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -648,7 +648,7 @@ struct io_kiocb { struct io_rsrc_node *rsrc_node; atomic_t refs; - atomic_t poll_refs; + bool cancel_seq_set; struct io_task_work io_task_work; /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ struct hlist_node hash_node; @@ -657,6 +657,7 @@ struct io_kiocb { /* opcode allocated if it needs to store data for async defer */ void *async_data; /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */ + atomic_t poll_refs; struct io_kiocb *link; /* custom credentials, valid IFF REQ_F_CREDS is set */ const struct cred *creds; diff --git a/io_uring/cancel.h b/io_uring/cancel.h index 76b32e65c03c..b33995e00ba9 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -27,10 +27,10 @@ bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd); static inline bool io_cancel_match_sequence(struct io_kiocb *req, int sequence) { - if ((req->flags & REQ_F_CANCEL_SEQ) && sequence == req->work.cancel_seq) + if (req->cancel_seq_set && sequence == req->work.cancel_seq) return true; - req->flags |= REQ_F_CANCEL_SEQ; + req->cancel_seq_set = true; req->work.cancel_seq = sequence; return false; } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 816e93e7f949..154b25b8a613 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2058,6 +2058,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->file = NULL; req->rsrc_node = NULL; req->task = current; + req->cancel_seq_set = false; if (unlikely(opcode >= IORING_OP_LAST)) { req->opcode = 0; From 22f00812862564b314784167a89f27b444f82a46 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 13 Jun 2024 21:30:43 -0400 Subject: [PATCH 319/778] USB: class: cdc-wdm: Fix CPU lockup caused by excessive log messages The syzbot fuzzer found that the interrupt-URB completion callback in the cdc-wdm driver was taking too long, and the driver's immediate resubmission of interrupt URBs with -EPROTO status combined with the dummy-hcd emulation to cause a CPU lockup: cdc_wdm 1-1:1.0: nonzero urb status received: -71 cdc_wdm 1-1:1.0: wdm_int_callback - 0 bytes watchdog: BUG: soft lockup - CPU#0 stuck for 26s! [syz-executor782:6625] CPU#0 Utilization every 4s during lockup: #1: 98% system, 0% softirq, 3% hardirq, 0% idle #2: 98% system, 0% softirq, 3% hardirq, 0% idle #3: 98% system, 0% softirq, 3% hardirq, 0% idle #4: 98% system, 0% softirq, 3% hardirq, 0% idle #5: 98% system, 1% softirq, 3% hardirq, 0% idle Modules linked in: irq event stamp: 73096 hardirqs last enabled at (73095): [] console_emit_next_record kernel/printk/printk.c:2935 [inline] hardirqs last enabled at (73095): [] console_flush_all+0x650/0xb74 kernel/printk/printk.c:2994 hardirqs last disabled at (73096): [] __el1_irq arch/arm64/kernel/entry-common.c:533 [inline] hardirqs last disabled at (73096): [] el1_interrupt+0x24/0x68 arch/arm64/kernel/entry-common.c:551 softirqs last enabled at (73048): [] softirq_handle_end kernel/softirq.c:400 [inline] softirqs last enabled at (73048): [] handle_softirqs+0xa60/0xc34 kernel/softirq.c:582 softirqs last disabled at (73043): [] __do_softirq+0x14/0x20 kernel/softirq.c:588 CPU: 0 PID: 6625 Comm: syz-executor782 Tainted: G W 6.10.0-rc2-syzkaller-g8867bbd4a056 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024 Testing showed that the problem did not occur if the two error messages -- the first two lines above -- were removed; apparently adding material to the kernel log takes a surprisingly large amount of time. In any case, the best approach for preventing these lockups and to avoid spamming the log with thousands of error messages per second is to ratelimit the two dev_err() calls. Therefore we replace them with dev_err_ratelimited(). Signed-off-by: Alan Stern Suggested-by: Greg KH Reported-and-tested-by: syzbot+5f996b83575ef4058638@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-usb/00000000000073d54b061a6a1c65@google.com/ Reported-and-tested-by: syzbot+1b2abad17596ad03dcff@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-usb/000000000000f45085061aa9b37e@google.com/ Fixes: 9908a32e94de ("USB: remove err() macro from usb class drivers") Link: https://lore.kernel.org/linux-usb/40dfa45b-5f21-4eef-a8c1-51a2f320e267@rowland.harvard.edu/ Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/29855215-52f5-4385-b058-91f42c2bee18@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index c553decb5461..6830be4419e2 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -266,14 +266,14 @@ static void wdm_int_callback(struct urb *urb) dev_err(&desc->intf->dev, "Stall on int endpoint\n"); goto sw; /* halt is cleared in work */ default: - dev_err(&desc->intf->dev, + dev_err_ratelimited(&desc->intf->dev, "nonzero urb status received: %d\n", status); break; } } if (urb->actual_length < sizeof(struct usb_cdc_notification)) { - dev_err(&desc->intf->dev, "wdm_int_callback - %d bytes\n", + dev_err_ratelimited(&desc->intf->dev, "wdm_int_callback - %d bytes\n", urb->actual_length); goto exit; } From 41f590e31c6c8b8a0a490b7c1ad2e57c20ec3d9b Mon Sep 17 00:00:00 2001 From: pengfuyuan Date: Thu, 6 Jun 2024 20:08:42 +0800 Subject: [PATCH 320/778] arm/komeda: Remove all CONFIG_DEBUG_FS conditional compilations Since the debugfs functions have no-op stubs for CONFIG_DEBUG_FS=n, the compiler will optimize the rest away since they are no longer referenced. The benefit of removing the conditional compilation is that the build is actually tested for both CONFIG_DEBUG_FS configuration values. Assuming most developers have it enabled, CONFIG_DEBUG_FS=n is not tested much and may fail the build due to the conditional compilation. Reported-by: k2ci Signed-off-by: pengfuyuan Acked-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240606120842.1377267-1-pengfuyuan@kylinos.cn Signed-off-by: Liviu Dudau Signed-off-by: Maxime Ripard --- drivers/gpu/drm/arm/display/komeda/komeda_dev.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c index 14ee79becacb..5ba62e637a61 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -12,10 +12,8 @@ #include #include #include -#ifdef CONFIG_DEBUG_FS #include #include -#endif #include @@ -43,7 +41,6 @@ static int komeda_register_show(struct seq_file *sf, void *x) DEFINE_SHOW_ATTRIBUTE(komeda_register); -#ifdef CONFIG_DEBUG_FS static void komeda_debugfs_init(struct komeda_dev *mdev) { if (!debugfs_initialized()) @@ -55,7 +52,6 @@ static void komeda_debugfs_init(struct komeda_dev *mdev) debugfs_create_x16("err_verbosity", 0664, mdev->debugfs_root, &mdev->err_verbosity); } -#endif static ssize_t core_id_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -265,9 +261,7 @@ struct komeda_dev *komeda_dev_create(struct device *dev) mdev->err_verbosity = KOMEDA_DEV_PRINT_ERR_EVENTS; -#ifdef CONFIG_DEBUG_FS komeda_debugfs_init(mdev); -#endif return mdev; @@ -286,9 +280,7 @@ void komeda_dev_destroy(struct komeda_dev *mdev) sysfs_remove_group(&dev->kobj, &komeda_sysfs_attr_group); -#ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(mdev->debugfs_root); -#endif if (mdev->aclk) clk_prepare_enable(mdev->aclk); From 2663d0462eb32ae7c9b035300ab6b1523886c718 Mon Sep 17 00:00:00 2001 From: Kenton Groombridge Date: Wed, 5 Jun 2024 11:22:18 -0400 Subject: [PATCH 321/778] wifi: mac80211: Avoid address calculations via out of bounds array indexing req->n_channels must be set before req->channels[] can be used. This patch fixes one of the issues encountered in [1]. [ 83.964255] UBSAN: array-index-out-of-bounds in net/mac80211/scan.c:364:4 [ 83.964258] index 0 is out of range for type 'struct ieee80211_channel *[]' [...] [ 83.964264] Call Trace: [ 83.964267] [ 83.964269] dump_stack_lvl+0x3f/0xc0 [ 83.964274] __ubsan_handle_out_of_bounds+0xec/0x110 [ 83.964278] ieee80211_prep_hw_scan+0x2db/0x4b0 [ 83.964281] __ieee80211_start_scan+0x601/0x990 [ 83.964291] nl80211_trigger_scan+0x874/0x980 [ 83.964295] genl_family_rcv_msg_doit+0xe8/0x160 [ 83.964298] genl_rcv_msg+0x240/0x270 [...] [1] https://bugzilla.kernel.org/show_bug.cgi?id=218810 Co-authored-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Kenton Groombridge Link: https://msgid.link/20240605152218.236061-1-concord@gentoo.org Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 8ecc4b710b0e..b5f2df61c7f6 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -358,7 +358,8 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) struct cfg80211_scan_request *req; struct cfg80211_chan_def chandef; u8 bands_used = 0; - int i, ielen, n_chans; + int i, ielen; + u32 *n_chans; u32 flags = 0; req = rcu_dereference_protected(local->scan_req, @@ -368,34 +369,34 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) return false; if (ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS)) { + local->hw_scan_req->req.n_channels = req->n_channels; + for (i = 0; i < req->n_channels; i++) { local->hw_scan_req->req.channels[i] = req->channels[i]; bands_used |= BIT(req->channels[i]->band); } - - n_chans = req->n_channels; } else { do { if (local->hw_scan_band == NUM_NL80211_BANDS) return false; - n_chans = 0; + n_chans = &local->hw_scan_req->req.n_channels; + *n_chans = 0; for (i = 0; i < req->n_channels; i++) { if (req->channels[i]->band != local->hw_scan_band) continue; - local->hw_scan_req->req.channels[n_chans] = + local->hw_scan_req->req.channels[(*n_chans)++] = req->channels[i]; - n_chans++; + bands_used |= BIT(req->channels[i]->band); } local->hw_scan_band++; - } while (!n_chans); + } while (!*n_chans); } - local->hw_scan_req->req.n_channels = n_chans; ieee80211_prepare_scan_chandef(&chandef); if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) From 0d9c2beed116e623ac30810d382bd67163650f98 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 12:23:51 +0200 Subject: [PATCH 322/778] wifi: mac80211: fix monitor channel with chanctx emulation After the channel context emulation, there were reports that changing the monitor channel no longer works. This is because those drivers don't have WANT_MONITOR_VIF, so the setting the channel always exits out quickly. Fix this by always allocating the virtual monitor sdata, and simply not telling the driver about it unless it wanted to. This way, we have an interface/sdata to bind the chanctx to, and the emulation can work correctly. Cc: stable@vger.kernel.org Fixes: 0a44dfc07074 ("wifi: mac80211: simplify non-chanctx drivers") Reported-and-tested-by: Savyasaachi Vanga Closes: https://lore.kernel.org/r/chwoymvpzwtbmzryrlitpwmta5j6mtndocxsyqvdyikqu63lon@gfds653hkknl Link: https://msgid.link/20240612122351.b12d4a109dde.I1831a44417faaab92bea1071209abbe4efbe3fba@changeid Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 17 +++++++++++++++++ net/mac80211/iface.c | 21 +++++++++------------ net/mac80211/util.c | 2 +- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index dce37ba8ebe3..254d745832cb 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -311,6 +311,18 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); + /* + * We should perhaps push emulate chanctx down and only + * make it call ->config() when the chanctx is actually + * assigned here (and unassigned below), but that's yet + * another change to all drivers to add assign/unassign + * emulation callbacks. Maybe later. + */ + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + local->emulate_chanctx && + !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + return 0; + if (!check_sdata_in_driver(sdata)) return -EIO; @@ -338,6 +350,11 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + local->emulate_chanctx && + !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + return; + if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 0c54554bf761..b935bb5d8ed1 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1122,9 +1122,6 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; int ret; - if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) - return 0; - ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); @@ -1146,11 +1143,13 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) ieee80211_set_default_queues(sdata); - ret = drv_add_interface(local, sdata); - if (WARN_ON(ret)) { - /* ok .. stupid driver, it asked for this! */ - kfree(sdata); - return ret; + if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { + ret = drv_add_interface(local, sdata); + if (WARN_ON(ret)) { + /* ok .. stupid driver, it asked for this! */ + kfree(sdata); + return ret; + } } set_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -1188,9 +1187,6 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) - return; - ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); @@ -1210,7 +1206,8 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) ieee80211_link_release_channel(&sdata->deflink); - drv_remove_interface(local, sdata); + if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + drv_remove_interface(local, sdata); kfree(sdata); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 283bfc99417e..963ed75deb76 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1843,7 +1843,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* add interfaces */ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { /* in HW restart it exists already */ WARN_ON(local->resuming); res = drv_add_interface(local, sdata); From 9f36169912331fa035d7b73a91252d7c2512eb1a Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 7 Jun 2024 18:07:52 +0200 Subject: [PATCH 323/778] cipso: fix total option length computation As evident from the definition of ip_options_get(), the IP option IPOPT_END is used to pad the IP option data array, not IPOPT_NOP. Yet the loop that walks the IP options to determine the total IP options length in cipso_v4_delopt() doesn't take IPOPT_END into account. Fix it by recognizing the IPOPT_END value as the end of actual options. Fixes: 014ab19a69c3 ("selinux: Set socket NetLabel based on connection endpoint") Signed-off-by: Ondrej Mosnacek Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index dd6d46015058..5e9ac68444f8 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -2013,12 +2013,16 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) * from there we can determine the new total option length */ iter = 0; optlen_new = 0; - while (iter < opt->opt.optlen) - if (opt->opt.__data[iter] != IPOPT_NOP) { + while (iter < opt->opt.optlen) { + if (opt->opt.__data[iter] == IPOPT_END) { + break; + } else if (opt->opt.__data[iter] == IPOPT_NOP) { + iter++; + } else { iter += opt->opt.__data[iter + 1]; optlen_new = iter; - } else - iter++; + } + } hdr_delta = opt->opt.optlen; opt->opt.optlen = (optlen_new + 3) & ~3; hdr_delta -= opt->opt.optlen; From 89aa3619d141d6cfb6040a561aebb6d99d3e2285 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 7 Jun 2024 18:07:53 +0200 Subject: [PATCH 324/778] cipso: make cipso_v4_skbuff_delattr() fully remove the CIPSO options As the comment in this function says, the code currently just clears the CIPSO part with IPOPT_NOP, rather than removing it completely and trimming the packet. The other cipso_v4_*_delattr() functions, however, do the proper removal and also calipso_skbuff_delattr() makes an effort to remove the CALIPSO options instead of replacing them with padding. Some routers treat IPv4 packets with anything (even NOPs) in the option header as a special case and take them through a slower processing path. Consequently, hardening guides such as STIG recommend to configure such routers to drop packets with non-empty IP option headers [1][2]. Thus, users might expect NetLabel to produce packets with minimal padding (or at least with no padding when no actual options are present). Implement the proper option removal to address this and to be closer to what the peer functions do. [1] https://www.stigviewer.com/stig/juniper_router_rtr/2019-09-27/finding/V-90937 [2] https://www.stigviewer.com/stig/cisco_ios_xe_router_rtr/2021-03-26/finding/V-217001 Signed-off-by: Ondrej Mosnacek Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 79 +++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 25 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 5e9ac68444f8..e9cb27061c12 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1810,6 +1810,29 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, return CIPSO_V4_HDR_LEN + ret_val; } +static int cipso_v4_get_actual_opt_len(const unsigned char *data, int len) +{ + int iter = 0, optlen = 0; + + /* determining the new total option length is tricky because of + * the padding necessary, the only thing i can think to do at + * this point is walk the options one-by-one, skipping the + * padding at the end to determine the actual option size and + * from there we can determine the new total option length + */ + while (iter < len) { + if (data[iter] == IPOPT_END) { + break; + } else if (data[iter] == IPOPT_NOP) { + iter++; + } else { + iter += data[iter + 1]; + optlen = iter; + } + } + return optlen; +} + /** * cipso_v4_sock_setattr - Add a CIPSO option to a socket * @sk: the socket @@ -1986,7 +2009,6 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) u8 cipso_len; u8 cipso_off; unsigned char *cipso_ptr; - int iter; int optlen_new; cipso_off = opt->opt.cipso - sizeof(struct iphdr); @@ -2006,23 +2028,8 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) memmove(cipso_ptr, cipso_ptr + cipso_len, opt->opt.optlen - cipso_off - cipso_len); - /* determining the new total option length is tricky because of - * the padding necessary, the only thing i can think to do at - * this point is walk the options one-by-one, skipping the - * padding at the end to determine the actual option size and - * from there we can determine the new total option length */ - iter = 0; - optlen_new = 0; - while (iter < opt->opt.optlen) { - if (opt->opt.__data[iter] == IPOPT_END) { - break; - } else if (opt->opt.__data[iter] == IPOPT_NOP) { - iter++; - } else { - iter += opt->opt.__data[iter + 1]; - optlen_new = iter; - } - } + optlen_new = cipso_v4_get_actual_opt_len(opt->opt.__data, + opt->opt.optlen); hdr_delta = opt->opt.optlen; opt->opt.optlen = (optlen_new + 3) & ~3; hdr_delta -= opt->opt.optlen; @@ -2242,7 +2249,8 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb, */ int cipso_v4_skbuff_delattr(struct sk_buff *skb) { - int ret_val; + int ret_val, cipso_len, hdr_len_actual, new_hdr_len_actual, new_hdr_len, + hdr_len_delta; struct iphdr *iph; struct ip_options *opt = &IPCB(skb)->opt; unsigned char *cipso_ptr; @@ -2255,16 +2263,37 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb) if (ret_val < 0) return ret_val; - /* the easiest thing to do is just replace the cipso option with noop - * options since we don't change the size of the packet, although we - * still need to recalculate the checksum */ - iph = ip_hdr(skb); cipso_ptr = (unsigned char *)iph + opt->cipso; - memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]); + cipso_len = cipso_ptr[1]; + + hdr_len_actual = sizeof(struct iphdr) + + cipso_v4_get_actual_opt_len((unsigned char *)(iph + 1), + opt->optlen); + new_hdr_len_actual = hdr_len_actual - cipso_len; + new_hdr_len = (new_hdr_len_actual + 3) & ~3; + hdr_len_delta = (iph->ihl << 2) - new_hdr_len; + + /* 1. shift any options after CIPSO to the left */ + memmove(cipso_ptr, cipso_ptr + cipso_len, + new_hdr_len_actual - opt->cipso); + /* 2. move the whole IP header to its new place */ + memmove((unsigned char *)iph + hdr_len_delta, iph, new_hdr_len_actual); + /* 3. adjust the skb layout */ + skb_pull(skb, hdr_len_delta); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + /* 4. re-fill new padding with IPOPT_END (may now be longer) */ + memset((unsigned char *)iph + new_hdr_len_actual, IPOPT_END, + new_hdr_len - new_hdr_len_actual); + + opt->optlen -= hdr_len_delta; opt->cipso = 0; opt->is_changed = 1; - + if (hdr_len_delta != 0) { + iph->ihl = new_hdr_len >> 2; + iph_set_totlen(iph, skb->len); + } ip_send_check(iph); return 0; From 68f860426d500cfb697b505799244c7dfff604b1 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Jun 2024 00:31:04 +0100 Subject: [PATCH 325/778] mfd: axp20x: AXP717: Fix missing IRQ status registers range While we list the "IRQ status *and acknowledge*" registers as volatile in the MFD description, they are missing from the writable range array, so acknowledging any interrupts was met with an -EIO error. This error propagates up, leading to the whole AXP717 driver failing to probe, which is fatal to most systems using this PMIC, since most peripherals refer one of the PMIC voltage rails. This wasn't noticed on the initial submission, since the interrupt was completely missing at this point, but the DTs now merged describe the interrupt, creating the problem. Add the five registers that hold those bits to the writable array. This fixes the boot on the Anbernic systems using the AXP717 PMIC. Fixes: b5bfc8ab2484 ("mfd: axp20x: Add support for AXP717 PMIC") Reported-by: Chris Morgan Signed-off-by: Andre Przywara Reviewed-by: John Watts Link: https://lore.kernel.org/r/20240613233104.17529-1-andre.przywara@arm.com Signed-off-by: Lee Jones --- drivers/mfd/axp20x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index f2c0f144c0fc..dacd3c96c9f5 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -210,6 +210,7 @@ static const struct regmap_access_table axp313a_volatile_table = { static const struct regmap_range axp717_writeable_ranges[] = { regmap_reg_range(AXP717_IRQ0_EN, AXP717_IRQ4_EN), + regmap_reg_range(AXP717_IRQ0_STATE, AXP717_IRQ4_STATE), regmap_reg_range(AXP717_DCDC_OUTPUT_CONTROL, AXP717_CPUSLDO_CONTROL), }; From 004b8d1491b4bcbb7da1a3206d1e7e66822d47c6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 14 Jun 2024 09:55:58 +0200 Subject: [PATCH 326/778] ovl: fix encoding fid for lower only root ovl_check_encode_origin() should return a positive number if the lower dentry is to be encoded, zero otherwise. If there's no upper layer at all (read-only overlay), then it obviously needs to return positive. This was broken by commit 16aac5ad1fa9 ("ovl: support encoding non-decodable file handles"), which didn't take the lower-only configuration into account. Fix by checking the no-upper-layer case up-front. Reported-and-tested-by: Youzhong Yang Closes: https://lore.kernel.org/all/CADpNCvaBimi+zCYfRJHvCOhMih8OU0rmZkwLuh24MKKroRuT8Q@mail.gmail.com/ Fixes: 16aac5ad1fa9 ("ovl: support encoding non-decodable file handles") Cc: # v6.6 Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 063409069f56..5868cb222955 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -181,6 +181,10 @@ static int ovl_check_encode_origin(struct dentry *dentry) struct ovl_fs *ofs = OVL_FS(dentry->d_sb); bool decodable = ofs->config.nfs_export; + /* No upper layer? */ + if (!ovl_upper_mnt(ofs)) + return 1; + /* Lower file handle for non-upper non-decodable */ if (!ovl_dentry_upper(dentry) && !decodable) return 1; @@ -209,7 +213,7 @@ static int ovl_check_encode_origin(struct dentry *dentry) * ovl_connect_layer() will try to make origin's layer "connected" by * copying up a "connectable" ancestor. */ - if (d_is_dir(dentry) && ovl_upper_mnt(ofs) && decodable) + if (d_is_dir(dentry) && decodable) return ovl_connect_layer(dentry); /* Lower file handle for indexed and non-upper dir/non-dir */ From a6a75edc8669a4f030546c7390808ef0cc034742 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Jun 2024 19:33:53 +0200 Subject: [PATCH 327/778] ata: libata-scsi: Set the RMB bit only for removable media devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SCSI Removable Media Bit (RMB) should only be set for removable media, where the device stays and the media changes, e.g. CD-ROM or floppy. The ATA removable media device bit is obsoleted since ATA-8 ACS (2006), but before that it was used to indicate that the device can have its media removed (while the device stays). Commit 8a3e33cf92c7 ("ata: ahci: find eSATA ports and flag them as removable") introduced a change to set the RMB bit if the port has either the eSATA bit or the hot-plug capable bit set. The reasoning was that the author wanted his eSATA ports to get treated like a USB stick. This is however wrong. See "20-082r23SPC-6: Removable Medium Bit Expectations" which has since been integrated to SPC, which states that: """ Reports have been received that some USB Memory Stick device servers set the removable medium (RMB) bit to one. The rub comes when the medium is actually removed, because... The device server is removed concurrently with the medium removal. If there is no device server, then there is no device server that is waiting to have removable medium inserted. Sufficient numbers of SCSI analysts see such a device: - not as a device that supports removable medium; but - as a removable, hot pluggable device. """ The definition of the RMB bit in the SPC specification has since been clarified to match this. Thus, a USB stick should not have the RMB bit set (and neither shall an eSATA nor a hot-plug capable port). Commit dc8b4afc4a04 ("ata: ahci: don't mark HotPlugCapable Ports as external/removable") then changed so that the RMB bit is only set for the eSATA bit (and not for the hot-plug capable bit), because of a lot of bug reports of SATA devices were being automounted by udisks. However, treating eSATA and hot-plug capable ports differently is not correct. From the AHCI 1.3.1 spec: Hot Plug Capable Port (HPCP): When set to '1', indicates that this port's signal and power connectors are externally accessible via a joint signal and power connector for blindmate device hot plug. So a hot-plug capable port is an external port, just like commit 45b96d65ec68 ("ata: ahci: a hotplug capable port is an external port") claims. In order to not violate the SPC specification, modify the SCSI INQUIRY data to only set the RMB bit if the ATA device can have its media removed. This fixes a reported problem where GNOME/udisks was automounting devices connected to hot-plug capable ports. Fixes: 45b96d65ec68 ("ata: ahci: a hotplug capable port is an external port") Cc: stable@vger.kernel.org Reviewed-by: Mario Limonciello Reviewed-by: Thomas Weißschuh Tested-by: Thomas Weißschuh Reported-by: Thomas Weißschuh Closes: https://lore.kernel.org/linux-ide/c0de8262-dc4b-4c22-9fac-33432e5bddd3@t-8ch.de/ Signed-off-by: Damien Le Moal [cassel: wrote commit message] Signed-off-by: Niklas Cassel --- drivers/ata/libata-scsi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index cdf29b178ddc..bb4d30d377ae 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1831,11 +1831,11 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) 2 }; - /* set scsi removable (RMB) bit per ata bit, or if the - * AHCI port says it's external (Hotplug-capable, eSATA). + /* + * Set the SCSI Removable Media Bit (RMB) if the ATA removable media + * device bit (obsolete since ATA-8 ACS) is set. */ - if (ata_id_removable(args->id) || - (args->dev->link->ap->pflags & ATA_PFLAG_EXTERNAL)) + if (ata_id_removable(args->id)) hdr[1] |= (1 << 7); if (args->dev->class == ATA_DEV_ZAC) { From 5f75e081ab5cbfbe7aca2112a802e69576ee9778 Mon Sep 17 00:00:00 2001 From: Cyril Hrubis Date: Thu, 13 Jun 2024 18:38:17 +0200 Subject: [PATCH 328/778] loop: Disable fallocate() zero and discard if not supported If fallcate is implemented but zero and discard operations are not supported by the filesystem the backing file is on we continue to fill dmesg with errors from the blk_mq_end_request() since each time we call fallocate() on the loop device the EOPNOTSUPP error from lo_fallocate() ends up propagated into the block layer. In the end syscall succeeds since the blkdev_issue_zeroout() falls back to writing zeroes which makes the errors even more misleading and confusing. How to reproduce: 1. make sure /tmp is mounted as tmpfs 2. dd if=/dev/zero of=/tmp/disk.img bs=1M count=100 3. losetup /dev/loop0 /tmp/disk.img 4. mkfs.ext2 /dev/loop0 5. dmesg |tail [710690.898214] operation not supported error, dev loop0, sector 204672 op 0x9:(WRITE_ZEROES) flags 0x8000800 phys_seg 0 prio class 0 [710690.898279] operation not supported error, dev loop0, sector 522 op 0x9:(WRITE_ZEROES) flags 0x8000800 phys_seg 0 prio class 0 [710690.898603] operation not supported error, dev loop0, sector 16906 op 0x9:(WRITE_ZEROES) flags 0x8000800 phys_seg 0 prio class 0 [710690.898917] operation not supported error, dev loop0, sector 32774 op 0x9:(WRITE_ZEROES) flags 0x8000800 phys_seg 0 prio class 0 [710690.899218] operation not supported error, dev loop0, sector 49674 op 0x9:(WRITE_ZEROES) flags 0x8000800 phys_seg 0 prio class 0 [710690.899484] operation not supported error, dev loop0, sector 65542 op 0x9:(WRITE_ZEROES) flags 0x8000800 phys_seg 0 prio class 0 [710690.899743] operation not supported error, dev loop0, sector 82442 op 0x9:(WRITE_ZEROES) flags 0x8000800 phys_seg 0 prio class 0 [710690.900015] operation not supported error, dev loop0, sector 98310 op 0x9:(WRITE_ZEROES) flags 0x8000800 phys_seg 0 prio class 0 [710690.900276] operation not supported error, dev loop0, sector 115210 op 0x9:(WRITE_ZEROES) flags 0x8000800 phys_seg 0 prio class 0 [710690.900546] operation not supported error, dev loop0, sector 131078 op 0x9:(WRITE_ZEROES) flags 0x8000800 phys_seg 0 prio class 0 This patch changes the lo_fallocate() to clear the flags for zero and discard operations if we get EOPNOTSUPP from the backing file fallocate callback, that way we at least stop spewing errors after the first unsuccessful try. CC: Jan Kara Signed-off-by: Cyril Hrubis Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240613163817.22640-1-chrubis@suse.cz Signed-off-by: Jens Axboe --- drivers/block/loop.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 93780f41646b..1153721bc7c2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -302,6 +302,21 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq, return 0; } +static void loop_clear_limits(struct loop_device *lo, int mode) +{ + struct queue_limits lim = queue_limits_start_update(lo->lo_queue); + + if (mode & FALLOC_FL_ZERO_RANGE) + lim.max_write_zeroes_sectors = 0; + + if (mode & FALLOC_FL_PUNCH_HOLE) { + lim.max_hw_discard_sectors = 0; + lim.discard_granularity = 0; + } + + queue_limits_commit_update(lo->lo_queue, &lim); +} + static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, int mode) { @@ -320,6 +335,14 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq)); if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP)) return -EIO; + + /* + * We initially configure the limits in a hope that fallocate is + * supported and clear them here if that turns out not to be true. + */ + if (unlikely(ret == -EOPNOTSUPP)) + loop_clear_limits(lo, mode); + return ret; } From 721f2e6653f5ab0cc52b3a459c4a2158b92fcf80 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Thu, 13 Jun 2024 14:37:11 +0100 Subject: [PATCH 329/778] ALSA: hda: cs35l56: Component should be unbound before deconstruction The interface associated with the hda_component should be deactivated before the driver is deconstructed during removal. Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Signed-off-by: Simon Trimmer Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240613133713.75550-2-simont@opensource.cirrus.com --- sound/pci/hda/cs35l56_hda.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 0923e2589f5f..e134ede6c5aa 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -1077,12 +1077,12 @@ void cs35l56_hda_remove(struct device *dev) { struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); + component_del(cs35l56->base.dev, &cs35l56_hda_comp_ops); + pm_runtime_dont_use_autosuspend(cs35l56->base.dev); pm_runtime_get_sync(cs35l56->base.dev); pm_runtime_disable(cs35l56->base.dev); - component_del(cs35l56->base.dev, &cs35l56_hda_comp_ops); - cs_dsp_remove(&cs35l56->cs_dsp); kfree(cs35l56->system_name); From 6f9a40d61cad0f5560e8530b4dd4a05fc4d15987 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Thu, 13 Jun 2024 14:37:12 +0100 Subject: [PATCH 330/778] ALSA: hda: cs35l41: Component should be unbound before deconstruction The interface associated with the hda_component should be deactivated before the driver is deconstructed during removal. Fixes: 7b2f3eb492da ("ALSA: hda: cs35l41: Add support for CS35L41 in HDA systems") Signed-off-by: Simon Trimmer Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240613133713.75550-3-simont@opensource.cirrus.com --- sound/pci/hda/cs35l41_hda.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index d54d4d60b03e..031703f010be 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -2019,6 +2019,8 @@ void cs35l41_hda_remove(struct device *dev) { struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); + component_del(cs35l41->dev, &cs35l41_hda_comp_ops); + pm_runtime_get_sync(cs35l41->dev); pm_runtime_dont_use_autosuspend(cs35l41->dev); pm_runtime_disable(cs35l41->dev); @@ -2026,8 +2028,6 @@ void cs35l41_hda_remove(struct device *dev) if (cs35l41->halo_initialized) cs35l41_remove_dsp(cs35l41); - component_del(cs35l41->dev, &cs35l41_hda_comp_ops); - acpi_dev_put(cs35l41->dacpi); pm_runtime_put_noidle(cs35l41->dev); From d832b5a03e94a2a9f866dab3d04937a0f84ea116 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Thu, 13 Jun 2024 14:37:13 +0100 Subject: [PATCH 331/778] ALSA: hda: tas2781: Component should be unbound before deconstruction The interface associated with the hda_component should be deactivated before the driver is deconstructed during removal. Fixes: 4e7914eb1dae ("ALSA: hda/tas2781: remove sound controls in unbind") Signed-off-by: Simon Trimmer Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240613133713.75550-4-simont@opensource.cirrus.com --- sound/pci/hda/tas2781_hda_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 75f7674c66ee..fdee6592c502 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -777,11 +777,11 @@ static void tas2781_hda_remove(struct device *dev) { struct tas2781_hda *tas_hda = dev_get_drvdata(dev); + component_del(tas_hda->dev, &tas2781_hda_comp_ops); + pm_runtime_get_sync(tas_hda->dev); pm_runtime_disable(tas_hda->dev); - component_del(tas_hda->dev, &tas2781_hda_comp_ops); - pm_runtime_put_noidle(tas_hda->dev); tasdevice_remove(tas_hda->priv); From 8af49868e51ed1ba117b74728af12abe1eda82e5 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 13 Jun 2024 14:25:27 +0100 Subject: [PATCH 332/778] ASoC: cs35l56: Disconnect ASP1 TX sources when ASP1 DAI is hooked up If the ASP1 DAI is hooked up by the machine driver the ASP TX mixer sources should be initialized to disconnected. There aren't currently any available products using the ASP so this doesn't affect any existing systems. The cs35l56 does not have any fixed default for the mixer source registers. When the cs35l56 boots, its firmware patches these registers to setup a system-specific routing; this is so that Windows can use generic SDCA drivers instead of needing knowledge of chip-specific registers. The setup varies between end-products, which each have customized firmware, and so the default register state varies between end-products. It can also change if the firmware on an end-product is upgraded - for example if a change was needed to the routing for Windows use-cases. It must be emphasized that the settings applied by the firmware are not internal magic tuning; they are statically implementing use-case setup that on Linux would be done via ALSA controls. The driver is currently syncing the mixer controls with whatever initial state the firmware wrote to the registers, so that they report the actual audio routing. But if the ASP DAI is hooked up this can create a powered-up DAPM graph without anything intentionally setting up a path. This can lead to parts of the audio system powering up unexpectedly. For example when cs35l56 is connected to cs42l43 using a codec-codec link, this can create a complete DAPM graph which then powers-up cs42l43. But the cs42l43 can only be clocked from its SoundWire bus so this causes a bunch of errors in the kernel log where cs42l43 is unexpectedly powered-up without a clock. If the host is taking ownership of the ASP (either directly or as a codec-to-codec link) there is no need to keep the mixer settings that the firmware wrote. The driver has ALSA controls for setting these using standard Linux mechanisms. So if the machine driver hooks up the ASP the ASP mixers are initialized to "None" (no input). This prevents unintended DAPM-graph power-ups, and means the initial state of the mixers is always going to be None. Since the initial state of the mixers can vary from system to system and potentially between firmware upgrades, no use-case manager can currently assume that cs35l56 has a known initial state. The firmware could just as easily default them to "None" as to any input source. So defaulting them to "None" in the driver is not increasing the entropy of the system. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20240613132527.46537-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 8af89a263594..30497152e02a 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -215,6 +215,10 @@ static const struct reg_sequence cs35l56_asp1_defaults[] = { REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL5, 0x00020100), REG_SEQ0(CS35L56_ASP1_DATA_CONTROL1, 0x00000018), REG_SEQ0(CS35L56_ASP1_DATA_CONTROL5, 0x00000018), + REG_SEQ0(CS35L56_ASP1TX1_INPUT, 0x00000000), + REG_SEQ0(CS35L56_ASP1TX2_INPUT, 0x00000000), + REG_SEQ0(CS35L56_ASP1TX3_INPUT, 0x00000000), + REG_SEQ0(CS35L56_ASP1TX4_INPUT, 0x00000000), }; /* From be1fae62cf253a5b67526cee9fbc07689b97c125 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 13 Jun 2024 13:13:05 +0100 Subject: [PATCH 333/778] ASoC: q6apm-lpass-dai: close graph on prepare errors There is an issue around with error handling and graph management with the exising code, none of the error paths close the graph, which result in leaving the loaded graph in dsp, however the driver thinks otherwise. This can have a nasty side effect specially when we try to load the same graph to dsp, dsp returns error which leaves the board with no sound and requires restart. Fix this by properly closing the graph when we hit errors between open and close. Fixes: 30ad723b93ad ("ASoC: qdsp6: audioreach: add q6apm lpass dai support") Signed-off-by: Srinivas Kandagatla Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov # X13s Link: https://lore.kernel.org/r/20240613-q6apm-fixes-v1-1-d88953675ab3@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6apm-lpass-dais.c | 32 +++++++++++++++---------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c index 68a38f63a2db..66b911b49e3f 100644 --- a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c +++ b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c @@ -141,14 +141,17 @@ static void q6apm_lpass_dai_shutdown(struct snd_pcm_substream *substream, struct struct q6apm_lpass_dai_data *dai_data = dev_get_drvdata(dai->dev); int rc; - if (!dai_data->is_port_started[dai->id]) - return; - rc = q6apm_graph_stop(dai_data->graph[dai->id]); - if (rc < 0) - dev_err(dai->dev, "fail to close APM port (%d)\n", rc); + if (dai_data->is_port_started[dai->id]) { + rc = q6apm_graph_stop(dai_data->graph[dai->id]); + dai_data->is_port_started[dai->id] = false; + if (rc < 0) + dev_err(dai->dev, "fail to close APM port (%d)\n", rc); + } - q6apm_graph_close(dai_data->graph[dai->id]); - dai_data->is_port_started[dai->id] = false; + if (dai_data->graph[dai->id]) { + q6apm_graph_close(dai_data->graph[dai->id]); + dai_data->graph[dai->id] = NULL; + } } static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) @@ -163,8 +166,10 @@ static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct s q6apm_graph_stop(dai_data->graph[dai->id]); dai_data->is_port_started[dai->id] = false; - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { q6apm_graph_close(dai_data->graph[dai->id]); + dai_data->graph[dai->id] = NULL; + } } /** @@ -183,26 +188,29 @@ static int q6apm_lpass_dai_prepare(struct snd_pcm_substream *substream, struct s cfg->direction = substream->stream; rc = q6apm_graph_media_format_pcm(dai_data->graph[dai->id], cfg); - if (rc) { dev_err(dai->dev, "Failed to set media format %d\n", rc); - return rc; + goto err; } rc = q6apm_graph_prepare(dai_data->graph[dai->id]); if (rc) { dev_err(dai->dev, "Failed to prepare Graph %d\n", rc); - return rc; + goto err; } rc = q6apm_graph_start(dai_data->graph[dai->id]); if (rc < 0) { dev_err(dai->dev, "fail to start APM port %x\n", dai->id); - return rc; + goto err; } dai_data->is_port_started[dai->id] = true; return 0; +err: + q6apm_graph_close(dai_data->graph[dai->id]); + dai_data->graph[dai->id] = NULL; + return rc; } static int q6apm_lpass_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) From 2bbe3e5a2f4ef69d13be54f1cf895b4658287080 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 14 Jun 2024 12:17:33 +0200 Subject: [PATCH 334/778] bpf: Avoid splat in pskb_pull_reason syzkaller builds (CONFIG_DEBUG_NET=y) frequently trigger a debug hint in pskb_may_pull. We'd like to retain this debug check because it might hint at integer overflows and other issues (kernel code should pull headers, not huge value). In bpf case, this splat isn't interesting at all: such (nonsensical) bpf programs are typically generated by a fuzzer anyway. Do what Eric suggested and suppress such warning. For CONFIG_DEBUG_NET=n we don't need the extra check because pskb_may_pull will do the right thing: return an error without the WARN() backtrace. Fixes: 219eee9c0d16 ("net: skbuff: add overflow debug check to pull/push helpers") Reported-by: syzbot+0c4150bff9fff3bf023c@syzkaller.appspotmail.com Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Reviewed-by: Eric Dumazet Acked-by: Daniel Borkmann Closes: https://syzkaller.appspot.com/bug?extid=0c4150bff9fff3bf023c Link: https://lore.kernel.org/netdev/9f254c96-54f2-4457-b7ab-1d9f6187939c@gmail.com/ Link: https://lore.kernel.org/bpf/20240614101801.9496-1-fw@strlen.de --- net/core/filter.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 2510464692af..9933851c685e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1665,6 +1665,11 @@ static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); static inline int __bpf_try_make_writable(struct sk_buff *skb, unsigned int write_len) { +#ifdef CONFIG_DEBUG_NET + /* Avoid a splat in pskb_may_pull_reason() */ + if (write_len > INT_MAX) + return -EINVAL; +#endif return skb_ensure_writable(skb, write_len); } From d2278f3533a8c4933c52f85784ffa73e8250c524 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 14 Jun 2024 17:22:25 +0200 Subject: [PATCH 335/778] thermal: core: Synchronize suspend-prepare and post-suspend actions After commit 5a5efdaffda5 ("thermal: core: Resume thermal zones asynchronously") it is theoretically possible that, if a system suspend starts immediately after a system resume, thermal_zone_device_resume() spawned by the thermal PM notifier for one of the thermal zones at the end of the system resume will run after the PM thermal notifier for the suspend-prepare action. If that happens, tz->suspended set by the latter will be reset by the former which may lead to unexpected consequences. To avoid that race, synchronize thermal_zone_device_resume() with the suspend-prepare thermal PM notifier with the help of additional bool field and completion in struct thermal_zone_device. Note that this also ensures running __thermal_zone_device_update() at least once for each thermal zone between system resume and the following system suspend in case it is needed to start thermal mitigation. Fixes: 5a5efdaffda5 ("thermal: core: Resume thermal zones asynchronously") Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_core.c | 21 +++++++++++++++++++++ drivers/thermal/thermal_core.h | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 30567b499455..f92529fb0d10 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1397,6 +1397,7 @@ thermal_zone_device_register_with_trips(const char *type, ida_init(&tz->ida); mutex_init(&tz->lock); init_completion(&tz->removal); + init_completion(&tz->resume); id = ida_alloc(&thermal_tz_ida, GFP_KERNEL); if (id < 0) { result = id; @@ -1642,6 +1643,9 @@ static void thermal_zone_device_resume(struct work_struct *work) thermal_zone_device_init(tz); __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); + complete(&tz->resume); + tz->resuming = false; + mutex_unlock(&tz->lock); } @@ -1659,6 +1663,20 @@ static int thermal_pm_notify(struct notifier_block *nb, list_for_each_entry(tz, &thermal_tz_list, node) { mutex_lock(&tz->lock); + if (tz->resuming) { + /* + * thermal_zone_device_resume() queued up for + * this zone has not acquired the lock yet, so + * release it to let the function run and wait + * util it has done the work. + */ + mutex_unlock(&tz->lock); + + wait_for_completion(&tz->resume); + + mutex_lock(&tz->lock); + } + tz->suspended = true; mutex_unlock(&tz->lock); @@ -1676,6 +1694,9 @@ static int thermal_pm_notify(struct notifier_block *nb, cancel_delayed_work(&tz->poll_queue); + reinit_completion(&tz->resume); + tz->resuming = true; + /* * Replace the work function with the resume one, which * will restore the original work function and schedule diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 20e7b45673d6..66f67e54e0c8 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -55,6 +55,7 @@ struct thermal_governor { * @type: the thermal zone device type * @device: &struct device for this thermal zone * @removal: removal completion + * @resume: resume completion * @trip_temp_attrs: attributes for trip points for sysfs: trip temperature * @trip_type_attrs: attributes for trip points for sysfs: trip type * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis @@ -89,6 +90,7 @@ struct thermal_governor { * @poll_queue: delayed work for polling * @notify_event: Last notification event * @suspended: thermal zone suspend indicator + * @resuming: indicates whether or not thermal zone resume is in progress * @trips: array of struct thermal_trip objects */ struct thermal_zone_device { @@ -96,6 +98,7 @@ struct thermal_zone_device { char type[THERMAL_NAME_LENGTH]; struct device device; struct completion removal; + struct completion resume; struct attribute_group trips_attribute_group; struct thermal_attr *trip_temp_attrs; struct thermal_attr *trip_type_attrs; @@ -123,6 +126,7 @@ struct thermal_zone_device { struct delayed_work poll_queue; enum thermal_notify_event notify_event; bool suspended; + bool resuming; #ifdef CONFIG_THERMAL_DEBUGFS struct thermal_debugfs *debugfs; #endif From 494c7d055081da066424706b28faa9a4c719d852 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 14 Jun 2024 17:26:00 +0200 Subject: [PATCH 336/778] thermal: core: Change PM notifier priority to the minimum It is reported that commit 5a5efdaffda5 ("thermal: core: Resume thermal zones asynchronously") causes battery data in sysfs on Thinkpad P1 Gen2 to become invalid after a resume from S3 (and it is necessary to reboot the machine to restore correct battery data). Some investigation into the problem indicated that it happened because, after the commit in question, the ACPI battery PM notifier ran in parallel with thermal_zone_device_resume() for one of the thermal zones which apparently confused the platform firmware on the affected system. While the exact reason for the firmware confusion remains unclear, it is arguably not particularly relevant, and the expected behavior of the affected system can be restored by making the thermal PM notifier run at the lowest priority which avoids interference between work items spawned by it and the other PM notifiers (that will run before those work items now). Fixes: 5a5efdaffda5 ("thermal: core: Resume thermal zones asynchronously") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218881 Reported-by: fhortner@yahoo.de Tested-by: fhortner@yahoo.de Cc: 6.8+ # 6.8+ Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index f92529fb0d10..f7c38c0d6199 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1721,6 +1721,12 @@ static int thermal_pm_notify(struct notifier_block *nb, static struct notifier_block thermal_pm_nb = { .notifier_call = thermal_pm_notify, + /* + * Run at the lowest priority to avoid interference between the thermal + * zone resume work items spawned by thermal_pm_notify() and the other + * PM notifiers. + */ + .priority = INT_MIN, }; static int __init thermal_init(void) From 0a5d3258d7c97295a89d22e54733b54aacb62562 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 3 Jun 2024 22:23:15 -0700 Subject: [PATCH 337/778] compiler_types.h: Define __retain for __attribute__((__retain__)) Some code includes the __used macro to prevent functions and data from being optimized out. This macro implements __attribute__((__used__)), which operates at the compiler and IR-level, and so still allows a linker to remove objects intended to be kept. Compilers supporting __attribute__((__retain__)) can address this gap by setting the flag SHF_GNU_RETAIN on the section of a function/variable, indicating to the linker the object should be retained. This attribute is available since gcc 11, clang 13, and binutils 2.36. Provide a __retain macro implementing __attribute__((__retain__)), whose first user will be the '__bpf_kfunc' tag. [ Additional remark from discussion: Why is CONFIG_LTO_CLANG added here? The __used macro permits garbage collection at section level, so CLANG_LTO_CLANG without CONFIG_LD_DEAD_CODE_DATA_ELIMINATION should not change final section dynamics? The conditional guard was included to ensure consistent behaviour between __retain and other features forcing split sections. In particular, the same guard is used in vmlinux.lds.h to merge split sections where needed. For example, using __retain in LLVM builds without CONFIG_LTO was failing CI tests on kernel-patches/bpf because the kernel didn't boot properly. And in further testing, the kernel had no issues loading BPF kfunc modules with such split sections, so the module (partial) linking scripts were left alone. ] Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Cc: Yonghong Song Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/ Link: https://lore.kernel.org/bpf/b31bca5a5e6765a0f32cc8c19b1d9cdbfaa822b5.1717477560.git.Tony.Ambardar@gmail.com --- include/linux/compiler_types.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 93600de3800b..f14c275950b5 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __preserve_most #endif +/* + * Annotating a function/variable with __retain tells the compiler to place + * the object in its own section and set the flag SHF_GNU_RETAIN. This flag + * instructs the linker to retain the object during garbage-cleanup or LTO + * phases. + * + * Note that the __used macro is also used to prevent functions or data + * being optimized out, but operates at the compiler/IR-level and may still + * allow unintended removal of objects during linking. + * + * Optional: only supported since gcc >= 11, clang >= 13 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#retain + */ +#if __has_attribute(__retain__) && \ + (defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \ + defined(CONFIG_LTO_CLANG)) +# define __retain __attribute__((__retain__)) +#else +# define __retain +#endif + /* Compiler specific macros. */ #ifdef __clang__ #include From 7bdcedd5c8fb88e7176b93812b139eca5fe0aa46 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 3 Jun 2024 22:23:16 -0700 Subject: [PATCH 338/778] bpf: Harden __bpf_kfunc tag against linker kfunc removal BPF kfuncs are often not directly referenced and may be inadvertently removed by optimization steps during kernel builds, thus the __bpf_kfunc tag mitigates against this removal by including the __used macro. However, this macro alone does not prevent removal during linking, and may still yield build warnings (e.g. on mips64el): [...] LD vmlinux BTFIDS vmlinux WARN: resolve_btfids: unresolved symbol bpf_verify_pkcs7_signature WARN: resolve_btfids: unresolved symbol bpf_lookup_user_key WARN: resolve_btfids: unresolved symbol bpf_lookup_system_key WARN: resolve_btfids: unresolved symbol bpf_key_put WARN: resolve_btfids: unresolved symbol bpf_iter_task_next WARN: resolve_btfids: unresolved symbol bpf_iter_css_task_new WARN: resolve_btfids: unresolved symbol bpf_get_file_xattr WARN: resolve_btfids: unresolved symbol bpf_ct_insert_entry WARN: resolve_btfids: unresolved symbol bpf_cgroup_release WARN: resolve_btfids: unresolved symbol bpf_cgroup_from_id WARN: resolve_btfids: unresolved symbol bpf_cgroup_acquire WARN: resolve_btfids: unresolved symbol bpf_arena_free_pages NM System.map SORTTAB vmlinux OBJCOPY vmlinux.32 [...] Update the __bpf_kfunc tag to better guard against linker optimization by including the new __retain compiler macro, which fixes the warnings above. Verify the __retain macro with readelf by checking object flags for 'R': $ readelf -Wa kernel/trace/bpf_trace.o Section Headers: [Nr] Name Type Address Off Size ES Flg Lk Inf Al [...] [178] .text.bpf_key_put PROGBITS 00000000 6420 0050 00 AXR 0 0 8 [...] Key to Flags: [...] R (retain), D (mbind), p (processor specific) Fixes: 57e7c169cd6a ("bpf: Add __bpf_kfunc tag for marking kernel functions as kfuncs") Reported-by: kernel test robot Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Tested-by: Jiri Olsa Reviewed-by: Jiri Olsa Cc: Yonghong Song Closes: https://lore.kernel.org/r/202401211357.OCX9yllM-lkp@intel.com/ Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/ Link: https://lore.kernel.org/bpf/e9c64e9b5c073dabd457ff45128aabcab7630098.1717477560.git.Tony.Ambardar@gmail.com --- include/linux/btf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index f9e56fd12a9f..7c3e40c3295e 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -82,7 +82,7 @@ * as to avoid issues such as the compiler inlining or eliding either a static * kfunc, or a global kfunc in an LTO build. */ -#define __bpf_kfunc __used noinline +#define __bpf_kfunc __used __retain noinline #define __bpf_kfunc_start_defs() \ __diag_push(); \ From 7ed352d34f1a09a7659c53de07785115587499fe Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 13 Jun 2024 14:30:44 -0700 Subject: [PATCH 339/778] netdev-genl: fix error codes when outputting XDP features -EINVAL will interrupt the dump. The correct error to return if we have more data to dump is -EMSGSIZE. Discovered by doing: for i in `seq 80`; do ip link add type veth; done ./cli.py --dbg-small-recv 5300 --spec netdev.yaml --dump dev-get >> /dev/null [...] nl_len = 64 (48) nl_flags = 0x0 nl_type = 19 nl_len = 20 (4) nl_flags = 0x2 nl_type = 3 error: -22 Fixes: d3d854fd6a1d ("netdev-genl: create a simple family for netdev stuff") Reviewed-by: Amritha Nambiar Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240613213044.3675745-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/netdev-genl.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 1f6ae6379e0f..05f9515d2c05 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -59,22 +59,22 @@ XDP_METADATA_KFUNC_xxx nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, xdp_rx_meta, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, - xsk_features, NETDEV_A_DEV_PAD)) { - genlmsg_cancel(rsp, hdr); - return -EINVAL; - } + xsk_features, NETDEV_A_DEV_PAD)) + goto err_cancel_msg; if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) { if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, - netdev->xdp_zc_max_segs)) { - genlmsg_cancel(rsp, hdr); - return -EINVAL; - } + netdev->xdp_zc_max_segs)) + goto err_cancel_msg; } genlmsg_end(rsp, hdr); return 0; + +err_cancel_msg: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; } static void From e789523fe248a80839f8dfe736ca96e5c442a9e8 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 18:03:43 +0900 Subject: [PATCH 340/778] firewire: fix website URL in Kconfig The wiki in kernel.org is no longer updated. This commit replaces the website URL with the latest one. Link: https://lore.kernel.org/r/20240613090343.416198-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firewire/Kconfig b/drivers/firewire/Kconfig index 869598b20e3a..5268b3f0a25a 100644 --- a/drivers/firewire/Kconfig +++ b/drivers/firewire/Kconfig @@ -11,7 +11,7 @@ config FIREWIRE This is the new-generation IEEE 1394 (FireWire) driver stack a.k.a. Juju, a new implementation designed for robustness and simplicity. - See http://ieee1394.wiki.kernel.org/index.php/Juju_Migration + See http://ieee1394.docs.kernel.org/en/latest/migration.html for information about migration from the older Linux 1394 stack to the new driver stack. From e7da16abf030b39c3598574d5457f324a6f0e4a7 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:33 +0900 Subject: [PATCH 341/778] firewire: core: record card index in tracepoinrts events derived from async_outbound_complete_template The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 6 +++--- include/trace/events/firewire.h | 17 ++++++++++------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 571fdff65c2b..de75e758fd07 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -174,8 +174,8 @@ static void transmit_complete_callback(struct fw_packet *packet, struct fw_transaction *t = container_of(packet, struct fw_transaction, packet); - trace_async_request_outbound_complete((uintptr_t)t, packet->generation, packet->speed, - status, packet->timestamp); + trace_async_request_outbound_complete((uintptr_t)t, card->index, packet->generation, + packet->speed, status, packet->timestamp); switch (status) { case ACK_COMPLETE: @@ -674,7 +674,7 @@ static void free_response_callback(struct fw_packet *packet, { struct fw_request *request = container_of(packet, struct fw_request, response); - trace_async_response_outbound_complete((uintptr_t)request, packet->generation, + trace_async_response_outbound_complete((uintptr_t)request, card->index, packet->generation, packet->speed, status, packet->timestamp); // Decrease the reference count since not at in-flight. diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index d695a560673f..ca6ea9bd1eba 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -71,10 +71,11 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template, // The value of status is one of ack codes and rcodes specific to Linux FireWire subsystem. DECLARE_EVENT_CLASS(async_outbound_complete_template, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), - TP_ARGS(transaction, generation, scode, status, timestamp), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp), TP_STRUCT__entry( __field(u64, transaction) + __field(u8, card_index) __field(u8, generation) __field(u8, scode) __field(u8, status) @@ -82,14 +83,16 @@ DECLARE_EVENT_CLASS(async_outbound_complete_template, ), TP_fast_assign( __entry->transaction = transaction; + __entry->card_index = card_index; __entry->generation = generation; __entry->scode = scode; __entry->status = status; __entry->timestamp = timestamp; ), TP_printk( - "transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x", + "transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, __entry->status, @@ -144,8 +147,8 @@ DEFINE_EVENT(async_outbound_initiate_template, async_request_outbound_initiate, ); DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), - TP_ARGS(transaction, generation, scode, status, timestamp) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp) ); DEFINE_EVENT(async_inbound_template, async_response_inbound, @@ -194,8 +197,8 @@ DEFINE_EVENT_PRINT(async_outbound_initiate_template, async_response_outbound_ini ); DEFINE_EVENT(async_outbound_complete_template, async_response_outbound_complete, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), - TP_ARGS(transaction, generation, scode, status, timestamp) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp) ); #undef ASYNC_HEADER_GET_DESTINATION From 64e02b64fb1d832a9a5254055a829db64750421a Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:34 +0900 Subject: [PATCH 342/778] firewire: core: record card index in tracepoinrts events derived from async_outbound_initiate_template The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 10 ++++++---- include/trace/events/firewire.h | 20 ++++++++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index de75e758fd07..3f9361d15607 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -398,7 +398,8 @@ void __fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode spin_unlock_irqrestore(&card->lock, flags); - trace_async_request_outbound_initiate((uintptr_t)t, generation, speed, t->packet.header, payload, + trace_async_request_outbound_initiate((uintptr_t)t, card->index, generation, speed, + t->packet.header, payload, tcode_is_read_request(tcode) ? 0 : length / 4); card->driver->send_request(card, &t->packet); @@ -879,9 +880,10 @@ void fw_send_response(struct fw_card *card, // Increase the reference count so that the object is kept during in-flight. fw_request_get(request); - trace_async_response_outbound_initiate((uintptr_t)request, request->response.generation, - request->response.speed, request->response.header, - data, data ? data_length / 4 : 0); + trace_async_response_outbound_initiate((uintptr_t)request, card->index, + request->response.generation, request->response.speed, + request->response.header, data, + data ? data_length / 4 : 0); card->driver->send_response(card, &request->response); } diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index ca6ea9bd1eba..a3d9916cbad1 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -36,10 +36,11 @@ #define QUADLET_SIZE 4 DECLARE_EVENT_CLASS(async_outbound_initiate_template, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, header, data, data_count), TP_STRUCT__entry( __field(u64, transaction) + __field(u8, card_index) __field(u8, generation) __field(u8, scode) __array(u32, header, ASYNC_HEADER_QUADLET_COUNT) @@ -47,6 +48,7 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template, ), TP_fast_assign( __entry->transaction = transaction; + __entry->card_index = card_index; __entry->generation = generation; __entry->scode = scode; memcpy(__entry->header, header, QUADLET_SIZE * ASYNC_HEADER_QUADLET_COUNT); @@ -54,8 +56,9 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template, ), // This format is for the request subaction. TP_printk( - "transaction=0x%llx generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, ASYNC_HEADER_GET_DESTINATION(__entry->header), @@ -142,8 +145,8 @@ DECLARE_EVENT_CLASS(async_inbound_template, ); DEFINE_EVENT(async_outbound_initiate_template, async_request_outbound_initiate, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, header, data, data_count) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, header, data, data_count) ); DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete, @@ -178,11 +181,12 @@ DEFINE_EVENT_PRINT(async_inbound_template, async_request_inbound, ); DEFINE_EVENT_PRINT(async_outbound_initiate_template, async_response_outbound_initiate, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, header, data, data_count), TP_printk( - "transaction=0x%llx generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, ASYNC_HEADER_GET_DESTINATION(__entry->header), From 65ec7ebefe7de01281cce1f552ebd4dd00386665 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:35 +0900 Subject: [PATCH 343/778] firewire: core: record card index in tracepoinrts events derived from async_inbound_template The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 8 ++++---- include/trace/events/firewire.h | 20 ++++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 3f9361d15607..3503c238f8ae 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -1009,8 +1009,8 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) return; } - trace_async_request_inbound((uintptr_t)request, p->generation, p->speed, p->ack, - p->timestamp, p->header, request->data, + trace_async_request_inbound((uintptr_t)request, card->index, p->generation, p->speed, + p->ack, p->timestamp, p->header, request->data, tcode_is_read_request(tcode) ? 0 : request->length / 4); offset = async_header_get_offset(p->header); @@ -1080,8 +1080,8 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) } spin_unlock_irqrestore(&card->lock, flags); - trace_async_response_inbound((uintptr_t)t, p->generation, p->speed, p->ack, p->timestamp, - p->header, data, data_length / 4); + trace_async_response_inbound((uintptr_t)t, card->index, p->generation, p->speed, p->ack, + p->timestamp, p->header, data, data_length / 4); if (!t) { timed_out: diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index a3d9916cbad1..b72f613cfa02 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -105,10 +105,11 @@ DECLARE_EVENT_CLASS(async_outbound_complete_template, // The value of status is one of ack codes and rcodes specific to Linux FireWire subsystem. DECLARE_EVENT_CLASS(async_inbound_template, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count), TP_STRUCT__entry( __field(u64, transaction) + __field(u8, card_index) __field(u8, generation) __field(u8, scode) __field(u8, status) @@ -118,6 +119,7 @@ DECLARE_EVENT_CLASS(async_inbound_template, ), TP_fast_assign( __entry->transaction = transaction; + __entry->card_index = card_index; __entry->generation = generation; __entry->scode = scode; __entry->status = status; @@ -127,8 +129,9 @@ DECLARE_EVENT_CLASS(async_inbound_template, ), // This format is for the response subaction. TP_printk( - "transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, __entry->status, @@ -155,16 +158,17 @@ DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete, ); DEFINE_EVENT(async_inbound_template, async_response_inbound, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count) ); DEFINE_EVENT_PRINT(async_inbound_template, async_request_inbound, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count), TP_printk( - "transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, __entry->status, From 3cb44a72a39835b368ab78d739819330089aa2bf Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:36 +0900 Subject: [PATCH 344/778] firewire: core: record card index in async_phy_outbound_initiate tracepoints event The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-cdev.c | 4 ++-- drivers/firewire/core-transaction.c | 2 +- include/trace/events/firewire.h | 9 ++++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 55993c9e0b90..ff8739f96af5 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1659,8 +1659,8 @@ static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg) memcpy(pp->data, a->data, sizeof(a->data)); } - trace_async_phy_outbound_initiate((uintptr_t)&e->p, e->p.generation, e->p.header[1], - e->p.header[2]); + trace_async_phy_outbound_initiate((uintptr_t)&e->p, card->index, e->p.generation, + e->p.header[1], e->p.header[2]); card->driver->send_request(card, &e->p); diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 3503c238f8ae..e522dc3d9897 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -504,7 +504,7 @@ void fw_send_phy_config(struct fw_card *card, phy_config_packet.generation = generation; reinit_completion(&phy_config_done); - trace_async_phy_outbound_initiate((uintptr_t)&phy_config_packet, + trace_async_phy_outbound_initiate((uintptr_t)&phy_config_packet, card->index, phy_config_packet.generation, phy_config_packet.header[1], phy_config_packet.header[2]); diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index b72f613cfa02..a59dc26b2a53 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -217,23 +217,26 @@ DEFINE_EVENT(async_outbound_complete_template, async_response_outbound_complete, #undef ASYNC_HEADER_GET_RCODE TRACE_EVENT(async_phy_outbound_initiate, - TP_PROTO(u64 packet, unsigned int generation, u32 first_quadlet, u32 second_quadlet), - TP_ARGS(packet, generation, first_quadlet, second_quadlet), + TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, u32 first_quadlet, u32 second_quadlet), + TP_ARGS(packet, card_index, generation, first_quadlet, second_quadlet), TP_STRUCT__entry( __field(u64, packet) + __field(u8, card_index) __field(u8, generation) __field(u32, first_quadlet) __field(u32, second_quadlet) ), TP_fast_assign( __entry->packet = packet; + __entry->card_index = card_index; __entry->generation = generation; __entry->first_quadlet = first_quadlet; __entry->second_quadlet = second_quadlet ), TP_printk( - "packet=0x%llx generation=%u first_quadlet=0x%08x second_quadlet=0x%08x", + "packet=0x%llx card_index=%u generation=%u first_quadlet=0x%08x second_quadlet=0x%08x", __entry->packet, + __entry->card_index, __entry->generation, __entry->first_quadlet, __entry->second_quadlet From 810f2aa83563020d834425018303f2aaecef1e6e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:37 +0900 Subject: [PATCH 345/778] firewire: core: record card index in async_phy_outbound_complete tracepoints event The asynchronous transmission of phy packet is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-cdev.c | 2 +- drivers/firewire/core-transaction.c | 2 +- include/trace/events/firewire.h | 9 ++++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index ff8739f96af5..9a7dc90330a3 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1559,7 +1559,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, struct client *e_client = e->client; u32 rcode; - trace_async_phy_outbound_complete((uintptr_t)packet, status, packet->generation, + trace_async_phy_outbound_complete((uintptr_t)packet, card->index, status, packet->generation, packet->timestamp); switch (status) { diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index e522dc3d9897..bd5a467cfd60 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -464,7 +464,7 @@ static DECLARE_COMPLETION(phy_config_done); static void transmit_phy_packet_callback(struct fw_packet *packet, struct fw_card *card, int status) { - trace_async_phy_outbound_complete((uintptr_t)packet, packet->generation, status, + trace_async_phy_outbound_complete((uintptr_t)packet, card->index, packet->generation, status, packet->timestamp); complete(&phy_config_done); } diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index a59dc26b2a53..61c7a2461fbc 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -244,23 +244,26 @@ TRACE_EVENT(async_phy_outbound_initiate, ); TRACE_EVENT(async_phy_outbound_complete, - TP_PROTO(u64 packet, unsigned int generation, unsigned int status, unsigned int timestamp), - TP_ARGS(packet, generation, status, timestamp), + TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, unsigned int status, unsigned int timestamp), + TP_ARGS(packet, card_index, generation, status, timestamp), TP_STRUCT__entry( __field(u64, packet) + __field(u8, card_index) __field(u8, generation) __field(u8, status) __field(u16, timestamp) ), TP_fast_assign( __entry->packet = packet; + __entry->card_index = card_index; __entry->generation = generation; __entry->status = status; __entry->timestamp = timestamp; ), TP_printk( - "packet=0x%llx generation=%u status=%u timestamp=0x%04x", + "packet=0x%llx card_index=%u generation=%u status=%u timestamp=0x%04x", __entry->packet, + __entry->card_index, __entry->generation, __entry->status, __entry->timestamp From abbb4bd96d7f871b10bd7058f7284ffaf4e8257f Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:38 +0900 Subject: [PATCH 346/778] firewire: core: record card index in async_phy_inbound tracepoints event The asynchronous transmission of phy packet is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-7-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 2 +- include/trace/events/firewire.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index bd5a467cfd60..76ab6a209768 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -997,7 +997,7 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) tcode = async_header_get_tcode(p->header); if (tcode_is_link_internal(tcode)) { - trace_async_phy_inbound((uintptr_t)p, p->generation, p->ack, p->timestamp, + trace_async_phy_inbound((uintptr_t)p, card->index, p->generation, p->ack, p->timestamp, p->header[1], p->header[2]); fw_cdev_handle_phy_packet(card, p); return; diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 61c7a2461fbc..e5524fc71880 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -271,10 +271,11 @@ TRACE_EVENT(async_phy_outbound_complete, ); TRACE_EVENT(async_phy_inbound, - TP_PROTO(u64 packet, unsigned int generation, unsigned int status, unsigned int timestamp, u32 first_quadlet, u32 second_quadlet), - TP_ARGS(packet, generation, status, timestamp, first_quadlet, second_quadlet), + TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, unsigned int status, unsigned int timestamp, u32 first_quadlet, u32 second_quadlet), + TP_ARGS(packet, card_index, generation, status, timestamp, first_quadlet, second_quadlet), TP_STRUCT__entry( __field(u64, packet) + __field(u8, card_index) __field(u8, generation) __field(u8, status) __field(u16, timestamp) @@ -290,8 +291,9 @@ TRACE_EVENT(async_phy_inbound, __entry->second_quadlet = second_quadlet ), TP_printk( - "packet=0x%llx generation=%u status=%u timestamp=0x%04x first_quadlet=0x%08x second_quadlet=0x%08x", + "packet=0x%llx card_index=%u generation=%u status=%u timestamp=0x%04x first_quadlet=0x%08x second_quadlet=0x%08x", __entry->packet, + __entry->card_index, __entry->generation, __entry->status, __entry->timestamp, From 7507dbc46b780e9fa2ec3d4db7cd9ce07e722927 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:39 +0900 Subject: [PATCH 347/778] firewire: core: record card index in tracepoinrts events derived from bus_reset_arrange_template The asynchronous transmission of phy packet is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-8-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-card.c | 6 +++--- include/trace/events/firewire.h | 21 ++++++++++++--------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 127d87e3a153..f8b99dd6cd82 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -222,14 +222,14 @@ static int reset_bus(struct fw_card *card, bool short_reset) int reg = short_reset ? 5 : 1; int bit = short_reset ? PHY_BUS_SHORT_RESET : PHY_BUS_RESET; - trace_bus_reset_initiate(card->generation, short_reset); + trace_bus_reset_initiate(card->index, card->generation, short_reset); return card->driver->update_phy_reg(card, reg, 0, bit); } void fw_schedule_bus_reset(struct fw_card *card, bool delayed, bool short_reset) { - trace_bus_reset_schedule(card->generation, short_reset); + trace_bus_reset_schedule(card->index, card->generation, short_reset); /* We don't try hard to sort out requests of long vs. short resets. */ card->br_short = short_reset; @@ -249,7 +249,7 @@ static void br_work(struct work_struct *work) /* Delay for 2s after last reset per IEEE 1394 clause 8.2.1. */ if (card->reset_jiffies != 0 && time_before64(get_jiffies_64(), card->reset_jiffies + 2 * HZ)) { - trace_bus_reset_postpone(card->generation, card->br_short); + trace_bus_reset_postpone(card->index, card->generation, card->br_short); if (!queue_delayed_work(fw_workqueue, &card->br_work, 2 * HZ)) fw_card_put(card); diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index e5524fc71880..e6485051f546 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -303,36 +303,39 @@ TRACE_EVENT(async_phy_inbound, ); DECLARE_EVENT_CLASS(bus_reset_arrange_template, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset), + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset), TP_STRUCT__entry( + __field(u8, card_index) __field(u8, generation) __field(bool, short_reset) ), TP_fast_assign( + __entry->card_index = card_index; __entry->generation = generation; __entry->short_reset = short_reset; ), TP_printk( - "generation=%u short_reset=%s", + "card_index=%u generation=%u short_reset=%s", + __entry->card_index, __entry->generation, __entry->short_reset ? "true" : "false" ) ); DEFINE_EVENT(bus_reset_arrange_template, bus_reset_initiate, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset) + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset) ); DEFINE_EVENT(bus_reset_arrange_template, bus_reset_schedule, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset) + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset) ); DEFINE_EVENT(bus_reset_arrange_template, bus_reset_postpone, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset) + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset) ); TRACE_EVENT(bus_reset_handle, From 893098b2af3ea12bab2f505aa825662b379df67d Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:40 +0900 Subject: [PATCH 348/778] firewire: core: record card index in bus_reset_handle tracepoints event The bus reset event occurs in the bus managed by one of 1394 OHCI controller in Linux system, however the existing tracepoints events has the lack of data about it to distinguish the issued hardware from the others. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-9-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-topology.c | 2 +- include/trace/events/firewire.h | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index 837cc44d8d9f..8107eebd4296 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -508,7 +508,7 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation, struct fw_node *local_node; unsigned long flags; - trace_bus_reset_handle(generation, node_id, bm_abdicate, self_ids, self_id_count); + trace_bus_reset_handle(card->index, generation, node_id, bm_abdicate, self_ids, self_id_count); spin_lock_irqsave(&card->lock, flags); diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index e6485051f546..5ccc0d91b220 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -339,22 +339,25 @@ DEFINE_EVENT(bus_reset_arrange_template, bus_reset_postpone, ); TRACE_EVENT(bus_reset_handle, - TP_PROTO(unsigned int generation, unsigned int node_id, bool bm_abdicate, u32 *self_ids, unsigned int self_id_count), - TP_ARGS(generation, node_id, bm_abdicate, self_ids, self_id_count), + TP_PROTO(unsigned int card_index, unsigned int generation, unsigned int node_id, bool bm_abdicate, u32 *self_ids, unsigned int self_id_count), + TP_ARGS(card_index, generation, node_id, bm_abdicate, self_ids, self_id_count), TP_STRUCT__entry( + __field(u8, card_index) __field(u8, generation) __field(u8, node_id) __field(bool, bm_abdicate) __dynamic_array(u32, self_ids, self_id_count) ), TP_fast_assign( + __entry->card_index = card_index; __entry->generation = generation; __entry->node_id = node_id; __entry->bm_abdicate = bm_abdicate; memcpy(__get_dynamic_array(self_ids), self_ids, __get_dynamic_array_len(self_ids)); ), TP_printk( - "generation=%u node_id=0x%04x bm_abdicate=%s self_ids=%s", + "card_index=%u generation=%u node_id=0x%04x bm_abdicate=%s self_ids=%s", + __entry->card_index, __entry->generation, __entry->node_id, __entry->bm_abdicate ? "true" : "false", From c03984d43a9dd9282da54ccf275419f666029452 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Jun 2024 16:00:43 +0800 Subject: [PATCH 349/778] arm64: dts: imx8mp: Fix TC9595 input clock on DH i.MX8M Plus DHCOM SoM The IMX8MP_CLK_CLKOUT2 supplies the TC9595 bridge with 13 MHz reference clock. The IMX8MP_CLK_CLKOUT2 is supplied from IMX8MP_AUDIO_PLL2_OUT. The IMX8MP_CLK_CLKOUT2 operates only as a power-of-two divider, and the current 156 MHz is not power-of-two divisible to achieve 13 MHz. To achieve 13 MHz output from IMX8MP_CLK_CLKOUT2, set IMX8MP_AUDIO_PLL2_OUT to 208 MHz, because 208 MHz / 16 = 13 MHz. Fixes: 20d0b83e712b ("arm64: dts: imx8mp: Add TC9595 bridge on DH electronics i.MX8M Plus DHCOM") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi index 43f1d45ccc96..f5115f9e8c47 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi @@ -254,7 +254,7 @@ <&clk IMX8MP_CLK_CLKOUT2>, <&clk IMX8MP_AUDIO_PLL2_OUT>; assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL2_OUT>; - assigned-clock-rates = <13000000>, <13000000>, <156000000>; + assigned-clock-rates = <13000000>, <13000000>, <208000000>; reset-gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>; status = "disabled"; From bcdea3e81ea51c9e89e3b11aac2612e1b4330bee Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Tue, 14 May 2024 11:07:18 +0800 Subject: [PATCH 350/778] arm: dts: imx53-qsb-hdmi: Disable panel instead of deleting node We cannot use /delete-node/ directive to delete a node in a DT overlay. The node won't be deleted effectively. Instead, set the node's status property to "disabled" to achieve something similar. Fixes: eeb403df953f ("ARM: dts: imx53-qsb: add support for the HDMI expander") Signed-off-by: Liu Ying Reviewed-by: Dmitry Baryshkov Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi | 2 +- arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi b/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi index d80440446473..05d7a462ea25 100644 --- a/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi @@ -85,7 +85,7 @@ }; }; - panel { + panel_dpi: panel { compatible = "sii,43wvf1g"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_display_power>; diff --git a/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso b/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso index c84e9b052527..151e9cee3c87 100644 --- a/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso +++ b/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso @@ -10,8 +10,6 @@ /plugin/; &{/} { - /delete-node/ panel; - hdmi: connector-hdmi { compatible = "hdmi-connector"; label = "hdmi"; @@ -82,6 +80,10 @@ }; }; +&panel_dpi { + status = "disabled"; +}; + &tve { status = "disabled"; }; From a1439d89480754ddbc0a837544129ff5100f4087 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Jun 2024 14:12:45 +0200 Subject: [PATCH 351/778] efi/arm: Disable LPAE PAN when calling EFI runtime services EFI runtime services are remapped into the lower 1 GiB of virtual address space at boot, so they are guaranteed to be able to co-exist with the kernel virtual mappings without the need to allocate space for them in the kernel's vmalloc region, which is rather small. This means those mappings are covered by TTBR0 when LPAE PAN is enabled, and so 'user' access must be enabled while such calls are in progress. Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/efi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 78282ced5038..e408399d5f0e 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_EFI void efi_init(void); @@ -25,6 +26,18 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md, boo #define arch_efi_call_virt_setup() efi_virtmap_load() #define arch_efi_call_virt_teardown() efi_virtmap_unload() +#ifdef CONFIG_CPU_TTBR0_PAN +#undef arch_efi_call_virt +#define arch_efi_call_virt(p, f, args...) ({ \ + unsigned int flags = uaccess_save_and_enable(); \ + efi_status_t res = _Generic((p)->f(args), \ + efi_status_t: (p)->f(args), \ + default: ((p)->f(args), EFI_ABORTED)); \ + uaccess_restore(flags); \ + res; \ +}) +#endif + #define ARCH_EFI_IRQ_FLAGS_MASK \ (PSR_J_BIT | PSR_E_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | \ PSR_T_BIT | MODE_MASK) From 75dde792d6f6c2d0af50278bd374bf0c512fe196 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Jun 2024 16:02:13 +0200 Subject: [PATCH 352/778] efi/x86: Free EFI memory map only when installing a new one. The logic in __efi_memmap_init() is shared between two different execution flows: - mapping the EFI memory map early or late into the kernel VA space, so that its entries can be accessed; - the x86 specific cloning of the EFI memory map in order to insert new entries that are created as a result of making a memory reservation via a call to efi_mem_reserve(). In the former case, the underlying memory containing the kernel's view of the EFI memory map (which may be heavily modified by the kernel itself on x86) is not modified at all, and the only thing that changes is the virtual mapping of this memory, which is different between early and late boot. In the latter case, an entirely new allocation is created that carries a new, updated version of the kernel's view of the EFI memory map. When installing this new version, the old version will no longer be referenced, and if the memory was allocated by the kernel, it will leak unless it gets freed. The logic that implements this freeing currently lives on the code path that is shared between these two use cases, but it should only apply to the latter. So move it to the correct spot. While at it, drop the dummy definition for non-x86 architectures, as that is no longer needed. Cc: Fixes: f0ef6523475f ("efi: Fix efi_memmap_alloc() leaks") Tested-by: Ashish Kalra Link: https://lore.kernel.org/all/36ad5079-4326-45ed-85f6-928ff76483d3@amd.com Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/efi.h | 1 - arch/x86/platform/efi/memmap.c | 12 +++++++++++- drivers/firmware/efi/memmap.c | 9 --------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 1dc600fa3ba5..481096177500 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -401,7 +401,6 @@ extern int __init efi_memmap_alloc(unsigned int num_entries, struct efi_memory_map_data *data); extern void __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags); -#define __efi_memmap_free __efi_memmap_free extern int __init efi_memmap_install(struct efi_memory_map_data *data); extern int __init efi_memmap_split_count(efi_memory_desc_t *md, diff --git a/arch/x86/platform/efi/memmap.c b/arch/x86/platform/efi/memmap.c index 4ef20b49eb5e..6ed1935504b9 100644 --- a/arch/x86/platform/efi/memmap.c +++ b/arch/x86/platform/efi/memmap.c @@ -92,12 +92,22 @@ int __init efi_memmap_alloc(unsigned int num_entries, */ int __init efi_memmap_install(struct efi_memory_map_data *data) { + unsigned long size = efi.memmap.desc_size * efi.memmap.nr_map; + unsigned long flags = efi.memmap.flags; + u64 phys = efi.memmap.phys_map; + int ret; + efi_memmap_unmap(); if (efi_enabled(EFI_PARAVIRT)) return 0; - return __efi_memmap_init(data); + ret = __efi_memmap_init(data); + if (ret) + return ret; + + __efi_memmap_free(phys, size, flags); + return 0; } /** diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 3365944f7965..34109fd86c55 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -15,10 +15,6 @@ #include #include -#ifndef __efi_memmap_free -#define __efi_memmap_free(phys, size, flags) do { } while (0) -#endif - /** * __efi_memmap_init - Common code for mapping the EFI memory map * @data: EFI memory map data @@ -51,11 +47,6 @@ int __init __efi_memmap_init(struct efi_memory_map_data *data) return -ENOMEM; } - if (efi.memmap.flags & (EFI_MEMMAP_MEMBLOCK | EFI_MEMMAP_SLAB)) - __efi_memmap_free(efi.memmap.phys_map, - efi.memmap.desc_size * efi.memmap.nr_map, - efi.memmap.flags); - map.phys_map = data->phys_map; map.nr_map = data->size / data->desc_size; map.map_end = map.map + data->size; From 46e27b9961d8712bc89234444ede314cec0e8bae Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 13 Jun 2024 12:20:31 -0400 Subject: [PATCH 353/778] efi/arm64: Fix kmemleak false positive in arm64_efi_rt_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kmemleak code sometimes complains about the following leak: unreferenced object 0xffff8000102e0000 (size 32768):   comm "swapper/0", pid 1, jiffies 4294937323 (age 71.240s)   hex dump (first 32 bytes):     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................   backtrace:     [<00000000db9a88a3>] __vmalloc_node_range+0x324/0x450     [<00000000ff8903a4>] __vmalloc_node+0x90/0xd0     [<000000001a06634f>] arm64_efi_rt_init+0x64/0xdc     [<0000000007826a8d>] do_one_initcall+0x178/0xac0     [<0000000054a87017>] do_initcalls+0x190/0x1d0     [<00000000308092d0>] kernel_init_freeable+0x2c0/0x2f0     [<000000003e7b99e0>] kernel_init+0x28/0x14c     [<000000002246af5b>] ret_from_fork+0x10/0x20 The memory object in this case is for efi_rt_stack_top and is allocated in an initcall. So this is certainly a false positive. Mark the object as not a leak to quash it. Signed-off-by: Waiman Long Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 4a92096db34e..712718aed5dd 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -213,6 +214,7 @@ l: if (!p) { return -ENOMEM; } + kmemleak_not_leak(p); efi_rt_stack_top = p + THREAD_SIZE; return 0; } From e1b4622efbe7ad09c9a902365a993f68c270c453 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Wed, 22 May 2024 14:38:28 -0700 Subject: [PATCH 354/778] arm64: dts: freescale: imx8mp-venice-gw73xx-2x: fix BT shutdown GPIO Fix the invalid BT shutdown GPIO (gpio1_io3 not gpio4_io16) Fixes: 716ced308234 ("arm64: dts: freescale: Add imx8mp-venice-gw73xx-2x") Signed-off-by: Tim Harvey Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi index dec57fad6828..e2b5e7ac3e46 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi @@ -219,7 +219,7 @@ bluetooth { compatible = "brcm,bcm4330-bt"; - shutdown-gpios = <&gpio4 16 GPIO_ACTIVE_HIGH>; + shutdown-gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>; }; }; From 68e5afd8f440a2cc18deb53ae151aa74e2a8eced Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 May 2024 14:07:31 +0200 Subject: [PATCH 355/778] watchdog: lenovo_se10_wdt: add HAS_IOPORT dependency Once the inb()/outb() helpers become conditional, the newly added driver fails to link on targets without CONFIG_HAS_IOPORT: In file included from arch/arm64/include/asm/io.h:299, from include/linux/io.h:14, from drivers/watchdog/lenovo_se10_wdt.c:8: drivers/watchdog/lenovo_se10_wdt.c: In function 'set_bram': include/asm-generic/io.h:596:15: error: call to '_outb' declared with attribute error: outb() requires CONFIG_HAS_IOPORT 596 | #define _outb _outb include/asm-generic/io.h:655:14: note: in expansion of macro '_outb' 655 | #define outb _outb | ^~~~~ drivers/watchdog/lenovo_se10_wdt.c:67:9: note: in expansion of macro 'outb' 67 | outb(offset, bram_base); | ^~~~ Add the same dependency we added to the other such drivers. Fixes: 1f6602c8ed1e ("watchdog: lenovo_se10_wdt: Watchdog driver for Lenovo SE10 platform") Signed-off-by: Arnd Bergmann Reviewed-by Mark Pearson Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240528120759.3491774-1-arnd@kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 85eea38dbdf4..2882944d23cc 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -257,6 +257,7 @@ config GPIO_WATCHDOG_ARCH_INITCALL config LENOVO_SE10_WDT tristate "Lenovo SE10 Watchdog" depends on (X86 && DMI) || COMPILE_TEST + depends on HAS_IOPORT select WATCHDOG_CORE help If you say yes here you get support for the watchdog From acf9e67a7625367b89440855572b29c5ec19dd20 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 7 Jun 2024 11:04:40 -0700 Subject: [PATCH 356/778] watchdog: add missing MODULE_DESCRIPTION() macros make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/watchdog/omap_wdt.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/watchdog/twl4030_wdt.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/watchdog/ts4800_wdt.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/watchdog/simatic-ipc-wdt.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/watchdog/menz69_wdt.o Add the missing invocations of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240607-md-drivers-watchdog-v1-1-485c1c58301f@quicinc.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/menz69_wdt.c | 1 + drivers/watchdog/omap_wdt.c | 1 + drivers/watchdog/simatic-ipc-wdt.c | 1 + drivers/watchdog/ts4800_wdt.c | 1 + drivers/watchdog/twl4030_wdt.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/watchdog/menz69_wdt.c b/drivers/watchdog/menz69_wdt.c index c7de30270043..0508a65acfa6 100644 --- a/drivers/watchdog/menz69_wdt.c +++ b/drivers/watchdog/menz69_wdt.c @@ -161,6 +161,7 @@ static struct mcb_driver men_z069_driver = { module_mcb_driver(men_z069_driver); MODULE_AUTHOR("Johannes Thumshirn "); +MODULE_DESCRIPTION("Watchdog driver for the MEN z069 IP-Core"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("mcb:16z069"); MODULE_IMPORT_NS(MCB); diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index a7a12f2fe9de..b6e0236509bb 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -370,5 +370,6 @@ static struct platform_driver omap_wdt_driver = { module_platform_driver(omap_wdt_driver); MODULE_AUTHOR("George G. Davis"); +MODULE_DESCRIPTION("Driver for the TI OMAP 16xx/24xx/34xx 32KHz (non-secure) watchdog"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:omap_wdt"); diff --git a/drivers/watchdog/simatic-ipc-wdt.c b/drivers/watchdog/simatic-ipc-wdt.c index cdc1a2e15180..1e91f0a560ff 100644 --- a/drivers/watchdog/simatic-ipc-wdt.c +++ b/drivers/watchdog/simatic-ipc-wdt.c @@ -227,6 +227,7 @@ static struct platform_driver simatic_ipc_wdt_driver = { module_platform_driver(simatic_ipc_wdt_driver); +MODULE_DESCRIPTION("Siemens SIMATIC IPC driver for Watchdogs"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:" KBUILD_MODNAME); MODULE_AUTHOR("Gerd Haeussler "); diff --git a/drivers/watchdog/ts4800_wdt.c b/drivers/watchdog/ts4800_wdt.c index 0099403f4992..24b1ad52102e 100644 --- a/drivers/watchdog/ts4800_wdt.c +++ b/drivers/watchdog/ts4800_wdt.c @@ -200,5 +200,6 @@ static struct platform_driver ts4800_wdt_driver = { module_platform_driver(ts4800_wdt_driver); MODULE_AUTHOR("Damien Riegel "); +MODULE_DESCRIPTION("Watchdog driver for TS-4800 based boards"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:ts4800_wdt"); diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c index 09d17e20f4a7..8c80d04811e4 100644 --- a/drivers/watchdog/twl4030_wdt.c +++ b/drivers/watchdog/twl4030_wdt.c @@ -118,6 +118,7 @@ static struct platform_driver twl4030_wdt_driver = { module_platform_driver(twl4030_wdt_driver); MODULE_AUTHOR("Nokia Corporation"); +MODULE_DESCRIPTION("TWL4030 Watchdog"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:twl4030_wdt"); From 58f7e1e2c9e72c7974054c64c3abeac81c11f822 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Thu, 30 May 2024 19:06:29 +0800 Subject: [PATCH 357/778] ocfs2: fix NULL pointer dereference in ocfs2_journal_dirty() bdev->bd_super has been removed and commit 8887b94d9322 change the usage from bdev->bd_super to b_assoc_map->host->i_sb. This introduces the following NULL pointer dereference in ocfs2_journal_dirty() since b_assoc_map is still not initialized. This can be easily reproduced by running xfstests generic/186, which simulate no more credits. [ 134.351592] BUG: kernel NULL pointer dereference, address: 0000000000000000 ... [ 134.355341] RIP: 0010:ocfs2_journal_dirty+0x14f/0x160 [ocfs2] ... [ 134.365071] Call Trace: [ 134.365312] [ 134.365524] ? __die_body+0x1e/0x60 [ 134.365868] ? page_fault_oops+0x13d/0x4f0 [ 134.366265] ? __pfx_bit_wait_io+0x10/0x10 [ 134.366659] ? schedule+0x27/0xb0 [ 134.366981] ? exc_page_fault+0x6a/0x140 [ 134.367356] ? asm_exc_page_fault+0x26/0x30 [ 134.367762] ? ocfs2_journal_dirty+0x14f/0x160 [ocfs2] [ 134.368305] ? ocfs2_journal_dirty+0x13d/0x160 [ocfs2] [ 134.368837] ocfs2_create_new_meta_bhs.isra.51+0x139/0x2e0 [ocfs2] [ 134.369454] ocfs2_grow_tree+0x688/0x8a0 [ocfs2] [ 134.369927] ocfs2_split_and_insert.isra.67+0x35c/0x4a0 [ocfs2] [ 134.370521] ocfs2_split_extent+0x314/0x4d0 [ocfs2] [ 134.371019] ocfs2_change_extent_flag+0x174/0x410 [ocfs2] [ 134.371566] ocfs2_add_refcount_flag+0x3fa/0x630 [ocfs2] [ 134.372117] ocfs2_reflink_remap_extent+0x21b/0x4c0 [ocfs2] [ 134.372994] ? inode_update_timestamps+0x4a/0x120 [ 134.373692] ? __pfx_ocfs2_journal_access_di+0x10/0x10 [ocfs2] [ 134.374545] ? __pfx_ocfs2_journal_access_di+0x10/0x10 [ocfs2] [ 134.375393] ocfs2_reflink_remap_blocks+0xe4/0x4e0 [ocfs2] [ 134.376197] ocfs2_remap_file_range+0x1de/0x390 [ocfs2] [ 134.376971] ? security_file_permission+0x29/0x50 [ 134.377644] vfs_clone_file_range+0xfe/0x320 [ 134.378268] ioctl_file_clone+0x45/0xa0 [ 134.378853] do_vfs_ioctl+0x457/0x990 [ 134.379422] __x64_sys_ioctl+0x6e/0xd0 [ 134.379987] do_syscall_64+0x5d/0x170 [ 134.380550] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 134.381231] RIP: 0033:0x7fa4926397cb [ 134.381786] Code: 73 01 c3 48 8b 0d bd 56 38 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8d 56 38 00 f7 d8 64 89 01 48 [ 134.383930] RSP: 002b:00007ffc2b39f7b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 134.384854] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fa4926397cb [ 134.385734] RDX: 00007ffc2b39f7f0 RSI: 000000004020940d RDI: 0000000000000003 [ 134.386606] RBP: 0000000000000000 R08: 00111a82a4f015bb R09: 00007fa494221000 [ 134.387476] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 134.388342] R13: 0000000000f10000 R14: 0000558e844e2ac8 R15: 0000000000f10000 [ 134.389207] Fix it by only aborting transaction and journal in ocfs2_journal_dirty() now, and leave ocfs2_abort() later when detecting an aborted handle, e.g. start next transaction. Also log the handle details in this case. Link: https://lkml.kernel.org/r/20240530110630.3933832-1-joseph.qi@linux.alibaba.com Fixes: 8887b94d9322 ("ocfs2: stop using bdev->bd_super for journal error logging") Signed-off-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: [6.6+] Signed-off-by: Andrew Morton --- fs/ocfs2/journal.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 604fea3a26ff..27c7683c7d3f 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -778,13 +778,15 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh) if (!is_handle_aborted(handle)) { journal_t *journal = handle->h_transaction->t_journal; - mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. " - "Aborting transaction and journal.\n"); + mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed: " + "handle type %u started at line %u, credits %u/%u " + "errcode %d. Aborting transaction and journal.\n", + handle->h_type, handle->h_line_no, + handle->h_requested_credits, + jbd2_handle_buffer_credits(handle), status); handle->h_err = status; jbd2_journal_abort_handle(handle); jbd2_journal_abort(journal, status); - ocfs2_abort(bh->b_assoc_map->host->i_sb, - "Journal already aborted.\n"); } } } From 685d03c3795378fca6a1b3d43581f7f1a3fc095f Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Thu, 30 May 2024 19:06:30 +0800 Subject: [PATCH 358/778] ocfs2: fix NULL pointer dereference in ocfs2_abort_trigger() bdev->bd_super has been removed and commit 8887b94d9322 change the usage from bdev->bd_super to b_assoc_map->host->i_sb. Since ocfs2 hasn't set bh->b_assoc_map, it will trigger NULL pointer dereference when calling into ocfs2_abort_trigger(). Actually this was pointed out in history, see commit 74e364ad1b13. But I've made a mistake when reviewing commit 8887b94d9322 and then re-introduce this regression. Since we cannot revive bdev in buffer head, so fix this issue by initializing all types of ocfs2 triggers when fill super, and then get the specific ocfs2 trigger from ocfs2_caching_info when access journal. [joseph.qi@linux.alibaba.com: v2] Link: https://lkml.kernel.org/r/20240602112045.1112708-1-joseph.qi@linux.alibaba.com Link: https://lkml.kernel.org/r/20240530110630.3933832-2-joseph.qi@linux.alibaba.com Fixes: 8887b94d9322 ("ocfs2: stop using bdev->bd_super for journal error logging") Signed-off-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: [6.6+] Signed-off-by: Andrew Morton --- fs/ocfs2/journal.c | 182 +++++++++++++++++++++++++-------------------- fs/ocfs2/ocfs2.h | 27 +++++++ fs/ocfs2/super.c | 4 +- 3 files changed, 131 insertions(+), 82 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 27c7683c7d3f..86807086b2df 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -479,12 +479,6 @@ bail: return status; } - -struct ocfs2_triggers { - struct jbd2_buffer_trigger_type ot_triggers; - int ot_offset; -}; - static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers) { return container_of(triggers, struct ocfs2_triggers, ot_triggers); @@ -548,85 +542,76 @@ static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh) { + struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers); + mlog(ML_ERROR, "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, " "bh->b_blocknr = %llu\n", (unsigned long)bh, (unsigned long long)bh->b_blocknr); - ocfs2_error(bh->b_assoc_map->host->i_sb, + ocfs2_error(ot->sb, "JBD2 has aborted our journal, ocfs2 cannot continue\n"); } -static struct ocfs2_triggers di_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_dinode, i_check), -}; +static void ocfs2_setup_csum_triggers(struct super_block *sb, + enum ocfs2_journal_trigger_type type, + struct ocfs2_triggers *ot) +{ + BUG_ON(type >= OCFS2_JOURNAL_TRIGGER_COUNT); -static struct ocfs2_triggers eb_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_extent_block, h_check), -}; + switch (type) { + case OCFS2_JTR_DI: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_dinode, i_check); + break; + case OCFS2_JTR_EB: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_extent_block, h_check); + break; + case OCFS2_JTR_RB: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_refcount_block, rf_check); + break; + case OCFS2_JTR_GD: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_group_desc, bg_check); + break; + case OCFS2_JTR_DB: + ot->ot_triggers.t_frozen = ocfs2_db_frozen_trigger; + break; + case OCFS2_JTR_XB: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_xattr_block, xb_check); + break; + case OCFS2_JTR_DQ: + ot->ot_triggers.t_frozen = ocfs2_dq_frozen_trigger; + break; + case OCFS2_JTR_DR: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check); + break; + case OCFS2_JTR_DL: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check); + break; + case OCFS2_JTR_NONE: + /* To make compiler happy... */ + return; + } -static struct ocfs2_triggers rb_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), -}; + ot->ot_triggers.t_abort = ocfs2_abort_trigger; + ot->sb = sb; +} -static struct ocfs2_triggers gd_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), -}; +void ocfs2_initialize_journal_triggers(struct super_block *sb, + struct ocfs2_triggers triggers[]) +{ + enum ocfs2_journal_trigger_type type; -static struct ocfs2_triggers db_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_db_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, -}; - -static struct ocfs2_triggers xb_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), -}; - -static struct ocfs2_triggers dq_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_dq_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, -}; - -static struct ocfs2_triggers dr_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), -}; - -static struct ocfs2_triggers dl_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), -}; + for (type = OCFS2_JTR_DI; type < OCFS2_JOURNAL_TRIGGER_COUNT; type++) + ocfs2_setup_csum_triggers(sb, type, &triggers[type]); +} static int __ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci, @@ -708,56 +693,91 @@ static int __ocfs2_journal_access(handle_t *handle, int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DI], + type); } int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_EB], + type); } int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &rb_triggers, + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_RB], type); } int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_GD], + type); } int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DB], + type); } int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_XB], + type); } int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DQ], + type); } int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DR], + type); } int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DL], + type); } int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci, diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index a503c553bab2..8fe826143d7b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -284,6 +284,30 @@ enum ocfs2_mount_options #define OCFS2_OSB_ERROR_FS 0x0004 #define OCFS2_DEFAULT_ATIME_QUANTUM 60 +struct ocfs2_triggers { + struct jbd2_buffer_trigger_type ot_triggers; + int ot_offset; + struct super_block *sb; +}; + +enum ocfs2_journal_trigger_type { + OCFS2_JTR_DI, + OCFS2_JTR_EB, + OCFS2_JTR_RB, + OCFS2_JTR_GD, + OCFS2_JTR_DB, + OCFS2_JTR_XB, + OCFS2_JTR_DQ, + OCFS2_JTR_DR, + OCFS2_JTR_DL, + OCFS2_JTR_NONE /* This must be the last entry */ +}; + +#define OCFS2_JOURNAL_TRIGGER_COUNT OCFS2_JTR_NONE + +void ocfs2_initialize_journal_triggers(struct super_block *sb, + struct ocfs2_triggers triggers[]); + struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; @@ -351,6 +375,9 @@ struct ocfs2_super struct ocfs2_journal *journal; unsigned long osb_commit_interval; + /* Journal triggers for checksum */ + struct ocfs2_triggers s_journal_triggers[OCFS2_JOURNAL_TRIGGER_COUNT]; + struct delayed_work la_enable_wq; /* diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8aabaed2c1cb..afee70125ae3 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1075,9 +1075,11 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root, osb, &ocfs2_osb_debug_fops); - if (ocfs2_meta_ecc(osb)) + if (ocfs2_meta_ecc(osb)) { + ocfs2_initialize_journal_triggers(sb, osb->s_journal_triggers); ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats, osb->osb_debug_root); + } status = ocfs2_mount_volume(sb); if (status < 0) From 8e5bd4eadd01ea0c47f3a1c798815849f813a700 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 22 May 2024 15:58:30 -0700 Subject: [PATCH 359/778] gcc: disable '-Warray-bounds' for gcc-9 '-Warray-bounds' is already disabled for gcc-10+. Now that we've merged bitmap_{read,write), I see the following error when building the kernel with gcc-9.4 (Ubuntu 20.04.4 LTS) for x86_64 allmodconfig: drivers/pinctrl/pinctrl-cy8c95x0.c: In function `cy8c95x0_read_regs_mask.isra.0': include/linux/bitmap.h:756:18: error: array subscript [1, 288230376151711744] is outside array bounds of `long unsigned int[1]' [-Werror=array-bounds] 756 | value_high = map[index + 1] & BITMAP_LAST_WORD_MASK(start + nbits); | ~~~^~~~~~~~~~~ The immediate reason is that the commit b44759705f7d ("bitmap: make bitmap_{get,set}_value8() use bitmap_{read,write}()") switched the bitmap_get_value8() to an alias of bitmap_read(); the same for 'set'. Now; the code that triggers Warray-bounds, calls the function like this: #define MAX_BANK 8 #define BANK_SZ 8 #define MAX_LINE (MAX_BANK * BANK_SZ) DECLARE_BITMAP(tval, MAX_LINE); // 64-bit map: unsigned long tval[1] read_val |= bitmap_get_value8(tval, i * BANK_SZ) & ~bits; bitmap_read() is implemented such that it may conditionally dereference a pointer beyond the boundary like this: unsigned long offset = start % BITS_PER_LONG; unsigned long space = BITS_PER_LONG - offset; if (space >= nbits) return (map[index] >> offset) & BITMAP_LAST_WORD_MASK(nbits); value_low = map[index] & BITMAP_FIRST_WORD_MASK(start); value_high = map[index + 1] & BITMAP_LAST_WORD_MASK(start + nbits); return (value_low >> offset) | (value_high << space); In case of bitmap_get_value8(), it's impossible to violate the boundary because 'space >= nbits' is never the true for byte-aligned 8-bit access. So, this is clearly a false-positive. The same type of false-positives break my allmodconfig build in many places. gcc-8, is clear, however. Link: https://lkml.kernel.org/r/20240522225830.1201778-1-yury.norov@gmail.com Fixes: b44759705f7d ("bitmap: make bitmap_{get,set}_value8() use bitmap_{read,write}()") Signed-off-by: Yury Norov Cc: Alexander Lobakin Cc: David S. Miller Cc: Gustavo A. R. Silva Cc: Masahiro Yamada Cc: Nhat Pham Cc: Petr Mladek Cc: Randy Dunlap Cc: Vincent Guittot Cc: Yoann Congal Cc: Arnd Bergmann Signed-off-by: Andrew Morton --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 72404c1f2157..febdea2afc3b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -883,7 +883,7 @@ config GCC10_NO_ARRAY_BOUNDS config CC_NO_ARRAY_BOUNDS bool - default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS + default y if CC_IS_GCC && GCC_VERSION >= 90000 && GCC10_NO_ARRAY_BOUNDS # Currently, disable -Wstringop-overflow for GCC globally. config GCC_NO_STRINGOP_OVERFLOW From 8bb592c2eca8fd2bc06db7d80b38da18da4a2f43 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 5 Jun 2024 17:21:46 -0400 Subject: [PATCH 360/778] mm/page_table_check: fix crash on ZONE_DEVICE Not all pages may apply to pgtable check. One example is ZONE_DEVICE pages: they map PFNs directly, and they don't allocate page_ext at all even if there's struct page around. One may reference devm_memremap_pages(). When both ZONE_DEVICE and page-table-check enabled, then try to map some dax memories, one can trigger kernel bug constantly now when the kernel was trying to inject some pfn maps on the dax device: kernel BUG at mm/page_table_check.c:55! While it's pretty legal to use set_pxx_at() for ZONE_DEVICE pages for page fault resolutions, skip all the checks if page_ext doesn't even exist in pgtable checker, which applies to ZONE_DEVICE but maybe more. Link: https://lkml.kernel.org/r/20240605212146.994486-1-peterx@redhat.com Fixes: df4e817b7108 ("mm: page table check") Signed-off-by: Peter Xu Reviewed-by: Pasha Tatashin Reviewed-by: Dan Williams Reviewed-by: Alistair Popple Cc: Signed-off-by: Andrew Morton --- mm/page_table_check.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/page_table_check.c b/mm/page_table_check.c index 4169576bed72..509c6ef8de40 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -73,6 +73,9 @@ static void page_table_check_clear(unsigned long pfn, unsigned long pgcnt) page = pfn_to_page(pfn); page_ext = page_ext_get(page); + if (!page_ext) + return; + BUG_ON(PageSlab(page)); anon = PageAnon(page); @@ -110,6 +113,9 @@ static void page_table_check_set(unsigned long pfn, unsigned long pgcnt, page = pfn_to_page(pfn); page_ext = page_ext_get(page); + if (!page_ext) + return; + BUG_ON(PageSlab(page)); anon = PageAnon(page); @@ -140,7 +146,10 @@ void __page_table_check_zero(struct page *page, unsigned int order) BUG_ON(PageSlab(page)); page_ext = page_ext_get(page); - BUG_ON(!page_ext); + + if (!page_ext) + return; + for (i = 0; i < (1ul << order); i++) { struct page_table_check *ptc = get_page_table_check(page_ext); From 384a746bb55960aa5ffb3a67de08f11fc2f51042 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 5 Jun 2024 11:17:10 +0200 Subject: [PATCH 361/778] Revert "mm: init_mlocked_on_free_v3" There was insufficient review and no agreement that this is the right approach. There are serious flaws with the implementation that make processes using mlock() not even work with simple fork() [1] and we get reliable crashes when rebooting. Further, simply because we might be unmapping a single PTE of a large mlocked folio, we shouldn't zero out the whole folio. ... especially because the code can also *corrupt* urelated memory because kernel_init_pages(page, folio_nr_pages(folio)); Could end up writing outside of the actual folio if we work with a tail page. Let's revert it. Once there is agreement that this is the right approach, the issues were fixed and there was reasonable review and proper testing, we can consider it again. [1] https://lkml.kernel.org/r/4da9da2f-73e4-45fd-b62f-a8a513314057@redhat.com Link: https://lkml.kernel.org/r/20240605091710.38961-1-david@redhat.com Fixes: ba42b524a040 ("mm: init_mlocked_on_free_v3") Signed-off-by: David Hildenbrand Reported-by: David Wang <00107082@163.com> Closes: https://lore.kernel.org/lkml/20240528151340.4282-1-00107082@163.com/ Reported-by: Lance Yang Closes: https://lkml.kernel.org/r/20240601140917.43562-1-ioworker0@gmail.com Acked-by: Lance Yang Cc: York Jasper Niebuhr Cc: Matthew Wilcox (Oracle) Cc: Kees Cook Signed-off-by: Andrew Morton --- .../admin-guide/kernel-parameters.txt | 6 --- include/linux/mm.h | 9 +--- mm/internal.h | 1 - mm/memory.c | 6 --- mm/mm_init.c | 43 +++---------------- mm/page_alloc.c | 2 +- security/Kconfig.hardening | 15 ------- 7 files changed, 9 insertions(+), 73 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b600df82669d..11e57ba2985c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2192,12 +2192,6 @@ Format: 0 | 1 Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON. - init_mlocked_on_free= [MM] Fill freed userspace memory with zeroes if - it was mlock'ed and not explicitly munlock'ed - afterwards. - Format: 0 | 1 - Default set by CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON - init_pkru= [X86] Specify the default memory protection keys rights register contents for all processes. 0x55555554 by default (disallow access to all but pkey 0). Can diff --git a/include/linux/mm.h b/include/linux/mm.h index 9849dfda44d4..9a5652c5fadd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3776,14 +3776,7 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); static inline bool want_init_on_free(void) { return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, - &init_on_free); -} - -DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free); -static inline bool want_init_mlocked_on_free(void) -{ - return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, - &init_mlocked_on_free); + &init_on_free); } extern bool _debug_pagealloc_enabled_early; diff --git a/mm/internal.h b/mm/internal.h index b2c75b12014e..c72c306761a4 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -588,7 +588,6 @@ extern void __putback_isolated_page(struct page *page, unsigned int order, extern void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order); extern void __free_pages_core(struct page *page, unsigned int order); -extern void kernel_init_pages(struct page *page, int numpages); /* * This will have no effect, other than possibly generating a warning, if the diff --git a/mm/memory.c b/mm/memory.c index 0f47a533014e..2bc8032a30a2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1507,12 +1507,6 @@ static __always_inline void zap_present_folio_ptes(struct mmu_gather *tlb, if (unlikely(folio_mapcount(folio) < 0)) print_bad_pte(vma, addr, ptent, page); } - - if (want_init_mlocked_on_free() && folio_test_mlocked(folio) && - !delay_rmap && folio_test_anon(folio)) { - kernel_init_pages(page, folio_nr_pages(folio)); - } - if (unlikely(__tlb_remove_folio_pages(tlb, page, nr, delay_rmap))) { *force_flush = true; *force_break = true; diff --git a/mm/mm_init.c b/mm/mm_init.c index f72b852bd5b8..3ec04933f7fd 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2523,9 +2523,6 @@ EXPORT_SYMBOL(init_on_alloc); DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); EXPORT_SYMBOL(init_on_free); -DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free); -EXPORT_SYMBOL(init_mlocked_on_free); - static bool _init_on_alloc_enabled_early __read_mostly = IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON); static int __init early_init_on_alloc(char *buf) @@ -2543,14 +2540,6 @@ static int __init early_init_on_free(char *buf) } early_param("init_on_free", early_init_on_free); -static bool _init_mlocked_on_free_enabled_early __read_mostly - = IS_ENABLED(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON); -static int __init early_init_mlocked_on_free(char *buf) -{ - return kstrtobool(buf, &_init_mlocked_on_free_enabled_early); -} -early_param("init_mlocked_on_free", early_init_mlocked_on_free); - DEFINE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled); /* @@ -2578,21 +2567,12 @@ static void __init mem_debugging_and_hardening_init(void) } #endif - if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early || - _init_mlocked_on_free_enabled_early) && + if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) && page_poisoning_requested) { pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_alloc, init_on_free " - "and init_mlocked_on_free\n"); + "will take precedence over init_on_alloc and init_on_free\n"); _init_on_alloc_enabled_early = false; _init_on_free_enabled_early = false; - _init_mlocked_on_free_enabled_early = false; - } - - if (_init_mlocked_on_free_enabled_early && _init_on_free_enabled_early) { - pr_info("mem auto-init: init_on_free is on, " - "will take precedence over init_mlocked_on_free\n"); - _init_mlocked_on_free_enabled_early = false; } if (_init_on_alloc_enabled_early) { @@ -2609,17 +2589,9 @@ static void __init mem_debugging_and_hardening_init(void) static_branch_disable(&init_on_free); } - if (_init_mlocked_on_free_enabled_early) { - want_check_pages = true; - static_branch_enable(&init_mlocked_on_free); - } else { - static_branch_disable(&init_mlocked_on_free); - } - - if (IS_ENABLED(CONFIG_KMSAN) && (_init_on_alloc_enabled_early || - _init_on_free_enabled_early || _init_mlocked_on_free_enabled_early)) - pr_info("mem auto-init: please make sure init_on_alloc, init_on_free and " - "init_mlocked_on_free are disabled when running KMSAN\n"); + if (IS_ENABLED(CONFIG_KMSAN) && + (_init_on_alloc_enabled_early || _init_on_free_enabled_early)) + pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n"); #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled()) { @@ -2658,10 +2630,9 @@ static void __init report_meminit(void) else stack = "off"; - pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s, mlocked free:%s\n", + pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n", stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off", - want_init_on_free() ? "on" : "off", - want_init_mlocked_on_free() ? "on" : "off"); + want_init_on_free() ? "on" : "off"); if (want_init_on_free()) pr_info("mem auto-init: clearing system memory may take some time...\n"); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 222299b5c0e6..7300aa9f14b0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1016,7 +1016,7 @@ static inline bool should_skip_kasan_poison(struct page *page) return page_kasan_tag(page) == KASAN_TAG_KERNEL; } -void kernel_init_pages(struct page *page, int numpages) +static void kernel_init_pages(struct page *page, int numpages) { int i; diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index effbf5982be1..2cff851ebfd7 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -255,21 +255,6 @@ config INIT_ON_FREE_DEFAULT_ON touching "cold" memory areas. Most cases see 3-5% impact. Some synthetic workloads have measured as high as 8%. -config INIT_MLOCKED_ON_FREE_DEFAULT_ON - bool "Enable mlocked memory zeroing on free" - depends on !KMSAN - help - This config has the effect of setting "init_mlocked_on_free=1" - on the kernel command line. If it is enabled, all mlocked process - memory is zeroed when freed. This restriction to mlocked memory - improves performance over "init_on_free" but can still be used to - protect confidential data like key material from content exposures - to other processes, as well as live forensics and cold boot attacks. - Any non-mlocked memory is not cleared before it is reassigned. This - configuration can be overwritten by setting "init_mlocked_on_free=0" - on the command line. The "init_on_free" boot option takes - precedence over "init_mlocked_on_free". - config CC_HAS_ZERO_CALL_USED_REGS def_bool $(cc-option,-fzero-call-used-regs=used-gpr) # https://github.com/ClangBuiltLinux/linux/issues/1766 From 3ab85f4046c12fb773084c2974cd7bbe8a3e2e68 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 2 Jun 2024 21:55:10 +0100 Subject: [PATCH 362/778] MAINTAINERS: remove Lorenzo as vmalloc reviewer I haven't had the bandwidth to review vmalloc patches recently and I suspect I won't be able to do so consistently moving forwards, so I think it's best if I remove myself as reviewer for the time being. Link: https://lkml.kernel.org/r/20240602205510.108807-1-lstoakes@gmail.com Signed-off-by: Lorenzo Stoakes Cc: Baoquan He Cc: Christoph Hellwig Cc: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index aacccb376c28..3620c4e6b469 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23974,7 +23974,6 @@ VMALLOC M: Andrew Morton R: Uladzislau Rezki R: Christoph Hellwig -R: Lorenzo Stoakes L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org From c944bf60c16a65ae812a59fd1b66f6c9e18c91c9 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 1 Jun 2024 16:38:31 -0700 Subject: [PATCH 363/778] lib/alloc_tag: do not register sysctl interface when CONFIG_SYSCTL=n Memory allocation profiling is trying to register sysctl interface even when CONFIG_SYSCTL=n, resulting in proc_do_static_key() being undefined. Prevent that by skipping sysctl registration for such configurations. Link: https://lkml.kernel.org/r/20240601233831.617124-1-surenb@google.com Fixes: 22d407b164ff ("lib: add allocation tagging support for memory allocation profiling") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202405280616.wcOGWJEj-lkp@intel.com/ Acked-by: Vlastimil Babka Cc: Kent Overstreet Cc: Kees Cook Cc: Pasha Tatashin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- lib/alloc_tag.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 11ed973ac359..c347b8b72d78 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -227,6 +227,7 @@ struct page_ext_operations page_alloc_tagging_ops = { }; EXPORT_SYMBOL(page_alloc_tagging_ops); +#ifdef CONFIG_SYSCTL static struct ctl_table memory_allocation_profiling_sysctls[] = { { .procname = "mem_profiling", @@ -241,6 +242,17 @@ static struct ctl_table memory_allocation_profiling_sysctls[] = { { } }; +static void __init sysctl_init(void) +{ + if (!mem_profiling_support) + memory_allocation_profiling_sysctls[0].mode = 0444; + + register_sysctl_init("vm", memory_allocation_profiling_sysctls); +} +#else /* CONFIG_SYSCTL */ +static inline void sysctl_init(void) {} +#endif /* CONFIG_SYSCTL */ + static int __init alloc_tag_init(void) { const struct codetag_type_desc desc = { @@ -253,9 +265,7 @@ static int __init alloc_tag_init(void) if (IS_ERR(alloc_tag_cttype)) return PTR_ERR(alloc_tag_cttype); - if (!mem_profiling_support) - memory_allocation_profiling_sysctls[0].mode = 0444; - register_sysctl_init("vm", memory_allocation_profiling_sysctls); + sysctl_init(); procfs_init(); return 0; From a273559e9eb68cb58c57803d76a1622b8324a878 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 1 Jun 2024 16:38:40 -0700 Subject: [PATCH 364/778] lib/alloc_tag: fix RCU imbalance in pgalloc_tag_get() put_page_tag_ref() should be called only when get_page_tag_ref() returns a valid reference because only in that case get_page_tag_ref() enters RCU read section while put_page_tag_ref() will call rcu_read_unlock() even if the provided reference is NULL. Fix pgalloc_tag_get() which does not follow this rule causing RCU imbalance. Add a warning in put_page_tag_ref() to catch any future mistakes. Link: https://lkml.kernel.org/r/20240601233840.617458-1-surenb@google.com Fixes: cc92eba1c88b ("mm: fix non-compound multi-order memory accounting in __free_pages") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202405271029.6d2f9c4c-lkp@intel.com Acked-by: Vlastimil Babka Cc: Kent Overstreet Cc: Kees Cook Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 86ba5d33e43b..9cacadbd61f8 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -37,6 +37,9 @@ static inline union codetag_ref *get_page_tag_ref(struct page *page) static inline void put_page_tag_ref(union codetag_ref *ref) { + if (WARN_ON(!ref)) + return; + page_ext_put(page_ext_from_codetag_ref(ref)); } @@ -102,9 +105,11 @@ static inline struct alloc_tag *pgalloc_tag_get(struct page *page) union codetag_ref *ref = get_page_tag_ref(page); alloc_tag_sub_check(ref); - if (ref && ref->ct) - tag = ct_to_alloc_tag(ref->ct); - put_page_tag_ref(ref); + if (ref) { + if (ref->ct) + tag = ct_to_alloc_tag(ref->ct); + put_page_tag_ref(ref); + } } return tag; From 6a50c9b512f7734bc356f4bd47885a6f7c98491a Mon Sep 17 00:00:00 2001 From: Ran Xiaokai Date: Fri, 7 Jun 2024 17:40:48 +0800 Subject: [PATCH 365/778] mm: huge_memory: fix misused mapping_large_folio_support() for anon folios When I did a large folios split test, a WARNING "[ 5059.122759][ T166] Cannot split file folio to non-0 order" was triggered. But the test cases are only for anonmous folios. while mapping_large_folio_support() is only reasonable for page cache folios. In split_huge_page_to_list_to_order(), the folio passed to mapping_large_folio_support() maybe anonmous folio. The folio_test_anon() check is missing. So the split of the anonmous THP is failed. This is also the same for shmem_mapping(). We'd better add a check for both. But the shmem_mapping() in __split_huge_page() is not involved, as for anonmous folios, the end parameter is set to -1, so (head[i].index >= end) is always false. shmem_mapping() is not called. Also add a VM_WARN_ON_ONCE() in mapping_large_folio_support() for anon mapping, So we can detect the wrong use more easily. THP folios maybe exist in the pagecache even the file system doesn't support large folio, it is because when CONFIG_TRANSPARENT_HUGEPAGE is enabled, khugepaged will try to collapse read-only file-backed pages to THP. But the mapping does not actually support multi order large folios properly. Using /sys/kernel/debug/split_huge_pages to verify this, with this patch, large anon THP is successfully split and the warning is ceased. Link: https://lkml.kernel.org/r/202406071740485174hcFl7jRxncsHDtI-Pz-o@zte.com.cn Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages") Reviewed-by: Barry Song Reviewed-by: Zi Yan Acked-by: David Hildenbrand Signed-off-by: Ran Xiaokai Cc: Michal Hocko Cc: xu xin Cc: Yang Yang Cc: Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 4 ++++ mm/huge_memory.c | 28 +++++++++++++++++----------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ee633712bba0..59f1df0cde5a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -381,6 +381,10 @@ static inline void mapping_set_large_folios(struct address_space *mapping) */ static inline bool mapping_large_folio_support(struct address_space *mapping) { + /* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */ + VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, + "Anonymous mapping always supports large folio"); + return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 89932fd0f62e..db7946a0a28c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3009,30 +3009,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, if (new_order >= folio_order(folio)) return -EINVAL; - /* Cannot split anonymous THP to order-1 */ - if (new_order == 1 && folio_test_anon(folio)) { - VM_WARN_ONCE(1, "Cannot split to order-1 folio"); - return -EINVAL; - } - - if (new_order) { - /* Only swapping a whole PMD-mapped folio is supported */ - if (folio_test_swapcache(folio)) + if (folio_test_anon(folio)) { + /* order-1 is not supported for anonymous THP. */ + if (new_order == 1) { + VM_WARN_ONCE(1, "Cannot split to order-1 folio"); return -EINVAL; + } + } else if (new_order) { /* Split shmem folio to non-zero order not supported */ if (shmem_mapping(folio->mapping)) { VM_WARN_ONCE(1, "Cannot split shmem folio to non-0 order"); return -EINVAL; } - /* No split if the file system does not support large folio */ - if (!mapping_large_folio_support(folio->mapping)) { + /* + * No split if the file system does not support large folio. + * Note that we might still have THPs in such mappings due to + * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping + * does not actually support large folios properly. + */ + if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && + !mapping_large_folio_support(folio->mapping)) { VM_WARN_ONCE(1, "Cannot split file folio to non-0 order"); return -EINVAL; } } + /* Only swapping a whole PMD-mapped folio is supported */ + if (folio_test_swapcache(folio) && new_order) + return -EINVAL; is_hzp = is_huge_zero_folio(folio); if (is_hzp) { From 7fea700e04bd3f424c2d836e98425782f97b494e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 8 Jun 2024 14:06:16 +0200 Subject: [PATCH 366/778] zap_pid_ns_processes: clear TIF_NOTIFY_SIGNAL along with TIF_SIGPENDING kernel_wait4() doesn't sleep and returns -EINTR if there is no eligible child and signal_pending() is true. That is why zap_pid_ns_processes() clears TIF_SIGPENDING but this is not enough, it should also clear TIF_NOTIFY_SIGNAL to make signal_pending() return false and avoid a busy-wait loop. Link: https://lkml.kernel.org/r/20240608120616.GB7947@redhat.com Fixes: 12db8b690010 ("entry: Add support for TIF_NOTIFY_SIGNAL") Signed-off-by: Oleg Nesterov Reported-by: Rachel Menge Closes: https://lore.kernel.org/all/1386cd49-36d0-4a5c-85e9-bc42056a5a38@linux.microsoft.com/ Reviewed-by: Boqun Feng Tested-by: Wei Fu Reviewed-by: Jens Axboe Cc: Allen Pais Cc: Christian Brauner Cc: Frederic Weisbecker Cc: Joel Fernandes (Google) Cc: Joel Granados Cc: Josh Triplett Cc: Lai Jiangshan Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Mike Christie Cc: Neeraj Upadhyay Cc: Paul E. McKenney Cc: Steven Rostedt (Google) Cc: Zqiang Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- kernel/pid_namespace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index dc48fecfa1dc..25f3cf679b35 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -218,6 +218,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) */ do { clear_thread_flag(TIF_SIGPENDING); + clear_thread_flag(TIF_NOTIFY_SIGNAL); rc = kernel_wait4(-1, NULL, __WALL, NULL); } while (rc != -ECHILD); From c1558bc57b8e5b4da5d821537cd30e2e660861d8 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 10 Jun 2024 11:27:43 +0200 Subject: [PATCH 367/778] gcov: add support for GCC 14 Using gcov on kernels compiled with GCC 14 results in truncated 16-byte long .gcda files with no usable data. To fix this, update GCOV_COUNTERS to match the value defined by GCC 14. Tested with GCC versions 14.1.0 and 13.2.0. Link: https://lkml.kernel.org/r/20240610092743.1609845-1-oberpar@linux.ibm.com Signed-off-by: Peter Oberparleiter Reported-by: Allison Henderson Reported-by: Chuck Lever III Tested-by: Chuck Lever Cc: Signed-off-by: Andrew Morton --- kernel/gcov/gcc_4_7.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index 74a4ef1da9ad..fd75b4a484d7 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -18,7 +18,9 @@ #include #include "gcov.h" -#if (__GNUC__ >= 10) +#if (__GNUC__ >= 14) +#define GCOV_COUNTERS 9 +#elif (__GNUC__ >= 10) #define GCOV_COUNTERS 8 #elif (__GNUC__ >= 7) #define GCOV_COUNTERS 9 From 3afb76a66b5559a7b595155803ce23801558a7a9 Mon Sep 17 00:00:00 2001 From: Rafael Aquini Date: Thu, 6 Jun 2024 14:06:22 -0400 Subject: [PATCH 368/778] mm: mmap: allow for the maximum number of bits for randomizing mmap_base by default An ASLR regression was noticed [1] and tracked down to file-mapped areas being backed by THP in recent kernels. The 21-bit alignment constraint for such mappings reduces the entropy for randomizing the placement of 64-bit library mappings and breaks ASLR completely for 32-bit libraries. The reported issue is easily addressed by increasing vm.mmap_rnd_bits and vm.mmap_rnd_compat_bits. This patch just provides a simple way to set ARCH_MMAP_RND_BITS and ARCH_MMAP_RND_COMPAT_BITS to their maximum values allowed by the architecture at build time. [1] https://zolutal.github.io/aslrnt/ [akpm@linux-foundation.org: default to `y' if 32-bit, per Rafael] Link: https://lkml.kernel.org/r/20240606180622.102099-1-aquini@redhat.com Fixes: 1854bc6e2420 ("mm/readahead: Align file mappings for non-DAX") Signed-off-by: Rafael Aquini Cc: Arnd Bergmann Cc: Heiko Carstens Cc: Mike Rapoport (IBM) Cc: Paul E. McKenney Cc: Petr Mladek Cc: Samuel Holland Cc: Signed-off-by: Andrew Morton --- arch/Kconfig | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 975dd22a2dbd..3e2a63772b3d 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1046,10 +1046,21 @@ config ARCH_MMAP_RND_BITS_MAX config ARCH_MMAP_RND_BITS_DEFAULT int +config FORCE_MAX_MMAP_RND_BITS + bool "Force maximum number of bits to use for ASLR of mmap base address" + default y if !64BIT + help + ARCH_MMAP_RND_BITS and ARCH_MMAP_RND_COMPAT_BITS represent the number + of bits to use for ASLR and if no custom value is assigned (EXPERT) + then the architecture's lower bound (minimum) value is assumed. + This toggle changes that default assumption to assume the arch upper + bound (maximum) value instead. + config ARCH_MMAP_RND_BITS int "Number of bits to use for ASLR of mmap base address" if EXPERT range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT + default ARCH_MMAP_RND_BITS_MAX if FORCE_MAX_MMAP_RND_BITS default ARCH_MMAP_RND_BITS_MIN depends on HAVE_ARCH_MMAP_RND_BITS help @@ -1084,6 +1095,7 @@ config ARCH_MMAP_RND_COMPAT_BITS int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT + default ARCH_MMAP_RND_COMPAT_BITS_MAX if FORCE_MAX_MMAP_RND_BITS default ARCH_MMAP_RND_COMPAT_BITS_MIN depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help From 653c5c75115c1e23b8393c1cb1ad2d6f6712742f Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Fri, 7 Jun 2024 20:35:41 +0000 Subject: [PATCH 369/778] mm/memfd: add documentation for MFD_NOEXEC_SEAL MFD_EXEC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When MFD_NOEXEC_SEAL was introduced, there was one big mistake: it didn't have proper documentation. This led to a lot of confusion, especially about whether or not memfd created with the MFD_NOEXEC_SEAL flag is sealable. Before MFD_NOEXEC_SEAL, memfd had to explicitly set MFD_ALLOW_SEALING to be sealable, so it's a fair question. As one might have noticed, unlike other flags in memfd_create, MFD_NOEXEC_SEAL is actually a combination of multiple flags. The idea is to make it easier to use memfd in the most common way, which is NOEXEC + F_SEAL_EXEC + MFD_ALLOW_SEALING. This works with sysctl vm.noexec to help existing applications move to a more secure way of using memfd. Proposals have been made to put MFD_NOEXEC_SEAL non-sealable, unless MFD_ALLOW_SEALING is set, to be consistent with other flags [1], Those are based on the viewpoint that each flag is an atomic unit, which is a reasonable assumption. However, MFD_NOEXEC_SEAL was designed with the intent of promoting the most secure method of using memfd, therefore a combination of multiple functionalities into one bit. Furthermore, the MFD_NOEXEC_SEAL has been added for more than one year, and multiple applications and distributions have backported and utilized it. Altering ABI now presents a degree of risk and may lead to disruption. MFD_NOEXEC_SEAL is a new flag, and applications must change their code to use it. There is no backward compatibility problem. When sysctl vm.noexec == 1 or 2, applications that don't set MFD_NOEXEC_SEAL or MFD_EXEC will get MFD_NOEXEC_SEAL memfd. And old-application might break, that is by-design, in such a system vm.noexec = 0 shall be used. Also no backward compatibility problem. I propose to include this documentation patch to assist in clarifying the semantics of MFD_NOEXEC_SEAL, thereby preventing any potential future confusion. Finally, I would like to express my gratitude to David Rheinsberg and Barnabás Pőcze for initiating the discussion on the topic of sealability. [1] https://lore.kernel.org/lkml/20230714114753.170814-1-david@readahead.eu/ [jeffxu@chromium.org: updates per Randy] Link: https://lkml.kernel.org/r/20240611034903.3456796-2-jeffxu@chromium.org [jeffxu@chromium.org: v3] Link: https://lkml.kernel.org/r/20240611231409.3899809-2-jeffxu@chromium.org Link: https://lkml.kernel.org/r/20240607203543.2151433-2-jeffxu@google.com Signed-off-by: Jeff Xu Reviewed-by: Randy Dunlap Cc: Aleksa Sarai Cc: Barnabás Pőcze Cc: Daniel Verkamp Cc: David Rheinsberg Cc: Dmitry Torokhov Cc: Hugh Dickins Cc: Jorge Lucangeli Obes Cc: Kees Cook Cc: Shuah Khan Signed-off-by: Andrew Morton --- Documentation/userspace-api/index.rst | 1 + Documentation/userspace-api/mfd_noexec.rst | 86 ++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 Documentation/userspace-api/mfd_noexec.rst diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index 5926115ec0ed..8a251d71fa6e 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -32,6 +32,7 @@ Security-related interfaces seccomp_filter landlock lsm + mfd_noexec spec_ctrl tee diff --git a/Documentation/userspace-api/mfd_noexec.rst b/Documentation/userspace-api/mfd_noexec.rst new file mode 100644 index 000000000000..7afcc480e38f --- /dev/null +++ b/Documentation/userspace-api/mfd_noexec.rst @@ -0,0 +1,86 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +Introduction of non-executable mfd +================================== +:Author: + Daniel Verkamp + Jeff Xu + +:Contributor: + Aleksa Sarai + +Since Linux introduced the memfd feature, memfds have always had their +execute bit set, and the memfd_create() syscall doesn't allow setting +it differently. + +However, in a secure-by-default system, such as ChromeOS, (where all +executables should come from the rootfs, which is protected by verified +boot), this executable nature of memfd opens a door for NoExec bypass +and enables “confused deputy attack”. E.g, in VRP bug [1]: cros_vm +process created a memfd to share the content with an external process, +however the memfd is overwritten and used for executing arbitrary code +and root escalation. [2] lists more VRP of this kind. + +On the other hand, executable memfd has its legit use: runc uses memfd’s +seal and executable feature to copy the contents of the binary then +execute them. For such a system, we need a solution to differentiate runc's +use of executable memfds and an attacker's [3]. + +To address those above: + - Let memfd_create() set X bit at creation time. + - Let memfd be sealed for modifying X bit when NX is set. + - Add a new pid namespace sysctl: vm.memfd_noexec to help applications in + migrating and enforcing non-executable MFD. + +User API +======== +``int memfd_create(const char *name, unsigned int flags)`` + +``MFD_NOEXEC_SEAL`` + When MFD_NOEXEC_SEAL bit is set in the ``flags``, memfd is created + with NX. F_SEAL_EXEC is set and the memfd can't be modified to + add X later. MFD_ALLOW_SEALING is also implied. + This is the most common case for the application to use memfd. + +``MFD_EXEC`` + When MFD_EXEC bit is set in the ``flags``, memfd is created with X. + +Note: + ``MFD_NOEXEC_SEAL`` implies ``MFD_ALLOW_SEALING``. In case that + an app doesn't want sealing, it can add F_SEAL_SEAL after creation. + + +Sysctl: +======== +``pid namespaced sysctl vm.memfd_noexec`` + +The new pid namespaced sysctl vm.memfd_noexec has 3 values: + + - 0: MEMFD_NOEXEC_SCOPE_EXEC + memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like + MFD_EXEC was set. + + - 1: MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL + memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like + MFD_NOEXEC_SEAL was set. + + - 2: MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED + memfd_create() without MFD_NOEXEC_SEAL will be rejected. + +The sysctl allows finer control of memfd_create for old software that +doesn't set the executable bit; for example, a container with +vm.memfd_noexec=1 means the old software will create non-executable memfd +by default while new software can create executable memfd by setting +MFD_EXEC. + +The value of vm.memfd_noexec is passed to child namespace at creation +time. In addition, the setting is hierarchical, i.e. during memfd_create, +we will search from current ns to root ns and use the most restrictive +setting. + +[1] https://crbug.com/1305267 + +[2] https://bugs.chromium.org/p/chromium/issues/list?q=type%3Dbug-security%20memfd%20escalation&can=1 + +[3] https://lwn.net/Articles/781013/ From e7d2a28bd0b27e43bff3f516ee0607d776b019f4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 5 Jun 2024 23:36:12 +0100 Subject: [PATCH 370/778] selftests: mm: make map_fixed_noreplace test names stable KTAP parsers interpret the output of ksft_test_result_*() as being the name of the test. The map_fixed_noreplace test uses a dynamically allocated base address for the mmap()s that it tests and currently includes this in the test names that it logs so the test names that are logged are not stable between runs. It also uses multiples of PAGE_SIZE which mean that runs for kernels with different PAGE_SIZE configurations can't be directly compared. Both these factors cause issues for CI systems when interpreting and displaying results. Fix this by replacing the current test names with fixed strings describing the intent of the mappings that are logged, the existing messages with the actual addresses and sizes are retained as diagnostic prints to aid in debugging. Link: https://lkml.kernel.org/r/20240605-kselftest-mm-fixed-noreplace-v1-1-a235db8b9be9@kernel.org Fixes: 4838cf70e539 ("selftests/mm: map_fixed_noreplace: conform test to TAP format output") Signed-off-by: Mark Brown Reviewed-by: Ryan Roberts Cc: Muhammad Usama Anjum Cc: Shuah Khan Signed-off-by: Andrew Morton --- .../selftests/mm/map_fixed_noreplace.c | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c index b74813fdc951..d53de2486080 100644 --- a/tools/testing/selftests/mm/map_fixed_noreplace.c +++ b/tools/testing/selftests/mm/map_fixed_noreplace.c @@ -67,7 +67,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: munmap failed!?\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); addr = base_addr + page_size; size = 3 * page_size; @@ -76,7 +77,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 3*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Exact same mapping again: @@ -93,7 +95,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); /* * Second mapping contained within first: @@ -111,7 +114,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Overlap end of existing mapping: @@ -128,7 +132,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+(3*PAGE_SIZE)\n"); /* * Overlap start of existing mapping: @@ -145,7 +150,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE bytes at base\n"); /* * Adjacent to start of existing mapping: @@ -162,7 +168,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base\n"); /* * Adjacent to end of existing mapping: @@ -179,7 +186,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base+(4*PAGE_SIZE)\n"); addr = base_addr; size = 5 * page_size; From 8e279f970b5cb0628f856b6735e2e47b4da9f76e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 11 Jun 2024 22:06:20 -0700 Subject: [PATCH 371/778] mm/migrate: fix kernel BUG at mm/compaction.c:2761! I hit the VM_BUG_ON(!list_empty(&cc->migratepages)) in compact_zone(); and if DEBUG_VM were off, then pages would be lost on a local list. Our convention is that if migrate_pages() reports complete success (0), then the migratepages list will be empty; but if it reports an error or some pages remaining, then its caller must putback_movable_pages(). There's a new case in which migrate_pages() has been reporting complete success, but returning with pages left on the migratepages list: when migrate_pages_batch() successfully split a folio on the deferred list, but then the "Failure isn't counted" call does not dispose of them all. Since that block is expecting the large folio to have been counted as 1 failure already, and since the return code is later adjusted to success whenever the returned list is found empty, the simple way to fix this safely is to count splitting the deferred folio as "a failure". Link: https://lkml.kernel.org/r/46c948b4-4dd8-6e03-4c7b-ce4e81cfa536@google.com Fixes: 7262f208ca68 ("mm/migrate: split source folio if it is on deferred split list") Signed-off-by: Hugh Dickins Cc: Baolin Wang Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/migrate.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index dd04f578c19c..2cc5a68f6843 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1654,7 +1654,12 @@ static int migrate_pages_batch(struct list_head *from, /* * The rare folio on the deferred split list should - * be split now. It should not count as a failure. + * be split now. It should not count as a failure: + * but increment nr_failed because, without doing so, + * migrate_pages() may report success with (split but + * unmigrated) pages still on its fromlist; whereas it + * always reports success when its fromlist is empty. + * * Only check it without removing it from the list. * Since the folio can be on deferred_split_scan() * local list and removing it can cause the local list @@ -1669,6 +1674,7 @@ static int migrate_pages_batch(struct list_head *from, if (nr_pages > 2 && !list_empty(&folio->_deferred_list)) { if (try_split_folio(folio, split_folios) == 0) { + nr_failed++; stats->nr_thp_split += is_thp; stats->nr_split++; continue; From cfdd12b48202398a879e8bc4e7fa023f4d473f62 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 12 Jun 2024 20:28:22 +0800 Subject: [PATCH 372/778] mm: fix possible OOB in numa_rebuild_large_mapping() The large folio is mapped with folio size(not greater PMD_SIZE) aligned virtual address during the pagefault, ie, 'addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE)' in do_anonymous_page(). But after the mremap(), the virtual address only requires PAGE_SIZE alignment. Also pte is moved to new in move_page_tables(), then traversal of the new pte in the numa_rebuild_large_mapping() could hit the following issue, Unable to handle kernel paging request at virtual address 00000a80c021a788 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=00002040341a6000 [00000a80c021a788] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000096000004 [#1] SMP ... CPU: 76 PID: 15187 Comm: git Kdump: loaded Tainted: G W 6.10.0-rc2+ #209 Hardware name: Huawei TaiShan 2280 V2/BC82AMDD, BIOS 1.79 08/21/2021 pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : numa_rebuild_large_mapping+0x338/0x638 lr : numa_rebuild_large_mapping+0x320/0x638 sp : ffff8000b41c3b00 x29: ffff8000b41c3b30 x28: ffff8000812a0000 x27: 00000000000a8000 x26: 00000000000000a8 x25: 0010000000000001 x24: ffff20401c7170f0 x23: 0000ffff33a1e000 x22: 0000ffff33a76000 x21: ffff20400869eca0 x20: 0000ffff33976000 x19: 00000000000000a8 x18: ffffffffffffffff x17: 0000000000000000 x16: 0000000000000020 x15: ffff8000b41c36a8 x14: 0000000000000000 x13: 205d373831353154 x12: 5b5d333331363732 x11: 000000000011ff78 x10: 000000000011ff10 x9 : ffff800080273f30 x8 : 000000320400869e x7 : c0000000ffffd87f x6 : 00000000001e6ba8 x5 : ffff206f3fb5af88 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : fffffdffc0000000 x0 : 00000a80c021a780 Call trace: numa_rebuild_large_mapping+0x338/0x638 do_numa_page+0x3e4/0x4e0 handle_pte_fault+0x1bc/0x238 __handle_mm_fault+0x20c/0x400 handle_mm_fault+0xa8/0x288 do_page_fault+0x124/0x498 do_translation_fault+0x54/0x80 do_mem_abort+0x4c/0xa8 el0_da+0x40/0x110 el0t_64_sync_handler+0xe4/0x158 el0t_64_sync+0x188/0x190 Fix it by making the start and end not only within the vma range, but also within the page table range. Link: https://lkml.kernel.org/r/20240612122822.4033433-1-wangkefeng.wang@huawei.com Fixes: d2136d749d76 ("mm: support multi-size THP numa balancing") Signed-off-by: Kefeng Wang Acked-by: David Hildenbrand Reviewed-by: Baolin Wang Cc: "Huang, Ying" Cc: John Hubbard Cc: Liu Shixin Cc: Mel Gorman Cc: Ryan Roberts Signed-off-by: Andrew Morton --- mm/memory.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 2bc8032a30a2..25a77c4fe4a0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5100,10 +5100,16 @@ static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_stru bool ignore_writable, bool pte_write_upgrade) { int nr = pte_pfn(fault_pte) - folio_pfn(folio); - unsigned long start = max(vmf->address - nr * PAGE_SIZE, vma->vm_start); - unsigned long end = min(vmf->address + (folio_nr_pages(folio) - nr) * PAGE_SIZE, vma->vm_end); - pte_t *start_ptep = vmf->pte - (vmf->address - start) / PAGE_SIZE; - unsigned long addr; + unsigned long start, end, addr = vmf->address; + unsigned long addr_start = addr - (nr << PAGE_SHIFT); + unsigned long pt_start = ALIGN_DOWN(addr, PMD_SIZE); + pte_t *start_ptep; + + /* Stay within the VMA and within the page table. */ + start = max3(addr_start, pt_start, vma->vm_start); + end = min3(addr_start + folio_size(folio), pt_start + PMD_SIZE, + vma->vm_end); + start_ptep = vmf->pte - ((addr - start) >> PAGE_SHIFT); /* Restore all PTEs' mapping of the large folio */ for (addr = start; addr != end; start_ptep++, addr += PAGE_SIZE) { From 0b1ef4fde7a24909ff2afacffd0d6afa28b73652 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 23 May 2024 09:21:39 -0400 Subject: [PATCH 373/778] mm/debug_vm_pgtable: drop RANDOM_ORVALUE trick Macro RANDOM_ORVALUE was used to make sure the pgtable entry will be populated with !none data in clear tests. The RANDOM_ORVALUE tried to cover mostly all the bits in a pgtable entry, even if there's no discussion on whether all the bits will be vaild. Both S390 and PPC64 have their own masks to avoid touching some bits. Now it's the turn for x86_64. The issue is there's a recent report from Mikhail Gavrilov showing that this can cause a warning with the newly added pte set check in commit 8430557fc5 on writable v.s. userfaultfd-wp bit, even though the check itself was valid, the random pte is not. We can choose to mask more bits out. However the need to have such random bits setup is questionable, as now it's already guaranteed to be true on below: - For pte level, the pgtable entry will be installed with value from pfn_pte(), where pfn points to a valid page. Hence the pte will be !none already if populated with pfn_pte(). - For upper-than-pte level, the pgtable entry should contain a directory entry always, which is also !none. All the cases look like good enough to test a pxx_clear() helper. Instead of extending the bitmask, drop the "set random bits" trick completely. Add some warning guards to make sure the entries will be !none before clear(). Link: https://lkml.kernel.org/r/20240523132139.289719-1-peterx@redhat.com Fixes: 8430557fc584 ("mm/page_table_check: support userfault wr-protect entries") Signed-off-by: Peter Xu Reported-by: Mikhail Gavrilov Link: https://lore.kernel.org/r/CABXGCsMB9A8-X+Np_Q+fWLURYL_0t3Y-MdoNabDM-Lzk58-DGA@mail.gmail.com Tested-by: Mikhail Gavrilov Reviewed-by: Pasha Tatashin Acked-by: David Hildenbrand Cc: Aneesh Kumar K.V Cc: Gavin Shan Cc: Anshuman Khandual Signed-off-by: Andrew Morton --- mm/debug_vm_pgtable.c | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index b104a353b532..e4969fb54da3 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -40,22 +40,7 @@ * Please refer Documentation/mm/arch_pgtable_helpers.rst for the semantics * expectations that are being validated here. All future changes in here * or the documentation need to be in sync. - * - * On s390 platform, the lower 4 bits are used to identify given page table - * entry type. But these bits might affect the ability to clear entries with - * pxx_clear() because of how dynamic page table folding works on s390. So - * while loading up the entries do not change the lower 4 bits. It does not - * have affect any other platform. Also avoid the 62nd bit on ppc64 that is - * used to mark a pte entry. */ -#define S390_SKIP_MASK GENMASK(3, 0) -#if __BITS_PER_LONG == 64 -#define PPC64_SKIP_MASK GENMASK(62, 62) -#else -#define PPC64_SKIP_MASK 0x0 -#endif -#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK) -#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK) #define RANDOM_NZVALUE GENMASK(7, 0) struct pgtable_debug_args { @@ -511,8 +496,7 @@ static void __init pud_clear_tests(struct pgtable_debug_args *args) return; pr_debug("Validating PUD clear\n"); - pud = __pud(pud_val(pud) | RANDOM_ORVALUE); - WRITE_ONCE(*args->pudp, pud); + WARN_ON(pud_none(pud)); pud_clear(args->pudp); pud = READ_ONCE(*args->pudp); WARN_ON(!pud_none(pud)); @@ -548,8 +532,7 @@ static void __init p4d_clear_tests(struct pgtable_debug_args *args) return; pr_debug("Validating P4D clear\n"); - p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE); - WRITE_ONCE(*args->p4dp, p4d); + WARN_ON(p4d_none(p4d)); p4d_clear(args->p4dp); p4d = READ_ONCE(*args->p4dp); WARN_ON(!p4d_none(p4d)); @@ -582,8 +565,7 @@ static void __init pgd_clear_tests(struct pgtable_debug_args *args) return; pr_debug("Validating PGD clear\n"); - pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE); - WRITE_ONCE(*args->pgdp, pgd); + WARN_ON(pgd_none(pgd)); pgd_clear(args->pgdp); pgd = READ_ONCE(*args->pgdp); WARN_ON(!pgd_none(pgd)); @@ -634,10 +616,8 @@ static void __init pte_clear_tests(struct pgtable_debug_args *args) if (WARN_ON(!args->ptep)) return; -#ifndef CONFIG_RISCV - pte = __pte(pte_val(pte) | RANDOM_ORVALUE); -#endif set_pte_at(args->mm, args->vaddr, args->ptep, pte); + WARN_ON(pte_none(pte)); flush_dcache_page(page); barrier(); ptep_clear(args->mm, args->vaddr, args->ptep); @@ -650,8 +630,7 @@ static void __init pmd_clear_tests(struct pgtable_debug_args *args) pmd_t pmd = READ_ONCE(*args->pmdp); pr_debug("Validating PMD clear\n"); - pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE); - WRITE_ONCE(*args->pmdp, pmd); + WARN_ON(pmd_none(pmd)); pmd_clear(args->pmdp); pmd = READ_ONCE(*args->pmdp); WARN_ON(!pmd_none(pmd)); From 9094b4a1c76cfe84b906cc152bab34d4ba26fa5c Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 13 Jun 2024 16:21:19 +0800 Subject: [PATCH 374/778] mm: shmem: fix getting incorrect lruvec when replacing a shmem folio When testing shmem swapin, I encountered the warning below on my machine. The reason is that replacing an old shmem folio with a new one causes mem_cgroup_migrate() to clear the old folio's memcg data. As a result, the old folio cannot get the correct memcg's lruvec needed to remove itself from the LRU list when it is being freed. This could lead to possible serious problems, such as LRU list crashes due to holding the wrong LRU lock, and incorrect LRU statistics. To fix this issue, we can fallback to use the mem_cgroup_replace_folio() to replace the old shmem folio. [ 5241.100311] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x5d9960 [ 5241.100317] head: order:4 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [ 5241.100319] flags: 0x17fffe0000040068(uptodate|lru|head|swapbacked|node=0|zone=2|lastcpupid=0x3ffff) [ 5241.100323] raw: 17fffe0000040068 fffffdffd6687948 fffffdffd69ae008 0000000000000000 [ 5241.100325] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 5241.100326] head: 17fffe0000040068 fffffdffd6687948 fffffdffd69ae008 0000000000000000 [ 5241.100327] head: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 5241.100328] head: 17fffe0000000204 fffffdffd6665801 ffffffffffffffff 0000000000000000 [ 5241.100329] head: 0000000a00000010 0000000000000000 00000000ffffffff 0000000000000000 [ 5241.100330] page dumped because: VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled()) [ 5241.100338] ------------[ cut here ]------------ [ 5241.100339] WARNING: CPU: 19 PID: 78402 at include/linux/memcontrol.h:775 folio_lruvec_lock_irqsave+0x140/0x150 [...] [ 5241.100374] pc : folio_lruvec_lock_irqsave+0x140/0x150 [ 5241.100375] lr : folio_lruvec_lock_irqsave+0x138/0x150 [ 5241.100376] sp : ffff80008b38b930 [...] [ 5241.100398] Call trace: [ 5241.100399] folio_lruvec_lock_irqsave+0x140/0x150 [ 5241.100401] __page_cache_release+0x90/0x300 [ 5241.100404] __folio_put+0x50/0x108 [ 5241.100406] shmem_replace_folio+0x1b4/0x240 [ 5241.100409] shmem_swapin_folio+0x314/0x528 [ 5241.100411] shmem_get_folio_gfp+0x3b4/0x930 [ 5241.100412] shmem_fault+0x74/0x160 [ 5241.100414] __do_fault+0x40/0x218 [ 5241.100417] do_shared_fault+0x34/0x1b0 [ 5241.100419] do_fault+0x40/0x168 [ 5241.100420] handle_pte_fault+0x80/0x228 [ 5241.100422] __handle_mm_fault+0x1c4/0x440 [ 5241.100424] handle_mm_fault+0x60/0x1f0 [ 5241.100426] do_page_fault+0x120/0x488 [ 5241.100429] do_translation_fault+0x4c/0x68 [ 5241.100431] do_mem_abort+0x48/0xa0 [ 5241.100434] el0_da+0x38/0xc0 [ 5241.100436] el0t_64_sync_handler+0x68/0xc0 [ 5241.100437] el0t_64_sync+0x14c/0x150 [ 5241.100439] ---[ end trace 0000000000000000 ]--- [baolin.wang@linux.alibaba.com: remove less helpful comments, per Matthew] Link: https://lkml.kernel.org/r/ccad3fe1375b468ebca3227b6b729f3eaf9d8046.1718423197.git.baolin.wang@linux.alibaba.com Link: https://lkml.kernel.org/r/3c11000dd6c1df83015a8321a859e9775ebbc23e.1718266112.git.baolin.wang@linux.alibaba.com Fixes: 85ce2c517ade ("memcontrol: only transfer the memcg data for migration") Signed-off-by: Baolin Wang Reviewed-by: Shakeel Butt Cc: Matthew Wilcox (Oracle) Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nhat Pham Cc: Michal Hocko Cc: Roman Gushchin Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- mm/memcontrol.c | 3 +-- mm/shmem.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 36793e509f47..71fe2a95b8bd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7745,8 +7745,7 @@ void __mem_cgroup_uncharge_folios(struct folio_batch *folios) * @new: Replacement folio. * * Charge @new as a replacement folio for @old. @old will - * be uncharged upon free. This is only used by the page cache - * (in replace_page_cache_folio()). + * be uncharged upon free. * * Both folios must be locked, @new->mapping must be set up. */ diff --git a/mm/shmem.c b/mm/shmem.c index f5d60436b604..a8b181a63402 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1786,7 +1786,7 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp, xa_lock_irq(&swap_mapping->i_pages); error = shmem_replace_entry(swap_mapping, swap_index, old, new); if (!error) { - mem_cgroup_migrate(old, new); + mem_cgroup_replace_folio(old, new); __lruvec_stat_mod_folio(new, NR_FILE_PAGES, 1); __lruvec_stat_mod_folio(new, NR_SHMEM, 1); __lruvec_stat_mod_folio(old, NR_FILE_PAGES, -1); From 01c8f9806bde438ca1c8cbbc439f0a14a6694f6c Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Tue, 11 Jun 2024 15:32:29 +0200 Subject: [PATCH 375/778] kcov: don't lose track of remote references during softirqs In kcov_remote_start()/kcov_remote_stop(), we swap the previous KCOV metadata of the current task into a per-CPU variable. However, the kcov_mode_enabled(mode) check is not sufficient in the case of remote KCOV coverage: current->kcov_mode always remains KCOV_MODE_DISABLED for remote KCOV objects. If the original task that has invoked the KCOV_REMOTE_ENABLE ioctl happens to get interrupted and kcov_remote_start() is called, it ultimately leads to kcov_remote_stop() NOT restoring the original KCOV reference. So when the task exits, all registered remote KCOV handles remain active forever. The most uncomfortable effect (at least for syzkaller) is that the bug prevents the reuse of the same /sys/kernel/debug/kcov descriptor. If we obtain it in the parent process and then e.g. drop some capabilities and continuously fork to execute individual programs, at some point current->kcov of the forked process is lost, kcov_task_exit() takes no action, and all KCOV_REMOTE_ENABLE ioctls calls from subsequent forks fail. And, yes, the efficiency is also affected if we keep on losing remote kcov objects. a) kcov_remote_map keeps on growing forever. b) (If I'm not mistaken), we're also not freeing the memory referenced by kcov->area. Fix it by introducing a special kcov_mode that is assigned to the task that owns a KCOV remote object. It makes kcov_mode_enabled() return true and yet does not trigger coverage collection in __sanitizer_cov_trace_pc() and write_comp_data(). [nogikh@google.com: replace WRITE_ONCE() with an ordinary assignment] Link: https://lkml.kernel.org/r/20240614171221.2837584-1-nogikh@google.com Link: https://lkml.kernel.org/r/20240611133229.527822-1-nogikh@google.com Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts") Signed-off-by: Aleksandr Nogikh Reviewed-by: Dmitry Vyukov Reviewed-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Arnd Bergmann Cc: Marco Elver Cc: Signed-off-by: Andrew Morton --- include/linux/kcov.h | 2 ++ kernel/kcov.c | 1 + 2 files changed, 3 insertions(+) diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b851ba415e03..3b479a3d235a 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -21,6 +21,8 @@ enum kcov_mode { KCOV_MODE_TRACE_PC = 2, /* Collecting comparison operands mode. */ KCOV_MODE_TRACE_CMP = 3, + /* The process owns a KCOV remote reference. */ + KCOV_MODE_REMOTE = 4, }; #define KCOV_IN_CTXSW (1 << 30) diff --git a/kernel/kcov.c b/kernel/kcov.c index c3124f6d5536..f0a69d402066 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -632,6 +632,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, return -EINVAL; kcov->mode = mode; t->kcov = kcov; + t->kcov_mode = KCOV_MODE_REMOTE; kcov->t = t; kcov->remote = true; kcov->remote_size = remote_arg->area_size; From a986fa57fd81a1430e00b3c6cf8a325d6f894a63 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 14 Jun 2024 22:29:10 +1000 Subject: [PATCH 376/778] KVM: PPC: Book3S HV: Prevent UAF in kvm_spapr_tce_attach_iommu_group() Al reported a possible use-after-free (UAF) in kvm_spapr_tce_attach_iommu_group(). It looks up `stt` from tablefd, but then continues to use it after doing fdput() on the returned fd. After the fdput() the tablefd is free to be closed by another thread. The close calls kvm_spapr_tce_release() and then release_spapr_tce_table() (via call_rcu()) which frees `stt`. Although there are calls to rcu_read_lock() in kvm_spapr_tce_attach_iommu_group() they are not sufficient to prevent the UAF, because `stt` is used outside the locked regions. With an artifcial delay after the fdput() and a userspace program which triggers the race, KASAN detects the UAF: BUG: KASAN: slab-use-after-free in kvm_spapr_tce_attach_iommu_group+0x298/0x720 [kvm] Read of size 4 at addr c000200027552c30 by task kvm-vfio/2505 CPU: 54 PID: 2505 Comm: kvm-vfio Not tainted 6.10.0-rc3-next-20240612-dirty #1 Hardware name: 8335-GTH POWER9 0x4e1202 opal:skiboot-v6.5.3-35-g1851b2a06 PowerNV Call Trace: dump_stack_lvl+0xb4/0x108 (unreliable) print_report+0x2b4/0x6ec kasan_report+0x118/0x2b0 __asan_load4+0xb8/0xd0 kvm_spapr_tce_attach_iommu_group+0x298/0x720 [kvm] kvm_vfio_set_attr+0x524/0xac0 [kvm] kvm_device_ioctl+0x144/0x240 [kvm] sys_ioctl+0x62c/0x1810 system_call_exception+0x190/0x440 system_call_vectored_common+0x15c/0x2ec ... Freed by task 0: ... kfree+0xec/0x3e0 release_spapr_tce_table+0xd4/0x11c [kvm] rcu_core+0x568/0x16a0 handle_softirqs+0x23c/0x920 do_softirq_own_stack+0x6c/0x90 do_softirq_own_stack+0x58/0x90 __irq_exit_rcu+0x218/0x2d0 irq_exit+0x30/0x80 arch_local_irq_restore+0x128/0x230 arch_local_irq_enable+0x1c/0x30 cpuidle_enter_state+0x134/0x5cc cpuidle_enter+0x6c/0xb0 call_cpuidle+0x7c/0x100 do_idle+0x394/0x410 cpu_startup_entry+0x60/0x70 start_secondary+0x3fc/0x410 start_secondary_prolog+0x10/0x14 Fix it by delaying the fdput() until `stt` is no longer in use, which is effectively the entire function. To keep the patch minimal add a call to fdput() at each of the existing return paths. Future work can convert the function to goto or __cleanup style cleanup. With the fix in place the test case no longer triggers the UAF. Reported-by: Al Viro Closes: https://lore.kernel.org/all/20240610024437.GA1464458@ZenIV/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20240614122910.3499489-1-mpe@ellerman.id.au --- arch/powerpc/kvm/book3s_64_vio.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index b569ebaa590e..3ff3de9a52ac 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -130,14 +130,16 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, } rcu_read_unlock(); - fdput(f); - - if (!found) + if (!found) { + fdput(f); return -EINVAL; + } table_group = iommu_group_get_iommudata(grp); - if (WARN_ON(!table_group)) + if (WARN_ON(!table_group)) { + fdput(f); return -EFAULT; + } for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { struct iommu_table *tbltmp = table_group->tables[i]; @@ -158,8 +160,10 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, break; } } - if (!tbl) + if (!tbl) { + fdput(f); return -EINVAL; + } rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { @@ -170,6 +174,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, /* stit is being destroyed */ iommu_tce_table_put(tbl); rcu_read_unlock(); + fdput(f); return -ENOTTY; } /* @@ -177,6 +182,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, * its KVM reference counter and can return. */ rcu_read_unlock(); + fdput(f); return 0; } rcu_read_unlock(); @@ -184,6 +190,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, stit = kzalloc(sizeof(*stit), GFP_KERNEL); if (!stit) { iommu_tce_table_put(tbl); + fdput(f); return -ENOMEM; } @@ -192,6 +199,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, list_add_rcu(&stit->next, &stt->iommu_tables); + fdput(f); return 0; } From a5d400b6439ac734a5c0dbb641e26a38736abc17 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 29 May 2024 00:48:54 -0300 Subject: [PATCH 377/778] arm64: dts: imx93-11x11-evk: Remove the 'no-sdio' property The usdhc2 port is connected to the microSD slot. The presence of the 'no-sdio' property prevents Wifi SDIO cards, such as CMP9010-X-EVB [1] to be detected. Remove the 'no-sdio' property so that SDIO cards could also work. [1] https://www.nxp.com/products/wireless-connectivity/wi-fi-plus-bluetooth-plus-802-15-4/cmp9010-x-evb-iw416-usd-interface-evaluation-board:CMP9010-X-EVB Fixes: e37907bd8294 ("arm64: dts: freescale: add i.MX93 11x11 EVK basic support") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts index d400d85f42a9..bd98eff4d685 100644 --- a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts @@ -296,7 +296,6 @@ vmmc-supply = <®_usdhc2_vmmc>; bus-width = <4>; status = "okay"; - no-sdio; no-mmc; }; From 8043832e2a123fd9372007a29192f2f3ba328cd6 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Fri, 14 Jun 2024 11:05:43 +0300 Subject: [PATCH 378/778] memblock: use numa_valid_node() helper to check for invalid node ID Introduce numa_valid_node(nid) that verifies that nid is a valid node ID and use that instead of comparing nid parameter with either NUMA_NO_NODE or MAX_NUMNODES. This makes the checks for valid node IDs consistent and more robust and allows to get rid of multiple WARNings. Suggested-by: Linus Torvalds Signed-off-by: Mike Rapoport (IBM) --- include/linux/numa.h | 5 +++++ mm/memblock.c | 28 +++++++--------------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/include/linux/numa.h b/include/linux/numa.h index 1d43371fafd2..eb19503604fe 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -15,6 +15,11 @@ #define NUMA_NO_NODE (-1) #define NUMA_NO_MEMBLK (-1) +static inline bool numa_valid_node(int nid) +{ + return nid >= 0 && nid < MAX_NUMNODES; +} + /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO #define __initdata_or_meminfo diff --git a/mm/memblock.c b/mm/memblock.c index 08e9806b1cf9..e81fb68f7f88 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -754,7 +754,7 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt /* calculate lose page */ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - if (nid == NUMA_NO_NODE) + if (!numa_valid_node(nid)) nr_pages += end_pfn - start_pfn; } @@ -1061,7 +1061,7 @@ static bool should_skip_region(struct memblock_type *type, return false; /* only memory regions are associated with nodes, check it */ - if (nid != NUMA_NO_NODE && nid != m_nid) + if (numa_valid_node(nid) && nid != m_nid) return true; /* skip hotpluggable memory regions if needed */ @@ -1118,10 +1118,6 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; - if (WARN_ONCE(nid == MAX_NUMNODES, - "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - for (; idx_a < type_a->cnt; idx_a++) { struct memblock_region *m = &type_a->regions[idx_a]; @@ -1215,9 +1211,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - if (*idx == (u64)ULLONG_MAX) { idx_a = type_a->cnt - 1; if (type_b != NULL) @@ -1303,7 +1296,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) continue; - if (nid == MAX_NUMNODES || nid == r_nid) + if (!numa_valid_node(nid) || nid == r_nid) break; } if (*idx >= type->cnt) { @@ -1339,10 +1332,6 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, int start_rgn, end_rgn; int i, ret; - if (WARN_ONCE(nid == MAX_NUMNODES, - "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); if (ret) return ret; @@ -1452,9 +1441,6 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, enum memblock_flags flags = choose_memblock_flags(); phys_addr_t found; - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - if (!align) { /* Can't use WARNs this early in boot on powerpc */ dump_stack(); @@ -1467,7 +1453,7 @@ again: if (found && !memblock_reserve(found, size)) goto done; - if (nid != NUMA_NO_NODE && !exact_nid) { + if (numa_valid_node(nid) && !exact_nid) { found = memblock_find_in_range_node(size, align, start, end, NUMA_NO_NODE, flags); @@ -1987,7 +1973,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type) end = base + size - 1; flags = rgn->flags; #ifdef CONFIG_NUMA - if (memblock_get_region_node(rgn) != MAX_NUMNODES) + if (numa_valid_node(memblock_get_region_node(rgn))) snprintf(nid_buf, sizeof(nid_buf), " on node %d", memblock_get_region_node(rgn)); #endif @@ -2181,7 +2167,7 @@ static void __init memmap_init_reserved_pages(void) start = region->base; end = start + region->size; - if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES) + if (!numa_valid_node(nid)) nid = early_pfn_to_nid(PFN_DOWN(start)); reserve_bootmem_region(start, end, nid); @@ -2272,7 +2258,7 @@ static int memblock_debug_show(struct seq_file *m, void *private) seq_printf(m, "%4d: ", i); seq_printf(m, "%pa..%pa ", ®->base, &end); - if (nid != MAX_NUMNODES) + if (numa_valid_node(nid)) seq_printf(m, "%4d ", nid); else seq_printf(m, "%4c ", 'x'); From b1fd0d1285b1eae8b99af36fb26ed2512b809af6 Mon Sep 17 00:00:00 2001 From: Ajrat Makhmutov Date: Sat, 15 Jun 2024 15:54:57 +0300 Subject: [PATCH 379/778] ALSA: hda/realtek: Enable headset mic on IdeaPad 330-17IKB 81DM Headset microphone do not work out of the box with this laptop. This quirk fixes it. Zihao Wang specified the wrong subsystem id in his patch. Link: https://lore.kernel.org/all/20220424084120.74125-1-wzhd@ustc.edu/ Fixes: 3b79954fd00d ("ALSA: hda/realtek: Add quirk for Yoga Duet 7 13ITL6 speakers") Signed-off-by: Ajrat Makhmutov Link: https://lore.kernel.org/r/20240615125457.167844-1-rauty@altlinux.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 79736c8782a3..a7594d46055d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10506,7 +10506,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS), - SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x3820, "IdeaPad 330-17IKB 81DM", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), From ba437905b4fbf0ee1686c175069239a1cc292558 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 7 Jun 2024 16:33:00 -0500 Subject: [PATCH 380/778] RAS/AMD/ATL: Use system settings for MI300 DRAM to normalized address translation The currently used normalized address format is not applicable to all MI300 systems. This leads to incorrect results during address translation. Drop the fixed layout and construct the normalized address from system settings. Fixes: 87a612375307 ("RAS/AMD/ATL: Add MI300 DRAM to normalized address translation support") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20240607-mi300-dram-xl-fix-v1-2-2f11547a178c@amd.com --- drivers/ras/amd/atl/internal.h | 2 +- drivers/ras/amd/atl/system.c | 2 +- drivers/ras/amd/atl/umc.c | 151 ++++++++++++++++++++++++--------- 3 files changed, 114 insertions(+), 41 deletions(-) diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 5de69e0bb0f9..196c1c8b578c 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -224,7 +224,7 @@ int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo); int get_df_system_info(void); int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num); -int get_addr_hash_mi300(void); +int get_umc_info_mi300(void); int get_address_map(struct addr_ctx *ctx); diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index 701349e84942..6979fa3d4fe2 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -127,7 +127,7 @@ static int df4_determine_df_rev(u32 reg) if (reg == DF_FUNC0_ID_MI300) { df_cfg.flags.heterogeneous = 1; - if (get_addr_hash_mi300()) + if (get_umc_info_mi300()) return -EINVAL; } diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 5cb92330dc67..a1b4accf7b96 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -68,6 +68,8 @@ struct xor_bits { }; #define NUM_BANK_BITS 4 +#define NUM_COL_BITS 5 +#define NUM_SID_BITS 2 static struct { /* UMC::CH::AddrHashBank */ @@ -80,7 +82,22 @@ static struct { u8 bank_xor; } addr_hash; +static struct { + u8 bank[NUM_BANK_BITS]; + u8 col[NUM_COL_BITS]; + u8 sid[NUM_SID_BITS]; + u8 num_row_lo; + u8 num_row_hi; + u8 row_lo; + u8 row_hi; + u8 pc; +} bit_shifts; + #define MI300_UMC_CH_BASE 0x90000 +#define MI300_ADDR_CFG (MI300_UMC_CH_BASE + 0x30) +#define MI300_ADDR_SEL (MI300_UMC_CH_BASE + 0x40) +#define MI300_COL_SEL_LO (MI300_UMC_CH_BASE + 0x50) +#define MI300_ADDR_SEL_2 (MI300_UMC_CH_BASE + 0xA4) #define MI300_ADDR_HASH_BANK0 (MI300_UMC_CH_BASE + 0xC8) #define MI300_ADDR_HASH_PC (MI300_UMC_CH_BASE + 0xE0) #define MI300_ADDR_HASH_PC2 (MI300_UMC_CH_BASE + 0xE4) @@ -90,17 +107,42 @@ static struct { #define ADDR_HASH_ROW_XOR GENMASK(31, 14) #define ADDR_HASH_BANK_XOR GENMASK(5, 0) +#define ADDR_CFG_NUM_ROW_LO GENMASK(11, 8) +#define ADDR_CFG_NUM_ROW_HI GENMASK(15, 12) + +#define ADDR_SEL_BANK0 GENMASK(3, 0) +#define ADDR_SEL_BANK1 GENMASK(7, 4) +#define ADDR_SEL_BANK2 GENMASK(11, 8) +#define ADDR_SEL_BANK3 GENMASK(15, 12) +#define ADDR_SEL_BANK4 GENMASK(20, 16) +#define ADDR_SEL_ROW_LO GENMASK(27, 24) +#define ADDR_SEL_ROW_HI GENMASK(31, 28) + +#define COL_SEL_LO_COL0 GENMASK(3, 0) +#define COL_SEL_LO_COL1 GENMASK(7, 4) +#define COL_SEL_LO_COL2 GENMASK(11, 8) +#define COL_SEL_LO_COL3 GENMASK(15, 12) +#define COL_SEL_LO_COL4 GENMASK(19, 16) + +#define ADDR_SEL_2_BANK5 GENMASK(4, 0) +#define ADDR_SEL_2_CHAN GENMASK(15, 12) + /* * Read UMC::CH::AddrHash{Bank,PC,PC2} registers to get XOR bits used - * for hashing. Do this during module init, since the values will not - * change during run time. + * for hashing. + * + * Also, read UMC::CH::Addr{Cfg,Sel,Sel2} and UMC::CH:ColSelLo registers to + * get the values needed to reconstruct the normalized address. Apply additional + * offsets to the raw register values, as needed. + * + * Do this during module init, since the values will not change during run time. * * These registers are instantiated for each UMC across each AMD Node. * However, they should be identically programmed due to the fixed hardware * design of MI300 systems. So read the values from Node 0 UMC 0 and keep a * single global structure for simplicity. */ -int get_addr_hash_mi300(void) +int get_umc_info_mi300(void) { u32 temp; int ret; @@ -130,6 +172,44 @@ int get_addr_hash_mi300(void) addr_hash.bank_xor = FIELD_GET(ADDR_HASH_BANK_XOR, temp); + ret = amd_smn_read(0, MI300_ADDR_CFG, &temp); + if (ret) + return ret; + + bit_shifts.num_row_hi = FIELD_GET(ADDR_CFG_NUM_ROW_HI, temp); + bit_shifts.num_row_lo = 10 + FIELD_GET(ADDR_CFG_NUM_ROW_LO, temp); + + ret = amd_smn_read(0, MI300_ADDR_SEL, &temp); + if (ret) + return ret; + + bit_shifts.bank[0] = 5 + FIELD_GET(ADDR_SEL_BANK0, temp); + bit_shifts.bank[1] = 5 + FIELD_GET(ADDR_SEL_BANK1, temp); + bit_shifts.bank[2] = 5 + FIELD_GET(ADDR_SEL_BANK2, temp); + bit_shifts.bank[3] = 5 + FIELD_GET(ADDR_SEL_BANK3, temp); + /* Use BankBit4 for the SID0 position. */ + bit_shifts.sid[0] = 5 + FIELD_GET(ADDR_SEL_BANK4, temp); + bit_shifts.row_lo = 12 + FIELD_GET(ADDR_SEL_ROW_LO, temp); + bit_shifts.row_hi = 24 + FIELD_GET(ADDR_SEL_ROW_HI, temp); + + ret = amd_smn_read(0, MI300_COL_SEL_LO, &temp); + if (ret) + return ret; + + bit_shifts.col[0] = 2 + FIELD_GET(COL_SEL_LO_COL0, temp); + bit_shifts.col[1] = 2 + FIELD_GET(COL_SEL_LO_COL1, temp); + bit_shifts.col[2] = 2 + FIELD_GET(COL_SEL_LO_COL2, temp); + bit_shifts.col[3] = 2 + FIELD_GET(COL_SEL_LO_COL3, temp); + bit_shifts.col[4] = 2 + FIELD_GET(COL_SEL_LO_COL4, temp); + + ret = amd_smn_read(0, MI300_ADDR_SEL_2, &temp); + if (ret) + return ret; + + /* Use BankBit5 for the SID1 position. */ + bit_shifts.sid[1] = 5 + FIELD_GET(ADDR_SEL_2_BANK5, temp); + bit_shifts.pc = 5 + FIELD_GET(ADDR_SEL_2_CHAN, temp); + return 0; } @@ -146,9 +226,6 @@ int get_addr_hash_mi300(void) * The MCA address format is as follows: * MCA_ADDR[27:0] = {S[1:0], P[0], R[14:0], B[3:0], C[4:0], Z[0]} * - * The normalized address format is fixed in hardware and is as follows: - * NA[30:0] = {S[1:0], R[13:0], C4, B[1:0], B[3:2], C[3:2], P, C[1:0], Z[4:0]} - * * Additionally, the PC and Bank bits may be hashed. This must be accounted for before * reconstructing the normalized address. */ @@ -158,18 +235,10 @@ int get_addr_hash_mi300(void) #define MI300_UMC_MCA_PC BIT(25) #define MI300_UMC_MCA_SID GENMASK(27, 26) -#define MI300_NA_COL_1_0 GENMASK(6, 5) -#define MI300_NA_PC BIT(7) -#define MI300_NA_COL_3_2 GENMASK(9, 8) -#define MI300_NA_BANK_3_2 GENMASK(11, 10) -#define MI300_NA_BANK_1_0 GENMASK(13, 12) -#define MI300_NA_COL_4 BIT(14) -#define MI300_NA_ROW GENMASK(28, 15) -#define MI300_NA_SID GENMASK(30, 29) - static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) { - u16 i, col, row, bank, pc, sid, temp; + u16 i, col, row, bank, pc, sid; + u32 temp; col = FIELD_GET(MI300_UMC_MCA_COL, addr); bank = FIELD_GET(MI300_UMC_MCA_BANK, addr); @@ -199,34 +268,38 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) /* Reconstruct the normalized address starting with NA[4:0] = 0 */ addr = 0; - /* NA[6:5] = Column[1:0] */ - temp = col & 0x3; - addr |= FIELD_PREP(MI300_NA_COL_1_0, temp); + /* Column bits */ + for (i = 0; i < NUM_COL_BITS; i++) { + temp = (col >> i) & 0x1; + addr |= temp << bit_shifts.col[i]; + } - /* NA[7] = PC */ - addr |= FIELD_PREP(MI300_NA_PC, pc); + /* Bank bits */ + for (i = 0; i < NUM_BANK_BITS; i++) { + temp = (bank >> i) & 0x1; + addr |= temp << bit_shifts.bank[i]; + } - /* NA[9:8] = Column[3:2] */ - temp = (col >> 2) & 0x3; - addr |= FIELD_PREP(MI300_NA_COL_3_2, temp); + /* Row lo bits */ + for (i = 0; i < bit_shifts.num_row_lo; i++) { + temp = (row >> i) & 0x1; + addr |= temp << (i + bit_shifts.row_lo); + } - /* NA[11:10] = Bank[3:2] */ - temp = (bank >> 2) & 0x3; - addr |= FIELD_PREP(MI300_NA_BANK_3_2, temp); + /* Row hi bits */ + for (i = 0; i < bit_shifts.num_row_hi; i++) { + temp = (row >> (i + bit_shifts.num_row_lo)) & 0x1; + addr |= temp << (i + bit_shifts.row_hi); + } - /* NA[13:12] = Bank[1:0] */ - temp = bank & 0x3; - addr |= FIELD_PREP(MI300_NA_BANK_1_0, temp); + /* PC bit */ + addr |= pc << bit_shifts.pc; - /* NA[14] = Column[4] */ - temp = (col >> 4) & 0x1; - addr |= FIELD_PREP(MI300_NA_COL_4, temp); - - /* NA[28:15] = Row[13:0] */ - addr |= FIELD_PREP(MI300_NA_ROW, row); - - /* NA[30:29] = SID[1:0] */ - addr |= FIELD_PREP(MI300_NA_SID, sid); + /* SID bits */ + for (i = 0; i < NUM_SID_BITS; i++) { + temp = (sid >> i) & 0x1; + addr |= temp << bit_shifts.sid[i]; + } pr_debug("Addr=0x%016lx", addr); pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid); From 1a49509885cd984b6f63c91da612e258b8a89fc8 Mon Sep 17 00:00:00 2001 From: Gergely Meszaros Date: Sun, 16 Jun 2024 10:52:33 +0200 Subject: [PATCH 381/778] ALSA: hda/realtek: Add quirk for Lenovo Yoga Pro 7 14ARP8 Similarly to other Lenovo laptops these also have a dual speaker setup with a shared amplifier. The model also seems to have a conflicting PCI SSID with the codec SSID for the Legion Y9000X 2022 IAH7. Only tested on the Yoga Pro 7, as I don't have access to the other laptop. Signed-off-by: Gergely Meszaros Link: https://lore.kernel.org/r/20240616085233.16922-1-meszaros.gergely97@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a7594d46055d..fd337fdd30f6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7520,6 +7520,7 @@ enum { ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1, ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318, ALC256_FIXUP_CHROME_BOOK, + ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -7559,6 +7560,21 @@ static void alc287_fixup_lenovo_14irp8_duetitl(struct hda_codec *codec, __snd_hda_apply_fixup(codec, id, action, 0); } +/* Similar to above the Lenovo Yoga Pro 7 14ARP8 PCI SSID matches the codec SSID of the + Legion Y9000X 2022 IAH7.*/ +static void alc287_fixup_lenovo_14arp8_legion_iah7(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + int id; + + if (codec->core.subsystem_id == 0x17aa386e) + id = ALC287_FIXUP_CS35L41_I2C_2; /* Legion Y9000X 2022 IAH7 */ + else + id = ALC285_FIXUP_SPEAKER2_TO_DAC1; /* Yoga Pro 7 14ARP8 */ + __snd_hda_apply_fixup(codec, id, action, 0); +} + /* Another hilarious PCI SSID conflict with Lenovo Legion Pro 7 16ARX8H (with * TAS2781 codec) and Legion 7i 16IAX7 (with CS35L41 codec); * we apply a corresponding fixup depending on the codec SSID instead @@ -9658,6 +9674,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK, }, + [ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_lenovo_14arp8_legion_iah7, + }, [ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_yoga9_14iap7_bass_spk_pin, @@ -10520,7 +10540,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3865, "Lenovo 13X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3866, "Lenovo 13X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), - SND_PCI_QUIRK(0x17aa, 0x386e, "Legion Y9000X 2022 IAH7", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x386e, "Legion Y9000X 2022 IAH7 / Yoga Pro 7 14ARP8", ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7), SND_PCI_QUIRK(0x17aa, 0x386f, "Legion Pro 7/7i", ALC287_FIXUP_LENOVO_LEGION_7), SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C), SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), From 67cc6125fb39902169707cb6277f010e56d4a40a Mon Sep 17 00:00:00 2001 From: Max Krummenacher Date: Mon, 3 Jun 2024 16:00:45 +0200 Subject: [PATCH 382/778] arm64: dts: freescale: imx8mm-verdin: enable hysteresis on slow input pin SODIMM 17 can be used as an edge triggered interrupt supplied from an off board source. Enable hysteresis on the pinmuxing to increase immunity against noise on the signal. Fixes: 60f01b5b5c7d ("arm64: dts: imx8mm-verdin: update iomux configuration") Signed-off-by: Max Krummenacher Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index 0d9abca58821..98544741ce17 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -936,7 +936,7 @@ /* Verdin GPIO_9_DSI (pulled-up as active-low) */ pinctrl_gpio_9_dsi: gpio9dsigrp { fsl,pins = - ; /* SODIMM 17 */ + ; /* SODIMM 17 */ }; /* Verdin GPIO_10_DSI (pulled-up as active-low) */ From fcf2a9970ef587d8f358560c381ee6115a9108aa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 7 Jun 2024 12:18:47 +0200 Subject: [PATCH 383/778] leds: class: Revert: "If no default trigger is given, make hw_control trigger the default trigger" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 66601a29bb23 ("leds: class: If no default trigger is given, make hw_control trigger the default trigger") causes ledtrig-netdev to get set as default trigger on various network LEDs. This causes users to hit a pre-existing AB-BA deadlock issue in ledtrig-netdev between the LED-trigger locks and the rtnl mutex, resulting in hung tasks in kernels >= 6.9. Solving the deadlock is non trivial, so for now revert the change to set the hw_control trigger as default trigger, so that ledtrig-netdev no longer gets activated automatically for various network LEDs. The netdev trigger is not needed because the network LEDs are usually under hw-control and the netdev trigger tries to leave things that way so setting it as the active trigger for the LED class device is a no-op. Fixes: 66601a29bb23 ("leds: class: If no default trigger is given, make hw_control trigger the default trigger") Reported-by: Genes Lists Closes: https://lore.kernel.org/all/9d189ec329cfe68ed68699f314e191a10d4b5eda.camel@sapience.com/ Reported-by: Johannes Wüller Closes: https://lore.kernel.org/lkml/e441605c-eaf2-4c2d-872b-d8e541f4cf60@gmail.com/ Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Reviewed-by: Andrew Lunn Acked-by: Lee Jones Signed-off-by: Linus Torvalds --- drivers/leds/led-class.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 24fcff682b24..ba1be15cfd8e 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -552,12 +552,6 @@ int led_classdev_register_ext(struct device *parent, led_init_core(led_cdev); #ifdef CONFIG_LEDS_TRIGGERS - /* - * If no default trigger was given and hw_control_trigger is set, - * make it the default trigger. - */ - if (!led_cdev->default_trigger && led_cdev->hw_control_trigger) - led_cdev->default_trigger = led_cdev->hw_control_trigger; led_trigger_set_default(led_cdev); #endif From 6ba59ff4227927d3a8530fc2973b80e94b54d58f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Jun 2024 13:40:16 -0700 Subject: [PATCH 384/778] Linux 6.10-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 925a75b8ba7d..14427547dc1e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* From dfd239a039b3581ca25f932e66b6e2c2bf77c798 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 14 Jun 2024 11:06:32 -0400 Subject: [PATCH 385/778] arm64: dts: imx8qm-mek: fix gpio number for reg_usdhc2_vmmc The gpio in "reg_usdhc2_vmmc" should be 7 instead of 19. Cc: stable@vger.kernel.org Fixes: 307fd14d4b14 ("arm64: dts: imx: add imx8qm mek support") Reviewed-by: Peng Fan Signed-off-by: Frank Li Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8qm-mek.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts index 5c6b39c6933f..6e05361c1ffb 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts @@ -36,7 +36,7 @@ regulator-name = "SD1_SPWR"; regulator-min-microvolt = <3000000>; regulator-max-microvolt = <3000000>; - gpio = <&lsio_gpio4 19 GPIO_ACTIVE_HIGH>; + gpio = <&lsio_gpio4 7 GPIO_ACTIVE_HIGH>; enable-active-high; }; From 348a1983cf4cf5099fc398438a968443af4c9f65 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 13 Jun 2024 08:51:48 +1000 Subject: [PATCH 386/778] xfs: fix unlink vs cluster buffer instantiation race Luis has been reporting an assert failure when freeing an inode cluster during inode inactivation for a while. The assert looks like: XFS: Assertion failed: bp->b_flags & XBF_DONE, file: fs/xfs/xfs_trans_buf.c, line: 241 ------------[ cut here ]------------ kernel BUG at fs/xfs/xfs_message.c:102! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 4 PID: 73 Comm: kworker/4:1 Not tainted 6.10.0-rc1 #4 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 Workqueue: xfs-inodegc/loop5 xfs_inodegc_worker [xfs] RIP: 0010:assfail (fs/xfs/xfs_message.c:102) xfs RSP: 0018:ffff88810188f7f0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff88816e748250 RCX: 1ffffffff844b0e7 RDX: 0000000000000004 RSI: ffff88810188f558 RDI: ffffffffc2431fa0 RBP: 1ffff11020311f01 R08: 0000000042431f9f R09: ffffed1020311e9b R10: ffff88810188f4df R11: ffffffffac725d70 R12: ffff88817a3f4000 R13: ffff88812182f000 R14: ffff88810188f998 R15: ffffffffc2423f80 FS: 0000000000000000(0000) GS:ffff8881c8400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055fe9d0f109c CR3: 000000014426c002 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: xfs_trans_read_buf_map (fs/xfs/xfs_trans_buf.c:241 (discriminator 1)) xfs xfs_imap_to_bp (fs/xfs/xfs_trans.h:210 fs/xfs/libxfs/xfs_inode_buf.c:138) xfs xfs_inode_item_precommit (fs/xfs/xfs_inode_item.c:145) xfs xfs_trans_run_precommits (fs/xfs/xfs_trans.c:931) xfs __xfs_trans_commit (fs/xfs/xfs_trans.c:966) xfs xfs_inactive_ifree (fs/xfs/xfs_inode.c:1811) xfs xfs_inactive (fs/xfs/xfs_inode.c:2013) xfs xfs_inodegc_worker (fs/xfs/xfs_icache.c:1841 fs/xfs/xfs_icache.c:1886) xfs process_one_work (kernel/workqueue.c:3231) worker_thread (kernel/workqueue.c:3306 (discriminator 2) kernel/workqueue.c:3393 (discriminator 2)) kthread (kernel/kthread.c:389) ret_from_fork (arch/x86/kernel/process.c:147) ret_from_fork_asm (arch/x86/entry/entry_64.S:257) And occurs when the the inode precommit handlers is attempt to look up the inode cluster buffer to attach the inode for writeback. The trail of logic that I can reconstruct is as follows. 1. the inode is clean when inodegc runs, so it is not attached to a cluster buffer when precommit runs. 2. #1 implies the inode cluster buffer may be clean and not pinned by dirty inodes when inodegc runs. 3. #2 implies that the inode cluster buffer can be reclaimed by memory pressure at any time. 4. The assert failure implies that the cluster buffer was attached to the transaction, but not marked done. It had been accessed earlier in the transaction, but not marked done. 5. #4 implies the cluster buffer has been invalidated (i.e. marked stale). 6. #5 implies that the inode cluster buffer was instantiated uninitialised in the transaction in xfs_ifree_cluster(), which only instantiates the buffers to invalidate them and never marks them as done. Given factors 1-3, this issue is highly dependent on timing and environmental factors. Hence the issue can be very difficult to reproduce in some situations, but highly reliable in others. Luis has an environment where it can be reproduced easily by g/531 but, OTOH, I've reproduced it only once in ~2000 cycles of g/531. I think the fix is to have xfs_ifree_cluster() set the XBF_DONE flag on the cluster buffers, even though they may not be initialised. The reasons why I think this is safe are: 1. A buffer cache lookup hit on a XBF_STALE buffer will clear the XBF_DONE flag. Hence all future users of the buffer know they have to re-initialise the contents before use and mark it done themselves. 2. xfs_trans_binval() sets the XFS_BLI_STALE flag, which means the buffer remains locked until the journal commit completes and the buffer is unpinned. Hence once marked XBF_STALE/XFS_BLI_STALE by xfs_ifree_cluster(), the only context that can access the freed buffer is the currently running transaction. 3. #2 implies that future buffer lookups in the currently running transaction will hit the transaction match code and not the buffer cache. Hence XBF_STALE and XFS_BLI_STALE will not be cleared unless the transaction initialises and logs the buffer with valid contents again. At which point, the buffer will be marked marked XBF_DONE again, so having XBF_DONE already set on the stale buffer is a moot point. 4. #2 also implies that any concurrent access to that cluster buffer will block waiting on the buffer lock until the inode cluster has been fully freed and is no longer an active inode cluster buffer. 5. #4 + #1 means that any future user of the disk range of that buffer will always see the range of disk blocks covered by the cluster buffer as not done, and hence must initialise the contents themselves. 6. Setting XBF_DONE in xfs_ifree_cluster() then means the unlinked inode precommit code will see a XBF_DONE buffer from the transaction match as it expects. It can then attach the stale but newly dirtied inode to the stale but newly dirtied cluster buffer without unexpected failures. The stale buffer will then sail through the journal and do the right thing with the attached stale inode during unpin. Hence the fix is just one line of extra code. The explanation of why we have to set XBF_DONE in xfs_ifree_cluster, OTOH, is long and complex.... Fixes: 82842fee6e59 ("xfs: fix AGF vs inode cluster buffer deadlock") Signed-off-by: Dave Chinner Tested-by: Luis Chamberlain Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Chandan Babu R --- fs/xfs/xfs_inode.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 58fb7a5062e1..f36091e1e7f5 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2548,11 +2548,26 @@ xfs_ifree_cluster( * This buffer may not have been correctly initialised as we * didn't read it from disk. That's not important because we are * only using to mark the buffer as stale in the log, and to - * attach stale cached inodes on it. That means it will never be - * dispatched for IO. If it is, we want to know about it, and we - * want it to fail. We can acheive this by adding a write - * verifier to the buffer. + * attach stale cached inodes on it. + * + * For the inode that triggered the cluster freeing, this + * attachment may occur in xfs_inode_item_precommit() after we + * have marked this buffer stale. If this buffer was not in + * memory before xfs_ifree_cluster() started, it will not be + * marked XBF_DONE and this will cause problems later in + * xfs_inode_item_precommit() when we trip over a (stale, !done) + * buffer to attached to the transaction. + * + * Hence we have to mark the buffer as XFS_DONE here. This is + * safe because we are also marking the buffer as XBF_STALE and + * XFS_BLI_STALE. That means it will never be dispatched for + * IO and it won't be unlocked until the cluster freeing has + * been committed to the journal and the buffer unpinned. If it + * is written, we want to know about it, and we want it to + * fail. We can acheive this by adding a write verifier to the + * buffer. */ + bp->b_flags |= XBF_DONE; bp->b_ops = &xfs_inode_buf_ops; /* From 8da86499d4cd125a9561f9cd1de7fba99b0aecbf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 29 May 2024 18:29:52 +0200 Subject: [PATCH 387/778] pinctrl: qcom: spmi-gpio: drop broken pm8008 support The SPMI GPIO driver assumes that the parent device is an SPMI device and accesses random data when backcasting the parent struct device pointer for non-SPMI devices. Fortunately this does not seem to cause any issues currently when the parent device is an I2C client like the PM8008, but this could change if the structures are reorganised (e.g. using structure randomisation). Notably the interrupt implementation is also broken for non-SPMI devices. Also note that the two GPIO pins on PM8008 are used for interrupts and reset so their practical use should be limited. Drop the broken GPIO support for PM8008 for now. Fixes: ea119e5a482a ("pinctrl: qcom-pmic-gpio: Add support for pm8008") Cc: stable@vger.kernel.org # 5.13 Reviewed-by: Bryan O'Donoghue Reviewed-by: Stephen Boyd Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240529162958.18081-9-johan+linaro@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index 4e80c7204e5f..4abd6f18bbef 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -1207,7 +1207,6 @@ static const struct of_device_id pmic_gpio_of_match[] = { { .compatible = "qcom,pm7325-gpio", .data = (void *) 10 }, { .compatible = "qcom,pm7550ba-gpio", .data = (void *) 8}, { .compatible = "qcom,pm8005-gpio", .data = (void *) 4 }, - { .compatible = "qcom,pm8008-gpio", .data = (void *) 2 }, { .compatible = "qcom,pm8019-gpio", .data = (void *) 6 }, /* pm8150 has 10 GPIOs with holes on 2, 5, 7 and 8 */ { .compatible = "qcom,pm8150-gpio", .data = (void *) 10 }, From 550dec8593cd0b32ffc90bb88edb190c04efbb48 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 29 May 2024 18:29:53 +0200 Subject: [PATCH 388/778] dt-bindings: pinctrl: qcom,pmic-gpio: drop pm8008 The binding for PM8008 is being reworked so that internal details like interrupts and register offsets are no longer described. This specifically also involves dropping the gpio child node and its compatible string which is no longer needed. Note that there are currently no users of the upstream binding and driver. Reviewed-by: Stephen Boyd Signed-off-by: Johan Hovold Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240529162958.18081-10-johan+linaro@kernel.org Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml index 50846a2d09c8..0bf2d9f093b5 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml @@ -29,7 +29,6 @@ properties: - qcom,pm7325-gpio - qcom,pm7550ba-gpio - qcom,pm8005-gpio - - qcom,pm8008-gpio - qcom,pm8018-gpio - qcom,pm8019-gpio - qcom,pm8038-gpio @@ -126,7 +125,6 @@ allOf: compatible: contains: enum: - - qcom,pm8008-gpio - qcom,pmi8950-gpio - qcom,pmr735d-gpio then: @@ -448,7 +446,6 @@ $defs: - gpio1-gpio10 for pm7325 - gpio1-gpio8 for pm7550ba - gpio1-gpio4 for pm8005 - - gpio1-gpio2 for pm8008 - gpio1-gpio6 for pm8018 - gpio1-gpio12 for pm8038 - gpio1-gpio40 for pm8058 From 03ecbe4bb61886cc7fff5b0efc3784e86ac16214 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 3 Jun 2024 10:21:10 +0200 Subject: [PATCH 389/778] pinctrl: tps6594: add missing support for LP8764 PMIC Add missing support for LP8764 PMIC in the probe(). Issue detected with v6.10-rc1 (and reproduced with 6.10-rc2) using a TI J7200 EVM board. tps6594-pinctrl tps6594-pinctrl.8.auto: error -EINVAL: Couldn't register gpio_regmap driver tps6594-pinctrl tps6594-pinctrl.8.auto: probe with driver tps6594-pinctrl failed with error -22 Fixes: 208829715917 (pinctrl: pinctrl-tps6594: Add TPS65224 PMIC pinctrl and GPIO) Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20240603082110.2104977-1-thomas.richard@bootlin.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-tps6594.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/pinctrl-tps6594.c b/drivers/pinctrl/pinctrl-tps6594.c index 085047320853..5e7c7cf93445 100644 --- a/drivers/pinctrl/pinctrl-tps6594.c +++ b/drivers/pinctrl/pinctrl-tps6594.c @@ -486,6 +486,7 @@ static int tps6594_pinctrl_probe(struct platform_device *pdev) break; case TPS6593: case TPS6594: + case LP8764: pctrl_desc->pins = tps6594_pins; pctrl_desc->npins = ARRAY_SIZE(tps6594_pins); From 61fef29ad120d3077aeb0ef598d76822acb8c4cb Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 3 Jun 2024 20:19:37 +0200 Subject: [PATCH 390/778] pinctrl: bcm2835: Fix permissions of persist_gpio_outputs The commit 8ff05989b44e ("pinctrl: bcm2835: Make pin freeing behavior configurable") unintentionally made the module parameter persist_gpio_outputs changeable at runtime. So drop the write permission in order to make the freeing behavior predictable for user applications. Fixes: 8ff05989b44e ("pinctrl: bcm2835: Make pin freeing behavior configurable") Reported-by: Andy Shevchenko Closes: https://lore.kernel.org/linux-gpio/Zjk-C0nLmlynqLAE@surfacebook.localdomain/ Signed-off-by: Stefan Wahren Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20240603181938.76047-2-wahrenst@gmx.net Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 7178a38475cc..27fd54795791 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -245,7 +245,7 @@ static const char * const irq_type_names[] = { }; static bool persist_gpio_outputs; -module_param(persist_gpio_outputs, bool, 0644); +module_param(persist_gpio_outputs, bool, 0444); MODULE_PARM_DESC(persist_gpio_outputs, "Enable GPIO_OUT persistence when pin is freed"); static inline u32 bcm2835_gpio_rd(struct bcm2835_pinctrl *pc, unsigned reg) From adec57ff8e66aee632f3dd1f93787c13d112b7a1 Mon Sep 17 00:00:00 2001 From: Hagar Hemdan Date: Tue, 4 Jun 2024 08:58:38 +0000 Subject: [PATCH 391/778] pinctrl: fix deadlock in create_pinctrl() when handling -EPROBE_DEFER In create_pinctrl(), pinctrl_maps_mutex is acquired before calling add_setting(). If add_setting() returns -EPROBE_DEFER, create_pinctrl() calls pinctrl_free(). However, pinctrl_free() attempts to acquire pinctrl_maps_mutex, which is already held by create_pinctrl(), leading to a potential deadlock. This patch resolves the issue by releasing pinctrl_maps_mutex before calling pinctrl_free(), preventing the deadlock. This bug was discovered and resolved using Coverity Static Analysis Security Testing (SAST) by Synopsys, Inc. Fixes: 42fed7ba44e4 ("pinctrl: move subsystem mutex to pinctrl_dev struct") Suggested-by: Maximilian Heyne Signed-off-by: Hagar Hemdan Link: https://lore.kernel.org/r/20240604085838.3344-1-hagarhem@amazon.com Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index cffeb869130d..f424a57f0013 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1106,8 +1106,8 @@ static struct pinctrl *create_pinctrl(struct device *dev, * an -EPROBE_DEFER later, as that is the worst case. */ if (ret == -EPROBE_DEFER) { - pinctrl_free(p, false); mutex_unlock(&pinctrl_maps_mutex); + pinctrl_free(p, false); return ERR_PTR(ret); } } From e8448a6c817c2aa6c6af785b1d45678bd5977e8d Mon Sep 17 00:00:00 2001 From: Huang-Huang Bao Date: Thu, 6 Jun 2024 20:57:52 +0800 Subject: [PATCH 392/778] pinctrl: rockchip: fix pinmux bits for RK3328 GPIO2-B pins The pinmux bits for GPIO2-B0 to GPIO2-B6 actually have 2 bits width, correct the bank flag for GPIO2-B. The pinmux bits for GPIO2-B7 is recalculated so it remain unchanged. The pinmux bits for those pins are not explicitly specified in RK3328 TRM, however we can get hint from pad name and its correspinding IOMUX setting for pins in interface descriptions. The correspinding IOMIX settings for GPIO2-B0 to GPIO2-B6 can be found in the same row next to occurrences of following pad names in RK3328 TRM. GPIO2-B0: IO_SPIclkm0_GPIO2B0vccio5 GPIO2-B1: IO_SPItxdm0_GPIO2B1vccio5 GPIO2-B2: IO_SPIrxdm0_GPIO2B2vccio5 GPIO2-B3: IO_SPIcsn0m0_GPIO2B3vccio5 GPIO2-B4: IO_SPIcsn1m0_FLASHvol_sel_GPIO2B4vccio5 GPIO2-B5: IO_ I2C2sda_TSADCshut_GPIO2B5vccio5 GPIO2-B6: IO_ I2C2scl_GPIO2B6vccio5 This fix has been tested on NanoPi R2S for fixing confliting pinmux bits between GPIO2-B7 with GPIO2-B5. Signed-off-by: Huang-Huang Bao Reviewed-by: Heiko Stuebner Fixes: 3818e4a7678e ("pinctrl: rockchip: Add rk3328 pinctrl support") Link: https://lore.kernel.org/r/20240606125755.53778-2-i@eh5.me Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 3bedf36a0019..78dcf4daccde 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -634,12 +634,6 @@ static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = { static struct rockchip_mux_recalced_data rk3328_mux_recalced_data[] = { { - .num = 2, - .pin = 12, - .reg = 0x24, - .bit = 8, - .mask = 0x3 - }, { .num = 2, .pin = 15, .reg = 0x28, @@ -3763,7 +3757,7 @@ static struct rockchip_pin_bank rk3328_pin_banks[] = { PIN_BANK_IOMUX_FLAGS(0, 32, "gpio0", 0, 0, 0, 0), PIN_BANK_IOMUX_FLAGS(1, 32, "gpio1", 0, 0, 0, 0), PIN_BANK_IOMUX_FLAGS(2, 32, "gpio2", 0, - IOMUX_WIDTH_3BIT, + 0, IOMUX_WIDTH_3BIT, 0), PIN_BANK_IOMUX_FLAGS(3, 32, "gpio3", From 5ef6914e0bf578357b4c906ffe6b26e7eedb8ccf Mon Sep 17 00:00:00 2001 From: Huang-Huang Bao Date: Thu, 6 Jun 2024 20:57:53 +0800 Subject: [PATCH 393/778] pinctrl: rockchip: fix pinmux bits for RK3328 GPIO3-B pins The pinmux bits for GPIO3-B1 to GPIO3-B6 pins are not explicitly specified in RK3328 TRM, however we can get hint from pad name and its correspinding IOMUX setting for pins in interface descriptions. The correspinding IOMIX settings for these pins can be found in the same row next to occurrences of following pad names in RK3328 TRM. GPIO3-B1: IO_TSPd5m0_CIFdata5m0_GPIO3B1vccio6 GPIO3-B2: IO_TSPd6m0_CIFdata6m0_GPIO3B2vccio6 GPIO3-B3: IO_TSPd7m0_CIFdata7m0_GPIO3B3vccio6 GPIO3-B4: IO_CARDclkm0_GPIO3B4vccio6 GPIO3-B5: IO_CARDrstm0_GPIO3B5vccio6 GPIO3-B6: IO_CARDdetm0_GPIO3B6vccio6 Add pinmux data to rk3328_mux_recalced_data as mux register offset for these pins does not follow rockchip convention. Signed-off-by: Huang-Huang Bao Reviewed-by: Heiko Stuebner Fixes: 3818e4a7678e ("pinctrl: rockchip: Add rk3328 pinctrl support") Link: https://lore.kernel.org/r/20240606125755.53778-3-i@eh5.me Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 78dcf4daccde..23531ea0d088 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -634,17 +634,68 @@ static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = { static struct rockchip_mux_recalced_data rk3328_mux_recalced_data[] = { { + /* gpio2_b7_sel */ .num = 2, .pin = 15, .reg = 0x28, .bit = 0, .mask = 0x7 }, { + /* gpio2_c7_sel */ .num = 2, .pin = 23, .reg = 0x30, .bit = 14, .mask = 0x3 + }, { + /* gpio3_b1_sel */ + .num = 3, + .pin = 9, + .reg = 0x44, + .bit = 2, + .mask = 0x3 + }, { + /* gpio3_b2_sel */ + .num = 3, + .pin = 10, + .reg = 0x44, + .bit = 4, + .mask = 0x3 + }, { + /* gpio3_b3_sel */ + .num = 3, + .pin = 11, + .reg = 0x44, + .bit = 6, + .mask = 0x3 + }, { + /* gpio3_b4_sel */ + .num = 3, + .pin = 12, + .reg = 0x44, + .bit = 8, + .mask = 0x3 + }, { + /* gpio3_b5_sel */ + .num = 3, + .pin = 13, + .reg = 0x44, + .bit = 10, + .mask = 0x3 + }, { + /* gpio3_b6_sel */ + .num = 3, + .pin = 14, + .reg = 0x44, + .bit = 12, + .mask = 0x3 + }, { + /* gpio3_b7_sel */ + .num = 3, + .pin = 15, + .reg = 0x44, + .bit = 14, + .mask = 0x3 }, }; From 01b4b1d1cec48ef4c26616c2fc4600b2c9fec05a Mon Sep 17 00:00:00 2001 From: Huang-Huang Bao Date: Thu, 6 Jun 2024 20:57:54 +0800 Subject: [PATCH 394/778] pinctrl: rockchip: use dedicated pinctrl type for RK3328 rk3328_pin_ctrl uses type of RK3288 which has a hack in rockchip_pinctrl_suspend and rockchip_pinctrl_resume to restore GPIO6-C6 at assume, the hack is not applicable to RK3328 as GPIO6 is not even exist in it. So use a dedicated pinctrl type to skip this hack. Fixes: 3818e4a7678e ("pinctrl: rockchip: Add rk3328 pinctrl support") Reviewed-by: Heiko Stuebner Signed-off-by: Huang-Huang Bao Link: https://lore.kernel.org/r/20240606125755.53778-4-i@eh5.me Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 5 ++++- drivers/pinctrl/pinctrl-rockchip.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 23531ea0d088..24ee88863ce3 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -2478,6 +2478,7 @@ static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num) case RK3188: case RK3288: case RK3308: + case RK3328: case RK3368: case RK3399: case RK3568: @@ -2536,6 +2537,7 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank, case RK3188: case RK3288: case RK3308: + case RK3328: case RK3368: case RK3399: case RK3568: @@ -2798,6 +2800,7 @@ static bool rockchip_pinconf_pull_valid(struct rockchip_pin_ctrl *ctrl, case RK3188: case RK3288: case RK3308: + case RK3328: case RK3368: case RK3399: case RK3568: @@ -3822,7 +3825,7 @@ static struct rockchip_pin_ctrl rk3328_pin_ctrl = { .pin_banks = rk3328_pin_banks, .nr_banks = ARRAY_SIZE(rk3328_pin_banks), .label = "RK3328-GPIO", - .type = RK3288, + .type = RK3328, .grf_mux_offset = 0x0, .iomux_recalced = rk3328_mux_recalced_data, .niomux_recalced = ARRAY_SIZE(rk3328_mux_recalced_data), diff --git a/drivers/pinctrl/pinctrl-rockchip.h b/drivers/pinctrl/pinctrl-rockchip.h index 4759f336941e..849266f8b191 100644 --- a/drivers/pinctrl/pinctrl-rockchip.h +++ b/drivers/pinctrl/pinctrl-rockchip.h @@ -193,6 +193,7 @@ enum rockchip_pinctrl_type { RK3188, RK3288, RK3308, + RK3328, RK3368, RK3399, RK3568, From 4ea4d4808e342ddf89ba24b93ffa2057005aaced Mon Sep 17 00:00:00 2001 From: Huang-Huang Bao Date: Thu, 6 Jun 2024 20:57:55 +0800 Subject: [PATCH 395/778] pinctrl: rockchip: fix pinmux reset in rockchip_pmx_set rockchip_pmx_set reset all pinmuxs in group to 0 in the case of error, add missing bank data retrieval in that code to avoid setting mux on unexpected pins. Fixes: 14797189b35e ("pinctrl: rockchip: add return value to rockchip_set_mux") Reviewed-by: Heiko Stuebner Signed-off-by: Huang-Huang Bao Link: https://lore.kernel.org/r/20240606125755.53778-5-i@eh5.me Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 24ee88863ce3..3f56991f5b89 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -2751,8 +2751,10 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, if (ret) { /* revert the already done pin settings */ - for (cnt--; cnt >= 0; cnt--) + for (cnt--; cnt >= 0; cnt--) { + bank = pin_to_bank(info, pins[cnt]); rockchip_set_mux(bank, pins[cnt] - bank->pin_base, 0); + } return ret; } From 49cc17967be95d64606d5684416ee51eec35e84a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 14 Jun 2024 17:23:11 +0300 Subject: [PATCH 396/778] drm/i915/mso: using joiner is not possible with eDP MSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not possible to use the joiner at the same time with eDP MSO. When a panel needs MSO, it's not optional, so MSO trumps joiner. v3: Only change intel_dp_has_joiner(), leave debugfs alone (Ville) Fixes: bc71194e8897 ("drm/i915/edp: enable eDP MSO during link training") Cc: # v5.13+ Cc: Ville Syrjala Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1668 Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240614142311.589089-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 8b5a92ca24eb96bb71e2a55e352687487d87687f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index e05e25cd4a94..5b3b6ae1e3d7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -442,6 +442,10 @@ bool intel_dp_has_bigjoiner(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + /* eDP MSO is not compatible with joiner */ + if (intel_dp->mso_link_count) + return false; + return DISPLAY_VER(dev_priv) >= 12 || (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A); From 8c4d6945fe5bd04ff847c3c788abd34ca354ecee Mon Sep 17 00:00:00 2001 From: Alexey Makhalov Date: Sat, 15 Jun 2024 18:25:10 -0700 Subject: [PATCH 397/778] drm/vmwgfx: Fix missing HYPERVISOR_GUEST dependency VMWARE_HYPERCALL alternative will not work as intended without VMware guest code initialization. [ bp: note that this doesn't reproduce with newer gccs so it must be something gcc-9-specific. ] Closes: https://lore.kernel.org/oe-kbuild-all/202406152104.FxakP1MB-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Alexey Makhalov Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240616012511.198243-1-alexey.makhalov@broadcom.com --- drivers/gpu/drm/vmwgfx/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig index faddae3d6ac2..6f1ac940cbae 100644 --- a/drivers/gpu/drm/vmwgfx/Kconfig +++ b/drivers/gpu/drm/vmwgfx/Kconfig @@ -2,7 +2,7 @@ config DRM_VMWGFX tristate "DRM driver for VMware Virtual GPU" depends on DRM && PCI && MMU - depends on X86 || ARM64 + depends on (X86 && HYPERVISOR_GUEST) || ARM64 select DRM_TTM select DRM_TTM_HELPER select MAPPING_DIRTY_HELPERS From 0b9130247f3b6a1122478471ff0e014ea96bb735 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 13 Jun 2024 08:23:00 +0000 Subject: [PATCH 398/778] netrom: Fix a memory leak in nr_heartbeat_expiry() syzbot reported a memory leak in nr_create() [0]. Commit 409db27e3a2e ("netrom: Fix use-after-free of a listening socket.") added sock_hold() to the nr_heartbeat_expiry() function, where a) a socket has a SOCK_DESTROY flag or b) a listening socket has a SOCK_DEAD flag. But in the case "a," when the SOCK_DESTROY flag is set, the file descriptor has already been closed and the nr_release() function has been called. So it makes no sense to hold the reference count because no one will call another nr_destroy_socket() and put it as in the case "b." nr_connect nr_establish_data_link nr_start_heartbeat nr_release switch (nr->state) case NR_STATE_3 nr->state = NR_STATE_2 sock_set_flag(sk, SOCK_DESTROY); nr_rx_frame nr_process_rx_frame switch (nr->state) case NR_STATE_2 nr_state2_machine() nr_disconnect() nr_sk(sk)->state = NR_STATE_0 sock_set_flag(sk, SOCK_DEAD) nr_heartbeat_expiry switch (nr->state) case NR_STATE_0 if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) sock_hold() // ( !!! ) nr_destroy_socket() To fix the memory leak, let's call sock_hold() only for a listening socket. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with Syzkaller. [0]: https://syzkaller.appspot.com/bug?extid=d327a1f3b12e1e206c16 Reported-by: syzbot+d327a1f3b12e1e206c16@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d327a1f3b12e1e206c16 Fixes: 409db27e3a2e ("netrom: Fix use-after-free of a listening socket.") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller --- net/netrom/nr_timer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 4e7c968cde2d..5e3ca068f04e 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -121,7 +121,8 @@ static void nr_heartbeat_expiry(struct timer_list *t) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { - sock_hold(sk); + if (sk->sk_state == TCP_LISTEN) + sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); goto out; From 932d8476399f622aa0767a4a0a9e78e5341dc0e1 Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Wed, 15 May 2024 21:45:54 +0800 Subject: [PATCH 399/778] cpu/hotplug: Fix dynstate assignment in __cpuhp_setup_state_cpuslocked() Commit 4205e4786d0b ("cpu/hotplug: Provide dynamic range for prepare stage") added a dynamic range for the prepare states, but did not handle the assignment of the dynstate variable in __cpuhp_setup_state_cpuslocked(). This causes the corresponding startup callback not to be invoked when calling __cpuhp_setup_state_cpuslocked() with the CPUHP_BP_PREPARE_DYN parameter, even though it should be. Currently, the users of __cpuhp_setup_state_cpuslocked(), for one reason or another, have not triggered this bug. Fixes: 4205e4786d0b ("cpu/hotplug: Provide dynamic range for prepare stage") Signed-off-by: Yuntao Wang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240515134554.427071-1-ytcoode@gmail.com --- kernel/cpu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 563877d6c28b..74cfdb66a9bd 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2446,7 +2446,7 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); * The caller needs to hold cpus read locked while calling this function. * Return: * On success: - * Positive state number if @state is CPUHP_AP_ONLINE_DYN; + * Positive state number if @state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN; * 0 for all other states * On failure: proper (negative) error code */ @@ -2469,7 +2469,7 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); - dynstate = state == CPUHP_AP_ONLINE_DYN; + dynstate = state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN; if (ret > 0 && dynstate) { state = ret; ret = 0; @@ -2500,8 +2500,8 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, out: mutex_unlock(&cpuhp_state_mutex); /* - * If the requested state is CPUHP_AP_ONLINE_DYN, return the - * dynamically allocated state in case of success. + * If the requested state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN, + * return the dynamically allocated state in case of success. */ if (!ret && dynstate) return state; From 8e948c365d9c10b685d1deb946bd833d6a9b43e0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Jun 2024 07:54:08 -0400 Subject: [PATCH 400/778] nfsd: fix oops when reading pool_stats before server is started Sourbh reported an oops that is triggerable by trying to read the pool_stats procfile before nfsd had been started. Move the check for a NULL serv in svc_pool_stats_start above the mutex acquisition, and fix the stop routine not to unlock the mutex if there is no serv yet. Fixes: 7b207ccd9833 ("svc: don't hold reference for poolstats, only mutex.") Reported-by: Sourabh Jain Signed-off-by: Jeff Layton Tested-by: Sourabh Jain Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index dd86d7f1e97e..49a3bea33f9d 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1421,12 +1421,13 @@ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos) dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); + if (!si->serv) + return NULL; + mutex_lock(si->mutex); if (!pidx) return SEQ_START_TOKEN; - if (!si->serv) - return NULL; return pidx > si->serv->sv_nrpools ? NULL : &si->serv->sv_pools[pidx - 1]; } @@ -1458,7 +1459,8 @@ static void svc_pool_stats_stop(struct seq_file *m, void *p) { struct svc_info *si = m->private; - mutex_unlock(si->mutex); + if (si->serv) + mutex_unlock(si->mutex); } static int svc_pool_stats_show(struct seq_file *m, void *p) From da2c8fef130ec7197e2f91c7ed70a8c5bede4bea Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 17 Jun 2024 16:26:26 +0200 Subject: [PATCH 401/778] NFSD: grab nfsd_mutex in nfsd_nl_rpc_status_get_dumpit() Grab nfsd_mutex lock in nfsd_nl_rpc_status_get_dumpit routine and remove nfsd_nl_rpc_status_get_start() and nfsd_nl_rpc_status_get_done(). This patch fix the syzbot log reported below: INFO: task syz-executor.1:17770 blocked for more than 143 seconds. Not tainted 6.10.0-rc3-syzkaller-00022-gcea2a26553ac #0 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor.1 state:D stack:23800 pid:17770 tgid:17767 ppid:11381 flags:0x00000006 Call Trace: context_switch kernel/sched/core.c:5408 [inline] __schedule+0x17e8/0x4a20 kernel/sched/core.c:6745 __schedule_loop kernel/sched/core.c:6822 [inline] schedule+0x14b/0x320 kernel/sched/core.c:6837 schedule_preempt_disabled+0x13/0x30 kernel/sched/core.c:6894 __mutex_lock_common kernel/locking/mutex.c:684 [inline] __mutex_lock+0x6a4/0xd70 kernel/locking/mutex.c:752 nfsd_nl_listener_get_doit+0x115/0x5d0 fs/nfsd/nfsctl.c:2124 genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0xb16/0xec0 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x1e5/0x430 net/netlink/af_netlink.c:2564 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ec/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x223/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2585 ___sys_sendmsg net/socket.c:2639 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2668 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f24ed27cea9 RSP: 002b:00007f24ee0080c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f24ed3b3f80 RCX: 00007f24ed27cea9 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000005 RBP: 00007f24ed2ebff4 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 Fixes: 1bd773b4f0c9 ("nfsd: hold nfsd_mutex across entire netlink operation") Fixes: bd9d6a3efa97 ("NFSD: add rpc_status netlink support") Signed-off-by: Lorenzo Bianconi Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- Documentation/netlink/specs/nfsd.yaml | 2 -- fs/nfsd/netlink.c | 2 -- fs/nfsd/netlink.h | 3 -- fs/nfsd/nfsctl.c | 48 ++++++--------------------- 4 files changed, 11 insertions(+), 44 deletions(-) diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml index d21234097167..6bda7a467301 100644 --- a/Documentation/netlink/specs/nfsd.yaml +++ b/Documentation/netlink/specs/nfsd.yaml @@ -123,8 +123,6 @@ operations: doc: dump pending nfsd rpc attribute-set: rpc-status dump: - pre: nfsd-nl-rpc-status-get-start - post: nfsd-nl-rpc-status-get-done reply: attributes: - xid diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c index 62d2586d9902..529a75ecf22e 100644 --- a/fs/nfsd/netlink.c +++ b/fs/nfsd/netlink.c @@ -44,9 +44,7 @@ static const struct nla_policy nfsd_listener_set_nl_policy[NFSD_A_SERVER_SOCK_AD static const struct genl_split_ops nfsd_nl_ops[] = { { .cmd = NFSD_CMD_RPC_STATUS_GET, - .start = nfsd_nl_rpc_status_get_start, .dumpit = nfsd_nl_rpc_status_get_dumpit, - .done = nfsd_nl_rpc_status_get_done, .flags = GENL_CMD_CAP_DUMP, }, { diff --git a/fs/nfsd/netlink.h b/fs/nfsd/netlink.h index e3724637d64d..2e132ef328f8 100644 --- a/fs/nfsd/netlink.h +++ b/fs/nfsd/netlink.h @@ -15,9 +15,6 @@ extern const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1]; extern const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1]; -int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb); -int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb); - int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 202140df8f82..533b65057e18 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1460,28 +1460,6 @@ static int create_proc_exports_entry(void) unsigned int nfsd_net_id; -/** - * nfsd_nl_rpc_status_get_start - Prepare rpc_status_get dumpit - * @cb: netlink metadata and command arguments - * - * Return values: - * %0: The rpc_status_get command may proceed - * %-ENODEV: There is no NFSD running in this namespace - */ -int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb) -{ - struct nfsd_net *nn = net_generic(sock_net(cb->skb->sk), nfsd_net_id); - int ret = -ENODEV; - - mutex_lock(&nfsd_mutex); - if (nn->nfsd_serv) - ret = 0; - else - mutex_unlock(&nfsd_mutex); - - return ret; -} - static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, struct netlink_callback *cb, struct nfsd_genl_rqstp *rqstp) @@ -1558,8 +1536,16 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { - struct nfsd_net *nn = net_generic(sock_net(skb->sk), nfsd_net_id); int i, ret, rqstp_index = 0; + struct nfsd_net *nn; + + mutex_lock(&nfsd_mutex); + + nn = net_generic(sock_net(skb->sk), nfsd_net_id); + if (!nn->nfsd_serv) { + ret = -ENODEV; + goto out_unlock; + } rcu_read_lock(); @@ -1636,22 +1622,10 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, ret = skb->len; out: rcu_read_unlock(); - - return ret; -} - -/** - * nfsd_nl_rpc_status_get_done - rpc_status_get dumpit post-processing - * @cb: netlink metadata and command arguments - * - * Return values: - * %0: Success - */ -int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb) -{ +out_unlock: mutex_unlock(&nfsd_mutex); - return 0; + return ret; } /** From 380d5f89a4815ff88461a45de2fb6f28533df708 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Jun 2024 10:46:26 -0700 Subject: [PATCH 402/778] bpf: Add missed var_off setting in set_sext32_default_val() Zac reported a verification failure and Alexei reproduced the issue with a simple reproducer ([1]). The verification failure is due to missed setting for var_off. The following is the reproducer in [1]: 0: R1=ctx() R10=fp0 0: (71) r3 = *(u8 *)(r10 -387) ; R3_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) R10=fp0 1: (bc) w7 = (s8)w3 ; R3_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) R7_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f)) 2: (36) if w7 >= 0x2533823b goto pc-3 mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1 mark_precise: frame0: regs=r7 stack= before 1: (bc) w7 = (s8)w3 mark_precise: frame0: regs=r3 stack= before 0: (71) r3 = *(u8 *)(r10 -387) 2: R7_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f)) 3: (b4) w0 = 0 ; R0_w=0 4: (95) exit Note that after insn 1, the var_off for R7 is (0x0; 0x7f). This is not correct since upper 24 bits of w7 could be 0 or 1. So correct var_off should be (0x0; 0xffffffff). Missing var_off setting in set_sext32_default_val() caused later incorrect analysis in zext_32_to_64(dst_reg) and reg_bounds_sync(dst_reg). To fix the issue, set var_off correctly in set_sext32_default_val(). The correct reg state after insn 1 becomes: 1: (bc) w7 = (s8)w3 ; R3_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) R7_w=scalar(smin=0,smax=umax=0xffffffff,smin32=-128,smax32=127,var_off=(0x0; 0xffffffff)) and at insn 2, the verifier correctly determines either branch is possible. [1] https://lore.kernel.org/bpf/CAADnVQLPU0Shz7dWV4bn2BgtGdxN3uFHPeobGBA72tpg5Xoykw@mail.gmail.com/ Fixes: 8100928c8814 ("bpf: Support new sign-extension mov insns") Reported-by: Zac Ecob Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240615174626.3994813-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 010cfee7ffe9..904ef5a03cf5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6236,6 +6236,7 @@ static void set_sext32_default_val(struct bpf_reg_state *reg, int size) } reg->u32_min_value = 0; reg->u32_max_value = U32_MAX; + reg->var_off = tnum_subreg(tnum_unknown); } static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size) From 44b7f7151dfc2e0947f39ed4b9bc4b0c2ccd46fc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Jun 2024 10:46:32 -0700 Subject: [PATCH 403/778] bpf: Add missed var_off setting in coerce_subreg_to_size_sx() In coerce_subreg_to_size_sx(), for the case where upper sign extension bits are the same for smax32 and smin32 values, we missed to setup properly. This is especially problematic if both smax32 and smin32's sign extension bits are 1. The following is a simple example illustrating the inconsistent verifier states due to missed var_off: 0: (85) call bpf_get_prandom_u32#7 ; R0_w=scalar() 1: (bf) r3 = r0 ; R0_w=scalar(id=1) R3_w=scalar(id=1) 2: (57) r3 &= 15 ; R3_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=15,var_off=(0x0; 0xf)) 3: (47) r3 |= 128 ; R3_w=scalar(smin=umin=smin32=umin32=128,smax=umax=smax32=umax32=143,var_off=(0x80; 0xf)) 4: (bc) w7 = (s8)w3 REG INVARIANTS VIOLATION (alu): range bounds violation u64=[0xffffff80, 0x8f] s64=[0xffffff80, 0x8f] u32=[0xffffff80, 0x8f] s32=[0x80, 0xffffff8f] var_off=(0x80, 0xf) The var_off=(0x80, 0xf) is not correct, and the correct one should be var_off=(0xffffff80; 0xf) since from insn 3, we know that at insn 4, the sign extension bits will be 1. This patch fixed this issue by setting var_off properly. Fixes: 8100928c8814 ("bpf: Support new sign-extension mov insns") Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240615174632.3995278-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 904ef5a03cf5..e0a398a97d32 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6281,6 +6281,7 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size) reg->s32_max_value = s32_max; reg->u32_min_value = (u32)s32_min; reg->u32_max_value = (u32)s32_max; + reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max)); return; } From a62293c33b058415237c55058a6d20de313a2e61 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Jun 2024 10:46:37 -0700 Subject: [PATCH 404/778] selftests/bpf: Add a few tests to cover Add three unit tests in verifier_movsx.c to cover cases where missed var_off setting can cause unexpected verification success or failure. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240615174637.3995589-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_movsx.c | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index cbb9d6714f53..028ec855587b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -224,6 +224,69 @@ l0_%=: \ : __clobber_all); } +SEC("socket") +__description("MOV32SX, S8, var_off u32_max") +__failure __msg("infinite loop detected") +__failure_unpriv __msg_unpriv("back-edge from insn 2 to 0") +__naked void mov64sx_s32_varoff_1(void) +{ + asm volatile (" \ +l0_%=: \ + r3 = *(u8 *)(r10 -387); \ + w7 = (s8)w3; \ + if w7 >= 0x2533823b goto l0_%=; \ + w0 = 0; \ + exit; \ +" : + : + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension") +__success __retval(0) +__failure_unpriv __msg_unpriv("frame pointer is read only") +__naked void mov64sx_s32_varoff_2(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r3 &= 0xf; \ + w7 = (s8)w3; \ + if w7 s>= 16 goto l0_%=; \ + w0 = 0; \ + exit; \ +l0_%=: \ + r10 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension") +__success __retval(0) +__failure_unpriv __msg_unpriv("frame pointer is read only") +__naked void mov64sx_s32_varoff_3(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r3 &= 0xf; \ + r3 |= 0x80; \ + w7 = (s8)w3; \ + if w7 s>= -5 goto l0_%=; \ + w0 = 0; \ + exit; \ +l0_%=: \ + r10 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + #else SEC("socket") From 1ab1a422c0daedbd76f9f25c297eca48986ddea0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 17 Jun 2024 11:13:01 -0700 Subject: [PATCH 405/778] MAINTAINERS: Update entries for Kees Cook Update current email address for Kees Cook in the MAINTAINER file to match the change from commit 4e173c825b19 ("mailmap: update entry for Kees Cook"). Link: https://lore.kernel.org/r/20240617181257.work.206-kees@kernel.org Signed-off-by: Kees Cook --- MAINTAINERS | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8754ac2c259d..f601a2fd1ebf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5296,7 +5296,7 @@ F: drivers/infiniband/hw/usnic/ CLANG CONTROL FLOW INTEGRITY SUPPORT M: Sami Tolvanen -M: Kees Cook +M: Kees Cook R: Nathan Chancellor L: llvm@lists.linux.dev S: Supported @@ -8212,7 +8212,7 @@ F: rust/kernel/net/phy.rs EXEC & BINFMT API, ELF R: Eric Biederman -R: Kees Cook +R: Kees Cook L: linux-mm@kvack.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve @@ -8613,7 +8613,7 @@ S: Maintained F: drivers/net/ethernet/nvidia/* FORTIFY_SOURCE -M: Kees Cook +M: Kees Cook L: linux-hardening@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening @@ -9103,7 +9103,7 @@ F: include/linux/mfd/gsc.h F: include/linux/platform_data/gsc_hwmon.h GCC PLUGINS -M: Kees Cook +M: Kees Cook L: linux-hardening@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening @@ -9237,7 +9237,7 @@ S: Maintained F: drivers/input/touchscreen/resistive-adc-touch.c GENERIC STRING LIBRARY -M: Kees Cook +M: Kees Cook R: Andy Shevchenko L: linux-hardening@vger.kernel.org S: Supported @@ -11951,7 +11951,7 @@ F: scripts/package/ F: usr/ KERNEL HARDENING (not covered by other areas) -M: Kees Cook +M: Kees Cook R: Gustavo A. R. Silva L: linux-hardening@vger.kernel.org S: Supported @@ -12479,7 +12479,7 @@ F: drivers/scsi/53c700* LEAKING_ADDRESSES M: Tycho Andersen -R: Kees Cook +R: Kees Cook L: linux-hardening@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening @@ -12775,7 +12775,7 @@ F: arch/powerpc/platforms/8xx/ F: arch/powerpc/platforms/83xx/ LINUX KERNEL DUMP TEST MODULE (LKDTM) -M: Kees Cook +M: Kees Cook S: Maintained F: drivers/misc/lkdtm/* F: tools/testing/selftests/lkdtm/* @@ -12905,7 +12905,7 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/usb/dvb-usb-v2/lmedm04* LOADPIN SECURITY MODULE -M: Kees Cook +M: Kees Cook S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening F: Documentation/admin-guide/LSM/LoadPin.rst @@ -17998,7 +17998,7 @@ F: tools/testing/selftests/proc/ PROC SYSCTL M: Luis Chamberlain -M: Kees Cook +M: Kees Cook M: Joel Granados L: linux-kernel@vger.kernel.org L: linux-fsdevel@vger.kernel.org @@ -18054,7 +18054,7 @@ F: Documentation/devicetree/bindings/net/pse-pd/ F: drivers/net/pse-pd/ PSTORE FILESYSTEM -M: Kees Cook +M: Kees Cook R: Tony Luck R: Guilherme G. Piccoli L: linux-hardening@vger.kernel.org @@ -20060,7 +20060,7 @@ F: drivers/media/cec/platform/seco/seco-cec.c F: drivers/media/cec/platform/seco/seco-cec.h SECURE COMPUTING -M: Kees Cook +M: Kees Cook R: Andy Lutomirski R: Will Drewry S: Supported @@ -22974,7 +22974,7 @@ F: drivers/block/ublk_drv.c F: include/uapi/linux/ublk_cmd.h UBSAN -M: Kees Cook +M: Kees Cook R: Marco Elver R: Andrey Konovalov R: Andrey Ryabinin @@ -24812,7 +24812,7 @@ F: drivers/net/hamradio/yam* F: include/linux/yam.h YAMA SECURITY MODULE -M: Kees Cook +M: Kees Cook S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening F: Documentation/admin-guide/LSM/Yama.rst From 9570a48847e3acfa1a741cef431c923325ddc637 Mon Sep 17 00:00:00 2001 From: Boyang Yu Date: Mon, 17 Jun 2024 21:11:44 +0800 Subject: [PATCH 406/778] nvme: fix NVME_NS_DEAC may incorrectly identifying the disk as EXT_LBA. The value of NVME_NS_DEAC is 3, which means NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS. Provide a unique value for this feature flag. Fixes 1b96f862eccc ("nvme: implement the DEAC bit for the Write Zeroes command") Signed-off-by: Boyang Yu Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f3a41133ac3f..68b400f9c42d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -502,7 +502,7 @@ static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head) enum nvme_ns_features { NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */ - NVME_NS_DEAC, /* DEAC bit in Write Zeores supported */ + NVME_NS_DEAC = 1 << 2, /* DEAC bit in Write Zeores supported */ }; struct nvme_ns { From f31e85a4d7c6ac4a3e014129c9cdc31592ea29f3 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 17 Jun 2024 09:27:26 +0200 Subject: [PATCH 407/778] nvmet: do not return 'reserved' for empty TSAS values The 'TSAS' value is only defined for TCP and RDMA, but returning 'reserved' for undefined values tricked nvmetcli to try to write 'reserved' when restoring from a config file. This caused an error and the configuration would not be applied. Fixes: 3f123494db72 ("nvmet: make TCP sectype settable via configfs") Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index bd87dfd173a4..e60224356048 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -410,7 +410,7 @@ static ssize_t nvmet_addr_tsas_show(struct config_item *item, return sprintf(page, "%s\n", nvmet_addr_tsas_rdma[i].name); } } - return sprintf(page, "reserved\n"); + return sprintf(page, "\n"); } static ssize_t nvmet_addr_tsas_store(struct config_item *item, From a83e1385b780d41307433ddbc86e3c528db031f0 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 14 Jun 2024 19:31:49 +0530 Subject: [PATCH 408/778] ACPICA: Revert "ACPICA: avoid Info: mapping multiple BARs. Your kernel is fine." Undo the modifications made in commit d410ee5109a1 ("ACPICA: avoid "Info: mapping multiple BARs. Your kernel is fine.""). The initial purpose of this commit was to stop memory mappings for operation regions from overlapping page boundaries, as it can trigger warnings if different page attributes are present. However, it was found that when this situation arises, mapping continues until the boundary's end, but there is still an attempt to read/write the entire length of the map, leading to a NULL pointer deference. For example, if a four-byte mapping request is made but only one byte is mapped because it hits the current page boundary's end, a four-byte read/write attempt is still made, resulting in a NULL pointer deference. Instead, map the entire length, as the ACPI specification does not mandate that it must be within the same page boundary. It is permissible for it to be mapped across different regions. Link: https://github.com/acpica/acpica/pull/954 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218849 Fixes: d410ee5109a1 ("ACPICA: avoid "Info: mapping multiple BARs. Your kernel is fine."") Co-developed-by: Sanath S Signed-off-by: Sanath S Signed-off-by: Raju Rangoju Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/exregion.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c index 8907b8bf4267..c49b9f8de723 100644 --- a/drivers/acpi/acpica/exregion.c +++ b/drivers/acpi/acpica/exregion.c @@ -44,7 +44,6 @@ acpi_ex_system_memory_space_handler(u32 function, struct acpi_mem_mapping *mm = mem_info->cur_mm; u32 length; acpi_size map_length; - acpi_size page_boundary_map_length; #ifdef ACPI_MISALIGNMENT_NOT_SUPPORTED u32 remainder; #endif @@ -138,26 +137,8 @@ acpi_ex_system_memory_space_handler(u32 function, map_length = (acpi_size) ((mem_info->address + mem_info->length) - address); - /* - * If mapping the entire remaining portion of the region will cross - * a page boundary, just map up to the page boundary, do not cross. - * On some systems, crossing a page boundary while mapping regions - * can cause warnings if the pages have different attributes - * due to resource management. - * - * This has the added benefit of constraining a single mapping to - * one page, which is similar to the original code that used a 4k - * maximum window. - */ - page_boundary_map_length = (acpi_size) - (ACPI_ROUND_UP(address, ACPI_DEFAULT_PAGE_SIZE) - address); - if (page_boundary_map_length == 0) { - page_boundary_map_length = ACPI_DEFAULT_PAGE_SIZE; - } - - if (map_length > page_boundary_map_length) { - map_length = page_boundary_map_length; - } + if (map_length > ACPI_DEFAULT_PAGE_SIZE) + map_length = ACPI_DEFAULT_PAGE_SIZE; /* Create a new mapping starting at the address given */ From c7be64355fccfe7d4727681e32fce07113e40af1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 12 Jun 2024 12:42:20 +0200 Subject: [PATCH 409/778] ACPI: scan: Ignore camera graph port nodes on all Dell Tiger, Alder and Raptor Lake models Dell laptops with IPU6 camera (the Tiger Lake, Alder Lake and Raptor Lake generations) have broken ACPI MIPI DISCO information (this results from an OEM attempt to make Linux work by supplying it with custom data in the ACPI tables which has never been supported in the mainline). Instead of adding a lot of DMI quirks for this, check for Dell platforms based on the processor generations in question and drop the ACPI graph port nodes, likely to be created with the help of invalid data, on all of them. Fixes: bd721b934323 ("ACPI: scan: Extract CSI-2 connection graph from _CRS") Signed-off-by: Hans de Goede [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/internal.h | 4 ++++ drivers/acpi/mipi-disco-img.c | 28 +++++++++++++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 2a0e9fc7b74c..601b670356e5 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -302,6 +302,10 @@ void acpi_mipi_check_crs_csi2(acpi_handle handle); void acpi_mipi_scan_crs_csi2(void); void acpi_mipi_init_crs_csi2_swnodes(void); void acpi_mipi_crs_csi2_cleanup(void); +#ifdef CONFIG_X86 bool acpi_graph_ignore_port(acpi_handle handle); +#else +static inline bool acpi_graph_ignore_port(acpi_handle handle) { return false; } +#endif #endif /* _ACPI_INTERNAL_H_ */ diff --git a/drivers/acpi/mipi-disco-img.c b/drivers/acpi/mipi-disco-img.c index d05413a0672a..0ab13751f0db 100644 --- a/drivers/acpi/mipi-disco-img.c +++ b/drivers/acpi/mipi-disco-img.c @@ -725,14 +725,20 @@ void acpi_mipi_crs_csi2_cleanup(void) acpi_mipi_del_crs_csi2(csi2); } -static const struct dmi_system_id dmi_ignore_port_nodes[] = { - { - .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "XPS 9315"), - }, - }, - { } +#ifdef CONFIG_X86 +#include +#include + +/* CPU matches for Dell generations with broken ACPI MIPI DISCO info */ +static const struct x86_cpu_id dell_broken_mipi_disco_cpu_gens[] = { + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, NULL), + {} }; static const char *strnext(const char *s1, const char *s2) @@ -761,7 +767,10 @@ bool acpi_graph_ignore_port(acpi_handle handle) static bool dmi_tested, ignore_port; if (!dmi_tested) { - ignore_port = dmi_first_match(dmi_ignore_port_nodes); + if (dmi_name_in_vendors("Dell Inc.") && + x86_match_cpu(dell_broken_mipi_disco_cpu_gens)) + ignore_port = true; + dmi_tested = true; } @@ -794,3 +803,4 @@ out_free: kfree(orig_path); return false; } +#endif From 0606c5c4ad05076dba39af055644d83e3f6ba3a4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 12 Jun 2024 12:48:28 +0200 Subject: [PATCH 410/778] ACPI: mipi-disco-img: Switch to new Intel CPU model defines Switch over to using the new Intel CPU model defines, as the old ones are going away. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/mipi-disco-img.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/mipi-disco-img.c b/drivers/acpi/mipi-disco-img.c index 0ab13751f0db..92b658f92dc0 100644 --- a/drivers/acpi/mipi-disco-img.c +++ b/drivers/acpi/mipi-disco-img.c @@ -731,13 +731,13 @@ void acpi_mipi_crs_csi2_cleanup(void) /* CPU matches for Dell generations with broken ACPI MIPI DISCO info */ static const struct x86_cpu_id dell_broken_mipi_disco_cpu_gens[] = { - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, NULL), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, NULL), + X86_MATCH_VFM(INTEL_TIGERLAKE, NULL), + X86_MATCH_VFM(INTEL_TIGERLAKE_L, NULL), + X86_MATCH_VFM(INTEL_ALDERLAKE, NULL), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, NULL), + X86_MATCH_VFM(INTEL_RAPTORLAKE, NULL), + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, NULL), + X86_MATCH_VFM(INTEL_RAPTORLAKE_S, NULL), {} }; From 4181b51c38875de9f6f11248fa0bcf3246c19c82 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 11 Jun 2024 14:06:31 +0200 Subject: [PATCH 411/778] s390/pci: Add missing virt_to_phys() for directed DIBV In commit 4e4dc65ab578 ("s390/pci: use phys_to_virt() for AIBVs/DIBVs") the setting of dibv_addr was missed when adding virt_to_phys(). This only affects systems with directed interrupt delivery enabled which are not generally available. Fixes: 4e4dc65ab578 ("s390/pci: use phys_to_virt() for AIBVs/DIBVs") Reviewed-by: Heiko Carstens Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index ff8f24854c64..0ef83b6ac0db 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -410,7 +410,7 @@ static void __init cpu_enable_directed_irq(void *unused) union zpci_sic_iib iib = {{0}}; union zpci_sic_iib ziib = {{0}}; - iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector; + iib.cdiib.dibv_addr = virt_to_phys(zpci_ibv[smp_processor_id()]->vector); zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib); zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib); From d8354a1de2c4cc693812f6130fc922537a59217d Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 11 Jun 2024 23:47:16 +0200 Subject: [PATCH 412/778] s390/virtio_ccw: Fix config change notifications Commit e3e9bda38e6d ("s390/virtio_ccw: use DMA handle from DMA API") broke configuration change notifications for virtio-ccw by putting the DMA address of *indicatorp directly into ccw->cda disregarding the fact that if !!(vcdev->is_thinint) then the function virtio_ccw_register_adapter_ind() will overwrite that ccw->cda value with the address of the virtio_thinint_area so it can actually set up the adapter interrupts via CCW_CMD_SET_IND_ADAPTER. Thus we end up pointing to the wrong object for both CCW_CMD_SET_IND if setting up the adapter interrupts fails, and for CCW_CMD_SET_CONF_IND regardless whether it succeeds or fails. To fix this, let us save away the dma address of *indicatorp in a local variable, and copy it to ccw->cda after the "vcdev->is_thinint" branch. Fixes: e3e9bda38e6d ("s390/virtio_ccw: use DMA handle from DMA API") Reported-by: Boqiao Fu Reported-by: Sebastian Mitterle Closes: https://issues.redhat.com/browse/RHEL-39983 Tested-by: Thomas Huth Reviewed-by: Eric Farman Signed-off-by: Halil Pasic Link: https://lore.kernel.org/r/20240611214716.1002781-1-pasic@linux.ibm.com Signed-off-by: Vasily Gorbik --- drivers/s390/virtio/virtio_ccw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index d7569f395559..d6491fc84e8c 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -698,6 +698,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, dma64_t *indicatorp = NULL; int ret, i, queue_idx = 0; struct ccw1 *ccw; + dma32_t indicatorp_dma = 0; ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw), NULL); if (!ccw) @@ -725,7 +726,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, */ indicatorp = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*indicatorp), - &ccw->cda); + &indicatorp_dma); if (!indicatorp) goto out; *indicatorp = indicators_dma(vcdev); @@ -735,6 +736,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, /* no error, just fall back to legacy interrupts */ vcdev->is_thinint = false; } + ccw->cda = indicatorp_dma; if (!vcdev->is_thinint) { /* Register queue indicators with host. */ *indicators(vcdev) = 0; From 14d7c92f8df9c0964ae6f8b813c1b3ac38120825 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 17 Jun 2024 12:57:03 -0700 Subject: [PATCH 413/778] Revert "mm: mmap: allow for the maximum number of bits for randomizing mmap_base by default" This reverts commit 3afb76a66b5559a7b595155803ce23801558a7a9. This was a wrongheaded workaround for an issue that had already been fixed much better by commit 4ef9ad19e176 ("mm: huge_memory: don't force huge page alignment on 32 bit"). Asking users questions at kernel compile time that they can't make sense of is not a viable strategy. And the fact that even the kernel VM maintainers apparently didn't catch that this "fix" is not a fix any more pretty much proves the point that people can't be expected to understand the implications of the question. It may well be the case that we could improve things further, and that __thp_get_unmapped_area() should take the mapping randomization into account even for 64-bit kernels. Maybe we should not be so eager to use THP mappings. But in no case should this be a kernel config option. Cc: Rafael Aquini Cc: Andrew Morton Cc: Jiri Slaby Cc: Suren Baghdasaryan Cc: Matthew Wilcox (Oracle) Signed-off-by: Linus Torvalds --- arch/Kconfig | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 3e2a63772b3d..975dd22a2dbd 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1046,21 +1046,10 @@ config ARCH_MMAP_RND_BITS_MAX config ARCH_MMAP_RND_BITS_DEFAULT int -config FORCE_MAX_MMAP_RND_BITS - bool "Force maximum number of bits to use for ASLR of mmap base address" - default y if !64BIT - help - ARCH_MMAP_RND_BITS and ARCH_MMAP_RND_COMPAT_BITS represent the number - of bits to use for ASLR and if no custom value is assigned (EXPERT) - then the architecture's lower bound (minimum) value is assumed. - This toggle changes that default assumption to assume the arch upper - bound (maximum) value instead. - config ARCH_MMAP_RND_BITS int "Number of bits to use for ASLR of mmap base address" if EXPERT range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT - default ARCH_MMAP_RND_BITS_MAX if FORCE_MAX_MMAP_RND_BITS default ARCH_MMAP_RND_BITS_MIN depends on HAVE_ARCH_MMAP_RND_BITS help @@ -1095,7 +1084,6 @@ config ARCH_MMAP_RND_COMPAT_BITS int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT - default ARCH_MMAP_RND_COMPAT_BITS_MAX if FORCE_MAX_MMAP_RND_BITS default ARCH_MMAP_RND_COMPAT_BITS_MIN depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help From 9e046bb111f13461d3f9331e24e974324245140e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Jun 2024 13:06:15 +0000 Subject: [PATCH 414/778] tcp: clear tp->retrans_stamp in tcp_rcv_fastopen_synack() Some applications were reporting ETIMEDOUT errors on apparently good looking flows, according to packet dumps. We were able to root cause the issue to an accidental setting of tp->retrans_stamp in the following scenario: - client sends TFO SYN with data. - server has TFO disabled, ACKs only SYN but not payload. - client receives SYNACK covering only SYN. - tcp_ack() eats SYN and sets tp->retrans_stamp to 0. - tcp_rcv_fastopen_synack() calls tcp_xmit_retransmit_queue() to retransmit TFO payload w/o SYN, sets tp->retrans_stamp to "now", but we are not in any loss recovery state. - TFO payload is ACKed. - we are not in any loss recovery state, and don't see any dupacks, so we don't get to any code path that clears tp->retrans_stamp. - tp->retrans_stamp stays non-zero for the lifetime of the connection. - after first RTO, tcp_clamp_rto_to_user_timeout() clamps second RTO to 1 jiffy due to bogus tp->retrans_stamp. - on clamped RTO with non-zero icsk_retransmits, retransmits_timed_out() sets start_ts from tp->retrans_stamp from TFO payload retransmit hours/days ago, and computes bogus long elapsed time for loss recovery, and suffers ETIMEDOUT early. Fixes: a7abf3cd76e1 ("tcp: consider using standard rtx logic in tcp_rcv_fastopen_synack()") CC: stable@vger.kernel.org Co-developed-by: Neal Cardwell Signed-off-by: Neal Cardwell Co-developed-by: Yuchung Cheng Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240614130615.396837-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c04a9c8be9d..01d208e0eef3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6296,6 +6296,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, skb_rbtree_walk_from(data) tcp_mark_skb_lost(sk, data); tcp_xmit_retransmit_queue(sk); + tp->retrans_stamp = 0; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); return true; From e874557fce1b6023efafd523aee0c347bf7f1694 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 14 Jun 2024 19:15:29 +0200 Subject: [PATCH 415/778] selftests: mptcp: userspace_pm: fixed subtest names It is important to have fixed (sub)test names in TAP, because these names are used to identify them. If they are not fixed, tracking cannot be done. Some subtests from the userspace_pm selftest were using random numbers in their names: the client and server address IDs from $RANDOM, and the client port number randomly picked by the kernel when creating the connection. These values have been replaced by 'client' and 'server' words: that's even more helpful than showing random numbers. Note that the addresses IDs are incremented and decremented in the test: +1 or -1 are then displayed in these cases. Not to loose info that can be useful for debugging in case of issues, these random numbers are now displayed at the beginning of the test. Fixes: f589234e1af0 ("selftests: mptcp: userspace_pm: format subtests results in TAP") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240614-upstream-net-20240614-selftests-mptcp-uspace-pm-fixed-test-names-v1-1-460ad3edb429@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/net/mptcp/userspace_pm.sh | 46 +++++++++++-------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 9e2981f2d7f5..9cb05978269d 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -160,10 +160,12 @@ make_connection() local is_v6=$1 local app_port=$app4_port local connect_addr="10.0.1.1" + local client_addr="10.0.1.2" local listen_addr="0.0.0.0" if [ "$is_v6" = "v6" ] then connect_addr="dead:beef:1::1" + client_addr="dead:beef:1::2" listen_addr="::" app_port=$app6_port else @@ -206,6 +208,7 @@ make_connection() [ "$server_serverside" = 1 ] then test_pass + print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}" else test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" mptcp_lib_result_print_all_tap @@ -297,7 +300,7 @@ test_announce() ip netns exec "$ns2"\ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ ns2eth1 - print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port" + print_test "ADD_ADDR id:client 10.0.2.2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ "$client4_port" @@ -306,7 +309,7 @@ test_announce() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann\ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 - print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port" + print_test "ADD_ADDR6 id:client dead:beef:2::2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ "$client_addr_id" "$client6_port" "v6" @@ -316,7 +319,7 @@ test_announce() client_addr_id=$((client_addr_id+1)) ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id dev ns2eth1 port $new4_port - print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port" + print_test "ADD_ADDR id:client+1 10.0.2.2 (ns2) => ns1, new port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" @@ -327,7 +330,7 @@ test_announce() # ADD_ADDR from the server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR id:server 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$app4_port" @@ -336,7 +339,7 @@ test_announce() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR6 id:server dead:beef:2::1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ "$server_addr_id" "$app6_port" "v6" @@ -346,7 +349,7 @@ test_announce() server_addr_id=$((server_addr_id+1)) ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 port $new4_port - print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port" + print_test "ADD_ADDR id:server+1 10.0.2.1 (ns1) => ns2, new port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" @@ -380,7 +383,7 @@ test_remove() local invalid_token=$(( client4_token - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\ $client_addr_id > /dev/null 2>&1 - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token" + print_test "RM_ADDR id:client ns2 => ns1, invalid token" local type type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] @@ -394,7 +397,7 @@ test_remove() local invalid_id=$(( client_addr_id + 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $invalid_id > /dev/null 2>&1 - print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id" + print_test "RM_ADDR id:client+1 ns2 => ns1, invalid id" type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then @@ -407,7 +410,7 @@ test_remove() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR id:client ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -416,7 +419,7 @@ test_remove() client_addr_id=$(( client_addr_id - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR id:client-1 ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -424,7 +427,7 @@ test_remove() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ $client_addr_id - print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR6 id:client-1 ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" @@ -434,7 +437,7 @@ test_remove() # RM_ADDR from the server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id - print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR id:server ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -443,7 +446,7 @@ test_remove() server_addr_id=$(( server_addr_id - 1 )) ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id - print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR id:server-1 ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -451,7 +454,7 @@ test_remove() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ $server_addr_id - print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR6 id:server-1 ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" } @@ -479,8 +482,14 @@ verify_subflow_events() local locid local remid local info + local e_dport_txt - info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})" + # only display the fixed ports + if [ "${e_dport}" -ge "${app4_port}" ] && [ "${e_dport}" -le "${app6_port}" ]; then + e_dport_txt=":${e_dport}" + fi + + info="${e_saddr} (${e_from}) => ${e_daddr}${e_dport_txt} (${e_to})" if [ "$e_type" = "$SUB_ESTABLISHED" ] then @@ -766,7 +775,7 @@ test_subflows_v4_v6_mix() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR4 id:server 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ "$server_addr_id" "$app6_port" @@ -861,7 +870,7 @@ test_listener() local listener_pid=$! sleep 0.5 - print_test "CREATE_LISTENER 10.0.2.2:$client4_port" + print_test "CREATE_LISTENER 10.0.2.2 (client port)" verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port # ADD_ADDR from client to server machine reusing the subflow port @@ -878,13 +887,14 @@ test_listener() mptcp_lib_kill_wait $listener_pid sleep 0.5 - print_test "CLOSE_LISTENER 10.0.2.2:$client4_port" + print_test "CLOSE_LISTENER 10.0.2.2 (client port)" verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port } print_title "Make connections" make_connection make_connection "v6" +print_title "Will be using address IDs ${client_addr_id} (client) and ${server_addr_id} (server)" test_announce test_remove From 2eab4543a2204092c3a7af81d7d6c506e59a03a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Jun 2024 08:20:02 +0000 Subject: [PATCH 416/778] ipv6: prevent possible NULL deref in fib6_nh_init() syzbot reminds us that in6_dev_get() can return NULL. fib6_nh_init() ip6_validate_gw( &idev ) ip6_route_check_nh( idev ) *idev = in6_dev_get(dev); // can be NULL Oops: general protection fault, probably for non-canonical address 0xdffffc00000000bc: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x00000000000005e0-0x00000000000005e7] CPU: 0 PID: 11237 Comm: syz-executor.3 Not tainted 6.10.0-rc2-syzkaller-00249-gbe27b8965297 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024 RIP: 0010:fib6_nh_init+0x640/0x2160 net/ipv6/route.c:3606 Code: 00 00 fc ff df 4c 8b 64 24 58 48 8b 44 24 28 4c 8b 74 24 30 48 89 c1 48 89 44 24 28 48 8d 98 e0 05 00 00 48 89 d8 48 c1 e8 03 <42> 0f b6 04 38 84 c0 0f 85 b3 17 00 00 8b 1b 31 ff 89 de e8 b8 8b RSP: 0018:ffffc900032775a0 EFLAGS: 00010202 RAX: 00000000000000bc RBX: 00000000000005e0 RCX: 0000000000000000 RDX: 0000000000000010 RSI: ffffc90003277a54 RDI: ffff88802b3a08d8 RBP: ffffc900032778b0 R08: 00000000000002fc R09: 0000000000000000 R10: 00000000000002fc R11: 0000000000000000 R12: ffff88802b3a08b8 R13: 1ffff9200064eec8 R14: ffffc90003277a00 R15: dffffc0000000000 FS: 00007f940feb06c0(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000245e8000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip6_route_info_create+0x99e/0x12b0 net/ipv6/route.c:3809 ip6_route_add+0x28/0x160 net/ipv6/route.c:3853 ipv6_route_ioctl+0x588/0x870 net/ipv6/route.c:4483 inet6_ioctl+0x21a/0x280 net/ipv6/af_inet6.c:579 sock_do_ioctl+0x158/0x460 net/socket.c:1222 sock_ioctl+0x629/0x8e0 net/socket.c:1341 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f940f07cea9 Fixes: 428604fb118f ("ipv6: do not set routes if disable_ipv6 has been enabled") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Lorenzo Bianconi Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240614082002.26407-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 952c2bf11709..28788ffde585 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3603,7 +3603,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, if (!dev) goto out; - if (idev->cnf.disable_ipv6) { + if (!idev || idev->cnf.disable_ipv6) { NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); err = -EACCES; goto out; From b86762dbe19a62e785c189f313cda5b989931f37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 15 Jun 2024 15:14:54 +0000 Subject: [PATCH 417/778] ipv6: prevent possible NULL dereference in rt6_probe() syzbot caught a NULL dereference in rt6_probe() [1] Bail out if __in6_dev_get() returns NULL. [1] Oops: general protection fault, probably for non-canonical address 0xdffffc00000000cb: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000658-0x000000000000065f] CPU: 1 PID: 22444 Comm: syz-executor.0 Not tainted 6.10.0-rc2-syzkaller-00383-gb8481381d4e2 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024 RIP: 0010:rt6_probe net/ipv6/route.c:656 [inline] RIP: 0010:find_match+0x8c4/0xf50 net/ipv6/route.c:758 Code: 14 fd f7 48 8b 85 38 ff ff ff 48 c7 45 b0 00 00 00 00 48 8d b8 5c 06 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 19 RSP: 0018:ffffc900034af070 EFLAGS: 00010203 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffc90004521000 RDX: 00000000000000cb RSI: ffffffff8990d0cd RDI: 000000000000065c RBP: ffffc900034af150 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000002 R12: 000000000000000a R13: 1ffff92000695e18 R14: ffff8880244a1d20 R15: 0000000000000000 FS: 00007f4844a5a6c0(0000) GS:ffff8880b9300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b31b27000 CR3: 000000002d42c000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rt6_nh_find_match+0xfa/0x1a0 net/ipv6/route.c:784 nexthop_for_each_fib6_nh+0x26d/0x4a0 net/ipv4/nexthop.c:1496 __find_rr_leaf+0x6e7/0xe00 net/ipv6/route.c:825 find_rr_leaf net/ipv6/route.c:853 [inline] rt6_select net/ipv6/route.c:897 [inline] fib6_table_lookup+0x57e/0xa30 net/ipv6/route.c:2195 ip6_pol_route+0x1cd/0x1150 net/ipv6/route.c:2231 pol_lookup_func include/net/ip6_fib.h:616 [inline] fib6_rule_lookup+0x386/0x720 net/ipv6/fib6_rules.c:121 ip6_route_output_flags_noref net/ipv6/route.c:2639 [inline] ip6_route_output_flags+0x1d0/0x640 net/ipv6/route.c:2651 ip6_dst_lookup_tail.constprop.0+0x961/0x1760 net/ipv6/ip6_output.c:1147 ip6_dst_lookup_flow+0x99/0x1d0 net/ipv6/ip6_output.c:1250 rawv6_sendmsg+0xdab/0x4340 net/ipv6/raw.c:898 inet_sendmsg+0x119/0x140 net/ipv4/af_inet.c:853 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_write_iter+0x4b8/0x5c0 net/socket.c:1160 new_sync_write fs/read_write.c:497 [inline] vfs_write+0x6b6/0x1140 fs/read_write.c:590 ksys_write+0x1f8/0x260 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 52e1635631b3 ("[IPV6]: ROUTE: Add router_probe_interval sysctl.") Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240615151454.166404-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 28788ffde585..8d72ca0b086d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -638,6 +638,8 @@ static void rt6_probe(struct fib6_nh *fib6_nh) rcu_read_lock(); last_probe = READ_ONCE(fib6_nh->last_probe); idev = __in6_dev_get(dev); + if (!idev) + goto out; neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { if (READ_ONCE(neigh->nud_state) & NUD_VALID) From d46401052c2d5614da8efea5788532f0401cb164 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 15 Jun 2024 15:42:31 +0000 Subject: [PATCH 418/778] xfrm6: check ip6_dst_idev() return value in xfrm6_get_saddr() ip6_dst_idev() can return NULL, xfrm6_get_saddr() must act accordingly. syzbot reported: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 PID: 12 Comm: kworker/u8:1 Not tainted 6.10.0-rc2-syzkaller-00383-gb8481381d4e2 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024 Workqueue: wg-kex-wg1 wg_packet_handshake_send_worker RIP: 0010:xfrm6_get_saddr+0x93/0x130 net/ipv6/xfrm6_policy.c:64 Code: df 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 97 00 00 00 4c 8b ab d8 00 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 86 00 00 00 4d 8b 6d 00 e8 ca 13 47 01 48 b8 00 RSP: 0018:ffffc90000117378 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff88807b079dc0 RCX: ffffffff89a0d6d7 RDX: 0000000000000000 RSI: ffffffff89a0d6e9 RDI: ffff88807b079e98 RBP: ffff88807ad73248 R08: 0000000000000007 R09: fffffffffffff000 R10: ffff88807b079dc0 R11: 0000000000000007 R12: ffffc90000117480 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8880b9300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f4586d00440 CR3: 0000000079042000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: xfrm_get_saddr net/xfrm/xfrm_policy.c:2452 [inline] xfrm_tmpl_resolve_one net/xfrm/xfrm_policy.c:2481 [inline] xfrm_tmpl_resolve+0xa26/0xf10 net/xfrm/xfrm_policy.c:2541 xfrm_resolve_and_create_bundle+0x140/0x2570 net/xfrm/xfrm_policy.c:2835 xfrm_bundle_lookup net/xfrm/xfrm_policy.c:3070 [inline] xfrm_lookup_with_ifid+0x4d1/0x1e60 net/xfrm/xfrm_policy.c:3201 xfrm_lookup net/xfrm/xfrm_policy.c:3298 [inline] xfrm_lookup_route+0x3b/0x200 net/xfrm/xfrm_policy.c:3309 ip6_dst_lookup_flow+0x15c/0x1d0 net/ipv6/ip6_output.c:1256 send6+0x611/0xd20 drivers/net/wireguard/socket.c:139 wg_socket_send_skb_to_peer+0xf9/0x220 drivers/net/wireguard/socket.c:178 wg_socket_send_buffer_to_peer+0x12b/0x190 drivers/net/wireguard/socket.c:200 wg_packet_send_handshake_initiation+0x227/0x360 drivers/net/wireguard/send.c:40 wg_packet_handshake_send_worker+0x1c/0x30 drivers/net/wireguard/send.c:51 process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231 process_scheduled_works kernel/workqueue.c:3312 [inline] worker_thread+0x6c8/0xf70 kernel/workqueue.c:3393 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240615154231.234442-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/xfrm6_policy.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index cc885d3aa9e5..2f1ea5f999a2 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -56,12 +56,18 @@ static int xfrm6_get_saddr(struct net *net, int oif, { struct dst_entry *dst; struct net_device *dev; + struct inet6_dev *idev; dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark); if (IS_ERR(dst)) return -EHOSTUNREACH; - dev = ip6_dst_idev(dst)->dev; + idev = ip6_dst_idev(dst); + if (!idev) { + dst_release(dst); + return -EHOSTUNREACH; + } + dev = idev->dev; ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6); dst_release(dst); return 0; From ff960f9d3edbe08a736b5a224d91a305ccc946b0 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 14 Jun 2024 21:13:02 +0800 Subject: [PATCH 419/778] netns: Make get_net_ns() handle zero refcount net Syzkaller hit a warning: refcount_t: addition on 0; use-after-free. WARNING: CPU: 3 PID: 7890 at lib/refcount.c:25 refcount_warn_saturate+0xdf/0x1d0 Modules linked in: CPU: 3 PID: 7890 Comm: tun Not tainted 6.10.0-rc3-00100-gcaa4f9578aba-dirty #310 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:refcount_warn_saturate+0xdf/0x1d0 Code: 41 49 04 31 ff 89 de e8 9f 1e cd fe 84 db 75 9c e8 76 26 cd fe c6 05 b6 41 49 04 01 90 48 c7 c7 b8 8e 25 86 e8 d2 05 b5 fe 90 <0f> 0b 90 90 e9 79 ff ff ff e8 53 26 cd fe 0f b6 1 RSP: 0018:ffff8881067b7da0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff811c72ac RDX: ffff8881026a2140 RSI: ffffffff811c72b5 RDI: 0000000000000001 RBP: ffff8881067b7db0 R08: 0000000000000000 R09: 205b5d3730353139 R10: 0000000000000000 R11: 205d303938375420 R12: ffff8881086500c4 R13: ffff8881086500c4 R14: ffff8881086500b0 R15: ffff888108650040 FS: 00007f5b2961a4c0(0000) GS:ffff88823bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055d7ed36fd18 CR3: 00000001482f6000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_regs+0xa3/0xc0 ? __warn+0xa5/0x1c0 ? refcount_warn_saturate+0xdf/0x1d0 ? report_bug+0x1fc/0x2d0 ? refcount_warn_saturate+0xdf/0x1d0 ? handle_bug+0xa1/0x110 ? exc_invalid_op+0x3c/0xb0 ? asm_exc_invalid_op+0x1f/0x30 ? __warn_printk+0xcc/0x140 ? __warn_printk+0xd5/0x140 ? refcount_warn_saturate+0xdf/0x1d0 get_net_ns+0xa4/0xc0 ? __pfx_get_net_ns+0x10/0x10 open_related_ns+0x5a/0x130 __tun_chr_ioctl+0x1616/0x2370 ? __sanitizer_cov_trace_switch+0x58/0xa0 ? __sanitizer_cov_trace_const_cmp2+0x1c/0x30 ? __pfx_tun_chr_ioctl+0x10/0x10 tun_chr_ioctl+0x2f/0x40 __x64_sys_ioctl+0x11b/0x160 x64_sys_call+0x1211/0x20d0 do_syscall_64+0x9e/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f5b28f165d7 Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 8 RSP: 002b:00007ffc2b59c5e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f5b28f165d7 RDX: 0000000000000000 RSI: 00000000000054e3 RDI: 0000000000000003 RBP: 00007ffc2b59c650 R08: 00007f5b291ed8c0 R09: 00007f5b2961a4c0 R10: 0000000029690010 R11: 0000000000000246 R12: 0000000000400730 R13: 00007ffc2b59cf40 R14: 0000000000000000 R15: 0000000000000000 Kernel panic - not syncing: kernel: panic_on_warn set ... This is trigger as below: ns0 ns1 tun_set_iff() //dev is tun0 tun->dev = dev //ip link set tun0 netns ns1 put_net() //ref is 0 __tun_chr_ioctl() //TUNGETDEVNETNS net = dev_net(tun->dev); open_related_ns(&net->ns, get_net_ns); //ns1 get_net_ns() get_net() //addition on 0 Use maybe_get_net() in get_net_ns in case net's ref is zero to fix this Fixes: 0c3e0e3bb623 ("tun: Add ioctl() TUNGETDEVNETNS cmd to allow obtaining real net ns of tun device") Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20240614131302.2698509-1-yuehaibing@huawei.com Signed-off-by: Paolo Abeni --- net/core/net_namespace.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 4f7a61688d18..6a823ba906c6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -693,11 +693,16 @@ EXPORT_SYMBOL_GPL(__put_net); * get_net_ns - increment the refcount of the network namespace * @ns: common namespace (net) * - * Returns the net's common namespace. + * Returns the net's common namespace or ERR_PTR() if ref is zero. */ struct ns_common *get_net_ns(struct ns_common *ns) { - return &get_net(container_of(ns, struct net, ns))->ns; + struct net *net; + + net = maybe_get_net(container_of(ns, struct net, ns)); + if (net) + return &net->ns; + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(get_net_ns); From 2d7198278ece01818cd95a3beffbdf8b2a353fa0 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Fri, 14 Jun 2024 16:50:30 +0200 Subject: [PATCH 420/778] qca_spi: Make interrupt remembering atomic The whole mechanism to remember occurred SPI interrupts is not atomic, which could lead to unexpected behavior. So fix this by using atomic bit operations instead. Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20240614145030.7781-1-wahrenst@gmx.net Signed-off-by: Paolo Abeni --- drivers/net/ethernet/qualcomm/qca_debug.c | 6 ++---- drivers/net/ethernet/qualcomm/qca_spi.c | 16 ++++++++-------- drivers/net/ethernet/qualcomm/qca_spi.h | 3 +-- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c index ff3b89e9028e..ad06da0fdaa0 100644 --- a/drivers/net/ethernet/qualcomm/qca_debug.c +++ b/drivers/net/ethernet/qualcomm/qca_debug.c @@ -98,10 +98,8 @@ qcaspi_info_show(struct seq_file *s, void *what) seq_printf(s, "IRQ : %d\n", qca->spi_dev->irq); - seq_printf(s, "INTR REQ : %u\n", - qca->intr_req); - seq_printf(s, "INTR SVC : %u\n", - qca->intr_svc); + seq_printf(s, "INTR : %lx\n", + qca->intr); seq_printf(s, "SPI max speed : %lu\n", (unsigned long)qca->spi_dev->max_speed_hz); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 5799ecc88a87..8f7ce6b51a1c 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -35,6 +35,8 @@ #define MAX_DMA_BURST_LEN 5000 +#define SPI_INTR 0 + /* Modules parameters */ #define QCASPI_CLK_SPEED_MIN 1000000 #define QCASPI_CLK_SPEED_MAX 16000000 @@ -579,14 +581,14 @@ qcaspi_spi_thread(void *data) continue; } - if ((qca->intr_req == qca->intr_svc) && + if (!test_bit(SPI_INTR, &qca->intr) && !qca->txr.skb[qca->txr.head]) schedule(); set_current_state(TASK_RUNNING); - netdev_dbg(qca->net_dev, "have work to do. int: %d, tx_skb: %p\n", - qca->intr_req - qca->intr_svc, + netdev_dbg(qca->net_dev, "have work to do. int: %lu, tx_skb: %p\n", + qca->intr, qca->txr.skb[qca->txr.head]); qcaspi_qca7k_sync(qca, QCASPI_EVENT_UPDATE); @@ -600,8 +602,7 @@ qcaspi_spi_thread(void *data) msleep(QCASPI_QCA7K_REBOOT_TIME_MS); } - if (qca->intr_svc != qca->intr_req) { - qca->intr_svc = qca->intr_req; + if (test_and_clear_bit(SPI_INTR, &qca->intr)) { start_spi_intr_handling(qca, &intr_cause); if (intr_cause & SPI_INT_CPU_ON) { @@ -663,7 +664,7 @@ qcaspi_intr_handler(int irq, void *data) { struct qcaspi *qca = data; - qca->intr_req++; + set_bit(SPI_INTR, &qca->intr); if (qca->spi_thread) wake_up_process(qca->spi_thread); @@ -679,8 +680,7 @@ qcaspi_netdev_open(struct net_device *dev) if (!qca) return -EINVAL; - qca->intr_req = 1; - qca->intr_svc = 0; + set_bit(SPI_INTR, &qca->intr); qca->sync = QCASPI_SYNC_UNKNOWN; qcafrm_fsm_init_spi(&qca->frm_handle); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h index d59cb2352cee..8f4808695e82 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.h +++ b/drivers/net/ethernet/qualcomm/qca_spi.h @@ -81,8 +81,7 @@ struct qcaspi { struct qcafrm_handle frm_handle; struct sk_buff *rx_skb; - unsigned int intr_req; - unsigned int intr_svc; + unsigned long intr; u16 reset_count; #ifdef CONFIG_DEBUG_FS From 8039156e23bc07a0039168266dbe68b30cddf7b2 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Mon, 17 Jun 2024 18:37:09 -0700 Subject: [PATCH 421/778] sound/oss/dmasound: add missing MODULE_DESCRIPTION() macro With ARCH=m68k, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in sound/oss/dmasound/dmasound_core.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Reviewed-by: Geert Uytterhoeven Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/20240617-md-m68k-sound-oss-dmasound-v1-1-5c19306be930@quicinc.com --- sound/oss/dmasound/dmasound_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 164335d3c200..4b1baf4dd50e 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -204,6 +204,7 @@ module_param(numWriteBufs, int, 0); static unsigned int writeBufSize = DEFAULT_BUFF_SIZE ; /* in bytes */ module_param(writeBufSize, int, 0); +MODULE_DESCRIPTION("Atari/Amiga/Q40 core DMA sound driver"); MODULE_LICENSE("GPL"); static int sq_unit = -1; From 70794b9563fe011988bcf6a081af9777e63e8d37 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 18 Jun 2024 14:16:04 +0800 Subject: [PATCH 422/778] ALSA: hda/realtek: Add more codec ID to no shutup pins list If it enter to runtime D3 state, it didn't shutup Headset MIC pin. Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/8d86f61e7d6f4a03b311e4eb4e5caaef@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fd337fdd30f6..e2dbcf8f5bcf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -583,10 +583,14 @@ static void alc_shutup_pins(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0236: case 0x10ec0256: + case 0x10ec0257: case 0x19e58326: case 0x10ec0283: + case 0x10ec0285: case 0x10ec0286: + case 0x10ec0287: case 0x10ec0288: + case 0x10ec0295: case 0x10ec0298: alc_headset_mic_no_shutup(codec); break; From 7725363936a88351b71495774c1e0e852ae4cdca Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Fri, 14 Jun 2024 22:41:55 +0530 Subject: [PATCH 423/778] net: lan743x: disable WOL upon resume to restore full data path operation When Wake-on-LAN (WoL) is active and the system is in suspend mode, triggering a system event can wake the system from sleep, which may block the data path. To restore normal data path functionality after waking, disable all wake-up events. Furthermore, clear all Write 1 to Clear (W1C) status bits by writing 1's to them. Fixes: 4d94282afd95 ("lan743x: Add power management support") Reviewed-by: Wojciech Drewek Signed-off-by: Raju Lakkaraju Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan743x_main.c | 30 ++++++++++++++++--- drivers/net/ethernet/microchip/lan743x_main.h | 24 +++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 6be8a43c908a..48835bdc2e63 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3575,7 +3575,7 @@ static void lan743x_pm_set_wol(struct lan743x_adapter *adapter) /* clear wake settings */ pmtctl = lan743x_csr_read(adapter, PMT_CTL); - pmtctl |= PMT_CTL_WUPS_MASK_; + pmtctl |= PMT_CTL_WUPS_MASK_ | PMT_CTL_RES_CLR_WKP_MASK_; pmtctl &= ~(PMT_CTL_GPIO_WAKEUP_EN_ | PMT_CTL_EEE_WAKEUP_EN_ | PMT_CTL_WOL_EN_ | PMT_CTL_MAC_D3_RX_CLK_OVR_ | PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_ | PMT_CTL_ETH_PHY_WAKE_EN_); @@ -3710,6 +3710,7 @@ static int lan743x_pm_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); struct lan743x_adapter *adapter = netdev_priv(netdev); + u32 data; int ret; pci_set_power_state(pdev, PCI_D0); @@ -3728,6 +3729,30 @@ static int lan743x_pm_resume(struct device *dev) return ret; } + ret = lan743x_csr_read(adapter, MAC_WK_SRC); + netif_dbg(adapter, drv, adapter->netdev, + "Wakeup source : 0x%08X\n", ret); + + /* Clear the wol configuration and status bits. Note that + * the status bits are "Write One to Clear (W1C)" + */ + data = MAC_WUCSR_EEE_TX_WAKE_ | MAC_WUCSR_EEE_RX_WAKE_ | + MAC_WUCSR_RFE_WAKE_FR_ | MAC_WUCSR_PFDA_FR_ | MAC_WUCSR_WUFR_ | + MAC_WUCSR_MPR_ | MAC_WUCSR_BCAST_FR_; + lan743x_csr_write(adapter, MAC_WUCSR, data); + + data = MAC_WUCSR2_NS_RCD_ | MAC_WUCSR2_ARP_RCD_ | + MAC_WUCSR2_IPV6_TCPSYN_RCD_ | MAC_WUCSR2_IPV4_TCPSYN_RCD_; + lan743x_csr_write(adapter, MAC_WUCSR2, data); + + data = MAC_WK_SRC_ETH_PHY_WK_ | MAC_WK_SRC_IPV6_TCPSYN_RCD_WK_ | + MAC_WK_SRC_IPV4_TCPSYN_RCD_WK_ | MAC_WK_SRC_EEE_TX_WK_ | + MAC_WK_SRC_EEE_RX_WK_ | MAC_WK_SRC_RFE_FR_WK_ | + MAC_WK_SRC_PFDA_FR_WK_ | MAC_WK_SRC_MP_FR_WK_ | + MAC_WK_SRC_BCAST_FR_WK_ | MAC_WK_SRC_WU_FR_WK_ | + MAC_WK_SRC_WK_FR_SAVED_; + lan743x_csr_write(adapter, MAC_WK_SRC, data); + /* open netdev when netdev is at running state while resume. * For instance, it is true when system wakesup after pm-suspend * However, it is false when system wakes up after suspend GUI menu @@ -3736,9 +3761,6 @@ static int lan743x_pm_resume(struct device *dev) lan743x_netdev_open(netdev); netif_device_attach(netdev); - ret = lan743x_csr_read(adapter, MAC_WK_SRC); - netif_info(adapter, drv, adapter->netdev, - "Wakeup source : 0x%08X\n", ret); return 0; } diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 645bc048e52e..fac0f33d10b2 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -61,6 +61,7 @@ #define PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_ BIT(18) #define PMT_CTL_GPIO_WAKEUP_EN_ BIT(15) #define PMT_CTL_EEE_WAKEUP_EN_ BIT(13) +#define PMT_CTL_RES_CLR_WKP_MASK_ GENMASK(9, 8) #define PMT_CTL_READY_ BIT(7) #define PMT_CTL_ETH_PHY_RST_ BIT(4) #define PMT_CTL_WOL_EN_ BIT(3) @@ -227,12 +228,31 @@ #define MAC_WUCSR (0x140) #define MAC_MP_SO_EN_ BIT(21) #define MAC_WUCSR_RFE_WAKE_EN_ BIT(14) +#define MAC_WUCSR_EEE_TX_WAKE_ BIT(13) +#define MAC_WUCSR_EEE_RX_WAKE_ BIT(11) +#define MAC_WUCSR_RFE_WAKE_FR_ BIT(9) +#define MAC_WUCSR_PFDA_FR_ BIT(7) +#define MAC_WUCSR_WUFR_ BIT(6) +#define MAC_WUCSR_MPR_ BIT(5) +#define MAC_WUCSR_BCAST_FR_ BIT(4) #define MAC_WUCSR_PFDA_EN_ BIT(3) #define MAC_WUCSR_WAKE_EN_ BIT(2) #define MAC_WUCSR_MPEN_ BIT(1) #define MAC_WUCSR_BCST_EN_ BIT(0) #define MAC_WK_SRC (0x144) +#define MAC_WK_SRC_ETH_PHY_WK_ BIT(17) +#define MAC_WK_SRC_IPV6_TCPSYN_RCD_WK_ BIT(16) +#define MAC_WK_SRC_IPV4_TCPSYN_RCD_WK_ BIT(15) +#define MAC_WK_SRC_EEE_TX_WK_ BIT(14) +#define MAC_WK_SRC_EEE_RX_WK_ BIT(13) +#define MAC_WK_SRC_RFE_FR_WK_ BIT(12) +#define MAC_WK_SRC_PFDA_FR_WK_ BIT(11) +#define MAC_WK_SRC_MP_FR_WK_ BIT(10) +#define MAC_WK_SRC_BCAST_FR_WK_ BIT(9) +#define MAC_WK_SRC_WU_FR_WK_ BIT(8) +#define MAC_WK_SRC_WK_FR_SAVED_ BIT(7) + #define MAC_MP_SO_HI (0x148) #define MAC_MP_SO_LO (0x14C) @@ -295,6 +315,10 @@ #define RFE_INDX(index) (0x580 + (index << 2)) #define MAC_WUCSR2 (0x600) +#define MAC_WUCSR2_NS_RCD_ BIT(7) +#define MAC_WUCSR2_ARP_RCD_ BIT(6) +#define MAC_WUCSR2_IPV6_TCPSYN_RCD_ BIT(5) +#define MAC_WUCSR2_IPV4_TCPSYN_RCD_ BIT(4) #define SGMII_ACC (0x720) #define SGMII_ACC_SGMII_BZY_ BIT(31) From 8c248cd836014339498486f14f435c0e344183a7 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Fri, 14 Jun 2024 22:41:56 +0530 Subject: [PATCH 424/778] net: lan743x: Support WOL at both the PHY and MAC appropriately Prevent options not supported by the PHY from being requested to it by the MAC Whenever a WOL option is supported by both, the PHY is given priority since that usually leads to better power savings. Fixes: e9e13b6adc33 ("lan743x: fix for potential NULL pointer dereference with bare card") Reviewed-by: Wojciech Drewek Signed-off-by: Raju Lakkaraju Signed-off-by: Paolo Abeni --- .../net/ethernet/microchip/lan743x_ethtool.c | 44 +++++++++++++++++-- drivers/net/ethernet/microchip/lan743x_main.c | 18 ++++++-- drivers/net/ethernet/microchip/lan743x_main.h | 4 ++ 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index d0f4ff4ee075..0d1740d64676 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -1127,8 +1127,12 @@ static void lan743x_ethtool_get_wol(struct net_device *netdev, if (netdev->phydev) phy_ethtool_get_wol(netdev->phydev, wol); - wol->supported |= WAKE_BCAST | WAKE_UCAST | WAKE_MCAST | - WAKE_MAGIC | WAKE_PHY | WAKE_ARP; + if (wol->supported != adapter->phy_wol_supported) + netif_warn(adapter, drv, adapter->netdev, + "PHY changed its supported WOL! old=%x, new=%x\n", + adapter->phy_wol_supported, wol->supported); + + wol->supported |= MAC_SUPPORTED_WAKES; if (adapter->is_pci11x1x) wol->supported |= WAKE_MAGICSECURE; @@ -1143,7 +1147,39 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev, { struct lan743x_adapter *adapter = netdev_priv(netdev); + /* WAKE_MAGICSEGURE is a modifier of and only valid together with + * WAKE_MAGIC + */ + if ((wol->wolopts & WAKE_MAGICSECURE) && !(wol->wolopts & WAKE_MAGIC)) + return -EINVAL; + + if (netdev->phydev) { + struct ethtool_wolinfo phy_wol; + int ret; + + phy_wol.wolopts = wol->wolopts & adapter->phy_wol_supported; + + /* If WAKE_MAGICSECURE was requested, filter out WAKE_MAGIC + * for PHYs that do not support WAKE_MAGICSECURE + */ + if (wol->wolopts & WAKE_MAGICSECURE && + !(adapter->phy_wol_supported & WAKE_MAGICSECURE)) + phy_wol.wolopts &= ~WAKE_MAGIC; + + ret = phy_ethtool_set_wol(netdev->phydev, &phy_wol); + if (ret && (ret != -EOPNOTSUPP)) + return ret; + + if (ret == -EOPNOTSUPP) + adapter->phy_wolopts = 0; + else + adapter->phy_wolopts = phy_wol.wolopts; + } else { + adapter->phy_wolopts = 0; + } + adapter->wolopts = 0; + wol->wolopts &= ~adapter->phy_wolopts; if (wol->wolopts & WAKE_UCAST) adapter->wolopts |= WAKE_UCAST; if (wol->wolopts & WAKE_MCAST) @@ -1164,10 +1200,10 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev, memset(adapter->sopass, 0, sizeof(u8) * SOPASS_MAX); } + wol->wolopts = adapter->wolopts | adapter->phy_wolopts; device_set_wakeup_enable(&adapter->pdev->dev, (bool)wol->wolopts); - return netdev->phydev ? phy_ethtool_set_wol(netdev->phydev, wol) - : -ENETDOWN; + return 0; } #endif /* CONFIG_PM */ diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 48835bdc2e63..e418539565b1 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3118,6 +3118,17 @@ static int lan743x_netdev_open(struct net_device *netdev) if (ret) goto close_tx; } + +#ifdef CONFIG_PM + if (adapter->netdev->phydev) { + struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + + phy_ethtool_get_wol(netdev->phydev, &wol); + adapter->phy_wol_supported = wol.supported; + adapter->phy_wolopts = wol.wolopts; + } +#endif + return 0; close_tx: @@ -3587,10 +3598,9 @@ static void lan743x_pm_set_wol(struct lan743x_adapter *adapter) pmtctl |= PMT_CTL_ETH_PHY_D3_COLD_OVR_ | PMT_CTL_ETH_PHY_D3_OVR_; - if (adapter->wolopts & WAKE_PHY) { - pmtctl |= PMT_CTL_ETH_PHY_EDPD_PLL_CTL_; + if (adapter->phy_wolopts) pmtctl |= PMT_CTL_ETH_PHY_WAKE_EN_; - } + if (adapter->wolopts & WAKE_MAGIC) { wucsr |= MAC_WUCSR_MPEN_; macrx |= MAC_RX_RXEN_; @@ -3686,7 +3696,7 @@ static int lan743x_pm_suspend(struct device *dev) lan743x_csr_write(adapter, MAC_WUCSR2, 0); lan743x_csr_write(adapter, MAC_WK_SRC, 0xFFFFFFFF); - if (adapter->wolopts) + if (adapter->wolopts || adapter->phy_wolopts) lan743x_pm_set_wol(adapter); if (adapter->is_pci11x1x) { diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index fac0f33d10b2..3b2585a384e2 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -1042,6 +1042,8 @@ enum lan743x_sgmii_lsd { LINK_2500_SLAVE }; +#define MAC_SUPPORTED_WAKES (WAKE_BCAST | WAKE_UCAST | WAKE_MCAST | \ + WAKE_MAGIC | WAKE_ARP) struct lan743x_adapter { struct net_device *netdev; struct mii_bus *mdiobus; @@ -1049,6 +1051,8 @@ struct lan743x_adapter { #ifdef CONFIG_PM u32 wolopts; u8 sopass[SOPASS_MAX]; + u32 phy_wolopts; + u32 phy_wol_supported; #endif struct pci_dev *pdev; struct lan743x_csr csr; From c44d3ffd85db03ebcc3090e55589e10d5af9f3a9 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Fri, 14 Jun 2024 22:41:57 +0530 Subject: [PATCH 425/778] net: phy: mxl-gpy: Remove interrupt mask clearing from config_init When the system resumes from sleep, the phy_init_hw() function invokes config_init(), which clears all interrupt masks and causes wake events to be lost in subsequent wake sequences. Remove interrupt mask clearing from config_init() and preserve relevant masks in config_intr(). Fixes: 7d901a1e878a ("net: phy: add Maxlinear GPY115/21x/24x driver") Reviewed-by: Wojciech Drewek Signed-off-by: Raju Lakkaraju Signed-off-by: Paolo Abeni --- drivers/net/phy/mxl-gpy.c | 58 +++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index b2d36a3a96f1..e5f8ac4b4604 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -107,6 +107,7 @@ struct gpy_priv { u8 fw_major; u8 fw_minor; + u32 wolopts; /* It takes 3 seconds to fully switch out of loopback mode before * it can safely re-enter loopback mode. Record the time when @@ -221,6 +222,15 @@ static int gpy_hwmon_register(struct phy_device *phydev) } #endif +static int gpy_ack_interrupt(struct phy_device *phydev) +{ + int ret; + + /* Clear all pending interrupts */ + ret = phy_read(phydev, PHY_ISTAT); + return ret < 0 ? ret : 0; +} + static int gpy_mbox_read(struct phy_device *phydev, u32 addr) { struct gpy_priv *priv = phydev->priv; @@ -262,16 +272,8 @@ out: static int gpy_config_init(struct phy_device *phydev) { - int ret; - - /* Mask all interrupts */ - ret = phy_write(phydev, PHY_IMASK, 0); - if (ret) - return ret; - - /* Clear all pending interrupts */ - ret = phy_read(phydev, PHY_ISTAT); - return ret < 0 ? ret : 0; + /* Nothing to configure. Configuration Requirement Placeholder */ + return 0; } static int gpy21x_config_init(struct phy_device *phydev) @@ -627,11 +629,23 @@ static int gpy_read_status(struct phy_device *phydev) static int gpy_config_intr(struct phy_device *phydev) { + struct gpy_priv *priv = phydev->priv; u16 mask = 0; + int ret; + + ret = gpy_ack_interrupt(phydev); + if (ret) + return ret; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) mask = PHY_IMASK_MASK; + if (priv->wolopts & WAKE_MAGIC) + mask |= PHY_IMASK_WOL; + + if (priv->wolopts & WAKE_PHY) + mask |= PHY_IMASK_LSTC; + return phy_write(phydev, PHY_IMASK, mask); } @@ -678,6 +692,7 @@ static int gpy_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { struct net_device *attach_dev = phydev->attached_dev; + struct gpy_priv *priv = phydev->priv; int ret; if (wol->wolopts & WAKE_MAGIC) { @@ -725,6 +740,8 @@ static int gpy_set_wol(struct phy_device *phydev, ret = phy_read(phydev, PHY_ISTAT); if (ret < 0) return ret; + + priv->wolopts |= WAKE_MAGIC; } else { /* Disable magic packet matching */ ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, @@ -732,6 +749,13 @@ static int gpy_set_wol(struct phy_device *phydev, WOL_EN); if (ret < 0) return ret; + + /* Disable the WOL interrupt */ + ret = phy_clear_bits(phydev, PHY_IMASK, PHY_IMASK_WOL); + if (ret < 0) + return ret; + + priv->wolopts &= ~WAKE_MAGIC; } if (wol->wolopts & WAKE_PHY) { @@ -748,9 +772,11 @@ static int gpy_set_wol(struct phy_device *phydev, if (ret & (PHY_IMASK_MASK & ~PHY_IMASK_LSTC)) phy_trigger_machine(phydev); + priv->wolopts |= WAKE_PHY; return 0; } + priv->wolopts &= ~WAKE_PHY; /* Disable the link state change interrupt */ return phy_clear_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC); } @@ -758,18 +784,10 @@ static int gpy_set_wol(struct phy_device *phydev, static void gpy_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { - int ret; + struct gpy_priv *priv = phydev->priv; wol->supported = WAKE_MAGIC | WAKE_PHY; - wol->wolopts = 0; - - ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, VPSPEC2_WOL_CTL); - if (ret & WOL_EN) - wol->wolopts |= WAKE_MAGIC; - - ret = phy_read(phydev, PHY_IMASK); - if (ret & PHY_IMASK_LSTC) - wol->wolopts |= WAKE_PHY; + wol->wolopts = priv->wolopts; } static int gpy_loopback(struct phy_device *phydev, bool enable) From d864319871b05fadd153e0aede4811ca7008f5d6 Mon Sep 17 00:00:00 2001 From: David Ruth Date: Fri, 14 Jun 2024 19:03:26 +0000 Subject: [PATCH 426/778] net/sched: act_api: fix possible infinite loop in tcf_idr_check_alloc() syzbot found hanging tasks waiting on rtnl_lock [1] A reproducer is available in the syzbot bug. When a request to add multiple actions with the same index is sent, the second request will block forever on the first request. This holds rtnl_lock, and causes tasks to hang. Return -EAGAIN to prevent infinite looping, while keeping documented behavior. [1] INFO: task kworker/1:0:5088 blocked for more than 143 seconds. Not tainted 6.9.0-rc4-syzkaller-00173-g3cdb45594619 #0 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/1:0 state:D stack:23744 pid:5088 tgid:5088 ppid:2 flags:0x00004000 Workqueue: events_power_efficient reg_check_chans_work Call Trace: context_switch kernel/sched/core.c:5409 [inline] __schedule+0xf15/0x5d00 kernel/sched/core.c:6746 __schedule_loop kernel/sched/core.c:6823 [inline] schedule+0xe7/0x350 kernel/sched/core.c:6838 schedule_preempt_disabled+0x13/0x30 kernel/sched/core.c:6895 __mutex_lock_common kernel/locking/mutex.c:684 [inline] __mutex_lock+0x5b8/0x9c0 kernel/locking/mutex.c:752 wiphy_lock include/net/cfg80211.h:5953 [inline] reg_leave_invalid_chans net/wireless/reg.c:2466 [inline] reg_check_chans_work+0x10a/0x10e0 net/wireless/reg.c:2481 Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Reported-by: syzbot+b87c222546179f4513a7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b87c222546179f4513a7 Signed-off-by: David Ruth Reviewed-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240614190326.1349786-1-druth@chromium.org Signed-off-by: Paolo Abeni --- net/sched/act_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9ee622fb1160..2520708b06a1 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -830,7 +830,6 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, u32 max; if (*index) { -again: rcu_read_lock(); p = idr_find(&idrinfo->action_idr, *index); @@ -839,7 +838,7 @@ again: * index but did not assign the pointer yet. */ rcu_read_unlock(); - goto again; + return -EAGAIN; } if (!p) { From 2ebe8f840c7450ecbfca9d18ac92e9ce9155e269 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 15 Jun 2024 14:27:20 -0400 Subject: [PATCH 427/778] tipc: force a dst refcount before doing decryption As it says in commit 3bc07321ccc2 ("xfrm: Force a dst refcount before entering the xfrm type handlers"): "Crypto requests might return asynchronous. In this case we leave the rcu protected region, so force a refcount on the skb's destination entry before we enter the xfrm type input/output handlers." On TIPC decryption path it has the same problem, and skb_dst_force() should be called before doing decryption to avoid a possible crash. Shuang reported this issue when this warning is triggered: [] WARNING: include/net/dst.h:337 tipc_sk_rcv+0x1055/0x1ea0 [tipc] [] Kdump: loaded Tainted: G W --------- - - 4.18.0-496.el8.x86_64+debug [] Workqueue: crypto cryptd_queue_worker [] RIP: 0010:tipc_sk_rcv+0x1055/0x1ea0 [tipc] [] Call Trace: [] tipc_sk_mcast_rcv+0x548/0xea0 [tipc] [] tipc_rcv+0xcf5/0x1060 [tipc] [] tipc_aead_decrypt_done+0x215/0x2e0 [tipc] [] cryptd_aead_crypt+0xdb/0x190 [] cryptd_queue_worker+0xed/0x190 [] process_one_work+0x93d/0x17e0 Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Reported-by: Shuang Li Signed-off-by: Xin Long Link: https://lore.kernel.org/r/fbe3195fad6997a4eec62d9bf076b2ad03ac336b.1718476040.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni --- net/tipc/node.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/node.c b/net/tipc/node.c index c1e890a82434..500320e5ca47 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2105,6 +2105,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) } else { n = tipc_node_find_by_id(net, ehdr->id); } + skb_dst_force(skb); tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b); if (!skb) return; From 88c67aeb14070bab61d3dd8be96c8b42ebcaf53a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 15 Jun 2024 17:47:30 -0400 Subject: [PATCH 428/778] sched: act_ct: add netns into the key of tcf_ct_flow_table zones_ht is a global hashtable for flow_table with zone as key. However, it does not consider netns when getting a flow_table from zones_ht in tcf_ct_init(), and it means an act_ct action in netns A may get a flow_table that belongs to netns B if it has the same zone value. In Shuang's test with the TOPO: tcf2_c <---> tcf2_sw1 <---> tcf2_sw2 <---> tcf2_s tcf2_sw1 and tcf2_sw2 saw the same flow and used the same flow table, which caused their ct entries entering unexpected states and the TCP connection not able to end normally. This patch fixes the issue simply by adding netns into the key of tcf_ct_flow_table so that an act_ct action gets a flow_table that belongs to its own netns in tcf_ct_init(). Note that for easy coding we don't use tcf_ct_flow_table.nf_ft.net, as the ct_ft is initialized after inserting it to the hashtable in tcf_ct_flow_table_get() and also it requires to implement several functions in rhashtable_params including hashfn, obj_hashfn and obj_cmpfn. Fixes: 64ff70b80fd4 ("net/sched: act_ct: Offload established connections to flow table") Reported-by: Shuang Li Signed-off-by: Xin Long Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1db5b6cc6902c5fc6f8c6cbd85494a2008087be5.1718488050.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni --- net/sched/act_ct.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index baac083fd8f1..2a96d9c1db65 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -41,21 +41,26 @@ static struct workqueue_struct *act_ct_wq; static struct rhashtable zones_ht; static DEFINE_MUTEX(zones_mutex); +struct zones_ht_key { + struct net *net; + u16 zone; +}; + struct tcf_ct_flow_table { struct rhash_head node; /* In zones tables */ struct rcu_work rwork; struct nf_flowtable nf_ft; refcount_t ref; - u16 zone; + struct zones_ht_key key; bool dying; }; static const struct rhashtable_params zones_params = { .head_offset = offsetof(struct tcf_ct_flow_table, node), - .key_offset = offsetof(struct tcf_ct_flow_table, zone), - .key_len = sizeof_field(struct tcf_ct_flow_table, zone), + .key_offset = offsetof(struct tcf_ct_flow_table, key), + .key_len = sizeof_field(struct tcf_ct_flow_table, key), .automatic_shrinking = true, }; @@ -316,11 +321,12 @@ static struct nf_flowtable_type flowtable_ct = { static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) { + struct zones_ht_key key = { .net = net, .zone = params->zone }; struct tcf_ct_flow_table *ct_ft; int err = -ENOMEM; mutex_lock(&zones_mutex); - ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); + ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params); if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) goto out_unlock; @@ -329,7 +335,7 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) goto err_alloc; refcount_set(&ct_ft->ref, 1); - ct_ft->zone = params->zone; + ct_ft->key = key; err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params); if (err) goto err_insert; From c2bd0791c5f02e964402624dfff45ca8995f5397 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Tue, 18 Jun 2024 15:29:49 +0200 Subject: [PATCH 429/778] spi: stm32: qspi: Fix dual flash mode sanity test in stm32_qspi_setup() Misplaced parenthesis make test of mode wrong in case mode is equal to SPI_TX_OCTAL or SPI_RX_OCTAL. Simplify this sanity test, if one of this bit is set, property cs-gpio must be present in DT. Fixes: a557fca630cc ("spi: stm32_qspi: Add transfer_one_message() spi callback") Cc: stable@vger.kernel.org Signed-off-by: Patrice Chotard Link: https://msgid.link/r/20240618132951.2743935-2-patrice.chotard@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-qspi.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index f1e922fd362a..6944e85d8367 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -653,9 +653,7 @@ static int stm32_qspi_setup(struct spi_device *spi) return -EINVAL; mode = spi->mode & (SPI_TX_OCTAL | SPI_RX_OCTAL); - if ((mode == SPI_TX_OCTAL || mode == SPI_RX_OCTAL) || - ((mode == (SPI_TX_OCTAL | SPI_RX_OCTAL)) && - gpiod_count(qspi->dev, "cs") == -ENOENT)) { + if (mode && gpiod_count(qspi->dev, "cs") == -ENOENT) { dev_err(qspi->dev, "spi-rx-bus-width\\/spi-tx-bus-width\\/cs-gpios\n"); dev_err(qspi->dev, "configuration not supported\n"); @@ -676,10 +674,10 @@ static int stm32_qspi_setup(struct spi_device *spi) qspi->cr_reg = CR_APMS | 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN; /* - * Dual flash mode is only enable in case SPI_TX_OCTAL and SPI_TX_OCTAL - * are both set in spi->mode and "cs-gpios" properties is found in DT + * Dual flash mode is only enable in case SPI_TX_OCTAL or SPI_RX_OCTAL + * is set in spi->mode and "cs-gpios" properties is found in DT */ - if (mode == (SPI_TX_OCTAL | SPI_RX_OCTAL)) { + if (mode) { qspi->cr_reg |= CR_DFM; dev_dbg(qspi->dev, "Dual flash mode enable"); } From 63deee52811b2f84ed2da55ad47252f0e8145d62 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Tue, 18 Jun 2024 15:29:50 +0200 Subject: [PATCH 430/778] spi: stm32: qspi: Clamp stm32_qspi_get_mode() output to CCR_BUSWIDTH_4 In case usage of OCTAL mode, buswidth parameter can take the value 8. As return value of stm32_qspi_get_mode() is used to configure fields of CCR registers that are 2 bits only (fields IMODE, ADMODE, ADSIZE, DMODE), clamp return value of stm32_qspi_get_mode() to 4. Fixes: a557fca630cc ("spi: stm32_qspi: Add transfer_one_message() spi callback") Cc: stable@vger.kernel.org Signed-off-by: Patrice Chotard Link: https://msgid.link/r/20240618132951.2743935-3-patrice.chotard@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-qspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index 6944e85d8367..955c920c4b63 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -349,7 +349,7 @@ static int stm32_qspi_wait_poll_status(struct stm32_qspi *qspi) static int stm32_qspi_get_mode(u8 buswidth) { - if (buswidth == 4) + if (buswidth >= 4) return CCR_BUSWIDTH_4; return buswidth; From d6a711a898672dd873aab3844f754a3ca40723a5 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Tue, 18 Jun 2024 15:29:51 +0200 Subject: [PATCH 431/778] spi: Fix OCTAL mode support Add OCTAL mode support. Issue detected using "--octal" spidev_test's option. Signed-off-by: Patrice Chotard Link: https://msgid.link/r/20240618132951.2743935-4-patrice.chotard@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 6 ++++-- include/linux/spi/spi.h | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 9bc9fd10d538..9da736d51a2b 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -4156,7 +4156,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) return -EINVAL; if (xfer->tx_nbits != SPI_NBITS_SINGLE && xfer->tx_nbits != SPI_NBITS_DUAL && - xfer->tx_nbits != SPI_NBITS_QUAD) + xfer->tx_nbits != SPI_NBITS_QUAD && + xfer->tx_nbits != SPI_NBITS_OCTAL) return -EINVAL; if ((xfer->tx_nbits == SPI_NBITS_DUAL) && !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD))) @@ -4171,7 +4172,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) return -EINVAL; if (xfer->rx_nbits != SPI_NBITS_SINGLE && xfer->rx_nbits != SPI_NBITS_DUAL && - xfer->rx_nbits != SPI_NBITS_QUAD) + xfer->rx_nbits != SPI_NBITS_QUAD && + xfer->rx_nbits != SPI_NBITS_OCTAL) return -EINVAL; if ((xfer->rx_nbits == SPI_NBITS_DUAL) && !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD))) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e8e1e798924f..98fdef6e28f2 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1085,12 +1085,13 @@ struct spi_transfer { unsigned dummy_data:1; unsigned cs_off:1; unsigned cs_change:1; - unsigned tx_nbits:3; - unsigned rx_nbits:3; + unsigned tx_nbits:4; + unsigned rx_nbits:4; unsigned timestamped:1; #define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ #define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ +#define SPI_NBITS_OCTAL 0x08 /* 8-bit transfer */ u8 bits_per_word; struct spi_delay delay; struct spi_delay cs_change_delay; From b90d77e5fd784ada62ddd714d15ee2400c28e1cf Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 17 Jun 2024 10:18:12 -0700 Subject: [PATCH 432/778] bpf: Fix remap of arena. The bpf arena logic didn't account for mremap operation. Add a refcnt for multiple mmap events to prevent use-after-free in arena_vm_close. Fixes: 317460317a02 ("bpf: Introduce bpf_arena.") Reported-by: Pengfei Xu Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Barret Rhoden Tested-by: Pengfei Xu Closes: https://lore.kernel.org/bpf/Zmuw29IhgyPNKnIM@xpf.sh.intel.com Link: https://lore.kernel.org/bpf/20240617171812.76634-1-alexei.starovoitov@gmail.com --- kernel/bpf/arena.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 583ee4fe48ef..e52b3ad231b9 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -212,6 +212,7 @@ static u64 arena_map_mem_usage(const struct bpf_map *map) struct vma_list { struct vm_area_struct *vma; struct list_head head; + atomic_t mmap_count; }; static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma) @@ -221,20 +222,30 @@ static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma) vml = kmalloc(sizeof(*vml), GFP_KERNEL); if (!vml) return -ENOMEM; + atomic_set(&vml->mmap_count, 1); vma->vm_private_data = vml; vml->vma = vma; list_add(&vml->head, &arena->vma_list); return 0; } +static void arena_vm_open(struct vm_area_struct *vma) +{ + struct vma_list *vml = vma->vm_private_data; + + atomic_inc(&vml->mmap_count); +} + static void arena_vm_close(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; struct bpf_arena *arena = container_of(map, struct bpf_arena, map); - struct vma_list *vml; + struct vma_list *vml = vma->vm_private_data; + if (!atomic_dec_and_test(&vml->mmap_count)) + return; guard(mutex)(&arena->lock); - vml = vma->vm_private_data; + /* update link list under lock */ list_del(&vml->head); vma->vm_private_data = NULL; kfree(vml); @@ -287,6 +298,7 @@ out: } static const struct vm_operations_struct arena_vm_ops = { + .open = arena_vm_open, .close = arena_vm_close, .fault = arena_vm_fault, }; From 66b5867150630e8f9c9a2b7430e55a3beaa83a5b Mon Sep 17 00:00:00 2001 From: Matt Bobrowski Date: Mon, 17 Jun 2024 13:49:10 +0000 Subject: [PATCH 433/778] bpf: Update BPF LSM maintainer list After catching up with KP recently, we discussed that I will be now be responsible for co-maintaining the BPF LSM. Adding myself as designated maintainer of the BPF LSM, and specifying more files in which the BPF LSM maintenance responsibilities should now extend out to. This is at the back of all the BPF kfuncs that have been added recently, which are fundamentally restricted to being used only from BPF LSM program types. Signed-off-by: Matt Bobrowski Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/ZnA-1qdtXS1TayD7@google.com --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 10ecbf192ebb..9c621cb0c795 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4083,12 +4083,13 @@ F: kernel/bpf/ringbuf.c BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF) M: KP Singh -R: Matt Bobrowski +M: Matt Bobrowski L: bpf@vger.kernel.org S: Maintained F: Documentation/bpf/prog_lsm.rst F: include/linux/bpf_lsm.h F: kernel/bpf/bpf_lsm.c +F: kernel/trace/bpf_trace.c F: security/bpf/ BPF [SELFTESTS] (Test Runners & Infrastructure) From c3f3edf73a8f854f8766a69d2734198a58762e33 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Jun 2024 09:41:51 -0500 Subject: [PATCH 434/778] KVM: Stop processing *all* memslots when "null" mmu_notifier handler is found Bail from outer address space loop, not just the inner memslot loop, when a "null" handler is encountered by __kvm_handle_hva_range(), which is the intended behavior. On x86, which has multiple address spaces thanks to SMM emulation, breaking from just the memslot loop results in undefined behavior due to assigning the non-existent return value from kvm_null_fn() to a bool. In practice, the bug is benign as kvm_mmu_notifier_invalidate_range_end() is the only caller that passes handler=kvm_null_fn, and it doesn't set flush_on_ret, i.e. assigning garbage to r.ret is ultimately ignored. And for most configuration the compiler elides the entire sequence, i.e. there is no undefined behavior at runtime. ------------[ cut here ]------------ UBSAN: invalid-load in arch/x86/kvm/../../../virt/kvm/kvm_main.c:655:10 load of value 160 is not a valid value for type '_Bool' CPU: 370 PID: 8246 Comm: CPU 0/KVM Not tainted 6.8.2-amdsos-build58-ubuntu-22.04+ #1 Hardware name: AMD Corporation Sh54p/Sh54p, BIOS WPC4429N 04/25/2024 Call Trace: dump_stack_lvl+0x48/0x60 ubsan_epilogue+0x5/0x30 __ubsan_handle_load_invalid_value+0x79/0x80 kvm_mmu_notifier_invalidate_range_end.cold+0x18/0x4f [kvm] __mmu_notifier_invalidate_range_end+0x63/0xe0 __split_huge_pmd+0x367/0xfc0 do_huge_pmd_wp_page+0x1cc/0x380 __handle_mm_fault+0x8ee/0xe50 handle_mm_fault+0xe4/0x4a0 __get_user_pages+0x190/0x840 get_user_pages_unlocked+0xe0/0x590 hva_to_pfn+0x114/0x550 [kvm] kvm_faultin_pfn+0xed/0x5b0 [kvm] kvm_tdp_page_fault+0x123/0x170 [kvm] kvm_mmu_page_fault+0x244/0xaa0 [kvm] vcpu_enter_guest+0x592/0x1070 [kvm] kvm_arch_vcpu_ioctl_run+0x145/0x8a0 [kvm] kvm_vcpu_ioctl+0x288/0x6d0 [kvm] __x64_sys_ioctl+0x8f/0xd0 do_syscall_64+0x77/0x120 entry_SYSCALL_64_after_hwframe+0x6e/0x76 ---[ end trace ]--- Fixes: 071064f14d87 ("KVM: Don't take mmu_lock for range invalidation unless necessary") Signed-off-by: Babu Moger Link: https://lore.kernel.org/r/b8723d39903b64c241c50f5513f804390c7b5eec.1718203311.git.babu.moger@amd.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- virt/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 843aa68cbcd0..68be4be5d846 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -651,7 +651,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm, range->on_lock(kvm); if (IS_KVM_NULL_FN(range->handler)) - break; + goto mmu_unlock; } r.ret |= range->handler(kvm, &gfn_range); } @@ -660,6 +660,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm, if (range->flush_on_ret && r.ret) kvm_flush_remote_tlbs(kvm); +mmu_unlock: if (r.found_memslot) KVM_MMU_UNLOCK(kvm); From 5d272dd1b3430bb31fa30042490fa081512424e4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 18 Jun 2024 09:00:04 -0700 Subject: [PATCH 435/778] cpumask: limit FORCE_NR_CPUS to just the UP case Hardcoding the number of CPUs at compile time does improve code generation, but if you get it wrong the result will be confusion. We already limited this earlier to only "experts" (see commit fe5759d5bfda "cpumask: limit visibility of FORCE_NR_CPUS"), but with distro kernel configs often having EXPERT enabled, that turns out to not be much of a limit. To quote the philosophers at Disney: "Everyone can be an expert. And when everyone's an expert, no one will be". There's a runtime warning if you then set nr_cpus to anything but the forced number, but apparently that can be ignored too [1] and by then it's pretty much too late anyway. If we had some real way to limit this to "embedded only", maybe it would be worth it, but let's see if anybody even notices that the option is gone. We need to simplify kernel configuration anyway. Link: https://lore.kernel.org/all/20240618105036.208a8860@rorschach.local.home/ [1] Reported-by: Steven Rostedt Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Paul McKenney Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Yury Norov Signed-off-by: Linus Torvalds --- lib/Kconfig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lib/Kconfig b/lib/Kconfig index d33a268bc256..b0a76dff5c18 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -539,13 +539,7 @@ config CPUMASK_OFFSTACK stack overflow. config FORCE_NR_CPUS - bool "Set number of CPUs at compile time" - depends on SMP && EXPERT && !COMPILE_TEST - help - Say Yes if you have NR_CPUS set to an actual number of possible - CPUs in your system, not to a default value. This forces the core - code to rely on compile-time value and optimize kernel routines - better. + def_bool !SMP config CPU_RMAP bool From 60ff540a1d476c2d48b96f7bc8ac8581b820e878 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Wed, 12 Jun 2024 15:57:40 +0800 Subject: [PATCH 436/778] ASoC: Intel: soc-acpi: mtl: fix speaker no sound on Dell SKU 0C64 Dell SKU 0C64 has a single rt1318 amplifier. The prefix name of control still needs to be set rt1318-1 corresponding to UCM config. Signed-off-by: Shuming Fan Reviewed-by: Bard Liao Link: https://msgid.link/r/20240612075740.1678082-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-mtl-match.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c index 48252fa9e39e..8e0ae3635a35 100644 --- a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c @@ -293,7 +293,7 @@ static const struct snd_soc_acpi_adr_device rt1318_1_single_adr[] = { .adr = 0x000130025D131801, .num_endpoints = 1, .endpoints = &single_endpoint, - .name_prefix = "rt1318" + .name_prefix = "rt1318-1" } }; From 2c1b7bbe253986619fa5623a13055316e730e746 Mon Sep 17 00:00:00 2001 From: Amit Kumar Mahapatra Date: Mon, 17 Jun 2024 21:00:52 +0530 Subject: [PATCH 437/778] spi: Fix SPI slave probe failure While adding a SPI device, the SPI core ensures that multiple logical CS doesn't map to the same physical CS. For example, spi->chip_select[0] != spi->chip_select[1] and so forth. However, unlike the SPI master, the SPI slave doesn't have the list of chip selects, this leads to probe failure when the SPI controller is configured as slave. Update the __spi_add_device() function to perform this check only if the SPI controller is configured as master. Fixes: 4d8ff6b0991d ("spi: Add multi-cs memories support in SPI core") Signed-off-by: Amit Kumar Mahapatra Link: https://msgid.link/r/20240617153052.26636-1-amit.kumar-mahapatra@amd.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 9da736d51a2b..fc13fa192189 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -689,10 +689,12 @@ static int __spi_add_device(struct spi_device *spi) * Make sure that multiple logical CS doesn't map to the same physical CS. * For example, spi->chip_select[0] != spi->chip_select[1] and so on. */ - for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { - status = spi_dev_check_cs(dev, spi, idx, spi, idx + 1); - if (status) - return status; + if (!spi_controller_is_target(ctlr)) { + for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { + status = spi_dev_check_cs(dev, spi, idx, spi, idx + 1); + if (status) + return status; + } } /* Set the bus ID string */ From 81d23d2a24012e448f651e007fac2cfd20a45ce0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Jun 2024 12:34:32 +0300 Subject: [PATCH 438/778] ptp: fix integer overflow in max_vclocks_store On 32bit systems, the "4 * max" multiply can overflow. Use kcalloc() to do the allocation to prevent this. Fixes: 44c494c8e30e ("ptp: track available ptp vclocks information") Signed-off-by: Dan Carpenter Reviewed-by: Wojciech Drewek Reviewed-by: Jiri Pirko Reviewed-by: Heng Qi Link: https://lore.kernel.org/r/ee8110ed-6619-4bd7-9024-28c1f2ac24f4@moroto.mountain Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_sysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index a15460aaa03b..6b1b8f57cd95 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -296,8 +296,7 @@ static ssize_t max_vclocks_store(struct device *dev, if (max < ptp->n_vclocks) goto out; - size = sizeof(int) * max; - vclock_index = kzalloc(size, GFP_KERNEL); + vclock_index = kcalloc(max, sizeof(int), GFP_KERNEL); if (!vclock_index) { err = -ENOMEM; goto out; From e2b447c9a1bba718f9c07513a1e8958209e862a1 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 17 Jun 2024 09:28:33 +0100 Subject: [PATCH 439/778] selftests: openvswitch: Use bash as interpreter openvswitch.sh makes use of substitutions of the form ${ns:0:1}, to obtain the first character of $ns. Empirically, this is works with bash but not dash. When run with dash these evaluate to an empty string and printing an error to stdout. # dash -c 'ns=client; echo "${ns:0:1}"' 2>error # cat error dash: 1: Bad substitution # bash -c 'ns=client; echo "${ns:0:1}"' 2>error c # cat error This leads to tests that neither pass nor fail. F.e. TEST: arp_ping [START] adding sandbox 'test_arp_ping' Adding DP/Bridge IF: sbx:test_arp_ping dp:arpping {, , } create namespaces ./openvswitch.sh: 282: eval: Bad substitution TEST: ct_connect_v4 [START] adding sandbox 'test_ct_connect_v4' Adding DP/Bridge IF: sbx:test_ct_connect_v4 dp:ct4 {, , } ./openvswitch.sh: 322: eval: Bad substitution create namespaces Resolve this by making openvswitch.sh a bash script. Fixes: 918423fda910 ("selftests: openvswitch: add an initial flow programming case") Signed-off-by: Simon Horman Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240617-ovs-selftest-bash-v1-1-7ae6ccd3617b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 5cae53543849..15bca0708717 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # OVS kernel module self tests From 890182bb3d00d49ccd70b0d651ad8342745a08e7 Mon Sep 17 00:00:00 2001 From: Haylen Chu Date: Tue, 18 Jun 2024 09:16:14 +0000 Subject: [PATCH 440/778] riscv: dts: sophgo: disable write-protection for milkv duo Milkv Duo does not have a write-protect pin, so disable write protect to prevent SDcards misdetected as read-only. Fixes: 89a7056ed4f7 ("riscv: dts: sophgo: add sdcard support for milkv duo") Signed-off-by: Haylen Chu Link: https://lore.kernel.org/r/SEYPR01MB4221943C7B101DD2318DA0D3D7CE2@SEYPR01MB4221.apcprd01.prod.exchangelabs.com Signed-off-by: Inochi Amaoto Signed-off-by: Chen Wang --- arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts b/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts index cd013588adc0..375ff2661b6e 100644 --- a/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts +++ b/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts @@ -45,6 +45,7 @@ no-1-8-v; no-mmc; no-sdio; + disable-wp; }; &uart0 { From cd6f12e173df44a20c2ac2ac110007dc14968088 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 14 Jun 2024 11:45:15 +0200 Subject: [PATCH 441/778] net: phy: dp83tg720: wake up PHYs in managed mode In case this PHY is bootstrapped for managed mode, we need to manually wake it. Otherwise no link will be detected. Cc: stable@vger.kernel.org Fixes: cb80ee2f9bee1 ("net: phy: Add support for the DP83TG720S Ethernet PHY") Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240614094516.1481231-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83tg720.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/dp83tg720.c b/drivers/net/phy/dp83tg720.c index 326c9770a6dc..1186dfc70fb3 100644 --- a/drivers/net/phy/dp83tg720.c +++ b/drivers/net/phy/dp83tg720.c @@ -17,6 +17,11 @@ #define DP83TG720S_PHY_RESET 0x1f #define DP83TG720S_HW_RESET BIT(15) +#define DP83TG720S_LPS_CFG3 0x18c +/* Power modes are documented as bit fields but used as values */ +/* Power Mode 0 is Normal mode */ +#define DP83TG720S_LPS_CFG3_PWR_MODE_0 BIT(0) + #define DP83TG720S_RGMII_DELAY_CTRL 0x602 /* In RGMII mode, Enable or disable the internal delay for RXD */ #define DP83TG720S_RGMII_RX_CLK_SEL BIT(1) @@ -154,10 +159,17 @@ static int dp83tg720_config_init(struct phy_device *phydev) */ usleep_range(1000, 2000); - if (phy_interface_is_rgmii(phydev)) - return dp83tg720_config_rgmii_delay(phydev); + if (phy_interface_is_rgmii(phydev)) { + ret = dp83tg720_config_rgmii_delay(phydev); + if (ret) + return ret; + } - return 0; + /* In case the PHY is bootstrapped in managed mode, we need to + * wake it. + */ + return phy_write_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_LPS_CFG3, + DP83TG720S_LPS_CFG3_PWR_MODE_0); } static struct phy_driver dp83tg720_driver[] = { From 40a64cc9679540ff7c46ecc51178b07d42abbb1c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 14 Jun 2024 11:45:16 +0200 Subject: [PATCH 442/778] net: phy: dp83tg720: get master/slave configuration in link down state Get master/slave configuration for initial system start with the link in down state. This ensures ethtool shows current configuration. Also fixes link reconfiguration with ethtool while in down state, preventing ethtool from displaying outdated configuration. Even though dp83tg720_config_init() is executed periodically as long as the link is in admin up state but no carrier is detected, this is not sufficient for the link in admin down state where dp83tg720_read_status() is not periodically executed. To cover this case, we need an extra read role configuration in dp83tg720_config_aneg(). Fixes: cb80ee2f9bee1 ("net: phy: Add support for the DP83TG720S Ethernet PHY") Cc: stable@vger.kernel.org Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240614094516.1481231-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83tg720.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/dp83tg720.c b/drivers/net/phy/dp83tg720.c index 1186dfc70fb3..c706429b225a 100644 --- a/drivers/net/phy/dp83tg720.c +++ b/drivers/net/phy/dp83tg720.c @@ -36,11 +36,20 @@ static int dp83tg720_config_aneg(struct phy_device *phydev) { + int ret; + /* Autoneg is not supported and this PHY supports only one speed. * We need to care only about master/slave configuration if it was * changed by user. */ - return genphy_c45_pma_baset1_setup_master_slave(phydev); + ret = genphy_c45_pma_baset1_setup_master_slave(phydev); + if (ret) + return ret; + + /* Re-read role configuration to make changes visible even if + * the link is in administrative down state. + */ + return genphy_c45_pma_baset1_read_master_slave(phydev); } static int dp83tg720_read_status(struct phy_device *phydev) @@ -69,6 +78,8 @@ static int dp83tg720_read_status(struct phy_device *phydev) return ret; /* After HW reset we need to restore master/slave configuration. + * genphy_c45_pma_baset1_read_master_slave() call will be done + * by the dp83tg720_config_aneg() function. */ ret = dp83tg720_config_aneg(phydev); if (ret) @@ -168,8 +179,15 @@ static int dp83tg720_config_init(struct phy_device *phydev) /* In case the PHY is bootstrapped in managed mode, we need to * wake it. */ - return phy_write_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_LPS_CFG3, - DP83TG720S_LPS_CFG3_PWR_MODE_0); + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_LPS_CFG3, + DP83TG720S_LPS_CFG3_PWR_MODE_0); + if (ret) + return ret; + + /* Make role configuration visible for ethtool on init and after + * rest. + */ + return genphy_c45_pma_baset1_read_master_slave(phydev); } static struct phy_driver dp83tg720_driver[] = { From b8c43360f6e424131fa81d3ba8792ad8ff25a09e Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Mon, 17 Jun 2024 09:39:22 +0800 Subject: [PATCH 443/778] net: stmmac: No need to calculate speed divider when offload is disabled commit be27b8965297 ("net: stmmac: replace priv->speed with the portTransmitRate from the tc-cbs parameters") introduced a problem. When deleting, it prompts "Invalid portTransmitRate 0 (idleSlope - sendSlope)" and exits. Add judgment on cbs.enable. Only when offload is enabled, speed divider needs to be calculated. Fixes: be27b8965297 ("net: stmmac: replace priv->speed with the portTransmitRate from the tc-cbs parameters") Signed-off-by: Xiaolei Wang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240617013922.1035854-1-xiaolei.wang@windriver.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1562fbdd0a04..996f2bcd07a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -358,24 +358,28 @@ static int tc_setup_cbs(struct stmmac_priv *priv, port_transmit_rate_kbps = qopt->idleslope - qopt->sendslope; - /* Port Transmit Rate and Speed Divider */ - switch (div_s64(port_transmit_rate_kbps, 1000)) { - case SPEED_10000: - case SPEED_5000: - ptr = 32; - break; - case SPEED_2500: - case SPEED_1000: - ptr = 8; - break; - case SPEED_100: - ptr = 4; - break; - default: - netdev_err(priv->dev, - "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n", - port_transmit_rate_kbps); - return -EINVAL; + if (qopt->enable) { + /* Port Transmit Rate and Speed Divider */ + switch (div_s64(port_transmit_rate_kbps, 1000)) { + case SPEED_10000: + case SPEED_5000: + ptr = 32; + break; + case SPEED_2500: + case SPEED_1000: + ptr = 8; + break; + case SPEED_100: + ptr = 4; + break; + default: + netdev_err(priv->dev, + "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n", + port_transmit_rate_kbps); + return -EINVAL; + } + } else { + ptr = 0; } mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use; From 29433a17a79caa8680b9c0761f2b10502fda9ce3 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 18 Jun 2024 19:22:58 +1200 Subject: [PATCH 444/778] cifs: drop the incorrect assertion in cifs_swap_rw() Since commit 2282679fb20b ("mm: submit multipage write for SWP_FS_OPS swap-space"), we can plug multiple pages then unplug them all together. That means iov_iter_count(iter) could be way bigger than PAGE_SIZE, it actually equals the size of iov_iter_npages(iter, INT_MAX). Note this issue has nothing to do with large folios as we don't support THP_SWPOUT to non-block devices. Fixes: 2282679fb20b ("mm: submit multipage write for SWP_FS_OPS swap-space") Reported-by: Christoph Hellwig Closes: https://lore.kernel.org/linux-mm/20240614100329.1203579-1-hch@lst.de/ Cc: NeilBrown Cc: Anna Schumaker Cc: Steve French Cc: Trond Myklebust Cc: Chuanhua Han Cc: Ryan Roberts Cc: Chris Li Cc: "Huang, Ying" Cc: Jeff Layton Cc: Paulo Alcantara Cc: Ronnie Sahlberg Cc: Shyam Prasad N Cc: Tom Talpey Cc: Bharath SM Cc: Signed-off-by: Barry Song Signed-off-by: Steve French --- fs/smb/client/file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9d5c2440abfc..1e269e0bc75b 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -3200,8 +3200,6 @@ static int cifs_swap_rw(struct kiocb *iocb, struct iov_iter *iter) { ssize_t ret; - WARN_ON_ONCE(iov_iter_count(iter) != PAGE_SIZE); - if (iov_iter_rw(iter) == READ) ret = netfs_unbuffered_read_iter_locked(iocb, iter); else From 739c9765793e5794578a64aab293c58607f1826a Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 18 Jun 2024 15:01:52 +0100 Subject: [PATCH 445/778] x86/resctrl: Don't try to free nonexistent RMIDs Commit 6791e0ea3071 ("x86/resctrl: Access per-rmid structures by index") adds logic to map individual monitoring groups into a global index space used for tracking allocated RMIDs. Attempts to free the default RMID are ignored in free_rmid(), and this works fine on x86. With arm64 MPAM, there is a latent bug here however: on platforms with no monitors exposed through resctrl, each control group still gets a different monitoring group ID as seen by the hardware, since the CLOSID always forms part of the monitoring group ID. This means that when removing a control group, the code may try to free this group's default monitoring group RMID for real. If there are no monitors however, the RMID tracking table rmid_ptrs[] would be a waste of memory and is never allocated, leading to a splat when free_rmid() tries to dereference the table. One option would be to treat RMID 0 as special for every CLOSID, but this would be ugly since bookkeeping still needs to be done for these monitoring group IDs when there are monitors present in the hardware. Instead, add a gating check of resctrl_arch_mon_capable() in free_rmid(), and just do nothing if the hardware doesn't have monitors. This fix mirrors the gating checks already present in mkdir_rdt_prepare_rmid_alloc() and elsewhere. No functional change on x86. [ bp: Massage commit message. ] Fixes: 6791e0ea3071 ("x86/resctrl: Access per-rmid structures by index") Signed-off-by: Dave Martin Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Reinette Chatre Link: https://lore.kernel.org/r/20240618140152.83154-1-Dave.Martin@arm.com --- arch/x86/kernel/cpu/resctrl/monitor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 2345e6836593..366f496ca3ce 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -519,7 +519,8 @@ void free_rmid(u32 closid, u32 rmid) * allows architectures that ignore the closid parameter to avoid an * unnecessary check. */ - if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, + if (!resctrl_arch_mon_capable() || + idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, RESCTRL_RESERVED_RMID)) return; From 7be4cb7189f747b4e5b6977d0e4387bde3204e62 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Mon, 17 Jun 2024 12:28:21 +0200 Subject: [PATCH 446/778] net: usb: ax88179_178a: improve reset check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After ecf848eb934b ("net: usb: ax88179_178a: fix link status when link is set to down/up") to not reset from usbnet_open after the reset from usbnet_probe at initialization stage to speed up this, some issues have been reported. It seems to happen that if the initialization is slower, and some time passes between the probe operation and the open operation, the second reset from open is necessary too to have the device working. The reason is that if there is no activity with the phy, this is "disconnected". In order to improve this, the solution is to detect when the phy is "disconnected", and we can use the phy status register for this. So we will only reset the device from reset operation in this situation, that is, only if necessary. The same bahavior is happening when the device is stopped (link set to down) and later is restarted (link set to up), so if the phy keeps working we only need to enable the mac again, but if enough time passes between the device stop and restart, reset is necessary, and we can detect the situation checking the phy status register too. cc: stable@vger.kernel.org # 6.6+ Fixes: ecf848eb934b ("net: usb: ax88179_178a: fix link status when link is set to down/up") Reported-by: Yongqin Liu Reported-by: Antje Miederhöfer Reported-by: Arne Fitzenreiter Tested-by: Yongqin Liu Tested-by: Antje Miederhöfer Signed-off-by: Jose Ignacio Tornos Martinez Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 51c295e1e823..c2fb736f78b2 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -174,7 +174,6 @@ struct ax88179_data { u32 wol_supported; u32 wolopts; u8 disconnecting; - u8 initialized; }; struct ax88179_int_data { @@ -1678,12 +1677,21 @@ static int ax88179_reset(struct usbnet *dev) static int ax88179_net_reset(struct usbnet *dev) { - struct ax88179_data *ax179_data = dev->driver_priv; + u16 tmp16; - if (ax179_data->initialized) + ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID, GMII_PHY_PHYSR, + 2, &tmp16); + if (tmp16) { + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + if (!(tmp16 & AX_MEDIUM_RECEIVE_EN)) { + tmp16 |= AX_MEDIUM_RECEIVE_EN; + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); + } + } else { ax88179_reset(dev); - else - ax179_data->initialized = 1; + } return 0; } From 604141c036e1b636e2a71cf6e1aa09d1e45f40c2 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Mon, 17 Jun 2024 21:15:23 +0800 Subject: [PATCH 447/778] virtio_net: checksum offloading handling fix In virtio spec 0.95, VIRTIO_NET_F_GUEST_CSUM was designed to handle partially checksummed packets, and the validation of fully checksummed packets by the device is independent of VIRTIO_NET_F_GUEST_CSUM negotiation. However, the specification erroneously stated: "If VIRTIO_NET_F_GUEST_CSUM is not negotiated, the device MUST set flags to zero and SHOULD supply a fully checksummed packet to the driver." This statement is inaccurate because even without VIRTIO_NET_F_GUEST_CSUM negotiation, the device can still set the VIRTIO_NET_HDR_F_DATA_VALID flag. Essentially, the device can facilitate the validation of these packets' checksums - a process known as RX checksum offloading - removing the need for the driver to do so. This scenario is currently not implemented in the driver and requires correction. The necessary specification correction[1] has been made and approved in the virtio TC vote. [1] https://lists.oasis-open.org/archives/virtio-comment/202401/msg00011.html Fixes: 4f49129be6fa ("virtio-net: Set RXCSUM feature if GUEST_CSUM is available") Signed-off-by: Heng Qi Reviewed-by: Jiri Pirko Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 61a57d134544..aa70a7ed8072 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5666,8 +5666,16 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= dev->hw_features & NETIF_F_ALL_TSO; /* (!csum && gso) case will be fixed by register_netdev() */ } - if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) - dev->features |= NETIF_F_RXCSUM; + + /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't + * need to calculate checksums for partially checksummed packets, + * as they're considered valid by the upper layer. + * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only + * receives fully checksummed packets. The device may assist in + * validating these packets' checksums, so the driver won't have to. + */ + dev->features |= NETIF_F_RXCSUM; + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) dev->features |= NETIF_F_GRO_HW; From 703eec1b242276f2d97d98f04790ddad319ddde4 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Mon, 17 Jun 2024 21:15:24 +0800 Subject: [PATCH 448/778] virtio_net: fixing XDP for fully checksummed packets handling The XDP program can't correctly handle partially checksummed packets, but works fine with fully checksummed packets. If the device has already validated fully checksummed packets, then the driver doesn't need to re-validate them, saving CPU resources. Additionally, the driver does not drop all partially checksummed packets when VIRTIO_NET_F_GUEST_CSUM is not negotiated. This is not a bug, as the driver has always done this. Fixes: 436c9453a1ac ("virtio-net: keep vnet header zeroed after processing XDP") Signed-off-by: Heng Qi Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index aa70a7ed8072..ea10db9a09fa 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1360,6 +1360,10 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, if (unlikely(hdr->hdr.gso_type)) goto err_xdp; + /* Partially checksummed packets must be dropped. */ + if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + goto err_xdp; + buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -1677,6 +1681,10 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, if (unlikely(hdr->hdr.gso_type)) return NULL; + /* Partially checksummed packets must be dropped. */ + if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return NULL; + /* Now XDP core assumes frag size is PAGE_SIZE, but buffers * with headroom may add hole in truesize, which * make their length exceed PAGE_SIZE. So we disabled the @@ -1943,6 +1951,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, struct net_device *dev = vi->dev; struct sk_buff *skb; struct virtio_net_common_hdr *hdr; + u8 flags; if (unlikely(len < vi->hdr_len + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); @@ -1951,6 +1960,15 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, return; } + /* 1. Save the flags early, as the XDP program might overwrite them. + * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID + * stay valid after XDP processing. + * 2. XDP doesn't work with partially checksummed packets (refer to + * virtnet_xdp_set()), so packets marked as + * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. + */ + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; + if (vi->mergeable_rx_bufs) skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); @@ -1966,7 +1984,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); - if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) + if (flags & VIRTIO_NET_HDR_F_DATA_VALID) skb->ip_summed = CHECKSUM_UNNECESSARY; if (virtio_net_hdr_to_skb(skb, &hdr->hdr, From b95a4afe2defd6f46891985f9436a568cd35a31c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 17 Jun 2024 17:50:26 +0100 Subject: [PATCH 449/778] octeontx2-pf: Add error handling to VLAN unoffload handling otx2_sq_append_skb makes used of __vlan_hwaccel_push_inside() to unoffload VLANs - push them from skb meta data into skb data. However, it omitts a check for __vlan_hwaccel_push_inside() returning NULL. Found by inspection based on [1] and [2]. Compile tested only. [1] Re: [PATCH net-next v1] net: stmmac: Enable TSO on VLANs https://lore.kernel.org/all/ZmrN2W8Fye450TKs@shell.armlinux.org.uk/ [2] Re: [PATCH net-next v2] net: stmmac: Enable TSO on VLANs https://lore.kernel.org/all/CANn89i+11L5=tKsa7V7Aeyxaj6nYGRwy35PAbCRYJ73G+b25sg@mail.gmail.com/ Fixes: fd9d7859db6c ("octeontx2-pf: Implement ingress/egress VLAN offload") Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index a16e9f244117..929b4eac25d9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -1174,8 +1174,11 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) { /* Insert vlan tag before giving pkt to tso */ - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { skb = __vlan_hwaccel_push_inside(skb); + if (!skb) + return true; + } otx2_sq_append_tso(pfvf, sq, skb, qidx); return true; } From fa997b0576c9df635ee363406f5e014dba0f9264 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 18 Jun 2024 17:28:29 +0200 Subject: [PATCH 450/778] ata: ahci: Do not enable LPM if no LPM states are supported by the HBA LPM consists of HIPM (host initiated power management) and DIPM (device initiated power management). ata_eh_set_lpm() will only enable HIPM if both the HBA and the device supports it. However, DIPM will be enabled as long as the device supports it. The HBA will later reject the device's request to enter a power state that it does not support (Slumber/Partial/DevSleep) (DevSleep is never initiated by the device). For a HBA that doesn't support any LPM states, simply don't set a LPM policy such that all the HIPM/DIPM probing/enabling will be skipped. Not enabling HIPM or DIPM in the first place is safer than relying on the device following the AHCI specification and respecting the NAK. (There are comments in the code that some devices misbehave when receiving a NAK.) Performing this check in ahci_update_initial_lpm_policy() also has the advantage that a HBA that doesn't support any LPM states will take the exact same code paths as a port that is external/hot plug capable. Side note: the port in ata_port_dbg() has not been given a unique id yet, but this is not overly important as the debug print is disabled unless explicitly enabled using dynamic debug. A follow-up series will make sure that the unique id assignment will be done earlier. For now, the important thing is that the function returns before setting the LPM policy. Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type") Cc: stable@vger.kernel.org Reviewed-by: Mario Limonciello Reviewed-by: Mika Westerberg Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240618152828.2686771-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 07d66d2c5f0d..5eb38fbbbecd 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1735,6 +1735,14 @@ static void ahci_update_initial_lpm_policy(struct ata_port *ap) if (ap->pflags & ATA_PFLAG_EXTERNAL) return; + /* If no LPM states are supported by the HBA, do not bother with LPM */ + if ((ap->host->flags & ATA_HOST_NO_PART) && + (ap->host->flags & ATA_HOST_NO_SSC) && + (ap->host->flags & ATA_HOST_NO_DEVSLP)) { + ata_port_dbg(ap, "no LPM states supported, not enabling LPM\n"); + return; + } + /* user modified policy via module param */ if (mobile_lpm_policy != -1) { policy = mobile_lpm_policy; From 1062d03827b78614259b3b4b992deb27ee6aa84d Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 18 Jun 2024 11:41:22 +0530 Subject: [PATCH 451/778] octeontx2-pf: Fix linking objects into multiple modules This patch fixes the below build warning messages that are caused due to linking same files to multiple modules by exporting the required symbols. "scripts/Makefile.build:244: drivers/net/ethernet/marvell/octeontx2/nic/Makefile: otx2_devlink.o is added to multiple modules: rvu_nicpf rvu_nicvf scripts/Makefile.build:244: drivers/net/ethernet/marvell/octeontx2/nic/Makefile: otx2_dcbnl.o is added to multiple modules: rvu_nicpf rvu_nicvf" Fixes: 8e67558177f8 ("octeontx2-pf: PFC config support with DCBx"). Signed-off-by: Geetha sowjanya Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/Makefile | 3 +-- drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c | 7 +++++++ drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 5664f768cb0c..64a97a0a10ed 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -9,10 +9,9 @@ obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o otx2_ptp.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \ otx2_devlink.o qos_sq.o qos.o -rvu_nicvf-y := otx2_vf.o otx2_devlink.o +rvu_nicvf-y := otx2_vf.o rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o -rvu_nicvf-$(CONFIG_DCB) += otx2_dcbnl.o rvu_nicpf-$(CONFIG_MACSEC) += cn10k_macsec.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c index 28fb643d2917..aa01110f04a3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -54,6 +54,7 @@ int otx2_pfc_txschq_config(struct otx2_nic *pfvf) return 0; } +EXPORT_SYMBOL(otx2_pfc_txschq_config); static int otx2_pfc_txschq_alloc_one(struct otx2_nic *pfvf, u8 prio) { @@ -122,6 +123,7 @@ int otx2_pfc_txschq_alloc(struct otx2_nic *pfvf) return 0; } +EXPORT_SYMBOL(otx2_pfc_txschq_alloc); static int otx2_pfc_txschq_stop_one(struct otx2_nic *pfvf, u8 prio) { @@ -260,6 +262,7 @@ update_sq_smq_map: return 0; } +EXPORT_SYMBOL(otx2_pfc_txschq_update); int otx2_pfc_txschq_stop(struct otx2_nic *pfvf) { @@ -282,6 +285,7 @@ int otx2_pfc_txschq_stop(struct otx2_nic *pfvf) return 0; } +EXPORT_SYMBOL(otx2_pfc_txschq_stop); int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf) { @@ -321,6 +325,7 @@ unlock: mutex_unlock(&pfvf->mbox.lock); return err; } +EXPORT_SYMBOL(otx2_config_priority_flow_ctrl); void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx, bool pfc_enable) @@ -385,6 +390,7 @@ out: "Updating BPIDs in CQ and Aura contexts of RQ%d failed with err %d\n", qidx, err); } +EXPORT_SYMBOL(otx2_update_bpid_in_rqctx); static int otx2_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc) { @@ -472,3 +478,4 @@ int otx2_dcbnl_set_ops(struct net_device *dev) return 0; } +EXPORT_SYMBOL(otx2_dcbnl_set_ops); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c index 99ddf31269d9..458d34a62e18 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c @@ -113,6 +113,7 @@ err_dl: devlink_free(dl); return err; } +EXPORT_SYMBOL(otx2_register_dl); void otx2_unregister_dl(struct otx2_nic *pfvf) { @@ -124,3 +125,4 @@ void otx2_unregister_dl(struct otx2_nic *pfvf) ARRAY_SIZE(otx2_dl_params)); devlink_free(dl); } +EXPORT_SYMBOL(otx2_unregister_dl); From a8763466669d21b570b26160d0a5e0a2ee529d22 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Tue, 18 Jun 2024 09:29:21 +0200 Subject: [PATCH 452/778] selftests: openvswitch: Set value to nla flags. Netlink flags, although they don't have payload at the netlink level, are represented as having "True" as value in pyroute2. Without it, trying to add a flow with a flag-type action (e.g: pop_vlan) fails with the following traceback: Traceback (most recent call last): File "[...]/ovs-dpctl.py", line 2498, in sys.exit(main(sys.argv)) ^^^^^^^^^^^^^^ File "[...]/ovs-dpctl.py", line 2487, in main ovsflow.add_flow(rep["dpifindex"], flow) File "[...]/ovs-dpctl.py", line 2136, in add_flow reply = self.nlm_request( ^^^^^^^^^^^^^^^^^ File "[...]/pyroute2/netlink/nlsocket.py", line 822, in nlm_request return tuple(self._genlm_request(*argv, **kwarg)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "[...]/pyroute2/netlink/generic/__init__.py", line 126, in nlm_request return tuple(super().nlm_request(*argv, **kwarg)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "[...]/pyroute2/netlink/nlsocket.py", line 1124, in nlm_request self.put(msg, msg_type, msg_flags, msg_seq=msg_seq) File "[...]/pyroute2/netlink/nlsocket.py", line 389, in put self.sendto_gate(msg, addr) File "[...]/pyroute2/netlink/nlsocket.py", line 1056, in sendto_gate msg.encode() File "[...]/pyroute2/netlink/__init__.py", line 1245, in encode offset = self.encode_nlas(offset) ^^^^^^^^^^^^^^^^^^^^^^^^ File "[...]/pyroute2/netlink/__init__.py", line 1560, in encode_nlas nla_instance.setvalue(cell[1]) File "[...]/pyroute2/netlink/__init__.py", line 1265, in setvalue nlv.setvalue(nla_tuple[1]) ~~~~~~~~~^^^ IndexError: list index out of range Signed-off-by: Adrian Moreno Acked-by: Aaron Conole Signed-off-by: David S. Miller --- tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 1dd057afd3fb..9f8dec2f6539 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -531,7 +531,7 @@ class ovsactions(nla): for flat_act in parse_flat_map: if parse_starts_block(actstr, flat_act[0], False): actstr = actstr[len(flat_act[0]):] - self["attrs"].append([flat_act[1]]) + self["attrs"].append([flat_act[1], True]) actstr = actstr[strspn(actstr, ", ") :] parsed = True From 895a37028a4854962def6e2f2820a23c84062f66 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 19 Jun 2024 13:18:56 +0100 Subject: [PATCH 453/778] arm64: mm: Permit PTE SW bits to change in live mappings Previously pgattr_change_is_safe() was overly-strict and complained (e.g. "[ 116.262743] __check_safe_pte_update: unsafe attribute change: 0x0560000043768fc3 -> 0x0160000043768fc3") if it saw any SW bits change in a live PTE. There is no such restriction on SW bits in the Arm ARM. Until now, no SW bits have been updated in live mappings via the set_ptes() route. PTE_DIRTY would be updated live, but this is handled by ptep_set_access_flags() which does not call pgattr_change_is_safe(). However, with the introduction of uffd-wp for arm64, there is core-mm code that does ptep_get(); pte_clear_uffd_wp(); set_ptes(); which triggers this false warning. Silence this warning by masking out the SW bits during checks. The bug isn't technically in the highlighted commit below, but that's where bisecting would likely lead as its what made the bug user-visible. Signed-off-by: Ryan Roberts Fixes: 5b32510af77b ("arm64/mm: Add uffd write-protect support") Link: https://lore.kernel.org/r/20240619121859.4153966-1-ryan.roberts@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-hwdef.h | 1 + arch/arm64/mm/mmu.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 9943ff0af4c9..1f60aa1bc750 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -170,6 +170,7 @@ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ +#define PTE_SWBITS_MASK _AT(pteval_t, (BIT(63) | GENMASK(58, 55))) #define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c927e9312f10..353ea5dc32b8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -124,7 +124,8 @@ bool pgattr_change_is_safe(u64 old, u64 new) * The following mapping attributes may be updated in live * kernel mappings without the need for break-before-make. */ - pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; + pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG | + PTE_SWBITS_MASK; /* creating or taking down mappings is always safe */ if (!pte_valid(__pte(old)) || !pte_valid(__pte(new))) From df75470b317b46affbe1f5f8f006b34175be9789 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 18 Jun 2024 19:34:18 +0200 Subject: [PATCH 454/778] spi: spi-imx: imx51: revert burst length calculation back to bits_per_word The patch 15a6af94a277 ("spi: Increase imx51 ecspi burst length based on transfer length") increased the burst length calculation in mx51_ecspi_prepare_transfer() to be based on the transfer length. This breaks HW CS + SPI_CS_WORD support which was added in 6e95b23a5b2d ("spi: imx: Implement support for CS_WORD") and transfers with bits-per-word != 8, 16, 32. SPI_CS_WORD means the CS should be toggled after each word. The implementation in the imx-spi driver relies on the fact that the HW CS is toggled automatically by the controller after each burst length number of bits. Setting the burst length to the number of bits of the _whole_ message breaks this use case. Further the patch 15a6af94a277 ("spi: Increase imx51 ecspi burst length based on transfer length") claims to optimize the transfers. But even without this patch, on modern spi-imx controllers with "dynamic_burst = true" (imx51, imx6 and newer), the transfers are already optimized, i.e. the burst length is dynamically adjusted in spi_imx_push() to avoid the pause between the SPI bursts. This has been confirmed by a scope measurement on an imx6d. Subsequent Patches tried to fix these and other problems: - 5f66db08cbd3 ("spi: imx: Take in account bits per word instead of assuming 8-bits") - e9b220aeacf1 ("spi: spi-imx: correctly configure burst length when using dma") - c712c05e46c8 ("spi: imx: fix the burst length at DMA mode and CPU mode") - cf6d79a0f576 ("spi: spi-imx: fix off-by-one in mx51 CPU mode burst length") but the HW CS + SPI_CS_WORD use case is still broken. To fix the problems revert the burst size calculation in mx51_ecspi_prepare_transfer() back to the original form, before 15a6af94a277 ("spi: Increase imx51 ecspi burst length based on transfer length") was applied. Cc: Stefan Moring Cc: Stefan Bigler Cc: Clark Wang Cc: Carlos Song Cc: Sebastian Reichel Cc: Thorsten Scherer Fixes: 15a6af94a277 ("spi: Increase imx51 ecspi burst length based on transfer length") Fixes: 5f66db08cbd3 ("spi: imx: Take in account bits per word instead of assuming 8-bits") Fixes: e9b220aeacf1 ("spi: spi-imx: correctly configure burst length when using dma") Fixes: c712c05e46c8 ("spi: imx: fix the burst length at DMA mode and CPU mode") Fixes: cf6d79a0f576 ("spi: spi-imx: fix off-by-one in mx51 CPU mode burst length") Link: https://lore.kernel.org/all/20240618-oxpecker-of-ideal-mastery-db59f8-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde Tested-by: Thorsten Scherer Link: https://msgid.link/r/20240618-spi-imx-fix-bustlength-v1-1-2053dd5fdf87@pengutronix.de Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index f4006c82f867..33164ebdb583 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -660,18 +660,8 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, ctrl |= (spi_imx->target_burst * 8 - 1) << MX51_ECSPI_CTRL_BL_OFFSET; else { - if (spi_imx->usedma) { - ctrl |= (spi_imx->bits_per_word - 1) - << MX51_ECSPI_CTRL_BL_OFFSET; - } else { - if (spi_imx->count >= MX51_ECSPI_CTRL_MAX_BURST) - ctrl |= (MX51_ECSPI_CTRL_MAX_BURST * BITS_PER_BYTE - 1) - << MX51_ECSPI_CTRL_BL_OFFSET; - else - ctrl |= (spi_imx->count / DIV_ROUND_UP(spi_imx->bits_per_word, - BITS_PER_BYTE) * spi_imx->bits_per_word - 1) - << MX51_ECSPI_CTRL_BL_OFFSET; - } + ctrl |= (spi_imx->bits_per_word - 1) + << MX51_ECSPI_CTRL_BL_OFFSET; } /* set clock speed */ From 8ecd06277a7664f4ef018abae3abd3451d64e7a6 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 17 Jun 2024 11:18:15 +0200 Subject: [PATCH 455/778] netfilter: ipset: Fix suspicious rcu_dereference_protected() When destroying all sets, we are either in pernet exit phase or are executing a "destroy all sets command" from userspace. The latter was taken into account in ip_set_dereference() (nfnetlink mutex is held), but the former was not. The patch adds the required check to rcu_dereference_protected() in ip_set_dereference(). Fixes: 4e7aaa6b82d6 ("netfilter: ipset: Fix race between namespace cleanup and gc in the list:set type") Reported-by: syzbot+b62c37cdd58103293a5a@syzkaller.appspotmail.com Reported-by: syzbot+cfbe1da5fdfc39efc293@syzkaller.appspotmail.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202406141556.e0b6f17e-lkp@intel.com Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index c7ae4d9bf3d2..61431690cbd5 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -53,12 +53,13 @@ MODULE_DESCRIPTION("core IP set support"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex or ip_set_ref_lock is held: */ -#define ip_set_dereference(p) \ - rcu_dereference_protected(p, \ +#define ip_set_dereference(inst) \ + rcu_dereference_protected((inst)->ip_set_list, \ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ - lockdep_is_held(&ip_set_ref_lock)) + lockdep_is_held(&ip_set_ref_lock) || \ + (inst)->is_deleted) #define ip_set(inst, id) \ - ip_set_dereference((inst)->ip_set_list)[id] + ip_set_dereference(inst)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] #define ip_set_dereference_nfnl(p) \ @@ -1133,7 +1134,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ - tmp = ip_set_dereference(inst->ip_set_list); + tmp = ip_set_dereference(inst); memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ From 9a3bc8d16e0aacd65c31aaf23a2bced3288a7779 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 13 Jun 2024 17:42:46 +0800 Subject: [PATCH 456/778] seg6: fix parameter passing when calling NF_HOOK() in End.DX4 and End.DX6 behaviors input_action_end_dx4() and input_action_end_dx6() are called NF_HOOK() for PREROUTING hook, in PREROUTING hook, we should passing a valid indev, and a NULL outdev to NF_HOOK(), otherwise may trigger a NULL pointer dereference, as below: [74830.647293] BUG: kernel NULL pointer dereference, address: 0000000000000090 [74830.655633] #PF: supervisor read access in kernel mode [74830.657888] #PF: error_code(0x0000) - not-present page [74830.659500] PGD 0 P4D 0 [74830.660450] Oops: 0000 [#1] PREEMPT SMP PTI ... [74830.664953] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [74830.666569] RIP: 0010:rpfilter_mt+0x44/0x15e [ipt_rpfilter] ... [74830.689725] Call Trace: [74830.690402] [74830.690953] ? show_trace_log_lvl+0x1c4/0x2df [74830.692020] ? show_trace_log_lvl+0x1c4/0x2df [74830.693095] ? ipt_do_table+0x286/0x710 [ip_tables] [74830.694275] ? __die_body.cold+0x8/0xd [74830.695205] ? page_fault_oops+0xac/0x140 [74830.696244] ? exc_page_fault+0x62/0x150 [74830.697225] ? asm_exc_page_fault+0x22/0x30 [74830.698344] ? rpfilter_mt+0x44/0x15e [ipt_rpfilter] [74830.699540] ipt_do_table+0x286/0x710 [ip_tables] [74830.700758] ? ip6_route_input+0x19d/0x240 [74830.701752] nf_hook_slow+0x3f/0xb0 [74830.702678] input_action_end_dx4+0x19b/0x1e0 [74830.703735] ? input_action_end_t+0xe0/0xe0 [74830.704734] seg6_local_input_core+0x2d/0x60 [74830.705782] lwtunnel_input+0x5b/0xb0 [74830.706690] __netif_receive_skb_one_core+0x63/0xa0 [74830.707825] process_backlog+0x99/0x140 [74830.709538] __napi_poll+0x2c/0x160 [74830.710673] net_rx_action+0x296/0x350 [74830.711860] __do_softirq+0xcb/0x2ac [74830.713049] do_softirq+0x63/0x90 input_action_end_dx4() passing a NULL indev to NF_HOOK(), and finally trigger a NULL dereference in rpfilter_mt()->rpfilter_is_loopback(): static bool rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in) { // in is NULL return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } Fixes: 7a3f5b0de364 ("netfilter: add netfilter hooks to SRv6 data plane") Signed-off-by: Jianguo Wu Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/ipv6/seg6_local.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 24e2b4b494cb..c434940131b1 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -941,8 +941,8 @@ static int input_action_end_dx6(struct sk_buff *skb, if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, NULL, - skb_dst(skb)->dev, input_action_end_dx6_finish); + dev_net(skb->dev), NULL, skb, skb->dev, + NULL, input_action_end_dx6_finish); return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); drop: @@ -991,8 +991,8 @@ static int input_action_end_dx4(struct sk_buff *skb, if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, NULL, - skb_dst(skb)->dev, input_action_end_dx4_finish); + dev_net(skb->dev), NULL, skb, skb->dev, + NULL, input_action_end_dx4_finish); return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); drop: From 096597cfe4ea08b1830e775436d76d7c9d6d3037 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 18 Jun 2024 21:44:24 -0700 Subject: [PATCH 457/778] thermal: int340x: processor_thermal: Support shared interrupts On some systems the processor thermal device interrupt is shared with other PCI devices. In this case return IRQ_NONE from the interrupt handler when the interrupt is not for the processor thermal device. Signed-off-by: Srinivas Pandruvada Fixes: f0658708e863 ("thermal: int340x: processor_thermal: Use non MSI interrupts by default") Cc: 6.7+ # 6.7+ Signed-off-by: Rafael J. Wysocki --- .../intel/int340x_thermal/processor_thermal_device_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index 14e34eabc419..4a1bfebb1b8e 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -150,7 +150,7 @@ static irqreturn_t proc_thermal_irq_handler(int irq, void *devid) { struct proc_thermal_pci *pci_info = devid; struct proc_thermal_device *proc_priv; - int ret = IRQ_HANDLED; + int ret = IRQ_NONE; u32 status; proc_priv = pci_info->proc_priv; @@ -175,6 +175,7 @@ static irqreturn_t proc_thermal_irq_handler(int irq, void *devid) /* Disable enable interrupt flag */ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0); pkg_thermal_schedule_work(&pci_info->work); + ret = IRQ_HANDLED; } pci_write_config_byte(pci_info->pdev, 0xdc, 0x01); From d92a7580392ad4681b1d4f9275d00b95375ebe01 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 17 Jun 2024 17:26:37 +0200 Subject: [PATCH 458/778] drm/fbdev-dma: Only set smem_start is enable per module option Only export struct fb_info.fix.smem_start if that is required by the user and the memory does not come from vmalloc(). Setting struct fb_info.fix.smem_start breaks systems where DMA memory is backed by vmalloc address space. An example error is shown below. [ 3.536043] ------------[ cut here ]------------ [ 3.540716] virt_to_phys used for non-linear address: 000000007fc4f540 (0xffff800086001000) [ 3.552628] WARNING: CPU: 4 PID: 61 at arch/arm64/mm/physaddr.c:12 __virt_to_phys+0x68/0x98 [ 3.565455] Modules linked in: [ 3.568525] CPU: 4 PID: 61 Comm: kworker/u12:5 Not tainted 6.6.23-06226-g4986cc3e1b75-dirty #250 [ 3.577310] Hardware name: NXP i.MX95 19X19 board (DT) [ 3.582452] Workqueue: events_unbound deferred_probe_work_func [ 3.588291] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 3.595233] pc : __virt_to_phys+0x68/0x98 [ 3.599246] lr : __virt_to_phys+0x68/0x98 [ 3.603276] sp : ffff800083603990 [ 3.677939] Call trace: [ 3.680393] __virt_to_phys+0x68/0x98 [ 3.684067] drm_fbdev_dma_helper_fb_probe+0x138/0x238 [ 3.689214] __drm_fb_helper_initial_config_and_unlock+0x2b0/0x4c0 [ 3.695385] drm_fb_helper_initial_config+0x4c/0x68 [ 3.700264] drm_fbdev_dma_client_hotplug+0x8c/0xe0 [ 3.705161] drm_client_register+0x60/0xb0 [ 3.709269] drm_fbdev_dma_setup+0x94/0x148 Additionally, DMA memory is assumed to by contiguous in physical address space, which is not guaranteed by vmalloc(). Resolve this by checking the module flag drm_leak_fbdev_smem when DRM allocated the instance of struct fb_info. Fbdev-dma then only sets smem_start only if required (via FBINFO_HIDE_SMEM_START). Also guarantee that the framebuffer is not located in vmalloc address space. Signed-off-by: Thomas Zimmermann Reported-by: Peng Fan (OSS) Closes: https://lore.kernel.org/dri-devel/20240604080328.4024838-1-peng.fan@oss.nxp.com/ Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/dri-devel/CAMuHMdX3N0szUvt1VTbroa2zrT1Nye_VzPb5qqCZ7z5gSm7HGw@mail.gmail.com/ Fixes: a51c7663f144 ("drm/fb-helper: Consolidate CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM") Tested-by: Geert Uytterhoeven Reviewed-by: Daniel Vetter Cc: # v6.4+ Link: https://patchwork.freedesktop.org/patch/msgid/20240617152843.11886-1-tzimmermann@suse.de --- drivers/gpu/drm/drm_fb_helper.c | 6 +++--- drivers/gpu/drm/drm_fbdev_dma.c | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index d612133e2cf7..117237d3528b 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -524,6 +524,9 @@ struct fb_info *drm_fb_helper_alloc_info(struct drm_fb_helper *fb_helper) if (!info) return ERR_PTR(-ENOMEM); + if (!drm_leak_fbdev_smem) + info->flags |= FBINFO_HIDE_SMEM_START; + ret = fb_alloc_cmap(&info->cmap, 256, 0); if (ret) goto err_release; @@ -1860,9 +1863,6 @@ __drm_fb_helper_initial_config_and_unlock(struct drm_fb_helper *fb_helper) info = fb_helper->info; info->var.pixclock = 0; - if (!drm_leak_fbdev_smem) - info->flags |= FBINFO_HIDE_SMEM_START; - /* Need to drop locks to avoid recursive deadlock in * register_framebuffer. This is ok because the only thing left to do is * register the fbdev emulation instance in kernel_fb_helper_list. */ diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c index 6c9427bb4053..13cd754af311 100644 --- a/drivers/gpu/drm/drm_fbdev_dma.c +++ b/drivers/gpu/drm/drm_fbdev_dma.c @@ -130,7 +130,10 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper, info->flags |= FBINFO_READS_FAST; /* signal caching */ info->screen_size = sizes->surface_height * fb->pitches[0]; info->screen_buffer = map.vaddr; - info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); + if (!(info->flags & FBINFO_HIDE_SMEM_START)) { + if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer))) + info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); + } info->fix.smem_len = info->screen_size; return 0; From a2225e0250c5fa397dcebf6ce65a9f05a114e0cf Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 13 Jun 2024 17:42:47 +0800 Subject: [PATCH 459/778] netfilter: move the sysctl nf_hooks_lwtunnel into the netfilter core Currently, the sysctl net.netfilter.nf_hooks_lwtunnel depends on the nf_conntrack module, but the nf_conntrack module is not always loaded. Therefore, accessing net.netfilter.nf_hooks_lwtunnel may have an error. Move sysctl nf_hooks_lwtunnel into the netfilter core. Fixes: 7a3f5b0de364 ("netfilter: add netfilter hooks to SRv6 data plane") Suggested-by: Pablo Neira Ayuso Signed-off-by: Jianguo Wu Signed-off-by: Pablo Neira Ayuso --- include/net/netns/netfilter.h | 3 ++ net/netfilter/core.c | 13 ++++- net/netfilter/nf_conntrack_standalone.c | 15 ------ net/netfilter/nf_hooks_lwtunnel.c | 67 +++++++++++++++++++++++++ net/netfilter/nf_internals.h | 6 +++ 5 files changed, 87 insertions(+), 17 deletions(-) diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 02bbdc577f8e..a6a0bf4a247e 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -15,6 +15,9 @@ struct netns_nf { const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; +#ifdef CONFIG_LWTUNNEL + struct ctl_table_header *nf_lwtnl_dir_header; +#endif #endif struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS]; struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS]; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 3126911f5042..b00fc285b334 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -815,12 +815,21 @@ int __init netfilter_init(void) if (ret < 0) goto err; +#ifdef CONFIG_LWTUNNEL + ret = netfilter_lwtunnel_init(); + if (ret < 0) + goto err_lwtunnel_pernet; +#endif ret = netfilter_log_init(); if (ret < 0) - goto err_pernet; + goto err_log_pernet; return 0; -err_pernet: +err_log_pernet: +#ifdef CONFIG_LWTUNNEL + netfilter_lwtunnel_fini(); +err_lwtunnel_pernet: +#endif unregister_pernet_subsys(&netfilter_net_ops); err: return ret; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 74112e9c5dab..6c40bdf8b05a 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -22,9 +22,6 @@ #include #include #include -#ifdef CONFIG_LWTUNNEL -#include -#endif #include static bool enable_hooks __read_mostly; @@ -612,9 +609,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, #endif -#ifdef CONFIG_LWTUNNEL - NF_SYSCTL_CT_LWTUNNEL, -#endif NF_SYSCTL_CT_LAST_SYSCTL, }; @@ -946,15 +940,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, #endif -#ifdef CONFIG_LWTUNNEL - [NF_SYSCTL_CT_LWTUNNEL] = { - .procname = "nf_hooks_lwtunnel", - .data = NULL, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = nf_hooks_lwtunnel_sysctl_handler, - }, -#endif }; static struct ctl_table nf_ct_netfilter_table[] = { diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c index 00e89ffd78f6..7cdb59bb4459 100644 --- a/net/netfilter/nf_hooks_lwtunnel.c +++ b/net/netfilter/nf_hooks_lwtunnel.c @@ -3,6 +3,9 @@ #include #include #include +#include + +#include "nf_internals.h" static inline int nf_hooks_lwtunnel_get(void) { @@ -50,4 +53,68 @@ int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, return ret; } EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler); + +static struct ctl_table nf_lwtunnel_sysctl_table[] = { + { + .procname = "nf_hooks_lwtunnel", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = nf_hooks_lwtunnel_sysctl_handler, + }, +}; + +static int __net_init nf_lwtunnel_net_init(struct net *net) +{ + struct ctl_table_header *hdr; + struct ctl_table *table; + + table = nf_lwtunnel_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(nf_lwtunnel_sysctl_table, + sizeof(nf_lwtunnel_sysctl_table), + GFP_KERNEL); + if (!table) + goto err_alloc; + } + + hdr = register_net_sysctl_sz(net, "net/netfilter", table, + ARRAY_SIZE(nf_lwtunnel_sysctl_table)); + if (!hdr) + goto err_reg; + + net->nf.nf_lwtnl_dir_header = hdr; + + return 0; +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit nf_lwtunnel_net_exit(struct net *net) +{ + const struct ctl_table *table; + + table = net->nf.nf_lwtnl_dir_header->ctl_table_arg; + unregister_net_sysctl_table(net->nf.nf_lwtnl_dir_header); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations nf_lwtunnel_net_ops = { + .init = nf_lwtunnel_net_init, + .exit = nf_lwtunnel_net_exit, +}; + +int __init netfilter_lwtunnel_init(void) +{ + return register_pernet_subsys(&nf_lwtunnel_net_ops); +} + +void netfilter_lwtunnel_fini(void) +{ + unregister_pernet_subsys(&nf_lwtunnel_net_ops); +} #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 832ae64179f0..25403023060b 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -29,6 +29,12 @@ void nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ int __init netfilter_log_init(void); +#ifdef CONFIG_LWTUNNEL +/* nf_hooks_lwtunnel.c */ +int __init netfilter_lwtunnel_init(void); +void netfilter_lwtunnel_fini(void); +#endif + /* core.c */ void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp, const struct nf_hook_ops *reg); From 72e50ef99431163203657128050d0697caf3321d Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 13 Jun 2024 17:42:48 +0800 Subject: [PATCH 460/778] selftests: add selftest for the SRv6 End.DX4 behavior with netfilter this selftest is designed for evaluating the SRv6 End.DX4 behavior used with netfilter(rpfilter), in this example, for implementing IPv4 L3 VPN use cases. Signed-off-by: Jianguo Wu Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/config | 1 + .../net/srv6_end_dx4_netfilter_test.sh | 335 ++++++++++++++++++ 3 files changed, 337 insertions(+) create mode 100755 tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bd01e4a0be2c..7a5f7dd320de 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -43,6 +43,7 @@ TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_flavors_test.sh +TEST_PROGS += srv6_end_dx4_netfilter_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 04de7a6ba6f3..c2766e558f92 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -101,3 +101,4 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m +CONFIG_IP_NF_MATCH_RPFILTER=m diff --git a/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh new file mode 100755 index 000000000000..e23210aa547f --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh @@ -0,0 +1,335 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv4 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv4 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv4 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | 10.0.0.11/24 | +----------+ | | +----------+ | 10.0.0.22/24 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX4 nh4 10.0.0.1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX4 nh4 10.0.0.2 dev veth-t100| +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv4_HS_NETWORK=10.0.0 +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.${rt}${hs}/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX4 nh4 ${IPv4_HS_NETWORK}.${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} From 221200ffeb065c6bbd196760c168b42305961655 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 13 Jun 2024 17:42:49 +0800 Subject: [PATCH 461/778] selftests: add selftest for the SRv6 End.DX6 behavior with netfilter this selftest is designed for evaluating the SRv6 End.DX6 behavior used with netfilter(rpfilter), in this example, for implementing IPv6 L3 VPN use cases. Signed-off-by: Jianguo Wu Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/config | 1 + .../net/srv6_end_dx6_netfilter_test.sh | 340 ++++++++++++++++++ 3 files changed, 342 insertions(+) create mode 100755 tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7a5f7dd320de..d9393569d03a 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -44,6 +44,7 @@ TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_flavors_test.sh TEST_PROGS += srv6_end_dx4_netfilter_test.sh +TEST_PROGS += srv6_end_dx6_netfilter_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index c2766e558f92..d4891f7a2bfa 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -102,3 +102,4 @@ CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RPFILTER=m diff --git a/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh new file mode 100755 index 000000000000..9e69a2ed5bc3 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh @@ -0,0 +1,340 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv6 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv6 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv6 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::11/64 | +----------+ | | +----------+ | cafe::22/64 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX6 nh6 cafe::1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX6 nh6 cafe::2 dev veth-t100 | +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv6_HS_NETWORK=cafe +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::${rt}${hs}/64 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local rtveth=veth-t${tid} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX6 nh6 ${IPv6_HS_NETWORK}::${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -6 -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} From 98d919dfee1cc402ca29d45da642852d7c9a2301 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Mon, 17 Jun 2024 12:58:34 +0530 Subject: [PATCH 462/778] ASoC: amd: acp: add a null check for chip_pdev structure When acp platform device creation is skipped, chip->chip_pdev value will remain NULL. Add NULL check for chip->chip_pdev structure in snd_acp_resume() function to avoid null pointer dereference. Fixes: 088a40980efb ("ASoC: amd: acp: add pm ops support for acp pci driver") Signed-off-by: Vijendar Mukunda Link: https://msgid.link/r/20240617072844.871468-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-pci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c index ad320b29e87d..aa3e72d13451 100644 --- a/sound/soc/amd/acp/acp-pci.c +++ b/sound/soc/amd/acp/acp-pci.c @@ -199,10 +199,12 @@ static int __maybe_unused snd_acp_resume(struct device *dev) ret = acp_init(chip); if (ret) dev_err(dev, "ACP init failed\n"); - child = chip->chip_pdev->dev; - adata = dev_get_drvdata(&child); - if (adata) - acp_enable_interrupts(adata); + if (chip->chip_pdev) { + child = chip->chip_pdev->dev; + adata = dev_get_drvdata(&child); + if (adata) + acp_enable_interrupts(adata); + } return ret; } From 70fa3900c3ed92158628710e81d274e5cb52f92b Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Mon, 17 Jun 2024 12:58:35 +0530 Subject: [PATCH 463/778] ASoC: amd: acp: remove i2s configuration check in acp_i2s_probe() ACP supports different pin configurations for I2S IO. Checking ACP pin configuration value against specific value breaks the functionality for other I2S pin configurations. This check is no longer required in i2s dai driver probe call as i2s configuration check will be verified during acp platform device creation sequence. Remove i2s_mode check in acp_i2s_probe() function. Fixes: b24484c18b10 ("ASoC: amd: acp: ACP code generic to support newer platforms") Signed-off-by: Vijendar Mukunda Link: https://msgid.link/r/20240617072844.871468-2-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-i2s.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index 60cbc881be6e..ef12f97ddc69 100644 --- a/sound/soc/amd/acp/acp-i2s.c +++ b/sound/soc/amd/acp/acp-i2s.c @@ -588,20 +588,12 @@ static int acp_i2s_probe(struct snd_soc_dai *dai) { struct device *dev = dai->component->dev; struct acp_dev_data *adata = dev_get_drvdata(dev); - struct acp_resource *rsrc = adata->rsrc; - unsigned int val; if (!adata->acp_base) { dev_err(dev, "I2S base is NULL\n"); return -EINVAL; } - val = readl(adata->acp_base + rsrc->i2s_pin_cfg_offset); - if (val != rsrc->i2s_mode) { - dev_err(dev, "I2S Mode not supported val %x\n", val); - return -EINVAL; - } - return 0; } From 379bcd2c9197bf2c429434e8a01cea0ee1852316 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Mon, 17 Jun 2024 12:58:36 +0530 Subject: [PATCH 464/778] ASoC: amd: acp: move chip->flag variable assignment chip->flag variable assignment will be skipped when acp platform device creation is skipped. In this case chip>flag value will not be set. chip->flag variable should be assigned along with other structure variables for 'chip' structure. Move chip->flag variable assignment prior to acp platform device creation. Fixes: 3a94c8ad0aae ("ASoC: amd: acp: add code for scanning acp pdm controller") Signed-off-by: Vijendar Mukunda Link: https://msgid.link/r/20240617072844.871468-3-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c index aa3e72d13451..777b5a78d8a9 100644 --- a/sound/soc/amd/acp/acp-pci.c +++ b/sound/soc/amd/acp/acp-pci.c @@ -100,6 +100,7 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id ret = -EINVAL; goto release_regions; } + chip->flag = flag; dmic_dev = platform_device_register_data(dev, "dmic-codec", PLATFORM_DEVID_NONE, NULL, 0); if (IS_ERR(dmic_dev)) { dev_err(dev, "failed to create DMIC device\n"); @@ -139,7 +140,6 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id } } - chip->flag = flag; memset(&pdevinfo, 0, sizeof(pdevinfo)); pdevinfo.name = chip->name; From e2654a4453ba3dac9baacf9980d841d84e15b869 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 7 May 2024 16:26:08 -0400 Subject: [PATCH 465/778] drm/amd/display: Remove redundant idle optimization check [Why] Disable idle optimization for each atomic commit is unnecessary, and can lead to a potential race condition. [How] Remove idle optimization check from amdgpu_dm_atomic_commit_tail() Fixes: 196107eb1e15 ("drm/amd/display: Add IPS checks before dcn register access") Cc: stable@vger.kernel.org Reviewed-by: Hamza Mahfooz Acked-by: Roman Li Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f1d67c6f4b98..e426adf95d7d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9169,9 +9169,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) trace_amdgpu_dm_atomic_commit_tail_begin(state); - if (dm->dc->caps.ips_support && dm->dc->idle_optimizations_allowed) - dc_allow_idle_optimizations(dm->dc, false); - drm_atomic_helper_update_legacy_modeset_state(dev, state); drm_dp_mst_atomic_wait_for_dependencies(state); From 84801d4f1e4fbd2c44dddecaec9099bdff100a42 Mon Sep 17 00:00:00 2001 From: Yunxiang Li Date: Thu, 23 May 2024 07:48:19 -0400 Subject: [PATCH 466/778] drm/amdgpu: fix locking scope when flushing tlb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Which method is used to flush tlb does not depend on whether a reset is in progress or not. We should skip flush altogether if the GPU will get reset. So put both path under reset_domain read lock. Signed-off-by: Yunxiang Li Reviewed-by: Christian König Signed-off-by: Alex Deucher CC: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 66 +++++++++++++------------ 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index be4629cdac04..08b9dfb65335 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -684,12 +684,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; unsigned int ndw; - signed long r; + int r; uint32_t seq; - if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready || - !down_read_trylock(&adev->reset_domain->sem)) { + /* + * A GPU reset should flush all TLBs anyway, so no need to do + * this while one is ongoing. + */ + if (!down_read_trylock(&adev->reset_domain->sem)) + return 0; + if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) { if (adev->gmc.flush_tlb_needs_extra_type_2) adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, 2, all_hub, @@ -703,44 +708,41 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); - return 0; - } + r = 0; + } else { + /* 2 dwords flush + 8 dwords fence */ + ndw = kiq->pmf->invalidate_tlbs_size + 8; - /* 2 dwords flush + 8 dwords fence */ - ndw = kiq->pmf->invalidate_tlbs_size + 8; + if (adev->gmc.flush_tlb_needs_extra_type_2) + ndw += kiq->pmf->invalidate_tlbs_size; - if (adev->gmc.flush_tlb_needs_extra_type_2) - ndw += kiq->pmf->invalidate_tlbs_size; + if (adev->gmc.flush_tlb_needs_extra_type_0) + ndw += kiq->pmf->invalidate_tlbs_size; - if (adev->gmc.flush_tlb_needs_extra_type_0) - ndw += kiq->pmf->invalidate_tlbs_size; + spin_lock(&adev->gfx.kiq[inst].ring_lock); + amdgpu_ring_alloc(ring, ndw); + if (adev->gmc.flush_tlb_needs_extra_type_2) + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); - spin_lock(&adev->gfx.kiq[inst].ring_lock); - amdgpu_ring_alloc(ring, ndw); - if (adev->gmc.flush_tlb_needs_extra_type_2) - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); + if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); - if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); + goto error_unlock_reset; + } - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) { - amdgpu_ring_undo(ring); + amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq[inst].ring_lock); - goto error_unlock_reset; + if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) { + dev_err(adev->dev, "timeout waiting for kiq fence\n"); + r = -ETIME; + } } - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq[inst].ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); - if (r < 1) { - dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); - r = -ETIME; - goto error_unlock_reset; - } - r = 0; - error_unlock_reset: up_read(&adev->reset_domain->sem); return r; From 56342da3d8cc15efe9df7f29985ba8d256bdc258 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Mon, 3 Jun 2024 10:16:45 -0400 Subject: [PATCH 467/778] drm/amd/display: prevent register access while in IPS We can't read/write to DCN registers while in IPS. Since, that can cause the system to hang. So, before proceeding with the access in that scenario, force the system out of IPS. Cc: stable@vger.kernel.org # 6.6+ Reviewed-by: Roman Li Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e426adf95d7d..e9ac20bed0f2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11437,6 +11437,12 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev) mutex_unlock(&adev->dm.dc_lock); } +static inline void amdgpu_dm_exit_ips_for_hw_access(struct dc *dc) +{ + if (dc->ctx->dmub_srv && !dc->ctx->dmub_srv->idle_exit_counter) + dc_exit_ips_for_hw_access(dc); +} + void dm_write_reg_func(const struct dc_context *ctx, uint32_t address, u32 value, const char *func_name) { @@ -11447,6 +11453,8 @@ void dm_write_reg_func(const struct dc_context *ctx, uint32_t address, return; } #endif + + amdgpu_dm_exit_ips_for_hw_access(ctx->dc); cgs_write_register(ctx->cgs_device, address, value); trace_amdgpu_dc_wreg(&ctx->perf_trace->write_count, address, value); } @@ -11470,6 +11478,8 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address, return 0; } + amdgpu_dm_exit_ips_for_hw_access(ctx->dc); + value = cgs_read_register(ctx->cgs_device, address); trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value); From 49c9ffabde555c841392858d8b9e6cf58998a50c Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Wed, 5 Jun 2024 09:30:50 -0400 Subject: [PATCH 468/778] drm/amdgpu: Indicate CU havest info to CP To achieve full occupancy CP hardware needs to know if CUs in SE are symmetrically or asymmetrically harvested v2: Reset is_symmetric_cus for each loop Signed-off-by: Harish Kasiviswanathan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 7b16e8cca86a..f5b9f443cfdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4195,9 +4195,10 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_i static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info) { - int i, j, k, counter, xcc_id, active_cu_number = 0; - u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; + int i, j, k, prev_counter, counter, xcc_id, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0, tmp; unsigned disable_masks[4 * 4]; + bool is_symmetric_cus; if (!adev || !cu_info) return -EINVAL; @@ -4215,6 +4216,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { + is_symmetric_cus = true; for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { mask = 1; @@ -4242,6 +4244,15 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } + if (i && is_symmetric_cus && prev_counter != counter) + is_symmetric_cus = false; + prev_counter = counter; + } + if (is_symmetric_cus) { + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG); + tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_RELAUNCH_DISABLE, 1); + tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_DISPATCH_DISABLE, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG, tmp); } gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); From 8bd82363e2ee2eb3a9a8ea1fa94ebe1900d05a71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 5 Jun 2024 13:27:20 +0200 Subject: [PATCH 469/778] drm/amdgpu: revert "take runtime pm reference when we attach a buffer" v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b8c415e3bf98 ("drm/amdgpu: take runtime pm reference when we attach a buffer") and commit 425285d39afd ("drm/amdgpu: add amdgpu runpm usage trace for separate funcs"). Taking a runtime pm reference for DMA-buf is actually completely unnecessary and even dangerous. The problem is that calling pm_runtime_get_sync() from the DMA-buf callbacks is illegal because we have the reservation locked here which is also taken during resume. So this would deadlock. When the buffer is in GTT it is still accessible even when the GPU is powered down and when it is in VRAM the buffer gets migrated to GTT before powering down. The only use case which would make it mandatory to keep the runtime pm reference would be if we pin the buffer into VRAM, and that's not something we currently do. v2: improve the commit message Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher CC: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 34 --------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 15 --------- 3 files changed, 51 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 055ba2ea4c12..662d0f28f358 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -41,8 +41,6 @@ #include #include #include -#include -#include "amdgpu_trace.h" /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation @@ -58,42 +56,11 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - int r; if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(1, __func__); - if (r < 0) - goto out; - return 0; - -out: - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(0, __func__); - return r; -} - -/** - * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation - * - * @dmabuf: DMA-buf where we remove the attachment from - * @attach: the attachment to remove - * - * Called when an attachment is removed from the DMA-buf. - */ -static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, - struct dma_buf_attachment *attach) -{ - struct drm_gem_object *obj = dmabuf->priv; - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(0, __func__); } /** @@ -267,7 +234,6 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_dma_buf_attach, - .detach = amdgpu_dma_buf_detach, .pin = amdgpu_dma_buf_pin, .unpin = amdgpu_dma_buf_unpin, .map_dma_buf = amdgpu_dma_buf_map, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 10832b470448..bc3ac73b6b8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -181,7 +181,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); pm_runtime_get_noresume(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(1, __func__); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { struct dma_fence *old; @@ -309,7 +308,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) dma_fence_put(fence); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(0, __func__); } while (last_seq != seq); return true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 7aafeb763e5d..383fce40d4dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -554,21 +554,6 @@ TRACE_EVENT(amdgpu_reset_reg_dumps, __entry->value) ); -TRACE_EVENT(amdgpu_runpm_reference_dumps, - TP_PROTO(uint32_t index, const char *func), - TP_ARGS(index, func), - TP_STRUCT__entry( - __field(uint32_t, index) - __string(func, func) - ), - TP_fast_assign( - __entry->index = index; - __assign_str(func); - ), - TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n", - __entry->index, - __get_str(func)) -); #undef AMDGPU_JOB_GET_TIMELINE_NAME #endif From c60e20f13c27662de36cd5538d6299760780db52 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Tue, 28 May 2024 16:17:17 -0400 Subject: [PATCH 470/778] drm/amd/display: Change dram_clock_latency to 34us for dcn351 [Why] Intermittent underflow observed when using 4k144 display on dcn351 [How] Update dram_clock_change_latency_us from 11.72us to 34us Reviewed-by: Nicholas Kazlauskas Acked-by: Zaeem Mohamed Signed-off-by: Daniel Miess Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index e4f333d4fb54..a201dbb743d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -215,7 +215,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = { .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, .urgent_latency_vm_data_only_us = 4.0, - .dram_clock_change_latency_us = 11.72, + .dram_clock_change_latency_us = 34, .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, From 6071607bfefefc50a3907c0ba88878846960d29a Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Tue, 28 May 2024 14:36:00 +0800 Subject: [PATCH 471/778] drm/amd/display: change dram_clock_latency to 34us for dcn35 [Why & How] Current DRAM setting would cause underflow on customer platform. Modify dram_clock_change_latency_us from 11.72 to 34.0 us as per recommendation from HW team Reviewed-by: Nicholas Kazlauskas Acked-by: Zaeem Mohamed Signed-off-by: Paul Hsieh Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 60f251cf973b..beed7adbbd43 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -177,7 +177,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, .urgent_latency_vm_data_only_us = 4.0, - .dram_clock_change_latency_us = 11.72, + .dram_clock_change_latency_us = 34.0, .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, From c03d770c0b014a3007a5874bf6b3c3e64d32aaac Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Tue, 7 May 2024 12:03:15 -0400 Subject: [PATCH 472/778] drm/amd/display: Attempt to avoid empty TUs when endpoint is DPIA [WHY] Empty SST TUs are illegal to transmit over a USB4 DP tunnel. Current policy is to configure stream encoder to pack 2 pixels per pclk even when ODM combine is not in use, allowing seamless dynamic ODM reconfiguration. However, in extreme edge cases where average pixel count per TU is less than 2, this can lead to unexpected empty TU generation during compliance testing. For example, VIC 1 with a 1xHBR3 link configuration will average 1.98 pix/TU. [HOW] Calculate average pixel count per TU, and block 2 pixels per clock if endpoint is a DPIA tunnel and pixel clock is low enough that we will never require 2:1 ODM combine. Cc: stable@vger.kernel.org # 6.6+ Reviewed-by: Wenjing Liu Acked-by: Hamza Mahfooz Signed-off-by: Michael Strauss Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 72 +++++++++++++++++++ .../amd/display/dc/hwss/dcn35/dcn35_hwseq.h | 2 + .../amd/display/dc/hwss/dcn35/dcn35_init.c | 2 +- 3 files changed, 75 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 5295f52e4fc8..dcced89c07b3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1439,3 +1439,75 @@ void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx, } } } + +static bool should_avoid_empty_tu(struct pipe_ctx *pipe_ctx) +{ + /* Calculate average pixel count per TU, return false if under ~2.00 to + * avoid empty TUs. This is only required for DPIA tunneling as empty TUs + * are legal to generate for native DP links. Assume TU size 64 as there + * is currently no scenario where it's reprogrammed from HW default. + * MTPs have no such limitation, so this does not affect MST use cases. + */ + unsigned int pix_clk_mhz; + unsigned int symclk_mhz; + unsigned int avg_pix_per_tu_x1000; + unsigned int tu_size_bytes = 64; + struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; + struct dc_link_settings *link_settings = &pipe_ctx->link_config.dp_link_settings; + const struct dc *dc = pipe_ctx->stream->link->dc; + + if (pipe_ctx->stream->link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) + return false; + + // Not necessary for MST configurations + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) + return false; + + pix_clk_mhz = timing->pix_clk_100hz / 10000; + + // If this is true, can't block due to dynamic ODM + if (pix_clk_mhz > dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz) + return false; + + switch (link_settings->link_rate) { + case LINK_RATE_LOW: + symclk_mhz = 162; + break; + case LINK_RATE_HIGH: + symclk_mhz = 270; + break; + case LINK_RATE_HIGH2: + symclk_mhz = 540; + break; + case LINK_RATE_HIGH3: + symclk_mhz = 810; + break; + default: + // We shouldn't be tunneling any other rates, something is wrong + ASSERT(0); + return false; + } + + avg_pix_per_tu_x1000 = (1000 * pix_clk_mhz * tu_size_bytes) + / (symclk_mhz * link_settings->lane_count); + + // Add small empirically-decided margin to account for potential jitter + return (avg_pix_per_tu_x1000 < 2020); +} + +bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx) +{ + struct dc *dc = pipe_ctx->stream->ctx->dc; + + if (!is_h_timing_divisible_by_2(pipe_ctx->stream)) + return false; + + if (should_avoid_empty_tu(pipe_ctx)) + return false; + + if (dc_is_dp_signal(pipe_ctx->stream->signal) && !dc->link_srv->dp_is_128b_132b_signal(pipe_ctx) && + dc->debug.enable_dp_dig_pixel_rate_div_policy) + return true; + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h index a731c8880d60..f0ea7d1511ae 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h @@ -95,4 +95,6 @@ void dcn35_set_static_screen_control(struct pipe_ctx **pipe_ctx, void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max); +bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx); + #endif /* __DC_HWSS_DCN35_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index df3bf77f3fb4..199781233fd5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -158,7 +158,7 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, - .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, + .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy, .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, From 301daa346f0e34a87fb6c1e4a05db2aa0a66b573 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 14 Jun 2024 12:54:52 -0700 Subject: [PATCH 473/778] drm/amd/display: Disable CONFIG_DRM_AMD_DC_FP for RISC-V with clang Commit 77acc6b55ae4 ("riscv: add support for kernel-mode FPU") and commit a28e4b672f04 ("drm/amd/display: use ARCH_HAS_KERNEL_FPU_SUPPORT") enabled support for CONFIG_DRM_AMD_DC_FP with RISC-V. Unfortunately, this exposed -Wframe-larger-than warnings (which become fatal with CONFIG_WERROR=y) when building ARCH=riscv allmodconfig with clang: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:58:13: error: stack frame size (2448) exceeds limit (2048) in 'DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation' [-Werror,-Wframe-larger-than] 58 | static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( | ^ 1 error generated. Many functions in this file use a large number of parameters, which must be passed on the stack at a certain pointer due to register exhaustion, which can cause high stack usage when inlining and issues with stack slot analysis get involved. While the compiler can and should do better (as GCC uses less than half the amount of stack space for the same function), it is not as simple as a fix as adjusting the functions not to take a large number of parameters. Unfortunately, modifying these files to avoid the problem is a difficult to justify approach because any revisions to the files in the kernel tree never make it back to the original source (so copies of the code for newer hardware revisions just reintroduce the issue) and the files are hard to read/modify due to being "gcc-parsable HW gospel, coming straight from HW engineers". Avoid building the problematic code for RISC-V by modifying the existing condition for arm64 that exists for the same reason. Factor out the logical not to make the condition a little more readable naturally. Fixes: a28e4b672f04 ("drm/amd/display: use ARCH_HAS_KERNEL_FPU_SUPPORT") Reported-by: Palmer Dabbelt Closes: https://lore.kernel.org/20240530145741.7506-2-palmer@rivosinc.com/ Reviewed-by: Harry Wentland Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 5fcd4f778dc3..47b8b49da8a7 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -8,7 +8,7 @@ config DRM_AMD_DC depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64 select SND_HDA_COMPONENT if SND_HDA_CORE # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752 - select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && (!ARM64 || !CC_IS_CLANG) + select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || RISCV)) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and From 8bf0287528da1992c5e49d757b99ad6bbc34b522 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 19 Jun 2024 14:46:48 -0500 Subject: [PATCH 474/778] cifs: fix typo in module parameter enable_gcm_256 enable_gcm_256 (which allows the server to require the strongest encryption) is enabled by default, but the modinfo description incorrectly showed it disabled by default. Fix the typo. Cc: stable@vger.kernel.org Fixes: fee742b50289 ("smb3.1.1: enable negotiating stronger encryption by default") Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index bb86fc0641d8..6397fdefd876 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -134,7 +134,7 @@ module_param(enable_oplocks, bool, 0644); MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1"); module_param(enable_gcm_256, bool, 0644); -MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: n/N/0"); +MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/0"); module_param(require_gcm_256, bool, 0644); MODULE_PARM_DESC(require_gcm_256, "Require strongest (256 bit) GCM encryption. Default: n/N/0"); From a498df5421fd737d11bfd152428ba6b1c8538321 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 20 May 2024 09:11:45 -0400 Subject: [PATCH 475/778] drm/radeon: fix UBSAN warning in kv_dpm.c Adds bounds check for sumo_vid_mapping_entry. Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/sumo_dpm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c index 21d27e6235f3..b11f7c5bbcbe 100644 --- a/drivers/gpu/drm/radeon/sumo_dpm.c +++ b/drivers/gpu/drm/radeon/sumo_dpm.c @@ -1619,6 +1619,8 @@ void sumo_construct_vid_mapping_table(struct radeon_device *rdev, for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) { if (table[i].ulSupportedSCLK != 0) { + if (table[i].usVoltageIndex >= SUMO_MAX_NUMBER_VOLTAGES) + continue; vid_mapping_table->entries[table[i].usVoltageIndex].vid_7bit = table[i].usVoltageID; vid_mapping_table->entries[table[i].usVoltageIndex].vid_2bit = From f0d576f840153392d04b2d52cf3adab8f62e8cb6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 20 May 2024 09:05:21 -0400 Subject: [PATCH 476/778] drm/amdgpu: fix UBSAN warning in kv_dpm.c Adds bounds check for sumo_vid_mapping_entry. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3392 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 6bb42d04b247..e8b6989a40f3 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -164,6 +164,8 @@ static void sumo_construct_vid_mapping_table(struct amdgpu_device *adev, for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) { if (table[i].ulSupportedSCLK != 0) { + if (table[i].usVoltageIndex >= SUMO_MAX_NUMBER_VOLTAGES) + continue; vid_mapping_table->entries[table[i].usVoltageIndex].vid_7bit = table[i].usVoltageID; vid_mapping_table->entries[table[i].usVoltageIndex].vid_2bit = From e356d321d0240663a09b139fa3658ddbca163e27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 31 May 2024 10:56:00 +0200 Subject: [PATCH 477/778] drm/amdgpu: cleanup MES11 command submission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The approach of having a separate WB slot for each submission doesn't really work well and for example breaks GPU reset. Use a status query packet for the fence update instead since those should always succeed we can use the fence of the original packet to signal the state of the operation. While at it cleanup the coding style. Fixes: eef016ba8986 ("drm/amdgpu/mes11: Use a separate fence per transaction") Reviewed-by: Mukul Joshi Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 76 ++++++++++++++++---------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 0d1407f25005..32d4519541c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -154,18 +154,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, void *pkt, int size, int api_status_off) { - int ndw = size / 4; - signed long r; - union MESAPI__MISC *x_pkt = pkt; - struct MES_API_STATUS *api_status; + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; - unsigned long flags; - signed long timeout = 3000000; /* 3000 ms */ + struct MES_API_STATUS *api_status; + union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; - u32 fence_offset; - u64 fence_gpu_addr; - u64 *fence_ptr; + unsigned long flags; + u64 status_gpu_addr; + u32 status_offset; + u64 *status_ptr; + signed long r; int ret; if (x_pkt->header.opcode >= MES_SCH_API_MAX) @@ -177,28 +177,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ timeout = 15 * 600 * 1000; } - BUG_ON(size % 4 != 0); - ret = amdgpu_device_wb_get(adev, &fence_offset); + ret = amdgpu_device_wb_get(adev, &status_offset); if (ret) return ret; - fence_gpu_addr = - adev->wb.gpu_addr + (fence_offset * 4); - fence_ptr = (u64 *)&adev->wb.wb[fence_offset]; - *fence_ptr = 0; + + status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); + status_ptr = (u64 *)&adev->wb.wb[status_offset]; + *status_ptr = 0; spin_lock_irqsave(&mes->ring_lock, flags); - if (amdgpu_ring_alloc(ring, ndw)) { - spin_unlock_irqrestore(&mes->ring_lock, flags); - amdgpu_device_wb_free(adev, fence_offset); - return -ENOMEM; - } + r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); + if (r) + goto error_unlock_free; api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); - api_status->api_completion_fence_addr = fence_gpu_addr; + api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; - amdgpu_ring_write_multiple(ring, pkt, ndw); + amdgpu_ring_write_multiple(ring, pkt, size / 4); + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_status_pkt.api_status.api_completion_fence_addr = + ring->fence_drv.gpu_addr; + mes_status_pkt.api_status.api_completion_fence_value = + ++ring->fence_drv.sync_seq; + + amdgpu_ring_write_multiple(ring, &mes_status_pkt, + sizeof(mes_status_pkt) / 4); + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&mes->ring_lock, flags); @@ -206,15 +216,16 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str); + dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, + misc_op_str); else if (op_str) dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); + dev_dbg(adev->dev, "MES msg=%d was emitted\n", + x_pkt->header.opcode); - r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout); - amdgpu_device_wb_free(adev, fence_offset); - if (r < 1) { + r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + if (r < 1 || !*status_ptr) { if (misc_op_str) dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", @@ -229,10 +240,19 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, while (halt_if_hws_hang) schedule(); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto error_wb_free; } + amdgpu_device_wb_free(adev, status_offset); return 0; + +error_unlock_free: + spin_unlock_irqrestore(&mes->ring_lock, flags); + +error_wb_free: + amdgpu_device_wb_free(adev, status_offset); + return r; } static int convert_to_mes_queue_type(int queue_type) From ed5a4484f074aa2bfb1dad99ff3628ea8da4acdc Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Wed, 12 Jun 2024 14:30:40 +0800 Subject: [PATCH 478/778] drm/amdgpu: init TA fw for psp v14 Add support to init TA firmware for psp v14. Signed-off-by: Likun Gao Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index f08a32c18694..40b28298af30 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -32,7 +32,9 @@ #include "mp/mp_14_0_2_sh_mask.h" MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -64,6 +66,9 @@ static int psp_v14_0_init_microcode(struct psp_context *psp) case IP_VERSION(14, 0, 2): case IP_VERSION(14, 0, 3): err = psp_init_sos_microcode(psp, ucode_prefix); + if (err) + return err; + err = psp_init_ta_microcode(psp, ucode_prefix); if (err) return err; break; From f770a6e9a3d7a90f77863b51325614f37a57fef5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 12 Jun 2024 19:28:13 -0400 Subject: [PATCH 479/778] bcachefs: Fix initialization order for srcu barrier btree_iter_init() needs to happen before key_cache_init(), to initialize btree_trans_barrier Reported-by: syzbot+3cca837c2183f8f6fcaf@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 65e239d32915..635da5b3439c 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -912,9 +912,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_io_clock_init(&c->io_clock[WRITE]) ?: bch2_fs_journal_init(&c->journal) ?: bch2_fs_replicas_init(c) ?: + bch2_fs_btree_iter_init(c) ?: bch2_fs_btree_cache_init(c) ?: bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: - bch2_fs_btree_iter_init(c) ?: bch2_fs_btree_interior_update_init(c) ?: bch2_fs_buckets_waiting_for_journal_init(c) ?: bch2_fs_btree_write_buffer_init(c) ?: From d47df4f616d523b4ef832d03ec28b2e6d838067b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 12 Jun 2024 19:51:15 -0400 Subject: [PATCH 480/778] bcachefs: Fix array-index-out-of-bounds We use 0 size arrays as markers, but ubsan doesn't know that - cast them to a pointer to fix the splat. Also, make sure this code gets tested a bit more. Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey.c | 2 +- fs/bcachefs/bkey_methods.c | 6 +++++- fs/bcachefs/bkey_methods.h | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index f46978e5cb7c..94a1d1982fa8 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -1064,7 +1064,7 @@ void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k) { const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current; u8 *l = k->key_start; - u8 *h = (u8 *) (k->_data + f->key_u64s) - 1; + u8 *h = (u8 *) ((u64 *) k->_data + f->key_u64s) - 1; while (l < h) { swap(*l, *h); diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index c2c3dae52186..bd32aac05192 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -398,8 +398,12 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, for (i = 0; i < nr_compat; i++) switch (!write ? i : nr_compat - 1 - i) { case 0: - if (big_endian != CPU_BIG_ENDIAN) + if (big_endian != CPU_BIG_ENDIAN) { bch2_bkey_swab_key(f, k); + } else if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { + bch2_bkey_swab_key(f, k); + bch2_bkey_swab_key(f, k); + } break; case 1: if (version < bcachefs_metadata_version_bkey_renumber) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 726ef7483763..baef0722f5fb 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -129,7 +129,8 @@ static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id, struct bkey_packed *k) { if (version < bcachefs_metadata_version_current || - big_endian != CPU_BIG_ENDIAN) + big_endian != CPU_BIG_ENDIAN || + IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) __bch2_bkey_compat(level, btree_id, version, big_endian, write, f, k); From 3727ca56049d893859b68f70e50092250de79f28 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 09:09:52 -0400 Subject: [PATCH 481/778] bcachefs: Fix a locking bug in the do_discard_fast() path We can't discard a bucket while it's still open; this needs the bucket_is_open_safe() version, which takes the open_buckets lock. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index c4b6601f5b74..d2241f2b40fe 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -882,7 +882,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, closure_wake_up(&c->freelist_wait); if (statechange(a->data_type == BCH_DATA_need_discard) && - !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) && + !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) && bucket_flushed(new_a)) bch2_discard_one_bucket_fast(c, new.k->p); From d406545613b5c2716d5658038c46861863510b90 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 09:20:41 -0400 Subject: [PATCH 482/778] bcachefs: Fix shift overflow in read_one_super() Reported-by: syzbot+9f74cb4006b83e2a3df1@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 055478d21e9e..b156fc85b8a3 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -649,9 +649,10 @@ reread: bytes = vstruct_bytes(sb->sb); - if (bytes > 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits)) { - prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)", - bytes, 512UL << sb->sb->layout.sb_max_size_bits); + u64 sb_size = 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits); + if (bytes > sb_size) { + prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %llu)", + bytes, sb_size); return -BCH_ERR_invalid_sb_too_big; } From e3fd3faa453ce4cf4b6a0f3e29ee77d5d1b243a8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 09:28:01 -0400 Subject: [PATCH 483/778] bcachefs: Fix btree ID bitmasks these should be 64 bit bitmasks, not 32 bit. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 5 +++-- fs/bcachefs/btree_types.h | 16 ++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 90c12fe2a2cd..5d3c5b5e34af 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1382,9 +1382,10 @@ enum btree_id { /* * Maximum number of btrees that we will _ever_ have under the current scheme, - * where we refer to them with bitfields + * where we refer to them with 64 bit bitfields - and we also need a bit for + * the interior btree node type: */ -#define BTREE_ID_NR_MAX 64 +#define BTREE_ID_NR_MAX 63 static inline bool btree_id_is_alloc(enum btree_id id) { diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index d63db4fefe73..87f485e9c552 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -761,13 +761,13 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type) static inline bool btree_node_type_is_extents(enum btree_node_type type) { - const unsigned mask = 0 + const u64 mask = 0 #define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1)) BCH_BTREE_IDS() #undef x ; - return (1U << type) & mask; + return BIT_ULL(type) & mask; } static inline bool btree_id_is_extents(enum btree_id btree) @@ -777,35 +777,35 @@ static inline bool btree_id_is_extents(enum btree_id btree) static inline bool btree_type_has_snapshots(enum btree_id id) { - const unsigned mask = 0 + const u64 mask = 0 #define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr) BCH_BTREE_IDS() #undef x ; - return (1U << id) & mask; + return BIT_ULL(id) & mask; } static inline bool btree_type_has_snapshot_field(enum btree_id id) { - const unsigned mask = 0 + const u64 mask = 0 #define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr) BCH_BTREE_IDS() #undef x ; - return (1U << id) & mask; + return BIT_ULL(id) & mask; } static inline bool btree_type_has_ptrs(enum btree_id id) { - const unsigned mask = 0 + const u64 mask = 0 #define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr) BCH_BTREE_IDS() #undef x ; - return (1U << id) & mask; + return BIT_ULL(id) & mask; } struct btree_root { From 9e7cfb35e2668e542c333ed3ec4b0a951dd332ee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 09:26:54 -0400 Subject: [PATCH 484/778] bcachefs: Check for invalid btree IDs We can only handle btree IDs up to 62, since the btree id (plus the type for interior btree nodes) has to fit ito a 64 bit bitmask - check for invalid ones to avoid invalid shifts later. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 8 +++++++- fs/bcachefs/sb-errors_format.h | 6 +++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index cf513fc79ce4..e632da69196c 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -326,6 +326,12 @@ static int journal_replay_entry_early(struct bch_fs *c, case BCH_JSET_ENTRY_btree_root: { struct btree_root *r; + if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX, + c, invalid_btree_id, + "invalid btree id %u (max %u)", + entry->btree_id, BTREE_ID_NR_MAX)) + return 0; + while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) { ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL }); if (ret) @@ -415,7 +421,7 @@ static int journal_replay_entry_early(struct bch_fs *c, atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time)); } } - +fsck_err: return ret; } diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 84d2763bd597..1d1251f1bb20 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -273,7 +273,11 @@ x(sb_clean_entry_overrun, 267) \ x(btree_ptr_v2_written_0, 268) \ x(subvol_snapshot_bad, 269) \ - x(subvol_inode_bad, 270) + x(subvol_inode_bad, 270) \ + x(alloc_key_stripe_sectors_wrong, 271) \ + x(accounting_mismatch, 272) \ + x(accounting_replicas_not_marked, 273) \ + x(invalid_btree_id, 274) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, From dbf4d79b7fc7e9bf5d1546f6dfffd789ea061221 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 09:36:34 -0400 Subject: [PATCH 485/778] bcachefs: Fix early init error path in journal code We shouln't be running the journal shutdown sequence if we never fully initialized the journal. Reported-by: syzbot+ffd2270f0bca3322ee00@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index adec8e1ea73e..dac2f498ae8b 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1167,6 +1167,9 @@ void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) void bch2_fs_journal_stop(struct journal *j) { + if (!test_bit(JOURNAL_running, &j->flags)) + return; + bch2_journal_reclaim_stop(j); bch2_journal_flush_all_pins(j); From 1ba44217f8258f92c56644ca4fad4462f1941e33 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 09:51:01 -0400 Subject: [PATCH 486/778] bcachefs: delete_dead_snapshots() doesn't need to go RW We've been moving away from going RW lazily; if we want to go RW we do that in set_may_go_rw(), and if we didn't go RW we don't need to delete dead snapshots. Reported-by: syzbot+4366624c0b5aac4906cf@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 51918acfd726..961b5f56358c 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1565,13 +1565,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) return 0; - if (!test_bit(BCH_FS_started, &c->flags)) { - ret = bch2_fs_read_write_early(c); - bch_err_msg(c, ret, "deleting dead snapshots: error going rw"); - if (ret) - return ret; - } - trans = bch2_trans_get(c); /* From cff07e2739d81cf33eb2a378a6136eced852b8cb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 10:06:03 -0400 Subject: [PATCH 487/778] bcachefs: Guard against overflowing LRU_TIME_BITS LRUs only have 48 bits for the time field (i.e. LRU order); thus we need overflow checks and guards. Reported-by: syzbot+df3bf3f088dcaa728857@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 22 +++++++++++++++------- fs/bcachefs/alloc_background.h | 8 +++++++- fs/bcachefs/bcachefs.h | 5 +++++ fs/bcachefs/bcachefs_format.h | 3 +++ fs/bcachefs/lru.h | 3 --- fs/bcachefs/sb-errors_format.h | 3 ++- 6 files changed, 32 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d2241f2b40fe..e258de704578 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -259,6 +259,14 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, "invalid data type (got %u should be %u)", a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); + for (unsigned i = 0; i < 2; i++) + bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, + c, err, + alloc_key_io_time_bad, + "invalid io_time[%s]: %llu, max %llu", + i == READ ? "read" : "write", + a.v->io_time[i], LRU_TIME_MAX); + switch (a.v->data_type) { case BCH_DATA_free: case BCH_DATA_need_gc_gens: @@ -757,8 +765,8 @@ int bch2_trigger_alloc(struct btree_trans *trans, alloc_data_type_set(new_a, new_a->data_type); if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) { - new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); - new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now)); + new_a->io_time[READ] = bch2_current_io_time(c, READ); + new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE); SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); } @@ -781,7 +789,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, if (new_a->data_type == BCH_DATA_cached && !new_a->io_time[READ]) - new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); + new_a->io_time[READ] = bch2_current_io_time(c, READ); u64 old_lru = alloc_lru_idx_read(*old_a); u64 new_lru = alloc_lru_idx_read(*new_a); @@ -1579,7 +1587,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, if (ret) goto err; - a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now); + a_mut->v.io_time[READ] = bch2_current_io_time(c, READ); ret = bch2_trans_update(trans, alloc_iter, &a_mut->k_i, BTREE_TRIGGER_norun); if (ret) @@ -1975,8 +1983,8 @@ static int invalidate_one_bucket(struct btree_trans *trans, a->v.data_type = 0; a->v.dirty_sectors = 0; a->v.cached_sectors = 0; - a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now); - a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now); + a->v.io_time[READ] = bch2_current_io_time(c, READ); + a->v.io_time[WRITE] = bch2_current_io_time(c, WRITE); ret = bch2_trans_commit(trans, NULL, NULL, BCH_WATERMARK_btree| @@ -2204,7 +2212,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, if (ret) return ret; - now = atomic64_read(&c->io_clock[rw].now); + now = bch2_current_io_time(c, rw); if (a->v.io_time[rw] == now) goto out; diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index ae31a94be6f9..c3cc3c5ba5b6 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -141,7 +141,13 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, !bch2_bucket_sectors_fragmented(ca, a)) return 0; - u64 d = bch2_bucket_sectors_dirty(a); + /* + * avoid overflowing LRU_TIME_BITS on a corrupted fs, when + * bucket_sectors_dirty is (much) bigger than bucket_size + */ + u64 d = min(bch2_bucket_sectors_dirty(a), + ca->mi.bucket_size); + return div_u64(d * (1ULL << 31), ca->mi.bucket_size); } diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 2992a644d822..a6b83ecab7ce 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1214,6 +1214,11 @@ static inline s64 bch2_current_time(const struct bch_fs *c) return timespec_to_bch2_time(c, now); } +static inline u64 bch2_current_io_time(const struct bch_fs *c, int rw) +{ + return max(1ULL, (u64) atomic64_read(&c->io_clock[rw].now) & LRU_TIME_MAX); +} + static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c) { struct stdio_redirect *stdio = c->stdio; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 5d3c5b5e34af..4b98fed1ee9a 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -476,6 +476,9 @@ struct bch_lru { #define LRU_ID_STRIPES (1U << 16) +#define LRU_TIME_BITS 48 +#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1) + /* Optional/variable size superblock sections: */ struct bch_sb_field { diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index fb11ab0dd00e..bd71ba77de07 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -2,9 +2,6 @@ #ifndef _BCACHEFS_LRU_H #define _BCACHEFS_LRU_H -#define LRU_TIME_BITS 48 -#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1) - static inline u64 lru_pos_id(struct bpos pos) { return pos.inode >> LRU_TIME_BITS; diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 1d1251f1bb20..1768e5c49f99 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -277,7 +277,8 @@ x(alloc_key_stripe_sectors_wrong, 271) \ x(accounting_mismatch, 272) \ x(accounting_replicas_not_marked, 273) \ - x(invalid_btree_id, 274) + x(invalid_btree_id, 274) \ + x(alloc_key_io_time_bad, 275) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, From 2e9940d4a19507deb29b3e05571fcaaed88155e2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 13:15:16 -0400 Subject: [PATCH 488/778] bcachefs: Handle cached data LRU wraparound We only have 48 bits for the LRU time field, which is insufficient to prevent wraparound. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 46 ++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index e258de704578..7b5909764d14 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2019,6 +2019,21 @@ err: goto out; } +static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter, + struct bch_dev *ca, bool *wrapped) +{ + struct bkey_s_c k; +again: + k = bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX)); + if (!k.k && !*wrapped) { + bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0)); + *wrapped = true; + goto again; + } + + return k; +} + static void bch2_do_invalidates_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work); @@ -2032,12 +2047,33 @@ static void bch2_do_invalidates_work(struct work_struct *work) for_each_member_device(c, ca) { s64 nr_to_invalidate = should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); + struct btree_iter iter; + bool wrapped = false; - ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru, - lru_pos(ca->dev_idx, 0, 0), - lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX), - BTREE_ITER_intent, k, - invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate)); + bch2_trans_iter_init(trans, &iter, BTREE_ID_lru, + lru_pos(ca->dev_idx, 0, + ((bch2_current_io_time(c, READ) + U32_MAX) & + LRU_TIME_MAX)), 0); + + while (true) { + bch2_trans_begin(trans); + + struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped); + ret = bkey_err(k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + break; + if (!k.k) + break; + + ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate); + if (ret) + break; + + bch2_btree_iter_advance(&iter); + } + bch2_trans_iter_exit(trans, &iter); if (ret < 0) { bch2_dev_put(ca); From ddd118ab45e848b1956ef8c8ef84963a554b5b58 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Jun 2024 11:31:00 -0400 Subject: [PATCH 489/778] bcachefs: Fix bch2_sb_downgrade_update() Missing enum conversion Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 3fb23e399ffb..4710b61631f0 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -228,7 +228,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) dst = (void *) &darray_top(table); dst->version = cpu_to_le16(src->version); - dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes); + dst->recovery_passes[0] = cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes)); dst->recovery_passes[1] = 0; dst->nr_errors = cpu_to_le16(src->nr_errors); for (unsigned i = 0; i < src->nr_errors; i++) From 0a2a507d404eebbc168e8b1264edf0ac8c6047b4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 19 Jun 2024 08:43:15 -0400 Subject: [PATCH 490/778] bcachefs: set_worker_desc() for delete_dead_snapshots this is long running - help users see what's going on Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 961b5f56358c..4ef98e696673 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1680,6 +1680,8 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); + set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name); + bch2_delete_dead_snapshots(c); bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); } From a56da69799bd5f0c72bdc0fb64c3e3d8c1b1bb36 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 19 Jun 2024 15:47:44 -0400 Subject: [PATCH 491/778] bcachefs: Fix bch2_trans_put() reference: https://github.com/koverstreet/bcachefs/issues/692 trans->ref is the reference used by the cycle detector, which walks btree_trans objects of other threads to walk the graph of held locks and issue wakeups when an abort is required. We have to wait for the ref to go to 1 before freeing trans->paths or clearing trans->locking_wait.task. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 3694c600a3ad..3a1419d17888 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -3161,6 +3161,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) list_add_done: seqmutex_unlock(&c->btree_trans_lock); got_trans: + trans->ref.closure_get_happened = false; trans->c = c; trans->last_begin_time = local_clock(); trans->fn_idx = fn_idx; @@ -3235,7 +3236,6 @@ void bch2_trans_put(struct btree_trans *trans) trans_for_each_update(trans, i) __btree_path_put(trans->paths + i->path, true); trans->nr_updates = 0; - trans->locking_wait.task = NULL; check_btree_paths_leaked(trans); @@ -3256,6 +3256,13 @@ void bch2_trans_put(struct btree_trans *trans) if (unlikely(trans->journal_replay_not_finished)) bch2_journal_keys_put(c); + /* + * trans->ref protects trans->locking_wait.task, btree_paths arary; used + * by cycle detector + */ + closure_sync(&trans->ref); + trans->locking_wait.task = NULL; + unsigned long *paths_allocated = trans->paths_allocated; trans->paths_allocated = NULL; trans->paths = NULL; @@ -3273,8 +3280,6 @@ void bch2_trans_put(struct btree_trans *trans) trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans); if (trans) { - closure_sync(&trans->ref); - seqmutex_lock(&c->btree_trans_lock); list_del(&trans->list); seqmutex_unlock(&c->btree_trans_lock); From 02a176d42a88805b3520148a4eee28b0760cd8c0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 18 Jun 2024 12:39:14 -0700 Subject: [PATCH 492/778] ipv6: bring NLM_DONE out to a separate recv() again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit under Fixes optimized the number of recv() calls needed during RTM_GETROUTE dumps, but we got multiple reports of applications hanging on recv() calls. Applications expect that a route dump will be terminated with a recv() reading an individual NLM_DONE message. Coalescing NLM_DONE is perfectly legal in netlink, but even tho reporters fixed the code in respective projects, chances are it will take time for those applications to get updated. So revert to old behavior (for now)? This is an IPv6 version of commit 460b0d33cf10 ("inet: bring NLM_DONE out to a separate recv() again"). Reported-by: Maciej Żenczykowski Link: https://lore.kernel.org/all/CANP3RGc1RG71oPEBXNx_WZFP9AyphJefdO4paczN92n__ds4ow@mail.gmail.com Reported-by: Stefano Brivio Link: https://lore.kernel.org/all/20240315124808.033ff58d@elisabeth Reported-by: Ilya Maximets Link: https://lore.kernel.org/all/02b50aae-f0e9-47a4-8365-a977a85975d3@ovn.org Fixes: 5fc68320c1fb ("ipv6: remove RTNL protection from inet6_dump_fib()") Tested-by: Ilya Maximets Link: https://lore.kernel.org/r/20240618193914.561782-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_fib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6e57c03e3255..83e4f9855ae1 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2514,7 +2514,8 @@ int __init fib6_init(void) goto out_kmem_cache_create; ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL, - inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED); + inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED | + RTNL_FLAG_DUMP_SPLIT_NLM_DONE); if (ret) goto out_unregister_subsys; From 74382aebc9035470ec4c789bdb0d09d8c14f261e Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Tue, 18 Jun 2024 14:02:05 -0700 Subject: [PATCH 493/778] ice: Fix VSI list rule with ICE_SW_LKUP_LAST type Adding/updating VSI list rule, as well as allocating/freeing VSI list resource are called several times with type ICE_SW_LKUP_LAST, which fails because ice_update_vsi_list_rule() and ice_aq_alloc_free_vsi_list() consider it invalid. Allow calling these functions with ICE_SW_LKUP_LAST. This fixes at least one issue in switchdev mode, where the same rule with different action cannot be added, e.g.: tc filter add dev $PF1 ingress protocol arp prio 0 flower skip_sw \ dst_mac ff:ff:ff:ff:ff:ff action mirred egress redirect dev $VF1_PR tc filter add dev $PF1 ingress protocol arp prio 0 flower skip_sw \ dst_mac ff:ff:ff:ff:ff:ff action mirred egress redirect dev $VF2_PR Fixes: 0f94570d0cae ("ice: allow adding advanced rules") Suggested-by: Michal Swiatkowski Reviewed-by: Michal Swiatkowski Reviewed-by: Przemek Kitszel Signed-off-by: Marcin Szycik Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240618210206.981885-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_switch.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 94d6670d0901..1191031b2a43 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1899,7 +1899,8 @@ ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || lkup_type == ICE_SW_LKUP_PROMISC || lkup_type == ICE_SW_LKUP_PROMISC_VLAN || - lkup_type == ICE_SW_LKUP_DFLT) { + lkup_type == ICE_SW_LKUP_DFLT || + lkup_type == ICE_SW_LKUP_LAST) { sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_REP); } else if (lkup_type == ICE_SW_LKUP_VLAN) { if (opc == ice_aqc_opc_alloc_res) @@ -2922,7 +2923,8 @@ ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi, lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || lkup_type == ICE_SW_LKUP_PROMISC || lkup_type == ICE_SW_LKUP_PROMISC_VLAN || - lkup_type == ICE_SW_LKUP_DFLT) + lkup_type == ICE_SW_LKUP_DFLT || + lkup_type == ICE_SW_LKUP_LAST) rule_type = remove ? ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR : ICE_AQC_SW_RULES_T_VSI_LIST_SET; else if (lkup_type == ICE_SW_LKUP_VLAN) From f9ae848904289ddb16c7c9e4553ed4c64300de49 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Wed, 19 Jun 2024 01:29:04 +0100 Subject: [PATCH 494/778] net/tcp_ao: Don't leak ao_info on error-path It seems I introduced it together with TCP_AO_CMDF_AO_REQUIRED, on version 5 [1] of TCP-AO patches. Quite frustrative that having all these selftests that I've written, running kmemtest & kcov was always in todo. [1]: https://lore.kernel.org/netdev/20230215183335.800122-5-dima@arista.com/ Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20240617072451.1403e1d2@kernel.org/ Fixes: 0aadc73995d0 ("net/tcp: Prevent TCP-MD5 with TCP-AO being set") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240619-tcp-ao-required-leak-v1-1-6408f3c94247@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ao.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 37c42b63ff99..09c0fa6756b7 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1968,8 +1968,10 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, first = true; } - if (cmd.ao_required && tcp_ao_required_verify(sk)) - return -EKEYREJECTED; + if (cmd.ao_required && tcp_ao_required_verify(sk)) { + err = -EKEYREJECTED; + goto out; + } /* For sockets in TCP_CLOSED it's possible set keys that aren't * matching the future peer (address/port/VRF/etc), From d21d44dbdde83c4a8553c95de1853e63e88d7954 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 17 Jun 2024 17:47:36 +0200 Subject: [PATCH 495/778] drm/xe/vf: Don't touch GuC irq registers if using memory irqs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On platforms where VFs are using memory based interrupts, we missed invalid access to no longer existing interrupt registers, as we keep them marked with XE_REG_OPTION_VF. To fix that just either setup memirq vectors in GuC or enable legacy interrupts. Fixes: aef4eb7c7dec ("drm/xe/vf: Setup memory based interrupts in GuC") Signed-off-by: Michal Wajdeczko Cc: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240617154736.685-1-michal.wajdeczko@intel.com (cherry picked from commit f0ccd2d805e55e12b430d5d6b9acd9f891af455e) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_guc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 240e7a4bbff1..5faca4fc2fef 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -631,8 +631,6 @@ int xe_guc_enable_communication(struct xe_guc *guc) struct xe_device *xe = guc_to_xe(guc); int err; - guc_enable_irq(guc); - if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) { struct xe_gt *gt = guc_to_gt(guc); struct xe_tile *tile = gt_to_tile(gt); @@ -640,6 +638,8 @@ int xe_guc_enable_communication(struct xe_guc *guc) err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc); if (err) return err; + } else { + guc_enable_irq(guc); } xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK, From 9b1effff19cdf2230d3ecb07ff4038a0da32e9cc Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Wed, 19 Jun 2024 17:16:02 +0100 Subject: [PATCH 496/778] ALSA: hda: cs35l56: Select SERIAL_MULTI_INSTANTIATE The ACPI IDs used in the CS35L56 HDA drivers are all handled by the serial multi-instantiate driver which starts multiple Linux device instances from a single ACPI Device() node. As serial multi-instantiate is not an optional part of the system add it as a dependency in Kconfig so that it is not overlooked. Signed-off-by: Simon Trimmer Link: https://lore.kernel.org/20240619161602.117452-1-simont@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index 0da625533afc..e59df40a0007 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -162,6 +162,7 @@ config SND_HDA_SCODEC_CS35L56_I2C depends on ACPI || COMPILE_TEST depends on SND_SOC select FW_CS_DSP + select SERIAL_MULTI_INSTANTIATE select SND_HDA_GENERIC select SND_SOC_CS35L56_SHARED select SND_HDA_SCODEC_CS35L56 @@ -178,6 +179,7 @@ config SND_HDA_SCODEC_CS35L56_SPI depends on ACPI || COMPILE_TEST depends on SND_SOC select FW_CS_DSP + select SERIAL_MULTI_INSTANTIATE select SND_HDA_GENERIC select SND_SOC_CS35L56_SHARED select SND_HDA_SCODEC_CS35L56 From 6cd4a78d962bebbaf8beb7d2ead3f34120e3f7b2 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 17 Jun 2024 22:02:05 +0100 Subject: [PATCH 497/778] net: do not leave a dangling sk pointer, when socket creation fails It is possible to trigger a use-after-free by: * attaching an fentry probe to __sock_release() and the probe calling the bpf_get_socket_cookie() helper * running traceroute -I 1.1.1.1 on a freshly booted VM A KASAN enabled kernel will log something like below (decoded and stripped): ================================================================== BUG: KASAN: slab-use-after-free in __sock_gen_cookie (./arch/x86/include/asm/atomic64_64.h:15 ./include/linux/atomic/atomic-arch-fallback.h:2583 ./include/linux/atomic/atomic-instrumented.h:1611 net/core/sock_diag.c:29) Read of size 8 at addr ffff888007110dd8 by task traceroute/299 CPU: 2 PID: 299 Comm: traceroute Tainted: G E 6.10.0-rc2+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:117 (discriminator 1)) print_report (mm/kasan/report.c:378 mm/kasan/report.c:488) ? __sock_gen_cookie (./arch/x86/include/asm/atomic64_64.h:15 ./include/linux/atomic/atomic-arch-fallback.h:2583 ./include/linux/atomic/atomic-instrumented.h:1611 net/core/sock_diag.c:29) kasan_report (mm/kasan/report.c:603) ? __sock_gen_cookie (./arch/x86/include/asm/atomic64_64.h:15 ./include/linux/atomic/atomic-arch-fallback.h:2583 ./include/linux/atomic/atomic-instrumented.h:1611 net/core/sock_diag.c:29) kasan_check_range (mm/kasan/generic.c:183 mm/kasan/generic.c:189) __sock_gen_cookie (./arch/x86/include/asm/atomic64_64.h:15 ./include/linux/atomic/atomic-arch-fallback.h:2583 ./include/linux/atomic/atomic-instrumented.h:1611 net/core/sock_diag.c:29) bpf_get_socket_ptr_cookie (./arch/x86/include/asm/preempt.h:94 ./include/linux/sock_diag.h:42 net/core/filter.c:5094 net/core/filter.c:5092) bpf_prog_875642cf11f1d139___sock_release+0x6e/0x8e bpf_trampoline_6442506592+0x47/0xaf __sock_release (net/socket.c:652) __sock_create (net/socket.c:1601) ... Allocated by task 299 on cpu 2 at 78.328492s: kasan_save_stack (mm/kasan/common.c:48) kasan_save_track (mm/kasan/common.c:68) __kasan_slab_alloc (mm/kasan/common.c:312 mm/kasan/common.c:338) kmem_cache_alloc_noprof (mm/slub.c:3941 mm/slub.c:4000 mm/slub.c:4007) sk_prot_alloc (net/core/sock.c:2075) sk_alloc (net/core/sock.c:2134) inet_create (net/ipv4/af_inet.c:327 net/ipv4/af_inet.c:252) __sock_create (net/socket.c:1572) __sys_socket (net/socket.c:1660 net/socket.c:1644 net/socket.c:1706) __x64_sys_socket (net/socket.c:1718) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Freed by task 299 on cpu 2 at 78.328502s: kasan_save_stack (mm/kasan/common.c:48) kasan_save_track (mm/kasan/common.c:68) kasan_save_free_info (mm/kasan/generic.c:582) poison_slab_object (mm/kasan/common.c:242) __kasan_slab_free (mm/kasan/common.c:256) kmem_cache_free (mm/slub.c:4437 mm/slub.c:4511) __sk_destruct (net/core/sock.c:2117 net/core/sock.c:2208) inet_create (net/ipv4/af_inet.c:397 net/ipv4/af_inet.c:252) __sock_create (net/socket.c:1572) __sys_socket (net/socket.c:1660 net/socket.c:1644 net/socket.c:1706) __x64_sys_socket (net/socket.c:1718) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Fix this by clearing the struct socket reference in sk_common_release() to cover all protocol families create functions, which may already attached the reference to the sk object with sock_init_data(). Fixes: c5dbb89fc2ac ("bpf: Expose bpf_get_socket_cookie to tracing programs") Suggested-by: Kuniyuki Iwashima Signed-off-by: Ignat Korchagin Cc: stable@vger.kernel.org Link: https://lore.kernel.org/netdev/20240613194047.36478-1-kuniyu@amazon.com/T/ Reviewed-by: Kuniyuki Iwashima Reviewed-by: D. Wythe Link: https://lore.kernel.org/r/20240617210205.67311-1-ignat@cloudflare.com Signed-off-by: Paolo Abeni --- net/core/sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/sock.c b/net/core/sock.c index 8629f9aecf91..100e975073ca 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3742,6 +3742,9 @@ void sk_common_release(struct sock *sk) sk->sk_prot->unhash(sk); + if (sk->sk_socket) + sk->sk_socket->sk = NULL; + /* * In this point socket cannot receive new packets, but it is possible * that some packets are in flight because some CPU runs receiver and From ebc4fc34eae8ddfbef49f2bdaced1bf4167ef80d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 27 May 2024 16:24:41 +0300 Subject: [PATCH 498/778] mmc: sdhci-pci: Convert PCIBIOS_* return codes to errnos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jmicron_pmos() and sdhci_pci_probe() use pci_{read,write}_config_byte() that return PCIBIOS_* codes. The return code is then returned as is by jmicron_probe() and sdhci_pci_probe(). Similarly, the return code is also returned as is from jmicron_resume(). Both probe and resume functions should return normal errnos. Convert PCIBIOS_* returns code using pcibios_err_to_errno() into normal errno before returning them the fix these issues. Fixes: 7582041ff3d4 ("mmc: sdhci-pci: fix simple_return.cocci warnings") Fixes: 45211e215984 ("sdhci: toggle JMicron PMOS setting") Cc: stable@vger.kernel.org Signed-off-by: Ilpo Järvinen Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20240527132443.14038-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index ef89ec382bfe..23e6ba70144c 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1326,7 +1326,7 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on) ret = pci_read_config_byte(chip->pdev, 0xAE, &scratch); if (ret) - return ret; + goto fail; /* * Turn PMOS on [bit 0], set over current detection to 2.4 V @@ -1337,7 +1337,10 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on) else scratch &= ~0x47; - return pci_write_config_byte(chip->pdev, 0xAE, scratch); + ret = pci_write_config_byte(chip->pdev, 0xAE, scratch); + +fail: + return pcibios_err_to_errno(ret); } static int jmicron_probe(struct sdhci_pci_chip *chip) @@ -2202,7 +2205,7 @@ static int sdhci_pci_probe(struct pci_dev *pdev, ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &slots); if (ret) - return ret; + return pcibios_err_to_errno(ret); slots = PCI_SLOT_INFO_SLOTS(slots) + 1; dev_dbg(&pdev->dev, "found %d slot(s)\n", slots); @@ -2211,7 +2214,7 @@ static int sdhci_pci_probe(struct pci_dev *pdev, ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &first_bar); if (ret) - return ret; + return pcibios_err_to_errno(ret); first_bar &= PCI_SLOT_INFO_FIRST_BAR_MASK; From a91bf3b3beadbb4f8b3bbc7969fb2ae1615e25c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 27 May 2024 16:24:42 +0300 Subject: [PATCH 499/778] mmc: sdhci-pci-o2micro: Convert PCIBIOS_* return codes to errnos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sdhci_pci_o2_probe() uses pci_read_config_{byte,dword}() that return PCIBIOS_* codes. The return code is then returned as is but as sdhci_pci_o2_probe() is probe function chain, it should return normal errnos. Convert PCIBIOS_* returns code using pcibios_err_to_errno() into normal errno before returning them. Add a label for read failure so that the conversion can be done in one place rather than on all of the return statements. Fixes: 3d757ddbd68c ("mmc: sdhci-pci-o2micro: add Bayhub new chip GG8 support for UHS-I") Fixes: d599005afde8 ("mmc: sdhci-pci-o2micro: Add missing checks in sdhci_pci_o2_probe") Fixes: 706adf6bc31c ("mmc: sdhci-pci-o2micro: Add SeaBird SeaEagle SD3 support") Fixes: 01acf6917aed ("mmc: sdhci-pci: add support of O2Micro/BayHubTech SD hosts") Fixes: 26daa1ed40c6 ("mmc: sdhci: Disable ADMA on some O2Micro SD/MMC parts.") Cc: stable@vger.kernel.org Signed-off-by: Ilpo Järvinen Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20240527132443.14038-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-o2micro.c | 41 +++++++++++++++------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index d4a02184784a..058bef1c7e41 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -823,7 +823,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch &= 0x7f; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); @@ -834,7 +834,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_CLKREQ, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x20; pci_write_config_byte(chip->pdev, O2_SD_CLKREQ, scratch); @@ -843,7 +843,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) */ ret = pci_read_config_byte(chip->pdev, O2_SD_CAPS, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x01; pci_write_config_byte(chip->pdev, O2_SD_CAPS, scratch); pci_write_config_byte(chip->pdev, O2_SD_CAPS, 0x73); @@ -856,7 +856,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_INF_MOD, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x08; pci_write_config_byte(chip->pdev, O2_SD_INF_MOD, scratch); @@ -864,7 +864,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x80; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; @@ -875,7 +875,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch &= 0x7f; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); @@ -886,7 +886,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) O2_SD_FUNC_REG0, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 = ((scratch_32 & 0xFF000000) >> 24); /* Check Whether subId is 0x11 or 0x12 */ @@ -898,7 +898,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) O2_SD_FUNC_REG4, &scratch_32); if (ret) - return ret; + goto read_fail; /* Enable Base Clk setting change */ scratch_32 |= O2_SD_FREG4_ENABLE_CLK_SET; @@ -921,7 +921,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_dword(chip->pdev, O2_SD_CLK_SETTING, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 &= ~(0xFF00); scratch_32 |= 0x07E0C800; @@ -931,14 +931,14 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_dword(chip->pdev, O2_SD_CLKREQ, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 |= 0x3; pci_write_config_dword(chip->pdev, O2_SD_CLKREQ, scratch_32); ret = pci_read_config_dword(chip->pdev, O2_SD_PLL_SETTING, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 &= ~(0x1F3F070E); scratch_32 |= 0x18270106; @@ -949,7 +949,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_dword(chip->pdev, O2_SD_CAP_REG2, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 &= ~(0xE0); pci_write_config_dword(chip->pdev, O2_SD_CAP_REG2, scratch_32); @@ -961,7 +961,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x80; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; @@ -971,7 +971,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch &= 0x7f; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); @@ -979,7 +979,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_dword(chip->pdev, O2_SD_PLL_SETTING, &scratch_32); if (ret) - return ret; + goto read_fail; if ((scratch_32 & 0xff000000) == 0x01000000) { scratch_32 &= 0x0000FFFF; @@ -998,7 +998,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) O2_SD_FUNC_REG4, &scratch_32); if (ret) - return ret; + goto read_fail; scratch_32 |= (1 << 22); pci_write_config_dword(chip->pdev, O2_SD_FUNC_REG4, scratch_32); @@ -1017,7 +1017,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x80; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; @@ -1028,7 +1028,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) /* UnLock WP */ ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch &= 0x7f; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); @@ -1057,13 +1057,16 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) /* Lock WP */ ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); if (ret) - return ret; + goto read_fail; scratch |= 0x80; pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; } return 0; + +read_fail: + return pcibios_err_to_errno(ret); } #ifdef CONFIG_PM_SLEEP From d77dc388cd61dfdafe30b98025fa827498378199 Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Mon, 3 Jun 2024 18:08:34 -0400 Subject: [PATCH 500/778] mmc: sdhci-brcmstb: check R1_STATUS for erase/trim/discard When erase/trim/discard completion was converted to mmc_poll_for_busy(), optional support to poll with the host_ops->card_busy() callback was also added. The common sdhci's ->card_busy() turns out not to be working as expected for the sdhci-brcmstb variant, as it keeps returning busy beyond the card's busy period. In particular, this leads to the below splat for mmc_do_erase() when running a discard (BLKSECDISCARD) operation during mkfs.f2fs: Info: [/dev/mmcblk1p9] Discarding device [ 39.597258] sysrq: Show Blocked State [ 39.601183] task:mkfs.f2fs state:D stack:0 pid:1561 tgid:1561 ppid:1542 flags:0x0000000d [ 39.610609] Call trace: [ 39.613098] __switch_to+0xd8/0xf4 [ 39.616582] __schedule+0x440/0x4f4 [ 39.620137] schedule+0x2c/0x48 [ 39.623341] schedule_hrtimeout_range_clock+0xe0/0x114 [ 39.628562] schedule_hrtimeout_range+0x10/0x18 [ 39.633169] usleep_range_state+0x5c/0x90 [ 39.637253] __mmc_poll_for_busy+0xec/0x128 [ 39.641514] mmc_poll_for_busy+0x48/0x70 [ 39.645511] mmc_do_erase+0x1ec/0x210 [ 39.649237] mmc_erase+0x1b4/0x1d4 [ 39.652701] mmc_blk_mq_issue_rq+0x35c/0x6ac [ 39.657037] mmc_mq_queue_rq+0x18c/0x214 [ 39.661022] blk_mq_dispatch_rq_list+0x3a8/0x528 [ 39.665722] __blk_mq_sched_dispatch_requests+0x3a0/0x4ac [ 39.671198] blk_mq_sched_dispatch_requests+0x28/0x5c [ 39.676322] blk_mq_run_hw_queue+0x11c/0x12c [ 39.680668] blk_mq_flush_plug_list+0x200/0x33c [ 39.685278] blk_add_rq_to_plug+0x68/0xd8 [ 39.689365] blk_mq_submit_bio+0x3a4/0x458 [ 39.693539] __submit_bio+0x1c/0x80 [ 39.697096] submit_bio_noacct_nocheck+0x94/0x174 [ 39.701875] submit_bio_noacct+0x1b0/0x22c [ 39.706042] submit_bio+0xac/0xe8 [ 39.709424] blk_next_bio+0x4c/0x5c [ 39.712973] blkdev_issue_secure_erase+0x118/0x170 [ 39.717835] blkdev_common_ioctl+0x374/0x728 [ 39.722175] blkdev_ioctl+0x8c/0x2b0 [ 39.725816] vfs_ioctl+0x24/0x40 [ 39.729117] __arm64_sys_ioctl+0x5c/0x8c [ 39.733114] invoke_syscall+0x68/0xec [ 39.736839] el0_svc_common.constprop.0+0x70/0xd8 [ 39.741609] do_el0_svc+0x18/0x20 [ 39.744981] el0_svc+0x68/0x94 [ 39.748107] el0t_64_sync_handler+0x88/0x124 [ 39.752455] el0t_64_sync+0x168/0x16c To fix the problem let's override the host_ops->card_busy() callback by setting it to NULL, which forces the mmc core to poll with a CMD13 and checking the R1_STATUS in the mmc_busy_cb() function. Signed-off-by: Kamal Dasu Fixes: 0d84c3e6a5b2 ("mmc: core: Convert to mmc_poll_for_busy() for erase/trim/discard") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240603220834.21989-2-kamal.dasu@broadcom.com [Ulf: Clarified the commit message] Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-brcmstb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index 9053526fa212..150fb477b7cc 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -24,6 +24,7 @@ #define BRCMSTB_MATCH_FLAGS_NO_64BIT BIT(0) #define BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT BIT(1) #define BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE BIT(2) +#define BRCMSTB_MATCH_FLAGS_USE_CARD_BUSY BIT(4) #define BRCMSTB_PRIV_FLAGS_HAS_CQE BIT(0) #define BRCMSTB_PRIV_FLAGS_GATE_CLOCK BIT(1) @@ -384,6 +385,9 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) if (match_priv->flags & BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT) host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + if (!(match_priv->flags & BRCMSTB_MATCH_FLAGS_USE_CARD_BUSY)) + host->mmc_host_ops.card_busy = NULL; + /* Change the base clock frequency if the DT property exists */ if (device_property_read_u32(&pdev->dev, "clock-frequency", &priv->base_freq_hz) != 0) From 84bb8d8bbd8384081c3fc5c4f20b223524af529d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 6 Jun 2024 20:17:20 +0200 Subject: [PATCH 501/778] Revert "mmc: moxart-mmc: Use sg_miter for PIO" This reverts commit 3ee0e7c3e67cab83ffbbe7707b43df8d41c9fe47. The patch is not working for unknown reasons and I would need access to the hardware to fix the bug. This shouldn't matter anyway: the Moxa Art is not expected to use highmem, and sg_miter() is only necessary to have to properly deal with highmem. Reported-by: Sergei Antonov Signed-off-by: Linus Walleij Fixes: 3ee0e7c3e67c ("mmc: moxart-mmc: Use sg_miter for PIO") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240606-mmc-moxart-revert-v1-1-a01c2f40de9c@linaro.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/moxart-mmc.c | 78 +++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c index 9a5f75163aca..8ede4ce93271 100644 --- a/drivers/mmc/host/moxart-mmc.c +++ b/drivers/mmc/host/moxart-mmc.c @@ -131,10 +131,12 @@ struct moxart_host { struct dma_async_tx_descriptor *tx_desc; struct mmc_host *mmc; struct mmc_request *mrq; + struct scatterlist *cur_sg; struct completion dma_complete; struct completion pio_complete; - struct sg_mapping_iter sg_miter; + u32 num_sg; + u32 data_remain; u32 data_len; u32 fifo_width; u32 timeout; @@ -146,6 +148,35 @@ struct moxart_host { bool is_removed; }; +static inline void moxart_init_sg(struct moxart_host *host, + struct mmc_data *data) +{ + host->cur_sg = data->sg; + host->num_sg = data->sg_len; + host->data_remain = host->cur_sg->length; + + if (host->data_remain > host->data_len) + host->data_remain = host->data_len; +} + +static inline int moxart_next_sg(struct moxart_host *host) +{ + int remain; + struct mmc_data *data = host->mrq->cmd->data; + + host->cur_sg++; + host->num_sg--; + + if (host->num_sg > 0) { + host->data_remain = host->cur_sg->length; + remain = host->data_len - data->bytes_xfered; + if (remain > 0 && remain < host->data_remain) + host->data_remain = remain; + } + + return host->num_sg; +} + static int moxart_wait_for_status(struct moxart_host *host, u32 mask, u32 *status) { @@ -278,29 +309,14 @@ static void moxart_transfer_dma(struct mmc_data *data, struct moxart_host *host) static void moxart_transfer_pio(struct moxart_host *host) { - struct sg_mapping_iter *sgm = &host->sg_miter; struct mmc_data *data = host->mrq->cmd->data; u32 *sgp, len = 0, remain, status; if (host->data_len == data->bytes_xfered) return; - /* - * By updating sgm->consumes this will get a proper pointer into the - * buffer at any time. - */ - if (!sg_miter_next(sgm)) { - /* This shold not happen */ - dev_err(mmc_dev(host->mmc), "ran out of scatterlist prematurely\n"); - data->error = -EINVAL; - complete(&host->pio_complete); - return; - } - sgp = sgm->addr; - remain = sgm->length; - if (remain > host->data_len) - remain = host->data_len; - sgm->consumed = 0; + sgp = sg_virt(host->cur_sg); + remain = host->data_remain; if (data->flags & MMC_DATA_WRITE) { while (remain > 0) { @@ -315,7 +331,6 @@ static void moxart_transfer_pio(struct moxart_host *host) sgp++; len += 4; } - sgm->consumed += len; remain -= len; } @@ -332,22 +347,22 @@ static void moxart_transfer_pio(struct moxart_host *host) sgp++; len += 4; } - sgm->consumed += len; remain -= len; } } - data->bytes_xfered += sgm->consumed; - if (host->data_len == data->bytes_xfered) { + data->bytes_xfered += host->data_remain - remain; + host->data_remain = remain; + + if (host->data_len != data->bytes_xfered) + moxart_next_sg(host); + else complete(&host->pio_complete); - return; - } } static void moxart_prepare_data(struct moxart_host *host) { struct mmc_data *data = host->mrq->cmd->data; - unsigned int flags = SG_MITER_ATOMIC; /* Used from IRQ */ u32 datactrl; int blksz_bits; @@ -358,19 +373,15 @@ static void moxart_prepare_data(struct moxart_host *host) blksz_bits = ffs(data->blksz) - 1; BUG_ON(1 << blksz_bits != data->blksz); + moxart_init_sg(host, data); + datactrl = DCR_DATA_EN | (blksz_bits & DCR_BLK_SIZE); - if (data->flags & MMC_DATA_WRITE) { - flags |= SG_MITER_FROM_SG; + if (data->flags & MMC_DATA_WRITE) datactrl |= DCR_DATA_WRITE; - } else { - flags |= SG_MITER_TO_SG; - } if (moxart_use_dma(host)) datactrl |= DCR_DMA_EN; - else - sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags); writel(DCR_DATA_FIFO_RESET, host->base + REG_DATA_CONTROL); writel(MASK_DATA | FIFO_URUN | FIFO_ORUN, host->base + REG_CLEAR); @@ -443,9 +454,6 @@ static void moxart_request(struct mmc_host *mmc, struct mmc_request *mrq) } request_done: - if (!moxart_use_dma(host)) - sg_miter_stop(&host->sg_miter); - spin_unlock_irqrestore(&host->lock, flags); mmc_request_done(host->mmc, mrq); } From 8851346912a1fa33e7a5966fe51f07313b274627 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Jun 2024 09:38:21 +0200 Subject: [PATCH 502/778] net: stmmac: Assign configured channel value to EXTTS event Assign the configured channel value to the EXTTS event in the timestamp interrupt handler. Without assigning the correct channel, applications like ts2phc will refuse to accept the event, resulting in errors such as: ... ts2phc[656.834]: config item end1.ts2phc.pin_index is 0 ts2phc[656.834]: config item end1.ts2phc.channel is 3 ts2phc[656.834]: config item end1.ts2phc.extts_polarity is 2 ts2phc[656.834]: config item end1.ts2phc.extts_correction is 0 ... ts2phc[656.862]: extts on unexpected channel ts2phc[658.141]: extts on unexpected channel ts2phc[659.140]: extts on unexpected channel Fixes: f4da56529da60 ("net: stmmac: Add support for external trigger timestamping") Cc: stable@vger.kernel.org Signed-off-by: Oleksij Rempel Reviewed-by: Wojciech Drewek Link: https://lore.kernel.org/r/20240618073821.619751-1-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index f05bd757dfe5..5ef52ef2698f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -218,6 +218,7 @@ static void timestamp_interrupt(struct stmmac_priv *priv) { u32 num_snapshot, ts_status, tsync_int; struct ptp_clock_event event; + u32 acr_value, channel; unsigned long flags; u64 ptp_time; int i; @@ -243,12 +244,15 @@ static void timestamp_interrupt(struct stmmac_priv *priv) num_snapshot = (ts_status & GMAC_TIMESTAMP_ATSNS_MASK) >> GMAC_TIMESTAMP_ATSNS_SHIFT; + acr_value = readl(priv->ptpaddr + PTP_ACR); + channel = ilog2(FIELD_GET(PTP_ACR_MASK, acr_value)); + for (i = 0; i < num_snapshot; i++) { read_lock_irqsave(&priv->ptp_lock, flags); get_ptptime(priv->ptpaddr, &ptp_time); read_unlock_irqrestore(&priv->ptp_lock, flags); event.type = PTP_CLOCK_EXTTS; - event.index = 0; + event.index = channel; event.timestamp = ptp_time; ptp_clock_event(priv->ptp_clock, &event); } From a23800f08a60787dfbf2b87b2e6ed411cb629859 Mon Sep 17 00:00:00 2001 From: Chenliang Li Date: Wed, 19 Jun 2024 14:38:19 +0800 Subject: [PATCH 503/778] io_uring/rsrc: fix incorrect assignment of iter->nr_segs in io_import_fixed In io_import_fixed when advancing the iter within the first bvec, the iter->nr_segs is set to bvec->bv_len. nr_segs should be the number of bvecs, plus we don't need to adjust it here, so just remove it. Fixes: b000ae0ec2d7 ("io_uring/rsrc: optimise single entry advance") Signed-off-by: Chenliang Li Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/20240619063819.2445-1-cliang01.li@samsung.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index edb9c5baf2e2..570bfa6a31aa 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1068,7 +1068,6 @@ int io_import_fixed(int ddir, struct iov_iter *iter, * branch doesn't expect non PAGE_SIZE'd chunks. */ iter->bvec = bvec; - iter->nr_segs = bvec->bv_len; iter->count -= offset; iter->iov_offset = offset; } else { From 33dfafa90285c0873a24d633877d505ab8e3fc20 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 19 Jun 2024 09:55:48 -0400 Subject: [PATCH 504/778] bcachefs: Fix safe errors by default i.e. the start of automatic self healing: If errors=continue or fix_safe, we now automatically fix simple errors without user intervention. New error action option: fix_safe This replaces the existing errors=ro option, which gets a new slot, i.e. existing errors=ro users now get errors=fix_safe. This is currently only enabled for a limited set of errors - initially just disk accounting; errors we would never not want to fix, and we don't want to require user intervention (i.e. to make sure a bug report gets filed). Errors will still be counted in the superblock, so we (developers) will still know they've been occuring if a bug report gets filed (as bug reports typically include the errors superblock section). Eventually we'll be enabling this for a much wider set of errors, after we've done thorough error injection testing. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 5 +- fs/bcachefs/error.c | 19 +- fs/bcachefs/error.h | 7 - fs/bcachefs/opts.h | 2 +- fs/bcachefs/sb-errors_format.h | 564 +++++++++++++++++---------------- 5 files changed, 308 insertions(+), 289 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 4b98fed1ee9a..e3b1bde489c3 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -990,8 +990,9 @@ enum bch_version_upgrade_opts { #define BCH_ERROR_ACTIONS() \ x(continue, 0) \ - x(ro, 1) \ - x(panic, 2) + x(fix_safe, 1) \ + x(panic, 2) \ + x(ro, 3) enum bch_error_actions { #define x(t, n) BCH_ON_ERROR_##t = n, diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index c66eeffcd7f2..d95c40f1b6af 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -15,6 +15,7 @@ bool bch2_inconsistent_error(struct bch_fs *c) switch (c->opts.errors) { case BCH_ON_ERROR_continue: return false; + case BCH_ON_ERROR_fix_safe: case BCH_ON_ERROR_ro: if (bch2_fs_emergency_read_only(c)) bch_err(c, "inconsistency detected - emergency read only at journal seq %llu", @@ -191,6 +192,12 @@ static void prt_actioning(struct printbuf *out, const char *action) prt_str(out, "ing"); } +static const u8 fsck_flags_extra[] = { +#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags, + BCH_SB_ERRS() +#undef x +}; + int bch2_fsck_err(struct bch_fs *c, enum bch_fsck_flags flags, enum bch_sb_error_id err, @@ -203,6 +210,9 @@ int bch2_fsck_err(struct bch_fs *c, int ret = -BCH_ERR_fsck_ignore; const char *action_orig = "fix?", *action = action_orig; + if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) + flags |= fsck_flags_extra[err]; + if ((flags & FSCK_CAN_FIX) && test_bit(err, c->sb.errors_silent)) return -BCH_ERR_fsck_fix; @@ -265,7 +275,14 @@ int bch2_fsck_err(struct bch_fs *c, prt_printf(out, bch2_log_msg(c, "")); #endif - if (!test_bit(BCH_FS_fsck_running, &c->flags)) { + if ((flags & FSCK_CAN_FIX) && + (flags & FSCK_AUTOFIX) && + (c->opts.errors == BCH_ON_ERROR_continue || + c->opts.errors == BCH_ON_ERROR_fix_safe)) { + prt_str(out, ", "); + prt_actioning(out, action); + ret = -BCH_ERR_fsck_fix; + } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { prt_str(out, ", shutting down"); diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 36caedf72d89..777711504c35 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -108,13 +108,6 @@ struct fsck_err_state { char *last_msg; }; -enum bch_fsck_flags { - FSCK_CAN_FIX = 1 << 0, - FSCK_CAN_IGNORE = 1 << 1, - FSCK_NEED_FSCK = 1 << 2, - FSCK_NO_RATELIMIT = 1 << 3, -}; - #define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err) __printf(4, 5) __cold diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 25530e0bb2f3..b197ec90d4cb 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -137,7 +137,7 @@ enum fsck_err_opts { x(errors, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_STR(bch2_error_actions), \ - BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \ + BCH_SB_ERROR_ACTION, BCH_ON_ERROR_fix_safe, \ NULL, "Action to take on filesystem error") \ x(metadata_replicas, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 1768e5c49f99..d6f35a99c429 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -2,286 +2,294 @@ #ifndef _BCACHEFS_SB_ERRORS_FORMAT_H #define _BCACHEFS_SB_ERRORS_FORMAT_H -#define BCH_SB_ERRS() \ - x(clean_but_journal_not_empty, 0) \ - x(dirty_but_no_journal_entries, 1) \ - x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \ - x(sb_clean_journal_seq_mismatch, 3) \ - x(sb_clean_btree_root_mismatch, 4) \ - x(sb_clean_missing, 5) \ - x(jset_unsupported_version, 6) \ - x(jset_unknown_csum, 7) \ - x(jset_last_seq_newer_than_seq, 8) \ - x(jset_past_bucket_end, 9) \ - x(jset_seq_blacklisted, 10) \ - x(journal_entries_missing, 11) \ - x(journal_entry_replicas_not_marked, 12) \ - x(journal_entry_past_jset_end, 13) \ - x(journal_entry_replicas_data_mismatch, 14) \ - x(journal_entry_bkey_u64s_0, 15) \ - x(journal_entry_bkey_past_end, 16) \ - x(journal_entry_bkey_bad_format, 17) \ - x(journal_entry_bkey_invalid, 18) \ - x(journal_entry_btree_root_bad_size, 19) \ - x(journal_entry_blacklist_bad_size, 20) \ - x(journal_entry_blacklist_v2_bad_size, 21) \ - x(journal_entry_blacklist_v2_start_past_end, 22) \ - x(journal_entry_usage_bad_size, 23) \ - x(journal_entry_data_usage_bad_size, 24) \ - x(journal_entry_clock_bad_size, 25) \ - x(journal_entry_clock_bad_rw, 26) \ - x(journal_entry_dev_usage_bad_size, 27) \ - x(journal_entry_dev_usage_bad_dev, 28) \ - x(journal_entry_dev_usage_bad_pad, 29) \ - x(btree_node_unreadable, 30) \ - x(btree_node_fault_injected, 31) \ - x(btree_node_bad_magic, 32) \ - x(btree_node_bad_seq, 33) \ - x(btree_node_unsupported_version, 34) \ - x(btree_node_bset_older_than_sb_min, 35) \ - x(btree_node_bset_newer_than_sb, 36) \ - x(btree_node_data_missing, 37) \ - x(btree_node_bset_after_end, 38) \ - x(btree_node_replicas_sectors_written_mismatch, 39) \ - x(btree_node_replicas_data_mismatch, 40) \ - x(bset_unknown_csum, 41) \ - x(bset_bad_csum, 42) \ - x(bset_past_end_of_btree_node, 43) \ - x(bset_wrong_sector_offset, 44) \ - x(bset_empty, 45) \ - x(bset_bad_seq, 46) \ - x(bset_blacklisted_journal_seq, 47) \ - x(first_bset_blacklisted_journal_seq, 48) \ - x(btree_node_bad_btree, 49) \ - x(btree_node_bad_level, 50) \ - x(btree_node_bad_min_key, 51) \ - x(btree_node_bad_max_key, 52) \ - x(btree_node_bad_format, 53) \ - x(btree_node_bkey_past_bset_end, 54) \ - x(btree_node_bkey_bad_format, 55) \ - x(btree_node_bad_bkey, 56) \ - x(btree_node_bkey_out_of_order, 57) \ - x(btree_root_bkey_invalid, 58) \ - x(btree_root_read_error, 59) \ - x(btree_root_bad_min_key, 60) \ - x(btree_root_bad_max_key, 61) \ - x(btree_node_read_error, 62) \ - x(btree_node_topology_bad_min_key, 63) \ - x(btree_node_topology_bad_max_key, 64) \ - x(btree_node_topology_overwritten_by_prev_node, 65) \ - x(btree_node_topology_overwritten_by_next_node, 66) \ - x(btree_node_topology_interior_node_empty, 67) \ - x(fs_usage_hidden_wrong, 68) \ - x(fs_usage_btree_wrong, 69) \ - x(fs_usage_data_wrong, 70) \ - x(fs_usage_cached_wrong, 71) \ - x(fs_usage_reserved_wrong, 72) \ - x(fs_usage_persistent_reserved_wrong, 73) \ - x(fs_usage_nr_inodes_wrong, 74) \ - x(fs_usage_replicas_wrong, 75) \ - x(dev_usage_buckets_wrong, 76) \ - x(dev_usage_sectors_wrong, 77) \ - x(dev_usage_fragmented_wrong, 78) \ - x(dev_usage_buckets_ec_wrong, 79) \ - x(bkey_version_in_future, 80) \ - x(bkey_u64s_too_small, 81) \ - x(bkey_invalid_type_for_btree, 82) \ - x(bkey_extent_size_zero, 83) \ - x(bkey_extent_size_greater_than_offset, 84) \ - x(bkey_size_nonzero, 85) \ - x(bkey_snapshot_nonzero, 86) \ - x(bkey_snapshot_zero, 87) \ - x(bkey_at_pos_max, 88) \ - x(bkey_before_start_of_btree_node, 89) \ - x(bkey_after_end_of_btree_node, 90) \ - x(bkey_val_size_nonzero, 91) \ - x(bkey_val_size_too_small, 92) \ - x(alloc_v1_val_size_bad, 93) \ - x(alloc_v2_unpack_error, 94) \ - x(alloc_v3_unpack_error, 95) \ - x(alloc_v4_val_size_bad, 96) \ - x(alloc_v4_backpointers_start_bad, 97) \ - x(alloc_key_data_type_bad, 98) \ - x(alloc_key_empty_but_have_data, 99) \ - x(alloc_key_dirty_sectors_0, 100) \ - x(alloc_key_data_type_inconsistency, 101) \ - x(alloc_key_to_missing_dev_bucket, 102) \ - x(alloc_key_cached_inconsistency, 103) \ - x(alloc_key_cached_but_read_time_zero, 104) \ - x(alloc_key_to_missing_lru_entry, 105) \ - x(alloc_key_data_type_wrong, 106) \ - x(alloc_key_gen_wrong, 107) \ - x(alloc_key_dirty_sectors_wrong, 108) \ - x(alloc_key_cached_sectors_wrong, 109) \ - x(alloc_key_stripe_wrong, 110) \ - x(alloc_key_stripe_redundancy_wrong, 111) \ - x(bucket_sector_count_overflow, 112) \ - x(bucket_metadata_type_mismatch, 113) \ - x(need_discard_key_wrong, 114) \ - x(freespace_key_wrong, 115) \ - x(freespace_hole_missing, 116) \ - x(bucket_gens_val_size_bad, 117) \ - x(bucket_gens_key_wrong, 118) \ - x(bucket_gens_hole_wrong, 119) \ - x(bucket_gens_to_invalid_dev, 120) \ - x(bucket_gens_to_invalid_buckets, 121) \ - x(bucket_gens_nonzero_for_invalid_buckets, 122) \ - x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ - x(need_discard_freespace_key_bad, 124) \ - x(backpointer_bucket_offset_wrong, 125) \ - x(backpointer_to_missing_device, 126) \ - x(backpointer_to_missing_alloc, 127) \ - x(backpointer_to_missing_ptr, 128) \ - x(lru_entry_at_time_0, 129) \ - x(lru_entry_to_invalid_bucket, 130) \ - x(lru_entry_bad, 131) \ - x(btree_ptr_val_too_big, 132) \ - x(btree_ptr_v2_val_too_big, 133) \ - x(btree_ptr_has_non_ptr, 134) \ - x(extent_ptrs_invalid_entry, 135) \ - x(extent_ptrs_no_ptrs, 136) \ - x(extent_ptrs_too_many_ptrs, 137) \ - x(extent_ptrs_redundant_crc, 138) \ - x(extent_ptrs_redundant_stripe, 139) \ - x(extent_ptrs_unwritten, 140) \ - x(extent_ptrs_written_and_unwritten, 141) \ - x(ptr_to_invalid_device, 142) \ - x(ptr_to_duplicate_device, 143) \ - x(ptr_after_last_bucket, 144) \ - x(ptr_before_first_bucket, 145) \ - x(ptr_spans_multiple_buckets, 146) \ - x(ptr_to_missing_backpointer, 147) \ - x(ptr_to_missing_alloc_key, 148) \ - x(ptr_to_missing_replicas_entry, 149) \ - x(ptr_to_missing_stripe, 150) \ - x(ptr_to_incorrect_stripe, 151) \ - x(ptr_gen_newer_than_bucket_gen, 152) \ - x(ptr_too_stale, 153) \ - x(stale_dirty_ptr, 154) \ - x(ptr_bucket_data_type_mismatch, 155) \ - x(ptr_cached_and_erasure_coded, 156) \ - x(ptr_crc_uncompressed_size_too_small, 157) \ - x(ptr_crc_csum_type_unknown, 158) \ - x(ptr_crc_compression_type_unknown, 159) \ - x(ptr_crc_redundant, 160) \ - x(ptr_crc_uncompressed_size_too_big, 161) \ - x(ptr_crc_nonce_mismatch, 162) \ - x(ptr_stripe_redundant, 163) \ - x(reservation_key_nr_replicas_invalid, 164) \ - x(reflink_v_refcount_wrong, 165) \ - x(reflink_p_to_missing_reflink_v, 166) \ - x(stripe_pos_bad, 167) \ - x(stripe_val_size_bad, 168) \ - x(stripe_sector_count_wrong, 169) \ - x(snapshot_tree_pos_bad, 170) \ - x(snapshot_tree_to_missing_snapshot, 171) \ - x(snapshot_tree_to_missing_subvol, 172) \ - x(snapshot_tree_to_wrong_subvol, 173) \ - x(snapshot_tree_to_snapshot_subvol, 174) \ - x(snapshot_pos_bad, 175) \ - x(snapshot_parent_bad, 176) \ - x(snapshot_children_not_normalized, 177) \ - x(snapshot_child_duplicate, 178) \ - x(snapshot_child_bad, 179) \ - x(snapshot_skiplist_not_normalized, 180) \ - x(snapshot_skiplist_bad, 181) \ - x(snapshot_should_not_have_subvol, 182) \ - x(snapshot_to_bad_snapshot_tree, 183) \ - x(snapshot_bad_depth, 184) \ - x(snapshot_bad_skiplist, 185) \ - x(subvol_pos_bad, 186) \ - x(subvol_not_master_and_not_snapshot, 187) \ - x(subvol_to_missing_root, 188) \ - x(subvol_root_wrong_bi_subvol, 189) \ - x(bkey_in_missing_snapshot, 190) \ - x(inode_pos_inode_nonzero, 191) \ - x(inode_pos_blockdev_range, 192) \ - x(inode_unpack_error, 193) \ - x(inode_str_hash_invalid, 194) \ - x(inode_v3_fields_start_bad, 195) \ - x(inode_snapshot_mismatch, 196) \ - x(inode_unlinked_but_clean, 197) \ - x(inode_unlinked_but_nlink_nonzero, 198) \ - x(inode_checksum_type_invalid, 199) \ - x(inode_compression_type_invalid, 200) \ - x(inode_subvol_root_but_not_dir, 201) \ - x(inode_i_size_dirty_but_clean, 202) \ - x(inode_i_sectors_dirty_but_clean, 203) \ - x(inode_i_sectors_wrong, 204) \ - x(inode_dir_wrong_nlink, 205) \ - x(inode_dir_multiple_links, 206) \ - x(inode_multiple_links_but_nlink_0, 207) \ - x(inode_wrong_backpointer, 208) \ - x(inode_wrong_nlink, 209) \ - x(inode_unreachable, 210) \ - x(deleted_inode_but_clean, 211) \ - x(deleted_inode_missing, 212) \ - x(deleted_inode_is_dir, 213) \ - x(deleted_inode_not_unlinked, 214) \ - x(extent_overlapping, 215) \ - x(extent_in_missing_inode, 216) \ - x(extent_in_non_reg_inode, 217) \ - x(extent_past_end_of_inode, 218) \ - x(dirent_empty_name, 219) \ - x(dirent_val_too_big, 220) \ - x(dirent_name_too_long, 221) \ - x(dirent_name_embedded_nul, 222) \ - x(dirent_name_dot_or_dotdot, 223) \ - x(dirent_name_has_slash, 224) \ - x(dirent_d_type_wrong, 225) \ - x(inode_bi_parent_wrong, 226) \ - x(dirent_in_missing_dir_inode, 227) \ - x(dirent_in_non_dir_inode, 228) \ - x(dirent_to_missing_inode, 229) \ - x(dirent_to_missing_subvol, 230) \ - x(dirent_to_itself, 231) \ - x(quota_type_invalid, 232) \ - x(xattr_val_size_too_small, 233) \ - x(xattr_val_size_too_big, 234) \ - x(xattr_invalid_type, 235) \ - x(xattr_name_invalid_chars, 236) \ - x(xattr_in_missing_inode, 237) \ - x(root_subvol_missing, 238) \ - x(root_dir_missing, 239) \ - x(root_inode_not_dir, 240) \ - x(dir_loop, 241) \ - x(hash_table_key_duplicate, 242) \ - x(hash_table_key_wrong_offset, 243) \ - x(unlinked_inode_not_on_deleted_list, 244) \ - x(reflink_p_front_pad_bad, 245) \ - x(journal_entry_dup_same_device, 246) \ - x(inode_bi_subvol_missing, 247) \ - x(inode_bi_subvol_wrong, 248) \ - x(inode_points_to_missing_dirent, 249) \ - x(inode_points_to_wrong_dirent, 250) \ - x(inode_bi_parent_nonzero, 251) \ - x(dirent_to_missing_parent_subvol, 252) \ - x(dirent_not_visible_in_parent_subvol, 253) \ - x(subvol_fs_path_parent_wrong, 254) \ - x(subvol_root_fs_path_parent_nonzero, 255) \ - x(subvol_children_not_set, 256) \ - x(subvol_children_bad, 257) \ - x(subvol_loop, 258) \ - x(subvol_unreachable, 259) \ - x(btree_node_bkey_bad_u64s, 260) \ - x(btree_node_topology_empty_interior_node, 261) \ - x(btree_ptr_v2_min_key_bad, 262) \ - x(btree_root_unreadable_and_scan_found_nothing, 263) \ - x(snapshot_node_missing, 264) \ - x(dup_backpointer_to_bad_csum_extent, 265) \ - x(btree_bitmap_not_marked, 266) \ - x(sb_clean_entry_overrun, 267) \ - x(btree_ptr_v2_written_0, 268) \ - x(subvol_snapshot_bad, 269) \ - x(subvol_inode_bad, 270) \ - x(alloc_key_stripe_sectors_wrong, 271) \ - x(accounting_mismatch, 272) \ - x(accounting_replicas_not_marked, 273) \ - x(invalid_btree_id, 274) \ - x(alloc_key_io_time_bad, 275) +enum bch_fsck_flags { + FSCK_CAN_FIX = 1 << 0, + FSCK_CAN_IGNORE = 1 << 1, + FSCK_NEED_FSCK = 1 << 2, + FSCK_NO_RATELIMIT = 1 << 3, + FSCK_AUTOFIX = 1 << 4, +}; + +#define BCH_SB_ERRS() \ + x(clean_but_journal_not_empty, 0, 0) \ + x(dirty_but_no_journal_entries, 1, 0) \ + x(dirty_but_no_journal_entries_post_drop_nonflushes, 2, 0) \ + x(sb_clean_journal_seq_mismatch, 3, 0) \ + x(sb_clean_btree_root_mismatch, 4, 0) \ + x(sb_clean_missing, 5, 0) \ + x(jset_unsupported_version, 6, 0) \ + x(jset_unknown_csum, 7, 0) \ + x(jset_last_seq_newer_than_seq, 8, 0) \ + x(jset_past_bucket_end, 9, 0) \ + x(jset_seq_blacklisted, 10, 0) \ + x(journal_entries_missing, 11, 0) \ + x(journal_entry_replicas_not_marked, 12, 0) \ + x(journal_entry_past_jset_end, 13, 0) \ + x(journal_entry_replicas_data_mismatch, 14, 0) \ + x(journal_entry_bkey_u64s_0, 15, 0) \ + x(journal_entry_bkey_past_end, 16, 0) \ + x(journal_entry_bkey_bad_format, 17, 0) \ + x(journal_entry_bkey_invalid, 18, 0) \ + x(journal_entry_btree_root_bad_size, 19, 0) \ + x(journal_entry_blacklist_bad_size, 20, 0) \ + x(journal_entry_blacklist_v2_bad_size, 21, 0) \ + x(journal_entry_blacklist_v2_start_past_end, 22, 0) \ + x(journal_entry_usage_bad_size, 23, 0) \ + x(journal_entry_data_usage_bad_size, 24, 0) \ + x(journal_entry_clock_bad_size, 25, 0) \ + x(journal_entry_clock_bad_rw, 26, 0) \ + x(journal_entry_dev_usage_bad_size, 27, 0) \ + x(journal_entry_dev_usage_bad_dev, 28, 0) \ + x(journal_entry_dev_usage_bad_pad, 29, 0) \ + x(btree_node_unreadable, 30, 0) \ + x(btree_node_fault_injected, 31, 0) \ + x(btree_node_bad_magic, 32, 0) \ + x(btree_node_bad_seq, 33, 0) \ + x(btree_node_unsupported_version, 34, 0) \ + x(btree_node_bset_older_than_sb_min, 35, 0) \ + x(btree_node_bset_newer_than_sb, 36, 0) \ + x(btree_node_data_missing, 37, 0) \ + x(btree_node_bset_after_end, 38, 0) \ + x(btree_node_replicas_sectors_written_mismatch, 39, 0) \ + x(btree_node_replicas_data_mismatch, 40, 0) \ + x(bset_unknown_csum, 41, 0) \ + x(bset_bad_csum, 42, 0) \ + x(bset_past_end_of_btree_node, 43, 0) \ + x(bset_wrong_sector_offset, 44, 0) \ + x(bset_empty, 45, 0) \ + x(bset_bad_seq, 46, 0) \ + x(bset_blacklisted_journal_seq, 47, 0) \ + x(first_bset_blacklisted_journal_seq, 48, 0) \ + x(btree_node_bad_btree, 49, 0) \ + x(btree_node_bad_level, 50, 0) \ + x(btree_node_bad_min_key, 51, 0) \ + x(btree_node_bad_max_key, 52, 0) \ + x(btree_node_bad_format, 53, 0) \ + x(btree_node_bkey_past_bset_end, 54, 0) \ + x(btree_node_bkey_bad_format, 55, 0) \ + x(btree_node_bad_bkey, 56, 0) \ + x(btree_node_bkey_out_of_order, 57, 0) \ + x(btree_root_bkey_invalid, 58, 0) \ + x(btree_root_read_error, 59, 0) \ + x(btree_root_bad_min_key, 60, 0) \ + x(btree_root_bad_max_key, 61, 0) \ + x(btree_node_read_error, 62, 0) \ + x(btree_node_topology_bad_min_key, 63, 0) \ + x(btree_node_topology_bad_max_key, 64, 0) \ + x(btree_node_topology_overwritten_by_prev_node, 65, 0) \ + x(btree_node_topology_overwritten_by_next_node, 66, 0) \ + x(btree_node_topology_interior_node_empty, 67, 0) \ + x(fs_usage_hidden_wrong, 68, FSCK_AUTOFIX) \ + x(fs_usage_btree_wrong, 69, FSCK_AUTOFIX) \ + x(fs_usage_data_wrong, 70, FSCK_AUTOFIX) \ + x(fs_usage_cached_wrong, 71, FSCK_AUTOFIX) \ + x(fs_usage_reserved_wrong, 72, FSCK_AUTOFIX) \ + x(fs_usage_persistent_reserved_wrong, 73, FSCK_AUTOFIX) \ + x(fs_usage_nr_inodes_wrong, 74, FSCK_AUTOFIX) \ + x(fs_usage_replicas_wrong, 75, FSCK_AUTOFIX) \ + x(dev_usage_buckets_wrong, 76, FSCK_AUTOFIX) \ + x(dev_usage_sectors_wrong, 77, FSCK_AUTOFIX) \ + x(dev_usage_fragmented_wrong, 78, FSCK_AUTOFIX) \ + x(dev_usage_buckets_ec_wrong, 79, FSCK_AUTOFIX) \ + x(bkey_version_in_future, 80, 0) \ + x(bkey_u64s_too_small, 81, 0) \ + x(bkey_invalid_type_for_btree, 82, 0) \ + x(bkey_extent_size_zero, 83, 0) \ + x(bkey_extent_size_greater_than_offset, 84, 0) \ + x(bkey_size_nonzero, 85, 0) \ + x(bkey_snapshot_nonzero, 86, 0) \ + x(bkey_snapshot_zero, 87, 0) \ + x(bkey_at_pos_max, 88, 0) \ + x(bkey_before_start_of_btree_node, 89, 0) \ + x(bkey_after_end_of_btree_node, 90, 0) \ + x(bkey_val_size_nonzero, 91, 0) \ + x(bkey_val_size_too_small, 92, 0) \ + x(alloc_v1_val_size_bad, 93, 0) \ + x(alloc_v2_unpack_error, 94, 0) \ + x(alloc_v3_unpack_error, 95, 0) \ + x(alloc_v4_val_size_bad, 96, 0) \ + x(alloc_v4_backpointers_start_bad, 97, 0) \ + x(alloc_key_data_type_bad, 98, 0) \ + x(alloc_key_empty_but_have_data, 99, 0) \ + x(alloc_key_dirty_sectors_0, 100, 0) \ + x(alloc_key_data_type_inconsistency, 101, 0) \ + x(alloc_key_to_missing_dev_bucket, 102, 0) \ + x(alloc_key_cached_inconsistency, 103, 0) \ + x(alloc_key_cached_but_read_time_zero, 104, 0) \ + x(alloc_key_to_missing_lru_entry, 105, 0) \ + x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \ + x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \ + x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \ + x(alloc_key_cached_sectors_wrong, 109, FSCK_AUTOFIX) \ + x(alloc_key_stripe_wrong, 110, FSCK_AUTOFIX) \ + x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \ + x(bucket_sector_count_overflow, 112, 0) \ + x(bucket_metadata_type_mismatch, 113, 0) \ + x(need_discard_key_wrong, 114, 0) \ + x(freespace_key_wrong, 115, 0) \ + x(freespace_hole_missing, 116, 0) \ + x(bucket_gens_val_size_bad, 117, 0) \ + x(bucket_gens_key_wrong, 118, 0) \ + x(bucket_gens_hole_wrong, 119, 0) \ + x(bucket_gens_to_invalid_dev, 120, 0) \ + x(bucket_gens_to_invalid_buckets, 121, 0) \ + x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \ + x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ + x(need_discard_freespace_key_bad, 124, 0) \ + x(backpointer_bucket_offset_wrong, 125, 0) \ + x(backpointer_to_missing_device, 126, 0) \ + x(backpointer_to_missing_alloc, 127, 0) \ + x(backpointer_to_missing_ptr, 128, 0) \ + x(lru_entry_at_time_0, 129, 0) \ + x(lru_entry_to_invalid_bucket, 130, 0) \ + x(lru_entry_bad, 131, 0) \ + x(btree_ptr_val_too_big, 132, 0) \ + x(btree_ptr_v2_val_too_big, 133, 0) \ + x(btree_ptr_has_non_ptr, 134, 0) \ + x(extent_ptrs_invalid_entry, 135, 0) \ + x(extent_ptrs_no_ptrs, 136, 0) \ + x(extent_ptrs_too_many_ptrs, 137, 0) \ + x(extent_ptrs_redundant_crc, 138, 0) \ + x(extent_ptrs_redundant_stripe, 139, 0) \ + x(extent_ptrs_unwritten, 140, 0) \ + x(extent_ptrs_written_and_unwritten, 141, 0) \ + x(ptr_to_invalid_device, 142, 0) \ + x(ptr_to_duplicate_device, 143, 0) \ + x(ptr_after_last_bucket, 144, 0) \ + x(ptr_before_first_bucket, 145, 0) \ + x(ptr_spans_multiple_buckets, 146, 0) \ + x(ptr_to_missing_backpointer, 147, 0) \ + x(ptr_to_missing_alloc_key, 148, 0) \ + x(ptr_to_missing_replicas_entry, 149, 0) \ + x(ptr_to_missing_stripe, 150, 0) \ + x(ptr_to_incorrect_stripe, 151, 0) \ + x(ptr_gen_newer_than_bucket_gen, 152, 0) \ + x(ptr_too_stale, 153, 0) \ + x(stale_dirty_ptr, 154, 0) \ + x(ptr_bucket_data_type_mismatch, 155, 0) \ + x(ptr_cached_and_erasure_coded, 156, 0) \ + x(ptr_crc_uncompressed_size_too_small, 157, 0) \ + x(ptr_crc_csum_type_unknown, 158, 0) \ + x(ptr_crc_compression_type_unknown, 159, 0) \ + x(ptr_crc_redundant, 160, 0) \ + x(ptr_crc_uncompressed_size_too_big, 161, 0) \ + x(ptr_crc_nonce_mismatch, 162, 0) \ + x(ptr_stripe_redundant, 163, 0) \ + x(reservation_key_nr_replicas_invalid, 164, 0) \ + x(reflink_v_refcount_wrong, 165, 0) \ + x(reflink_p_to_missing_reflink_v, 166, 0) \ + x(stripe_pos_bad, 167, 0) \ + x(stripe_val_size_bad, 168, 0) \ + x(stripe_sector_count_wrong, 169, 0) \ + x(snapshot_tree_pos_bad, 170, 0) \ + x(snapshot_tree_to_missing_snapshot, 171, 0) \ + x(snapshot_tree_to_missing_subvol, 172, 0) \ + x(snapshot_tree_to_wrong_subvol, 173, 0) \ + x(snapshot_tree_to_snapshot_subvol, 174, 0) \ + x(snapshot_pos_bad, 175, 0) \ + x(snapshot_parent_bad, 176, 0) \ + x(snapshot_children_not_normalized, 177, 0) \ + x(snapshot_child_duplicate, 178, 0) \ + x(snapshot_child_bad, 179, 0) \ + x(snapshot_skiplist_not_normalized, 180, 0) \ + x(snapshot_skiplist_bad, 181, 0) \ + x(snapshot_should_not_have_subvol, 182, 0) \ + x(snapshot_to_bad_snapshot_tree, 183, 0) \ + x(snapshot_bad_depth, 184, 0) \ + x(snapshot_bad_skiplist, 185, 0) \ + x(subvol_pos_bad, 186, 0) \ + x(subvol_not_master_and_not_snapshot, 187, 0) \ + x(subvol_to_missing_root, 188, 0) \ + x(subvol_root_wrong_bi_subvol, 189, 0) \ + x(bkey_in_missing_snapshot, 190, 0) \ + x(inode_pos_inode_nonzero, 191, 0) \ + x(inode_pos_blockdev_range, 192, 0) \ + x(inode_unpack_error, 193, 0) \ + x(inode_str_hash_invalid, 194, 0) \ + x(inode_v3_fields_start_bad, 195, 0) \ + x(inode_snapshot_mismatch, 196, 0) \ + x(inode_unlinked_but_clean, 197, 0) \ + x(inode_unlinked_but_nlink_nonzero, 198, 0) \ + x(inode_checksum_type_invalid, 199, 0) \ + x(inode_compression_type_invalid, 200, 0) \ + x(inode_subvol_root_but_not_dir, 201, 0) \ + x(inode_i_size_dirty_but_clean, 202, 0) \ + x(inode_i_sectors_dirty_but_clean, 203, 0) \ + x(inode_i_sectors_wrong, 204, 0) \ + x(inode_dir_wrong_nlink, 205, 0) \ + x(inode_dir_multiple_links, 206, 0) \ + x(inode_multiple_links_but_nlink_0, 207, 0) \ + x(inode_wrong_backpointer, 208, 0) \ + x(inode_wrong_nlink, 209, 0) \ + x(inode_unreachable, 210, 0) \ + x(deleted_inode_but_clean, 211, 0) \ + x(deleted_inode_missing, 212, 0) \ + x(deleted_inode_is_dir, 213, 0) \ + x(deleted_inode_not_unlinked, 214, 0) \ + x(extent_overlapping, 215, 0) \ + x(extent_in_missing_inode, 216, 0) \ + x(extent_in_non_reg_inode, 217, 0) \ + x(extent_past_end_of_inode, 218, 0) \ + x(dirent_empty_name, 219, 0) \ + x(dirent_val_too_big, 220, 0) \ + x(dirent_name_too_long, 221, 0) \ + x(dirent_name_embedded_nul, 222, 0) \ + x(dirent_name_dot_or_dotdot, 223, 0) \ + x(dirent_name_has_slash, 224, 0) \ + x(dirent_d_type_wrong, 225, 0) \ + x(inode_bi_parent_wrong, 226, 0) \ + x(dirent_in_missing_dir_inode, 227, 0) \ + x(dirent_in_non_dir_inode, 228, 0) \ + x(dirent_to_missing_inode, 229, 0) \ + x(dirent_to_missing_subvol, 230, 0) \ + x(dirent_to_itself, 231, 0) \ + x(quota_type_invalid, 232, 0) \ + x(xattr_val_size_too_small, 233, 0) \ + x(xattr_val_size_too_big, 234, 0) \ + x(xattr_invalid_type, 235, 0) \ + x(xattr_name_invalid_chars, 236, 0) \ + x(xattr_in_missing_inode, 237, 0) \ + x(root_subvol_missing, 238, 0) \ + x(root_dir_missing, 239, 0) \ + x(root_inode_not_dir, 240, 0) \ + x(dir_loop, 241, 0) \ + x(hash_table_key_duplicate, 242, 0) \ + x(hash_table_key_wrong_offset, 243, 0) \ + x(unlinked_inode_not_on_deleted_list, 244, 0) \ + x(reflink_p_front_pad_bad, 245, 0) \ + x(journal_entry_dup_same_device, 246, 0) \ + x(inode_bi_subvol_missing, 247, 0) \ + x(inode_bi_subvol_wrong, 248, 0) \ + x(inode_points_to_missing_dirent, 249, 0) \ + x(inode_points_to_wrong_dirent, 250, 0) \ + x(inode_bi_parent_nonzero, 251, 0) \ + x(dirent_to_missing_parent_subvol, 252, 0) \ + x(dirent_not_visible_in_parent_subvol, 253, 0) \ + x(subvol_fs_path_parent_wrong, 254, 0) \ + x(subvol_root_fs_path_parent_nonzero, 255, 0) \ + x(subvol_children_not_set, 256, 0) \ + x(subvol_children_bad, 257, 0) \ + x(subvol_loop, 258, 0) \ + x(subvol_unreachable, 259, 0) \ + x(btree_node_bkey_bad_u64s, 260, 0) \ + x(btree_node_topology_empty_interior_node, 261, 0) \ + x(btree_ptr_v2_min_key_bad, 262, 0) \ + x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ + x(snapshot_node_missing, 264, 0) \ + x(dup_backpointer_to_bad_csum_extent, 265, 0) \ + x(btree_bitmap_not_marked, 266, 0) \ + x(sb_clean_entry_overrun, 267, 0) \ + x(btree_ptr_v2_written_0, 268, 0) \ + x(subvol_snapshot_bad, 269, 0) \ + x(subvol_inode_bad, 270, 0) \ + x(alloc_key_stripe_sectors_wrong, 271, 0) \ + x(accounting_mismatch, 272, 0) \ + x(accounting_replicas_not_marked, 273, 0) \ + x(invalid_btree_id, 274, 0) \ + x(alloc_key_io_time_bad, 275, 0) enum bch_sb_error_id { -#define x(t, n) BCH_FSCK_ERR_##t = n, +#define x(t, n, ...) BCH_FSCK_ERR_##t = n, BCH_SB_ERRS() #undef x BCH_SB_ERR_MAX From c6cab97cdfd14571a17b9453b1d339eaa3b77c0b Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Thu, 20 Jun 2024 09:22:42 +0800 Subject: [PATCH 505/778] bcachefs: fix alignment of VMA for memory mapped files on THP With CONFIG_READ_ONLY_THP_FOR_FS, the Linux kernel supports using THPs for read-only mmapped files, such as shared libraries. However, the kernel makes no attempt to actually align those mappings on 2MB boundaries, which makes it impossible to use those THPs most of the time. This issue applies to general file mapping THP as well as existing setups using CONFIG_READ_ONLY_THP_FOR_FS. This is easily fixed by using thp_get_unmapped_area for the unmapped_area function in bcachefs, which is what ext2, ext4, fuse, xfs and btrfs all use. Similar to commit b0c582233a85 ("btrfs: fix alignment of VMA for memory mapped files on THP"). Signed-off-by: Youling Tang Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 77126992dba8..8314d3e1582d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1157,6 +1157,7 @@ static const struct file_operations bch_file_operations = { .read_iter = bch2_read_iter, .write_iter = bch2_write_iter, .mmap = bch2_mmap, + .get_unmapped_area = thp_get_unmapped_area, .fsync = bch2_fsync, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, From 8ad04409921f4c405859a651c9a9e5ff5eb5e8a9 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 18 Jun 2024 14:53:11 -0700 Subject: [PATCH 506/778] bnxt_en: Update firmware interface to 1.10.3.44 The relevant change is the max_tso_segs value returned by firmware in the HWRM_FUNC_QCAPS response. This value will be used in the next patch to cap the TSO segments. Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240618215313.29631-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 311 ++++++++++-------- 1 file changed, 178 insertions(+), 133 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 06ea86c80be1..f219709f9563 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -2,7 +2,7 @@ * * Copyright (c) 2014-2016 Broadcom Corporation * Copyright (c) 2014-2018 Broadcom Limited - * Copyright (c) 2018-2023 Broadcom Inc. + * Copyright (c) 2018-2024 Broadcom Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -500,7 +500,11 @@ struct cmd_nums { #define HWRM_TFC_IF_TBL_GET 0x399UL #define HWRM_TFC_TBL_SCOPE_CONFIG_GET 0x39aUL #define HWRM_TFC_RESC_USAGE_QUERY 0x39bUL + #define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x39cUL + #define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x39dUL + #define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x39eUL #define HWRM_SV 0x400UL + #define HWRM_DBG_LOG_BUFFER_FLUSH 0xff0fUL #define HWRM_DBG_READ_DIRECT 0xff10UL #define HWRM_DBG_READ_INDIRECT 0xff11UL #define HWRM_DBG_WRITE_DIRECT 0xff12UL @@ -609,8 +613,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 39 -#define HWRM_VERSION_STR "1.10.3.39" +#define HWRM_VERSION_RSVD 44 +#define HWRM_VERSION_STR "1.10.3.44" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -664,6 +668,7 @@ struct hwrm_ver_get_output { #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TFLIB_SUPPORTED 0x2000UL #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_SOC_CAPABLE 0x10000UL u8 roce_fw_maj_8b; u8 roce_fw_min_8b; u8 roce_fw_bld_8b; @@ -843,7 +848,9 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_HW_DOORBELL_RECOVERY_READ_ERROR 0x49UL #define ASYNC_EVENT_CMPL_EVENT_ID_CTX_ERROR 0x4aUL #define ASYNC_EVENT_CMPL_EVENT_ID_UDCC_SESSION_CHANGE 0x4bUL - #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x4cUL + #define ASYNC_EVENT_CMPL_EVENT_ID_DBG_BUF_PRODUCER 0x4cUL + #define ASYNC_EVENT_CMPL_EVENT_ID_PEER_MMAP_CHANGE 0x4dUL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x4eUL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -1326,13 +1333,13 @@ struct hwrm_async_event_cmpl_error_report_base { u8 timestamp_lo; __le16 timestamp_hi; __le32 event_data1; - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0 - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED @@ -1814,6 +1821,9 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT2_SW_MAX_RESOURCE_LIMITS_SUPPORTED 0x800000UL #define FUNC_QCAPS_RESP_FLAGS_EXT2_TF_INGRESS_NIC_FLOW_SUPPORTED 0x1000000UL #define FUNC_QCAPS_RESP_FLAGS_EXT2_LPBK_STATS_SUPPORTED 0x2000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_TF_EGRESS_NIC_FLOW_SUPPORTED 0x4000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_MULTI_LOSSLESS_QUEUES_SUPPORTED 0x8000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_PEER_MMAP_SUPPORTED 0x10000000UL __le16 tunnel_disable_flag; #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_VXLAN 0x1UL #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_NGE 0x2UL @@ -1828,7 +1838,7 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_XID_PARTITION_CAP_RX_CK 0x2UL u8 device_serial_number[8]; __le16 ctxs_per_partition; - u8 unused_2[2]; + __le16 max_tso_segs; __le32 roce_vf_max_av; __le32 roce_vf_max_cq; __le32 roce_vf_max_mrw; @@ -2449,6 +2459,7 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_FLAGS_NPAR_1_2_SUPPORT 0x200UL #define FUNC_DRV_RGTR_REQ_FLAGS_ASYM_QUEUE_CFG_SUPPORT 0x400UL #define FUNC_DRV_RGTR_REQ_FLAGS_TF_INGRESS_NIC_FLOW_MODE 0x800UL + #define FUNC_DRV_RGTR_REQ_FLAGS_TF_EGRESS_NIC_FLOW_MODE 0x1000UL __le32 enables; #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL #define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL @@ -3660,22 +3671,24 @@ struct hwrm_func_backing_store_cfg_v2_input { __le16 target_id; __le64 resp_addr; __le16 type; - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_XID_PARTITION 0x1dUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT_TRACE 0x1eUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT2_TRACE 0x1fUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT_TRACE 0x20UL @@ -3772,18 +3785,20 @@ struct hwrm_func_backing_store_qcfg_v2_output { __le16 seq_id; __le16 resp_len; __le16 type; - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TBL_SCOPE 0x1cUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_XID_PARTITION 0x1dUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT_TRACE 0x1eUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT2_TRACE 0x1fUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT_TRACE 0x20UL @@ -3876,22 +3891,24 @@ struct hwrm_func_backing_store_qcaps_v2_input { __le16 target_id; __le64 resp_addr; __le16 type; - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE 0x1cUL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION 0x1dUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT_TRACE 0x1eUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT2_TRACE 0x1fUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT_TRACE 0x20UL @@ -3911,22 +3928,24 @@ struct hwrm_func_backing_store_qcaps_v2_output { __le16 seq_id; __le16 resp_len; __le16 type; - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SQ_DB_SHADOW 0x16UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RQ_DB_SHADOW 0x17UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ_DB_SHADOW 0x18UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TBL_SCOPE 0x1cUL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_XID_PARTITION 0x1dUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RX_CK 0x14UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SQ_DB_SHADOW 0x16UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RQ_DB_SHADOW 0x17UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ_DB_SHADOW 0x18UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ_DB_SHADOW 0x19UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TBL_SCOPE 0x1cUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_XID_PARTITION 0x1dUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT_TRACE 0x1eUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT2_TRACE 0x1fUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT_TRACE 0x20UL @@ -4202,7 +4221,8 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_100GB_PAM4_112 0x3eaUL #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_200GB_PAM4_112 0x7d2UL #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_400GB_PAM4_112 0xfa2UL - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_LAST PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_400GB_PAM4_112 + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_800GB_PAM4_112 0x1f42UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_LAST PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_800GB_PAM4_112 __le16 auto_link_speeds2_mask; #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_1GB 0x1UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_10GB 0x2UL @@ -4217,6 +4237,7 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_100GB_PAM4_112 0x400UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_200GB_PAM4_112 0x800UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_400GB_PAM4_112 0x1000UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEEDS2_MASK_800GB_PAM4_112 0x2000UL u8 unused_2[6]; }; @@ -4292,6 +4313,7 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL #define PORT_PHY_QCFG_RESP_LINK_SPEED_200GB 0x7d0UL #define PORT_PHY_QCFG_RESP_LINK_SPEED_400GB 0xfa0UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_800GB 0x1f40UL #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB 0xffffUL #define PORT_PHY_QCFG_RESP_LINK_SPEED_LAST PORT_PHY_QCFG_RESP_LINK_SPEED_10MB u8 duplex_cfg; @@ -4451,7 +4473,13 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASESR4 0x35UL #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASELR4 0x36UL #define PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASEER4 0x37UL - #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASEER4 + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASECR8 0x38UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASESR8 0x39UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASELR8 0x3aUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEER8 0x3bUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEFR8 0x3cUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEDR8 0x3dUL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_LAST PORT_PHY_QCFG_RESP_PHY_TYPE_800G_BASEDR8 u8 media_type; #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL @@ -5049,33 +5077,43 @@ struct hwrm_port_qstats_ext_output { u8 valid; }; -/* hwrm_port_lpbk_qstats_input (size:128b/16B) */ +/* hwrm_port_lpbk_qstats_input (size:256b/32B) */ struct hwrm_port_lpbk_qstats_input { __le16 req_type; __le16 cmpl_ring; __le16 seq_id; __le16 target_id; __le64 resp_addr; + __le16 lpbk_stat_size; + u8 flags; + #define PORT_LPBK_QSTATS_REQ_FLAGS_COUNTER_MASK 0x1UL + u8 unused_0[5]; + __le64 lpbk_stat_host_addr; }; -/* hwrm_port_lpbk_qstats_output (size:768b/96B) */ +/* hwrm_port_lpbk_qstats_output (size:128b/16B) */ struct hwrm_port_lpbk_qstats_output { __le16 error_code; __le16 req_type; __le16 seq_id; __le16 resp_len; + __le16 lpbk_stat_size; + u8 unused_0[5]; + u8 valid; +}; + +/* port_lpbk_stats (size:640b/80B) */ +struct port_lpbk_stats { __le64 lpbk_ucast_frames; __le64 lpbk_mcast_frames; __le64 lpbk_bcast_frames; __le64 lpbk_ucast_bytes; __le64 lpbk_mcast_bytes; __le64 lpbk_bcast_bytes; - __le64 tx_stat_discard; - __le64 tx_stat_error; - __le64 rx_stat_discard; - __le64 rx_stat_error; - u8 unused_0[7]; - u8 valid; + __le64 lpbk_tx_discards; + __le64 lpbk_tx_errors; + __le64 lpbk_rx_discards; + __le64 lpbk_rx_errors; }; /* hwrm_port_ecn_qstats_input (size:256b/32B) */ @@ -5140,13 +5178,15 @@ struct hwrm_port_clr_stats_output { u8 valid; }; -/* hwrm_port_lpbk_clr_stats_input (size:128b/16B) */ +/* hwrm_port_lpbk_clr_stats_input (size:192b/24B) */ struct hwrm_port_lpbk_clr_stats_input { __le16 req_type; __le16 cmpl_ring; __le16 seq_id; __le16 target_id; __le64 resp_addr; + __le16 port_id; + u8 unused_0[6]; }; /* hwrm_port_lpbk_clr_stats_output (size:128b/16B) */ @@ -5287,10 +5327,11 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_100G 0x2UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_PAM4_SPEEDS_FORCE_MODE_200G 0x4UL __le16 flags2; - #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL - #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL - #define PORT_PHY_QCAPS_RESP_FLAGS2_BANK_ADDR_SUPPORTED 0x4UL - #define PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED 0x8UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_BANK_ADDR_SUPPORTED 0x4UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED 0x8UL + #define PORT_PHY_QCAPS_RESP_FLAGS2_REMOTE_LPBK_UNSUPPORTED 0x10UL u8 internal_port_cnt; u8 unused_0; __le16 supported_speeds2_force_mode; @@ -7443,17 +7484,17 @@ struct hwrm_cfa_l2_filter_cfg_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH 0x1UL - #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX 0x0UL - #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX 0x1UL - #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX - #define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP 0x2UL - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_MASK 0xcUL - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_SFT 2 - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_NO_ROCE_L2 (0x0UL << 2) - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 2) - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 2) - #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH 0x1UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_TX 0x0UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX 0x1UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_PATH_RX + #define CFA_L2_FILTER_CFG_REQ_FLAGS_DROP 0x2UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_MASK 0xcUL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_SFT 2 + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_NO_ROCE_L2 (0x0UL << 2) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 2) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 2) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_MASK 0x30UL #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_SFT 4 #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_NO_UPDATE (0x0UL << 4) @@ -8520,17 +8561,17 @@ struct hwrm_tunnel_dst_port_query_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ECPRI 0xeUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_SRV6 0xfUL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GRE 0x11UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ECPRI 0xeUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_SRV6 0xfUL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GRE 0x11UL #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL @@ -8576,17 +8617,17 @@ struct hwrm_tunnel_dst_port_alloc_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ECPRI 0xeUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_SRV6 0xfUL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GRE 0x11UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ECPRI 0xeUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_SRV6 0xfUL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GRE 0x11UL #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL @@ -8635,17 +8676,17 @@ struct hwrm_tunnel_dst_port_free_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ECPRI 0xeUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_SRV6 0xfUL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GRE 0x11UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_IPGRE_V1 0xaUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_L2_ETYPE 0xbUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE_V6 0xcUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_CUSTOM_GRE 0xdUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ECPRI 0xeUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_SRV6 0xfUL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GRE 0x11UL #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL @@ -9109,6 +9150,7 @@ struct hwrm_struct_hdr { #define STRUCT_HDR_STRUCT_ID_LLDP_GENERIC 0x424UL #define STRUCT_HDR_STRUCT_ID_LLDP_DEVICE 0x426UL #define STRUCT_HDR_STRUCT_ID_POWER_BKUP 0x427UL + #define STRUCT_HDR_STRUCT_ID_PEER_MMAP 0x429UL #define STRUCT_HDR_STRUCT_ID_AFM_OPAQUE 0x1UL #define STRUCT_HDR_STRUCT_ID_PORT_DESCRIPTION 0xaUL #define STRUCT_HDR_STRUCT_ID_RSS_V2 0x64UL @@ -9758,6 +9800,9 @@ struct hwrm_dbg_coredump_initiate_input { __le16 instance; __le16 unused_0; u8 seg_flags; + #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_LIVE_DATA 0x1UL + #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_CRASH_DATA 0x2UL + #define DBG_COREDUMP_INITIATE_REQ_SEG_FLAGS_COLLECT_CTX_L1_CACHE 0x4UL u8 unused_1[7]; }; @@ -10433,13 +10478,13 @@ struct hwrm_selftest_irq_output { /* dbc_dbc (size:64b/8B) */ struct dbc_dbc { - u32 index; + __le32 index; #define DBC_DBC_INDEX_MASK 0xffffffUL #define DBC_DBC_INDEX_SFT 0 #define DBC_DBC_EPOCH 0x1000000UL #define DBC_DBC_TOGGLE_MASK 0x6000000UL #define DBC_DBC_TOGGLE_SFT 25 - u32 type_path_xid; + __le32 type_path_xid; #define DBC_DBC_XID_MASK 0xfffffUL #define DBC_DBC_XID_SFT 0 #define DBC_DBC_PATH_MASK 0x3000000UL From b7bfcb4c7ce44fd0070ce8bccbc91c56341f05c1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 18 Jun 2024 14:53:12 -0700 Subject: [PATCH 507/778] bnxt_en: Set TSO max segs on devices with limits Firmware will now advertise a non-zero TSO max segments if the device has a limit. 0 means no limit. The latest 5760X chip (early revs) has a limit of 2047 that cannot be exceeded. If exceeded, the chip will send out just a small number of segments. Call netif_set_tso_max_segs() if the device has a limit. Fixes: 2012a6abc876 ("bnxt_en: Add 5760X (P7) PCI IDs") Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240618215313.29631-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c437ca1c0fd3..89d29d6d7517 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8996,6 +8996,7 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); #endif } + bp->tso_max_segs = le16_to_cpu(resp->max_tso_segs); hwrm_func_qcaps_exit: hwrm_req_drop(bp, req); @@ -15363,6 +15364,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->priv_flags |= IFF_UNICAST_FLT; netif_set_tso_max_size(dev, GSO_MAX_SIZE); + if (bp->tso_max_segs) + netif_set_tso_max_segs(dev, bp->tso_max_segs); dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_RX_SG; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index bbc7edccd5a4..9cf0acfa04e5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2318,6 +2318,7 @@ struct bnxt { u8 rss_hash_key_updated:1; u16 max_mtu; + u16 tso_max_segs; u8 max_tc; u8 max_lltc; /* lossless TCs */ struct bnxt_queue_info q_info[BNXT_MAX_QUEUE]; From 1e7962114c10957fe4d10a15eb714578a394e90b Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Tue, 18 Jun 2024 14:53:13 -0700 Subject: [PATCH 508/778] bnxt_en: Restore PTP tx_avail count in case of skb_pad() error The current code only restores PTP tx_avail count when we get DMA mapping errors. Fix it so that the PTP tx_avail count will be restored for both DMA mapping errors and skb_pad() errors. Otherwise PTP TX timestamp will not be available after a PTP packet hits the skb_pad() error. Fixes: 83bb623c968e ("bnxt_en: Transmit and retrieve packet timestamps") Reviewed-by: Andy Gospodarek Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240618215313.29631-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 89d29d6d7517..a6d69a45fa01 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -732,9 +732,6 @@ tx_done: return NETDEV_TX_OK; tx_dma_error: - if (BNXT_TX_PTP_IS_SET(lflags)) - atomic_inc(&bp->ptp_cfg->tx_avail); - last_frag = i; /* start back at beginning and unmap skb */ @@ -756,6 +753,8 @@ tx_dma_error: tx_free: dev_kfree_skb_any(skb); tx_kick_pending: + if (BNXT_TX_PTP_IS_SET(lflags)) + atomic_inc(&bp->ptp_cfg->tx_avail); if (txr->kick_pending) bnxt_txr_db_kick(bp, txr, txr->tx_prod); txr->tx_buf_ring[txr->tx_prod].skb = NULL; From 48dea8f7bb011608fd969749a1980f8311ef45f2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 19 Jun 2024 08:17:48 +0200 Subject: [PATCH 509/778] selftests: virtio_net: add forgotten config options One may use tools/testing/selftests/drivers/net/virtio_net/config for example for vng build command like this one: $ vng -v -b -f tools/testing/selftests/drivers/net/virtio_net/config In that case, the needed kernel config options are not turned on. Add the missed kernel config options. Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20240617072614.75fe79e7@kernel.org/ Reported-by: Matthieu Baerts Closes: https://lore.kernel.org/netdev/1a63f209-b1d4-4809-bc30-295a5cafa296@kernel.org/ Fixes: ccfaed04db5e ("selftests: virtio_net: add initial tests") Signed-off-by: Jiri Pirko Reviewed-by: Xuan Zhuo Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20240619061748.1869404-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/virtio_net/config | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config index f35de0542b60..bcf7555eaffe 100644 --- a/tools/testing/selftests/drivers/net/virtio_net/config +++ b/tools/testing/selftests/drivers/net/virtio_net/config @@ -1,2 +1,8 @@ -CONFIG_VIRTIO_NET=y +CONFIG_BPF_SYSCALL=y +CONFIG_CGROUP_BPF=y +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=m CONFIG_VIRTIO_DEBUG=y +CONFIG_VIRTIO_NET=y From fba383985354e83474f95f36d7c65feb75dba19d Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 19 Jun 2024 15:28:03 +0200 Subject: [PATCH 510/778] net: usb: rtl8150 fix unintiatilzed variables in rtl8150_get_link_ksettings This functions retrieves values by passing a pointer. As the function that retrieves them can fail before touching the pointers, the variables must be initialized. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+5186630949e3c55f0799@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20240619132816.11526-1-oneukum@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/rtl8150.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 97afd7335d86..01a3b2417a54 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -778,7 +778,8 @@ static int rtl8150_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *ecmd) { rtl8150_t *dev = netdev_priv(netdev); - short lpa, bmcr; + short lpa = 0; + short bmcr = 0; u32 supported; supported = (SUPPORTED_10baseT_Half | From fbd64f902b93fe9658b855b9892ae59ef6ea22b9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 14 Jun 2024 11:00:49 +0300 Subject: [PATCH 511/778] mmc: sdhci: Do not invert write-protect twice mmc_of_parse() reads device property "wp-inverted" and sets MMC_CAP2_RO_ACTIVE_HIGH if it is true. MMC_CAP2_RO_ACTIVE_HIGH is used to invert a write-protect (AKA read-only) GPIO value. sdhci_get_property() also reads "wp-inverted" and sets SDHCI_QUIRK_INVERTED_WRITE_PROTECT which is used to invert the write-protect value as well but also acts upon a value read out from the SDHCI_PRESENT_STATE register. Many drivers call both mmc_of_parse() and sdhci_get_property(), so that both MMC_CAP2_RO_ACTIVE_HIGH and SDHCI_QUIRK_INVERTED_WRITE_PROTECT will be set if the controller has device property "wp-inverted". Amend the logic in sdhci_check_ro() to allow for that possibility, so that the write-protect value is not inverted twice. Also do not invert the value if it is a negative error value. Note that callers treat an error the same as not-write-protected, so the result is functionally the same in that case. Also do not invert the value if sdhci host operation ->get_ro() is used. None of the users of that callback set SDHCI_QUIRK_INVERTED_WRITE_PROTECT directly or indirectly, but two do call mmc_gpio_get_ro(), so leave it to them to deal with that if they ever set SDHCI_QUIRK_INVERTED_WRITE_PROTECT in the future. Fixes: 6d5cd068ee59 ("mmc: sdhci: use WP GPIO in sdhci_check_ro()") Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240614080051.4005-2-adrian.hunter@intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 746f4cf7ab03..81b81d7bb3d8 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2515,26 +2515,34 @@ EXPORT_SYMBOL_GPL(sdhci_get_cd_nogpio); static int sdhci_check_ro(struct sdhci_host *host) { + bool allow_invert = false; unsigned long flags; int is_readonly; spin_lock_irqsave(&host->lock, flags); - if (host->flags & SDHCI_DEVICE_DEAD) + if (host->flags & SDHCI_DEVICE_DEAD) { is_readonly = 0; - else if (host->ops->get_ro) + } else if (host->ops->get_ro) { is_readonly = host->ops->get_ro(host); - else if (mmc_can_gpio_ro(host->mmc)) + } else if (mmc_can_gpio_ro(host->mmc)) { is_readonly = mmc_gpio_get_ro(host->mmc); - else + /* Do not invert twice */ + allow_invert = !(host->mmc->caps2 & MMC_CAP2_RO_ACTIVE_HIGH); + } else { is_readonly = !(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_WRITE_PROTECT); + allow_invert = true; + } spin_unlock_irqrestore(&host->lock, flags); - /* This quirk needs to be replaced by a callback-function later */ - return host->quirks & SDHCI_QUIRK_INVERTED_WRITE_PROTECT ? - !is_readonly : is_readonly; + if (is_readonly >= 0 && + allow_invert && + (host->quirks & SDHCI_QUIRK_INVERTED_WRITE_PROTECT)) + is_readonly = !is_readonly; + + return is_readonly; } #define SAMPLE_COUNT 5 From ab069ce125965a5e282f7b53b86aee76ab32975c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 14 Jun 2024 11:00:50 +0300 Subject: [PATCH 512/778] mmc: sdhci: Do not lock spinlock around mmc_gpio_get_ro() sdhci_check_ro() can call mmc_gpio_get_ro() while holding the sdhci host->lock spinlock. That would be a problem if the GPIO access done by mmc_gpio_get_ro() needed to sleep. However, host->lock is not needed anyway. The mmc core ensures that host operations do not race with each other, and asynchronous callbacks like the interrupt handler, software timeouts, completion work etc, cannot affect sdhci_check_ro(). So remove the locking. Fixes: 6d5cd068ee59 ("mmc: sdhci: use WP GPIO in sdhci_check_ro()") Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240614080051.4005-3-adrian.hunter@intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 81b81d7bb3d8..112584aa0772 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2516,11 +2516,8 @@ EXPORT_SYMBOL_GPL(sdhci_get_cd_nogpio); static int sdhci_check_ro(struct sdhci_host *host) { bool allow_invert = false; - unsigned long flags; int is_readonly; - spin_lock_irqsave(&host->lock, flags); - if (host->flags & SDHCI_DEVICE_DEAD) { is_readonly = 0; } else if (host->ops->get_ro) { @@ -2535,8 +2532,6 @@ static int sdhci_check_ro(struct sdhci_host *host) allow_invert = true; } - spin_unlock_irqrestore(&host->lock, flags); - if (is_readonly >= 0 && allow_invert && (host->quirks & SDHCI_QUIRK_INVERTED_WRITE_PROTECT)) From 90f3feb24172185f1832636264943e8b5e289245 Mon Sep 17 00:00:00 2001 From: Elinor Montmasson Date: Thu, 20 Jun 2024 15:25:03 +0200 Subject: [PATCH 513/778] ASoC: fsl-asoc-card: set priv->pdev before using it priv->pdev pointer was set after being used in fsl_asoc_card_audmux_init(). Move this assignment at the start of the probe function, so sub-functions can correctly use pdev through priv. fsl_asoc_card_audmux_init() dereferences priv->pdev to get access to the dev struct, used with dev_err macros. As priv is zero-initialised, there would be a NULL pointer dereference. Note that if priv->dev is dereferenced before assignment but never used, for example if there is no error to be printed, the driver won't crash probably due to compiler optimisations. Fixes: 708b4351f08c ("ASoC: fsl: Add Freescale Generic ASoC Sound Card with ASRC support") Signed-off-by: Elinor Montmasson Link: https://patch.msgid.link/20240620132511.4291-2-elinor.montmasson@savoirfairelinux.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl-asoc-card.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 5ddc0c2fe53f..eb67689dcd6e 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -559,6 +559,8 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; + priv->pdev = pdev; + cpu_np = of_parse_phandle(np, "audio-cpu", 0); /* Give a chance to old DT binding */ if (!cpu_np) @@ -787,7 +789,6 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) } /* Initialize sound card */ - priv->pdev = pdev; priv->card.dev = &pdev->dev; priv->card.owner = THIS_MODULE; ret = snd_soc_of_parse_card_name(&priv->card, "model"); From 7c7b1be19b228b450c2945ec379d7fc6bfef9852 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 19 Jun 2024 14:27:02 +0200 Subject: [PATCH 514/778] Input: ads7846 - use spi_device_id table As the driver supports more devices over time the single MODULE_ALIAS is complete and raises several warnings: SPI driver ads7846 has no spi_device_id for ti,tsc2046 SPI driver ads7846 has no spi_device_id for ti,ads7843 SPI driver ads7846 has no spi_device_id for ti,ads7845 SPI driver ads7846 has no spi_device_id for ti,ads7873 Fix this by adding a spi_device_id table and removing the manual MODULE_ALIAS. Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20240619122703.2081476-1-alexander.stein@ew.tq-group.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index d2bbb436a77d..4d13db13b9e5 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1111,6 +1111,16 @@ static const struct of_device_id ads7846_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ads7846_dt_ids); +static const struct spi_device_id ads7846_spi_ids[] = { + { "tsc2046", 7846 }, + { "ads7843", 7843 }, + { "ads7845", 7845 }, + { "ads7846", 7846 }, + { "ads7873", 7873 }, + { }, +}; +MODULE_DEVICE_TABLE(spi, ads7846_spi_ids); + static const struct ads7846_platform_data *ads7846_get_props(struct device *dev) { struct ads7846_platform_data *pdata; @@ -1386,10 +1396,10 @@ static struct spi_driver ads7846_driver = { }, .probe = ads7846_probe, .remove = ads7846_remove, + .id_table = ads7846_spi_ids, }; module_spi_driver(ads7846_driver); MODULE_DESCRIPTION("ADS7846 TouchScreen Driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("spi:ads7846"); From 5a5696a11f7e0c5ce3185b6234d02996f8267108 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 20 Jun 2024 09:42:18 -0700 Subject: [PATCH 515/778] nvme-apple: add missing MODULE_DESCRIPTION() make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-apple.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Reviewed-by: Eric Curtin Reviewed-by: Sven Peter Reviewed-by: Sagi Grimberg Signed-off-by: Jeff Johnson Signed-off-by: Keith Busch --- drivers/nvme/host/apple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index dd6ec0865141..0cfa39361d3b 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1602,4 +1602,5 @@ static struct platform_driver apple_nvme_driver = { module_platform_driver(apple_nvme_driver); MODULE_AUTHOR("Sven Peter "); +MODULE_DESCRIPTION("Apple ANS NVM Express device driver"); MODULE_LICENSE("GPL"); From fd80731e5e9d1402cb2f85022a6abf9b1982ec5f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 20 Jun 2024 11:37:39 +0200 Subject: [PATCH 516/778] usb: gadget: printer: SS+ support We need to treat super speed plus as super speed, not the default, which is full speed. Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20240620093800.28901-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_printer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index ba7d180cc9e6..4c0b7c2970f1 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -213,6 +213,7 @@ static inline struct usb_endpoint_descriptor *ep_desc(struct usb_gadget *gadget, struct usb_endpoint_descriptor *ss) { switch (gadget->speed) { + case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: return ss; case USB_SPEED_HIGH: From e587a7633dfee8987a999cf253f7c52a8e09276c Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 20 Jun 2024 13:40:26 +0200 Subject: [PATCH 517/778] usb: gadget: printer: fix races against disable printer_read() and printer_write() guard against the race against disable() by checking the dev->interface flag, which in turn is guarded by a spinlock. These functions, however, drop the lock on multiple occasions. This means that the test has to be redone after reacquiring the lock and before doing IO. Add the tests. This also addresses CVE-2024-25741 Fixes: 7f2ca14d2f9b9 ("usb: gadget: function: printer: Interface is disabled and returns error") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20240620114039.5767-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_printer.c | 39 ++++++++++++++++++------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index 4c0b7c2970f1..44e20c6c36d3 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -450,11 +450,8 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr) mutex_lock(&dev->lock_printer_io); spin_lock_irqsave(&dev->lock, flags); - if (dev->interface < 0) { - spin_unlock_irqrestore(&dev->lock, flags); - mutex_unlock(&dev->lock_printer_io); - return -ENODEV; - } + if (dev->interface < 0) + goto out_disabled; /* We will use this flag later to check if a printer reset happened * after we turn interrupts back on. @@ -462,6 +459,9 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr) dev->reset_printer = 0; setup_rx_reqs(dev); + /* this dropped the lock - need to retest */ + if (dev->interface < 0) + goto out_disabled; bytes_copied = 0; current_rx_req = dev->current_rx_req; @@ -495,6 +495,8 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr) wait_event_interruptible(dev->rx_wait, (likely(!list_empty(&dev->rx_buffers)))); spin_lock_irqsave(&dev->lock, flags); + if (dev->interface < 0) + goto out_disabled; } /* We have data to return then copy it to the caller's buffer.*/ @@ -538,6 +540,9 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr) return -EAGAIN; } + if (dev->interface < 0) + goto out_disabled; + /* If we not returning all the data left in this RX request * buffer then adjust the amount of data left in the buffer. * Othewise if we are done with this RX request buffer then @@ -567,6 +572,11 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr) return bytes_copied; else return -EAGAIN; + +out_disabled: + spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock_printer_io); + return -ENODEV; } static ssize_t @@ -587,11 +597,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) mutex_lock(&dev->lock_printer_io); spin_lock_irqsave(&dev->lock, flags); - if (dev->interface < 0) { - spin_unlock_irqrestore(&dev->lock, flags); - mutex_unlock(&dev->lock_printer_io); - return -ENODEV; - } + if (dev->interface < 0) + goto out_disabled; /* Check if a printer reset happens while we have interrupts on */ dev->reset_printer = 0; @@ -614,6 +621,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) wait_event_interruptible(dev->tx_wait, (likely(!list_empty(&dev->tx_reqs)))); spin_lock_irqsave(&dev->lock, flags); + if (dev->interface < 0) + goto out_disabled; } while (likely(!list_empty(&dev->tx_reqs)) && len) { @@ -663,6 +672,9 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) return -EAGAIN; } + if (dev->interface < 0) + goto out_disabled; + list_add(&req->list, &dev->tx_reqs_active); /* here, we unlock, and only unlock, to avoid deadlock. */ @@ -675,6 +687,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) mutex_unlock(&dev->lock_printer_io); return -EAGAIN; } + if (dev->interface < 0) + goto out_disabled; } spin_unlock_irqrestore(&dev->lock, flags); @@ -686,6 +700,11 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) return bytes_copied; else return -EAGAIN; + +out_disabled: + spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock_printer_io); + return -ENODEV; } static int From 2eabb655a968b862bc0c31629a09f0fbf3c80d51 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Sun, 9 Jun 2024 06:15:46 -0700 Subject: [PATCH 518/778] usb: atm: cxacru: fix endpoint checking in cxacru_bind() Syzbot is still reporting quite an old issue [1] that occurs due to incomplete checking of present usb endpoints. As such, wrong endpoints types may be used at urb sumbitting stage which in turn triggers a warning in usb_submit_urb(). Fix the issue by verifying that required endpoint types are present for both in and out endpoints, taking into account cmd endpoint type. Unfortunately, this patch has not been tested on real hardware. [1] Syzbot report: usb 1-1: BOGUS urb xfer, pipe 1 != type 3 WARNING: CPU: 0 PID: 8667 at drivers/usb/core/urb.c:502 usb_submit_urb+0xed2/0x18a0 drivers/usb/core/urb.c:502 Modules linked in: CPU: 0 PID: 8667 Comm: kworker/0:4 Not tainted 5.14.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: usb_hub_wq hub_event RIP: 0010:usb_submit_urb+0xed2/0x18a0 drivers/usb/core/urb.c:502 ... Call Trace: cxacru_cm+0x3c0/0x8e0 drivers/usb/atm/cxacru.c:649 cxacru_card_status+0x22/0xd0 drivers/usb/atm/cxacru.c:760 cxacru_bind+0x7ac/0x11a0 drivers/usb/atm/cxacru.c:1209 usbatm_usb_probe+0x321/0x1ae0 drivers/usb/atm/usbatm.c:1055 cxacru_usb_probe+0xdf/0x1e0 drivers/usb/atm/cxacru.c:1363 usb_probe_interface+0x315/0x7f0 drivers/usb/core/driver.c:396 call_driver_probe drivers/base/dd.c:517 [inline] really_probe+0x23c/0xcd0 drivers/base/dd.c:595 __driver_probe_device+0x338/0x4d0 drivers/base/dd.c:747 driver_probe_device+0x4c/0x1a0 drivers/base/dd.c:777 __device_attach_driver+0x20b/0x2f0 drivers/base/dd.c:894 bus_for_each_drv+0x15f/0x1e0 drivers/base/bus.c:427 __device_attach+0x228/0x4a0 drivers/base/dd.c:965 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:487 device_add+0xc2f/0x2180 drivers/base/core.c:3354 usb_set_configuration+0x113a/0x1910 drivers/usb/core/message.c:2170 usb_generic_driver_probe+0xba/0x100 drivers/usb/core/generic.c:238 usb_probe_device+0xd9/0x2c0 drivers/usb/core/driver.c:293 Reported-and-tested-by: syzbot+00c18ee8497dd3be6ade@syzkaller.appspotmail.com Fixes: 902ffc3c707c ("USB: cxacru: Use a bulk/int URB to access the command endpoint") Cc: stable Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20240609131546.3932-1-n.zhandarovich@fintech.ru Signed-off-by: Greg Kroah-Hartman --- drivers/usb/atm/cxacru.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index 4ce7cba2b48a..8f3b9a0a38e1 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -1131,6 +1131,7 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, struct cxacru_data *instance; struct usb_device *usb_dev = interface_to_usbdev(intf); struct usb_host_endpoint *cmd_ep = usb_dev->ep_in[CXACRU_EP_CMD]; + struct usb_endpoint_descriptor *in, *out; int ret; /* instance init */ @@ -1177,6 +1178,19 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, goto fail; } + if (usb_endpoint_xfer_int(&cmd_ep->desc)) + ret = usb_find_common_endpoints(intf->cur_altsetting, + NULL, NULL, &in, &out); + else + ret = usb_find_common_endpoints(intf->cur_altsetting, + &in, &out, NULL, NULL); + + if (ret) { + usb_err(usbatm_instance, "cxacru_bind: interface has incorrect endpoints\n"); + ret = -ENODEV; + goto fail; + } + if ((cmd_ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT) { usb_fill_int_urb(instance->rcv_urb, From 8e1ec117efdfd4b2f59f57bd0ad16b4edf5b963f Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 12 Jun 2024 14:46:56 +0200 Subject: [PATCH 519/778] usb: ucsi: stm32: fix command completion handling Sometimes errors are seen, when doing DR swap, like: [ 24.672481] ucsi-stm32g0-i2c 0-0035: UCSI_GET_PDOS failed (-5) [ 24.720188] ucsi-stm32g0-i2c 0-0035: ucsi_handle_connector_change: GET_CONNECTOR_STATUS failed (-5) There may be some race, which lead to read CCI, before the command complete flag is set, hence returning -EIO. Similar fix has been done also in ucsi_acpi [1]. In case of a spurious or otherwise delayed notification it is possible that CCI still reports the previous completion. The UCSI spec is aware of this and provides two completion bits in CCI, one for normal commands and one for acks. As acks and commands alternate the notification handler can determine if the completion bit is from the current command. To fix this add the ACK_PENDING bit for ucsi_stm32g0 and only complete commands if the completion bit matches. [1] https://lore.kernel.org/lkml/20240121204123.275441-3-lk@c--e.de/ Fixes: 72849d4fcee7 ("usb: typec: ucsi: stm32g0: add support for stm32g0 controller") Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/stable/20240612124656.2305603-1-fabrice.gasnier%40foss.st.com Cc: stable Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240612124656.2305603-1-fabrice.gasnier@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_stm32g0.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_stm32g0.c b/drivers/usb/typec/ucsi/ucsi_stm32g0.c index ac48b7763114..ac69288e8bb0 100644 --- a/drivers/usb/typec/ucsi/ucsi_stm32g0.c +++ b/drivers/usb/typec/ucsi/ucsi_stm32g0.c @@ -65,6 +65,7 @@ struct ucsi_stm32g0 { struct device *dev; unsigned long flags; #define COMMAND_PENDING 1 +#define ACK_PENDING 2 const char *fw_name; struct ucsi *ucsi; bool suspended; @@ -396,9 +397,13 @@ static int ucsi_stm32g0_sync_write(struct ucsi *ucsi, unsigned int offset, const size_t len) { struct ucsi_stm32g0 *g0 = ucsi_get_drvdata(ucsi); + bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI; int ret; - set_bit(COMMAND_PENDING, &g0->flags); + if (ack) + set_bit(ACK_PENDING, &g0->flags); + else + set_bit(COMMAND_PENDING, &g0->flags); ret = ucsi_stm32g0_async_write(ucsi, offset, val, len); if (ret) @@ -406,9 +411,14 @@ static int ucsi_stm32g0_sync_write(struct ucsi *ucsi, unsigned int offset, const if (!wait_for_completion_timeout(&g0->complete, msecs_to_jiffies(5000))) ret = -ETIMEDOUT; + else + return 0; out_clear_bit: - clear_bit(COMMAND_PENDING, &g0->flags); + if (ack) + clear_bit(ACK_PENDING, &g0->flags); + else + clear_bit(COMMAND_PENDING, &g0->flags); return ret; } @@ -429,8 +439,9 @@ static irqreturn_t ucsi_stm32g0_irq_handler(int irq, void *data) if (UCSI_CCI_CONNECTOR(cci)) ucsi_connector_change(g0->ucsi, UCSI_CCI_CONNECTOR(cci)); - if (test_bit(COMMAND_PENDING, &g0->flags) && - cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE)) + if (cci & UCSI_CCI_ACK_COMPLETE && test_and_clear_bit(ACK_PENDING, &g0->flags)) + complete(&g0->complete); + if (cci & UCSI_CCI_COMMAND_COMPLETE && test_and_clear_bit(COMMAND_PENDING, &g0->flags)) complete(&g0->complete); return IRQ_HANDLED; From 9e3caa9dd51b23e232f095a98336a84f42e4a7f2 Mon Sep 17 00:00:00 2001 From: Diogo Ivo Date: Wed, 12 Jun 2024 14:13:10 +0100 Subject: [PATCH 520/778] usb: typec: ucsi_acpi: Add LG Gram quirk Some LG Gram laptops report a bogus connector change event after a GET_PDOS command for the partner's source PDOs, which disappears from the CCI after acknowledging the command. However, the subsequent GET_CONNECTOR_STATUS in ucsi_handle_connector_change() still reports this bogus change in bits 5 and 6, leading to the UCSI core re-checking the partner's source PDOs and thus to an infinite loop. Fix this by adding a quirk that signals when a potentially buggy GET_PDOS command is used, checks the status change report and clears it if it is a bogus event before sending it to the UCSI core. Signed-off-by: Diogo Ivo Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240612-gram_quirk-v1-1-52b0ff0e1546@tecnico.ulisboa.pt Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 61 ++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 8d112c3edae5..adf32ca0f761 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -25,6 +25,7 @@ struct ucsi_acpi { unsigned long flags; #define UCSI_ACPI_COMMAND_PENDING 1 #define UCSI_ACPI_ACK_PENDING 2 +#define UCSI_ACPI_CHECK_BOGUS_EVENT 3 guid_t guid; u64 cmd; }; @@ -128,6 +129,58 @@ static const struct ucsi_operations ucsi_zenbook_ops = { .async_write = ucsi_acpi_async_write }; +static int ucsi_gram_read(struct ucsi *ucsi, unsigned int offset, + void *val, size_t val_len) +{ + u16 bogus_change = UCSI_CONSTAT_POWER_LEVEL_CHANGE | + UCSI_CONSTAT_PDOS_CHANGE; + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + struct ucsi_connector_status *status; + int ret; + + ret = ucsi_acpi_read(ucsi, offset, val, val_len); + if (ret < 0) + return ret; + + if (UCSI_COMMAND(ua->cmd) == UCSI_GET_CONNECTOR_STATUS && + test_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags) && + offset == UCSI_MESSAGE_IN) { + status = (struct ucsi_connector_status *)val; + + /* Clear the bogus change */ + if (status->change == bogus_change) + status->change = 0; + + clear_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags); + } + + return ret; +} + +static int ucsi_gram_sync_write(struct ucsi *ucsi, unsigned int offset, + const void *val, size_t val_len) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + int ret; + + ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); + if (ret < 0) + return ret; + + if (UCSI_COMMAND(ua->cmd) == UCSI_GET_PDOS && + ua->cmd & UCSI_GET_PDOS_PARTNER_PDO(1) && + ua->cmd & UCSI_GET_PDOS_SRC_PDOS) + set_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags); + + return ret; +} + +static const struct ucsi_operations ucsi_gram_ops = { + .read = ucsi_gram_read, + .sync_write = ucsi_gram_sync_write, + .async_write = ucsi_acpi_async_write +}; + static const struct dmi_system_id ucsi_acpi_quirks[] = { { .matches = { @@ -136,6 +189,14 @@ static const struct dmi_system_id ucsi_acpi_quirks[] = { }, .driver_data = (void *)&ucsi_zenbook_ops, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "LG gram PC"), + DMI_MATCH(DMI_PRODUCT_NAME, "90Q"), + }, + .driver_data = (void *)&ucsi_gram_ops, + }, { } }; From de644a4a86be04ed8a43ef8267d0f7d021941c5e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Jun 2024 12:31:30 +0300 Subject: [PATCH 521/778] usb: musb: da8xx: fix a resource leak in probe() Call usb_phy_generic_unregister() if of_platform_populate() fails. Fixes: d6299b6efbf6 ("usb: musb: Add support of CPPI 4.1 DMA controller to DA8xx") Cc: stable Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/69af1b1d-d3f4-492b-bcea-359ca5949f30@moroto.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/da8xx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 8abf3a567e30..108d9a593a80 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -556,7 +556,7 @@ static int da8xx_probe(struct platform_device *pdev) ret = of_platform_populate(pdev->dev.of_node, NULL, da8xx_auxdata_lookup, &pdev->dev); if (ret) - return ret; + goto err_unregister_phy; pinfo = da8xx_dev_info; pinfo.parent = &pdev->dev; @@ -571,9 +571,13 @@ static int da8xx_probe(struct platform_device *pdev) ret = PTR_ERR_OR_ZERO(glue->musb); if (ret) { dev_err(&pdev->dev, "failed to register musb device: %d\n", ret); - usb_phy_generic_unregister(glue->usb_phy); + goto err_unregister_phy; } + return 0; + +err_unregister_phy: + usb_phy_generic_unregister(glue->usb_phy); return ret; } From c68942624e254a4e8a65afcd3c17ed95acda5489 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Thu, 13 Jun 2024 14:14:48 +0200 Subject: [PATCH 522/778] usb: typec: ucsi: glink: fix child node release in probe function The device_for_each_child_node() macro requires explicit calls to fwnode_handle_put() in all early exits of the loop if the child node is not required outside. Otherwise, the child node's refcount is not decremented and the resource is not released. The current implementation of pmic_glink_ucsi_probe() makes use of the device_for_each_child_node(), but does not release the child node on early returns. Add the missing calls to fwnode_handle_put(). Cc: stable@vger.kernel.org Fixes: c6165ed2f425 ("usb: ucsi: glink: use the connector orientation GPIO to provide switch events") Signed-off-by: Javier Carrasco Reviewed-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240613-ucsi-glink-release-node-v1-1-f7629a56f70a@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_glink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index 985a880e86da..2fa973afe4e6 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -372,6 +372,7 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev, ret = fwnode_property_read_u32(fwnode, "reg", &port); if (ret < 0) { dev_err(dev, "missing reg property of %pOFn\n", fwnode); + fwnode_handle_put(fwnode); return ret; } @@ -386,9 +387,11 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev, if (!desc) continue; - if (IS_ERR(desc)) + if (IS_ERR(desc)) { + fwnode_handle_put(fwnode); return dev_err_probe(dev, PTR_ERR(desc), "unable to acquire orientation gpio\n"); + } ucsi->port_orientation[port] = desc; } From 7838de15bb700c2898a7d741db9b1f3cbc86c136 Mon Sep 17 00:00:00 2001 From: Meng Li Date: Tue, 18 Jun 2024 11:19:18 +0800 Subject: [PATCH 523/778] usb: dwc3: core: remove lock of otg mode during gadget suspend/resume to avoid deadlock When config CONFIG_USB_DWC3_DUAL_ROLE is selected, and trigger system to enter suspend status with below command: echo mem > /sys/power/state There will be a deadlock issue occurring. Detailed invoking path as below: dwc3_suspend_common() spin_lock_irqsave(&dwc->lock, flags); <-- 1st dwc3_gadget_suspend(dwc); dwc3_gadget_soft_disconnect(dwc); spin_lock_irqsave(&dwc->lock, flags); <-- 2nd This issue is exposed by commit c7ebd8149ee5 ("usb: dwc3: gadget: Fix NULL pointer dereference in dwc3_gadget_suspend") that removes the code of checking whether dwc->gadget_driver is NULL or not. It causes the following code is executed and deadlock occurs when trying to get the spinlock. In fact, the root cause is the commit 5265397f9442("usb: dwc3: Remove DWC3 locking during gadget suspend/resume") that forgot to remove the lock of otg mode. So, remove the redundant lock of otg mode during gadget suspend/resume. Fixes: 5265397f9442 ("usb: dwc3: Remove DWC3 locking during gadget suspend/resume") Cc: Xu Yang Cc: stable@vger.kernel.org Signed-off-by: Meng Li Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240618031918.2585799-1-Meng.Li@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 7ee61a89520b..9d47c3aa5777 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -2250,7 +2250,6 @@ assert_reset: static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) { - unsigned long flags; u32 reg; int i; @@ -2293,9 +2292,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) break; if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) { - spin_lock_irqsave(&dwc->lock, flags); dwc3_gadget_suspend(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); synchronize_irq(dwc->irq_gadget); } @@ -2312,7 +2309,6 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) { - unsigned long flags; int ret; u32 reg; int i; @@ -2366,9 +2362,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) if (dwc->current_otg_role == DWC3_OTG_ROLE_HOST) { dwc3_otg_host_init(dwc); } else if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) { - spin_lock_irqsave(&dwc->lock, flags); dwc3_gadget_resume(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); } break; From dba7567c2fbbf10a4de2471cdb0e16e5572dc007 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 13 Jun 2024 12:20:47 +0800 Subject: [PATCH 524/778] usb: gadget: aspeed_udc: fix device address configuration In the aspeed UDC setup, we configure the UDC hardware with the assigned USB device address. However, we have an off-by-one in the bitmask, so we're only setting the lower 6 bits of the address (USB addresses being 7 bits, and the hardware bitmask being bits 0:6). This means that device enumeration fails if the assigned address is greater than 64: [ 344.607255] usb 1-1: new high-speed USB device number 63 using ehci-platform [ 344.808459] usb 1-1: New USB device found, idVendor=cc00, idProduct=cc00, bcdDevice= 6.10 [ 344.817684] usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 344.825671] usb 1-1: Product: Test device [ 344.831075] usb 1-1: Manufacturer: Test vendor [ 344.836335] usb 1-1: SerialNumber: 00 [ 349.917181] usb 1-1: USB disconnect, device number 63 [ 352.036775] usb 1-1: new high-speed USB device number 64 using ehci-platform [ 352.249432] usb 1-1: device descriptor read/all, error -71 [ 352.696740] usb 1-1: new high-speed USB device number 65 using ehci-platform [ 352.909431] usb 1-1: device descriptor read/all, error -71 Use the correct mask of 0x7f (rather than 0x3f), and generate this through the GENMASK macro, so we have numbers that correspond exactly to the hardware register definition. Fixes: 055276c13205 ("usb: gadget: add Aspeed ast2600 udc driver") Cc: stable@vger.kernel.org Reviewed-by: Neal Liu Reviewed-by: Andrew Jeffery Signed-off-by: Jeremy Kerr Link: https://lore.kernel.org/r/20240613-aspeed-udc-v2-1-29501ce9cb7a@codeconstruct.com.au Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/aspeed_udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/aspeed_udc.c b/drivers/usb/gadget/udc/aspeed_udc.c index 3916c8e2ba01..821a6ab5da56 100644 --- a/drivers/usb/gadget/udc/aspeed_udc.c +++ b/drivers/usb/gadget/udc/aspeed_udc.c @@ -66,8 +66,8 @@ #define USB_UPSTREAM_EN BIT(0) /* Main config reg */ -#define UDC_CFG_SET_ADDR(x) ((x) & 0x3f) -#define UDC_CFG_ADDR_MASK (0x3f) +#define UDC_CFG_SET_ADDR(x) ((x) & UDC_CFG_ADDR_MASK) +#define UDC_CFG_ADDR_MASK GENMASK(6, 0) /* Interrupt ctrl & status reg */ #define UDC_IRQ_EP_POOL_NAK BIT(17) From f3ced000a2df53f4b12849e121769045a81a3b22 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 10 Jun 2024 18:48:45 -0700 Subject: [PATCH 525/778] KVM: x86: Always sync PIR to IRR prior to scanning I/O APIC routes Sync pending posted interrupts to the IRR prior to re-scanning I/O APIC routes, irrespective of whether the I/O APIC is emulated by userspace or by KVM. If a level-triggered interrupt routed through the I/O APIC is pending or in-service for a vCPU, KVM needs to intercept EOIs on said vCPU even if the vCPU isn't the destination for the new routing, e.g. if servicing an interrupt using the old routing races with I/O APIC reconfiguration. Commit fceb3a36c29a ("KVM: x86: ioapic: Fix level-triggered EOI and userspace I/OAPIC reconfigure race") fixed the common cases, but kvm_apic_pending_eoi() only checks if an interrupt is in the local APIC's IRR or ISR, i.e. misses the uncommon case where an interrupt is pending in the PIR. Failure to intercept EOI can manifest as guest hangs with Windows 11 if the guest uses the RTC as its timekeeping source, e.g. if the VMM doesn't expose a more modern form of time to the guest. Cc: stable@vger.kernel.org Cc: Adamos Ttofari Cc: Raghavendra Rao Ananta Reviewed-by: Jim Mattson Signed-off-by: Sean Christopherson Message-ID: <20240611014845.82795-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c9e4281d978..0763a0f72a06 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10718,13 +10718,12 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256); + static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); + if (irqchip_split(vcpu->kvm)) kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); - else { - static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); - if (ioapic_in_kernel(vcpu->kvm)) - kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); - } + else if (ioapic_in_kernel(vcpu->kvm)) + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); if (is_guest_mode(vcpu)) vcpu->arch.load_eoi_exitmap_pending = true; From b018589013d6db43fdc894c635d6590e0a7e3285 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 10 Jun 2024 09:34:27 -0700 Subject: [PATCH 526/778] MAINTAINERS: Drop Wanpeng Li as a Reviewer for KVM Paravirt support Drop Wanpeng as a KVM PARAVIRT reviewer as his @tencent.com email is bouncing, and according to lore[*], the last activity from his @gmail.com address was almost two years ago. [*] https://lore.kernel.org/all/CANRm+Cwj29M9HU3=JRUOaKDR+iDKgr0eNMWQi0iLkR5THON-bg@mail.gmail.com Cc: Wanpeng Li Cc: Like Xu Signed-off-by: Sean Christopherson Message-ID: <20240610163427.3359426-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8754ac2c259d..5d62fe9495e6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12383,7 +12383,6 @@ F: drivers/video/backlight/ktz8866.c KVM PARAVIRT (KVM/paravirt) M: Paolo Bonzini -R: Wanpeng Li R: Vitaly Kuznetsov L: kvm@vger.kernel.org S: Supported From f474092c6fe1e2154a35308a1a1aef3212c3ecf2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 10 Jun 2024 13:31:21 +0300 Subject: [PATCH 527/778] kvm: do not account temporary allocations to kmem Some allocations done by KVM are temporary, they are created as result of program actions, but can't exists for arbitrary long times. They should have been GFP_TEMPORARY (rip!). OTOH, kvm-nx-lpage-recovery and kvm-pit kernel threads exist for as long as VM exists but their task_struct memory is not accounted. This is story for another day. Signed-off-by: Alexey Dobriyan Message-ID: Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 14841acb8b95..8e422c2c9450 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4427,7 +4427,7 @@ static long kvm_vcpu_ioctl(struct file *filp, struct kvm_regs *kvm_regs; r = -ENOMEM; - kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT); + kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); if (!kvm_regs) goto out; r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); @@ -4454,8 +4454,7 @@ out_free1: break; } case KVM_GET_SREGS: { - kvm_sregs = kzalloc(sizeof(struct kvm_sregs), - GFP_KERNEL_ACCOUNT); + kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); r = -ENOMEM; if (!kvm_sregs) goto out; @@ -4547,7 +4546,7 @@ out_free1: break; } case KVM_GET_FPU: { - fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT); + fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); r = -ENOMEM; if (!fpu) goto out; @@ -6210,7 +6209,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) active = kvm_active_vms; mutex_unlock(&kvm_lock); - env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT); + env = kzalloc(sizeof(*env), GFP_KERNEL); if (!env) return; @@ -6226,7 +6225,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) add_uevent_var(env, "PID=%d", kvm->userspace_pid); if (!IS_ERR(kvm->debugfs_dentry)) { - char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); + char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); if (p) { tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); From ce5291e56081730ec7d87bc9aa41f3de73ff3256 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Jun 2024 20:30:00 +0100 Subject: [PATCH 528/778] cifs: Defer read completion Defer read completion from the I/O thread to the cifsiod thread so as not to slow down the I/O thread. This restores the behaviour of v6.9. Fixes: 3ee1a1fc3981 ("cifs: Cut over to using netfslib") Signed-off-by: David Howells cc: Paulo Alcantara cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 38a06e8a0f90..e213cecd5094 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4484,6 +4484,16 @@ smb2_new_read_req(void **buf, unsigned int *total_len, return rc; } +static void smb2_readv_worker(struct work_struct *work) +{ + struct cifs_io_subrequest *rdata = + container_of(work, struct cifs_io_subrequest, subreq.work); + + netfs_subreq_terminated(&rdata->subreq, + (rdata->result == 0 || rdata->result == -EAGAIN) ? + rdata->got_bytes : rdata->result, true); +} + static void smb2_readv_callback(struct mid_q_entry *mid) { @@ -4578,9 +4588,8 @@ smb2_readv_callback(struct mid_q_entry *mid) rdata->result = 0; } rdata->credits.value = 0; - netfs_subreq_terminated(&rdata->subreq, - (rdata->result == 0 || rdata->result == -EAGAIN) ? - rdata->got_bytes : rdata->result, true); + INIT_WORK(&rdata->subreq.work, smb2_readv_worker); + queue_work(cifsiod_wq, &rdata->subreq.work); release_mid(mid); add_credits(server, &credits, 0); } From 969b3010cbfcf58de65399dff8252c41b5e79292 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Jun 2024 18:31:28 +0100 Subject: [PATCH 529/778] cifs: Only pick a channel once per read request In cifs, only pick a channel when setting up a read request rather than doing so individually for every subrequest and instead use that channel for all. This mirrors what the code in v6.9 does. Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 1 + fs/smb/client/file.c | 14 +++----------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 73482734a8d8..0978997ddfa6 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1494,6 +1494,7 @@ struct cifs_aio_ctx { struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; + struct TCP_Server_Info *server; }; /* asynchronous read support */ diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 1e269e0bc75b..4dbd80168a2b 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -134,17 +134,15 @@ fail: static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; - struct TCP_Server_Info *server; struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); + struct TCP_Server_Info *server = req->server; struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); size_t rsize = 0; int rc; rdata->xid = get_xid(); rdata->have_xid = true; - - server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; if (cifs_sb->ctx->rsize == 0) @@ -203,14 +201,7 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); rdata->pid = pid; - rc = adjust_credits(rdata->server, &rdata->credits, rdata->subreq.len); - if (!rc) { - if (rdata->req->cfile->invalidHandle) - rc = -EAGAIN; - else - rc = rdata->server->ops->async_readv(rdata); - } - + rc = rdata->server->ops->async_readv(rdata); out: if (rc) netfs_subreq_terminated(subreq, rc, false); @@ -250,6 +241,7 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) open_file = file->private_data; rreq->netfs_priv = file->private_data; req->cfile = cifsFileInfo_get(open_file); + req->server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); } else if (rreq->origin != NETFS_WRITEBACK) { WARN_ON_ONCE(1); return -EIO; From 3f59138580bf8006fa99641b5803d0f683709f10 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Jun 2024 18:31:29 +0100 Subject: [PATCH 530/778] cifs: Move the 'pid' from the subreq to the req Move the reference pid from the cifs_io_subrequest struct to the cifs_io_request struct as it's the same for all subreqs of a particular request. Signed-off-by: David Howells cc: Paulo Alcantara cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 2 +- fs/smb/client/cifssmb.c | 8 ++++---- fs/smb/client/file.c | 11 +++-------- fs/smb/client/smb2pdu.c | 4 ++-- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 0978997ddfa6..557b68e99d0a 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1495,6 +1495,7 @@ struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; struct TCP_Server_Info *server; + pid_t pid; }; /* asynchronous read support */ @@ -1505,7 +1506,6 @@ struct cifs_io_subrequest { struct cifs_io_request *req; }; ssize_t got_bytes; - pid_t pid; unsigned int xid; int result; bool have_xid; diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 25e9ab947c17..595c4b673707 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1345,8 +1345,8 @@ cifs_async_readv(struct cifs_io_subrequest *rdata) if (rc) return rc; - smb->hdr.Pid = cpu_to_le16((__u16)rdata->pid); - smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); + smb->hdr.Pid = cpu_to_le16((__u16)rdata->req->pid); + smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->req->pid >> 16)); smb->AndXCommand = 0xFF; /* none */ smb->Fid = rdata->req->cfile->fid.netfid; @@ -1689,8 +1689,8 @@ cifs_async_writev(struct cifs_io_subrequest *wdata) if (rc) goto async_writev_out; - smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid); - smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); + smb->hdr.Pid = cpu_to_le16((__u16)wdata->req->pid); + smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->req->pid >> 16)); smb->AndXCommand = 0xFF; /* none */ smb->Fid = wdata->req->cfile->fid.netfid; diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 4dbd80168a2b..f1f2573bb18d 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -177,15 +177,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) struct netfs_io_request *rreq = subreq->rreq; struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); - struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); - pid_t pid; int rc = 0; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) - pid = req->cfile->pid; - else - pid = current->tgid; // Ummm... This may be a workqueue - cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n", __func__, rreq->debug_id, subreq->debug_index, rreq->mapping, subreq->transferred, subreq->len); @@ -199,7 +192,6 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) } __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); - rdata->pid = pid; rc = rdata->server->ops->async_readv(rdata); out: @@ -236,12 +228,15 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) rreq->rsize = cifs_sb->ctx->rsize; rreq->wsize = cifs_sb->ctx->wsize; + req->pid = current->tgid; // Ummm... This may be a workqueue if (file) { open_file = file->private_data; rreq->netfs_priv = file->private_data; req->cfile = cifsFileInfo_get(open_file); req->server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + req->pid = req->cfile->pid; } else if (rreq->origin != NETFS_WRITEBACK) { WARN_ON_ONCE(1); return -EIO; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index e213cecd5094..2ae2dbb6202b 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4621,7 +4621,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata) io_parms.length = rdata->subreq.len; io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid; io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid; - io_parms.pid = rdata->pid; + io_parms.pid = rdata->req->pid; rc = smb2_new_read_req( (void **) &buf, &total_len, &io_parms, rdata, 0, 0); @@ -4873,7 +4873,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) .length = wdata->subreq.len, .persistent_fid = wdata->req->cfile->fid.persistent_fid, .volatile_fid = wdata->req->cfile->fid.volatile_fid, - .pid = wdata->pid, + .pid = wdata->req->pid, }; io_parms = &_io_parms; From e7c3696d4692e8046d25f6e63f983e934e12f2c5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 15 May 2024 10:55:28 +0100 Subject: [PATCH 531/778] firmware: psci: Fix return value from psci_system_suspend() Currently we return the value from invoke_psci_fn() directly as return value from psci_system_suspend(). It is wrong to send the PSCI interface return value directly. psci_to_linux_errno() provide the mapping from PSCI return value to the one that can be returned to the callers within the kernel. Use psci_to_linux_errno() to convert and return the correct value from psci_system_suspend(). Fixes: faf7ec4a92c0 ("drivers: firmware: psci: add system suspend support") Acked-by: Mark Rutland Signed-off-by: Sudeep Holla Link: https://lore.kernel.org/r/20240515095528.1949992-1-sudeep.holla@arm.com Signed-off-by: Arnd Bergmann --- drivers/firmware/psci/psci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index d9629ff87861..2328ca58bba6 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -497,10 +497,12 @@ int psci_cpu_suspend_enter(u32 state) static int psci_system_suspend(unsigned long unused) { + int err; phys_addr_t pa_cpu_resume = __pa_symbol(cpu_resume); - return invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND), + err = invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND), pa_cpu_resume, 0, 0); + return psci_to_linux_errno(err); } static int psci_system_suspend_enter(suspend_state_t state) From c31745d2c508796a0996c88bf2e55f552d513f65 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 11 Jun 2024 04:22:18 -0400 Subject: [PATCH 532/778] virt: guest_memfd: fix reference leak on hwpoisoned page If kvm_gmem_get_pfn() detects an hwpoisoned page, it returns -EHWPOISON but it does not put back the reference that kvm_gmem_get_folio() had grabbed. Add the forgotten folio_put(). Fixes: a7800aa80ea4 ("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory") Cc: stable@vger.kernel.org Reviewed-by: Liam Merwick Reviewed-by: Isaku Yamahata Signed-off-by: Paolo Bonzini --- virt/kvm/guest_memfd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 0f4e0cf4f158..747fe251e445 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -510,8 +510,10 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, } if (folio_test_hwpoison(folio)) { + folio_unlock(folio); + folio_put(folio); r = -EHWPOISON; - goto out_unlock; + goto out_fput; } page = folio_file_page(folio, index); @@ -522,7 +524,6 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, r = 0; -out_unlock: folio_unlock(folio); out_fput: fput(file); From 676f819c3e982db3695a371f336a05086585ea4f Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 13 Jun 2024 20:28:03 +0800 Subject: [PATCH 533/778] KVM: Discard zero mask with function kvm_dirty_ring_reset Function kvm_reset_dirty_gfn may be called with parameters cur_slot / cur_offset / mask are all zero, it does not represent real dirty page. It is not necessary to clear dirty page in this condition. Also return value of macro __fls() is undefined if mask is zero which is called in funciton kvm_reset_dirty_gfn(). Here just return. Signed-off-by: Bibo Mao Message-ID: <20240613122803.1031511-1-maobibo@loongson.cn> [Move the conditional inside kvm_reset_dirty_gfn; suggested by Sean Christopherson. - Paolo] Signed-off-by: Paolo Bonzini --- virt/kvm/dirty_ring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c index 86d267db87bb..7bc74969a819 100644 --- a/virt/kvm/dirty_ring.c +++ b/virt/kvm/dirty_ring.c @@ -55,6 +55,9 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask) struct kvm_memory_slot *memslot; int as_id, id; + if (!mask) + return; + as_id = slot >> 16; id = (u16)slot; From d4e001ffeccfc128c715057e866f301ac9b95728 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 20 Jun 2024 13:34:49 +0200 Subject: [PATCH 534/778] dt-bindings: i2c: atmel,at91sam: correct path to i2c-controller schema The referenced i2c-controller.yaml schema is provided by dtschema package (outside of Linux kernel), so use full path to reference it. Cc: stable@vger.kernel.org Fixes: 7ea75dd386be ("dt-bindings: i2c: convert i2c-at91 to json-schema") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Conor Dooley Signed-off-by: Andi Shyti --- Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml b/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml index b1c13bab2472..b2d19cfb87ad 100644 --- a/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml @@ -77,7 +77,7 @@ required: - clocks allOf: - - $ref: i2c-controller.yaml + - $ref: /schemas/i2c/i2c-controller.yaml# - if: properties: compatible: From 5c8cfd592bb7632200b4edac8f2c7ec892ed9d81 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 20 Jun 2024 13:34:50 +0200 Subject: [PATCH 535/778] dt-bindings: i2c: google,cros-ec-i2c-tunnel: correct path to i2c-controller schema The referenced i2c-controller.yaml schema is provided by dtschema package (outside of Linux kernel), so use full path to reference it. Cc: stable@vger.kernel.org Fixes: 1acd4577a66f ("dt-bindings: i2c: convert i2c-cros-ec-tunnel to json-schema") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Conor Dooley Signed-off-by: Andi Shyti --- .../devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml index ab151c9db219..580003cdfff5 100644 --- a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml +++ b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml @@ -21,7 +21,7 @@ description: | google,cros-ec-spi or google,cros-ec-i2c. allOf: - - $ref: i2c-controller.yaml# + - $ref: /schemas/i2c/i2c-controller.yaml# properties: compatible: From 5a72477273066b5b357801ab2d315ef14949d402 Mon Sep 17 00:00:00 2001 From: Grygorii Tertychnyi Date: Mon, 20 May 2024 17:39:32 +0200 Subject: [PATCH 536/778] i2c: ocores: set IACK bit after core is enabled Setting IACK bit when core is disabled does not clear the "Interrupt Flag" bit in the status register, and the interrupt remains pending. Sometimes it causes failure for the very first message transfer, that is usually a device probe. Hence, set IACK bit after core is enabled to clear pending interrupt. Fixes: 18f98b1e3147 ("[PATCH] i2c: New bus driver for the OpenCores I2C controller") Signed-off-by: Grygorii Tertychnyi Acked-by: Peter Korsgaard Cc: stable@vger.kernel.org Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-ocores.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index 56a4dabf5a38..4ad670a80a63 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -431,8 +431,8 @@ static int ocores_init(struct device *dev, struct ocores_i2c *i2c) oc_setreg(i2c, OCI2C_PREHIGH, prescale >> 8); /* Init the device */ - oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_IACK); oc_setreg(i2c, OCI2C_CONTROL, ctrl | OCI2C_CTRL_EN); + oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_IACK); return 0; } From 120dd4118e58dbda2ddb1dcf55f3c56cdfe8cee0 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 21 Jun 2024 10:18:40 +0800 Subject: [PATCH 537/778] LoongArch: Only allow OBJTOOL & ORC unwinder if toolchain supports -mthin-add-sub GAS <= 2.41 does not support generating R_LARCH_{32,64}_PCREL for "label - ." and it generates R_LARCH_{ADD,SUB}{32,64} pairs instead. Objtool cannot handle R_LARCH_{ADD,SUB}{32,64} pair in __jump_table (static key implementation) and etc. so it will produce some warnings. This is causing the kernel CI systems to complain everywhere. For GAS we can check if -mthin-add-sub option is available to know if R_LARCH_{32,64}_PCREL are supported. For Clang, we require Clang >= 18 and Clang >= 17 already supports R_LARCH_{32,64}_PCREL. But unfortunately Clang has some other issues, so we disable objtool for Clang at present. Note that __jump_table here is not generated by the compiler, so -fno-jump-table is completely irrelevant for this issue. Fixes: cb8a2ef0848c ("LoongArch: Add ORC stack unwinder support") Closes: https://lore.kernel.org/loongarch/Zl5m1ZlVmGKitAof@yujie-X299/ Closes: https://lore.kernel.org/loongarch/ZlY1gDDPi_mNrwJ1@slm.duckdns.org/ Closes: https://lore.kernel.org/loongarch/1717478006.038663-1-hengqi@linux.alibaba.com/ Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=816029e06768 Link: https://github.com/llvm/llvm-project/commit/42cb3c6346fc Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 5 ++++- arch/loongarch/Kconfig.debug | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e38139c576ee..ddc042895d01 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -143,7 +143,7 @@ config LOONGARCH select HAVE_LIVEPATCH select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI - select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS + select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB && !CC_IS_CLANG select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -261,6 +261,9 @@ config AS_HAS_EXPLICIT_RELOCS config AS_HAS_FCSR_CLASS def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0) +config AS_HAS_THIN_ADD_SUB + def_bool $(cc-option,-Wa$(comma)-mthin-add-sub) + config AS_HAS_LSX_EXTENSION def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0) diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index 98d60630c3d4..8b2ce5b5d43e 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -28,6 +28,7 @@ config UNWINDER_PROLOGUE config UNWINDER_ORC bool "ORC unwinder" + depends on HAVE_OBJTOOL select OBJTOOL help This option enables the ORC (Oops Rewind Capability) unwinder for From f63a47b34b140ed1ca39d7e4bd4f1cdc617fc316 Mon Sep 17 00:00:00 2001 From: Hui Li Date: Fri, 21 Jun 2024 10:18:40 +0800 Subject: [PATCH 538/778] LoongArch: Fix watchpoint setting error In the current code, when debugging the following code using gdb, "invalid argument ..." message will be displayed. lihui@bogon:~$ cat test.c #include int a = 0; int main() { a = 1; return 0; } lihui@bogon:~$ gcc -g test.c -o test lihui@bogon:~$ gdb test ... (gdb) watch a Hardware watchpoint 1: a (gdb) r ... Invalid argument setting hardware debug registers There are mainly two types of issues. 1. Some incorrect judgment condition existed in user_watch_state argument parsing, causing -EINVAL to be returned. When setting up a watchpoint, gdb uses the ptrace interface, ptrace(PTRACE_SETREGSET, tid, NT_LOONGARCH_HW_WATCH, (void *) &iov)). Register values in user_watch_state as follows: addr[0] = 0x0, mask[0] = 0x0, ctrl[0] = 0x0 addr[1] = 0x0, mask[1] = 0x0, ctrl[1] = 0x0 addr[2] = 0x0, mask[2] = 0x0, ctrl[2] = 0x0 addr[3] = 0x0, mask[3] = 0x0, ctrl[3] = 0x0 addr[4] = 0x0, mask[4] = 0x0, ctrl[4] = 0x0 addr[5] = 0x0, mask[5] = 0x0, ctrl[5] = 0x0 addr[6] = 0x0, mask[6] = 0x0, ctrl[6] = 0x0 addr[7] = 0x12000803c, mask[7] = 0x0, ctrl[7] = 0x610 In arch_bp_generic_fields(), return -EINVAL when ctrl.len is LOONGARCH_BREAKPOINT_LEN_8(0b00). So delete the incorrect judgment here. In ptrace_hbp_fill_attr_ctrl(), when note_type is NT_LOONGARCH_HW_WATCH and ctrl[0] == 0x0, if ((type & HW_BREAKPOINT_RW) != type) will return -EINVAL. Here ctrl.type should be set based on note_type, and unnecessary judgments can be removed. 2. The watchpoint argument was not set correctly due to unnecessary offset and alignment_mask. Modify ptrace_hbp_fill_attr_ctrl() and hw_breakpoint_arch_parse(), which ensure the watchpont argument is set correctly. All changes according to the LoongArch Reference Manual: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#control-and-status-registers-related-to-watchpoints Cc: stable@vger.kernel.org Signed-off-by: Hui Li Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/hw_breakpoint.h | 2 +- arch/loongarch/kernel/hw_breakpoint.c | 19 ++++--------- arch/loongarch/kernel/ptrace.c | 32 ++++++++++------------ 3 files changed, 21 insertions(+), 32 deletions(-) diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h index 21447fb1efc7..a8ce580f4fc6 100644 --- a/arch/loongarch/include/asm/hw_breakpoint.h +++ b/arch/loongarch/include/asm/hw_breakpoint.h @@ -101,7 +101,7 @@ struct perf_event; struct perf_event_attr; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type, int *offset); + int *gen_len, int *gen_type); extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); extern int hw_breakpoint_arch_parse(struct perf_event *bp, const struct perf_event_attr *attr, diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c index fc55c4de2a11..950b2b8a82ee 100644 --- a/arch/loongarch/kernel/hw_breakpoint.c +++ b/arch/loongarch/kernel/hw_breakpoint.c @@ -283,7 +283,7 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) * to generic breakpoint descriptions. */ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type, int *offset) + int *gen_len, int *gen_type) { /* Type */ switch (ctrl.type) { @@ -303,11 +303,6 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, return -EINVAL; } - if (!ctrl.len) - return -EINVAL; - - *offset = __ffs(ctrl.len); - /* Len */ switch (ctrl.len) { case LOONGARCH_BREAKPOINT_LEN_1: @@ -386,21 +381,17 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, struct arch_hw_breakpoint *hw) { int ret; - u64 alignment_mask, offset; + u64 alignment_mask; /* Build the arch_hw_breakpoint. */ ret = arch_build_bp_info(bp, attr, hw); if (ret) return ret; - if (hw->ctrl.type != LOONGARCH_BREAKPOINT_EXECUTE) - alignment_mask = 0x7; - else + if (hw->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { alignment_mask = 0x3; - offset = hw->address & alignment_mask; - - hw->address &= ~alignment_mask; - hw->ctrl.len <<= offset; + hw->address &= ~alignment_mask; + } return 0; } diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index c114c5ef1332..16b756c6049b 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -494,28 +494,14 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, struct arch_hw_breakpoint_ctrl ctrl, struct perf_event_attr *attr) { - int err, len, type, offset; + int err, len, type; - err = arch_bp_generic_fields(ctrl, &len, &type, &offset); + err = arch_bp_generic_fields(ctrl, &len, &type); if (err) return err; - switch (note_type) { - case NT_LOONGARCH_HW_BREAK: - if ((type & HW_BREAKPOINT_X) != type) - return -EINVAL; - break; - case NT_LOONGARCH_HW_WATCH: - if ((type & HW_BREAKPOINT_RW) != type) - return -EINVAL; - break; - default: - return -EINVAL; - } - attr->bp_len = len; attr->bp_type = type; - attr->bp_addr += offset; return 0; } @@ -609,7 +595,19 @@ static int ptrace_hbp_set_ctrl(unsigned int note_type, return PTR_ERR(bp); attr = bp->attr; - decode_ctrl_reg(uctrl, &ctrl); + + switch (note_type) { + case NT_LOONGARCH_HW_BREAK: + ctrl.type = LOONGARCH_BREAKPOINT_EXECUTE; + ctrl.len = LOONGARCH_BREAKPOINT_LEN_4; + break; + case NT_LOONGARCH_HW_WATCH: + decode_ctrl_reg(uctrl, &ctrl); + break; + default: + return -EINVAL; + } + err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr); if (err) return err; From c8e57ab0995c5b443d3c81c8a36b588776dcd0c3 Mon Sep 17 00:00:00 2001 From: Hui Li Date: Fri, 21 Jun 2024 10:18:40 +0800 Subject: [PATCH 539/778] LoongArch: Trigger user-space watchpoints correctly In the current code, gdb can set the watchpoint successfully through ptrace interface, but watchpoint will not be triggered. When debugging the following code using gdb. lihui@bogon:~$ cat test.c #include int a = 0; int main() { a = 1; printf("a = %d\n", a); return 0; } lihui@bogon:~$ gcc -g test.c -o test lihui@bogon:~$ gdb test ... (gdb) watch a ... (gdb) r ... a = 1 [Inferior 1 (process 4650) exited normally] No watchpoints were triggered, the root causes are: 1. Kernel uses perf_event and hw_breakpoint framework to control watchpoint, but the perf_event corresponding to watchpoint is not enabled. So it needs to be enabled according to MWPnCFG3 or FWPnCFG3 PLV bit field in ptrace_hbp_set_ctrl(), and privilege is set according to the monitored addr in hw_breakpoint_control(). Furthermore, add a judgment in ptrace_hbp_set_addr() to ensure kernel-space addr cannot be monitored in user mode. 2. The global enable control for all watchpoints is the WE bit of CSR.CRMD, and hardware sets the value to 0 when an exception is triggered. When the ERTN instruction is executed to return, the hardware restores the value of the PWE field of CSR.PRMD here. So, before a thread containing watchpoints be scheduled, the PWE field of CSR.PRMD needs to be set to 1. Add this modification in hw_breakpoint_control(). All changes according to the LoongArch Reference Manual: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#control-and-status-registers-related-to-watchpoints https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#basic-control-and-status-registers With this patch: lihui@bogon:~$ gdb test ... (gdb) watch a Hardware watchpoint 1: a (gdb) r ... Hardware watchpoint 1: a Old value = 0 New value = 1 main () at test.c:6 6 printf("a = %d\n", a); (gdb) c Continuing. a = 1 [Inferior 1 (process 775) exited normally] Cc: stable@vger.kernel.org Signed-off-by: Hui Li Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/hw_breakpoint.h | 2 ++ arch/loongarch/kernel/hw_breakpoint.c | 20 +++++++++++++++++--- arch/loongarch/kernel/ptrace.c | 15 ++++++++++++--- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h index a8ce580f4fc6..d78330916bd1 100644 --- a/arch/loongarch/include/asm/hw_breakpoint.h +++ b/arch/loongarch/include/asm/hw_breakpoint.h @@ -75,6 +75,8 @@ do { \ #define CSR_MWPC_NUM 0x3f #define CTRL_PLV_ENABLE 0x1e +#define CTRL_PLV0_ENABLE 0x02 +#define CTRL_PLV3_ENABLE 0x10 #define MWPnCFG3_LoadEn 8 #define MWPnCFG3_StoreEn 9 diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c index 950b2b8a82ee..e882df1f72db 100644 --- a/arch/loongarch/kernel/hw_breakpoint.c +++ b/arch/loongarch/kernel/hw_breakpoint.c @@ -174,11 +174,21 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) static int hw_breakpoint_control(struct perf_event *bp, enum hw_breakpoint_ops ops) { - u32 ctrl; + u32 ctrl, privilege; int i, max_slots, enable; + struct pt_regs *regs; struct perf_event **slots; struct arch_hw_breakpoint *info = counter_arch_bp(bp); + if (arch_check_bp_in_kernelspace(info)) + privilege = CTRL_PLV0_ENABLE; + else + privilege = CTRL_PLV3_ENABLE; + + /* Whether bp belongs to a task. */ + if (bp->hw.target) + regs = task_pt_regs(bp->hw.target); + if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { /* Breakpoint */ slots = this_cpu_ptr(bp_on_reg); @@ -204,13 +214,15 @@ static int hw_breakpoint_control(struct perf_event *bp, write_wb_reg(CSR_CFG_ASID, i, 0, 0); write_wb_reg(CSR_CFG_ASID, i, 1, 0); if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { - write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE); + write_wb_reg(CSR_CFG_CTRL, i, 0, privilege); } else { ctrl = encode_ctrl_reg(info->ctrl); - write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE); + write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | privilege); } enable = csr_read64(LOONGARCH_CSR_CRMD); csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD); + if (bp->hw.target) + regs->csr_prmd |= CSR_PRMD_PWE; break; case HW_BREAKPOINT_UNINSTALL: /* Reset the FWPnCFG/MWPnCFG 1~4 register. */ @@ -222,6 +234,8 @@ static int hw_breakpoint_control(struct perf_event *bp, write_wb_reg(CSR_CFG_CTRL, i, 1, 0); write_wb_reg(CSR_CFG_ASID, i, 0, 0); write_wb_reg(CSR_CFG_ASID, i, 1, 0); + if (bp->hw.target) + regs->csr_prmd &= ~CSR_PRMD_PWE; break; } diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index 16b756c6049b..200109de1971 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -608,9 +608,14 @@ static int ptrace_hbp_set_ctrl(unsigned int note_type, return -EINVAL; } - err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr); - if (err) - return err; + if (uctrl & CTRL_PLV_ENABLE) { + err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr); + if (err) + return err; + attr.disabled = 0; + } else { + attr.disabled = 1; + } return modify_user_hw_breakpoint(bp, &attr); } @@ -641,6 +646,10 @@ static int ptrace_hbp_set_addr(unsigned int note_type, struct perf_event *bp; struct perf_event_attr attr; + /* Kernel-space address cannot be monitored by user-space */ + if ((unsigned long)addr >= XKPRANGE) + return -EINVAL; + bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx); if (IS_ERR(bp)) return PTR_ERR(bp); From 3eb2a8b23598e90fda43abb0f23cb267bd5018ba Mon Sep 17 00:00:00 2001 From: Hui Li Date: Fri, 21 Jun 2024 10:18:40 +0800 Subject: [PATCH 540/778] LoongArch: Fix multiple hardware watchpoint issues In the current code, if multiple hardware breakpoints/watchpoints in a user-space thread, some of them will not be triggered. When debugging the following code using gdb. lihui@bogon:~$ cat test.c #include int a = 0; int main() { printf("start test\n"); a = 1; printf("a = %d\n", a); printf("end test\n"); return 0; } lihui@bogon:~$ gcc -g test.c -o test lihui@bogon:~$ gdb test ... (gdb) start ... Temporary breakpoint 1, main () at test.c:5 5 printf("start test\n"); (gdb) watch a Hardware watchpoint 2: a (gdb) hbreak 8 Hardware assisted breakpoint 3 at 0x1200006ec: file test.c, line 8. (gdb) c Continuing. start test a = 1 Breakpoint 3, main () at test.c:8 8 printf("end test\n"); ... The first hardware watchpoint is not triggered, the root causes are: 1. In hw_breakpoint_control(), The FWPnCFG1.2.4/MWPnCFG1.2.4 register settings are not distinguished. They should be set based on hardware watchpoint functions (fetch or load/store operations). 2. In breakpoint_handler() and watchpoint_handler(), it doesn't identify which watchpoint is triggered. So, all watchpoint-related perf_event callbacks are called and siginfo is sent to the user space. This will cause user-space unable to determine which watchpoint is triggered. The kernel need to identity which watchpoint is triggered via MWPS/ FWPS registers, and then call the corresponding perf event callbacks to report siginfo to the user-space. Modify the relevant code to solve above issues. All changes according to the LoongArch Reference Manual: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#control-and-status-registers-related-to-watchpoints With this patch: lihui@bogon:~$ gdb test ... (gdb) start ... Temporary breakpoint 1, main () at test.c:5 5 printf("start test\n"); (gdb) watch a Hardware watchpoint 2: a (gdb) hbreak 8 Hardware assisted breakpoint 3 at 0x1200006ec: file test.c, line 8. (gdb) c Continuing. start test Hardware watchpoint 2: a Old value = 0 New value = 1 main () at test.c:7 7 printf("a = %d\n", a); (gdb) c Continuing. a = 1 Breakpoint 3, main () at test.c:8 8 printf("end test\n"); (gdb) c Continuing. end test [Inferior 1 (process 778) exited normally] Cc: stable@vger.kernel.org Signed-off-by: Hui Li Signed-off-by: Huacai Chen --- arch/loongarch/kernel/hw_breakpoint.c | 57 ++++++++++++++++----------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c index e882df1f72db..621ad7634df7 100644 --- a/arch/loongarch/kernel/hw_breakpoint.c +++ b/arch/loongarch/kernel/hw_breakpoint.c @@ -207,15 +207,15 @@ static int hw_breakpoint_control(struct perf_event *bp, switch (ops) { case HW_BREAKPOINT_INSTALL: /* Set the FWPnCFG/MWPnCFG 1~4 register. */ - write_wb_reg(CSR_CFG_ADDR, i, 0, info->address); - write_wb_reg(CSR_CFG_ADDR, i, 1, info->address); - write_wb_reg(CSR_CFG_MASK, i, 0, info->mask); - write_wb_reg(CSR_CFG_MASK, i, 1, info->mask); - write_wb_reg(CSR_CFG_ASID, i, 0, 0); - write_wb_reg(CSR_CFG_ASID, i, 1, 0); if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { + write_wb_reg(CSR_CFG_ADDR, i, 0, info->address); + write_wb_reg(CSR_CFG_MASK, i, 0, info->mask); + write_wb_reg(CSR_CFG_ASID, i, 0, 0); write_wb_reg(CSR_CFG_CTRL, i, 0, privilege); } else { + write_wb_reg(CSR_CFG_ADDR, i, 1, info->address); + write_wb_reg(CSR_CFG_MASK, i, 1, info->mask); + write_wb_reg(CSR_CFG_ASID, i, 1, 0); ctrl = encode_ctrl_reg(info->ctrl); write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | privilege); } @@ -226,14 +226,17 @@ static int hw_breakpoint_control(struct perf_event *bp, break; case HW_BREAKPOINT_UNINSTALL: /* Reset the FWPnCFG/MWPnCFG 1~4 register. */ - write_wb_reg(CSR_CFG_ADDR, i, 0, 0); - write_wb_reg(CSR_CFG_ADDR, i, 1, 0); - write_wb_reg(CSR_CFG_MASK, i, 0, 0); - write_wb_reg(CSR_CFG_MASK, i, 1, 0); - write_wb_reg(CSR_CFG_CTRL, i, 0, 0); - write_wb_reg(CSR_CFG_CTRL, i, 1, 0); - write_wb_reg(CSR_CFG_ASID, i, 0, 0); - write_wb_reg(CSR_CFG_ASID, i, 1, 0); + if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) { + write_wb_reg(CSR_CFG_ADDR, i, 0, 0); + write_wb_reg(CSR_CFG_MASK, i, 0, 0); + write_wb_reg(CSR_CFG_CTRL, i, 0, 0); + write_wb_reg(CSR_CFG_ASID, i, 0, 0); + } else { + write_wb_reg(CSR_CFG_ADDR, i, 1, 0); + write_wb_reg(CSR_CFG_MASK, i, 1, 0); + write_wb_reg(CSR_CFG_CTRL, i, 1, 0); + write_wb_reg(CSR_CFG_ASID, i, 1, 0); + } if (bp->hw.target) regs->csr_prmd &= ~CSR_PRMD_PWE; break; @@ -476,12 +479,15 @@ void breakpoint_handler(struct pt_regs *regs) slots = this_cpu_ptr(bp_on_reg); for (i = 0; i < boot_cpu_data.watch_ireg_count; ++i) { - bp = slots[i]; - if (bp == NULL) - continue; - perf_bp_event(bp, regs); + if ((csr_read32(LOONGARCH_CSR_FWPS) & (0x1 << i))) { + bp = slots[i]; + if (bp == NULL) + continue; + perf_bp_event(bp, regs); + csr_write32(0x1 << i, LOONGARCH_CSR_FWPS); + update_bp_registers(regs, 0, 0); + } } - update_bp_registers(regs, 0, 0); } NOKPROBE_SYMBOL(breakpoint_handler); @@ -493,12 +499,15 @@ void watchpoint_handler(struct pt_regs *regs) slots = this_cpu_ptr(wp_on_reg); for (i = 0; i < boot_cpu_data.watch_dreg_count; ++i) { - wp = slots[i]; - if (wp == NULL) - continue; - perf_bp_event(wp, regs); + if ((csr_read32(LOONGARCH_CSR_MWPS) & (0x1 << i))) { + wp = slots[i]; + if (wp == NULL) + continue; + perf_bp_event(wp, regs); + csr_write32(0x1 << i, LOONGARCH_CSR_MWPS); + update_bp_registers(regs, 0, 1); + } } - update_bp_registers(regs, 0, 1); } NOKPROBE_SYMBOL(watchpoint_handler); From d0a1c07739e1b7f74683fe061545669156d102f2 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 21 Jun 2024 10:18:40 +0800 Subject: [PATCH 541/778] LoongArch: KVM: Remove an unneeded semicolon Remove an unneeded semicolon to avoid build warnings: ./arch/loongarch/kvm/exit.c:764:2-3: Unneeded semicolon Cc: stable@vger.kernel.org Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=9343 Signed-off-by: Yang Li Signed-off-by: Huacai Chen --- arch/loongarch/kvm/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index c86e099af5ca..a68573e091c0 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -761,7 +761,7 @@ static void kvm_handle_service(struct kvm_vcpu *vcpu) default: ret = KVM_HCALL_INVALID_CODE; break; - }; + } kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret); } From ad53f5f54f351e967128edbc431f0f26427172cf Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Tue, 18 Jun 2024 17:16:42 -0700 Subject: [PATCH 542/778] net: dsa: microchip: fix initial port flush problem The very first flush in any port will flush all learned addresses in all ports. This can be observed by unplugging the cable from one port while additional ports are connected and dumping the fdb entries. This problem is caused by the initially wrong value programmed to the REG_SW_LUE_CTRL_1 register. Setting SW_FLUSH_STP_TABLE and SW_FLUSH_MSTP_TABLE bits does not have an immediate effect. It is when ksz9477_flush_dyn_mac_table() is called then the SW_FLUSH_STP_TABLE bit takes effect and flushes all learned entries. After that call both bits are reset and so the next port flush will not cause such problem again. Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477") Signed-off-by: Tristram Ha Link: https://patch.msgid.link/1718756202-2731-1-git-send-email-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index f8ad7833f5d9..2231128eef8b 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -355,10 +355,8 @@ int ksz9477_reset_switch(struct ksz_device *dev) SPI_AUTO_EDGE_DETECTION, 0); /* default configuration */ - ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8); - data8 = SW_AGING_ENABLE | SW_LINK_AUTO_AGING | - SW_SRC_ADDR_FILTER | SW_FLUSH_STP_TABLE | SW_FLUSH_MSTP_TABLE; - ksz_write8(dev, REG_SW_LUE_CTRL_1, data8); + ksz_write8(dev, REG_SW_LUE_CTRL_1, + SW_AGING_ENABLE | SW_LINK_AUTO_AGING | SW_SRC_ADDR_FILTER); /* disable interrupts */ ksz_write32(dev, REG_SW_INT_MASK__4, SWITCH_INT_MASK); From ad22051afdad962b6012f3823d0ed1a735935386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ca=C3=B1o?= Date: Thu, 20 Jun 2024 17:25:33 +0200 Subject: [PATCH 543/778] ALSA: hda/realtek: Add quirk for Lenovo Yoga Pro 7 14AHP9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lenovo Yoga Pro 7 14AHP9 (PCI SSID 17aa:3891) seems requiring a similar workaround like Yoga 9 model and Yoga 7 Pro 14APH8 for the bass speaker. Cc: Link: https://lore.kernel.org/all/20231207182035.30248-1-tiwai@suse.de/ Signed-off-by: Pablo Caño Link: https://patch.msgid.link/20240620152533.76712-1-pablocpascual@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e2dbcf8f5bcf..f4454abadc8d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10555,6 +10555,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3882, "Lenovo Yoga Pro 7 14APH8", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3884, "Y780 YG DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x17aa, 0x3891, "Lenovo Yoga Pro 7 14AHP9", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), From 4a3e37b3caea817199757a0b13aa53dd7c9376c8 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sun, 16 Jun 2024 14:25:02 +0100 Subject: [PATCH 544/778] MIPS: mipsmtregs: Fix target register for MFTC0 Target register of mftc0 should be __res instead of $1, this is a leftover from old .insn code. Fixes: dd6d29a61489 ("MIPS: Implement microMIPS MT ASE helpers") Cc: stable@vger.kernel.org Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mipsmtregs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h index 30e86861c206..b1ee3c48e84b 100644 --- a/arch/mips/include/asm/mipsmtregs.h +++ b/arch/mips/include/asm/mipsmtregs.h @@ -322,7 +322,7 @@ static inline void ehb(void) " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ _ASM_SET_MFTC0 \ - " mftc0 $1, " #rt ", " #sel " \n" \ + " mftc0 %0, " #rt ", " #sel " \n" \ _ASM_UNSET_MFTC0 \ " .set pop \n" \ : "=r" (__res)); \ From 0d5679a0aae2d8cda72169452c32e5cb88a7ab33 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Jun 2024 18:23:04 +0200 Subject: [PATCH 545/778] mips: fix compat_sys_lseek syscall This is almost compatible, but passing a negative offset should result in a EINVAL error, but on mips o32 compat mode would seek to a large 32-bit byte offset. Use compat_sys_lseek() to correctly sign-extend the argument. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 008ebe60263e..81428a2eb660 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -27,7 +27,7 @@ 17 o32 break sys_ni_syscall # 18 was sys_stat 18 o32 unused18 sys_ni_syscall -19 o32 lseek sys_lseek +19 o32 lseek sys_lseek compat_sys_lseek 20 o32 getpid sys_getpid 21 o32 mount sys_mount 22 o32 umount sys_oldumount From d3e2904f71ea0fe7eaff1d68a2b0363c888ea0fb Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 17 Nov 2023 13:49:59 +0100 Subject: [PATCH 546/778] net: can: j1939: enhanced error handling for tightly received RTS messages in xtp_rx_rts_session_new This patch enhances error handling in scenarios with RTS (Request to Send) messages arriving closely. It replaces the less informative WARN_ON_ONCE backtraces with a new error handling method. This provides clearer error messages and allows for the early termination of problematic sessions. Previously, sessions were only released at the end of j1939_xtp_rx_rts(). Potentially this could be reproduced with something like: testj1939 -r vcan0:0x80 & while true; do # send first RTS cansend vcan0 18EC8090#1014000303002301; # send second RTS cansend vcan0 18EC8090#1014000303002301; # send abort cansend vcan0 18EC8090#ff00000000002301; done Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Reported-by: syzbot+daa36413a5cedf799ae4@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/all/20231117124959.961171-1-o.rempel@pengutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fe3df23a2595..c6569f98d251 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1593,8 +1593,8 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb); struct j1939_session *session; const u8 *dat; + int len, ret; pgn_t pgn; - int len; netdev_dbg(priv->ndev, "%s\n", __func__); @@ -1653,7 +1653,22 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, session->tskey = priv->rx_tskey++; j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS); - WARN_ON_ONCE(j1939_session_activate(session)); + ret = j1939_session_activate(session); + if (ret) { + /* Entering this scope indicates an issue with the J1939 bus. + * Possible scenarios include: + * - A time lapse occurred, and a new session was initiated + * due to another packet being sent correctly. This could + * have been caused by too long interrupt, debugger, or being + * out-scheduled by another task. + * - The bus is receiving numerous erroneous packets, either + * from a malfunctioning device or during a test scenario. + */ + netdev_alert(priv->ndev, "%s: 0x%p: concurrent session with same addr (%02x %02x) is already active.\n", + __func__, session, skcb.addr.sa, skcb.addr.da); + j1939_session_put(session); + return NULL; + } return session; } From b7cdf1dd5d2a2d8200efd98d1893684db48fe134 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Fri, 17 May 2024 12:59:53 +0900 Subject: [PATCH 547/778] net: can: j1939: Initialize unused data in j1939_send_one() syzbot reported kernel-infoleak in raw_recvmsg() [1]. j1939_send_one() creates full frame including unused data, but it doesn't initialize it. This causes the kernel-infoleak issue. Fix this by initializing unused data. [1] BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in copy_to_user_iter lib/iov_iter.c:24 [inline] BUG: KMSAN: kernel-infoleak in iterate_ubuf include/linux/iov_iter.h:29 [inline] BUG: KMSAN: kernel-infoleak in iterate_and_advance2 include/linux/iov_iter.h:245 [inline] BUG: KMSAN: kernel-infoleak in iterate_and_advance include/linux/iov_iter.h:271 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x366/0x2520 lib/iov_iter.c:185 instrument_copy_to_user include/linux/instrumented.h:114 [inline] copy_to_user_iter lib/iov_iter.c:24 [inline] iterate_ubuf include/linux/iov_iter.h:29 [inline] iterate_and_advance2 include/linux/iov_iter.h:245 [inline] iterate_and_advance include/linux/iov_iter.h:271 [inline] _copy_to_iter+0x366/0x2520 lib/iov_iter.c:185 copy_to_iter include/linux/uio.h:196 [inline] memcpy_to_msg include/linux/skbuff.h:4113 [inline] raw_recvmsg+0x2b8/0x9e0 net/can/raw.c:1008 sock_recvmsg_nosec net/socket.c:1046 [inline] sock_recvmsg+0x2c4/0x340 net/socket.c:1068 ____sys_recvmsg+0x18a/0x620 net/socket.c:2803 ___sys_recvmsg+0x223/0x840 net/socket.c:2845 do_recvmmsg+0x4fc/0xfd0 net/socket.c:2939 __sys_recvmmsg net/socket.c:3018 [inline] __do_sys_recvmmsg net/socket.c:3041 [inline] __se_sys_recvmmsg net/socket.c:3034 [inline] __x64_sys_recvmmsg+0x397/0x490 net/socket.c:3034 x64_sys_call+0xf6c/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:300 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: slab_post_alloc_hook mm/slub.c:3804 [inline] slab_alloc_node mm/slub.c:3845 [inline] kmem_cache_alloc_node+0x613/0xc50 mm/slub.c:3888 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:577 __alloc_skb+0x35b/0x7a0 net/core/skbuff.c:668 alloc_skb include/linux/skbuff.h:1313 [inline] alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6504 sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2795 sock_alloc_send_skb include/net/sock.h:1842 [inline] j1939_sk_alloc_skb net/can/j1939/socket.c:878 [inline] j1939_sk_send_loop net/can/j1939/socket.c:1142 [inline] j1939_sk_sendmsg+0xc0a/0x2730 net/can/j1939/socket.c:1277 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 ____sys_sendmsg+0x877/0xb60 net/socket.c:2584 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 __sys_sendmsg net/socket.c:2667 [inline] __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x307/0x4a0 net/socket.c:2674 x64_sys_call+0xc4b/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:47 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Bytes 12-15 of 16 are uninitialized Memory access of size 16 starts at ffff888120969690 Data copied to user address 00000000200017c0 CPU: 1 PID: 5050 Comm: syz-executor198 Not tainted 6.9.0-rc5-syzkaller-00031-g71b1543c83d6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Reported-and-tested-by: syzbot+5681e40d297b30f5b513@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5681e40d297b30f5b513 Acked-by: Oleksij Rempel Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/all/20240517035953.2617090-1-syoshida@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/j1939/main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index a6fb89fa6278..7e8a20f2fc42 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -30,10 +30,6 @@ MODULE_ALIAS("can-proto-" __stringify(CAN_J1939)); /* CAN_HDR: #bytes before can_frame data part */ #define J1939_CAN_HDR (offsetof(struct can_frame, data)) -/* CAN_FTR: #bytes beyond data part */ -#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \ - sizeof(((struct can_frame *)0)->data)) - /* lowest layer */ static void j1939_can_recv(struct sk_buff *iskb, void *data) { @@ -342,7 +338,7 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb) memset(cf, 0, J1939_CAN_HDR); /* make it a full can frame again */ - skb_put(skb, J1939_CAN_FTR + (8 - dlc)); + skb_put_zero(skb, 8 - dlc); canid = CAN_EFF_FLAG | (skcb->priority << 26) | From 9ad1da14ab3bf23087ae45fe399d84a109ddb81a Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 28 May 2024 09:06:48 +0200 Subject: [PATCH 548/778] net: can: j1939: recover socket queue on CAN bus error during BAM transmission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses an issue where a CAN bus error during a BAM transmission could stall the socket queue, preventing further transmissions even after the bus error is resolved. The fix activates the next queued session after the error recovery, allowing communication to continue. Fixes: 9d71dd0c70099 ("can: add support of SAE J1939 protocol") Cc: stable@vger.kernel.org Reported-by: Alexander Hölzl Tested-by: Alexander Hölzl Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/all/20240528070648.1947203-1-o.rempel@pengutronix.de Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index c6569f98d251..4be73de5033c 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1696,6 +1696,8 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session, j1939_session_timers_cancel(session); j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); + if (session->transmission) + j1939_session_deactivate_activate_next(session); return -EBUSY; } From 0d34d8163fd87978a6abd792e2d8ad849f4c3d57 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 21 May 2024 12:10:20 +0800 Subject: [PATCH 549/778] can: kvaser_usb: fix return value for hif_usb_send_regout As the potential failure of usb_submit_urb(), it should be better to return the err variable to catch the error. Signed-off-by: Chen Ni Link: https://lore.kernel.org/all/20240521041020.1519416-1-nichen@iscas.ac.cn Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 8faf8a462c05..7292c81fc0cd 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -294,7 +294,7 @@ int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd, } usb_free_urb(urb); - return 0; + return err; } int kvaser_usb_can_rx_over_error(struct net_device *netdev) From d8fb63e46c884c898a38f061c2330f7729e75510 Mon Sep 17 00:00:00 2001 From: Vitor Soares Date: Fri, 17 May 2024 14:43:55 +0100 Subject: [PATCH 550/778] can: mcp251xfd: fix infinite loop when xmit fails When the mcp251xfd_start_xmit() function fails, the driver stops processing messages, and the interrupt routine does not return, running indefinitely even after killing the running application. Error messages: [ 441.298819] mcp251xfd spi2.0 can0: ERROR in mcp251xfd_start_xmit: -16 [ 441.306498] mcp251xfd spi2.0 can0: Transmit Event FIFO buffer not empty. (seq=0x000017c7, tef_tail=0x000017cf, tef_head=0x000017d0, tx_head=0x000017d3). ... and repeat forever. The issue can be triggered when multiple devices share the same SPI interface. And there is concurrent access to the bus. The problem occurs because tx_ring->head increments even if mcp251xfd_start_xmit() fails. Consequently, the driver skips one TX package while still expecting a response in mcp251xfd_handle_tefif_one(). Resolve the issue by starting a workqueue to write the tx obj synchronously if err = -EBUSY. In case of another error, decrement tx_ring->head, remove skb from the echo stack, and drop the message. Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Cc: stable@vger.kernel.org Signed-off-by: Vitor Soares Link: https://lore.kernel.org/all/20240517134355.770777-1-ivitro@gmail.com [mkl: use more imperative wording in patch description] Signed-off-by: Marc Kleine-Budde --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 14 ++++- drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c | 55 ++++++++++++++++--- drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 5 ++ 3 files changed, 65 insertions(+), 9 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 1d9057dc44f2..bf1589aef1fc 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1618,11 +1618,20 @@ static int mcp251xfd_open(struct net_device *ndev) clear_bit(MCP251XFD_FLAGS_DOWN, priv->flags); can_rx_offload_enable(&priv->offload); + priv->wq = alloc_ordered_workqueue("%s-mcp251xfd_wq", + WQ_FREEZABLE | WQ_MEM_RECLAIM, + dev_name(&spi->dev)); + if (!priv->wq) { + err = -ENOMEM; + goto out_can_rx_offload_disable; + } + INIT_WORK(&priv->tx_work, mcp251xfd_tx_obj_write_sync); + err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq, IRQF_SHARED | IRQF_ONESHOT, dev_name(&spi->dev), priv); if (err) - goto out_can_rx_offload_disable; + goto out_destroy_workqueue; err = mcp251xfd_chip_interrupts_enable(priv); if (err) @@ -1634,6 +1643,8 @@ static int mcp251xfd_open(struct net_device *ndev) out_free_irq: free_irq(spi->irq, priv); + out_destroy_workqueue: + destroy_workqueue(priv->wq); out_can_rx_offload_disable: can_rx_offload_disable(&priv->offload); set_bit(MCP251XFD_FLAGS_DOWN, priv->flags); @@ -1661,6 +1672,7 @@ static int mcp251xfd_stop(struct net_device *ndev) hrtimer_cancel(&priv->tx_irq_timer); mcp251xfd_chip_interrupts_disable(priv); free_irq(ndev->irq, priv); + destroy_workqueue(priv->wq); can_rx_offload_disable(&priv->offload); mcp251xfd_timestamp_stop(priv); mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED); diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c index 160528d3cc26..b1de8052a45c 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c @@ -131,6 +131,39 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, tx_obj->xfer[0].len = len; } +static void mcp251xfd_tx_failure_drop(const struct mcp251xfd_priv *priv, + struct mcp251xfd_tx_ring *tx_ring, + int err) +{ + struct net_device *ndev = priv->ndev; + struct net_device_stats *stats = &ndev->stats; + unsigned int frame_len = 0; + u8 tx_head; + + tx_ring->head--; + stats->tx_dropped++; + tx_head = mcp251xfd_get_tx_head(tx_ring); + can_free_echo_skb(ndev, tx_head, &frame_len); + netdev_completed_queue(ndev, 1, frame_len); + netif_wake_queue(ndev); + + if (net_ratelimit()) + netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err); +} + +void mcp251xfd_tx_obj_write_sync(struct work_struct *work) +{ + struct mcp251xfd_priv *priv = container_of(work, struct mcp251xfd_priv, + tx_work); + struct mcp251xfd_tx_obj *tx_obj = priv->tx_work_obj; + struct mcp251xfd_tx_ring *tx_ring = priv->tx; + int err; + + err = spi_sync(priv->spi, &tx_obj->msg); + if (err) + mcp251xfd_tx_failure_drop(priv, tx_ring, err); +} + static int mcp251xfd_tx_obj_write(const struct mcp251xfd_priv *priv, struct mcp251xfd_tx_obj *tx_obj) { @@ -162,6 +195,11 @@ static bool mcp251xfd_tx_busy(const struct mcp251xfd_priv *priv, return false; } +static bool mcp251xfd_work_busy(struct work_struct *work) +{ + return work_busy(work); +} + netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -175,7 +213,8 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; - if (mcp251xfd_tx_busy(priv, tx_ring)) + if (mcp251xfd_tx_busy(priv, tx_ring) || + mcp251xfd_work_busy(&priv->tx_work)) return NETDEV_TX_BUSY; tx_obj = mcp251xfd_get_tx_obj_next(tx_ring); @@ -193,13 +232,13 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, netdev_sent_queue(priv->ndev, frame_len); err = mcp251xfd_tx_obj_write(priv, tx_obj); - if (err) - goto out_err; - - return NETDEV_TX_OK; - - out_err: - netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err); + if (err == -EBUSY) { + netif_stop_queue(ndev); + priv->tx_work_obj = tx_obj; + queue_work(priv->wq, &priv->tx_work); + } else if (err) { + mcp251xfd_tx_failure_drop(priv, tx_ring, err); + } return NETDEV_TX_OK; } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index 24510b3b8020..b35bfebd23f2 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -633,6 +633,10 @@ struct mcp251xfd_priv { struct mcp251xfd_rx_ring *rx[MCP251XFD_FIFO_RX_NUM]; struct mcp251xfd_tx_ring tx[MCP251XFD_FIFO_TX_NUM]; + struct workqueue_struct *wq; + struct work_struct tx_work; + struct mcp251xfd_tx_obj *tx_work_obj; + DECLARE_BITMAP(flags, __MCP251XFD_FLAGS_SIZE__); u8 rx_ring_num; @@ -952,6 +956,7 @@ void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv, void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv); void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv); +void mcp251xfd_tx_obj_write_sync(struct work_struct *work); netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, struct net_device *ndev); From a23ac973f67f37e77b3c634e8b1ad5b0164fcc1f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 19 Jun 2024 18:08:56 -0400 Subject: [PATCH 551/778] openvswitch: get related ct labels from its master if it is not confirmed Ilya found a failure in running check-kernel tests with at_groups=144 (144: conntrack - FTP SNAT orig tuple) in OVS repo. After his further investigation, the root cause is that the labels sent to userspace for related ct are incorrect. The labels for unconfirmed related ct should use its master's labels. However, the changes made in commit 8c8b73320805 ("openvswitch: set IPS_CONFIRMED in tmpl status only when commit is set in conntrack") led to getting labels from this related ct. So fix it in ovs_ct_get_labels() by changing to copy labels from its master ct if it is a unconfirmed related ct. Note that there is no fix needed for ct->mark, as it was already copied from its master ct for related ct in init_conntrack(). Fixes: 8c8b73320805 ("openvswitch: set IPS_CONFIRMED in tmpl status only when commit is set in conntrack") Reported-by: Ilya Maximets Signed-off-by: Xin Long Reviewed-by: Ilya Maximets Tested-by: Ilya Maximets Reviewed-by: Aaron Conole Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 2928c142a2dd..3b980bf2770b 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -168,8 +168,13 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct) static void ovs_ct_get_labels(const struct nf_conn *ct, struct ovs_key_ct_labels *labels) { - struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; + struct nf_conn_labels *cl = NULL; + if (ct) { + if (ct->master && !nf_ct_is_confirmed(ct)) + ct = ct->master; + cl = nf_ct_labels_find(ct); + } if (cl) memcpy(labels, cl->bits, OVS_CT_LABELS_LEN); else From 17563b4a19d1844bdbccc7a82d2f31c28ca9cfae Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 21 Jun 2024 09:39:09 +0200 Subject: [PATCH 552/778] ALSA: hda: Use imply for suggesting CONFIG_SERIAL_MULTI_INSTANTIATE The recent fix introduced a reverse selection of CONFIG_SERIAL_MULTI_INSTANTIATE, but its condition isn't always met. Use a weak reverse selection to suggest the config for avoiding such inconsistencies, instead. Fixes: 9b1effff19cd ("ALSA: hda: cs35l56: Select SERIAL_MULTI_INSTANTIATE") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202406210732.ozgk8IMK-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202406211244.oLhoF3My-lkp@intel.com/ Reviewed-by: Richard Fitzgerald Link: https://patch.msgid.link/20240621073915.19576-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index e59df40a0007..a3cf0725fc43 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -162,7 +162,7 @@ config SND_HDA_SCODEC_CS35L56_I2C depends on ACPI || COMPILE_TEST depends on SND_SOC select FW_CS_DSP - select SERIAL_MULTI_INSTANTIATE + imply SERIAL_MULTI_INSTANTIATE select SND_HDA_GENERIC select SND_SOC_CS35L56_SHARED select SND_HDA_SCODEC_CS35L56 @@ -179,7 +179,7 @@ config SND_HDA_SCODEC_CS35L56_SPI depends on ACPI || COMPILE_TEST depends on SND_SOC select FW_CS_DSP - select SERIAL_MULTI_INSTANTIATE + imply SERIAL_MULTI_INSTANTIATE select SND_HDA_GENERIC select SND_SOC_CS35L56_SHARED select SND_HDA_SCODEC_CS35L56 From 00418d5530ca1f42d8721fe0a3e73d1ae477c223 Mon Sep 17 00:00:00 2001 From: Aryan Srivastava Date: Thu, 20 Jun 2024 16:12:02 +1200 Subject: [PATCH 553/778] net: mvpp2: fill-in dev_port attribute Fill this in so user-space can identify multiple ports on the same CP unit. Signed-off-by: Aryan Srivastava Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 671368d2c77e..9adf4301c9b1 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6907,6 +6907,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, /* 9704 == 9728 - 20 and rounding to 8 */ dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE; device_set_node(&dev->dev, port_fwnode); + dev->dev_port = port->id; port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops; port->pcs_gmac.neg_mode = true; From a95b031c6796bf9972da2d4b4b524a57734f3a0a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 20 Jun 2024 16:56:26 +0800 Subject: [PATCH 554/778] bonding: fix incorrect software timestamping report The __ethtool_get_ts_info function returns directly if the device has a get_ts_info() method. For bonding with an active slave, this works correctly as we simply return the real device's timestamping information. However, when there is no active slave, we only check the slave's TX software timestamp information. We still need to set the phc index and RX timestamp information manually. Otherwise, the result will be look like: Time stamping parameters for bond0: Capabilities: software-transmit PTP Hardware Clock: 0 Hardware Transmit Timestamp Modes: none Hardware Receive Filter Modes: none This issue does not affect VLAN or MACVLAN devices, as they only have one downlink and can directly use the downlink's timestamping information. Fixes: b8768dc40777 ("net: ethtool: Refactor identical get_ts_info implementations.") Reported-by: Liang Li Closes: https://issues.redhat.com/browse/RHEL-42409 Signed-off-by: Hangbin Liu Acked-by: Kory Maincent Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3c3fcce4acd4..d19aabf5d4fb 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5773,6 +5773,9 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, if (real_dev) { ret = ethtool_get_ts_info_by_layer(real_dev, info); } else { + info->phc_index = -1; + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; /* Check if all slaves support software tx timestamping */ rcu_read_lock(); bond_for_each_slave_rcu(bond, slave, iter) { From 7eadf50095bc35c0e17e66cab617a934888edc20 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 20 Jun 2024 11:57:50 +0200 Subject: [PATCH 555/778] net: pse-pd: Kconfig: Fix missing firmware loader config select Selecting FW_UPLOAD is not sufficient as it allows the firmware loader API to be built as a module alongside the pd692x0 driver built as builtin. Add select FW_LOADER to fix this issue. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202406200632.hSChnX0g-lkp@intel.com/ Fixes: 9a9938451890 ("net: pse-pd: Add PD692x0 PSE controller driver") Signed-off-by: Kory Maincent Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/pse-pd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/pse-pd/Kconfig b/drivers/net/pse-pd/Kconfig index 577ea904b3d9..7fab916a7f46 100644 --- a/drivers/net/pse-pd/Kconfig +++ b/drivers/net/pse-pd/Kconfig @@ -23,6 +23,7 @@ config PSE_REGULATOR config PSE_PD692X0 tristate "PD692X0 PSE controller" depends on I2C + select FW_LOADER select FW_UPLOAD help This module provides support for PD692x0 regulator based Ethernet From e3f02f32a05009a688a87f5799e049ed6b55bab5 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 20 Jun 2024 10:58:08 +0000 Subject: [PATCH 556/778] ionic: fix kernel panic due to multi-buffer handling Currently, the ionic_run_xdp() doesn't handle multi-buffer packets properly for XDP_TX and XDP_REDIRECT. When a jumbo frame is received, the ionic_run_xdp() first makes xdp frame with all necessary pages in the rx descriptor. And if the action is either XDP_TX or XDP_REDIRECT, it should unmap dma-mapping and reset page pointer to NULL for all pages, not only the first page. But it doesn't for SG pages. So, SG pages unexpectedly will be reused. It eventually causes kernel panic. Oops: general protection fault, probably for non-canonical address 0x504f4e4dbebc64ff: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 0 Comm: swapper/3 Not tainted 6.10.0-rc3+ #25 RIP: 0010:xdp_return_frame+0x42/0x90 Code: 01 75 12 5b 4c 89 e6 5d 31 c9 41 5c 31 d2 41 5d e9 73 fd ff ff 44 8b 6b 20 0f b7 43 0a 49 81 ed 68 01 00 00 49 29 c5 49 01 fd <41> 80 7d0 RSP: 0018:ffff99d00122ce08 EFLAGS: 00010202 RAX: 0000000000005453 RBX: ffff8d325f904000 RCX: 0000000000000001 RDX: 00000000670e1000 RSI: 000000011f90d000 RDI: 504f4e4d4c4b4a49 RBP: ffff99d003907740 R08: 0000000000000000 R09: 0000000000000000 R10: 000000011f90d000 R11: 0000000000000000 R12: ffff8d325f904010 R13: 504f4e4dbebc64fd R14: ffff8d3242b070c8 R15: ffff99d0039077c0 FS: 0000000000000000(0000) GS:ffff8d399f780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f41f6c85e38 CR3: 000000037ac30000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? die_addr+0x33/0x90 ? exc_general_protection+0x251/0x2f0 ? asm_exc_general_protection+0x22/0x30 ? xdp_return_frame+0x42/0x90 ionic_tx_clean+0x211/0x280 [ionic 15881354510e6a9c655c59c54812b319ed2cd015] ionic_tx_cq_service+0xd3/0x210 [ionic 15881354510e6a9c655c59c54812b319ed2cd015] ionic_txrx_napi+0x41/0x1b0 [ionic 15881354510e6a9c655c59c54812b319ed2cd015] __napi_poll.constprop.0+0x29/0x1b0 net_rx_action+0x2c4/0x350 handle_softirqs+0xf4/0x320 irq_exit_rcu+0x78/0xa0 common_interrupt+0x77/0x90 Fixes: 5377805dc1c0 ("ionic: implement xdp frags support") Signed-off-by: Taehee Yoo Reviewed-by: Shannon Nelson Signed-off-by: David S. Miller --- .../net/ethernet/pensando/ionic/ionic_txrx.c | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 2427610f4306..aed7d9cbce03 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -480,6 +480,20 @@ int ionic_xdp_xmit(struct net_device *netdev, int n, return nxmit; } +static void ionic_xdp_rx_put_bufs(struct ionic_queue *q, + struct ionic_buf_info *buf_info, + int nbufs) +{ + int i; + + for (i = 0; i < nbufs; i++) { + dma_unmap_page(q->dev, buf_info->dma_addr, + IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + buf_info->page = NULL; + buf_info++; + } +} + static bool ionic_run_xdp(struct ionic_rx_stats *stats, struct net_device *netdev, struct bpf_prog *xdp_prog, @@ -493,6 +507,7 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, struct netdev_queue *nq; struct xdp_frame *xdpf; int remain_len; + int nbufs = 1; int frag_len; int err = 0; @@ -542,6 +557,7 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, if (page_is_pfmemalloc(bi->page)) xdp_buff_set_frag_pfmemalloc(&xdp_buf); } while (remain_len > 0); + nbufs += sinfo->nr_frags; } xdp_action = bpf_prog_run_xdp(xdp_prog, &xdp_buf); @@ -574,9 +590,6 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, goto out_xdp_abort; } - dma_unmap_page(rxq->dev, buf_info->dma_addr, - IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - err = ionic_xdp_post_frame(txq, xdpf, XDP_TX, buf_info->page, buf_info->page_offset, @@ -586,23 +599,19 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, netdev_dbg(netdev, "tx ionic_xdp_post_frame err %d\n", err); goto out_xdp_abort; } - buf_info->page = NULL; + ionic_xdp_rx_put_bufs(rxq, buf_info, nbufs); stats->xdp_tx++; /* the Tx completion will free the buffers */ break; case XDP_REDIRECT: - /* unmap the pages before handing them to a different device */ - dma_unmap_page(rxq->dev, buf_info->dma_addr, - IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - err = xdp_do_redirect(netdev, &xdp_buf, xdp_prog); if (err) { netdev_dbg(netdev, "xdp_do_redirect err %d\n", err); goto out_xdp_abort; } - buf_info->page = NULL; + ionic_xdp_rx_put_bufs(rxq, buf_info, nbufs); rxq->xdp_flush = true; stats->xdp_redirect++; break; From cf6d9d2d243f242f51ee0666ca88e61d9408752f Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 4 Jun 2024 18:35:10 -0500 Subject: [PATCH 557/778] KVM: SEV-ES: Fix svm_get_msr()/svm_set_msr() for KVM_SEV_ES_INIT guests With commit 27bd5fdc24c0 ("KVM: SEV-ES: Prevent MSR access post VMSA encryption"), older VMMs like QEMU 9.0 and older will fail when booting SEV-ES guests with something like the following error: qemu-system-x86_64: error: failed to get MSR 0x174 qemu-system-x86_64: ../qemu.git/target/i386/kvm/kvm.c:3950: kvm_get_msrs: Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. This is because older VMMs that might still call svm_get_msr()/svm_set_msr() for SEV-ES guests after guest boot even if those interfaces were essentially just noops because of the vCPU state being encrypted and stored separately in the VMSA. Now those VMMs will get an -EINVAL and generally crash. Newer VMMs that are aware of KVM_SEV_INIT2 however are already aware of the stricter limitations of what vCPU state can be sync'd during guest run-time, so newer QEMU for instance will work both for legacy KVM_SEV_ES_INIT interface as well as KVM_SEV_INIT2. So when using KVM_SEV_INIT2 it's okay to assume userspace can deal with -EINVAL, whereas for legacy KVM_SEV_ES_INIT the kernel might be dealing with either an older VMM and so it needs to assume that returning -EINVAL might break the VMM. Address this by only returning -EINVAL if the guest was started with KVM_SEV_INIT2. Otherwise, just silently return. Cc: Ravi Bangoria Cc: Nikunj A Dadhania Reported-by: Srikanth Aithal Closes: https://lore.kernel.org/lkml/37usuu4yu4ok7be2hqexhmcyopluuiqj3k266z4gajc2rcj4yo@eujb23qc3zcm/ Fixes: 27bd5fdc24c0 ("KVM: SEV-ES: Prevent MSR access post VMSA encryption") Signed-off-by: Michael Roth Message-ID: <20240604233510.764949-1-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 296c524988f9..c95d3900fe56 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2843,7 +2843,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (sev_es_prevent_msr_access(vcpu, msr_info)) { msr_info->data = 0; - return -EINVAL; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; } switch (msr_info->index) { @@ -2998,7 +2998,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) u64 data = msr->data; if (sev_es_prevent_msr_access(vcpu, msr)) - return -EINVAL; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; switch (ecx) { case MSR_AMD64_TSC_RATIO: From 62e58ddb146502faff1dd23164a20688624eaaed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jun 2024 13:31:19 +0000 Subject: [PATCH 558/778] net: add softirq safety to netdev_rename_lock syzbot reported a lockdep violation involving bridge driver [1] Make sure netdev_rename_lock is softirq safe to fix this issue. [1] WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected 6.10.0-rc2-syzkaller-00249-gbe27b8965297 #0 Not tainted ----------------------------------------------------- syz-executor.2/9449 [HC0[0]:SC0[2]:HE0:SE0] is trying to acquire: ffffffff8f5de668 (netdev_rename_lock.seqcount){+.+.}-{0:0}, at: rtnl_fill_ifinfo+0x38e/0x2270 net/core/rtnetlink.c:1839 and this task is already holding: ffff888060c64cb8 (&br->lock){+.-.}-{2:2}, at: spin_lock_bh include/linux/spinlock.h:356 [inline] ffff888060c64cb8 (&br->lock){+.-.}-{2:2}, at: br_port_slave_changelink+0x3d/0x150 net/bridge/br_netlink.c:1212 which would create a new lock dependency: (&br->lock){+.-.}-{2:2} -> (netdev_rename_lock.seqcount){+.+.}-{0:0} but this new dependency connects a SOFTIRQ-irq-safe lock: (&br->lock){+.-.}-{2:2} ... which became SOFTIRQ-irq-safe at: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] br_forward_delay_timer_expired+0x50/0x440 net/bridge/br_stp_timer.c:86 call_timer_fn+0x18e/0x650 kernel/time/timer.c:1792 expire_timers kernel/time/timer.c:1843 [inline] __run_timers kernel/time/timer.c:2417 [inline] __run_timer_base+0x66a/0x8e0 kernel/time/timer.c:2428 run_timer_base kernel/time/timer.c:2437 [inline] run_timer_softirq+0xb7/0x170 kernel/time/timer.c:2447 handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 __do_softirq kernel/softirq.c:588 [inline] invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu+0xf4/0x1c0 kernel/softirq.c:637 irq_exit_rcu+0x9/0x30 kernel/softirq.c:649 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline] sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1043 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 lock_acquire+0x264/0x550 kernel/locking/lockdep.c:5758 fs_reclaim_acquire+0xaf/0x140 mm/page_alloc.c:3800 might_alloc include/linux/sched/mm.h:334 [inline] slab_pre_alloc_hook mm/slub.c:3890 [inline] slab_alloc_node mm/slub.c:3980 [inline] kmalloc_trace_noprof+0x3d/0x2c0 mm/slub.c:4147 kmalloc_noprof include/linux/slab.h:660 [inline] kzalloc_noprof include/linux/slab.h:778 [inline] class_dir_create_and_add drivers/base/core.c:3255 [inline] get_device_parent+0x2a7/0x410 drivers/base/core.c:3315 device_add+0x325/0xbf0 drivers/base/core.c:3645 netdev_register_kobject+0x17e/0x320 net/core/net-sysfs.c:2136 register_netdevice+0x11d5/0x19e0 net/core/dev.c:10375 nsim_init_netdevsim drivers/net/netdevsim/netdev.c:690 [inline] nsim_create+0x647/0x890 drivers/net/netdevsim/netdev.c:750 __nsim_dev_port_add+0x6c0/0xae0 drivers/net/netdevsim/dev.c:1390 nsim_dev_port_add_all drivers/net/netdevsim/dev.c:1446 [inline] nsim_dev_reload_create drivers/net/netdevsim/dev.c:1498 [inline] nsim_dev_reload_up+0x69b/0x8e0 drivers/net/netdevsim/dev.c:985 devlink_reload+0x478/0x870 net/devlink/dev.c:474 devlink_nl_reload_doit+0xbd6/0xe50 net/devlink/dev.c:586 genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0xb14/0xec0 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2564 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2585 ___sys_sendmsg net/socket.c:2639 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2668 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f to a SOFTIRQ-irq-unsafe lock: (netdev_rename_lock.seqcount){+.+.}-{0:0} ... which became SOFTIRQ-irq-unsafe at: ... lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 do_write_seqcount_begin_nested include/linux/seqlock.h:469 [inline] do_write_seqcount_begin include/linux/seqlock.h:495 [inline] write_seqlock include/linux/seqlock.h:823 [inline] dev_change_name+0x184/0x920 net/core/dev.c:1229 do_setlink+0xa4b/0x41f0 net/core/rtnetlink.c:2880 __rtnl_newlink net/core/rtnetlink.c:3696 [inline] rtnl_newlink+0x180b/0x20a0 net/core/rtnetlink.c:3743 rtnetlink_rcv_msg+0x89b/0x1180 net/core/rtnetlink.c:6635 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2564 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 __sys_sendto+0x3a4/0x4f0 net/socket.c:2192 __do_sys_sendto net/socket.c:2204 [inline] __se_sys_sendto net/socket.c:2200 [inline] __x64_sys_sendto+0xde/0x100 net/socket.c:2200 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(netdev_rename_lock.seqcount); local_irq_disable(); lock(&br->lock); lock(netdev_rename_lock.seqcount); lock(&br->lock); *** DEADLOCK *** 3 locks held by syz-executor.2/9449: #0: ffffffff8f5e7448 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock net/core/rtnetlink.c:79 [inline] #0: ffffffff8f5e7448 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x842/0x1180 net/core/rtnetlink.c:6632 #1: ffff888060c64cb8 (&br->lock){+.-.}-{2:2}, at: spin_lock_bh include/linux/spinlock.h:356 [inline] #1: ffff888060c64cb8 (&br->lock){+.-.}-{2:2}, at: br_port_slave_changelink+0x3d/0x150 net/bridge/br_netlink.c:1212 #2: ffffffff8e333fa0 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:329 [inline] #2: ffffffff8e333fa0 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:781 [inline] #2: ffffffff8e333fa0 (rcu_read_lock){....}-{1:2}, at: team_change_rx_flags+0x29/0x330 drivers/net/team/team_core.c:1767 the dependencies between SOFTIRQ-irq-safe lock and the holding lock: -> (&br->lock){+.-.}-{2:2} { HARDIRQ-ON-W at: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:126 [inline] _raw_spin_lock_bh+0x35/0x50 kernel/locking/spinlock.c:178 spin_lock_bh include/linux/spinlock.h:356 [inline] br_add_if+0xb34/0xef0 net/bridge/br_if.c:682 do_set_master net/core/rtnetlink.c:2701 [inline] do_setlink+0xe70/0x41f0 net/core/rtnetlink.c:2907 __rtnl_newlink net/core/rtnetlink.c:3696 [inline] rtnl_newlink+0x180b/0x20a0 net/core/rtnetlink.c:3743 rtnetlink_rcv_msg+0x89b/0x1180 net/core/rtnetlink.c:6635 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2564 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 __sys_sendto+0x3a4/0x4f0 net/socket.c:2192 __do_sys_sendto net/socket.c:2204 [inline] __se_sys_sendto net/socket.c:2200 [inline] __x64_sys_sendto+0xde/0x100 net/socket.c:2200 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f IN-SOFTIRQ-W at: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] br_forward_delay_timer_expired+0x50/0x440 net/bridge/br_stp_timer.c:86 call_timer_fn+0x18e/0x650 kernel/time/timer.c:1792 expire_timers kernel/time/timer.c:1843 [inline] __run_timers kernel/time/timer.c:2417 [inline] __run_timer_base+0x66a/0x8e0 kernel/time/timer.c:2428 run_timer_base kernel/time/timer.c:2437 [inline] run_timer_softirq+0xb7/0x170 kernel/time/timer.c:2447 handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 __do_softirq kernel/softirq.c:588 [inline] invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu+0xf4/0x1c0 kernel/softirq.c:637 irq_exit_rcu+0x9/0x30 kernel/softirq.c:649 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline] sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1043 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 lock_acquire+0x264/0x550 kernel/locking/lockdep.c:5758 fs_reclaim_acquire+0xaf/0x140 mm/page_alloc.c:3800 might_alloc include/linux/sched/mm.h:334 [inline] slab_pre_alloc_hook mm/slub.c:3890 [inline] slab_alloc_node mm/slub.c:3980 [inline] kmalloc_trace_noprof+0x3d/0x2c0 mm/slub.c:4147 kmalloc_noprof include/linux/slab.h:660 [inline] kzalloc_noprof include/linux/slab.h:778 [inline] class_dir_create_and_add drivers/base/core.c:3255 [inline] get_device_parent+0x2a7/0x410 drivers/base/core.c:3315 device_add+0x325/0xbf0 drivers/base/core.c:3645 netdev_register_kobject+0x17e/0x320 net/core/net-sysfs.c:2136 register_netdevice+0x11d5/0x19e0 net/core/dev.c:10375 nsim_init_netdevsim drivers/net/netdevsim/netdev.c:690 [inline] nsim_create+0x647/0x890 drivers/net/netdevsim/netdev.c:750 __nsim_dev_port_add+0x6c0/0xae0 drivers/net/netdevsim/dev.c:1390 nsim_dev_port_add_all drivers/net/netdevsim/dev.c:1446 [inline] nsim_dev_reload_create drivers/net/netdevsim/dev.c:1498 [inline] nsim_dev_reload_up+0x69b/0x8e0 drivers/net/netdevsim/dev.c:985 devlink_reload+0x478/0x870 net/devlink/dev.c:474 devlink_nl_reload_doit+0xbd6/0xe50 net/devlink/dev.c:586 genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0xb14/0xec0 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2564 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2585 ___sys_sendmsg net/socket.c:2639 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2668 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f INITIAL USE at: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:126 [inline] _raw_spin_lock_bh+0x35/0x50 kernel/locking/spinlock.c:178 spin_lock_bh include/linux/spinlock.h:356 [inline] br_add_if+0xb34/0xef0 net/bridge/br_if.c:682 do_set_master net/core/rtnetlink.c:2701 [inline] do_setlink+0xe70/0x41f0 net/core/rtnetlink.c:2907 __rtnl_newlink net/core/rtnetlink.c:3696 [inline] rtnl_newlink+0x180b/0x20a0 net/core/rtnetlink.c:3743 rtnetlink_rcv_msg+0x89b/0x1180 net/core/rtnetlink.c:6635 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2564 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 __sys_sendto+0x3a4/0x4f0 net/socket.c:2192 __do_sys_sendto net/socket.c:2204 [inline] __se_sys_sendto net/socket.c:2200 [inline] __x64_sys_sendto+0xde/0x100 net/socket.c:2200 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f } ... key at: [] br_dev_setup.__key+0x0/0x20 the dependencies between the lock to be acquired and SOFTIRQ-irq-unsafe lock: -> (netdev_rename_lock.seqcount){+.+.}-{0:0} { HARDIRQ-ON-W at: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 do_write_seqcount_begin_nested include/linux/seqlock.h:469 [inline] do_write_seqcount_begin include/linux/seqlock.h:495 [inline] write_seqlock include/linux/seqlock.h:823 [inline] dev_change_name+0x184/0x920 net/core/dev.c:1229 do_setlink+0xa4b/0x41f0 net/core/rtnetlink.c:2880 __rtnl_newlink net/core/rtnetlink.c:3696 [inline] rtnl_newlink+0x180b/0x20a0 net/core/rtnetlink.c:3743 rtnetlink_rcv_msg+0x89b/0x1180 net/core/rtnetlink.c:6635 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2564 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 __sys_sendto+0x3a4/0x4f0 net/socket.c:2192 __do_sys_sendto net/socket.c:2204 [inline] __se_sys_sendto net/socket.c:2200 [inline] __x64_sys_sendto+0xde/0x100 net/socket.c:2200 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f SOFTIRQ-ON-W at: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 do_write_seqcount_begin_nested include/linux/seqlock.h:469 [inline] do_write_seqcount_begin include/linux/seqlock.h:495 [inline] write_seqlock include/linux/seqlock.h:823 [inline] dev_change_name+0x184/0x920 net/core/dev.c:1229 do_setlink+0xa4b/0x41f0 net/core/rtnetlink.c:2880 __rtnl_newlink net/core/rtnetlink.c:3696 [inline] rtnl_newlink+0x180b/0x20a0 net/core/rtnetlink.c:3743 rtnetlink_rcv_msg+0x89b/0x1180 net/core/rtnetlink.c:6635 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2564 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 __sys_sendto+0x3a4/0x4f0 net/socket.c:2192 __do_sys_sendto net/socket.c:2204 [inline] __se_sys_sendto net/socket.c:2200 [inline] __x64_sys_sendto+0xde/0x100 net/socket.c:2200 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f INITIAL USE at: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 do_write_seqcount_begin_nested include/linux/seqlock.h:469 [inline] do_write_seqcount_begin include/linux/seqlock.h:495 [inline] write_seqlock include/linux/seqlock.h:823 [inline] dev_change_name+0x184/0x920 net/core/dev.c:1229 do_setlink+0xa4b/0x41f0 net/core/rtnetlink.c:2880 __rtnl_newlink net/core/rtnetlink.c:3696 [inline] rtnl_newlink+0x180b/0x20a0 net/core/rtnetlink.c:3743 rtnetlink_rcv_msg+0x89b/0x1180 net/core/rtnetlink.c:6635 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2564 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 __sys_sendto+0x3a4/0x4f0 net/socket.c:2192 __do_sys_sendto net/socket.c:2204 [inline] __se_sys_sendto net/socket.c:2200 [inline] __x64_sys_sendto+0xde/0x100 net/socket.c:2200 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f INITIAL READ USE at: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 seqcount_lockdep_reader_access include/linux/seqlock.h:72 [inline] read_seqbegin include/linux/seqlock.h:772 [inline] netdev_copy_name+0x168/0x2c0 net/core/dev.c:949 rtnl_fill_ifinfo+0x38e/0x2270 net/core/rtnetlink.c:1839 rtmsg_ifinfo_build_skb+0x18a/0x260 net/core/rtnetlink.c:4073 rtmsg_ifinfo_event net/core/rtnetlink.c:4107 [inline] rtmsg_ifinfo+0x91/0x1b0 net/core/rtnetlink.c:4116 register_netdevice+0x1665/0x19e0 net/core/dev.c:10422 register_netdev+0x3b/0x50 net/core/dev.c:10512 loopback_net_init+0x73/0x150 drivers/net/loopback.c:217 ops_init+0x359/0x610 net/core/net_namespace.c:139 __register_pernet_operations net/core/net_namespace.c:1247 [inline] register_pernet_operations+0x2cb/0x660 net/core/net_namespace.c:1320 register_pernet_device+0x33/0x80 net/core/net_namespace.c:1407 net_dev_init+0xfcd/0x10d0 net/core/dev.c:11956 do_one_initcall+0x248/0x880 init/main.c:1267 do_initcall_level+0x157/0x210 init/main.c:1329 do_initcalls+0x3f/0x80 init/main.c:1345 kernel_init_freeable+0x435/0x5d0 init/main.c:1578 kernel_init+0x1d/0x2b0 init/main.c:1467 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 } ... key at: [] netdev_rename_lock+0x8/0xa0 ... acquired at: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 seqcount_lockdep_reader_access include/linux/seqlock.h:72 [inline] read_seqbegin include/linux/seqlock.h:772 [inline] netdev_copy_name+0x168/0x2c0 net/core/dev.c:949 rtnl_fill_ifinfo+0x38e/0x2270 net/core/rtnetlink.c:1839 rtmsg_ifinfo_build_skb+0x18a/0x260 net/core/rtnetlink.c:4073 rtmsg_ifinfo_event net/core/rtnetlink.c:4107 [inline] rtmsg_ifinfo+0x91/0x1b0 net/core/rtnetlink.c:4116 __dev_notify_flags+0xf7/0x400 net/core/dev.c:8816 __dev_set_promiscuity+0x152/0x5a0 net/core/dev.c:8588 dev_set_promiscuity+0x51/0xe0 net/core/dev.c:8608 team_change_rx_flags+0x203/0x330 drivers/net/team/team_core.c:1771 dev_change_rx_flags net/core/dev.c:8541 [inline] __dev_set_promiscuity+0x406/0x5a0 net/core/dev.c:8585 dev_set_promiscuity+0x51/0xe0 net/core/dev.c:8608 br_port_clear_promisc net/bridge/br_if.c:135 [inline] br_manage_promisc+0x505/0x590 net/bridge/br_if.c:172 nbp_update_port_count net/bridge/br_if.c:242 [inline] br_port_flags_change+0x161/0x1f0 net/bridge/br_if.c:761 br_setport+0xcb5/0x16d0 net/bridge/br_netlink.c:1000 br_port_slave_changelink+0x135/0x150 net/bridge/br_netlink.c:1213 __rtnl_newlink net/core/rtnetlink.c:3689 [inline] rtnl_newlink+0x169f/0x20a0 net/core/rtnetlink.c:3743 rtnetlink_rcv_msg+0x89b/0x1180 net/core/rtnetlink.c:6635 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2564 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2585 ___sys_sendmsg net/socket.c:2639 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2668 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f stack backtrace: CPU: 0 PID: 9449 Comm: syz-executor.2 Not tainted 6.10.0-rc2-syzkaller-00249-gbe27b8965297 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 print_bad_irq_dependency kernel/locking/lockdep.c:2626 [inline] check_irq_usage kernel/locking/lockdep.c:2865 [inline] check_prev_add kernel/locking/lockdep.c:3138 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x4de0/0x5900 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 seqcount_lockdep_reader_access include/linux/seqlock.h:72 [inline] read_seqbegin include/linux/seqlock.h:772 [inline] netdev_copy_name+0x168/0x2c0 net/core/dev.c:949 rtnl_fill_ifinfo+0x38e/0x2270 net/core/rtnetlink.c:1839 rtmsg_ifinfo_build_skb+0x18a/0x260 net/core/rtnetlink.c:4073 rtmsg_ifinfo_event net/core/rtnetlink.c:4107 [inline] rtmsg_ifinfo+0x91/0x1b0 net/core/rtnetlink.c:4116 __dev_notify_flags+0xf7/0x400 net/core/dev.c:8816 __dev_set_promiscuity+0x152/0x5a0 net/core/dev.c:8588 dev_set_promiscuity+0x51/0xe0 net/core/dev.c:8608 team_change_rx_flags+0x203/0x330 drivers/net/team/team_core.c:1771 dev_change_rx_flags net/core/dev.c:8541 [inline] __dev_set_promiscuity+0x406/0x5a0 net/core/dev.c:8585 dev_set_promiscuity+0x51/0xe0 net/core/dev.c:8608 br_port_clear_promisc net/bridge/br_if.c:135 [inline] br_manage_promisc+0x505/0x590 net/bridge/br_if.c:172 nbp_update_port_count net/bridge/br_if.c:242 [inline] br_port_flags_change+0x161/0x1f0 net/bridge/br_if.c:761 br_setport+0xcb5/0x16d0 net/bridge/br_netlink.c:1000 br_port_slave_changelink+0x135/0x150 net/bridge/br_netlink.c:1213 __rtnl_newlink net/core/rtnetlink.c:3689 [inline] rtnl_newlink+0x169f/0x20a0 net/core/rtnetlink.c:3743 rtnetlink_rcv_msg+0x89b/0x1180 net/core/rtnetlink.c:6635 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2564 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8db/0xcb0 net/netlink/af_netlink.c:1905 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2585 ___sys_sendmsg net/socket.c:2639 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2668 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f3b3047cf29 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f3b311740c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f3b305b4050 RCX: 00007f3b3047cf29 RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000008 RBP: 00007f3b304ec074 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000006e R14: 00007f3b305b4050 R15: 00007ffca2f3dc68 Fixes: 0840556e5a3a ("net: Protect dev->name by seqlock.") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Kuniyuki Iwashima Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/core/dev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4d4de9008f6f..2b4819b610b8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1226,9 +1226,9 @@ int dev_change_name(struct net_device *dev, const char *newname) memcpy(oldname, dev->name, IFNAMSIZ); - write_seqlock(&netdev_rename_lock); + write_seqlock_bh(&netdev_rename_lock); err = dev_get_valid_name(net, dev, newname); - write_sequnlock(&netdev_rename_lock); + write_sequnlock_bh(&netdev_rename_lock); if (err < 0) { up_write(&devnet_rename_sem); @@ -1269,9 +1269,9 @@ rollback: if (err >= 0) { err = ret; down_write(&devnet_rename_sem); - write_seqlock(&netdev_rename_lock); + write_seqlock_bh(&netdev_rename_lock); memcpy(dev->name, oldname, IFNAMSIZ); - write_sequnlock(&netdev_rename_lock); + write_sequnlock_bh(&netdev_rename_lock); memcpy(oldname, newname, IFNAMSIZ); WRITE_ONCE(dev->name_assign_type, old_assign_type); old_assign_type = NET_NAME_RENAMED; @@ -11419,9 +11419,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, if (new_name[0]) { /* Rename the netdev to prepared name */ - write_seqlock(&netdev_rename_lock); + write_seqlock_bh(&netdev_rename_lock); strscpy(dev->name, new_name, IFNAMSIZ); - write_sequnlock(&netdev_rename_lock); + write_sequnlock_bh(&netdev_rename_lock); } /* Fixup kobjects */ From 1cbf347288702af0fe8667c0ce760afbe982a2f1 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 14 Jun 2024 11:14:18 +0300 Subject: [PATCH 559/778] i2c: Add nop fwnode operations Add nop variants of i2c_find_device_by_fwnode(), i2c_find_adapter_by_fwnode() and i2c_get_adapter_by_fwnode() for use without CONFIG_I2C. Signed-off-by: Sakari Ailus Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 9709537370ee..424acb98c7c2 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -960,8 +960,6 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr); #define builtin_i2c_driver(__i2c_driver) \ builtin_driver(__i2c_driver, i2c_add_driver) -#endif /* I2C */ - /* must call put_device() when done with returned i2c_client device */ struct i2c_client *i2c_find_device_by_fwnode(struct fwnode_handle *fwnode); @@ -971,6 +969,28 @@ struct i2c_adapter *i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode); /* must call i2c_put_adapter() when done with returned i2c_adapter device */ struct i2c_adapter *i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode); +#else /* I2C */ + +static inline struct i2c_client * +i2c_find_device_by_fwnode(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline struct i2c_adapter * +i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline struct i2c_adapter * +i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode) +{ + return NULL; +} + +#endif /* !I2C */ + #if IS_ENABLED(CONFIG_OF) /* must call put_device() when done with returned i2c_client device */ static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) From 2490785ee778f7498afa0350aea5651a3472d0a7 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 20 Jun 2024 14:52:57 -0700 Subject: [PATCH 560/778] net: remove drivers@pensando.io from MAINTAINERS Our corporate overlords have been changing the domains around again and this mailing list has gone away. Signed-off-by: Shannon Nelson Reviewed-by: Martin Habets Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a39c237edb95..972e1c8fec86 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17532,7 +17532,6 @@ F: include/linux/peci.h PENSANDO ETHERNET DRIVERS M: Shannon Nelson M: Brett Creeley -M: drivers@pensando.io L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/pensando/ionic.rst From a5d8922ab2aec39336ebc78d7cefe3b84647b058 Mon Sep 17 00:00:00 2001 From: Xin Zeng Date: Mon, 10 Jun 2024 22:37:56 +0800 Subject: [PATCH 561/778] crypto: qat - fix linking errors when PCI_IOV is disabled When CONFIG_PCI_IOV=n, the build of the QAT vfio pci variant driver fails reporting the following linking errors: ERROR: modpost: "qat_vfmig_open" [drivers/vfio/pci/qat/qat_vfio_pci.ko] undefined! ERROR: modpost: "qat_vfmig_resume" [drivers/vfio/pci/qat/qat_vfio_pci.ko] undefined! ERROR: modpost: "qat_vfmig_save_state" [drivers/vfio/pci/qat/qat_vfio_pci.ko] undefined! ERROR: modpost: "qat_vfmig_suspend" [drivers/vfio/pci/qat/qat_vfio_pci.ko] undefined! ERROR: modpost: "qat_vfmig_load_state" [drivers/vfio/pci/qat/qat_vfio_pci.ko] undefined! ERROR: modpost: "qat_vfmig_reset" [drivers/vfio/pci/qat/qat_vfio_pci.ko] undefined! ERROR: modpost: "qat_vfmig_save_setup" [drivers/vfio/pci/qat/qat_vfio_pci.ko] undefined! ERROR: modpost: "qat_vfmig_destroy" [drivers/vfio/pci/qat/qat_vfio_pci.ko] undefined! ERROR: modpost: "qat_vfmig_close" [drivers/vfio/pci/qat/qat_vfio_pci.ko] undefined! ERROR: modpost: "qat_vfmig_cleanup" [drivers/vfio/pci/qat/qat_vfio_pci.ko] undefined! WARNING: modpost: suppressed 1 unresolved symbol warnings because there were too many) Make live migration helpers provided by QAT PF driver always available even if CONFIG_PCI_IOV is not selected. This does not cause any side effect. Reported-by: Arnd Bergmann Closes: https://lore.kernel.org/lkml/20240607153406.60355e6c.alex.williamson@redhat.com/T/ Fixes: bb208810b1ab ("vfio/qat: Add vfio_pci driver for Intel QAT SR-IOV VF devices") Signed-off-by: Xin Zeng Reviewed-by: Giovanni Cabiddu Reviewed-by: Kevin Tian Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 6f9266edc9f1..eac73cbfdd38 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -39,7 +39,8 @@ intel_qat-objs := adf_cfg.o \ adf_sysfs_rl.o \ qat_uclo.o \ qat_hal.o \ - qat_bl.o + qat_bl.o \ + qat_mig_dev.o intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \ adf_fw_counters.o \ @@ -56,6 +57,6 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \ intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \ adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \ adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \ - adf_gen2_pfvf.o adf_gen4_pfvf.o qat_mig_dev.o + adf_gen2_pfvf.o adf_gen4_pfvf.o intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o From c1eb2512596fb3542357bb6c34c286f5e0374538 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 May 2024 15:52:52 +0300 Subject: [PATCH 562/778] RDMA/mlx5: Remove extra unlock on error path The below commit lifted the locking out of this function but left this error path unlock behind resulting in unbalanced locking. Remove the missed unlock too. Cc: stable@vger.kernel.org Fixes: 627122280c87 ("RDMA/mlx5: Add work to remove temporary entries from the cache") Signed-off-by: Jason Gunthorpe Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/78090c210c750f47219b95248f9f782f34548bb1.1716900410.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ecc111ed5d86..38d2c743db87 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -641,10 +641,8 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, new = &((*new)->rb_left); if (cmp < 0) new = &((*new)->rb_right); - if (cmp == 0) { - mutex_unlock(&cache->rb_lock); + if (cmp == 0) return -EEXIST; - } } /* Add new node and rebalance tree. */ From f637040c3339a2ed8c12d65ad03f9552386e2fe7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 May 2024 15:52:53 +0300 Subject: [PATCH 563/778] RDMA/mlx5: Follow rb_key.ats when creating new mkeys When a cache ent already exists but doesn't have any mkeys in it the cache will automatically create a new one based on the specification in the ent->rb_key. ent->ats was missed when creating the new key and so ma_translation_mode was not being set even though the ent requires it. Cc: stable@vger.kernel.org Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key") Signed-off-by: Jason Gunthorpe Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/7c5613458ecb89fbe5606b7aa4c8d990bdea5b9a.1716900410.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 38d2c743db87..35dcb9d9e12a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -246,6 +246,7 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (ent->rb_key.access_mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats); MLX5_SET(mkc, mkc, translations_octword_size, get_mkc_octo_size(ent->rb_key.access_mode, From 2e4c02fdecf2f6f55cefe48cb82d93fa4f8e2204 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 May 2024 15:52:54 +0300 Subject: [PATCH 564/778] RDMA/mlx5: Ensure created mkeys always have a populated rb_key cachable and mmkey.rb_key together are used by mlx5_revoke_mr() to put the MR/mkey back into the cache. In all cases they should be set correctly. alloc_cacheable_mr() was setting cachable but not filling rb_key, resulting in cache_ent_find_and_store() bucketing them all into a 0 length entry. implicit_get_child_mr()/mlx5_ib_alloc_implicit_mr() failed to set cachable or rb_key at all, so the cache was not working at all for implicit ODP. Cc: stable@vger.kernel.org Fixes: 8c1185fef68c ("RDMA/mlx5: Change check for cacheable mkeys") Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow") Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/7778c02dfa0999a30d6746c79a23dd7140a9c729.1716900410.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 35dcb9d9e12a..d3c1f63791a2 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -718,6 +718,8 @@ static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, } mr->mmkey.cache_ent = ent; mr->mmkey.type = MLX5_MKEY_MR; + mr->mmkey.rb_key = ent->rb_key; + mr->mmkey.cacheable = true; init_waitqueue_head(&mr->mmkey.wait); return mr; } @@ -1168,7 +1170,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->ibmr.pd = pd; mr->umem = umem; mr->page_shift = order_base_2(page_size); - mr->mmkey.cacheable = true; set_mr_fields(dev, mr, umem->length, access_flags, iova); return mr; From 81497c148b7a2e4a4fbda93aee585439f7323e2e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 28 May 2024 15:52:55 +0300 Subject: [PATCH 565/778] RDMA/mlx5: Fix unwind flow as part of mlx5_ib_stage_init_init Fix unwind flow as part of mlx5_ib_stage_init_init to use the correct goto upon an error. Fixes: 758ce14aee82 ("RDMA/mlx5: Implement MACsec gid addition and deletion") Signed-off-by: Yishai Hadas Reviewed-by: Patrisious Haddad Link: https://lore.kernel.org/r/aa40615116eda14ec9eca21d52017d632ea89188.1716900410.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2366c46eebc8..43660c831b22 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3759,10 +3759,10 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; return 0; -err: - mlx5r_macsec_dealloc_gids(dev); err_mp: mlx5_ib_cleanup_multiport_master(dev); +err: + mlx5r_macsec_dealloc_gids(dev); return err; } From 36ab7ada64caf08f10ee5a114d39964d1f91e81d Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 28 May 2024 15:52:56 +0300 Subject: [PATCH 566/778] RDMA/mlx5: Add check for srq max_sge attribute max_sge attribute is passed by the user, and is inserted and used unchecked, so verify that the value doesn't exceed maximum allowed value before using it. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Patrisious Haddad Link: https://lore.kernel.org/r/277ccc29e8d57bfd53ddeb2ac633f2760cf8cdd0.1716900410.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/srq.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index a056ea835da5..84be0c3d5699 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -199,17 +199,20 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, int err; struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + __u32 max_sge_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq) / + sizeof(struct mlx5_wqe_data_seg); if (init_attr->srq_type != IB_SRQT_BASIC && init_attr->srq_type != IB_SRQT_XRC && init_attr->srq_type != IB_SRQT_TM) return -EOPNOTSUPP; - /* Sanity check SRQ size before proceeding */ - if (init_attr->attr.max_wr >= max_srq_wqes) { - mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", - init_attr->attr.max_wr, - max_srq_wqes); + /* Sanity check SRQ and sge size before proceeding */ + if (init_attr->attr.max_wr >= max_srq_wqes || + init_attr->attr.max_sge > max_sge_sz) { + mlx5_ib_dbg(dev, "max_wr %d,wr_cap %d,max_sge %d, sge_cap:%d\n", + init_attr->attr.max_wr, max_srq_wqes, + init_attr->attr.max_sge, max_sge_sz); return -EINVAL; } From 82a5cc783d49b86afd2f60e297ecd85223c39f88 Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Wed, 5 Jun 2024 01:16:08 -0700 Subject: [PATCH 567/778] RDMA/mana_ib: Ignore optional access flags for MRs Ignore optional ib_access_flags when an MR is created. Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Signed-off-by: Konstantin Taranov Link: https://lore.kernel.org/r/1717575368-14879-1-git-send-email-kotaranov@linux.microsoft.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c index 4f13423ecdbd..887b09dd86e7 100644 --- a/drivers/infiniband/hw/mana/mr.c +++ b/drivers/infiniband/hw/mana/mr.c @@ -112,6 +112,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x", start, iova, length, access_flags); + access_flags &= ~IB_ACCESS_OPTIONAL; if (access_flags & ~VALID_MR_FLAGS) return ERR_PTR(-EINVAL); From 11b006d6896c0471ad29c6f1fb1af606e7ba278f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 19:10:51 -0700 Subject: [PATCH 568/778] selftest: af_unix: Add Kconfig file. diag_uid selftest failed on NIPA where the received nlmsg_type is NLMSG_ERROR [0] because CONFIG_UNIX_DIAG is not set [1] by default and sock_diag_lock_handler() failed to load the module. # # Starting 2 tests from 2 test cases. # # RUN diag_uid.uid.1 ... # # diag_uid.c:159:1:Expected nlh->nlmsg_type (2) == SOCK_DIAG_BY_FAMILY (20) # # 1: Test terminated by assertion # # FAIL diag_uid.uid.1 # not ok 1 diag_uid.uid.1 Let's add all AF_UNIX Kconfig to the config file under af_unix dir so that NIPA consumes it. Fixes: ac011361bd4f ("af_unix: Add test for sock_diag and UDIAG_SHOW_UID.") Link: https://netdev-3.bots.linux.dev/vmksft-net/results/644841/104-diag-uid/stdout [0] Link: https://netdev-3.bots.linux.dev/vmksft-net/results/644841/config [1] Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20240617073033.0cbb829d@kernel.org/ Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- tools/testing/selftests/net/af_unix/config | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tools/testing/selftests/net/af_unix/config diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config new file mode 100644 index 000000000000..37368567768c --- /dev/null +++ b/tools/testing/selftests/net/af_unix/config @@ -0,0 +1,3 @@ +CONFIG_UNIX=y +CONFIG_AF_UNIX_OOB=y +CONFIG_UNIX_DIAG=m From 0602697d6f4d72b0bc5edbc76afabf6aaa029a69 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 21 Jun 2024 09:19:13 +0200 Subject: [PATCH 569/778] mlxsw: pci: Fix driver initialization with Spectrum-4 Cited commit added support for a new reset flow ("all reset") which is deeper than the existing reset flow ("software reset") and allows the device's PCI firmware to be upgraded. In the new flow the driver first tells the firmware that "all reset" is required by issuing a new reset command (i.e., MRSR.command=6) and then triggers the reset by having the PCI core issue a secondary bus reset (SBR). However, due to a race condition in the device's firmware the device is not always able to recover from this reset, resulting in initialization failures [1]. New firmware versions include a fix for the bug and advertise it using a new capability bit in the Management Capabilities Mask (MCAM) register. Avoid initialization failures by reading the new capability bit and triggering the new reset flow only if the bit is set. If the bit is not set, trigger a normal PCI hot reset by skipping the call to the Management Reset and Shutdown Register (MRSR). Normal PCI hot reset is weaker than "all reset", but it results in a fully operational driver and allows users to flash a new firmware, if they want to. [1] mlxsw_spectrum4 0000:01:00.0: not ready 1023ms after bus reset; waiting mlxsw_spectrum4 0000:01:00.0: not ready 2047ms after bus reset; waiting mlxsw_spectrum4 0000:01:00.0: not ready 4095ms after bus reset; waiting mlxsw_spectrum4 0000:01:00.0: not ready 8191ms after bus reset; waiting mlxsw_spectrum4 0000:01:00.0: not ready 16383ms after bus reset; waiting mlxsw_spectrum4 0000:01:00.0: not ready 32767ms after bus reset; waiting mlxsw_spectrum4 0000:01:00.0: not ready 65535ms after bus reset; giving up mlxsw_spectrum4 0000:01:00.0: PCI function reset failed with -25 mlxsw_spectrum4 0000:01:00.0: cannot register bus device mlxsw_spectrum4: probe of 0000:01:00.0 failed with error -25 Fixes: f257c73e5356 ("mlxsw: pci: Add support for new reset flow") Reported-by: Maksym Yaremchuk Signed-off-by: Ido Schimmel Tested-by: Maksym Yaremchuk Reviewed-by: Simon Horman Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 18 +++++++++++++++--- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 ++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index bf66d996e32e..c0ced4d315f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1594,18 +1594,25 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci, return -EBUSY; } -static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci) +static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci, + bool pci_reset_sbr_supported) { struct pci_dev *pdev = mlxsw_pci->pdev; char mrsr_pl[MLXSW_REG_MRSR_LEN]; int err; + if (!pci_reset_sbr_supported) { + pci_dbg(pdev, "Performing PCI hot reset instead of \"all reset\"\n"); + goto sbr; + } + mlxsw_reg_mrsr_pack(mrsr_pl, MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE); err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); if (err) return err; +sbr: device_lock_assert(&pdev->dev); pci_cfg_access_lock(pdev); @@ -1633,6 +1640,7 @@ static int mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id) { struct pci_dev *pdev = mlxsw_pci->pdev; + bool pci_reset_sbr_supported = false; char mcam_pl[MLXSW_REG_MCAM_LEN]; bool pci_reset_supported = false; u32 sys_status; @@ -1652,13 +1660,17 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id) mlxsw_reg_mcam_pack(mcam_pl, MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES); err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl); - if (!err) + if (!err) { mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET, &pci_reset_supported); + mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET_SBR, + &pci_reset_sbr_supported); + } if (pci_reset_supported) { pci_dbg(pdev, "Starting PCI reset flow\n"); - err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci); + err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci, + pci_reset_sbr_supported); } else { pci_dbg(pdev, "Starting software reset flow\n"); err = mlxsw_pci_reset_sw(mlxsw_pci); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 8adf86a6f5cc..3bb89045eaf5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -10671,6 +10671,8 @@ enum mlxsw_reg_mcam_mng_feature_cap_mask_bits { MLXSW_REG_MCAM_MCIA_128B = 34, /* If set, MRSR.command=6 is supported. */ MLXSW_REG_MCAM_PCI_RESET = 48, + /* If set, MRSR.command=6 is supported with Secondary Bus Reset. */ + MLXSW_REG_MCAM_PCI_RESET_SBR = 67, }; #define MLXSW_REG_BYTES_PER_DWORD 0x4 From c28947de2bed40217cf256c5d0d16880054fcf13 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 21 Jun 2024 09:19:14 +0200 Subject: [PATCH 570/778] mlxsw: spectrum_buffers: Fix memory corruptions on Spectrum-4 systems The following two shared buffer operations make use of the Shared Buffer Status Register (SBSR): # devlink sb occupancy snapshot pci/0000:01:00.0 # devlink sb occupancy clearmax pci/0000:01:00.0 The register has two masks of 256 bits to denote on which ingress / egress ports the register should operate on. Spectrum-4 has more than 256 ports, so the register was extended by cited commit with a new 'port_page' field. However, when filling the register's payload, the driver specifies the ports as absolute numbers and not relative to the first port of the port page, resulting in memory corruptions [1]. Fix by specifying the ports relative to the first port of the port page. [1] BUG: KASAN: slab-use-after-free in mlxsw_sp_sb_occ_snapshot+0xb6d/0xbc0 Read of size 1 at addr ffff8881068cb00f by task devlink/1566 [...] Call Trace: dump_stack_lvl+0xc6/0x120 print_report+0xce/0x670 kasan_report+0xd7/0x110 mlxsw_sp_sb_occ_snapshot+0xb6d/0xbc0 mlxsw_devlink_sb_occ_snapshot+0x75/0xb0 devlink_nl_sb_occ_snapshot_doit+0x1f9/0x2a0 genl_family_rcv_msg_doit+0x20c/0x300 genl_rcv_msg+0x567/0x800 netlink_rcv_skb+0x170/0x450 genl_rcv+0x2d/0x40 netlink_unicast+0x547/0x830 netlink_sendmsg+0x8d4/0xdb0 __sys_sendto+0x49b/0x510 __x64_sys_sendto+0xe5/0x1c0 do_syscall_64+0xc1/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f [...] Allocated by task 1: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 copy_verifier_state+0xbc2/0xfb0 do_check_common+0x2c51/0xc7e0 bpf_check+0x5107/0x9960 bpf_prog_load+0xf0e/0x2690 __sys_bpf+0x1a61/0x49d0 __x64_sys_bpf+0x7d/0xc0 do_syscall_64+0xc1/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 1: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 poison_slab_object+0x109/0x170 __kasan_slab_free+0x14/0x30 kfree+0xca/0x2b0 free_verifier_state+0xce/0x270 do_check_common+0x4828/0xc7e0 bpf_check+0x5107/0x9960 bpf_prog_load+0xf0e/0x2690 __sys_bpf+0x1a61/0x49d0 __x64_sys_bpf+0x7d/0xc0 do_syscall_64+0xc1/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: f8538aec88b4 ("mlxsw: Add support for more than 256 ports in SBSR register") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index c9f1c79f3f9d..ba090262e27e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -1607,8 +1607,8 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, unsigned int sb_index) { + u16 local_port, local_port_1, first_local_port, last_local_port; struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - u16 local_port, local_port_1, last_local_port; struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx; u8 masked_count, current_page = 0; unsigned long cb_priv = 0; @@ -1628,6 +1628,7 @@ next_batch: masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, false); mlxsw_reg_sbsr_port_page_set(sbsr_pl, current_page); + first_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE; last_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE + MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE - 1; @@ -1645,9 +1646,12 @@ next_batch: if (local_port != MLXSW_PORT_CPU_PORT) { /* Ingress quotas are not supported for the CPU port */ mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, - local_port, 1); + local_port - first_local_port, + 1); } - mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, + local_port - first_local_port, + 1); for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, &bulk_list); @@ -1684,7 +1688,7 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, unsigned int sb_index) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - u16 local_port, last_local_port; + u16 local_port, first_local_port, last_local_port; LIST_HEAD(bulk_list); unsigned int masked_count; u8 current_page = 0; @@ -1702,6 +1706,7 @@ next_batch: masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, true); mlxsw_reg_sbsr_port_page_set(sbsr_pl, current_page); + first_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE; last_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE + MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE - 1; @@ -1719,9 +1724,12 @@ next_batch: if (local_port != MLXSW_PORT_CPU_PORT) { /* Ingress quotas are not supported for the CPU port */ mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, - local_port, 1); + local_port - first_local_port, + 1); } - mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, + local_port - first_local_port, + 1); for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, &bulk_list); From 339b84ab6b1d66900c27bd999271cb2ae40ce812 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 20 Jun 2024 09:45:09 -0400 Subject: [PATCH 571/778] closures: Change BUG_ON() to WARN_ON() If a BUG_ON() can be hit in the wild, it shouldn't be a BUG_ON() For reference, this has popped up once in the CI, and we'll need more info to debug it: 03240 ------------[ cut here ]------------ 03240 kernel BUG at lib/closure.c:21! 03240 kernel BUG at lib/closure.c:21! 03240 Internal error: Oops - BUG: 00000000f2000800 [#1] SMP 03240 Modules linked in: 03240 CPU: 15 PID: 40534 Comm: kworker/u80:1 Not tainted 6.10.0-rc4-ktest-ga56da69799bd #25570 03240 Hardware name: linux,dummy-virt (DT) 03240 Workqueue: btree_update btree_interior_update_work 03240 pstate: 00001005 (nzcv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 03240 pc : closure_put+0x224/0x2a0 03240 lr : closure_put+0x24/0x2a0 03240 sp : ffff0000d12071c0 03240 x29: ffff0000d12071c0 x28: dfff800000000000 x27: ffff0000d1207360 03240 x26: 0000000000000040 x25: 0000000000000040 x24: 0000000000000040 03240 x23: ffff0000c1f20180 x22: 0000000000000000 x21: ffff0000c1f20168 03240 x20: 0000000040000000 x19: ffff0000c1f20140 x18: 0000000000000001 03240 x17: 0000000000003aa0 x16: 0000000000003ad0 x15: 1fffe0001c326974 03240 x14: 0000000000000a1e x13: 0000000000000000 x12: 1fffe000183e402d 03240 x11: ffff6000183e402d x10: dfff800000000000 x9 : ffff6000183e402e 03240 x8 : 0000000000000001 x7 : 00009fffe7c1bfd3 x6 : ffff0000c1f2016b 03240 x5 : ffff0000c1f20168 x4 : ffff6000183e402e x3 : ffff800081391954 03240 x2 : 0000000000000001 x1 : 0000000000000000 x0 : 00000000a8000000 03240 Call trace: 03240 closure_put+0x224/0x2a0 03240 bch2_check_for_deadlock+0x910/0x1028 03240 bch2_six_check_for_deadlock+0x1c/0x30 03240 six_lock_slowpath.isra.0+0x29c/0xed0 03240 six_lock_ip_waiter+0xa8/0xf8 03240 __bch2_btree_node_lock_write+0x14c/0x298 03240 bch2_trans_lock_write+0x6d4/0xb10 03240 __bch2_trans_commit+0x135c/0x5520 03240 btree_interior_update_work+0x1248/0x1c10 03240 process_scheduled_works+0x53c/0xd90 03240 worker_thread+0x370/0x8c8 03240 kthread+0x258/0x2e8 03240 ret_from_fork+0x10/0x20 03240 Code: aa1303e0 d63f0020 a94363f7 17ffff8c (d4210000) 03240 ---[ end trace 0000000000000000 ]--- 03240 Kernel panic - not syncing: Oops - BUG: Fatal exception 03240 SMP: stopping secondary CPUs 03241 SMP: failed to stop secondary CPUs 13,15 03241 Kernel Offset: disabled 03241 CPU features: 0x00,00000003,80000008,4240500b 03241 Memory Limit: none 03241 ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]--- 03246 ========= FAILED TIMEOUT copygc_torture_no_checksum in 7200s Signed-off-by: Kent Overstreet --- lib/closure.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/closure.c b/lib/closure.c index 07409e9e35a5..2e1ee9fdec08 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -17,12 +17,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) { int r = flags & CLOSURE_REMAINING_MASK; - BUG_ON(flags & CLOSURE_GUARD_MASK); - BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); + if (WARN(flags & CLOSURE_GUARD_MASK, + "closure has guard bits set: %x (%u)", + flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r))) + r &= ~CLOSURE_GUARD_MASK; if (!r) { smp_acquire__after_ctrl_dep(); + WARN(flags & ~CLOSURE_DESTRUCTOR, + "closure ref hit 0 with incorrect flags set: %x (%u)", + flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags)); + cl->closure_get_happened = false; if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { From f648b6c12b70af9d24a293617102729cee6b7862 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 20 Jun 2024 10:04:35 -0400 Subject: [PATCH 572/778] bcachefs: Fix missing alloc_data_type_set() Incorrect bucket state transition in the discard path; when incrementing a bucket's generation number that had already been discarded, we were forgetting to check if it should be need_gc_gens, not free. This was caught by the .invalid checks in the transaction commit path, causing us to go emergency read only. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 7b5909764d14..e5e7d33f4a5e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -776,6 +776,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) { new_a->gen++; SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); + alloc_data_type_set(new_a, new_a->data_type); } if (old_a->data_type != new_a->data_type || @@ -1796,8 +1797,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, } SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); - alloc_data_type_set(&a->v, a->v.data_type); write: + alloc_data_type_set(&a->v, a->v.data_type); + ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, BCH_WATERMARK_btree| From 504794067fc266be5ac170777a94a927a72ac846 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 26 May 2024 22:52:22 -0400 Subject: [PATCH 573/778] bcachefs: Replace bare EEXIST with private error codes Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- fs/bcachefs/errcode.h | 3 +++ fs/bcachefs/fs-ioctl.c | 2 +- fs/bcachefs/str_hash.h | 2 +- fs/bcachefs/super.c | 11 ++++++----- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index e5e7d33f4a5e..8dec2c6cbb7e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1643,7 +1643,7 @@ static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket) mutex_lock(&c->discard_buckets_in_flight_lock); darray_for_each(c->discard_buckets_in_flight, i) if (bkey_eq(*i, bucket)) { - ret = -EEXIST; + ret = -BCH_ERR_EEXIST_discard_in_flight_add; goto out; } diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index dbe35b80bc0b..58612abf7927 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -116,6 +116,9 @@ x(ENOENT, ENOENT_dev_idx_not_found) \ x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \ x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \ + x(EEXIST, EEXIST_str_hash_set) \ + x(EEXIST, EEXIST_discard_in_flight_add) \ + x(EEXIST, EEXIST_subvolume_create) \ x(0, open_buckets_empty) \ x(0, freelist_empty) \ x(BCH_ERR_freelist_empty, no_buckets_found) \ diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 3551a737181b..79a0c8732bce 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -373,7 +373,7 @@ retry: } if (dst_dentry->d_inode) { - error = -EEXIST; + error = -BCH_ERR_EEXIST_subvolume_create; goto err3; } diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index cbad9b27874f..c8c266cb5797 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -300,7 +300,7 @@ not_found: if (!found && (flags & STR_HASH_must_replace)) { ret = -BCH_ERR_ENOENT_str_hash_set_must_replace; } else if (found && (flags & STR_HASH_must_create)) { - ret = -EEXIST; + ret = -BCH_ERR_EEXIST_str_hash_set; } else { if (!found && slot.path) swap(iter, slot); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 635da5b3439c..9083df82073a 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -931,12 +931,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; - for (i = 0; i < c->sb.nr_devices; i++) - if (bch2_member_exists(c->disk_sb.sb, i) && - bch2_dev_alloc(c, i)) { - ret = -EEXIST; + for (i = 0; i < c->sb.nr_devices; i++) { + if (!bch2_member_exists(c->disk_sb.sb, i)) + continue; + ret = bch2_dev_alloc(c, i); + if (ret) goto err; - } + } bch2_journal_entry_res_resize(&c->journal, &c->btree_root_journal_res, From dd9086487c1bb38641bcfbe765422c7f0a1a8d95 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 20 Jun 2024 13:20:49 -0400 Subject: [PATCH 574/778] bcachefs: Fix I_NEW warning in race path in bch2_inode_insert() discard_new_inode() is the correct interface for tearing down an indoe that was fully created but not made visible to other threads, but it expects I_NEW to be set, which we don't use. Reported-by: https://github.com/koverstreet/bcachefs/issues/690 Fixes: bcachefs: Fix race path in bch2_inode_insert() Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 8314d3e1582d..615ef8305c6e 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -188,6 +188,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino BUG_ON(!old); if (unlikely(old != inode)) { + /* + * bcachefs doesn't use I_NEW; we have no use for it since we + * only insert fully created inodes in the inode hash table. But + * discard_new_inode() expects it to be set... + */ + inode->v.i_flags |= I_NEW; discard_new_inode(&inode->v); inode = old; } else { @@ -195,8 +201,10 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); mutex_unlock(&c->vfs_inodes_lock); /* - * we really don't want insert_inode_locked2() to be setting - * I_NEW... + * Again, I_NEW makes no sense for bcachefs. This is only needed + * for clearing I_NEW, but since the inode was already fully + * created and initialized we didn't actually want + * inode_insert5() to set it for us. */ unlock_new_inode(&inode->v); } From e6b3a655ac7ba5282b1504851488236865804cb8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 20 Jun 2024 13:10:34 -0400 Subject: [PATCH 575/778] bcachefs: Use bch2_print_string_as_lines for long err printk strings get truncated to 1024 bytes; if we have a long error message (journal debug info) we need to use a helper. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index cdcb1ad49af4..492426c8d869 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1967,7 +1967,6 @@ CLOSURE_CALLBACK(bch2_journal_write) struct journal *j = container_of(w, struct journal, buf[w->idx]); struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_replicas_padded replicas; - struct printbuf journal_debug_buf = PRINTBUF; unsigned nr_rw_members = 0; int ret; @@ -2011,11 +2010,15 @@ CLOSURE_CALLBACK(bch2_journal_write) } if (ret) { - __bch2_journal_debug_to_text(&journal_debug_buf, j); + struct printbuf buf = PRINTBUF; + buf.atomic++; + + prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write: %s"), + bch2_err_str(ret)); + __bch2_journal_debug_to_text(&buf, j); spin_unlock(&j->lock); - bch_err(c, "Unable to allocate journal write:\n%s", - journal_debug_buf.buf); - printbuf_exit(&journal_debug_buf); + bch2_print_string_as_lines(KERN_ERR, buf.buf); + printbuf_exit(&buf); goto err; } From 2fe79ce7d1e8ec5059e7dfc15f3c769ae9679569 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 20 Jun 2024 19:42:39 -0400 Subject: [PATCH 576/778] bcachefs: Fix a UAF after write_super() write_super() may reallocate the superblock buffer - but bch_sb_field_ext was referencing it; don't use it after the write_super call. Reported-by: syzbot+8992fc10a192067b8d8a@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index e632da69196c..1f9d044ed920 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -664,10 +664,10 @@ int bch2_fs_recovery(struct bch_fs *c) if (check_version_upgrade(c)) write_sb = true; + c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + if (write_sb) bch2_write_super(c); - - c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); mutex_unlock(&c->sb_lock); if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) From bd4da0462ea7bf26b2a5df5528ec20c550f7ec41 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Tue, 4 Jun 2024 16:46:10 +0800 Subject: [PATCH 577/778] bcachefs: Move the ei_flags setting to after initialization `inode->ei_flags` setting and cleaning should be done after initialization, otherwise the operation is invalid. Fixes: 9ca4853b98af ("bcachefs: Fix quota support for snapshots") Signed-off-by: Youling Tang Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 615ef8305c6e..f9c9a95d7d4c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1497,11 +1497,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, bch2_iget5_set(&inode->v, &inum); bch2_inode_update_after_write(trans, inode, bi, ~0); - if (BCH_SUBVOLUME_SNAP(subvol)) - set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); - else - clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); - inode->v.i_blocks = bi->bi_sectors; inode->v.i_ino = bi->bi_inum; inode->v.i_rdev = bi->bi_dev; @@ -1513,6 +1508,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, inode->ei_qid = bch_qid(bi); inode->ei_subvol = inum.subvol; + if (BCH_SUBVOLUME_SNAP(subvol)) + set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); + inode->v.i_mapping->a_ops = &bch_address_space_operations; switch (inode->v.i_mode & S_IFMT) { From bfc6444b57dc7186b6acc964705d7516cbaf3904 Mon Sep 17 00:00:00 2001 From: Ian Ray Date: Thu, 20 Jun 2024 07:29:15 +0300 Subject: [PATCH 578/778] gpio: pca953x: fix pca953x_irq_bus_sync_unlock race Ensure that `i2c_lock' is held when setting interrupt latch and mask in pca953x_irq_bus_sync_unlock() in order to avoid races. The other (non-probe) call site pca953x_gpio_set_multiple() ensures the lock is held before calling pca953x_write_regs(). The problem occurred when a request raced against irq_bus_sync_unlock() approximately once per thousand reboots on an i.MX8MP based system. * Normal case 0-0022: write register AI|3a {03,02,00,00,01} Input latch P0 0-0022: write register AI|49 {fc,fd,ff,ff,fe} Interrupt mask P0 0-0022: write register AI|08 {ff,00,00,00,00} Output P3 0-0022: write register AI|12 {fc,00,00,00,00} Config P3 * Race case 0-0022: write register AI|08 {ff,00,00,00,00} Output P3 0-0022: write register AI|08 {03,02,00,00,01} *** Wrong register *** 0-0022: write register AI|12 {fc,00,00,00,00} Config P3 0-0022: write register AI|49 {fc,fd,ff,ff,fe} Interrupt mask P0 Signed-off-by: Ian Ray Link: https://lore.kernel.org/r/20240620042915.2173-1-ian.ray@gehealthcare.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 77a2812f2974..732a6964748c 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -758,6 +758,8 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d) int level; if (chip->driver_data & PCA_PCAL) { + guard(mutex)(&chip->i2c_lock); + /* Enable latch on interrupt-enabled inputs */ pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask); From c45fcf46ca2368dafe7e5c513a711a6f0f974308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Jun 2024 16:37:12 +0200 Subject: [PATCH 579/778] pwm: stm32: Refuse too small period requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If period_ns is small, prd might well become 0. Catch that case because otherwise with regmap_write(priv->regmap, TIM_ARR, prd - 1); a few lines down quite a big period is configured. Fixes: 7edf7369205b ("pwm: Add driver for STM32 plaftorm") Cc: stable@vger.kernel.org Reviewed-by: Trevor Gamblin Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/b86f62f099983646f97eeb6bfc0117bb2d0c340d.1718979150.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-stm32.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index a2f231d13a9f..3e7b2a8e34e7 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -337,6 +337,8 @@ static int stm32_pwm_config(struct stm32_pwm *priv, unsigned int ch, prd = mul_u64_u64_div_u64(period_ns, clk_get_rate(priv->clk), (u64)NSEC_PER_SEC * (prescaler + 1)); + if (!prd) + return -EINVAL; /* * All channels share the same prescaler and counter so when two From f80a55fa90fa76d01e3fffaa5d0413e522ab9a00 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 17 Jun 2024 09:27:27 +0200 Subject: [PATCH 580/778] nvme: fixup comment for nvme RDMA Provider Type PRTYPE is the provider type, not the QP service type. Fixes: eb793e2c9286 ("nvme.h: add NVMe over Fabrics definitions") Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 425573202295..69ac2abf8acf 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -87,8 +87,8 @@ enum { NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */ }; -/* RDMA QP Service Type codes for Discovery Log Page entry TSAS - * RDMA_QPTYPE field +/* RDMA Provider Type codes for Discovery Log Page entry TSAS + * RDMA_PRTYPE field */ enum { NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */ From 0f1f5803920d2a6b88bee950914fd37421e17170 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 17 Jun 2024 09:27:28 +0200 Subject: [PATCH 581/778] nvmet: make 'tsas' attribute idempotent for RDMA The RDMA transport defines values for TSAS, but it cannot be changed as we only support the 'connected' mode. So to avoid errors during reconfiguration we should allow to write the current value. Fixes: 3f123494db72 ("nvmet: make TCP sectype settable via configfs") Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/configfs.c | 39 ++++++++++++++++++++++++++-------- include/linux/nvme.h | 2 ++ 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index e60224356048..685e89b35d33 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -413,25 +413,46 @@ static ssize_t nvmet_addr_tsas_show(struct config_item *item, return sprintf(page, "\n"); } +static u8 nvmet_addr_tsas_rdma_store(const char *page) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_rdma); i++) { + if (sysfs_streq(page, nvmet_addr_tsas_rdma[i].name)) + return nvmet_addr_tsas_rdma[i].type; + } + return NVMF_RDMA_QPTYPE_INVALID; +} + +static u8 nvmet_addr_tsas_tcp_store(const char *page) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) { + if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name)) + return nvmet_addr_tsas_tcp[i].type; + } + return NVMF_TCP_SECTYPE_INVALID; +} + static ssize_t nvmet_addr_tsas_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); u8 treq = nvmet_port_disc_addr_treq_mask(port); - u8 sectype; - int i; + u8 sectype, qptype; if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - if (port->disc_addr.trtype != NVMF_TRTYPE_TCP) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) { - if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name)) { - sectype = nvmet_addr_tsas_tcp[i].type; + if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) { + qptype = nvmet_addr_tsas_rdma_store(page); + if (qptype == port->disc_addr.tsas.rdma.qptype) + return count; + } else if (port->disc_addr.trtype == NVMF_TRTYPE_TCP) { + sectype = nvmet_addr_tsas_tcp_store(page); + if (sectype != NVMF_TCP_SECTYPE_INVALID) goto found; - } } pr_err("Invalid value '%s' for tsas\n", page); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 69ac2abf8acf..c693ac344ec0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -85,6 +85,7 @@ enum { enum { NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */ NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */ + NVMF_RDMA_QPTYPE_INVALID = 0xff, }; /* RDMA Provider Type codes for Discovery Log Page entry TSAS @@ -110,6 +111,7 @@ enum { NVMF_TCP_SECTYPE_NONE = 0, /* No Security */ NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ + NVMF_TCP_SECTYPE_INVALID = 0xff, }; #define NVME_AQ_DEPTH 32 From 5337ac4c9b807bc46baa0713121a0afa8beacd70 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 18 Jun 2024 18:18:58 -0700 Subject: [PATCH 582/778] bpf: Fix the corner case with may_goto and jump to the 1st insn. When the following program is processed by the verifier: L1: may_goto L2 goto L1 L2: w0 = 0 exit the may_goto insn is first converted to: L1: r11 = *(u64 *)(r10 -8) if r11 == 0x0 goto L2 r11 -= 1 *(u64 *)(r10 -8) = r11 goto L1 L2: w0 = 0 exit then later as the last step the verifier inserts: *(u64 *)(r10 -8) = BPF_MAX_LOOPS as the first insn of the program to initialize loop count. When the first insn happens to be a branch target of some jmp the bpf_patch_insn_data() logic will produce: L1: *(u64 *)(r10 -8) = BPF_MAX_LOOPS r11 = *(u64 *)(r10 -8) if r11 == 0x0 goto L2 r11 -= 1 *(u64 *)(r10 -8) = r11 goto L1 L2: w0 = 0 exit because instruction patching adjusts all jmps and calls, but for this particular corner case it's incorrect and the L1 label should be one instruction down, like: *(u64 *)(r10 -8) = BPF_MAX_LOOPS L1: r11 = *(u64 *)(r10 -8) if r11 == 0x0 goto L2 r11 -= 1 *(u64 *)(r10 -8) = r11 goto L1 L2: w0 = 0 exit and that's what this patch is fixing. After bpf_patch_insn_data() call adjust_jmp_off() to adjust all jmps that point to newly insert BPF_ST insn to point to insn after. Note that bpf_patch_insn_data() cannot easily be changed to accommodate this logic, since jumps that point before or after a sequence of patched instructions have to be adjusted with the full length of the patch. Conceptually it's somewhat similar to "insert" of instructions between other instructions with weird semantics. Like "insert" before 1st insn would require adjustment of CALL insns to point to newly inserted 1st insn, but not an adjustment JMP insns that point to 1st, yet still adjusting JMP insns that cross over 1st insn (point to insn before or insn after), hence use simple adjust_jmp_off() logic to fix this corner case. Ideally bpf_patch_insn_data() would have an auxiliary info to say where 'the start of newly inserted patch is', but it would be too complex for backport. Fixes: 011832b97b31 ("bpf: Introduce may_goto instruction") Reported-by: Zac Ecob Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Closes: https://lore.kernel.org/bpf/CAADnVQJ_WWx8w4b=6Gc2EpzAjgv+6A0ridnMz2TvS2egj4r3Gw@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20240619011859.79334-1-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 50 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e0a398a97d32..5586a571bf55 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12721,6 +12721,16 @@ static bool signed_add32_overflows(s32 a, s32 b) return res < a; } +static bool signed_add16_overflows(s16 a, s16 b) +{ + /* Do the add in u16, where overflow is well-defined */ + s16 res = (s16)((u16)a + (u16)b); + + if (b < 0) + return res > a; + return res < a; +} + static bool signed_sub_overflows(s64 a, s64 b) { /* Do the sub in u64, where overflow is well-defined */ @@ -18732,6 +18742,39 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of return new_prog; } +/* + * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the + * jump offset by 'delta'. + */ +static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta) +{ + struct bpf_insn *insn = prog->insnsi; + u32 insn_cnt = prog->len, i; + + for (i = 0; i < insn_cnt; i++, insn++) { + u8 code = insn->code; + + if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) || + BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT) + continue; + + if (insn->code == (BPF_JMP32 | BPF_JA)) { + if (i + 1 + insn->imm != tgt_idx) + continue; + if (signed_add32_overflows(insn->imm, delta)) + return -ERANGE; + insn->imm += delta; + } else { + if (i + 1 + insn->off != tgt_idx) + continue; + if (signed_add16_overflows(insn->imm, delta)) + return -ERANGE; + insn->off += delta; + } + } + return 0; +} + static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, u32 off, u32 cnt) { @@ -20548,6 +20591,13 @@ next_insn: if (!new_prog) return -ENOMEM; env->prog = prog = new_prog; + /* + * If may_goto is a first insn of a prog there could be a jmp + * insn that points to it, hence adjust all such jmps to point + * to insn after BPF_ST that inits may_goto count. + * Adjustment will succeed because bpf_patch_insn_data() didn't fail. + */ + WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1)); } /* Since poke tab is now finalized, publish aux to tracker. */ From 2673315947c9f3890ad34a8196f62142e4ddef5a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 18 Jun 2024 18:18:59 -0700 Subject: [PATCH 583/778] selftests/bpf: Tests with may_goto and jumps to the 1st insn Add few tests with may_goto and jumps to the 1st insn. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240619011859.79334-2-alexei.starovoitov@gmail.com --- .../bpf/progs/verifier_iterating_callbacks.c | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index bd676d7e615f..8885e5239d6b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -307,6 +307,100 @@ int iter_limit_bug(struct __sk_buff *skb) return 0; } +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} + +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto2(void) +{ + asm volatile (" \ +l0_%=: r0 = 0; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} + +SEC("socket") +__success __retval(0) +__naked void jlt_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: call %[bpf_jiffies64]; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + if r0 < 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" :: __imm(bpf_jiffies64) + : __clobber_all); +} + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ + __clang_major__ >= 18 +SEC("socket") +__success __retval(0) +__naked void gotol_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: r0 = 0; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + gotol l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} +#endif + +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto_subprog(void) +{ + asm volatile (" \ + call subprog_with_may_goto; \ + exit; \ +" ::: __clobber_all); +} + +static __naked __noinline __used +void subprog_with_may_goto(void) +{ + asm volatile (" \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + #define ARR_SZ 1000000 int zero; char arr[ARR_SZ]; From cfa1a2329a691ffd991fcf7248a57d752e712881 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jun 2024 16:08:27 +0200 Subject: [PATCH 584/778] bpf: Fix overrunning reservations in ringbuf The BPF ring buffer internally is implemented as a power-of-2 sized circular buffer, with two logical and ever-increasing counters: consumer_pos is the consumer counter to show which logical position the consumer consumed the data, and producer_pos which is the producer counter denoting the amount of data reserved by all producers. Each time a record is reserved, the producer that "owns" the record will successfully advance producer counter. In user space each time a record is read, the consumer of the data advanced the consumer counter once it finished processing. Both counters are stored in separate pages so that from user space, the producer counter is read-only and the consumer counter is read-write. One aspect that simplifies and thus speeds up the implementation of both producers and consumers is how the data area is mapped twice contiguously back-to-back in the virtual memory, allowing to not take any special measures for samples that have to wrap around at the end of the circular buffer data area, because the next page after the last data page would be first data page again, and thus the sample will still appear completely contiguous in virtual memory. Each record has a struct bpf_ringbuf_hdr { u32 len; u32 pg_off; } header for book-keeping the length and offset, and is inaccessible to the BPF program. Helpers like bpf_ringbuf_reserve() return `(void *)hdr + BPF_RINGBUF_HDR_SZ` for the BPF program to use. Bing-Jhong and Muhammad reported that it is however possible to make a second allocated memory chunk overlapping with the first chunk and as a result, the BPF program is now able to edit first chunk's header. For example, consider the creation of a BPF_MAP_TYPE_RINGBUF map with size of 0x4000. Next, the consumer_pos is modified to 0x3000 /before/ a call to bpf_ringbuf_reserve() is made. This will allocate a chunk A, which is in [0x0,0x3008], and the BPF program is able to edit [0x8,0x3008]. Now, lets allocate a chunk B with size 0x3000. This will succeed because consumer_pos was edited ahead of time to pass the `new_prod_pos - cons_pos > rb->mask` check. Chunk B will be in range [0x3008,0x6010], and the BPF program is able to edit [0x3010,0x6010]. Due to the ring buffer memory layout mentioned earlier, the ranges [0x0,0x4000] and [0x4000,0x8000] point to the same data pages. This means that chunk B at [0x4000,0x4008] is chunk A's header. bpf_ringbuf_submit() / bpf_ringbuf_discard() use the header's pg_off to then locate the bpf_ringbuf itself via bpf_ringbuf_restore_from_rec(). Once chunk B modified chunk A's header, then bpf_ringbuf_commit() refers to the wrong page and could cause a crash. Fix it by calculating the oldest pending_pos and check whether the range from the oldest outstanding record to the newest would span beyond the ring buffer size. If that is the case, then reject the request. We've tested with the ring buffer benchmark in BPF selftests (./benchs/run_bench_ringbufs.sh) before/after the fix and while it seems a bit slower on some benchmarks, it is still not significantly enough to matter. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Reported-by: Bing-Jhong Billy Jheng Reported-by: Muhammad Ramdhan Co-developed-by: Bing-Jhong Billy Jheng Co-developed-by: Andrii Nakryiko Signed-off-by: Bing-Jhong Billy Jheng Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240621140828.18238-1-daniel@iogearbox.net --- kernel/bpf/ringbuf.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 0ee653a936ea..e20b90c36131 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -51,7 +51,8 @@ struct bpf_ringbuf { * This prevents a user-space application from modifying the * position and ruining in-kernel tracking. The permissions of the * pages depend on who is producing samples: user-space or the - * kernel. + * kernel. Note that the pending counter is placed in the same + * page as the producer, so that it shares the same cache line. * * Kernel-producer * --------------- @@ -70,6 +71,7 @@ struct bpf_ringbuf { */ unsigned long consumer_pos __aligned(PAGE_SIZE); unsigned long producer_pos __aligned(PAGE_SIZE); + unsigned long pending_pos; char data[] __aligned(PAGE_SIZE); }; @@ -179,6 +181,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) rb->mask = data_sz - 1; rb->consumer_pos = 0; rb->producer_pos = 0; + rb->pending_pos = 0; return rb; } @@ -404,9 +407,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr) static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) { - unsigned long cons_pos, prod_pos, new_prod_pos, flags; - u32 len, pg_off; + unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags; struct bpf_ringbuf_hdr *hdr; + u32 len, pg_off, tmp_size, hdr_len; if (unlikely(size > RINGBUF_MAX_RECORD_SZ)) return NULL; @@ -424,13 +427,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) spin_lock_irqsave(&rb->spinlock, flags); } + pend_pos = rb->pending_pos; prod_pos = rb->producer_pos; new_prod_pos = prod_pos + len; - /* check for out of ringbuf space by ensuring producer position - * doesn't advance more than (ringbuf_size - 1) ahead + while (pend_pos < prod_pos) { + hdr = (void *)rb->data + (pend_pos & rb->mask); + hdr_len = READ_ONCE(hdr->len); + if (hdr_len & BPF_RINGBUF_BUSY_BIT) + break; + tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT; + tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8); + pend_pos += tmp_size; + } + rb->pending_pos = pend_pos; + + /* check for out of ringbuf space: + * - by ensuring producer position doesn't advance more than + * (ringbuf_size - 1) ahead + * - by ensuring oldest not yet committed record until newest + * record does not span more than (ringbuf_size - 1) */ - if (new_prod_pos - cons_pos > rb->mask) { + if (new_prod_pos - cons_pos > rb->mask || + new_prod_pos - pend_pos > rb->mask) { spin_unlock_irqrestore(&rb->spinlock, flags); return NULL; } From 6ddc9deacc1312762c2edd9de00ce76b00f69f7c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Jun 2024 09:51:08 -0400 Subject: [PATCH 585/778] SUNRPC: Fix backchannel reply, again I still see "RPC: Could not send backchannel reply error: -110" quite often, along with slow-running tests. Debugging shows that the backchannel is still stumbling when it has to queue a callback reply on a busy transport. Note that every one of these timeouts causes a connection loss by virtue of the xprt_conditional_disconnect() call in that arm of call_cb_transmit_status(). I found that setting to_maxval is necessary to get the RPC timeout logic to behave whenever to_exponential is not set. Fixes: 57331a59ac0d ("NFSv4.1: Use the nfs_client's rpc timeouts for backchannel") Signed-off-by: Chuck Lever Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2b4b1276d4e8..d9cda1e53a01 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1557,9 +1557,11 @@ out_drop: */ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) { + struct rpc_timeout timeout = { + .to_increment = 0, + }; struct rpc_task *task; int proc_error; - struct rpc_timeout timeout; /* Build the svc_rqst used by the common processing routine */ rqstp->rq_xid = req->rq_xid; @@ -1612,6 +1614,7 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) timeout.to_initval = req->rq_xprt->timeout->to_initval; timeout.to_retries = req->rq_xprt->timeout->to_retries; } + timeout.to_maxval = timeout.to_initval; memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); task = rpc_run_bc_task(req, &timeout); From 31392048f55f98cb01ca709d32d06d926ab9760a Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 19 Jun 2024 15:34:57 +0200 Subject: [PATCH 586/778] vxlan: Pull inner IP header in vxlan_xmit_one(). Ensure the inner IP header is part of the skb's linear data before setting old_iph. Otherwise, on a non-linear skb, old_iph could point outside of the packet data. Unlike classical VXLAN, which always encapsulates Ethernet packets, VXLAN-GPE can transport IP packets directly. In that case, we need to look at skb->protocol to figure out if an Ethernet header is present. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/2aa75f6fa62ac9dbe4f16ad5ba75dd04a51d4b99.1718804000.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 567cb3faab70..ba59e92ab941 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2339,7 +2339,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel_key *pkey; struct ip_tunnel_key key; struct vxlan_dev *vxlan = netdev_priv(dev); - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; unsigned int pkt_len = skb->len; @@ -2353,8 +2353,15 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, bool use_cache; bool udp_sum = false; bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev)); + bool no_eth_encap; __be32 vni = 0; + no_eth_encap = flags & VXLAN_F_GPE && skb->protocol != htons(ETH_P_TEB); + if (!skb_vlan_inet_prepare(skb, no_eth_encap)) + goto drop; + + old_iph = ip_hdr(skb); + info = skb_tunnel_info(skb); use_cache = ip_tunnel_dst_cache_usable(skb, info); From 0cf81c73e4c6a4861128a8f27861176ec312af4e Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 21 Jun 2024 17:22:40 -0500 Subject: [PATCH 587/778] counter: ti-eqep: enable clock at probe The TI eQEP clock is both a functional and interface clock. Since it is required for the device to function, we should be enabling it at probe. Up to now, we've just been lucky that the clock was enabled by something else on the system already. Fixes: f213729f6796 ("counter: new TI eQEP driver") Reviewed-by: Judith Mendez Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20240621-ti-eqep-enable-clock-v2-1-edd3421b54d4@baylibre.com Signed-off-by: William Breathitt Gray --- drivers/counter/ti-eqep.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c index 072b11fd6b32..825ae22c3ebc 100644 --- a/drivers/counter/ti-eqep.c +++ b/drivers/counter/ti-eqep.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -376,6 +377,7 @@ static int ti_eqep_probe(struct platform_device *pdev) struct counter_device *counter; struct ti_eqep_cnt *priv; void __iomem *base; + struct clk *clk; int err; counter = devm_counter_alloc(dev, sizeof(*priv)); @@ -415,6 +417,10 @@ static int ti_eqep_probe(struct platform_device *pdev) pm_runtime_enable(dev); pm_runtime_get_sync(dev); + clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), "failed to enable clock\n"); + err = counter_add(counter); if (err < 0) { pm_runtime_put_sync(dev); From d18b822c8f622ed37af7130088a0b7f1eb0b16e6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 21 Jun 2024 09:30:08 +0200 Subject: [PATCH 588/778] docs: i2c: summary: start sentences consistently. Change the first paragraphs to contain only one space after the end of the previous sentence like in the rest of the document. Signed-off-by: Wolfram Sang Reviewed-by: Easwar Hariharan Signed-off-by: Wolfram Sang --- Documentation/i2c/summary.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index 786c618ba3be..28ff80a2302b 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -4,10 +4,10 @@ Introduction to I2C and SMBus I²C (pronounce: I squared C and written I2C in the kernel documentation) is a protocol developed by Philips. It is a slow two-wire protocol (variable -speed, up to 400 kHz), with a high speed extension (3.4 MHz). It provides +speed, up to 400 kHz), with a high speed extension (3.4 MHz). It provides an inexpensive bus for connecting many types of devices with infrequent or -low bandwidth communications needs. I2C is widely used with embedded -systems. Some systems use variants that don't meet branding requirements, +low bandwidth communications needs. I2C is widely used with embedded +systems. Some systems use variants that don't meet branding requirements, and so are not advertised as being I2C but come under different names, e.g. TWI (Two Wire Interface), IIC. @@ -18,14 +18,14 @@ access the PDF. An older version of the specification (revision 6) is archived `here `_. SMBus (System Management Bus) is based on the I2C protocol, and is mostly -a subset of I2C protocols and signaling. Many I2C devices will work on an +a subset of I2C protocols and signaling. Many I2C devices will work on an SMBus, but some SMBus protocols add semantics beyond what is required to -achieve I2C branding. Modern PC mainboards rely on SMBus. The most common +achieve I2C branding. Modern PC mainboards rely on SMBus. The most common devices connected through SMBus are RAM modules configured using I2C EEPROMs, and hardware monitoring chips. Because the SMBus is mostly a subset of the generalized I2C bus, we can -use its protocols on many I2C systems. However, there are systems that don't +use its protocols on many I2C systems. However, there are systems that don't meet both SMBus and I2C electrical constraints; and others which can't implement all the common SMBus protocol semantics or messages. From 75d148c90a34b94a3e3e7e7b2f30a689d8fbb7c8 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 21 Jun 2024 09:30:09 +0200 Subject: [PATCH 589/778] docs: i2c: summary: update I2C specification link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Luckily, the specs are directly downloadable again, so update the link. Also update its title to the original name "I²C". Signed-off-by: Wolfram Sang Reviewed-by: Easwar Hariharan Signed-off-by: Wolfram Sang --- Documentation/i2c/summary.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index 28ff80a2302b..e3ab1d414014 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -11,11 +11,9 @@ systems. Some systems use variants that don't meet branding requirements, and so are not advertised as being I2C but come under different names, e.g. TWI (Two Wire Interface), IIC. -The latest official I2C specification is the `"I2C-bus specification and user -manual" (UM10204) `_ -published by NXP Semiconductors. However, you need to log-in to the site to -access the PDF. An older version of the specification (revision 6) is archived -`here `_. +The latest official I2C specification is the `"I²C-bus specification and user +manual" (UM10204) `_ +published by NXP Semiconductors, version 7 as of this writing. SMBus (System Management Bus) is based on the I2C protocol, and is mostly a subset of I2C protocols and signaling. Many I2C devices will work on an From a5b88cb9fdff337a2867f0dff7c5cd23d4bd6663 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 21 Jun 2024 09:30:10 +0200 Subject: [PATCH 590/778] docs: i2c: summary: update speed mode description Fastest I2C mode is 5 MHz. Update the docs and reword the paragraph slightly. Signed-off-by: Wolfram Sang Reviewed-by: Easwar Hariharan Signed-off-by: Wolfram Sang --- Documentation/i2c/summary.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index e3ab1d414014..a1e5c0715f8b 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -3,8 +3,8 @@ Introduction to I2C and SMBus ============================= I²C (pronounce: I squared C and written I2C in the kernel documentation) is -a protocol developed by Philips. It is a slow two-wire protocol (variable -speed, up to 400 kHz), with a high speed extension (3.4 MHz). It provides +a protocol developed by Philips. It is a two-wire protocol with variable +speed (typically up to 400 kHz, high speed modes up to 5 MHz). It provides an inexpensive bus for connecting many types of devices with infrequent or low bandwidth communications needs. I2C is widely used with embedded systems. Some systems use variants that don't meet branding requirements, From d77367fff7c0d67e20393a8236b519d5c48ee875 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 21 Jun 2024 09:30:11 +0200 Subject: [PATCH 591/778] docs: i2c: summary: document use of inclusive language We now have the updated I2C specs and our own Code of Conduct, so we have all we need to switch over to the inclusive terminology. Define them here. Signed-off-by: Wolfram Sang Reviewed-by: Easwar Hariharan Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c_bus.svg | 15 ++++++++------- Documentation/i2c/summary.rst | 23 +++++++++++++++++------ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/Documentation/i2c/i2c_bus.svg b/Documentation/i2c/i2c_bus.svg index 3170de976373..45801de4af7d 100644 --- a/Documentation/i2c/i2c_bus.svg +++ b/Documentation/i2c/i2c_bus.svg @@ -1,5 +1,6 @@ + I2CMaster + id="tspan1285">Controller Slave + id="tspan1287">Target Slave + id="tspan1287-6">Target Slave + id="tspan1287-9">Target Date: Fri, 21 Jun 2024 09:30:12 +0200 Subject: [PATCH 592/778] docs: i2c: summary: document 'local' and 'remote' targets Because Linux can be a target as well, add terminology to differentiate between Linux being the target and Linux accessing targets. Signed-off-by: Wolfram Sang Reviewed-by: Easwar Hariharan Signed-off-by: Wolfram Sang --- Documentation/i2c/summary.rst | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index a6da1032fa06..ff8bda32b9c3 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -49,10 +49,15 @@ whole class of I2C adapters. Each specific adapter driver either depends on an algorithm driver in the ``drivers/i2c/algos/`` subdirectory, or includes its own implementation. -A **target** chip is a node that responds to communications when addressed -by the controller. In Linux it is called a **client**. Client drivers are kept -in a directory specific to the feature they provide, for example -``drivers/media/gpio/`` for GPIO expanders and ``drivers/media/i2c/`` for +A **target** chip is a node that responds to communications when addressed by a +controller. In the Linux kernel implementation it is called a **client**. While +targets are usually separate external chips, Linux can also act as a target +(needs hardware support) and respond to another controller on the bus. This is +then called a **local target**. In contrast, an external chip is called a +**remote target**. + +Target drivers are kept in a directory specific to the feature they provide, +for example ``drivers/gpio/`` for GPIO expanders and ``drivers/media/i2c/`` for video-related chips. For the example configuration in figure, you will need a driver for your From 20738cb9fa7ad74c4f374c5b49c8189277df3a9d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 21 Jun 2024 09:30:13 +0200 Subject: [PATCH 593/778] docs: i2c: summary: be clearer with 'controller/target' and 'adapter/client' pairs This not only includes rewording, but also where to put which emphasis on terms in this document. Signed-off-by: Wolfram Sang Reviewed-by: Easwar Hariharan Signed-off-by: Wolfram Sang --- Documentation/i2c/summary.rst | 41 ++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index ff8bda32b9c3..579a1c7df200 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -31,9 +31,7 @@ implement all the common SMBus protocol semantics or messages. Terminology =========== -The I2C bus connects one or more *controller* chips and one or more *target* -chips. - +The I2C bus connects one or more controller chips and one or more target chips. .. kernel-figure:: i2c_bus.svg :alt: Simple I2C bus with one controller and 3 targets @@ -41,28 +39,37 @@ chips. Simple I2C bus A **controller** chip is a node that starts communications with targets. In the -Linux kernel implementation it is called an **adapter** or bus. Adapter -drivers are in the ``drivers/i2c/busses/`` subdirectory. +Linux kernel implementation it is also called an "adapter" or "bus". Controller +drivers are usually in the ``drivers/i2c/busses/`` subdirectory. -An **algorithm** contains general code that can be used to implement a -whole class of I2C adapters. Each specific adapter driver either depends on -an algorithm driver in the ``drivers/i2c/algos/`` subdirectory, or includes -its own implementation. +An **algorithm** contains general code that can be used to implement a whole +class of I2C controllers. Each specific controller driver either depends on an +algorithm driver in the ``drivers/i2c/algos/`` subdirectory, or includes its +own implementation. A **target** chip is a node that responds to communications when addressed by a -controller. In the Linux kernel implementation it is called a **client**. While -targets are usually separate external chips, Linux can also act as a target -(needs hardware support) and respond to another controller on the bus. This is -then called a **local target**. In contrast, an external chip is called a -**remote target**. +controller. In the Linux kernel implementation it is also called a "client". +While targets are usually separate external chips, Linux can also act as a +target (needs hardware support) and respond to another controller on the bus. +This is then called a **local target**. In contrast, an external chip is called +a **remote target**. Target drivers are kept in a directory specific to the feature they provide, for example ``drivers/gpio/`` for GPIO expanders and ``drivers/media/i2c/`` for video-related chips. -For the example configuration in figure, you will need a driver for your -I2C adapter, and drivers for your I2C devices (usually one driver for each -device). +For the example configuration in the figure above, you will need one driver for +the I2C controller, and drivers for your I2C targets. Usually one driver for +each target. + +Synonyms +-------- + +As mentioned above, the Linux I2C implementation historically uses the terms +"adapter" for controller and "client" for target. A number of data structures +have these synonyms in their name. So, when discussing implementation details, +you should be aware of these terms as well. The official wording is preferred, +though. Outdated terminology -------------------- From 49bbeb5719c2f56907d3a9623b47c6c15c2c431d Mon Sep 17 00:00:00 2001 From: Nick Child Date: Thu, 20 Jun 2024 10:23:12 -0500 Subject: [PATCH 594/778] ibmvnic: Free any outstanding tx skbs during scrq reset There are 2 types of outstanding tx skb's: Type 1: Packets that are sitting in the drivers ind_buff that are waiting to be batch sent to the NIC. During a device reset, these are freed with a call to ibmvnic_tx_scrq_clean_buffer() Type 2: Packets that have been sent to the NIC and are awaiting a TX completion IRQ. These are free'd during a reset with a call to clean_tx_pools() During any reset which requires us to free the tx irq, ensure that the Type 2 skb references are freed. Since the irq is released, it is impossible for the NIC to inform of any completions. Furthermore, later in the reset process is a call to init_tx_pools() which marks every entry in the tx pool as free (ie not outstanding). So if the driver is to make a call to init_tx_pools(), it must first be sure that the tx pool is empty of skb references. This issue was discovered by observing the following in the logs during EEH testing: TX free map points to untracked skb (tso_pool 0 idx=4) TX free map points to untracked skb (tso_pool 0 idx=5) TX free map points to untracked skb (tso_pool 1 idx=36) Fixes: 65d6470d139a ("ibmvnic: clean pending indirect buffs during reset") Signed-off-by: Nick Child Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5e9a93bdb518..5490f0f9c112 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -4061,6 +4061,12 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free) adapter->num_active_tx_scrqs = 0; } + /* Clean any remaining outstanding SKBs + * we freed the irq so we won't be hearing + * from them + */ + clean_tx_pools(adapter); + if (adapter->rx_scrq) { for (i = 0; i < adapter->num_active_rx_scrqs; i++) { if (!adapter->rx_scrq[i]) From dab8f9f0fe3aada61c0eb013dcf7d3ff75a2c336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Jun 2024 16:37:13 +0200 Subject: [PATCH 595/778] pwm: stm32: Fix calculation of prescaler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A small prescaler is beneficial, as this improves the resolution of the duty_cycle configuration. However if the prescaler is too small, the maximal possible period becomes considerably smaller than the requested value. One situation where this goes wrong is the following: With a parent clock rate of 208877930 Hz and max_arr = 0xffff = 65535, a request for period = 941243 ns currently results in PSC = 1. The value for ARR is then calculated to ARR = 941243 * 208877930 / (1000000000 * 2) - 1 = 98301 This value is bigger than 65535 however and so doesn't fit into the respective register field. In this particular case the PWM was configured for a period of 313733.4806027616 ns (with ARR = 98301 & 0xffff). Even if ARR was configured to its maximal value, only period = 627495.6861167669 ns would be achievable. Fix the calculation accordingly and adapt the comment to match the new algorithm. With the calculation fixed the above case results in PSC = 2 and so an actual period of 941229.1667195285 ns. Fixes: 8002fbeef1e4 ("pwm: stm32: Calculate prescaler with a division instead of a loop") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/b4d96b79917617434a540df45f20cb5de4142f88.1718979150.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-stm32.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index 3e7b2a8e34e7..97d3de24f312 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -321,17 +321,23 @@ static int stm32_pwm_config(struct stm32_pwm *priv, unsigned int ch, * First we need to find the minimal value for prescaler such that * * period_ns * clkrate - * ------------------------------ + * ------------------------------ < max_arr + 1 * NSEC_PER_SEC * (prescaler + 1) * - * isn't bigger than max_arr. + * This equation is equivalent to + * + * period_ns * clkrate + * ---------------------------- < prescaler + 1 + * NSEC_PER_SEC * (max_arr + 1) + * + * Using integer division and knowing that the right hand side is + * integer, this is further equivalent to + * + * (period_ns * clkrate) // (NSEC_PER_SEC * (max_arr + 1)) ≤ prescaler */ prescaler = mul_u64_u64_div_u64(period_ns, clk_get_rate(priv->clk), - (u64)NSEC_PER_SEC * priv->max_arr); - if (prescaler > 0) - prescaler -= 1; - + (u64)NSEC_PER_SEC * ((u64)priv->max_arr + 1)); if (prescaler > MAX_TIM_PSC) return -EINVAL; From f01af3022d4a46362c5dda3d35dea939f3246d10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 21 Jun 2024 16:37:14 +0200 Subject: [PATCH 596/778] pwm: stm32: Fix error message to not describe the previous error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "Failed to lock the clock" is an appropriate error message for clk_rate_exclusive_get() failing, but not for the clock running too fast for the driver's calculations. Adapt the error message accordingly. Fixes: d44d635635a7 ("pwm: stm32: Fix for settings using period > UINT32_MAX") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/285182163211203fc823a65b180761f46e828dcb.1718979150.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-stm32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index 97d3de24f312..8bae3fd2b330 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -681,7 +681,8 @@ static int stm32_pwm_probe(struct platform_device *pdev) * .apply() won't overflow. */ if (clk_get_rate(priv->clk) > 1000000000) - return dev_err_probe(dev, -EINVAL, "Failed to lock clock\n"); + return dev_err_probe(dev, -EINVAL, "Clock freq too high (%lu)\n", + clk_get_rate(priv->clk)); chip->ops = &stm32pwm_ops; From 9bd01500e4d8c3c3387076581c19b3987776d7af Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 22 Jun 2024 17:22:24 -0400 Subject: [PATCH 597/778] bcachefs: Fix freeing of error pointers This fixes incorrect/missign checking of strndup_user() returns. Signed-off-by: Kent Overstreet --- fs/bcachefs/chardev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 9e54323f0f5f..6d82e1165adc 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -216,7 +216,8 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a ret = PTR_ERR_OR_ZERO(optstr) ?: bch2_parse_mount_opts(NULL, &thr->opts, optstr); - kfree(optstr); + if (!IS_ERR(optstr)) + kfree(optstr); if (ret) goto err; @@ -319,7 +320,8 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) return ret; ret = bch2_dev_add(c, path); - kfree(path); + if (!IS_ERR(path)) + kfree(path); return ret; } @@ -850,7 +852,8 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c, ret = PTR_ERR_OR_ZERO(optstr) ?: bch2_parse_mount_opts(c, &thr->opts, optstr); - kfree(optstr); + if (!IS_ERR(optstr)) + kfree(optstr); if (ret) goto err; From f44cc269a1c148ad83332d85fe54607e8874ca79 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 22 Jun 2024 20:52:39 -0400 Subject: [PATCH 598/778] bcachefs: fix seqmutex_relock() We were grabbing the sequence number before unlock incremented it - fix this by moving the increment to seqmutex_lock() (so the seqmutex_relock() failure path skips the mutex_trylock()), and returning the sequence number from unlock(), to make the API simpler and safer. Signed-off-by: Kent Overstreet --- fs/bcachefs/debug.c | 8 ++------ fs/bcachefs/seqmutex.h | 11 ++++------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 51cbf3928361..8ec2d44e4956 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -575,7 +575,6 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, struct bch_fs *c = i->c; struct btree_trans *trans; ssize_t ret = 0; - u32 seq; i->ubuf = buf; i->size = size; @@ -589,8 +588,7 @@ restart: continue; closure_get(&trans->ref); - seq = seqmutex_seq(&c->btree_trans_lock); - seqmutex_unlock(&c->btree_trans_lock); + u32 seq = seqmutex_unlock(&c->btree_trans_lock); ret = flush_buf(i); if (ret) { @@ -811,7 +809,6 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, struct bch_fs *c = i->c; struct btree_trans *trans; ssize_t ret = 0; - u32 seq; i->ubuf = buf; i->size = size; @@ -828,8 +825,7 @@ restart: continue; closure_get(&trans->ref); - seq = seqmutex_seq(&c->btree_trans_lock); - seqmutex_unlock(&c->btree_trans_lock); + u32 seq = seqmutex_unlock(&c->btree_trans_lock); ret = flush_buf(i); if (ret) { diff --git a/fs/bcachefs/seqmutex.h b/fs/bcachefs/seqmutex.h index c1860d8163fb..c4b3d8d3f414 100644 --- a/fs/bcachefs/seqmutex.h +++ b/fs/bcachefs/seqmutex.h @@ -19,17 +19,14 @@ static inline bool seqmutex_trylock(struct seqmutex *lock) static inline void seqmutex_lock(struct seqmutex *lock) { mutex_lock(&lock->lock); -} - -static inline void seqmutex_unlock(struct seqmutex *lock) -{ lock->seq++; - mutex_unlock(&lock->lock); } -static inline u32 seqmutex_seq(struct seqmutex *lock) +static inline u32 seqmutex_unlock(struct seqmutex *lock) { - return lock->seq; + u32 seq = lock->seq; + mutex_unlock(&lock->lock); + return seq; } static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq) From 18e92841e87bc548fcb91530115a66e72eecb10c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 22 Jun 2024 20:59:09 -0400 Subject: [PATCH 599/778] bcachefs: Make btree_deadlock_to_text() clearer btree_deadlock_to_text() searches the list of btree transactions to find a deadlock - when it finds one it's done; it's not like other *_read() functions that's printing each object. Factor out btree_deadlock_to_text() to make this clearer. Signed-off-by: Kent Overstreet --- fs/bcachefs/debug.c | 66 ++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 8ec2d44e4956..ecfdb21ebade 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -802,48 +802,54 @@ static const struct file_operations btree_transaction_stats_op = { .read = btree_transaction_stats_read, }; +/* walk btree transactions until we find a deadlock and print it */ +static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c) +{ + struct btree_trans *trans; + pid_t iter = 0; +restart: + seqmutex_lock(&c->btree_trans_lock); + list_for_each_entry(trans, &c->btree_trans_list, list) { + struct task_struct *task = READ_ONCE(trans->locking_wait.task); + + if (!task || task->pid <= iter) + continue; + + iter = task->pid; + + closure_get(&trans->ref); + + u32 seq = seqmutex_unlock(&c->btree_trans_lock); + + bool found = bch2_check_for_deadlock(trans, out) != 0; + + closure_put(&trans->ref); + + if (found) + return; + + if (!seqmutex_relock(&c->btree_trans_lock, seq)) + goto restart; + } + seqmutex_unlock(&c->btree_trans_lock); +} + static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; struct bch_fs *c = i->c; - struct btree_trans *trans; ssize_t ret = 0; i->ubuf = buf; i->size = size; i->ret = 0; - if (i->iter) - goto out; -restart: - seqmutex_lock(&c->btree_trans_lock); - list_for_each_entry(trans, &c->btree_trans_list, list) { - struct task_struct *task = READ_ONCE(trans->locking_wait.task); - - if (!task || task->pid <= i->iter) - continue; - - closure_get(&trans->ref); - u32 seq = seqmutex_unlock(&c->btree_trans_lock); - - ret = flush_buf(i); - if (ret) { - closure_put(&trans->ref); - goto out; - } - - bch2_check_for_deadlock(trans, &i->buf); - - i->iter = task->pid; - - closure_put(&trans->ref); - - if (!seqmutex_relock(&c->btree_trans_lock, seq)) - goto restart; + if (!i->iter) { + btree_deadlock_to_text(&i->buf, c); + i->iter++; } - seqmutex_unlock(&c->btree_trans_lock); -out: + if (i->buf.allocation_failure) ret = -ENOMEM; From 06efa5f30c28eaf237247ca8c4cb46eb62cb6bd9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 22 Jun 2024 21:38:58 -0400 Subject: [PATCH 600/778] closures: closure_get_not_zero(), closure_return_sync() Provide new primitives for solving a lifetime issue with bcachefs btree_trans objects. closure_sync_return(): like closure_sync(), wait synchronously for any outstanding gets. like closure_return, the closure is considered "finished" and the ref left at 0. closure_get_not_zero(): get a ref on a closure if it's alive, i.e. the ref is not zero. Signed-off-by: Kent Overstreet --- include/linux/closure.h | 23 ++++++++++++++++++ lib/closure.c | 52 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/include/linux/closure.h b/include/linux/closure.h index 99155df162d0..59b8c06b11ff 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -284,6 +284,21 @@ static inline void closure_get(struct closure *cl) #endif } +/** + * closure_get_not_zero + */ +static inline bool closure_get_not_zero(struct closure *cl) +{ + unsigned old = atomic_read(&cl->remaining); + do { + if (!(old & CLOSURE_REMAINING_MASK)) + return false; + + } while (!atomic_try_cmpxchg_acquire(&cl->remaining, &old, old + 1)); + + return true; +} + /** * closure_init - Initialize a closure, setting the refcount to 1 * @cl: closure to initialize @@ -310,6 +325,12 @@ static inline void closure_init_stack(struct closure *cl) atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); } +static inline void closure_init_stack_release(struct closure *cl) +{ + memset(cl, 0, sizeof(struct closure)); + atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +} + /** * closure_wake_up - wake up all closures on a wait list, * with memory barrier @@ -355,6 +376,8 @@ do { \ */ #define closure_return(_cl) continue_at((_cl), NULL, NULL) +void closure_return_sync(struct closure *cl); + /** * continue_at_nobarrier - jump to another function without barrier * diff --git a/lib/closure.c b/lib/closure.c index 2e1ee9fdec08..c971216d9d77 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -13,7 +13,7 @@ #include #include -static inline void closure_put_after_sub(struct closure *cl, int flags) +static inline void closure_put_after_sub_checks(int flags) { int r = flags & CLOSURE_REMAINING_MASK; @@ -22,12 +22,17 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r))) r &= ~CLOSURE_GUARD_MASK; - if (!r) { - smp_acquire__after_ctrl_dep(); + WARN(!r && (flags & ~CLOSURE_DESTRUCTOR), + "closure ref hit 0 with incorrect flags set: %x (%u)", + flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags)); +} - WARN(flags & ~CLOSURE_DESTRUCTOR, - "closure ref hit 0 with incorrect flags set: %x (%u)", - flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags)); +static inline void closure_put_after_sub(struct closure *cl, int flags) +{ + closure_put_after_sub_checks(flags); + + if (!(flags & CLOSURE_REMAINING_MASK)) { + smp_acquire__after_ctrl_dep(); cl->closure_get_happened = false; @@ -145,6 +150,41 @@ void __sched __closure_sync(struct closure *cl) } EXPORT_SYMBOL(__closure_sync); +/* + * closure_return_sync - finish running a closure, synchronously (i.e. waiting + * for outstanding get()s to finish) and returning once closure refcount is 0. + * + * Unlike closure_sync() this doesn't reinit the ref to 1; subsequent + * closure_get_not_zero() calls waill fail. + */ +void __sched closure_return_sync(struct closure *cl) +{ + struct closure_syncer s = { .task = current }; + + cl->s = &s; + set_closure_fn(cl, closure_sync_fn, NULL); + + unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR, + &cl->remaining); + + closure_put_after_sub_checks(flags); + + if (unlikely(flags & CLOSURE_REMAINING_MASK)) { + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (s.done) + break; + schedule(); + } + + __set_current_state(TASK_RUNNING); + } + + if (cl->parent) + closure_put(cl->parent); +} +EXPORT_SYMBOL(closure_return_sync); + int __sched __closure_sync_timeout(struct closure *cl, unsigned long timeout) { struct closure_syncer s = { .task = current }; From de611ab6fc5ed0d68dd46319b9913353e3b459e9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 22 Jun 2024 22:02:09 -0400 Subject: [PATCH 601/778] bcachefs: Fix race between trans_put() and btree_transactions_read() debug.c was using closure_get() on a different thread's closure where the we don't know if the object being refcounted is alive. We keep btree_trans objects on a list so they can be printed by debug code, and because it is cost prohibitive to touch the btree_trans list every time we allocate and free btree_trans objects, cached objects are also on this list. However, we do not want the debug code to see cached but not in use btree_trans objects - critically because the btree_paths array will have been freed (if it was reallocated). closure_get() is also incorrect to use when that get may race with it hitting zero, i.e. we must already have a ref on the object or know the ref can't currently hit 0 for other reasons (as used in the cycle detector). to fix this, use the previously introduced closure_get_not_zero(), closure_return_sync(), and closure_init_stack_release(); the debug code now can only take a ref on a trans object if it's alive and in use. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 10 ++++------ fs/bcachefs/debug.c | 19 +++++++++---------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 3a1419d17888..15c1c7cfefe6 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -3130,7 +3130,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS); memset(trans, 0, sizeof(*trans)); - closure_init_stack(&trans->ref); seqmutex_lock(&c->btree_trans_lock); if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { @@ -3161,7 +3160,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) list_add_done: seqmutex_unlock(&c->btree_trans_lock); got_trans: - trans->ref.closure_get_happened = false; trans->c = c; trans->last_begin_time = local_clock(); trans->fn_idx = fn_idx; @@ -3200,6 +3198,8 @@ got_trans: trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_lock_time = jiffies; trans->srcu_held = true; + + closure_init_stack_release(&trans->ref); return trans; } @@ -3257,10 +3257,10 @@ void bch2_trans_put(struct btree_trans *trans) bch2_journal_keys_put(c); /* - * trans->ref protects trans->locking_wait.task, btree_paths arary; used + * trans->ref protects trans->locking_wait.task, btree_paths array; used * by cycle detector */ - closure_sync(&trans->ref); + closure_return_sync(&trans->ref); trans->locking_wait.task = NULL; unsigned long *paths_allocated = trans->paths_allocated; @@ -3385,8 +3385,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) per_cpu_ptr(c->btree_trans_bufs, cpu)->trans; if (trans) { - closure_sync(&trans->ref); - seqmutex_lock(&c->btree_trans_lock); list_del(&trans->list); seqmutex_unlock(&c->btree_trans_lock); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index ecfdb21ebade..61c50522abb9 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -587,14 +587,10 @@ restart: if (!task || task->pid <= i->iter) continue; - closure_get(&trans->ref); - u32 seq = seqmutex_unlock(&c->btree_trans_lock); + if (!closure_get_not_zero(&trans->ref)) + continue; - ret = flush_buf(i); - if (ret) { - closure_put(&trans->ref); - goto unlocked; - } + u32 seq = seqmutex_unlock(&c->btree_trans_lock); bch2_btree_trans_to_text(&i->buf, trans); @@ -604,10 +600,12 @@ restart: printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); - i->iter = task->pid; - closure_put(&trans->ref); + ret = flush_buf(i); + if (ret) + goto unlocked; + if (!seqmutex_relock(&c->btree_trans_lock, seq)) goto restart; } @@ -817,7 +815,8 @@ restart: iter = task->pid; - closure_get(&trans->ref); + if (!closure_get_not_zero(&trans->ref)) + continue; u32 seq = seqmutex_unlock(&c->btree_trans_lock); From 1aaf5cb41b8e92dcd3ac7e047124cb0e3e27f1c1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 22 Jun 2024 22:11:01 -0400 Subject: [PATCH 602/778] bcachefs: Fix btree_trans list ordering The debug code relies on btree_trans_list being ordered so that it can resume on subsequent calls or lock restarts. However, it was using trans->locknig_wait.task.pid, which is incorrect since btree_trans objects are cached and reused - typically by different tasks. Fix this by switching to pointer order, and also sort them lazily when required - speeding up the btree_trans_get() fastpath. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 9 ++------- fs/bcachefs/debug.c | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 15c1c7cfefe6..0ed9e6574fcd 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -3149,15 +3149,10 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) BUG_ON(pos_task && pid == pos_task->pid && pos->locked); - - if (pos_task && pid < pos_task->pid) { - list_add_tail(&trans->list, &pos->list); - goto list_add_done; - } } } - list_add_tail(&trans->list, &c->btree_trans_list); -list_add_done: + + list_add(&trans->list, &c->btree_trans_list); seqmutex_unlock(&c->btree_trans_lock); got_trans: trans->c = c; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 61c50522abb9..f0d4727c4dc2 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -568,6 +568,32 @@ static const struct file_operations cached_btree_nodes_ops = { .read = bch2_cached_btree_nodes_read, }; +typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r); + +static void list_sort(struct list_head *head, list_cmp_fn cmp) +{ + struct list_head *pos; + + list_for_each(pos, head) + while (!list_is_last(pos, head) && + cmp(pos, pos->next) > 0) { + struct list_head *pos2, *next = pos->next; + + list_del(next); + list_for_each(pos2, head) + if (cmp(next, pos2) < 0) + goto pos_found; + BUG(); +pos_found: + list_add_tail(next, pos2); + } +} + +static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r) +{ + return cmp_int(l, r); +} + static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { @@ -581,12 +607,14 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, i->ret = 0; restart: seqmutex_lock(&c->btree_trans_lock); - list_for_each_entry(trans, &c->btree_trans_list, list) { - struct task_struct *task = READ_ONCE(trans->locking_wait.task); + list_sort(&c->btree_trans_list, list_ptr_order_cmp); - if (!task || task->pid <= i->iter) + list_for_each_entry(trans, &c->btree_trans_list, list) { + if ((ulong) trans < i->iter) continue; + i->iter = (ulong) trans; + if (!closure_get_not_zero(&trans->ref)) continue; @@ -596,7 +624,7 @@ restart: prt_printf(&i->buf, "backtrace:\n"); printbuf_indent_add(&i->buf, 2); - bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL); + bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL); printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); From 42354e3c3150cf886457f3354af0acefc56b53a2 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Fri, 21 Jun 2024 15:00:59 +0200 Subject: [PATCH 603/778] netlink: specs: Fix pse-set command attributes Not all PSE attributes are used for the pse-set netlink command. Select only the ones used by ethtool. Fixes: f8586411e40e ("netlink: specs: Expand the pse netlink command with PoE interface") Signed-off-by: Kory Maincent Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 00dc61358be8..4510e8d1adcb 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1603,7 +1603,7 @@ operations: attributes: - header reply: - attributes: &pse + attributes: - header - podl-pse-admin-state - podl-pse-admin-control @@ -1620,7 +1620,10 @@ operations: do: request: - attributes: *pse + attributes: + - header + - podl-pse-admin-control + - c33-pse-admin-control - name: rss-get doc: Get RSS params. From 54a4e5c16382e871c01dd82b47e930fdce30406b Mon Sep 17 00:00:00 2001 From: Enguerrand de Ribaucourt Date: Fri, 21 Jun 2024 16:43:20 +0200 Subject: [PATCH 604/778] net: phy: micrel: add Microchip KSZ 9477 to the device table PHY_ID_KSZ9477 was supported but not added to the device table passed to MODULE_DEVICE_TABLE. Fixes: fc3973a1fa09 ("phy: micrel: add Microchip KSZ 9477 Switch PHY support") Signed-off-by: Enguerrand de Ribaucourt Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 5aada7cf3da7..ebafedde0ab7 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -5607,6 +5607,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ8081, MICREL_PHY_ID_MASK }, { PHY_ID_KSZ8873MLL, MICREL_PHY_ID_MASK }, { PHY_ID_KSZ886X, MICREL_PHY_ID_MASK }, + { PHY_ID_KSZ9477, MICREL_PHY_ID_MASK }, { PHY_ID_LAN8814, MICREL_PHY_ID_MASK }, { PHY_ID_LAN8804, MICREL_PHY_ID_MASK }, { PHY_ID_LAN8841, MICREL_PHY_ID_MASK }, From d963c95bc9840d070a788c35e41b715a648717f7 Mon Sep 17 00:00:00 2001 From: Enguerrand de Ribaucourt Date: Fri, 21 Jun 2024 16:43:21 +0200 Subject: [PATCH 605/778] net: dsa: microchip: use collision based back pressure mode Errata DS80000758 states that carrier sense back pressure mode can cause link down issues in 100BASE-TX half duplex mode. The datasheet also recommends to always use the collision based back pressure mode. Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477") Signed-off-by: Enguerrand de Ribaucourt Reviewed-by: Woojung Huh Acked-by: Arun Ramadoss Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz9477.c | 4 ++++ drivers/net/dsa/microchip/ksz9477_reg.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 2231128eef8b..cbaca4140cda 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1297,6 +1297,10 @@ int ksz9477_setup(struct dsa_switch *ds) /* Enable REG_SW_MTU__2 reg by setting SW_JUMBO_PACKET */ ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_JUMBO_PACKET, true); + /* Use collision based back pressure mode. */ + ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_BACK_PRESSURE, + SW_BACK_PRESSURE_COLLISION); + /* Now we can configure default MTU value */ ret = regmap_update_bits(ksz_regmap_16(dev), REG_SW_MTU__2, REG_SW_MTU_MASK, VLAN_ETH_FRAME_LEN + ETH_FCS_LEN); diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index f3a205ee483f..fb124be8edd3 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -247,6 +247,7 @@ #define REG_SW_MAC_CTRL_1 0x0331 #define SW_BACK_PRESSURE BIT(5) +#define SW_BACK_PRESSURE_COLLISION 0 #define FAIR_FLOW_CTRL BIT(4) #define NO_EXC_COLLISION_DROP BIT(3) #define SW_JUMBO_PACKET BIT(2) From bf1bff11e497a01b0cc6cb2afcff734340ae95f8 Mon Sep 17 00:00:00 2001 From: Enguerrand de Ribaucourt Date: Fri, 21 Jun 2024 16:43:22 +0200 Subject: [PATCH 606/778] net: dsa: microchip: monitor potential faults in half-duplex mode The errata DS80000754 recommends monitoring potential faults in half-duplex mode for the KSZ9477 family. half-duplex is not very common so I just added a critical message when the fault conditions are detected. The switch can be expected to be unable to communicate anymore in these states and a software reset of the switch would be required which I did not implement. Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477") Signed-off-by: Enguerrand de Ribaucourt Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz9477.c | 51 +++++++++++++++++++++++++ drivers/net/dsa/microchip/ksz9477.h | 2 + drivers/net/dsa/microchip/ksz9477_reg.h | 10 ++++- drivers/net/dsa/microchip/ksz_common.c | 11 ++++++ drivers/net/dsa/microchip/ksz_common.h | 1 + 5 files changed, 73 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index cbaca4140cda..425e20daf1e9 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -427,6 +427,57 @@ void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze) mutex_unlock(&p->mib.cnt_mutex); } +int ksz9477_errata_monitor(struct ksz_device *dev, int port, + u64 tx_late_col) +{ + u32 pmavbc; + u8 status; + u16 pqm; + int ret; + + ret = ksz_pread8(dev, port, REG_PORT_STATUS_0, &status); + if (ret) + return ret; + if (!(FIELD_GET(PORT_INTF_SPEED_MASK, status) == PORT_INTF_SPEED_NONE) && + !(status & PORT_INTF_FULL_DUPLEX)) { + /* Errata DS80000754 recommends monitoring potential faults in + * half-duplex mode. The switch might not be able to communicate anymore + * in these states. + * If you see this message, please read the errata-sheet for more information: + * https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9477S-Errata-DS80000754.pdf + * To workaround this issue, half-duplex mode should be avoided. + * A software reset could be implemented to recover from this state. + */ + dev_warn_once(dev->dev, + "Half-duplex detected on port %d, transmission halt may occur\n", + port); + if (tx_late_col != 0) { + /* Transmission halt with late collisions */ + dev_crit_once(dev->dev, + "TX late collisions detected, transmission may be halted on port %d\n", + port); + } + ret = ksz_read8(dev, REG_SW_LUE_CTRL_0, &status); + if (ret) + return ret; + if (status & SW_VLAN_ENABLE) { + ret = ksz_pread16(dev, port, REG_PORT_QM_TX_CNT_0__4, &pqm); + if (ret) + return ret; + ret = ksz_read32(dev, REG_PMAVBC, &pmavbc); + if (ret) + return ret; + if ((FIELD_GET(PMAVBC_MASK, pmavbc) <= PMAVBC_MIN) || + (FIELD_GET(PORT_QM_TX_CNT_M, pqm) >= PORT_QM_TX_CNT_MAX)) { + /* Transmission halt with Half-Duplex and VLAN */ + dev_crit_once(dev->dev, + "resources out of limits, transmission may be halted\n"); + } + } + } + return ret; +} + void ksz9477_port_init_cnt(struct ksz_device *dev, int port) { struct ksz_port_mib *mib = &dev->ports[port].mib; diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h index ce1e656b800b..239a281da10b 100644 --- a/drivers/net/dsa/microchip/ksz9477.h +++ b/drivers/net/dsa/microchip/ksz9477.h @@ -36,6 +36,8 @@ int ksz9477_port_mirror_add(struct ksz_device *dev, int port, bool ingress, struct netlink_ext_ack *extack); void ksz9477_port_mirror_del(struct ksz_device *dev, int port, struct dsa_mall_mirror_tc_entry *mirror); +int ksz9477_errata_monitor(struct ksz_device *dev, int port, + u64 tx_late_col); void ksz9477_get_caps(struct ksz_device *dev, int port, struct phylink_config *config); int ksz9477_fdb_dump(struct ksz_device *dev, int port, diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index fb124be8edd3..d5354c600ea1 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -843,8 +843,8 @@ #define REG_PORT_STATUS_0 0x0030 -#define PORT_INTF_SPEED_M 0x3 -#define PORT_INTF_SPEED_S 3 +#define PORT_INTF_SPEED_MASK GENMASK(4, 3) +#define PORT_INTF_SPEED_NONE GENMASK(1, 0) #define PORT_INTF_FULL_DUPLEX BIT(2) #define PORT_TX_FLOW_CTRL BIT(1) #define PORT_RX_FLOW_CTRL BIT(0) @@ -1168,6 +1168,11 @@ #define PORT_RMII_CLK_SEL BIT(7) #define PORT_MII_SEL_EDGE BIT(5) +#define REG_PMAVBC 0x03AC + +#define PMAVBC_MASK GENMASK(26, 16) +#define PMAVBC_MIN 0x580 + /* 4 - MAC */ #define REG_PORT_MAC_CTRL_0 0x0400 @@ -1495,6 +1500,7 @@ #define PORT_QM_TX_CNT_USED_S 0 #define PORT_QM_TX_CNT_M (BIT(11) - 1) +#define PORT_QM_TX_CNT_MAX 0x200 #define REG_PORT_QM_TX_CNT_1__4 0x0A14 diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 2818e24e2a51..0433109b42e5 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1382,6 +1382,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .tc_cbs_supported = true, .ops = &ksz9477_dev_ops, .phylink_mac_ops = &ksz9477_phylink_mac_ops, + .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, @@ -1416,6 +1417,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_ipms = 8, .ops = &ksz9477_dev_ops, .phylink_mac_ops = &ksz9477_phylink_mac_ops, + .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, @@ -1450,6 +1452,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_ipms = 8, .ops = &ksz9477_dev_ops, .phylink_mac_ops = &ksz9477_phylink_mac_ops, + .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, @@ -1540,6 +1543,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .tc_cbs_supported = true, .ops = &ksz9477_dev_ops, .phylink_mac_ops = &ksz9477_phylink_mac_ops, + .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, @@ -1820,6 +1824,7 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port) struct rtnl_link_stats64 *stats; struct ksz_stats_raw *raw; struct ksz_port_mib *mib; + int ret; mib = &dev->ports[port].mib; stats = &mib->stats64; @@ -1861,6 +1866,12 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port) pstats->rx_pause_frames = raw->rx_pause; spin_unlock(&mib->stats64_lock); + + if (dev->info->phy_errata_9477) { + ret = ksz9477_errata_monitor(dev, port, raw->tx_late_col); + if (ret) + dev_err(dev->dev, "Failed to monitor transmission halt\n"); + } } void ksz88xx_r_mib_stats64(struct ksz_device *dev, int port) diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index c784fd23a993..ee7db46e469d 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -66,6 +66,7 @@ struct ksz_chip_data { bool tc_cbs_supported; const struct ksz_dev_ops *ops; const struct phylink_mac_ops *phylink_mac_ops; + bool phy_errata_9477; bool ksz87xx_eee_link_erratum; const struct ksz_mib_names *mib_names; int mib_cnt; From 8a67cbd47bf431a1f531ba73e952ce4c114a33a5 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 21 Jun 2024 13:00:00 -0400 Subject: [PATCH 607/778] dt-bindings: net: fman: remove ptp-timer from required list IEEE1588(ptp) is optional feature for network. Remove it from required list to fix below CHECK_DTBS warning. arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dtb: ethernet@f0000: 'ptp-timer' is a required property Signed-off-by: Frank Li Acked-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml index c80c880a9dab..60aaf30d68ed 100644 --- a/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml +++ b/Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml @@ -128,7 +128,6 @@ required: - cell-index - reg - fsl,fman-ports - - ptp-timer dependencies: pcs-handle-names: From f4b91c1d17c676b8ad4c6bd674da874f3f7d5701 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 21 Jun 2024 10:54:19 -0700 Subject: [PATCH 608/778] ice: Rebuild TC queues on VSI queue reconfiguration TC queues needs to be correctly updated when the number of queues on a VSI is reconfigured, so netdev's queue and TC settings will be dynamically adjusted and could accurately represent the underlying hardware state after changes to the VSI queue counts. Fixes: 0754d65bd4be ("ice: Add infrastructure for mqprio support via ndo_setup_tc") Reviewed-by: Wojciech Drewek Signed-off-by: Jan Sokolowski Signed-off-by: Karen Ostrowska Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1766230abfff..55a42aad92a5 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4139,7 +4139,7 @@ bool ice_is_wol_supported(struct ice_hw *hw) int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) { struct ice_pf *pf = vsi->back; - int err = 0, timeout = 50; + int i, err = 0, timeout = 50; if (!new_rx && !new_tx) return -EINVAL; @@ -4165,6 +4165,14 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) ice_vsi_close(vsi); ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT); + + ice_for_each_traffic_class(i) { + if (vsi->tc_cfg.ena_tc & BIT(i)) + netdev_set_tc_queue(vsi->netdev, + vsi->tc_cfg.tc_info[i].netdev_tc, + vsi->tc_cfg.tc_info[i].qcount_tx, + vsi->tc_cfg.tc_info[i].qoffset); + } ice_pf_dcb_recfg(pf, locked); ice_vsi_open(vsi); done: From 36da8e387b0632d4c43d67849a5b506fa79fcadd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 23 Jun 2024 10:10:43 -0400 Subject: [PATCH 609/778] bcachefs: Add missing recalc_capacity() call This fixes filesystem size not changing on device removal. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 9083df82073a..641f2975177b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1529,6 +1529,7 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) * The allocator thread itself allocates btree nodes, so stop it first: */ bch2_dev_allocator_remove(c, ca); + bch2_recalc_capacity(c); bch2_dev_journal_stop(&c->journal, ca); } From 2d64eaeeeda5659d52da1af79d237269ba3c2d2c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 23 Jun 2024 11:41:13 +0800 Subject: [PATCH 610/778] irqchip/loongson-eiointc: Use early_cpu_to_node() instead of cpu_to_node() Multi-bridge machines required that all eiointc controllers in the system are initialized, otherwise the system does not boot. The initialization happens on the boot CPU during early boot and relies on cpu_to_node() for identifying the individual nodes. That works when the number of possible CPUs is large enough, but with a command line limit, e.g. "nr_cpus=$N" for kdump, but fails when the CPUs of the secondary nodes are not covered. During early ACPI enumeration all CPU to node mappings are recorded up to CONFIG_NR_CPUS. These are accessible via early_cpu_to_node() even in the case that "nr_cpus=N" truncates the number of possible CPUs and only provides the possible CPUs via cpu_to_node() translation. Change the node lookup in the driver to use early_cpu_to_node() so that even with a limitation on the number of possible CPUs all eointc instances are initialized. This can't obviously cure the case where CONFIG_NR_CPUS is too small. [ tglx: Massaged changelog ] Fixes: 64cc451e45e1 ("irqchip/loongson-eiointc: Fix incorrect use of acpi_get_vec_parent") Signed-off-by: Huacai Chen Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240623034113.1808727-1-chenhuacai@loongson.cn --- drivers/irqchip/irq-loongson-eiointc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index c7ddebf312ad..b1f2080be2be 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -15,6 +15,7 @@ #include #include #include +#include #define EIOINTC_REG_NODEMAP 0x14a0 #define EIOINTC_REG_IPMAP 0x14c0 @@ -339,7 +340,7 @@ static int __init pch_msi_parse_madt(union acpi_subtable_headers *header, int node; if (cpu_has_flatmode) - node = cpu_to_node(eiointc_priv[nr_pics - 1]->node * CORES_PER_EIO_NODE); + node = early_cpu_to_node(eiointc_priv[nr_pics - 1]->node * CORES_PER_EIO_NODE); else node = eiointc_priv[nr_pics - 1]->node; @@ -431,7 +432,7 @@ int __init eiointc_acpi_init(struct irq_domain *parent, goto out_free_handle; if (cpu_has_flatmode) - node = cpu_to_node(acpi_eiointc->node * CORES_PER_EIO_NODE); + node = early_cpu_to_node(acpi_eiointc->node * CORES_PER_EIO_NODE); else node = acpi_eiointc->node; acpi_set_vec_parent(node, priv->eiointc_domain, pch_group); From a9c3ee5d0fdb069b54902300df6ac822027f3b0a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 22 Jun 2024 12:33:38 +0800 Subject: [PATCH 611/778] irqchip/loongson-liointc: Set different ISRs for different cores The liointc hardware provides separate Interrupt Status Registers (ISR) for each core. The current code uses always the ISR of core #0, which works during boot because by default all interrupts are routed to core #0. When the interrupt routing changes in the firmware configuration then this causes interrupts to be lost because they are not configured in the corresponding core. Use the core index to access the correct ISR instead of a hardcoded 0. [ tglx: Massaged changelog ] Fixes: 0858ed035a85 ("irqchip/loongson-liointc: Add ACPI init support") Co-developed-by: Tianli Xiong Signed-off-by: Tianli Xiong Signed-off-by: Huacai Chen Signed-off-by: Thomas Gleixner Cc: Link: https://lore.kernel.org/r/20240622043338.1566945-1-chenhuacai@loongson.cn --- drivers/irqchip/irq-loongson-liointc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index e4b33aed1c97..7c4fe7ab4b83 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -28,7 +28,7 @@ #define LIOINTC_INTC_CHIP_START 0x20 -#define LIOINTC_REG_INTC_STATUS (LIOINTC_INTC_CHIP_START + 0x20) +#define LIOINTC_REG_INTC_STATUS(core) (LIOINTC_INTC_CHIP_START + 0x20 + (core) * 8) #define LIOINTC_REG_INTC_EN_STATUS (LIOINTC_INTC_CHIP_START + 0x04) #define LIOINTC_REG_INTC_ENABLE (LIOINTC_INTC_CHIP_START + 0x08) #define LIOINTC_REG_INTC_DISABLE (LIOINTC_INTC_CHIP_START + 0x0c) @@ -217,7 +217,7 @@ static int liointc_init(phys_addr_t addr, unsigned long size, int revision, goto out_free_priv; for (i = 0; i < LIOINTC_NUM_CORES; i++) - priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS; + priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS(i); for (i = 0; i < LIOINTC_NUM_PARENT; i++) priv->handler[i].parent_int_map = parent_int_map[i]; From d6b52f6828e6d9bb1fe35f889e1a9d0dcff0e21d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 23 Jun 2024 12:07:07 -0400 Subject: [PATCH 612/778] bcachefs: Fix null ptr deref in journal_pins_to_text() Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index dac2f498ae8b..13669dd0e375 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1521,6 +1521,11 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 struct journal_entry_pin *pin; spin_lock(&j->lock); + if (!test_bit(JOURNAL_running, &j->flags)) { + spin_unlock(&j->lock); + return true; + } + *seq = max(*seq, j->pin.front); if (*seq >= j->pin.back) { From 89d21b69b4f88e7a04b66bec38a01470cd40d703 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 23 Jun 2024 12:55:16 -0400 Subject: [PATCH 613/778] bcachefs: Add missing bch2_journal_do_writes() call This fixes a rare deadlock when we're doing an emergency shutdown due to failure to do a journal write. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 492426c8d869..db24ce21b2ac 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1677,6 +1677,13 @@ static CLOSURE_CALLBACK(journal_write_done) mod_delayed_work(j->wq, &j->write_work, max(0L, delta)); } + /* + * We don't typically trigger journal writes from her - the next journal + * write will be triggered immediately after the previous one is + * allocated, in bch2_journal_write() - but the journal write error path + * is special: + */ + bch2_journal_do_writes(j); spin_unlock(&j->lock); } From 02ea312055da84e08e3e5bce2539c1ff11c8b5f2 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Sat, 22 Jun 2024 12:14:37 +0530 Subject: [PATCH 614/778] octeontx2-pf: Fix coverity and klockwork issues in octeon PF driver Fix unintended sign extension and klockwork issues. These are not real issue but for sanity checks. Signed-off-by: Ratheesh Kannoth Signed-off-by: Suman Ghosh Signed-off-by: David S. Miller --- .../marvell/octeontx2/nic/otx2_common.c | 10 ++-- .../ethernet/marvell/octeontx2/nic/otx2_reg.h | 55 ++++++++++--------- .../marvell/octeontx2/nic/otx2_txrx.c | 2 +- .../net/ethernet/marvell/octeontx2/nic/qos.c | 3 +- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index a85ac039d779..87d5776e3b88 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -648,14 +648,14 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for } else if (lvl == NIX_TXSCH_LVL_TL4) { parent = schq_list[NIX_TXSCH_LVL_TL3][prio]; req->reg[0] = NIX_AF_TL4X_PARENT(schq); - req->regval[0] = parent << 16; + req->regval[0] = (u64)parent << 16; req->num_regs++; req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq); req->regval[1] = dwrr_val; } else if (lvl == NIX_TXSCH_LVL_TL3) { parent = schq_list[NIX_TXSCH_LVL_TL2][prio]; req->reg[0] = NIX_AF_TL3X_PARENT(schq); - req->regval[0] = parent << 16; + req->regval[0] = (u64)parent << 16; req->num_regs++; req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq); req->regval[1] = dwrr_val; @@ -670,11 +670,11 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for } else if (lvl == NIX_TXSCH_LVL_TL2) { parent = schq_list[NIX_TXSCH_LVL_TL1][prio]; req->reg[0] = NIX_AF_TL2X_PARENT(schq); - req->regval[0] = parent << 16; + req->regval[0] = (u64)parent << 16; req->num_regs++; req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq); - req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val; + req->regval[1] = (u64)hw->txschq_aggr_lvl_rr_prio << 24 | dwrr_val; if (lvl == hw->txschq_link_cfg_lvl) { req->num_regs++; @@ -698,7 +698,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for req->num_regs++; req->reg[1] = NIX_AF_TL1X_TOPOLOGY(schq); - req->regval[1] = (TXSCH_TL1_DFLT_RR_PRIO << 1); + req->regval[1] = hw->txschq_aggr_lvl_rr_prio << 1; req->num_regs++; req->reg[2] = NIX_AF_TL1X_CIR(schq); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h index 45a32e4b49d1..e3aee6e36215 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h @@ -139,33 +139,34 @@ #define NIX_LF_CINTX_ENA_W1C(a) (NIX_LFBASE | 0xD50 | (a) << 12) /* NIX AF transmit scheduler registers */ -#define NIX_AF_SMQX_CFG(a) (0x700 | (a) << 16) -#define NIX_AF_TL1X_SCHEDULE(a) (0xC00 | (a) << 16) -#define NIX_AF_TL1X_CIR(a) (0xC20 | (a) << 16) -#define NIX_AF_TL1X_TOPOLOGY(a) (0xC80 | (a) << 16) -#define NIX_AF_TL2X_PARENT(a) (0xE88 | (a) << 16) -#define NIX_AF_TL2X_SCHEDULE(a) (0xE00 | (a) << 16) -#define NIX_AF_TL2X_TOPOLOGY(a) (0xE80 | (a) << 16) -#define NIX_AF_TL2X_CIR(a) (0xE20 | (a) << 16) -#define NIX_AF_TL2X_PIR(a) (0xE30 | (a) << 16) -#define NIX_AF_TL3X_PARENT(a) (0x1088 | (a) << 16) -#define NIX_AF_TL3X_SCHEDULE(a) (0x1000 | (a) << 16) -#define NIX_AF_TL3X_SHAPE(a) (0x1010 | (a) << 16) -#define NIX_AF_TL3X_CIR(a) (0x1020 | (a) << 16) -#define NIX_AF_TL3X_PIR(a) (0x1030 | (a) << 16) -#define NIX_AF_TL3X_TOPOLOGY(a) (0x1080 | (a) << 16) -#define NIX_AF_TL4X_PARENT(a) (0x1288 | (a) << 16) -#define NIX_AF_TL4X_SCHEDULE(a) (0x1200 | (a) << 16) -#define NIX_AF_TL4X_SHAPE(a) (0x1210 | (a) << 16) -#define NIX_AF_TL4X_CIR(a) (0x1220 | (a) << 16) -#define NIX_AF_TL4X_PIR(a) (0x1230 | (a) << 16) -#define NIX_AF_TL4X_TOPOLOGY(a) (0x1280 | (a) << 16) -#define NIX_AF_MDQX_SCHEDULE(a) (0x1400 | (a) << 16) -#define NIX_AF_MDQX_SHAPE(a) (0x1410 | (a) << 16) -#define NIX_AF_MDQX_CIR(a) (0x1420 | (a) << 16) -#define NIX_AF_MDQX_PIR(a) (0x1430 | (a) << 16) -#define NIX_AF_MDQX_PARENT(a) (0x1480 | (a) << 16) -#define NIX_AF_TL3_TL2X_LINKX_CFG(a, b) (0x1700 | (a) << 16 | (b) << 3) +#define NIX_AF_SMQX_CFG(a) (0x700 | (u64)(a) << 16) +#define NIX_AF_TL4X_SDP_LINK_CFG(a) (0xB10 | (u64)(a) << 16) +#define NIX_AF_TL1X_SCHEDULE(a) (0xC00 | (u64)(a) << 16) +#define NIX_AF_TL1X_CIR(a) (0xC20 | (u64)(a) << 16) +#define NIX_AF_TL1X_TOPOLOGY(a) (0xC80 | (u64)(a) << 16) +#define NIX_AF_TL2X_PARENT(a) (0xE88 | (u64)(a) << 16) +#define NIX_AF_TL2X_SCHEDULE(a) (0xE00 | (u64)(a) << 16) +#define NIX_AF_TL2X_TOPOLOGY(a) (0xE80 | (u64)(a) << 16) +#define NIX_AF_TL2X_CIR(a) (0xE20 | (u64)(a) << 16) +#define NIX_AF_TL2X_PIR(a) (0xE30 | (u64)(a) << 16) +#define NIX_AF_TL3X_PARENT(a) (0x1088 | (u64)(a) << 16) +#define NIX_AF_TL3X_SCHEDULE(a) (0x1000 | (u64)(a) << 16) +#define NIX_AF_TL3X_SHAPE(a) (0x1010 | (u64)(a) << 16) +#define NIX_AF_TL3X_CIR(a) (0x1020 | (u64)(a) << 16) +#define NIX_AF_TL3X_PIR(a) (0x1030 | (u64)(a) << 16) +#define NIX_AF_TL3X_TOPOLOGY(a) (0x1080 | (u64)(a) << 16) +#define NIX_AF_TL4X_PARENT(a) (0x1288 | (u64)(a) << 16) +#define NIX_AF_TL4X_SCHEDULE(a) (0x1200 | (u64)(a) << 16) +#define NIX_AF_TL4X_SHAPE(a) (0x1210 | (u64)(a) << 16) +#define NIX_AF_TL4X_CIR(a) (0x1220 | (u64)(a) << 16) +#define NIX_AF_TL4X_PIR(a) (0x1230 | (u64)(a) << 16) +#define NIX_AF_TL4X_TOPOLOGY(a) (0x1280 | (u64)(a) << 16) +#define NIX_AF_MDQX_SCHEDULE(a) (0x1400 | (u64)(a) << 16) +#define NIX_AF_MDQX_SHAPE(a) (0x1410 | (u64)(a) << 16) +#define NIX_AF_MDQX_CIR(a) (0x1420 | (u64)(a) << 16) +#define NIX_AF_MDQX_PIR(a) (0x1430 | (u64)(a) << 16) +#define NIX_AF_MDQX_PARENT(a) (0x1480 | (u64)(a) << 16) +#define NIX_AF_TL3_TL2X_LINKX_CFG(a, b) (0x1700 | (u64)(a) << 16 | (b) << 3) /* LMT LF registers */ #define LMT_LFBASE BIT_ULL(RVU_FUNC_BLKADDR_SHIFT) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 929b4eac25d9..3eb85949677a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -513,7 +513,7 @@ process_cqe: static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_poll *cq_poll) { - struct dim_sample dim_sample; + struct dim_sample dim_sample = { 0 }; u64 rx_frames, rx_bytes; u64 tx_frames, tx_bytes; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c index edac008099c0..0f844c14485a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c @@ -153,7 +153,6 @@ static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf, num_regs++; otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); - } else if (level == NIX_TXSCH_LVL_TL4) { otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); } else if (level == NIX_TXSCH_LVL_TL3) { @@ -176,7 +175,7 @@ static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf, /* check if node is root */ if (node->qid == OTX2_QOS_QID_INNER && !node->parent) { cfg->reg[num_regs] = NIX_AF_TL2X_SCHEDULE(node->schq); - cfg->regval[num_regs] = TXSCH_TL1_DFLT_RR_PRIO << 24 | + cfg->regval[num_regs] = (u64)hw->txschq_aggr_lvl_rr_prio << 24 | mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen); num_regs++; From 6ef8eb5125722c241fd60d7b0c872d5c2e5dd4ca Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 18 Jun 2024 16:13:36 +0800 Subject: [PATCH 615/778] cpu: Fix broken cmdline "nosmp" and "maxcpus=0" After the rework of "Parallel CPU bringup", the cmdline "nosmp" and "maxcpus=0" parameters are not working anymore. These parameters set setup_max_cpus to zero and that's handed to bringup_nonboot_cpus(). The code there does a decrement before checking for zero, which brings it into the negative space and brings up all CPUs. Add a zero check at the beginning of the function to prevent this. [ tglx: Massaged change log ] Fixes: 18415f33e2ac4ab382 ("cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE") Fixes: 06c6796e0304234da6 ("cpu/hotplug: Fix off by one in cpuhp_bringup_mask()") Signed-off-by: Huacai Chen Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240618081336.3996825-1-chenhuacai@loongson.cn --- kernel/cpu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cpu.c b/kernel/cpu.c index 74cfdb66a9bd..3d2bf1d50a0c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1859,6 +1859,9 @@ static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return fals void __init bringup_nonboot_cpus(unsigned int max_cpus) { + if (!max_cpus) + return; + /* Try parallel bringup optimization if enabled */ if (cpuhp_bringup_cpus_parallel(max_cpus)) return; From f2661062f16b2de5d7b6a5c42a9a5c96326b8454 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Jun 2024 17:08:54 -0400 Subject: [PATCH 616/778] Linux 6.10-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 14427547dc1e..4d36f943b3b1 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* From 37ce99b77762256ec9fda58d58fd613230151456 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 24 Jun 2024 09:56:12 +0800 Subject: [PATCH 617/778] drm/panel: simple: Add missing display timing flags for KOE TX26D202VM0BWA KOE TX26D202VM0BWA panel spec indicates the DE signal is active high in timing chart, so add DISPLAY_FLAGS_DE_HIGH flag in display timing flags. This aligns display_timing with panel_desc. Fixes: 8a07052440c2 ("drm/panel: simple: Add support for KOE TX26D202VM0BWA panel") Signed-off-by: Liu Ying Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240624015612.341983-1-victor.liu@nxp.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240624015612.341983-1-victor.liu@nxp.com --- drivers/gpu/drm/panel/panel-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index dcb6d0b6ced0..c8cdc8356c58 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2752,6 +2752,7 @@ static const struct display_timing koe_tx26d202vm0bwa_timing = { .vfront_porch = { 3, 5, 10 }, .vback_porch = { 2, 5, 10 }, .vsync_len = { 5, 5, 5 }, + .flags = DISPLAY_FLAGS_DE_HIGH, }; static const struct panel_desc koe_tx26d202vm0bwa = { From 6434b33faaa063df500af355ee6c3942e0f8d982 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Jun 2024 18:09:01 +0200 Subject: [PATCH 618/778] s390/sclp: Fix sclp_init() cleanup on failure If sclp_init() fails it only partially cleans up: if there are multiple failing calls to sclp_init() sclp_state_change_event will be added several times to sclp_reg_list, which results in the following warning: ------------[ cut here ]------------ list_add double add: new=000003ffe1598c10, prev=000003ffe1598bf0, next=000003ffe1598c10. WARNING: CPU: 0 PID: 1 at lib/list_debug.c:35 __list_add_valid_or_report+0xde/0xf8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.10.0-rc3 Krnl PSW : 0404c00180000000 000003ffe0d6076a (__list_add_valid_or_report+0xe2/0xf8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 ... Call Trace: [<000003ffe0d6076a>] __list_add_valid_or_report+0xe2/0xf8 ([<000003ffe0d60766>] __list_add_valid_or_report+0xde/0xf8) [<000003ffe0a8d37e>] sclp_init+0x40e/0x450 [<000003ffe00009f2>] do_one_initcall+0x42/0x1e0 [<000003ffe15b77a6>] do_initcalls+0x126/0x150 [<000003ffe15b7a0a>] kernel_init_freeable+0x1ba/0x1f8 [<000003ffe0d6650e>] kernel_init+0x2e/0x180 [<000003ffe000301c>] __ret_from_fork+0x3c/0x60 [<000003ffe0d759ca>] ret_from_fork+0xa/0x30 Fix this by removing sclp_state_change_event from sclp_reg_list when sclp_init() fails. Reviewed-by: Peter Oberparleiter Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- drivers/s390/char/sclp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index d53ee34d398f..fbe29cabcbb8 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -1293,6 +1293,7 @@ sclp_init(void) fail_unregister_reboot_notifier: unregister_reboot_notifier(&sclp_reboot_notifier); fail_init_state_uninitialized: + list_del(&sclp_state_change_event.list); sclp_init_state = sclp_init_state_uninitialized; free_page((unsigned long) sclp_read_sccb); free_page((unsigned long) sclp_init_sccb); From 058722ee350c0bdd664e467156feb2bf5d9cc271 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Thu, 20 Jun 2024 15:34:31 +0200 Subject: [PATCH 619/778] net: usb: ax88179_178a: improve link status logs Avoid spurious link status logs that may ultimately be wrong; for example, if the link is set to down with the cable plugged, then the cable is unplugged and after this the link is set to up, the last new log that is appearing is incorrectly telling that the link is up. In order to avoid errors, show link status logs after link_reset processing, and in order to avoid spurious as much as possible, only show the link loss when some link status change is detected. cc: stable@vger.kernel.org Fixes: e2ca90c276e1 ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Signed-off-by: Jose Ignacio Tornos Martinez Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index c2fb736f78b2..b034ef8a73ea 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -326,7 +326,8 @@ static void ax88179_status(struct usbnet *dev, struct urb *urb) if (netif_carrier_ok(dev->net) != link) { usbnet_link_change(dev, link, 1); - netdev_info(dev->net, "ax88179 - Link status is: %d\n", link); + if (!link) + netdev_info(dev->net, "ax88179 - Link status is: 0\n"); } } @@ -1542,6 +1543,7 @@ static int ax88179_link_reset(struct usbnet *dev) GMII_PHY_PHYSR, 2, &tmp16); if (!(tmp16 & GMII_PHY_PHYSR_LINK)) { + netdev_info(dev->net, "ax88179 - Link status is: 0\n"); return 0; } else if (GMII_PHY_PHYSR_GIGA == (tmp16 & GMII_PHY_PHYSR_SMASK)) { mode |= AX_MEDIUM_GIGAMODE | AX_MEDIUM_EN_125MHZ; @@ -1579,6 +1581,8 @@ static int ax88179_link_reset(struct usbnet *dev) netif_carrier_on(dev->net); + netdev_info(dev->net, "ax88179 - Link status is: 1\n"); + return 0; } From 996c3412a06578e9d779a16b9e79ace18125ab50 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 3 Jun 2024 21:54:45 +0200 Subject: [PATCH 620/778] drm/i915/gt: Fix potential UAF by revoke of fence registers CI has been sporadically reporting the following issue triggered by igt@i915_selftest@live@hangcheck on ADL-P and similar machines: <6> [414.049203] i915: Running intel_hangcheck_live_selftests/igt_reset_evict_fence ... <6> [414.068804] i915 0000:00:02.0: [drm] GT0: GUC: submission enabled <6> [414.068812] i915 0000:00:02.0: [drm] GT0: GUC: SLPC enabled <3> [414.070354] Unable to pin Y-tiled fence; err:-4 <3> [414.071282] i915_vma_revoke_fence:301 GEM_BUG_ON(!i915_active_is_idle(&fence->active)) ... <4>[ 609.603992] ------------[ cut here ]------------ <2>[ 609.603995] kernel BUG at drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c:301! <4>[ 609.604003] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI <4>[ 609.604006] CPU: 0 PID: 268 Comm: kworker/u64:3 Tainted: G U W 6.9.0-CI_DRM_14785-g1ba62f8cea9c+ #1 <4>[ 609.604008] Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-P DDR4 RVP, BIOS RPLPFWI1.R00.4035.A00.2301200723 01/20/2023 <4>[ 609.604010] Workqueue: i915 __i915_gem_free_work [i915] <4>[ 609.604149] RIP: 0010:i915_vma_revoke_fence+0x187/0x1f0 [i915] ... <4>[ 609.604271] Call Trace: <4>[ 609.604273] ... <4>[ 609.604716] __i915_vma_evict+0x2e9/0x550 [i915] <4>[ 609.604852] __i915_vma_unbind+0x7c/0x160 [i915] <4>[ 609.604977] force_unbind+0x24/0xa0 [i915] <4>[ 609.605098] i915_vma_destroy+0x2f/0xa0 [i915] <4>[ 609.605210] __i915_gem_object_pages_fini+0x51/0x2f0 [i915] <4>[ 609.605330] __i915_gem_free_objects.isra.0+0x6a/0xc0 [i915] <4>[ 609.605440] process_scheduled_works+0x351/0x690 ... In the past, there were similar failures reported by CI from other IGT tests, observed on other platforms. Before commit 63baf4f3d587 ("drm/i915/gt: Only wait for GPU activity before unbinding a GGTT fence"), i915_vma_revoke_fence() was waiting for idleness of vma->active via fence_update(). That commit introduced vma->fence->active in order for the fence_update() to be able to wait selectively on that one instead of vma->active since only idleness of fence registers was needed. But then, another commit 0d86ee35097a ("drm/i915/gt: Make fence revocation unequivocal") replaced the call to fence_update() in i915_vma_revoke_fence() with only fence_write(), and also added that GEM_BUG_ON(!i915_active_is_idle(&fence->active)) in front. No justification was provided on why we might then expect idleness of vma->fence->active without first waiting on it. The issue can be potentially caused by a race among revocation of fence registers on one side and sequential execution of signal callbacks invoked on completion of a request that was using them on the other, still processed in parallel to revocation of those fence registers. Fix it by waiting for idleness of vma->fence->active in i915_vma_revoke_fence(). Fixes: 0d86ee35097a ("drm/i915/gt: Make fence revocation unequivocal") Closes: https://gitlab.freedesktop.org/drm/intel/issues/10021 Signed-off-by: Janusz Krzysztofik Cc: stable@vger.kernel.org # v5.8+ Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240603195446.297690-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit 24bb052d3dd499c5956abad5f7d8e4fd07da7fb1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 40371b8a9bbb..93bc1cc1ee7e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -298,6 +298,7 @@ void i915_vma_revoke_fence(struct i915_vma *vma) return; GEM_BUG_ON(fence->vma != vma); + i915_active_wait(&fence->active); GEM_BUG_ON(!i915_active_is_idle(&fence->active)); GEM_BUG_ON(atomic_read(&fence->pin_count)); From d56fbfbaf592a115b2e11c1044829afba34069d2 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Wed, 5 Jun 2024 11:27:45 +0800 Subject: [PATCH 621/778] platform/mellanox: nvsw-sn2201: Add check for platform_device_add_resources Add check for the return value of platform_device_add_resources() and return the error if it fails in order to catch the error. Signed-off-by: Chen Ni Link: https://lore.kernel.org/r/20240605032745.2916183-1-nichen@iscas.ac.cn Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/mellanox/nvsw-sn2201.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c index 3ef655591424..abe7be602f84 100644 --- a/drivers/platform/mellanox/nvsw-sn2201.c +++ b/drivers/platform/mellanox/nvsw-sn2201.c @@ -1198,6 +1198,7 @@ static int nvsw_sn2201_config_pre_init(struct nvsw_sn2201 *nvsw_sn2201) static int nvsw_sn2201_probe(struct platform_device *pdev) { struct nvsw_sn2201 *nvsw_sn2201; + int ret; nvsw_sn2201 = devm_kzalloc(&pdev->dev, sizeof(*nvsw_sn2201), GFP_KERNEL); if (!nvsw_sn2201) @@ -1205,8 +1206,10 @@ static int nvsw_sn2201_probe(struct platform_device *pdev) nvsw_sn2201->dev = &pdev->dev; platform_set_drvdata(pdev, nvsw_sn2201); - platform_device_add_resources(pdev, nvsw_sn2201_lpc_io_resources, + ret = platform_device_add_resources(pdev, nvsw_sn2201_lpc_io_resources, ARRAY_SIZE(nvsw_sn2201_lpc_io_resources)); + if (ret) + return ret; nvsw_sn2201->main_mux_deferred_nr = NVSW_SN2201_MAIN_MUX_DEFER_NR; nvsw_sn2201->main_mux_devs = nvsw_sn2201_main_mux_brdinfo; From 7aa9b96e9a73e4ec1771492d0527bd5fc5ef9164 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Tue, 18 Jun 2024 17:43:44 +0300 Subject: [PATCH 622/778] gpio: davinci: Validate the obtained number of IRQs Value of pdata->gpio_unbanked is taken from Device Tree. In case of broken DT due to any error this value can be any. Without this value validation there can be out of chips->irqs array boundaries access in davinci_gpio_probe(). Validate the obtained nirq value so that it won't exceed the maximum number of IRQs per bank. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Signed-off-by: Aleksandr Mishin Link: https://lore.kernel.org/r/20240618144344.16943-1-amishin@t-argos.ru Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-davinci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index bb499e362912..1d0175d6350b 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -225,6 +225,11 @@ static int davinci_gpio_probe(struct platform_device *pdev) else nirq = DIV_ROUND_UP(ngpio, 16); + if (nirq > MAX_INT_PER_BANK) { + dev_err(dev, "Too many IRQs!\n"); + return -EINVAL; + } + chips = devm_kzalloc(dev, sizeof(*chips), GFP_KERNEL); if (!chips) return -ENOMEM; From 151e78a0b89ee6dec93382dbdf5b1ef83f9c4716 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 7 Jun 2024 01:35:37 +0200 Subject: [PATCH 623/778] platform/x86: wireless-hotkey: Add support for LG Airplane Button MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LGEX0815 ACPI device is used by the "LG Airplane Mode Button" Windows driver for handling rfkill requests. When the ACPI device receives an 0x80 ACPI notification, an rfkill event is to be send to userspace. Add support for the LGEX0815 ACPI device to the driver. Tested-by: Agathe Boutmy Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240606233540.9774-2-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/wireless-hotkey.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c index e95cdbbfb708..ab46164cbe13 100644 --- a/drivers/platform/x86/wireless-hotkey.c +++ b/drivers/platform/x86/wireless-hotkey.c @@ -19,6 +19,7 @@ MODULE_AUTHOR("Alex Hung"); MODULE_ALIAS("acpi*:HPQ6001:*"); MODULE_ALIAS("acpi*:WSTADEF:*"); MODULE_ALIAS("acpi*:AMDI0051:*"); +MODULE_ALIAS("acpi*:LGEX0815:*"); struct wl_button { struct input_dev *input_dev; @@ -29,6 +30,7 @@ static const struct acpi_device_id wl_ids[] = { {"HPQ6001", 0}, {"WSTADEF", 0}, {"AMDI0051", 0}, + {"LGEX0815", 0}, {"", 0}, }; From 413c204595ca98a4f33414a948c18d7314087342 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 7 Jun 2024 01:35:38 +0200 Subject: [PATCH 624/778] platform/x86: lg-laptop: Remove LGEX0815 hotkey handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rfkill hotkey handling is already provided by the wireless-hotkey driver. Remove the now unnecessary rfkill hotkey handling to avoid duplicating functionality. The ACPI notify handler still prints debugging information when receiving ACPI notifications to aid in reverse-engineering. Tested-by: Agathe Boutmy Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240606233540.9774-3-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/lg-laptop.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index d0fee5d375d7..ea83630106e8 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -84,7 +84,6 @@ static const struct key_entry wmi_keymap[] = { * this key both sends an event and * changes backlight level. */ - {KE_KEY, 0x80, {KEY_RFKILL} }, {KE_END, 0} }; @@ -272,14 +271,7 @@ static void wmi_input_setup(void) static void acpi_notify(struct acpi_device *device, u32 event) { - struct key_entry *key; - acpi_handle_debug(device->handle, "notify: %d\n", event); - if (inited & INIT_SPARSE_KEYMAP) { - key = sparse_keymap_entry_from_scancode(wmi_input_dev, 0x80); - if (key && key->type == KE_KEY) - sparse_keymap_report_entry(wmi_input_dev, key, 1, true); - } } static ssize_t fan_mode_store(struct device *dev, From 58a54f27a0dac81f7fd3514be01012635219a53c Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 7 Jun 2024 01:35:39 +0200 Subject: [PATCH 625/778] platform/x86: lg-laptop: Change ACPI device id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LGEX0815 ACPI device id is used for handling hotkey events, but this functionality is already handled by the wireless-hotkey driver. The LGEX0820 ACPI device id however is used to manage various platform features using the WMAB/WMBB ACPI methods. Use this ACPI device id to avoid blocking the wireless-hotkey driver from probing. Tested-by: Agathe Boutmy Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240606233540.9774-4-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/lg-laptop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index ea83630106e8..db8a2f79bf0a 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -768,7 +768,7 @@ static void acpi_remove(struct acpi_device *device) } static const struct acpi_device_id device_ids[] = { - {"LGEX0815", 0}, + {"LGEX0820", 0}, {"", 0} }; MODULE_DEVICE_TABLE(acpi, device_ids); From b27ea279556121b54d3f45d0529706cf100cdb3a Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 7 Jun 2024 01:35:40 +0200 Subject: [PATCH 626/778] platform/x86: lg-laptop: Use ACPI device handle when evaluating WMAB/WMBB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the LG Gram 16Z90S, the WMAB and WMBB ACPI methods are not mapped under \XINI, but instead are mapped under \_SB.XINI. The reason for this is that the LGEX0820 ACPI device used by this driver is mapped at \_SB.XINI, so the ACPI methods where moved as well to appear below the LGEX0820 ACPI device. Fix this by using the ACPI handle from the ACPI device when evaluating both methods. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218901 Tested-by: Agathe Boutmy Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240606233540.9774-5-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/lg-laptop.c | 79 +++++++++++++------------------- 1 file changed, 33 insertions(+), 46 deletions(-) diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index db8a2f79bf0a..9c7857842caf 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -39,8 +39,6 @@ MODULE_LICENSE("GPL"); #define WMI_METHOD_WMBB "2B4F501A-BD3C-4394-8DCF-00A7D2BC8210" #define WMI_EVENT_GUID WMI_EVENT_GUID0 -#define WMAB_METHOD "\\XINI.WMAB" -#define WMBB_METHOD "\\XINI.WMBB" #define SB_GGOV_METHOD "\\_SB.GGOV" #define GOV_TLED 0x2020008 #define WM_GET 1 @@ -74,7 +72,7 @@ static u32 inited; static int battery_limit_use_wmbb; static struct led_classdev kbd_backlight; -static enum led_brightness get_kbd_backlight_level(void); +static enum led_brightness get_kbd_backlight_level(struct device *dev); static const struct key_entry wmi_keymap[] = { {KE_KEY, 0x70, {KEY_F15} }, /* LG control panel (F1) */ @@ -127,11 +125,10 @@ static int ggov(u32 arg0) return res; } -static union acpi_object *lg_wmab(u32 method, u32 arg1, u32 arg2) +static union acpi_object *lg_wmab(struct device *dev, u32 method, u32 arg1, u32 arg2) { union acpi_object args[3]; acpi_status status; - acpi_handle handle; struct acpi_object_list arg; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -142,29 +139,22 @@ static union acpi_object *lg_wmab(u32 method, u32 arg1, u32 arg2) args[2].type = ACPI_TYPE_INTEGER; args[2].integer.value = arg2; - status = acpi_get_handle(NULL, (acpi_string) WMAB_METHOD, &handle); - if (ACPI_FAILURE(status)) { - pr_err("Cannot get handle"); - return NULL; - } - arg.count = 3; arg.pointer = args; - status = acpi_evaluate_object(handle, NULL, &arg, &buffer); + status = acpi_evaluate_object(ACPI_HANDLE(dev), "WMAB", &arg, &buffer); if (ACPI_FAILURE(status)) { - acpi_handle_err(handle, "WMAB: call failed.\n"); + dev_err(dev, "WMAB: call failed.\n"); return NULL; } return buffer.pointer; } -static union acpi_object *lg_wmbb(u32 method_id, u32 arg1, u32 arg2) +static union acpi_object *lg_wmbb(struct device *dev, u32 method_id, u32 arg1, u32 arg2) { union acpi_object args[3]; acpi_status status; - acpi_handle handle; struct acpi_object_list arg; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; u8 buf[32]; @@ -180,18 +170,12 @@ static union acpi_object *lg_wmbb(u32 method_id, u32 arg1, u32 arg2) args[2].buffer.length = 32; args[2].buffer.pointer = buf; - status = acpi_get_handle(NULL, (acpi_string)WMBB_METHOD, &handle); - if (ACPI_FAILURE(status)) { - pr_err("Cannot get handle"); - return NULL; - } - arg.count = 3; arg.pointer = args; - status = acpi_evaluate_object(handle, NULL, &arg, &buffer); + status = acpi_evaluate_object(ACPI_HANDLE(dev), "WMBB", &arg, &buffer); if (ACPI_FAILURE(status)) { - acpi_handle_err(handle, "WMAB: call failed.\n"); + dev_err(dev, "WMBB: call failed.\n"); return NULL; } @@ -222,7 +206,7 @@ static void wmi_notify(u32 value, void *context) if (eventcode == 0x10000000) { led_classdev_notify_brightness_hw_changed( - &kbd_backlight, get_kbd_backlight_level()); + &kbd_backlight, get_kbd_backlight_level(kbd_backlight.dev->parent)); } else { key = sparse_keymap_entry_from_scancode( wmi_input_dev, eventcode); @@ -287,7 +271,7 @@ static ssize_t fan_mode_store(struct device *dev, if (ret) return ret; - r = lg_wmab(WM_FAN_MODE, WM_GET, 0); + r = lg_wmab(dev, WM_FAN_MODE, WM_GET, 0); if (!r) return -EIO; @@ -298,9 +282,9 @@ static ssize_t fan_mode_store(struct device *dev, m = r->integer.value; kfree(r); - r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xffffff0f) | (value << 4)); + r = lg_wmab(dev, WM_FAN_MODE, WM_SET, (m & 0xffffff0f) | (value << 4)); kfree(r); - r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xfffffff0) | value); + r = lg_wmab(dev, WM_FAN_MODE, WM_SET, (m & 0xfffffff0) | value); kfree(r); return count; @@ -312,7 +296,7 @@ static ssize_t fan_mode_show(struct device *dev, unsigned int status; union acpi_object *r; - r = lg_wmab(WM_FAN_MODE, WM_GET, 0); + r = lg_wmab(dev, WM_FAN_MODE, WM_GET, 0); if (!r) return -EIO; @@ -339,7 +323,7 @@ static ssize_t usb_charge_store(struct device *dev, if (ret) return ret; - r = lg_wmbb(WMBB_USB_CHARGE, WM_SET, value); + r = lg_wmbb(dev, WMBB_USB_CHARGE, WM_SET, value); if (!r) return -EIO; @@ -353,7 +337,7 @@ static ssize_t usb_charge_show(struct device *dev, unsigned int status; union acpi_object *r; - r = lg_wmbb(WMBB_USB_CHARGE, WM_GET, 0); + r = lg_wmbb(dev, WMBB_USB_CHARGE, WM_GET, 0); if (!r) return -EIO; @@ -381,7 +365,7 @@ static ssize_t reader_mode_store(struct device *dev, if (ret) return ret; - r = lg_wmab(WM_READER_MODE, WM_SET, value); + r = lg_wmab(dev, WM_READER_MODE, WM_SET, value); if (!r) return -EIO; @@ -395,7 +379,7 @@ static ssize_t reader_mode_show(struct device *dev, unsigned int status; union acpi_object *r; - r = lg_wmab(WM_READER_MODE, WM_GET, 0); + r = lg_wmab(dev, WM_READER_MODE, WM_GET, 0); if (!r) return -EIO; @@ -423,7 +407,7 @@ static ssize_t fn_lock_store(struct device *dev, if (ret) return ret; - r = lg_wmab(WM_FN_LOCK, WM_SET, value); + r = lg_wmab(dev, WM_FN_LOCK, WM_SET, value); if (!r) return -EIO; @@ -437,7 +421,7 @@ static ssize_t fn_lock_show(struct device *dev, unsigned int status; union acpi_object *r; - r = lg_wmab(WM_FN_LOCK, WM_GET, 0); + r = lg_wmab(dev, WM_FN_LOCK, WM_GET, 0); if (!r) return -EIO; @@ -467,9 +451,9 @@ static ssize_t charge_control_end_threshold_store(struct device *dev, union acpi_object *r; if (battery_limit_use_wmbb) - r = lg_wmbb(WMBB_BATT_LIMIT, WM_SET, value); + r = lg_wmbb(&pf_device->dev, WMBB_BATT_LIMIT, WM_SET, value); else - r = lg_wmab(WM_BATT_LIMIT, WM_SET, value); + r = lg_wmab(&pf_device->dev, WM_BATT_LIMIT, WM_SET, value); if (!r) return -EIO; @@ -488,7 +472,7 @@ static ssize_t charge_control_end_threshold_show(struct device *device, union acpi_object *r; if (battery_limit_use_wmbb) { - r = lg_wmbb(WMBB_BATT_LIMIT, WM_GET, 0); + r = lg_wmbb(&pf_device->dev, WMBB_BATT_LIMIT, WM_GET, 0); if (!r) return -EIO; @@ -499,7 +483,7 @@ static ssize_t charge_control_end_threshold_show(struct device *device, status = r->buffer.pointer[0x10]; } else { - r = lg_wmab(WM_BATT_LIMIT, WM_GET, 0); + r = lg_wmab(&pf_device->dev, WM_BATT_LIMIT, WM_GET, 0); if (!r) return -EIO; @@ -578,7 +562,7 @@ static void tpad_led_set(struct led_classdev *cdev, { union acpi_object *r; - r = lg_wmab(WM_TLED, WM_SET, brightness > LED_OFF); + r = lg_wmab(cdev->dev->parent, WM_TLED, WM_SET, brightness > LED_OFF); kfree(r); } @@ -600,16 +584,16 @@ static void kbd_backlight_set(struct led_classdev *cdev, val = 0; if (brightness >= LED_FULL) val = 0x24; - r = lg_wmab(WM_KEY_LIGHT, WM_SET, val); + r = lg_wmab(cdev->dev->parent, WM_KEY_LIGHT, WM_SET, val); kfree(r); } -static enum led_brightness get_kbd_backlight_level(void) +static enum led_brightness get_kbd_backlight_level(struct device *dev) { union acpi_object *r; int val; - r = lg_wmab(WM_KEY_LIGHT, WM_GET, 0); + r = lg_wmab(dev, WM_KEY_LIGHT, WM_GET, 0); if (!r) return LED_OFF; @@ -637,7 +621,7 @@ static enum led_brightness get_kbd_backlight_level(void) static enum led_brightness kbd_backlight_get(struct led_classdev *cdev) { - return get_kbd_backlight_level(); + return get_kbd_backlight_level(cdev->dev->parent); } static LED_DEVICE(kbd_backlight, 255, LED_BRIGHT_HW_CHANGED); @@ -664,6 +648,11 @@ static struct platform_driver pf_driver = { static int acpi_add(struct acpi_device *device) { + struct platform_device_info pdev_info = { + .fwnode = acpi_fwnode_handle(device), + .name = PLATFORM_NAME, + .id = PLATFORM_DEVID_NONE, + }; int ret; const char *product; int year = 2017; @@ -675,9 +664,7 @@ static int acpi_add(struct acpi_device *device) if (ret) return ret; - pf_device = platform_device_register_simple(PLATFORM_NAME, - PLATFORM_DEVID_NONE, - NULL, 0); + pf_device = platform_device_register_full(&pdev_info); if (IS_ERR(pf_device)) { ret = PTR_ERR(pf_device); pf_device = NULL; From 3fd8ca27073e963aba7e64cd087968d87a953ac1 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 11 Jun 2024 21:25:56 -0700 Subject: [PATCH 627/778] platform/x86/siemens: add missing MODULE_DESCRIPTION() macros With ARCH=x86, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/siemens/simatic-ipc.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/siemens/simatic-ipc-batt.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.o Add the missing invocations of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240611-md-drivers-platform-x86-siemens-v1-1-b399d7d6ae64@quicinc.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c | 1 + drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c | 1 + drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c | 1 + drivers/platform/x86/siemens/simatic-ipc-batt.c | 1 + drivers/platform/x86/siemens/simatic-ipc.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c b/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c index 31a139d87d9a..5edc294de6e4 100644 --- a/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c +++ b/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c @@ -45,6 +45,7 @@ static struct platform_driver simatic_ipc_batt_driver = { module_platform_driver(simatic_ipc_batt_driver); +MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Apollo Lake GPIO"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" KBUILD_MODNAME); MODULE_SOFTDEP("pre: simatic-ipc-batt platform:apollolake-pinctrl"); diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c b/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c index a7676f224075..e6a56d14b505 100644 --- a/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c +++ b/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c @@ -45,6 +45,7 @@ static struct platform_driver simatic_ipc_batt_driver = { module_platform_driver(simatic_ipc_batt_driver); +MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Elkhart Lake GPIO"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" KBUILD_MODNAME); MODULE_SOFTDEP("pre: simatic-ipc-batt platform:elkhartlake-pinctrl"); diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c b/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c index 5e77e05fdb5d..f8849d0e48a8 100644 --- a/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c +++ b/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c @@ -81,6 +81,7 @@ static struct platform_driver simatic_ipc_batt_driver = { module_platform_driver(simatic_ipc_batt_driver); +MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Nuvoton GPIO"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" KBUILD_MODNAME); MODULE_SOFTDEP("pre: simatic-ipc-batt gpio_f7188x platform:elkhartlake-pinctrl platform:alderlake-pinctrl"); diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt.c b/drivers/platform/x86/siemens/simatic-ipc-batt.c index c6dd263b4ee3..d9aff10608cf 100644 --- a/drivers/platform/x86/siemens/simatic-ipc-batt.c +++ b/drivers/platform/x86/siemens/simatic-ipc-batt.c @@ -247,6 +247,7 @@ static struct platform_driver simatic_ipc_batt_driver = { module_platform_driver(simatic_ipc_batt_driver); +MODULE_DESCRIPTION("CMOS core battery driver for Siemens Simatic IPCs"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" KBUILD_MODNAME); MODULE_AUTHOR("Henning Schild "); diff --git a/drivers/platform/x86/siemens/simatic-ipc.c b/drivers/platform/x86/siemens/simatic-ipc.c index 8ca6e277fa03..7039874d8f11 100644 --- a/drivers/platform/x86/siemens/simatic-ipc.c +++ b/drivers/platform/x86/siemens/simatic-ipc.c @@ -231,6 +231,7 @@ static void __exit simatic_ipc_exit_module(void) module_init(simatic_ipc_init_module); module_exit(simatic_ipc_exit_module); +MODULE_DESCRIPTION("Siemens SIMATIC IPC platform driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Gerd Haeussler "); MODULE_ALIAS("dmi:*:svnSIEMENSAG:*"); From 41ab81ce8490b9cad88e87c49315fb8c46208be7 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 11 Jun 2024 21:56:26 -0700 Subject: [PATCH 628/778] platform/x86/intel: add missing MODULE_DESCRIPTION() macros With ARCH=x86, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/intel/pmc/intel_pmc_core_pltdrv.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/intel/intel-hid.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/intel/intel-vbtn.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/intel/intel-rst.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/intel/intel-smartconnect.o Add the missing invocations of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240611-md-drivers-platform-x86-intel-v1-1-5ed967425b04@quicinc.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/hid.c | 1 + drivers/platform/x86/intel/pmc/pltdrv.c | 1 + drivers/platform/x86/intel/rst.c | 1 + drivers/platform/x86/intel/smartconnect.c | 1 + drivers/platform/x86/intel/vbtn.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index c7a827645864..10cd65497cc1 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -38,6 +38,7 @@ MODULE_PARM_DESC(enable_sw_tablet_mode, /* When NOT in tablet mode, VGBS returns with the flag 0x40 */ #define TABLET_MODE_FLAG BIT(6) +MODULE_DESCRIPTION("Intel HID Event hotkey driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); diff --git a/drivers/platform/x86/intel/pmc/pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c index ddfba38c2104..f2cb87dc2d37 100644 --- a/drivers/platform/x86/intel/pmc/pltdrv.c +++ b/drivers/platform/x86/intel/pmc/pltdrv.c @@ -86,4 +86,5 @@ static void __exit pmc_core_platform_exit(void) module_init(pmc_core_platform_init); module_exit(pmc_core_platform_exit); +MODULE_DESCRIPTION("Intel PMC Core platform driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel/rst.c b/drivers/platform/x86/intel/rst.c index 6bc9c4a603e0..f3a60e14d4c1 100644 --- a/drivers/platform/x86/intel/rst.c +++ b/drivers/platform/x86/intel/rst.c @@ -7,6 +7,7 @@ #include #include +MODULE_DESCRIPTION("Intel Rapid Start Technology Driver"); MODULE_LICENSE("GPL"); static ssize_t irst_show_wakeup_events(struct device *dev, diff --git a/drivers/platform/x86/intel/smartconnect.c b/drivers/platform/x86/intel/smartconnect.c index cd25d0585324..31019a1a6d5e 100644 --- a/drivers/platform/x86/intel/smartconnect.c +++ b/drivers/platform/x86/intel/smartconnect.c @@ -6,6 +6,7 @@ #include #include +MODULE_DESCRIPTION("Intel Smart Connect disabling driver"); MODULE_LICENSE("GPL"); static int smartconnect_acpi_init(struct acpi_device *acpi) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 84c1353eb12b..9b7ce03ba085 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -24,6 +24,7 @@ #define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | VGBS_TABLET_MODE_FLAG_ALT) +MODULE_DESCRIPTION("Intel Virtual Button driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("AceLan Kao"); From 7add1ee34692aabd146b86a8e88abad843ed6659 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 11 Jun 2024 22:20:59 -0700 Subject: [PATCH 629/778] platform/x86: add missing MODULE_DESCRIPTION() macros With ARCH=x86, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/amilo-rfkill.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/uv_sysfs.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/ibm_rtl.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/xo1-rfkill.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/firmware_attributes_class.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/platform/x86/wireless-hotkey.o Add the missing invocations of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240611-md-drivers-platform-x86-v1-1-d850e53619ee@quicinc.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amilo-rfkill.c | 1 + drivers/platform/x86/firmware_attributes_class.c | 1 + drivers/platform/x86/ibm_rtl.c | 1 + drivers/platform/x86/uv_sysfs.c | 1 + drivers/platform/x86/wireless-hotkey.c | 1 + drivers/platform/x86/xo1-rfkill.c | 1 + 6 files changed, 6 insertions(+) diff --git a/drivers/platform/x86/amilo-rfkill.c b/drivers/platform/x86/amilo-rfkill.c index efcf909786a5..2423dc91debb 100644 --- a/drivers/platform/x86/amilo-rfkill.c +++ b/drivers/platform/x86/amilo-rfkill.c @@ -171,6 +171,7 @@ static void __exit amilo_rfkill_exit(void) } MODULE_AUTHOR("Ben Hutchings "); +MODULE_DESCRIPTION("Fujitsu-Siemens Amilo rfkill support"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(dmi, amilo_rfkill_id_table); diff --git a/drivers/platform/x86/firmware_attributes_class.c b/drivers/platform/x86/firmware_attributes_class.c index dd8240009565..182a07d8ae3d 100644 --- a/drivers/platform/x86/firmware_attributes_class.c +++ b/drivers/platform/x86/firmware_attributes_class.c @@ -49,4 +49,5 @@ int fw_attributes_class_put(void) EXPORT_SYMBOL_GPL(fw_attributes_class_put); MODULE_AUTHOR("Mark Pearson "); +MODULE_DESCRIPTION("Firmware attributes class helper module"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c index 1d4bbae115f1..231b37909801 100644 --- a/drivers/platform/x86/ibm_rtl.c +++ b/drivers/platform/x86/ibm_rtl.c @@ -29,6 +29,7 @@ static bool debug; module_param(debug, bool, 0644); MODULE_PARM_DESC(debug, "Show debug output"); +MODULE_DESCRIPTION("IBM Premium Real Time Mode (PRTM) driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Keith Mannthey "); MODULE_AUTHOR("Vernon Mauery "); diff --git a/drivers/platform/x86/uv_sysfs.c b/drivers/platform/x86/uv_sysfs.c index 37372d7cc54a..f6a0627f36db 100644 --- a/drivers/platform/x86/uv_sysfs.c +++ b/drivers/platform/x86/uv_sysfs.c @@ -929,4 +929,5 @@ module_init(uv_sysfs_init); module_exit(uv_sysfs_exit); MODULE_AUTHOR("Hewlett Packard Enterprise"); +MODULE_DESCRIPTION("Sysfs structure for HPE UV systems"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c index ab46164cbe13..a220fe4f9ef8 100644 --- a/drivers/platform/x86/wireless-hotkey.c +++ b/drivers/platform/x86/wireless-hotkey.c @@ -14,6 +14,7 @@ #include #include +MODULE_DESCRIPTION("Airplane mode button for AMD, HP & Xiaomi laptops"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); MODULE_ALIAS("acpi*:HPQ6001:*"); diff --git a/drivers/platform/x86/xo1-rfkill.c b/drivers/platform/x86/xo1-rfkill.c index e64d5646b4c7..5fe68296501c 100644 --- a/drivers/platform/x86/xo1-rfkill.c +++ b/drivers/platform/x86/xo1-rfkill.c @@ -74,5 +74,6 @@ static struct platform_driver xo1_rfkill_driver = { module_platform_driver(xo1_rfkill_driver); MODULE_AUTHOR("Daniel Drake "); +MODULE_DESCRIPTION("OLPC XO-1 software RF kill switch"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:xo1-rfkill"); From ecc54006f158ae0245a13e59026da2f0239c1b86 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Fri, 21 Jun 2024 17:28:09 +0800 Subject: [PATCH 630/778] arm64: Clear the initial ID map correctly before remapping In the attempt to clear and recreate the initial ID map for LPA2, we wrongly use 'start - end' as the map size and make the memset() almost a nop. Fix it by passing the correct map size. Fixes: 9684ec186f8f ("arm64: Enable LPA2 at boot if supported by the system") Signed-off-by: Zenghui Yu Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20240621092809.162-1-yuzenghui@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/pi/map_kernel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index 5fa08e13e17e..f374a3e5a5fe 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -173,7 +173,7 @@ static void __init remap_idmap_for_lpa2(void) * Don't bother with the FDT, we no longer need it after this. */ memset(init_idmap_pg_dir, 0, - (u64)init_idmap_pg_dir - (u64)init_idmap_pg_end); + (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir); create_init_idmap(init_idmap_pg_dir, mask); dsb(ishst); From 316930d06b92a2419d8e767193266e678545b31d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jun 2024 16:08:28 +0200 Subject: [PATCH 631/778] selftests/bpf: Add more ring buffer test coverage Add test coverage for reservations beyond the ring buffer size in order to validate that bpf_ringbuf_reserve() rejects the request with NULL, all other ring buffer tests keep passing as well: # ./vmtest.sh -- ./test_progs -t ringbuf [...] ./test_progs -t ringbuf [ 1.165434] bpf_testmod: loading out-of-tree module taints kernel. [ 1.165825] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel [ 1.284001] tsc: Refined TSC clocksource calibration: 3407.982 MHz [ 1.286871] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x311fc34e357, max_idle_ns: 440795379773 ns [ 1.289555] clocksource: Switched to clocksource tsc #274/1 ringbuf/ringbuf:OK #274/2 ringbuf/ringbuf_n:OK #274/3 ringbuf/ringbuf_map_key:OK #274/4 ringbuf/ringbuf_write:OK #274 ringbuf:OK #275 ringbuf_multi:OK [...] Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko [ Test fixups for getting BPF CI back to work ] Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240621140828.18238-2-daniel@iogearbox.net --- tools/testing/selftests/bpf/Makefile | 2 +- .../selftests/bpf/prog_tests/ringbuf.c | 56 +++++++++++++++++++ .../selftests/bpf/progs/test_ringbuf_write.c | 46 +++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_write.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e0b3887b3d2d..dd49c1d23a60 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -457,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ - test_ringbuf_n.c test_ringbuf_map_key.c + test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 4c6f42dae409..da430df45aa4 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -12,9 +12,11 @@ #include #include #include + #include "test_ringbuf.lskel.h" #include "test_ringbuf_n.lskel.h" #include "test_ringbuf_map_key.lskel.h" +#include "test_ringbuf_write.lskel.h" #define EDONE 7777 @@ -84,6 +86,58 @@ static void *poll_thread(void *input) return (void *)(long)ring_buffer__poll(ringbuf, timeout); } +static void ringbuf_write_subtest(void) +{ + struct test_ringbuf_write_lskel *skel; + int page_size = getpagesize(); + size_t *mmap_ptr; + int err, rb_fd; + + skel = test_ringbuf_write_lskel__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->maps.ringbuf.max_entries = 0x4000; + + err = test_ringbuf_write_lskel__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + rb_fd = skel->maps.ringbuf.map_fd; + + mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); + if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) + goto cleanup; + *mmap_ptr = 0x3000; + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); + + skel->bss->pid = getpid(); + + ringbuf = ring_buffer__new(rb_fd, process_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new")) + goto cleanup; + + err = test_ringbuf_write_lskel__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup_ringbuf; + + skel->bss->discarded = 0; + skel->bss->passed = 0; + + /* trigger exactly two samples */ + syscall(__NR_getpgid); + syscall(__NR_getpgid); + + ASSERT_EQ(skel->bss->discarded, 2, "discarded"); + ASSERT_EQ(skel->bss->passed, 0, "passed"); + + test_ringbuf_write_lskel__detach(skel); +cleanup_ringbuf: + ring_buffer__free(ringbuf); +cleanup: + test_ringbuf_write_lskel__destroy(skel); +} + static void ringbuf_subtest(void) { const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); @@ -451,4 +505,6 @@ void test_ringbuf(void) ringbuf_n_subtest(); if (test__start_subtest("ringbuf_map_key")) ringbuf_map_key_subtest(); + if (test__start_subtest("ringbuf_write")) + ringbuf_write_subtest(); } diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c new file mode 100644 index 000000000000..350513c0e4c9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +/* inputs */ +int pid = 0; + +/* outputs */ +long passed = 0; +long discarded = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_ringbuf_write(void *ctx) +{ + int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32; + void *sample1, *sample2; + + if (cur_pid != pid) + return 0; + + sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + if (!sample1) + return 0; + /* first one can pass */ + sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + if (!sample2) { + bpf_ringbuf_discard(sample1, 0); + __sync_fetch_and_add(&discarded, 1); + return 0; + } + /* second one must not */ + __sync_fetch_and_add(&passed, 1); + foo = sample2 + 4084; + *foo = 256; + bpf_ringbuf_discard(sample1, 0); + bpf_ringbuf_discard(sample2, 0); + return 0; +} From 2b2efe1937ca9f8815884bd4dcd5b32733025103 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 19 Jun 2024 16:53:54 -0700 Subject: [PATCH 632/778] bpf: Fix may_goto with negative offset. Zac's syzbot crafted a bpf prog that exposed two bugs in may_goto. The 1st bug is the way may_goto is patched. When offset is negative it should be patched differently. The 2nd bug is in the verifier: when current state may_goto_depth is equal to visited state may_goto_depth it means there is an actual infinite loop. It's not correct to prune exploration of the program at this point. Note, that this check doesn't limit the program to only one may_goto insn, since 2nd and any further may_goto will increment may_goto_depth only in the queued state pushed for future exploration. The current state will have may_goto_depth == 0 regardless of number of may_goto insns and the verifier has to explore the program until bpf_exit. Fixes: 011832b97b31 ("bpf: Introduce may_goto instruction") Reported-by: Zac Ecob Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Closes: https://lore.kernel.org/bpf/CAADnVQL-15aNp04-cyHRn47Yv61NXfYyhopyZtUyxNojUZUXpA@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20240619235355.85031-1-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5586a571bf55..214a9fa8c6fb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17460,11 +17460,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) goto skip_inf_loop_check; } if (is_may_goto_insn_at(env, insn_idx)) { - if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) { + if (sl->state.may_goto_depth != cur->may_goto_depth && + states_equal(env, &sl->state, cur, RANGE_WITHIN)) { update_loop_entry(cur, &sl->state); goto hit; } - goto skip_inf_loop_check; } if (calls_callback(env, insn_idx)) { if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) @@ -20049,7 +20049,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env) stack_depth_extra = 8; insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off); - insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2); + if (insn->off >= 0) + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2); + else + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off); cnt = 4; From 280e4ebffd16ea1b55dc09761448545e216f60a9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 19 Jun 2024 16:53:55 -0700 Subject: [PATCH 633/778] selftests/bpf: Add tests for may_goto with negative offset. Add few tests with may_goto and negative offset. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240619235355.85031-2-alexei.starovoitov@gmail.com --- .../bpf/progs/verifier_iterating_callbacks.c | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index 8885e5239d6b..80c737b6d340 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -274,6 +274,58 @@ static __naked void iter_limit_bug_cb(void) ); } +int tmp_var; +SEC("socket") +__failure __msg("infinite loop detected at insn 2") +__naked void jgt_imm64_and_may_goto(void) +{ + asm volatile (" \ + r0 = %[tmp_var] ll; \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -3; /* off -3 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" :: __imm_addr(tmp_var) + : __clobber_all); +} + +SEC("socket") +__failure __msg("infinite loop detected at insn 1") +__naked void may_goto_self(void) +{ + asm volatile (" \ + r0 = *(u32 *)(r10 - 4); \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -1; /* off -1 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__success __retval(0) +__naked void may_goto_neg_off(void) +{ + asm volatile (" \ + r0 = *(u32 *)(r10 - 4); \ + goto l0_%=; \ + goto l1_%=; \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -2; /* off -2 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ +l1_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + SEC("tc") __failure __flag(BPF_F_TEST_STATE_FREQ) From 7e9f79428372c6eab92271390851be34ab26bfb4 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Mon, 24 Jun 2024 11:07:47 +0300 Subject: [PATCH 634/778] xdp: Remove WARN() from __xdp_reg_mem_model() syzkaller reports a warning in __xdp_reg_mem_model(). The warning occurs only if __mem_id_init_hash_table() returns an error. It returns the error in two cases: 1. memory allocation fails; 2. rhashtable_init() fails when some fields of rhashtable_params struct are not initialized properly. The second case cannot happen since there is a static const rhashtable_params struct with valid fields. So, warning is only triggered when there is a problem with memory allocation. Thus, there is no sense in using WARN() to handle this error and it can be safely removed. WARNING: CPU: 0 PID: 5065 at net/core/xdp.c:299 __xdp_reg_mem_model+0x2d9/0x650 net/core/xdp.c:299 CPU: 0 PID: 5065 Comm: syz-executor883 Not tainted 6.8.0-syzkaller-05271-gf99c5f563c17 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 RIP: 0010:__xdp_reg_mem_model+0x2d9/0x650 net/core/xdp.c:299 Call Trace: xdp_reg_mem_model+0x22/0x40 net/core/xdp.c:344 xdp_test_run_setup net/bpf/test_run.c:188 [inline] bpf_test_run_xdp_live+0x365/0x1e90 net/bpf/test_run.c:377 bpf_prog_test_run_xdp+0x813/0x11b0 net/bpf/test_run.c:1267 bpf_prog_test_run+0x33a/0x3b0 kernel/bpf/syscall.c:4240 __sys_bpf+0x48d/0x810 kernel/bpf/syscall.c:5649 __do_sys_bpf kernel/bpf/syscall.c:5738 [inline] __se_sys_bpf kernel/bpf/syscall.c:5736 [inline] __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5736 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Found by Linux Verification Center (linuxtesting.org) with syzkaller. Fixes: 8d5d88527587 ("xdp: rhashtable with allocator ID to pointer mapping") Signed-off-by: Daniil Dulov Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/all/20240617162708.492159-1-d.dulov@aladdin.ru Link: https://lore.kernel.org/bpf/20240624080747.36858-1-d.dulov@aladdin.ru --- net/core/xdp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/core/xdp.c b/net/core/xdp.c index 41693154e426..022c12059cf2 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -295,10 +295,8 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem, mutex_lock(&mem_id_lock); ret = __mem_id_init_hash_table(); mutex_unlock(&mem_id_lock); - if (ret < 0) { - WARN_ON(1); + if (ret < 0) return ERR_PTR(ret); - } } xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp); From 282a4482e198e03781c152c88aac8aa382ef9a55 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 24 Jun 2024 14:12:56 +0800 Subject: [PATCH 635/778] ASoC: mediatek: mt8195: Add platform entry for ETDM1_OUT_BE dai link Commit e70b8dd26711 ("ASoC: mediatek: mt8195: Remove afe-dai component and rework codec link") removed the codec entry for the ETDM1_OUT_BE dai link entirely instead of replacing it with COMP_EMPTY(). This worked by accident as the remaining COMP_EMPTY() platform entry became the codec entry, and the platform entry became completely empty, effectively the same as COMP_DUMMY() since snd_soc_fill_dummy_dai() doesn't do anything for platform entries. This causes a KASAN out-of-bounds warning in mtk_soundcard_common_probe() in sound/soc/mediatek/common/mtk-soundcard-driver.c: for_each_card_prelinks(card, i, dai_link) { if (adsp_node && !strncmp(dai_link->name, "AFE_SOF", strlen("AFE_SOF"))) dai_link->platforms->of_node = adsp_node; else if (!dai_link->platforms->name && !dai_link->platforms->of_node) dai_link->platforms->of_node = platform_node; } where the code expects the platforms array to have space for at least one entry. Add an COMP_EMPTY() entry so that dai_link->platforms has space. Fixes: e70b8dd26711 ("ASoC: mediatek: mt8195: Remove afe-dai component and rework codec link") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20240624061257.3115467-1-wenst@chromium.org Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8195/mt8195-mt6359.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359.c b/sound/soc/mediatek/mt8195/mt8195-mt6359.c index ca8751190520..2832ef78eaed 100644 --- a/sound/soc/mediatek/mt8195/mt8195-mt6359.c +++ b/sound/soc/mediatek/mt8195/mt8195-mt6359.c @@ -827,6 +827,7 @@ SND_SOC_DAILINK_DEFS(ETDM2_IN_BE, SND_SOC_DAILINK_DEFS(ETDM1_OUT_BE, DAILINK_COMP_ARRAY(COMP_CPU("ETDM1_OUT")), + DAILINK_COMP_ARRAY(COMP_EMPTY()), DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(ETDM2_OUT_BE, From d941b5877a4eaef367ca48325c22d2e1bd8e8619 Mon Sep 17 00:00:00 2001 From: Kieran Bingham Date: Thu, 20 Jun 2024 23:10:42 +0100 Subject: [PATCH 636/778] staging: vc04_services: vchiq_arm: Fix initialisation check The vchiq_state used to be obtained through an accessor which would validate that the VCHIQ had been initialised correctly with the remote, or return a null state. In commit 42a2f6664e18 ("staging: vc04_services: Move global g_state to vchiq_state") the global state was moved to the vchiq_mgnt structures stored as a vchiq instance specific context. This conversion removed the helpers and instead replaced users of this helper with the assumption that the state is always available and the remote connected. The conversion does ensure that the state is always available, so some remaining state null pointer checks that remain are unnecessary, but the assumption that the remote is present and initialised is incorrect. Fix this broken assumption by re-introducing the logic that was lost during the conversion. Fixes: 42a2f6664e18 ("staging: vc04_services: Move global g_state to vchiq_state") Signed-off-by: Kieran Bingham Reviewed-by: Stefan Wahren Reviewed-by: Umang Jain Link: https://lore.kernel.org/r/20240620221046.2731704-1-kieran.bingham@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- .../staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 4 ++-- .../staging/vc04_services/interface/vchiq_arm/vchiq_core.h | 5 +++++ .../staging/vc04_services/interface/vchiq_arm/vchiq_dev.c | 7 ++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 69daeba974f2..5f518e5a9273 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -707,7 +707,7 @@ int vchiq_initialise(struct vchiq_state *state, struct vchiq_instance **instance * block forever. */ for (i = 0; i < VCHIQ_INIT_RETRIES; i++) { - if (state) + if (vchiq_remote_initialised(state)) break; usleep_range(500, 600); } @@ -1202,7 +1202,7 @@ void vchiq_dump_platform_instances(struct vchiq_state *state, struct seq_file *f { int i; - if (!state) + if (!vchiq_remote_initialised(state)) return; /* diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h index 8af209e34fb2..382ec08f6a14 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h @@ -413,6 +413,11 @@ struct vchiq_state { struct opaque_platform_state *platform_state; }; +static inline bool vchiq_remote_initialised(const struct vchiq_state *state) +{ + return state->remote && state->remote->initialised; +} + struct bulk_waiter { struct vchiq_bulk *bulk; struct completion event; diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c index 3c63347d2d08..430f2ed2ccd3 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c @@ -1170,6 +1170,11 @@ static int vchiq_open(struct inode *inode, struct file *file) dev_dbg(state->dev, "arm: vchiq open\n"); + if (!vchiq_remote_initialised(state)) { + dev_dbg(state->dev, "arm: vchiq has no connection to VideoCore\n"); + return -ENOTCONN; + } + instance = kzalloc(sizeof(*instance), GFP_KERNEL); if (!instance) return -ENOMEM; @@ -1200,7 +1205,7 @@ static int vchiq_release(struct inode *inode, struct file *file) dev_dbg(state->dev, "arm: instance=%p\n", instance); - if (!state) { + if (!vchiq_remote_initialised(state)) { ret = -EPERM; goto out; } From aef5daa2c49d510436b733827d4f0bab79fcc4a0 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 21 Jun 2024 10:41:13 +0800 Subject: [PATCH 637/778] netfilter: fix undefined reference to 'netfilter_lwtunnel_*' when CONFIG_SYSCTL=n if CONFIG_SYSFS is not enabled in config, we get the below compile error, All errors (new ones prefixed by >>): csky-linux-ld: net/netfilter/core.o: in function `netfilter_init': core.c:(.init.text+0x42): undefined reference to `netfilter_lwtunnel_init' >> csky-linux-ld: core.c:(.init.text+0x56): undefined reference to `netfilter_lwtunnel_fini' >> csky-linux-ld: core.c:(.init.text+0x70): undefined reference to `netfilter_lwtunnel_init' csky-linux-ld: core.c:(.init.text+0x78): undefined reference to `netfilter_lwtunnel_fini' Fixes: a2225e0250c5 ("netfilter: move the sysctl nf_hooks_lwtunnel into the netfilter core") Reported-by: Mirsad Todorovac Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202406210511.8vbByYj3-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202406210520.6HmrUaA2-lkp@intel.com/ Signed-off-by: Jianguo Wu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_hooks_lwtunnel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c index 7cdb59bb4459..d8ebebc9775d 100644 --- a/net/netfilter/nf_hooks_lwtunnel.c +++ b/net/netfilter/nf_hooks_lwtunnel.c @@ -117,4 +117,7 @@ void netfilter_lwtunnel_fini(void) { unregister_pernet_subsys(&nf_lwtunnel_net_ops); } +#else +int __init netfilter_lwtunnel_init(void) { return 0; } +void netfilter_lwtunnel_fini(void) {} #endif /* CONFIG_SYSCTL */ From 0ac18dac43103ab1df6d26ec9a781c0126f83ced Mon Sep 17 00:00:00 2001 From: Crescent Hsieh Date: Mon, 17 Jun 2024 14:30:58 +0800 Subject: [PATCH 638/778] tty: serial: 8250: Fix port count mismatch with the device Normally, the number of ports is indicated by the third digit of the device ID on Moxa PCI serial boards. For example, `0x1121` indicates a device with 2 ports. However, `CP116E_A_A` and `CP116E_A_B` are exceptions; they have 8 ports, but the third digit of the device ID is `6`. This patch introduces a function to retrieve the number of ports on Moxa PCI serial boards, addressing the issue described above. Fixes: 37058fd5d239 ("tty: serial: 8250: Add support for MOXA Mini PCIe boards") Cc: stable Signed-off-by: Crescent Hsieh Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240617063058.18866-1-crescentcy.hsieh@moxa.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 40af74b55933..e1d7aa2fa347 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1985,6 +1985,17 @@ enum { MOXA_SUPP_RS485 = BIT(2), }; +static unsigned short moxa_get_nports(unsigned short device) +{ + switch (device) { + case PCI_DEVICE_ID_MOXA_CP116E_A_A: + case PCI_DEVICE_ID_MOXA_CP116E_A_B: + return 8; + } + + return FIELD_GET(0x00F0, device); +} + static bool pci_moxa_is_mini_pcie(unsigned short device) { if (device == PCI_DEVICE_ID_MOXA_CP102N || @@ -2038,7 +2049,7 @@ static int pci_moxa_init(struct pci_dev *dev) { unsigned short device = dev->device; resource_size_t iobar_addr = pci_resource_start(dev, 2); - unsigned int num_ports = (device & 0x00F0) >> 4, i; + unsigned int i, num_ports = moxa_get_nports(device); u8 val, init_mode = MOXA_RS232; if (!(pci_moxa_supported_rs(dev) & MOXA_SUPP_RS232)) { From 9d141c1e615795eeb93cd35501ad144ee997a826 Mon Sep 17 00:00:00 2001 From: Udit Kumar Date: Wed, 19 Jun 2024 16:29:03 +0530 Subject: [PATCH 639/778] serial: 8250_omap: Implementation of Errata i2310 As per Errata i2310[0], Erroneous timeout can be triggered, if this Erroneous interrupt is not cleared then it may leads to storm of interrupts, therefore apply Errata i2310 solution. [0] https://www.ti.com/lit/pdf/sprz536 page 23 Fixes: b67e830d38fa ("serial: 8250: 8250_omap: Fix possible interrupt storm on K3 SoCs") Cc: stable@vger.kernel.org Signed-off-by: Udit Kumar Link: https://lore.kernel.org/r/20240619105903.165434-1-u-kumar1@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 170639d12b2a..ddac0a13cf84 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -115,6 +115,10 @@ /* RX FIFO occupancy indicator */ #define UART_OMAP_RX_LVL 0x19 +/* Timeout low and High */ +#define UART_OMAP_TO_L 0x26 +#define UART_OMAP_TO_H 0x27 + /* * Copy of the genpd flags for the console. * Only used if console suspend is disabled @@ -663,13 +667,24 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) /* * On K3 SoCs, it is observed that RX TIMEOUT is signalled after - * FIFO has been drained, in which case a dummy read of RX FIFO - * is required to clear RX TIMEOUT condition. + * FIFO has been drained or erroneously. + * So apply solution of Errata i2310 as mentioned in + * https://www.ti.com/lit/pdf/sprz536 */ if (priv->habit & UART_RX_TIMEOUT_QUIRK && - (iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT && - serial_port_in(port, UART_OMAP_RX_LVL) == 0) { - serial_port_in(port, UART_RX); + (iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT) { + unsigned char efr2, timeout_h, timeout_l; + + efr2 = serial_in(up, UART_OMAP_EFR2); + timeout_h = serial_in(up, UART_OMAP_TO_H); + timeout_l = serial_in(up, UART_OMAP_TO_L); + serial_out(up, UART_OMAP_TO_H, 0xFF); + serial_out(up, UART_OMAP_TO_L, 0xFF); + serial_out(up, UART_OMAP_EFR2, UART_OMAP_EFR2_TIMEOUT_BEHAVE); + serial_in(up, UART_IIR); + serial_out(up, UART_OMAP_EFR2, efr2); + serial_out(up, UART_OMAP_TO_H, timeout_h); + serial_out(up, UART_OMAP_TO_L, timeout_l); } /* Stop processing interrupts on input overrun */ From 7c92a8bd53f24d50c8cf4aba53bb75505b382fed Mon Sep 17 00:00:00 2001 From: Jean-Michel Hautbois Date: Thu, 20 Jun 2024 18:29:59 +0200 Subject: [PATCH 640/778] tty: mcf: MCF54418 has 10 UARTS Most of the colfires have up to 5 UARTs but MCF54418 has up-to 10 ! Change the maximum value authorized. Signed-off-by: Jean-Michel Hautbois Cc: stable Fixes: 2545cf6e94b4 ("m68knommu: allow 4 coldfire serial ports") Link: https://lore.kernel.org/r/20240620-upstream-uart-v1-1-a9d0d95fb19e@yoseli.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mcf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/mcf.c b/drivers/tty/serial/mcf.c index b0604d6da025..58858dd352c5 100644 --- a/drivers/tty/serial/mcf.c +++ b/drivers/tty/serial/mcf.c @@ -462,7 +462,7 @@ static const struct uart_ops mcf_uart_ops = { .verify_port = mcf_verify_port, }; -static struct mcf_uart mcf_ports[4]; +static struct mcf_uart mcf_ports[10]; #define MCF_MAXPORTS ARRAY_SIZE(mcf_ports) From a81dbd0463eca317eee44985a66aa6cc2ce5c101 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Fri, 21 Jun 2024 17:37:49 +0200 Subject: [PATCH 641/778] serial: imx: set receiver level before starting uart Set the receiver level to something > 0 before calling imx_uart_start_rx in rs485_config. This is necessary to avoid an interrupt storm that might prevent the system from booting. This was seen on an i.MX7 device when the rs485-rts-active-low property was active in the device tree. Fixes: 6d215f83e5fc ("serial: imx: warn user when using unsupported configuration") Cc: stable Signed-off-by: Stefan Eichenberger Link: https://lore.kernel.org/r/20240621153829.183780-1-eichest@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 2eb22594960f..f4f40c9373c2 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1952,8 +1952,10 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio /* Make sure Rx is enabled in case Tx is active with Rx disabled */ if (!(rs485conf->flags & SER_RS485_ENABLED) || - rs485conf->flags & SER_RS485_RX_DURING_TX) + rs485conf->flags & SER_RS485_RX_DURING_TX) { + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); imx_uart_start_rx(port); + } return 0; } From c5603e2a621dac10c5e21cc430848ebcfa6c7e01 Mon Sep 17 00:00:00 2001 From: Doug Brown Date: Thu, 6 Jun 2024 12:56:31 -0700 Subject: [PATCH 642/778] Revert "serial: core: only stop transmit when HW fifo is empty" This reverts commit 7bfb915a597a301abb892f620fe5c283a9fdbd77. This commit broke pxa and omap-serial, because it inhibited them from calling stop_tx() if their TX FIFOs weren't completely empty. This resulted in these two drivers hanging during transmits because the TX interrupt would stay enabled, and a new TX interrupt would never fire. Cc: stable@vger.kernel.org Fixes: 7bfb915a597a ("serial: core: only stop transmit when HW fifo is empty") Signed-off-by: Doug Brown Link: https://lore.kernel.org/r/20240606195632.173255-2-doug@schmorgal.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8cb65f50e830..3fb9a29e025f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -811,8 +811,7 @@ enum UART_TX_FLAGS { if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (!((flags) & UART_TX_NOSTOP) && pending == 0 && \ - __port->ops->tx_empty(__port)) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ From 9bb43b9e8d9a288a214e9b17acc9e46fda3977cf Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 6 Jun 2024 12:56:32 -0700 Subject: [PATCH 643/778] serial: core: introduce uart_port_tx_limited_flags() Analogue to uart_port_tx_flags() introduced in commit 3ee07964d407 ("serial: core: introduce uart_port_tx_flags()"), add a _flags variant for uart_port_tx_limited(). Fixes: d11cc8c3c4b6 ("tty: serial: use uart_port_tx_limited()") Cc: stable@vger.kernel.org Signed-off-by: Jonas Gorski Signed-off-by: Doug Brown Link: https://lore.kernel.org/r/20240606195632.173255-3-doug@schmorgal.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 3fb9a29e025f..aea25eef9a1a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -850,6 +850,24 @@ enum UART_TX_FLAGS { __count--); \ }) +/** + * uart_port_tx_limited_flags -- transmit helper for uart_port with count limiting with flags + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @flags: %UART_TX_NOSTOP or similar + * @count: a limit of characters to send + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * @tx_done: function to call after the loop is done + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx_limited_flags(port, ch, flags, count, tx_ready, put_char, tx_done) ({ \ + unsigned int __count = (count); \ + __uart_port_tx(port, ch, flags, tx_ready, put_char, tx_done, __count, \ + __count--); \ +}) + /** * uart_port_tx -- transmit helper for uart_port * @port: uart port From ea55c65dedf40e9c1911dc1e63e26bc9a59692b9 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 6 Jun 2024 12:56:33 -0700 Subject: [PATCH 644/778] serial: bcm63xx-uart: fix tx after conversion to uart_port_tx_limited() When bcm63xx-uart was converted to uart_port_tx_limited(), it implicitly added a call to stop_tx(). This causes garbage to be put out on the serial console. To fix this, pass UART_TX_NOSTOP in flags, and manually call stop_tx() ourselves analogue to how a similar issue was fixed in commit 7be50f2e8f20 ("serial: mxs-auart: fix tx"). Fixes: d11cc8c3c4b6 ("tty: serial: use uart_port_tx_limited()") Cc: stable@vger.kernel.org Signed-off-by: Jonas Gorski Signed-off-by: Doug Brown Link: https://lore.kernel.org/r/20240606195632.173255-4-doug@schmorgal.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/bcm63xx_uart.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c index 34801a6f300b..b88cc28c94e3 100644 --- a/drivers/tty/serial/bcm63xx_uart.c +++ b/drivers/tty/serial/bcm63xx_uart.c @@ -308,8 +308,8 @@ static void bcm_uart_do_tx(struct uart_port *port) val = bcm_uart_readl(port, UART_MCTL_REG); val = (val & UART_MCTL_TXFIFOFILL_MASK) >> UART_MCTL_TXFIFOFILL_SHIFT; - - pending = uart_port_tx_limited(port, ch, port->fifosize - val, + pending = uart_port_tx_limited_flags(port, ch, UART_TX_NOSTOP, + port->fifosize - val, true, bcm_uart_writel(port, ch, UART_FIFO_REG), ({})); @@ -320,6 +320,9 @@ static void bcm_uart_do_tx(struct uart_port *port) val = bcm_uart_readl(port, UART_IR_REG); val &= ~UART_TX_INT_MASK; bcm_uart_writel(port, val, UART_IR_REG); + + if (uart_tx_stopped(port)) + bcm_uart_stop_tx(port); } /* From 4b8e88e563b5f666446d002ad0dc1e6e8e7102b0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jun 2024 11:34:09 +0200 Subject: [PATCH 645/778] ftruncate: pass a signed offset The old ftruncate() syscall, using the 32-bit off_t misses a sign extension when called in compat mode on 64-bit architectures. As a result, passing a negative length accidentally succeeds in truncating to file size between 2GiB and 4GiB. Changing the type of the compat syscall to the signed compat_off_t changes the behavior so it instead returns -EINVAL. The native entry point, the truncate() syscall and the corresponding loff_t based variants are all correct already and do not suffer from this mistake. Fixes: 3f6d078d4acc ("fix compat truncate/ftruncate") Reviewed-by: Christian Brauner Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann --- fs/open.c | 4 ++-- include/linux/compat.h | 2 +- include/linux/syscalls.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/open.c b/fs/open.c index 89cafb572061..50e45bc7c4d8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -202,13 +202,13 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small) return error; } -SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length) +SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length) { return do_sys_ftruncate(fd, length, 1); } #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length) +COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length) { return do_sys_ftruncate(fd, length, 1); } diff --git a/include/linux/compat.h b/include/linux/compat.h index 233f61ec8afc..56cebaff0c91 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -608,7 +608,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf); asmlinkage long compat_sys_truncate(const char __user *, compat_off_t); -asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t); +asmlinkage long compat_sys_ftruncate(unsigned int, compat_off_t); /* No generic prototype for truncate64, ftruncate64, fallocate */ asmlinkage long compat_sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 9104952d323d..ba9337709878 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -418,7 +418,7 @@ asmlinkage long sys_listmount(const struct mnt_id_req __user *req, u64 __user *mnt_ids, size_t nr_mnt_ids, unsigned int flags); asmlinkage long sys_truncate(const char __user *path, long length); -asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); +asmlinkage long sys_ftruncate(unsigned int fd, off_t length); #if BITS_PER_LONG == 32 asmlinkage long sys_truncate64(const char __user *path, loff_t length); asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); From a1ff59784b277795a613beaa5d3dd9c5595c69a7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 20 Jun 2024 18:14:53 +0200 Subject: [PATCH 646/778] cpufreq: intel_pstate: Use HWP to initialize ITMT if CPPC is missing It is reported that single-thread performance on some hybrid systems dropped significantly after commit 7feec7430edd ("ACPI: CPPC: Only probe for _CPC if CPPC v2 is acked") which prevented _CPC from being used if the support for it had not been confirmed by the platform firmware. The problem is that if the platform firmware does not confirm CPPC v2 support, cppc_get_perf_caps() returns an error which prevents the intel_pstate driver from enabling ITMT. Consequently, the scheduler does not get any hints on CPU performance differences, so in a hybrid system some tasks may run on CPUs with lower capacity even though they should be running on high-capacity CPUs. To address this, modify intel_pstate to use the information from MSR_HWP_CAPABILITIES to enable ITMT if CPPC is not available (which is done already if the highest performance number coming from CPPC is not realistic). Fixes: 7feec7430edd ("ACPI: CPPC: Only probe for _CPC if CPPC v2 is acked") Closes: https://lore.kernel.org/linux-acpi/d01b0a1f-bd33-47fe-ab41-43843d8a374f@kfocus.org Link: https://lore.kernel.org/linux-acpi/ZnD22b3Br1ng7alf@kf-XE Reported-by: Aaron Rainbolt Tested-by: Aaron Rainbolt Cc: 5.19+ # 5.19+ Link: https://patch.msgid.link/12460110.O9o76ZdvQC@rjwysocki.net Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello --- drivers/cpufreq/intel_pstate.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 15de5e3d96fd..c31914a9876f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -355,15 +355,14 @@ static void intel_pstate_set_itmt_prio(int cpu) int ret; ret = cppc_get_perf_caps(cpu, &cppc_perf); - if (ret) - return; - /* - * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff. - * In this case we can't use CPPC.highest_perf to enable ITMT. - * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide. + * If CPPC is not available, fall back to MSR_HWP_CAPABILITIES bits [8:0]. + * + * Also, on some systems with overclocking enabled, CPPC.highest_perf is + * hardcoded to 0xff, so CPPC.highest_perf cannot be used to enable ITMT. + * Fall back to MSR_HWP_CAPABILITIES then too. */ - if (cppc_perf.highest_perf == CPPC_MAX_PERF) + if (ret || cppc_perf.highest_perf == CPPC_MAX_PERF) cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached)); /* From 9eee5330656bf92f51cb1f09b2dc9f8cf975b3d1 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Mon, 24 Jun 2024 20:37:28 +0000 Subject: [PATCH 647/778] PCI/MSI: Fix UAF in msi_capability_init KFENCE reports the following UAF: BUG: KFENCE: use-after-free read in __pci_enable_msi_range+0x2c0/0x488 Use-after-free read at 0x0000000024629571 (in kfence-#12): __pci_enable_msi_range+0x2c0/0x488 pci_alloc_irq_vectors_affinity+0xec/0x14c pci_alloc_irq_vectors+0x18/0x28 kfence-#12: 0x0000000008614900-0x00000000e06c228d, size=104, cache=kmalloc-128 allocated by task 81 on cpu 7 at 10.808142s: __kmem_cache_alloc_node+0x1f0/0x2bc kmalloc_trace+0x44/0x138 msi_alloc_desc+0x3c/0x9c msi_domain_insert_msi_desc+0x30/0x78 msi_setup_msi_desc+0x13c/0x184 __pci_enable_msi_range+0x258/0x488 pci_alloc_irq_vectors_affinity+0xec/0x14c pci_alloc_irq_vectors+0x18/0x28 freed by task 81 on cpu 7 at 10.811436s: msi_domain_free_descs+0xd4/0x10c msi_domain_free_locked.part.0+0xc0/0x1d8 msi_domain_alloc_irqs_all_locked+0xb4/0xbc pci_msi_setup_msi_irqs+0x30/0x4c __pci_enable_msi_range+0x2a8/0x488 pci_alloc_irq_vectors_affinity+0xec/0x14c pci_alloc_irq_vectors+0x18/0x28 Descriptor allocation done in: __pci_enable_msi_range msi_capability_init msi_setup_msi_desc msi_insert_msi_desc msi_domain_insert_msi_desc msi_alloc_desc ... Freed in case of failure in __msi_domain_alloc_locked() __pci_enable_msi_range msi_capability_init pci_msi_setup_msi_irqs msi_domain_alloc_irqs_all_locked msi_domain_alloc_locked __msi_domain_alloc_locked => fails msi_domain_free_locked ... That failure propagates back to pci_msi_setup_msi_irqs() in msi_capability_init() which accesses the descriptor for unmasking in the error exit path. Cure it by copying the descriptor and using the copy for the error exit path unmask operation. [ tglx: Massaged change log ] Fixes: bf6e054e0e3f ("genirq/msi: Provide msi_device_populate/destroy_sysfs()") Suggested-by: Thomas Gleixner Signed-off-by: Mostafa Saleh Signed-off-by: Thomas Gleixner Cc: Bjorn Heelgas Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240624203729.1094506-1-smostafa@google.com --- drivers/pci/msi/msi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index c5625dd9bf49..3a45879d85db 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -352,7 +352,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, struct irq_affinity *affd) { struct irq_affinity_desc *masks = NULL; - struct msi_desc *entry; + struct msi_desc *entry, desc; int ret; /* Reject multi-MSI early on irq domain enabled architectures */ @@ -377,6 +377,12 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, /* All MSIs are unmasked by default; mask them all */ entry = msi_first_desc(&dev->dev, MSI_DESC_ALL); pci_msi_mask(entry, msi_multi_mask(entry)); + /* + * Copy the MSI descriptor for the error path because + * pci_msi_setup_msi_irqs() will free it for the hierarchical + * interrupt domain case. + */ + memcpy(&desc, entry, sizeof(desc)); /* Configure MSI capability structure */ ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); @@ -396,7 +402,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, goto unlock; err: - pci_msi_unmask(entry, msi_multi_mask(entry)); + pci_msi_unmask(&desc, msi_multi_mask(&desc)); pci_free_msi_irqs(dev); fail: dev->msi_enabled = 0; From d1825752e3074b5ff8d7f6016160e2b7c5c367ca Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 13 Jun 2024 11:16:19 +0100 Subject: [PATCH 648/778] btrfs: use NOFS context when getting inodes during logging and log replay During inode logging (and log replay too), we are holding a transaction handle and we often need to call btrfs_iget(), which will read an inode from its subvolume btree if it's not loaded in memory and that results in allocating an inode with GFP_KERNEL semantics at the btrfs_alloc_inode() callback - and this may recurse into the filesystem in case we are under memory pressure and attempt to commit the current transaction, resulting in a deadlock since the logging (or log replay) task is holding a transaction handle open. Syzbot reported this with the following stack traces: WARNING: possible circular locking dependency detected 6.10.0-rc2-syzkaller-00361-g061d1af7b030 #0 Not tainted ------------------------------------------------------ syz-executor.1/9919 is trying to acquire lock: ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: might_alloc include/linux/sched/mm.h:334 [inline] ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: slab_pre_alloc_hook mm/slub.c:3891 [inline] ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: slab_alloc_node mm/slub.c:3981 [inline] ffffffff8dd3aac0 (fs_reclaim){+.+.}-{0:0}, at: kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 but task is already holding lock: ffff88804b569358 (&ei->log_mutex){+.+.}-{3:3}, at: btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&ei->log_mutex){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:608 [inline] __mutex_lock+0x175/0x9c0 kernel/locking/mutex.c:752 btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 btrfs_log_inode_parent+0x8cb/0x2a90 fs/btrfs/tree-log.c:7079 btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 vfs_fsync_range+0x141/0x230 fs/sync.c:188 generic_write_sync include/linux/fs.h:2794 [inline] btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 new_sync_write fs/read_write.c:497 [inline] vfs_write+0x6b6/0x1140 fs/read_write.c:590 ksys_write+0x12f/0x260 fs/read_write.c:643 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 entry_SYSENTER_compat_after_hwframe+0x84/0x8e -> #2 (btrfs_trans_num_extwriters){++++}-{0:0}: join_transaction+0x164/0xf40 fs/btrfs/transaction.c:315 start_transaction+0x427/0x1a70 fs/btrfs/transaction.c:700 btrfs_commit_super+0xa1/0x110 fs/btrfs/disk-io.c:4170 close_ctree+0xcb0/0xf90 fs/btrfs/disk-io.c:4324 generic_shutdown_super+0x159/0x3d0 fs/super.c:642 kill_anon_super+0x3a/0x60 fs/super.c:1226 btrfs_kill_super+0x3b/0x50 fs/btrfs/super.c:2096 deactivate_locked_super+0xbe/0x1a0 fs/super.c:473 deactivate_super+0xde/0x100 fs/super.c:506 cleanup_mnt+0x222/0x450 fs/namespace.c:1267 task_work_run+0x14e/0x250 kernel/task_work.c:180 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop kernel/entry/common.c:114 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0x278/0x2a0 kernel/entry/common.c:218 __do_fast_syscall_32+0x80/0x120 arch/x86/entry/common.c:389 do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 entry_SYSENTER_compat_after_hwframe+0x84/0x8e -> #1 (btrfs_trans_num_writers){++++}-{0:0}: __lock_release kernel/locking/lockdep.c:5468 [inline] lock_release+0x33e/0x6c0 kernel/locking/lockdep.c:5774 percpu_up_read include/linux/percpu-rwsem.h:99 [inline] __sb_end_write include/linux/fs.h:1650 [inline] sb_end_intwrite include/linux/fs.h:1767 [inline] __btrfs_end_transaction+0x5ca/0x920 fs/btrfs/transaction.c:1071 btrfs_commit_inode_delayed_inode+0x228/0x330 fs/btrfs/delayed-inode.c:1301 btrfs_evict_inode+0x960/0xe80 fs/btrfs/inode.c:5291 evict+0x2ed/0x6c0 fs/inode.c:667 iput_final fs/inode.c:1741 [inline] iput.part.0+0x5a8/0x7f0 fs/inode.c:1767 iput+0x5c/0x80 fs/inode.c:1757 dentry_unlink_inode+0x295/0x480 fs/dcache.c:400 __dentry_kill+0x1d0/0x600 fs/dcache.c:603 dput.part.0+0x4b1/0x9b0 fs/dcache.c:845 dput+0x1f/0x30 fs/dcache.c:835 ovl_stack_put+0x60/0x90 fs/overlayfs/util.c:132 ovl_destroy_inode+0xc6/0x190 fs/overlayfs/super.c:182 destroy_inode+0xc4/0x1b0 fs/inode.c:311 iput_final fs/inode.c:1741 [inline] iput.part.0+0x5a8/0x7f0 fs/inode.c:1767 iput+0x5c/0x80 fs/inode.c:1757 dentry_unlink_inode+0x295/0x480 fs/dcache.c:400 __dentry_kill+0x1d0/0x600 fs/dcache.c:603 shrink_kill fs/dcache.c:1048 [inline] shrink_dentry_list+0x140/0x5d0 fs/dcache.c:1075 prune_dcache_sb+0xeb/0x150 fs/dcache.c:1156 super_cache_scan+0x32a/0x550 fs/super.c:221 do_shrink_slab+0x44f/0x11c0 mm/shrinker.c:435 shrink_slab_memcg mm/shrinker.c:548 [inline] shrink_slab+0xa87/0x1310 mm/shrinker.c:626 shrink_one+0x493/0x7c0 mm/vmscan.c:4790 shrink_many mm/vmscan.c:4851 [inline] lru_gen_shrink_node+0x89f/0x1750 mm/vmscan.c:4951 shrink_node mm/vmscan.c:5910 [inline] kswapd_shrink_node mm/vmscan.c:6720 [inline] balance_pgdat+0x1105/0x1970 mm/vmscan.c:6911 kswapd+0x5ea/0xbf0 mm/vmscan.c:7180 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 -> #0 (fs_reclaim){+.+.}-{0:0}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain kernel/locking/lockdep.c:3869 [inline] __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137 lock_acquire kernel/locking/lockdep.c:5754 [inline] lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719 __fs_reclaim_acquire mm/page_alloc.c:3801 [inline] fs_reclaim_acquire+0x102/0x160 mm/page_alloc.c:3815 might_alloc include/linux/sched/mm.h:334 [inline] slab_pre_alloc_hook mm/slub.c:3891 [inline] slab_alloc_node mm/slub.c:3981 [inline] kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 btrfs_alloc_inode+0x118/0xb20 fs/btrfs/inode.c:8411 alloc_inode+0x5d/0x230 fs/inode.c:261 iget5_locked fs/inode.c:1235 [inline] iget5_locked+0x1c9/0x2c0 fs/inode.c:1228 btrfs_iget_locked fs/btrfs/inode.c:5590 [inline] btrfs_iget_path fs/btrfs/inode.c:5607 [inline] btrfs_iget+0xfb/0x230 fs/btrfs/inode.c:5636 add_conflicting_inode fs/btrfs/tree-log.c:5657 [inline] copy_inode_items_to_log+0x1039/0x1e30 fs/btrfs/tree-log.c:5928 btrfs_log_inode+0xa48/0x4660 fs/btrfs/tree-log.c:6592 log_new_delayed_dentries fs/btrfs/tree-log.c:6363 [inline] btrfs_log_inode+0x27dd/0x4660 fs/btrfs/tree-log.c:6718 btrfs_log_all_parents fs/btrfs/tree-log.c:6833 [inline] btrfs_log_inode_parent+0x22ba/0x2a90 fs/btrfs/tree-log.c:7141 btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 vfs_fsync_range+0x141/0x230 fs/sync.c:188 generic_write_sync include/linux/fs.h:2794 [inline] btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 vfs_writev+0x36f/0xde0 fs/read_write.c:971 do_pwritev+0x1b2/0x260 fs/read_write.c:1072 __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 entry_SYSENTER_compat_after_hwframe+0x84/0x8e other info that might help us debug this: Chain exists of: fs_reclaim --> btrfs_trans_num_extwriters --> &ei->log_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ei->log_mutex); lock(btrfs_trans_num_extwriters); lock(&ei->log_mutex); lock(fs_reclaim); *** DEADLOCK *** 7 locks held by syz-executor.1/9919: #0: ffff88802be20420 (sb_writers#23){.+.+}-{0:0}, at: do_pwritev+0x1b2/0x260 fs/read_write.c:1072 #1: ffff888065c0f8f0 (&sb->s_type->i_mutex_key#33){++++}-{3:3}, at: inode_lock include/linux/fs.h:791 [inline] #1: ffff888065c0f8f0 (&sb->s_type->i_mutex_key#33){++++}-{3:3}, at: btrfs_inode_lock+0xc8/0x110 fs/btrfs/inode.c:385 #2: ffff888065c0f778 (&ei->i_mmap_lock){++++}-{3:3}, at: btrfs_inode_lock+0xee/0x110 fs/btrfs/inode.c:388 #3: ffff88802be20610 (sb_internal#4){.+.+}-{0:0}, at: btrfs_sync_file+0x95b/0xe10 fs/btrfs/file.c:1952 #4: ffff8880546323f0 (btrfs_trans_num_writers){++++}-{0:0}, at: join_transaction+0x430/0xf40 fs/btrfs/transaction.c:290 #5: ffff888054632418 (btrfs_trans_num_extwriters){++++}-{0:0}, at: join_transaction+0x430/0xf40 fs/btrfs/transaction.c:290 #6: ffff88804b569358 (&ei->log_mutex){+.+.}-{3:3}, at: btrfs_log_inode+0x39c/0x4660 fs/btrfs/tree-log.c:6481 stack backtrace: CPU: 2 PID: 9919 Comm: syz-executor.1 Not tainted 6.10.0-rc2-syzkaller-00361-g061d1af7b030 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114 check_noncircular+0x31a/0x400 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain kernel/locking/lockdep.c:3869 [inline] __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137 lock_acquire kernel/locking/lockdep.c:5754 [inline] lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719 __fs_reclaim_acquire mm/page_alloc.c:3801 [inline] fs_reclaim_acquire+0x102/0x160 mm/page_alloc.c:3815 might_alloc include/linux/sched/mm.h:334 [inline] slab_pre_alloc_hook mm/slub.c:3891 [inline] slab_alloc_node mm/slub.c:3981 [inline] kmem_cache_alloc_lru_noprof+0x58/0x2f0 mm/slub.c:4020 btrfs_alloc_inode+0x118/0xb20 fs/btrfs/inode.c:8411 alloc_inode+0x5d/0x230 fs/inode.c:261 iget5_locked fs/inode.c:1235 [inline] iget5_locked+0x1c9/0x2c0 fs/inode.c:1228 btrfs_iget_locked fs/btrfs/inode.c:5590 [inline] btrfs_iget_path fs/btrfs/inode.c:5607 [inline] btrfs_iget+0xfb/0x230 fs/btrfs/inode.c:5636 add_conflicting_inode fs/btrfs/tree-log.c:5657 [inline] copy_inode_items_to_log+0x1039/0x1e30 fs/btrfs/tree-log.c:5928 btrfs_log_inode+0xa48/0x4660 fs/btrfs/tree-log.c:6592 log_new_delayed_dentries fs/btrfs/tree-log.c:6363 [inline] btrfs_log_inode+0x27dd/0x4660 fs/btrfs/tree-log.c:6718 btrfs_log_all_parents fs/btrfs/tree-log.c:6833 [inline] btrfs_log_inode_parent+0x22ba/0x2a90 fs/btrfs/tree-log.c:7141 btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7180 btrfs_sync_file+0x9c1/0xe10 fs/btrfs/file.c:1959 vfs_fsync_range+0x141/0x230 fs/sync.c:188 generic_write_sync include/linux/fs.h:2794 [inline] btrfs_do_write_iter+0x584/0x10c0 fs/btrfs/file.c:1705 do_iter_readv_writev+0x504/0x780 fs/read_write.c:741 vfs_writev+0x36f/0xde0 fs/read_write.c:971 do_pwritev+0x1b2/0x260 fs/read_write.c:1072 __do_compat_sys_pwritev2 fs/read_write.c:1218 [inline] __se_compat_sys_pwritev2 fs/read_write.c:1210 [inline] __ia32_compat_sys_pwritev2+0x121/0x1b0 fs/read_write.c:1210 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 entry_SYSENTER_compat_after_hwframe+0x84/0x8e RIP: 0023:0xf7334579 Code: b8 01 10 06 03 (...) RSP: 002b:00000000f5f265ac EFLAGS: 00000292 ORIG_RAX: 000000000000017b RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00000000200002c0 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Fix this by ensuring we are under a NOFS scope whenever we call btrfs_iget() during inode logging and log replay. Reported-by: syzbot+8576cfa84070dce4d59b@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/000000000000274a3a061abbd928@google.com/ Fixes: 712e36c5f2a7 ("btrfs: use GFP_KERNEL in btrfs_alloc_inode") Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 26a2e5aa08e9..0bce1d45e252 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -138,6 +138,25 @@ static void wait_log_commit(struct btrfs_root *root, int transid); * and once to do all the other items. */ +static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root) +{ + unsigned int nofs_flag; + struct inode *inode; + + /* + * We're holding a transaction handle whether we are logging or + * replaying a log tree, so we must make sure NOFS semantics apply + * because btrfs_alloc_inode() may be triggered and it uses GFP_KERNEL + * to allocate an inode, which can recurse back into the filesystem and + * attempt a transaction commit, resulting in a deadlock. + */ + nofs_flag = memalloc_nofs_save(); + inode = btrfs_iget(root->fs_info->sb, objectid, root); + memalloc_nofs_restore(nofs_flag); + + return inode; +} + /* * start a sub transaction and setup the log tree * this increments the log tree writer count to make the people @@ -600,7 +619,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root, { struct inode *inode; - inode = btrfs_iget(root->fs_info->sb, objectid, root); + inode = btrfs_iget_logging(objectid, root); if (IS_ERR(inode)) inode = NULL; return inode; @@ -5438,7 +5457,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx) { struct btrfs_root *root = start_inode->root; - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_path *path; LIST_HEAD(dir_list); struct btrfs_dir_list *dir_elem; @@ -5499,7 +5517,7 @@ again: continue; btrfs_release_path(path); - di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root); + di_inode = btrfs_iget_logging(di_key.objectid, root); if (IS_ERR(di_inode)) { ret = PTR_ERR(di_inode); goto out; @@ -5559,7 +5577,7 @@ again: btrfs_add_delayed_iput(curr_inode); curr_inode = NULL; - vfs_inode = btrfs_iget(fs_info->sb, ino, root); + vfs_inode = btrfs_iget_logging(ino, root); if (IS_ERR(vfs_inode)) { ret = PTR_ERR(vfs_inode); break; @@ -5654,7 +5672,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) return BTRFS_LOG_FORCE_COMMIT; - inode = btrfs_iget(root->fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); /* * If the other inode that had a conflicting dir entry was deleted in * the current transaction then we either: @@ -5755,7 +5773,6 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; /* @@ -5786,7 +5803,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, list_del(&curr->list); kfree(curr); - inode = btrfs_iget(fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); /* * If the other inode that had a conflicting dir entry was * deleted in the current transaction, we need to log its parent @@ -5797,7 +5814,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, if (ret != -ENOENT) break; - inode = btrfs_iget(fs_info->sb, parent, root); + inode = btrfs_iget_logging(parent, root); if (IS_ERR(inode)) { ret = PTR_ERR(inode); break; @@ -6319,7 +6336,6 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx) { const bool orig_log_new_dentries = ctx->log_new_dentries; - struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_item *item; int ret = 0; @@ -6345,7 +6361,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, if (key.type == BTRFS_ROOT_ITEM_KEY) continue; - di_inode = btrfs_iget(fs_info->sb, key.objectid, inode->root); + di_inode = btrfs_iget_logging(key.objectid, inode->root); if (IS_ERR(di_inode)) { ret = PTR_ERR(di_inode); break; @@ -6729,7 +6745,6 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_path *path; struct btrfs_key key; @@ -6794,8 +6809,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, cur_offset = item_size; } - dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid, - root); + dir_inode = btrfs_iget_logging(inode_key.objectid, root); /* * If the parent inode was deleted, return an error to * fallback to a transaction commit. This is to prevent @@ -6857,7 +6871,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); while (true) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *leaf; int slot; struct btrfs_key search_key; @@ -6872,7 +6885,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, search_key.objectid = found_key.offset; search_key.type = BTRFS_INODE_ITEM_KEY; search_key.offset = 0; - inode = btrfs_iget(fs_info->sb, ino, root); + inode = btrfs_iget_logging(ino, root); if (IS_ERR(inode)) return PTR_ERR(inode); From b9fd2affe4aa99a4ca14ee87e1f38fea22ece52a Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 11 Jun 2024 17:17:30 +0900 Subject: [PATCH 649/778] btrfs: zoned: fix initial free space detection When creating a new block group, it calls btrfs_add_new_free_space() to add the entire block group range into the free space accounting. __btrfs_add_free_space_zoned() checks if size == block_group->length to detect the initial free space adding, and proceed that case properly. However, if the zone_capacity == zone_size and the over-write speed is fast enough, the entire zone can be over-written within one transaction. That confuses __btrfs_add_free_space_zoned() to handle it as an initial free space accounting. As a result, that block group becomes a strange state: 0 used bytes, 0 zone_unusable bytes, but alloc_offset == zone_capacity (no allocation anymore). The initial free space accounting can properly be checked by checking alloc_offset too. Fixes: 98173255bddd ("btrfs: zoned: calculate free space from zone capacity") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index c8a05d5eb9cb..b642df5e5255 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2697,7 +2697,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, u64 offset = bytenr - block_group->start; u64 to_free, to_unusable; int bg_reclaim_threshold = 0; - bool initial = (size == block_group->length); + bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); u64 reclaimable_unusable; WARN_ON(!initial && offset + size > block_group->zone_capacity); From 2c49908634a2b97b1c3abe0589be2739ac5e7fd5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 17 Jun 2024 15:18:44 +0930 Subject: [PATCH 650/778] btrfs: scrub: handle RST lookup error correctly [BUG] When running btrfs/060 with forced RST feature, it would crash the following ASSERT() inside scrub_read_endio(): ASSERT(sector_nr < stripe->nr_sectors); Before that, we would have tree dump from btrfs_get_raid_extent_offset(), as we failed to find the RST entry for the range. [CAUSE] Inside scrub_submit_extent_sector_read() every time we allocated a new bbio we immediately called btrfs_map_block() to make sure there was some RST range covering the scrub target. But if btrfs_map_block() fails, we immediately call endio for the bbio, while the bbio is newly allocated, it's completely empty. Then inside scrub_read_endio(), we go through the bvecs to find the sector number (as bi_sector is no longer reliable if the bio is submitted to lower layers). And since the bio is empty, such bvecs iteration would not find any sector matching the sector, and return sector_nr == stripe->nr_sectors, triggering the ASSERT(). [FIX] Instead of calling btrfs_map_block() after allocating a new bbio, call btrfs_map_block() first. Since our only objective of calling btrfs_map_block() is only to update stripe_len, there is really no need to do that after btrfs_alloc_bio(). This new timing would avoid the problem of handling empty bbio completely, and in fact fixes a possible race window for the old code, where if the submission thread is the only owner of the pending_io, the scrub would never finish (since we didn't decrease the pending_io counter). Although the root cause of RST lookup failure still needs to be addressed. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index afd6932f5e89..d7caa3732f07 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1688,20 +1688,24 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, (i << fs_info->sectorsize_bits); int err; + io_stripe.is_scrub = true; + stripe_len = (nr_sectors - i) << fs_info->sectorsize_bits; + /* + * For RST cases, we need to manually split the bbio to + * follow the RST boundary. + */ + err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, + &stripe_len, &bioc, &io_stripe, &mirror); + btrfs_put_bioc(bioc); + if (err < 0) { + set_bit(i, &stripe->io_error_bitmap); + set_bit(i, &stripe->error_bitmap); + continue; + } + bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, fs_info, scrub_read_endio, stripe); bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT; - - io_stripe.is_scrub = true; - err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, - &stripe_len, &bioc, &io_stripe, - &mirror); - btrfs_put_bioc(bioc); - if (err) { - btrfs_bio_end_io(bbio, - errno_to_blk_status(err)); - return; - } } __bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); From a7e4c6a3031c74078dba7fa36239d0f4fe476c53 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 20 Jun 2024 12:32:00 +0100 Subject: [PATCH 651/778] btrfs: qgroup: fix quota root leak after quota disable failure If during the quota disable we fail when cleaning the quota tree or when deleting the root from the root tree, we jump to the 'out' label without ever dropping the reference on the quota root, resulting in a leak of the root since fs_info->quota_root is no longer pointing to the root (we have set it to NULL just before those steps). Fix this by always doing a btrfs_put_root() call under the 'out' label. This is a problem that exists since qgroups were first added in 2012 by commit bed92eae26cc ("Btrfs: qgroup implementation and prototypes"), but back then we missed a kfree on the quota root and free_extent_buffer() calls on its root and commit root nodes, since back then roots were not yet reference counted. Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index fc2a7ea26354..bf0f81d59b6b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1351,7 +1351,7 @@ static int flush_reservations(struct btrfs_fs_info *fs_info) int btrfs_quota_disable(struct btrfs_fs_info *fs_info) { - struct btrfs_root *quota_root; + struct btrfs_root *quota_root = NULL; struct btrfs_trans_handle *trans = NULL; int ret = 0; @@ -1449,9 +1449,9 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) btrfs_free_tree_block(trans, btrfs_root_id(quota_root), quota_root->node, 0, 1); - btrfs_put_root(quota_root); out: + btrfs_put_root(quota_root); mutex_unlock(&fs_info->qgroup_ioctl_lock); if (ret && trans) btrfs_end_transaction(trans); From 26b97668e5339434e5df8ddc7b1898a37a350112 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 24 Jun 2024 13:47:22 -0600 Subject: [PATCH 652/778] io_uring: remove dead struct io_submit_state member When the intermediate CQE aux cache got removed, any usage of the this member went away. As it isn't used anymore, kill it. Fixes: 902ce82c2aa1 ("io_uring: get rid of intermediate aux cqe caches") Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index b48570eaa449..7abdc0927124 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -207,7 +207,6 @@ struct io_submit_state { bool need_plug; bool cq_flush; unsigned short submit_nr; - unsigned int cqes_count; struct blk_plug plug; }; From dbcabac138fdfc730ba458ed2199ff1f29a271fc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 24 Jun 2024 19:07:18 -0600 Subject: [PATCH 653/778] io_uring: signal SQPOLL task_work with TWA_SIGNAL_NO_IPI Before SQPOLL was transitioned to managing its own task_work, the core used TWA_SIGNAL_NO_IPI to ensure that task_work was processed. If not, we can't be sure that all task_work is processed at SQPOLL thread exit time. Fixes: af5d68f8892f ("io_uring/sqpoll: manage task_work privately") Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 154b25b8a613..c326e2127dd4 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1259,8 +1259,8 @@ static void io_req_normal_work_add(struct io_kiocb *req) if (ctx->flags & IORING_SETUP_SQPOLL) { struct io_sq_data *sqd = ctx->sq_data; - if (wq_has_sleeper(&sqd->wait)) - wake_up(&sqd->wait); + if (sqd->thread) + __set_notify_signal(sqd->thread); return; } From 8c61291fd8500e3b35c7ec0c781b273d8cc96cde Mon Sep 17 00:00:00 2001 From: Zhaoyang Huang Date: Fri, 7 Jun 2024 10:31:16 +0800 Subject: [PATCH 654/778] mm: fix incorrect vbq reference in purge_fragmented_block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xa_for_each() in _vm_unmap_aliases() loops through all vbs. However, since commit 062eacf57ad9 ("mm: vmalloc: remove a global vmap_blocks xarray") the vb from xarray may not be on the corresponding CPU vmap_block_queue. Consequently, purge_fragmented_block() might use the wrong vbq->lock to protect the free list, leading to vbq->free breakage. Incorrect lock protection can exhaust all vmalloc space as follows: CPU0 CPU1 +--------------------------------------------+ | +--------------------+ +-----+ | +--> | |---->| |------+ | CPU1:vbq free_list | | vb1 | +--- | |<----| |<-----+ | +--------------------+ +-----+ | +--------------------------------------------+ _vm_unmap_aliases() vb_alloc() new_vmap_block() xa_for_each(&vbq->vmap_blocks, idx, vb) --> vb in CPU1:vbq->freelist purge_fragmented_block(vb) spin_lock(&vbq->lock) spin_lock(&vbq->lock) --> use CPU0:vbq->lock --> use CPU1:vbq->lock list_del_rcu(&vb->free_list) list_add_tail_rcu(&vb->free_list, &vbq->free) __list_del(vb->prev, vb->next) next->prev = prev +--------------------+ | | | CPU1:vbq free_list | +---| |<--+ | +--------------------+ | +----------------------------+ __list_add(new, head->prev, head) +--------------------------------------------+ | +--------------------+ +-----+ | +--> | |---->| |------+ | CPU1:vbq free_list | | vb2 | +--- | |<----| |<-----+ | +--------------------+ +-----+ | +--------------------------------------------+ prev->next = next +--------------------------------------------+ |----------------------------+ | | +--------------------+ | +-----+ | +--> | |--+ | |------+ | CPU1:vbq free_list | | vb2 | +--- | |<----| |<-----+ | +--------------------+ +-----+ | +--------------------------------------------+ Here’s a list breakdown. All vbs, which were to be added to ‘prev’, cannot be used by list_for_each_entry_rcu(vb, &vbq->free, free_list) in vb_alloc(). Thus, vmalloc space is exhausted. This issue affects both erofs and f2fs, the stacktrace is as follows: erofs: [] __switch_to+0x174 [] __schedule+0x624 [] schedule+0x7c [] schedule_preempt_disabled+0x24 [] __mutex_lock+0x374 [] __mutex_lock_slowpath+0x14 [] mutex_lock+0x24 [] reclaim_and_purge_vmap_areas+0x44 [] alloc_vmap_area+0x2e0 [] vm_map_ram+0x1b0 [] z_erofs_lz4_decompress+0x278 [] z_erofs_decompress_queue+0x650 [] z_erofs_runqueue+0x7f4 [] z_erofs_read_folio+0x104 [] filemap_read_folio+0x6c [] filemap_fault+0x300 [] __do_fault+0xc8 [] handle_mm_fault+0xb38 [] do_page_fault+0x288 [] do_translation_fault[jt]+0x40 [] do_mem_abort+0x58 [] el0_ia+0x70 [] el0t_64_sync_handler[jt]+0xb0 [] ret_to_user[jt]+0x0 f2fs: [] __switch_to+0x174 [] __schedule+0x624 [] schedule+0x7c [] schedule_preempt_disabled+0x24 [] __mutex_lock+0x374 [] __mutex_lock_slowpath+0x14 [] mutex_lock+0x24 [] reclaim_and_purge_vmap_areas+0x44 [] alloc_vmap_area+0x2e0 [] vm_map_ram+0x1b0 [] f2fs_prepare_decomp_mem+0x144 [] f2fs_alloc_dic+0x264 [] f2fs_read_multi_pages+0x428 [] f2fs_mpage_readpages+0x314 [] f2fs_readahead+0x50 [] read_pages+0x80 [] page_cache_ra_unbounded+0x1a0 [] page_cache_ra_order+0x274 [] do_sync_mmap_readahead+0x11c [] filemap_fault+0x1a0 [] f2fs_filemap_fault+0x28 [] __do_fault+0xc8 [] handle_mm_fault+0xb38 [] do_page_fault+0x288 [] do_translation_fault[jt]+0x40 [] do_mem_abort+0x58 [] el0_ia+0x70 [] el0t_64_sync_handler[jt]+0xb0 [] ret_to_user[jt]+0x0 To fix this, introducee cpu within vmap_block to record which this vb belongs to. Link: https://lkml.kernel.org/r/20240614021352.1822225-1-zhaoyang.huang@unisoc.com Link: https://lkml.kernel.org/r/20240607023116.1720640-1-zhaoyang.huang@unisoc.com Fixes: fc1e0d980037 ("mm/vmalloc: prevent stale TLBs in fully utilized blocks") Signed-off-by: Zhaoyang Huang Suggested-by: Hailong.Liu Reviewed-by: Uladzislau Rezki (Sony) Cc: Baoquan He Cc: Christoph Hellwig Cc: Lorenzo Stoakes Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 45e1506d58c3..d0cbdd7c1e5b 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2498,6 +2498,7 @@ struct vmap_block { struct list_head free_list; struct rcu_head rcu_head; struct list_head purge; + unsigned int cpu; }; /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ @@ -2625,8 +2626,15 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) free_vmap_area(va); return ERR_PTR(err); } - - vbq = raw_cpu_ptr(&vmap_block_queue); + /* + * list_add_tail_rcu could happened in another core + * rather than vb->cpu due to task migration, which + * is safe as list_add_tail_rcu will ensure the list's + * integrity together with list_for_each_rcu from read + * side. + */ + vb->cpu = raw_smp_processor_id(); + vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu); spin_lock(&vbq->lock); list_add_tail_rcu(&vb->free_list, &vbq->free); spin_unlock(&vbq->lock); @@ -2654,9 +2662,10 @@ static void free_vmap_block(struct vmap_block *vb) } static bool purge_fragmented_block(struct vmap_block *vb, - struct vmap_block_queue *vbq, struct list_head *purge_list, - bool force_purge) + struct list_head *purge_list, bool force_purge) { + struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, vb->cpu); + if (vb->free + vb->dirty != VMAP_BBMAP_BITS || vb->dirty == VMAP_BBMAP_BITS) return false; @@ -2704,7 +2713,7 @@ static void purge_fragmented_blocks(int cpu) continue; spin_lock(&vb->lock); - purge_fragmented_block(vb, vbq, &purge, true); + purge_fragmented_block(vb, &purge, true); spin_unlock(&vb->lock); } rcu_read_unlock(); @@ -2841,7 +2850,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) * not purgeable, check whether there is dirty * space to be flushed. */ - if (!purge_fragmented_block(vb, vbq, &purge_list, false) && + if (!purge_fragmented_block(vb, &purge_list, false) && vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) { unsigned long va_start = vb->va->va_start; unsigned long s, e; From 399ab86ea55039f9d0a5f621a68cb4631f796f37 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Fri, 14 Jun 2024 23:20:14 +0000 Subject: [PATCH 655/778] /proc/pid/smaps: add mseal info for vma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add sl in /proc/pid/smaps to indicate vma is sealed Link: https://lkml.kernel.org/r/20240614232014.806352-2-jeffxu@google.com Fixes: 8be7258aad44 ("mseal: add mseal syscall") Signed-off-by: Jeff Xu Acked-by: David Hildenbrand Cc: Adhemerval Zanella Cc: Jann Horn Cc: Jorge Lucangeli Obes Cc: Kees Cook Cc: Randy Dunlap Cc: Stephen Röttger Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 1 + fs/proc/task_mmu.c | 3 +++ include/linux/mm.h | 5 +++++ mm/internal.h | 5 ----- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 7c3a565ffbef..82d142de3461 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -571,6 +571,7 @@ encoded manner. The codes are the following: um userfaultfd missing tracking uw userfaultfd wr-protect tracking ss shadow stack page + sl sealed == ======================================= Note that there is no guarantee that every flag and associated mnemonic will diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f8d35f993fe5..71e5039d940d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -706,6 +706,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ #ifdef CONFIG_X86_USER_SHADOW_STACK [ilog2(VM_SHADOW_STACK)] = "ss", +#endif +#ifdef CONFIG_64BIT + [ilog2(VM_SEALED)] = "sl", #endif }; size_t i; diff --git a/include/linux/mm.h b/include/linux/mm.h index 9a5652c5fadd..eb7c96d24ac0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -406,6 +406,11 @@ extern unsigned int kobjsize(const void *objp); #define VM_ALLOW_ANY_UNCACHED VM_NONE #endif +#ifdef CONFIG_64BIT +/* VM is sealed, in vm_flags */ +#define VM_SEALED _BITUL(63) +#endif + /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) diff --git a/mm/internal.h b/mm/internal.h index c72c306761a4..6902b7dd8509 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1434,11 +1434,6 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn, unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); -#ifdef CONFIG_64BIT -/* VM is sealed, in vm_flags */ -#define VM_SEALED _BITUL(63) -#endif - #ifdef CONFIG_64BIT static inline int can_do_mseal(unsigned long flags) { From b4601d096aac8ed26afa88ef8b249975b0530ca1 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 14 Jun 2024 15:59:51 -0700 Subject: [PATCH 656/778] mm/slab: fix 'variable obj_exts set but not used' warning slab_post_alloc_hook() uses prepare_slab_obj_exts_hook() to obtain slabobj_ext object. Currently the only user of slabobj_ext object in this path is memory allocation profiling, therefore when it's not enabled this object is not needed. This also generates a warning when compiling with CONFIG_MEM_ALLOC_PROFILING=n. Move the code under this configuration to fix the warning. If more slabobj_ext users appear in the future, the code will have to be changed back to call prepare_slab_obj_exts_hook(). Link: https://lkml.kernel.org/r/20240614225951.3845577-1-surenb@google.com Fixes: 4b8736964640 ("mm/slab: add allocation accounting into slab allocation and free paths") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202406150444.F6neSaiy-lkp@intel.com/ Cc: Kent Overstreet Cc: Kees Cook Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/slub.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 1373ac365a46..4927edec6a8c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3902,7 +3902,6 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, unsigned int orig_size) { unsigned int zero_size = s->object_size; - struct slabobj_ext *obj_exts; bool kasan_init = init; size_t i; gfp_t init_flags = flags & gfp_allowed_mask; @@ -3945,9 +3944,11 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, kmemleak_alloc_recursive(p[i], s->object_size, 1, s->flags, init_flags); kmsan_slab_alloc(s, p[i], init_flags); - if (need_slab_obj_ext()) { - obj_exts = prepare_slab_obj_exts_hook(s, flags, p[i]); #ifdef CONFIG_MEM_ALLOC_PROFILING + if (need_slab_obj_ext()) { + struct slabobj_ext *obj_exts; + + obj_exts = prepare_slab_obj_exts_hook(s, flags, p[i]); /* * Currently obj_exts is used only for allocation profiling. * If other users appear then mem_alloc_profiling_enabled() @@ -3955,8 +3956,8 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, */ if (likely(obj_exts)) alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size); -#endif } +#endif } return memcg_slab_post_alloc_hook(s, lru, flags, size, p); From 34a023dc88696afed9ade7825f11f87ba657b133 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 14 Jun 2024 16:05:04 -0700 Subject: [PATCH 657/778] mm: handle profiling for fake memory allocations during compaction During compaction isolated free pages are marked allocated so that they can be split and/or freed. For that, post_alloc_hook() is used inside split_map_pages() and release_free_list(). split_map_pages() marks free pages allocated, splits the pages and then lets alloc_contig_range_noprof() free those pages. release_free_list() marks free pages and immediately frees them. This usage of post_alloc_hook() affect memory allocation profiling because these functions might not be called from an instrumented allocator, therefore current->alloc_tag is NULL and when debugging is enabled (CONFIG_MEM_ALLOC_PROFILING_DEBUG=y) that causes warnings. To avoid that, wrap such post_alloc_hook() calls into an instrumented function which acts as an allocator which will be charged for these fake allocations. Note that these allocations are very short lived until they are freed, therefore the associated counters should usually read 0. Link: https://lkml.kernel.org/r/20240614230504.3849136-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Vlastimil Babka Cc: Kees Cook Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Signed-off-by: Andrew Morton --- mm/compaction.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index e731d45befc7..739b1bf3d637 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -79,6 +79,13 @@ static inline bool is_via_compact_memory(int order) { return false; } #define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT) #endif +static struct page *mark_allocated_noprof(struct page *page, unsigned int order, gfp_t gfp_flags) +{ + post_alloc_hook(page, order, __GFP_MOVABLE); + return page; +} +#define mark_allocated(...) alloc_hooks(mark_allocated_noprof(__VA_ARGS__)) + static void split_map_pages(struct list_head *freepages) { unsigned int i, order; @@ -93,7 +100,7 @@ static void split_map_pages(struct list_head *freepages) nr_pages = 1 << order; - post_alloc_hook(page, order, __GFP_MOVABLE); + mark_allocated(page, order, __GFP_MOVABLE); if (order) split_page(page, order); @@ -122,7 +129,7 @@ static unsigned long release_free_list(struct list_head *freepages) * Convert free pages into post allocation pages, so * that we can free them via __free_page. */ - post_alloc_hook(page, order, __GFP_MOVABLE); + mark_allocated(page, order, __GFP_MOVABLE); __free_pages(page, order); if (pfn > high_pfn) high_pfn = pfn; From 1c61990d3762a020817daa353da0a0af6794140b Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 14 Jun 2024 16:32:38 +0200 Subject: [PATCH 658/778] kasan: fix bad call to unpoison_slab_object Commit 29d7355a9d05 ("kasan: save alloc stack traces for mempool") messed up one of the calls to unpoison_slab_object: the last two arguments are supposed to be GFP flags and whether to init the object memory. Fix the call. Without this fix, __kasan_mempool_unpoison_object provides the object's size as GFP flags to unpoison_slab_object, which can cause LOCKDEP reports (and probably other issues). Link: https://lkml.kernel.org/r/20240614143238.60323-1-andrey.konovalov@linux.dev Fixes: 29d7355a9d05 ("kasan: save alloc stack traces for mempool") Signed-off-by: Andrey Konovalov Reported-by: Brad Spengler Acked-by: Marco Elver Cc: Signed-off-by: Andrew Morton --- mm/kasan/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index e7c9a4dc89f8..85e7c6b4575c 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -532,7 +532,7 @@ void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip) return; /* Unpoison the object and save alloc info for non-kmalloc() allocations. */ - unpoison_slab_object(slab->slab_cache, ptr, size, flags); + unpoison_slab_object(slab->slab_cache, ptr, flags, false); /* Poison the redzone and save alloc info for kmalloc() allocations. */ if (is_kmalloc_cache(slab->slab_cache)) From be346c1a6eeb49d8fda827d2a9522124c2f72f36 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 14 Jun 2024 16:52:43 +0200 Subject: [PATCH 659/778] ocfs2: fix DIO failure due to insufficient transaction credits The code in ocfs2_dio_end_io_write() estimates number of necessary transaction credits using ocfs2_calc_extend_credits(). This however does not take into account that the IO could be arbitrarily large and can contain arbitrary number of extents. Extent tree manipulations do often extend the current transaction but not in all of the cases. For example if we have only single block extents in the tree, ocfs2_mark_extent_written() will end up calling ocfs2_replace_extent_rec() all the time and we will never extend the current transaction and eventually exhaust all the transaction credits if the IO contains many single block extents. Once that happens a WARN_ON(jbd2_handle_buffer_credits(handle) <= 0) is triggered in jbd2_journal_dirty_metadata() and subsequently OCFS2 aborts in response to this error. This was actually triggered by one of our customers on a heavily fragmented OCFS2 filesystem. To fix the issue make sure the transaction always has enough credits for one extent insert before each call of ocfs2_mark_extent_written(). Heming Zhao said: ------ PANIC: "Kernel panic - not syncing: OCFS2: (device dm-1): panic forced after error" PID: xxx TASK: xxxx CPU: 5 COMMAND: "SubmitThread-CA" #0 machine_kexec at ffffffff8c069932 #1 __crash_kexec at ffffffff8c1338fa #2 panic at ffffffff8c1d69b9 #3 ocfs2_handle_error at ffffffffc0c86c0c [ocfs2] #4 __ocfs2_abort at ffffffffc0c88387 [ocfs2] #5 ocfs2_journal_dirty at ffffffffc0c51e98 [ocfs2] #6 ocfs2_split_extent at ffffffffc0c27ea3 [ocfs2] #7 ocfs2_change_extent_flag at ffffffffc0c28053 [ocfs2] #8 ocfs2_mark_extent_written at ffffffffc0c28347 [ocfs2] #9 ocfs2_dio_end_io_write at ffffffffc0c2bef9 [ocfs2] #10 ocfs2_dio_end_io at ffffffffc0c2c0f5 [ocfs2] #11 dio_complete at ffffffff8c2b9fa7 #12 do_blockdev_direct_IO at ffffffff8c2bc09f #13 ocfs2_direct_IO at ffffffffc0c2b653 [ocfs2] #14 generic_file_direct_write at ffffffff8c1dcf14 #15 __generic_file_write_iter at ffffffff8c1dd07b #16 ocfs2_file_write_iter at ffffffffc0c49f1f [ocfs2] #17 aio_write at ffffffff8c2cc72e #18 kmem_cache_alloc at ffffffff8c248dde #19 do_io_submit at ffffffff8c2ccada #20 do_syscall_64 at ffffffff8c004984 #21 entry_SYSCALL_64_after_hwframe at ffffffff8c8000ba Link: https://lkml.kernel.org/r/20240617095543.6971-1-jack@suse.cz Link: https://lkml.kernel.org/r/20240614145243.8837-1-jack@suse.cz Fixes: c15471f79506 ("ocfs2: fix sparse file & data ordering issue in direct io") Signed-off-by: Jan Kara Reviewed-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/aops.c | 5 +++++ fs/ocfs2/journal.c | 17 +++++++++++++++++ fs/ocfs2/journal.h | 2 ++ fs/ocfs2/ocfs2_trace.h | 2 ++ 4 files changed, 26 insertions(+) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index f0467d3b3c88..6be175a1ab3c 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2366,6 +2366,11 @@ static int ocfs2_dio_end_io_write(struct inode *inode, } list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) { + ret = ocfs2_assure_trans_credits(handle, credits); + if (ret < 0) { + mlog_errno(ret); + break; + } ret = ocfs2_mark_extent_written(inode, &et, handle, ue->ue_cpos, 1, ue->ue_phys, diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 86807086b2df..530fba34f6d3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -445,6 +445,23 @@ bail: return status; } +/* + * Make sure handle has at least 'nblocks' credits available. If it does not + * have that many credits available, we will try to extend the handle to have + * enough credits. If that fails, we will restart transaction to have enough + * credits. Similar notes regarding data consistency and locking implications + * as for ocfs2_extend_trans() apply here. + */ +int ocfs2_assure_trans_credits(handle_t *handle, int nblocks) +{ + int old_nblks = jbd2_handle_buffer_credits(handle); + + trace_ocfs2_assure_trans_credits(old_nblks); + if (old_nblks >= nblocks) + return 0; + return ocfs2_extend_trans(handle, nblocks - old_nblks); +} + /* * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA. * If that fails, restart the transaction & regain write access for the diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 41c9fe7e62f9..e3c3a35dc5e0 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -243,6 +243,8 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int ocfs2_commit_trans(struct ocfs2_super *osb, handle_t *handle); int ocfs2_extend_trans(handle_t *handle, int nblocks); +int ocfs2_assure_trans_credits(handle_t *handle, + int nblocks); int ocfs2_allocate_extend_trans(handle_t *handle, int thresh); diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 60e208b01c8d..0511c69c9fde 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -2577,6 +2577,8 @@ DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_commit_cache_end); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans); +DEFINE_OCFS2_INT_EVENT(ocfs2_assure_trans_credits); + DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans); From ff202303c398ed56386ca4954154de9a96eb732a Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 7 Jun 2024 13:29:53 -0700 Subject: [PATCH 660/778] mm: convert page type macros to enum Changing PG_slab from a page flag to a page type in commit 46df8e73a4a3 ("mm: free up PG_slab") in has the unintended consequence of removing the PG_slab constant from kernel debuginfo. The commit does add the value to the vmcoreinfo note, which allows debuggers to find the value without hardcoding it. However it's most flexible to continue representing the constant with an enum. To that end, convert the page type fields into an enum. Debuggers will now be able to detect that PG_slab's type has changed from enum pageflags to enum pagetype. Link: https://lkml.kernel.org/r/20240607202954.1198180-1-stephen.s.brennan@oracle.com Fixes: 46df8e73a4a3 ("mm: free up PG_slab") Signed-off-by: Stephen Brennan Acked-by: Vlastimil Babka Cc: David Hildenbrand Cc: Hao Ge Cc: Matthew Wilcox (Oracle) Cc: Omar Sandoval Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 104078afe0b1..b9e914e1face 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -944,15 +944,18 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) * mistaken for a page type value. */ -#define PAGE_TYPE_BASE 0xf0000000 -/* Reserve 0x0000007f to catch underflows of _mapcount */ -#define PAGE_MAPCOUNT_RESERVE -128 -#define PG_buddy 0x00000080 -#define PG_offline 0x00000100 -#define PG_table 0x00000200 -#define PG_guard 0x00000400 -#define PG_hugetlb 0x00000800 -#define PG_slab 0x00001000 +enum pagetype { + PG_buddy = 0x00000080, + PG_offline = 0x00000100, + PG_table = 0x00000200, + PG_guard = 0x00000400, + PG_hugetlb = 0x00000800, + PG_slab = 0x00001000, + + PAGE_TYPE_BASE = 0xf0000000, + /* Reserve 0x0000007f to catch underflows of _mapcount */ + PAGE_MAPCOUNT_RESERVE = -128, +}; #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) From 8b8546d298dc9ce9d5d01a06c822e255d2159ca7 Mon Sep 17 00:00:00 2001 From: aigourensheng Date: Mon, 17 Jun 2024 01:29:34 -0400 Subject: [PATCH 661/778] selftests/mm:fix test_prctl_fork_exec return failure After calling fork() in test_prctl_fork_exec(), the global variable ksm_full_scans_fd is initialized to 0 in the child process upon entering the main function of ./ksm_functional_tests. In the function call chain test_child_ksm() -> __mmap_and_merge_range -> ksm_merge-> ksm_get_full_scans, start_scans = ksm_get_full_scans() will return an error. Therefore, the value of ksm_full_scans_fd needs to be initialized before calling test_child_ksm in the child process. Link: https://lkml.kernel.org/r/20240617052934.5834-1-shechenglong001@gmail.com Signed-off-by: aigourensheng Acked-by: David Hildenbrand Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- .../selftests/mm/ksm_functional_tests.c | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 37de82da9be7..b61803e36d1c 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -656,24 +656,8 @@ unmap: munmap(map, size); } -int main(int argc, char **argv) +static void init_global_file_handles(void) { - unsigned int tests = 8; - int err; - - if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { - exit(test_child_ksm()); - } - -#ifdef __NR_userfaultfd - tests++; -#endif - - ksft_print_header(); - ksft_set_plan(tests); - - pagesize = getpagesize(); - mem_fd = open("/proc/self/mem", O_RDWR); if (mem_fd < 0) ksft_exit_fail_msg("opening /proc/self/mem failed\n"); @@ -688,8 +672,30 @@ int main(int argc, char **argv) ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", - O_RDONLY); + O_RDONLY); ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); +} + +int main(int argc, char **argv) +{ + unsigned int tests = 8; + int err; + + if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { + init_global_file_handles(); + exit(test_child_ksm()); + } + +#ifdef __NR_userfaultfd + tests++; +#endif + + ksft_print_header(); + ksft_set_plan(tests); + + pagesize = getpagesize(); + + init_global_file_handles(); test_unmerge(); test_unmerge_zero_pages(); From f3228a2d4c3bf19e49bf933ca9a6e64555aafee3 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 18 Jun 2024 16:35:56 +0300 Subject: [PATCH 662/778] MAINTAINERS: TPM DEVICE DRIVER: update the W-tag Git hosting for the test suite has been migrated from Gitlab to Codeberg, given the "less hostile environment". Link: https://lkml.kernel.org/r/20240618133556.105604-1-jarkko@kernel.org Link: https://codeberg.org/jarkko/linux-tpmdd-test Signed-off-by: Jarkko Sakkinen Signed-off-by: Andrew Morton --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2ca8f35dfe03..43353b705988 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22745,7 +22745,7 @@ M: Jarkko Sakkinen R: Jason Gunthorpe L: linux-integrity@vger.kernel.org S: Maintained -W: https://gitlab.com/jarkkojs/linux-tpmdd-test +W: https://codeberg.org/jarkko/linux-tpmdd-test Q: https://patchwork.kernel.org/project/linux-integrity/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git F: Documentation/devicetree/bindings/tpm/ From c6408250703530187cc6250dcd702d12a71c44f5 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Tue, 18 Jun 2024 09:41:51 -0400 Subject: [PATCH 663/778] mm/migrate: make migrate_pages_batch() stats consistent As Ying pointed out in [1], stats->nr_thp_failed needs to be updated to avoid stats inconsistency between MIGRATE_SYNC and MIGRATE_ASYNC when calling migrate_pages_batch(). Because if not, when migrate_pages_batch() is called via migrate_pages(MIGRATE_ASYNC), nr_thp_failed will not be increased and when migrate_pages_batch() is called via migrate_pages(MIGRATE_SYNC*), nr_thp_failed will be increase in migrate_pages_sync() by stats->nr_thp_failed += astats.nr_thp_split. [1] https://lore.kernel.org/linux-mm/87msnq7key.fsf@yhuang6-desk2.ccr.corp.intel.com/ Link: https://lkml.kernel.org/r/20240620012712.19804-1-zi.yan@sent.com Link: https://lkml.kernel.org/r/20240618134151.29214-1-zi.yan@sent.com Fixes: 7262f208ca68 ("mm/migrate: split source folio if it is on deferred split list") Signed-off-by: Zi Yan Suggested-by: "Huang, Ying" Reviewed-by: "Huang, Ying" Cc: David Hildenbrand Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Yang Shi Cc: Yin Fengwei Signed-off-by: Andrew Morton --- mm/migrate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/migrate.c b/mm/migrate.c index 2cc5a68f6843..20cb9f5f7446 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1659,6 +1659,10 @@ static int migrate_pages_batch(struct list_head *from, * migrate_pages() may report success with (split but * unmigrated) pages still on its fromlist; whereas it * always reports success when its fromlist is empty. + * stats->nr_thp_failed should be increased too, + * otherwise stats inconsistency will happen when + * migrate_pages_batch is called via migrate_pages() + * with MIGRATE_SYNC and MIGRATE_ASYNC. * * Only check it without removing it from the list. * Since the folio can be on deferred_split_scan() @@ -1675,6 +1679,7 @@ static int migrate_pages_batch(struct list_head *from, !list_empty(&folio->_deferred_list)) { if (try_split_folio(folio, split_folios) == 0) { nr_failed++; + stats->nr_thp_failed += is_thp; stats->nr_thp_split += is_thp; stats->nr_split++; continue; From 54e7d59841dab977f6cb1183d658b1b82c9f4e94 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Jun 2024 18:56:47 +1200 Subject: [PATCH 664/778] nfs: drop the incorrect assertion in nfs_swap_rw() Since commit 2282679fb20b ("mm: submit multipage write for SWP_FS_OPS swap-space"), we can plug multiple pages then unplug them all together. That means iov_iter_count(iter) could be way bigger than PAGE_SIZE, it actually equals the size of iov_iter_npages(iter, INT_MAX). Note this issue has nothing to do with large folios as we don't support THP_SWPOUT to non-block devices. [v-songbaohua@oppo.com: figure out the cause and correct the commit message] Link: https://lkml.kernel.org/r/20240618065647.21791-1-21cnbao@gmail.com Fixes: 2282679fb20b ("mm: submit multipage write for SWP_FS_OPS swap-space") Signed-off-by: Christoph Hellwig Signed-off-by: Barry Song Closes: https://lore.kernel.org/linux-mm/20240617053201.GA16852@lst.de/ Reviewed-by: Martin Wege Cc: NeilBrown Cc: Anna Schumaker Cc: Steve French Cc: Trond Myklebust Cc: Chuanhua Han Cc: Ryan Roberts Cc: Chris Li Cc: "Huang, Ying" Cc: Jeff Layton Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton --- fs/nfs/direct.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bb2f583eb28b..90079ca134dd 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -141,8 +141,6 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter) { ssize_t ret; - VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); - if (iov_iter_rw(iter) == READ) ret = nfs_file_direct_read(iocb, iter, true); else From bf14ed81f571f8dba31cd72ab2e50fbcc877cc31 Mon Sep 17 00:00:00 2001 From: yangge Date: Thu, 20 Jun 2024 08:59:50 +0800 Subject: [PATCH 665/778] mm/page_alloc: Separate THP PCP into movable and non-movable categories Since commit 5d0a661d808f ("mm/page_alloc: use only one PCP list for THP-sized allocations") no longer differentiates the migration type of pages in THP-sized PCP list, it's possible that non-movable allocation requests may get a CMA page from the list, in some cases, it's not acceptable. If a large number of CMA memory are configured in system (for example, the CMA memory accounts for 50% of the system memory), starting a virtual machine with device passthrough will get stuck. During starting the virtual machine, it will call pin_user_pages_remote(..., FOLL_LONGTERM, ...) to pin memory. Normally if a page is present and in CMA area, pin_user_pages_remote() will migrate the page from CMA area to non-CMA area because of FOLL_LONGTERM flag. But if non-movable allocation requests return CMA memory, migrate_longterm_unpinnable_pages() will migrate a CMA page to another CMA page, which will fail to pass the check in check_and_migrate_movable_pages() and cause migration endless. Call trace: pin_user_pages_remote --__gup_longterm_locked // endless loops in this function ----_get_user_pages_locked ----check_and_migrate_movable_pages ------migrate_longterm_unpinnable_pages --------alloc_migration_target This problem will also have a negative impact on CMA itself. For example, when CMA is borrowed by THP, and we need to reclaim it through cma_alloc() or dma_alloc_coherent(), we must move those pages out to ensure CMA's users can retrieve that contigous memory. Currently, CMA's memory is occupied by non-movable pages, meaning we can't relocate them. As a result, cma_alloc() is more likely to fail. To fix the problem above, we add one PCP list for THP, which will not introduce a new cacheline for struct per_cpu_pages. THP will have 2 PCP lists, one PCP list is used by MOVABLE allocation, and the other PCP list is used by UNMOVABLE allocation. MOVABLE allocation contains GPF_MOVABLE, and UNMOVABLE allocation contains GFP_UNMOVABLE and GFP_RECLAIMABLE. Link: https://lkml.kernel.org/r/1718845190-4456-1-git-send-email-yangge1116@126.com Fixes: 5d0a661d808f ("mm/page_alloc: use only one PCP list for THP-sized allocations") Signed-off-by: yangge Cc: Baolin Wang Cc: Barry Song <21cnbao@gmail.com> Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 9 ++++----- mm/page_alloc.c | 9 +++++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8f9c9590a42c..586a8f0104d7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -654,13 +654,12 @@ enum zone_watermarks { }; /* - * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list - * for THP which will usually be GFP_MOVABLE. Even if it is another type, - * it should not contribute to serious fragmentation causing THP allocation - * failures. + * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists + * are added for THP. One PCP list is used by GPF_MOVABLE, and the other PCP list + * is used by GFP_UNMOVABLE and GFP_RECLAIMABLE. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define NR_PCP_THP 1 +#define NR_PCP_THP 2 #else #define NR_PCP_THP 0 #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7300aa9f14b0..9ecf99190ea2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -504,10 +504,15 @@ out: static inline unsigned int order_to_pindex(int migratetype, int order) { + bool __maybe_unused movable; + #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (order > PAGE_ALLOC_COSTLY_ORDER) { VM_BUG_ON(order != HPAGE_PMD_ORDER); - return NR_LOWORDER_PCP_LISTS; + + movable = migratetype == MIGRATE_MOVABLE; + + return NR_LOWORDER_PCP_LISTS + movable; } #else VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); @@ -521,7 +526,7 @@ static inline int pindex_to_order(unsigned int pindex) int order = pindex / MIGRATE_PCPTYPES; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pindex == NR_LOWORDER_PCP_LISTS) + if (pindex >= NR_LOWORDER_PCP_LISTS) order = HPAGE_PMD_ORDER; #else VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); From ab1ffc86cb5bec1c92387b9811d9036512f8f4eb Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Tue, 11 Jun 2024 08:32:16 -0700 Subject: [PATCH 666/778] mm/memory: don't require head page for do_set_pmd() The requirement that the head page be passed to do_set_pmd() was added in commit ef37b2ea08ac ("mm/memory: page_add_file_rmap() -> folio_add_file_rmap_[pte|pmd]()") and prevents pmd-mapping in the finish_fault() and filemap_map_pages() paths if the page to be inserted is anything but the head page for an otherwise suitable vma and pmd-sized page. Matthew said: : We're going to stop using PMDs to map large folios unless the fault is : within the first 4KiB of the PMD. No idea how many workloads that : affects, but it only needs to be backported as far as v6.8, so we may : as well backport it. Link: https://lkml.kernel.org/r/20240611153216.2794513-1-abrestic@rivosinc.com Fixes: ef37b2ea08ac ("mm/memory: page_add_file_rmap() -> folio_add_file_rmap_[pte|pmd]()") Signed-off-by: Andrew Bresticker Acked-by: David Hildenbrand Acked-by: Hugh Dickins Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 25a77c4fe4a0..d10e616d7389 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4608,8 +4608,9 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER)) return ret; - if (page != &folio->page || folio_order(folio) != HPAGE_PMD_ORDER) + if (folio_order(folio) != HPAGE_PMD_ORDER) return ret; + page = &folio->page; /* * Just backoff if any subpage of a THP is corrupted otherwise From 7312b740b70e77ccdc0dce11316f05861907d3ca Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Jun 2024 07:54:54 +0200 Subject: [PATCH 667/778] Revert "serial: core: Fix ifdef for serial base console functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b20172ca6bf489534892b801a5db41bbf5ceec75. Let's roll back all of the serial core and printk console changes that went into 6.10-rc1 as there still are problems with them that need to be sorted out. Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1 Reported-by: Petr Mladek Reported-by: Tony Lindgren Cc: Jiri Slaby Cc: John Ogness Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_base_bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c index 73c6ee540c83..281a2d5a7332 100644 --- a/drivers/tty/serial/serial_base_bus.c +++ b/drivers/tty/serial/serial_base_bus.c @@ -219,6 +219,8 @@ static int serial_base_add_one_prefcon(const char *match, const char *dev_name, return ret; } +#endif + #ifdef __sparc__ /* Handle Sparc ttya and ttyb options as done in console_setup() */ @@ -317,8 +319,6 @@ int serial_base_add_preferred_console(struct uart_driver *drv, return serial_base_add_one_prefcon(port_match, drv->dev_name, port->line); } -#endif - #ifdef CONFIG_SERIAL_8250_CONSOLE /* From 3ddbc427df968f50e6048368fc01088d428491c7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Jun 2024 07:57:48 +0200 Subject: [PATCH 668/778] Revert "serial: 8250: Fix add preferred console for serial8250_isa_init_ports()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 4547cd76f08a6f301f6ad563f5d0e4566924ec6b. Let's roll back all of the serial core and printk console changes that went into 6.10-rc1 as there still are problems with them that need to be sorted out. Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1 Reported-by: Petr Mladek Reported-by: Tony Lindgren Cc: Jiri Slaby Cc: John Ogness Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_base.h | 10 ++-------- drivers/tty/serial/serial_base_bus.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/tty/serial/serial_base.h b/drivers/tty/serial/serial_base.h index 743a72ac34f3..40af7052b4b6 100644 --- a/drivers/tty/serial/serial_base.h +++ b/drivers/tty/serial/serial_base.h @@ -55,6 +55,8 @@ void serial_core_unregister_port(struct uart_driver *drv, struct uart_port *port int serial_base_add_preferred_console(struct uart_driver *drv, struct uart_port *port); +int serial_base_add_isa_preferred_console(const char *name, int idx); + #else static inline @@ -64,14 +66,6 @@ int serial_base_add_preferred_console(struct uart_driver *drv, return 0; } -#endif - -#ifdef CONFIG_SERIAL_8250_CONSOLE - -int serial_base_add_isa_preferred_console(const char *name, int idx); - -#else - static inline int serial_base_add_isa_preferred_console(const char *name, int idx) { diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c index 281a2d5a7332..ae18871c0f36 100644 --- a/drivers/tty/serial/serial_base_bus.c +++ b/drivers/tty/serial/serial_base_bus.c @@ -219,8 +219,6 @@ static int serial_base_add_one_prefcon(const char *match, const char *dev_name, return ret; } -#endif - #ifdef __sparc__ /* Handle Sparc ttya and ttyb options as done in console_setup() */ @@ -331,6 +329,15 @@ int serial_base_add_isa_preferred_console(const char *name, int idx) return serial_base_add_prefcon(name, idx); } +#else + +int serial_base_add_isa_preferred_console(const char *name, int idx) +{ + return 0; +} + +#endif + #endif static int serial_base_init(void) From 12b7210ea83e7119a0041a54027948d65c5e6206 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Jun 2024 07:57:51 +0200 Subject: [PATCH 669/778] Revert "Documentation: kernel-parameters: Add DEVNAME:0.0 format for serial ports" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 5c3a766e9f057ee7a54b5d7addff7fab02676fea. Let's roll back all of the serial core and printk console changes that went into 6.10-rc1 as there still are problems with them that need to be sorted out. Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1 Reported-by: Petr Mladek Reported-by: Tony Lindgren Cc: Jiri Slaby Cc: John Ogness Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b600df82669d..4d4c640e51e7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -788,25 +788,6 @@ Documentation/networking/netconsole.rst for an alternative. - :.[,options] - Use the specified serial port on the serial core bus. - The addressing uses DEVNAME of the physical serial port - device, followed by the serial core controller instance, - and the serial port instance. The options are the same - as documented for the ttyS addressing above. - - The mapping of the serial ports to the tty instances - can be viewed with: - - $ ls -d /sys/bus/serial-base/devices/*:*.*/tty/* - /sys/bus/serial-base/devices/00:04:0.0/tty/ttyS0 - - In the above example, the console can be addressed with - console=00:04:0.0. Note that a console addressed this - way will only get added when the related device driver - is ready. The use of an earlycon parameter in addition to - the console may be desired for console output early on. - uart[8250],io,[,options] uart[8250],mmio,[,options] uart[8250],mmio16,[,options] From 740a79675833db5e14c89113d94be769ab8369c0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Jun 2024 07:57:55 +0200 Subject: [PATCH 670/778] Revert "serial: 8250: Add preferred console in serial8250_isa_init_ports()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a8b04cfe7dad84e65df5996e14b435fd356fe62c. Let's roll back all of the serial core and printk console changes that went into 6.10-rc1 as there still are problems with them that need to be sorted out. Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1 Reported-by: Petr Mladek Reported-by: Tony Lindgren Cc: Jiri Slaby Cc: John Ogness Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 5 ----- drivers/tty/serial/serial_base.h | 8 -------- drivers/tty/serial/serial_base_bus.c | 21 --------------------- 3 files changed, 34 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index ff15022369e4..b0adafc44747 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -15,7 +15,6 @@ */ #include -#include #include #include #include @@ -42,8 +41,6 @@ #include -#include "../serial_base.h" /* For serial_base_add_isa_preferred_console() */ - #include "8250.h" /* @@ -563,8 +560,6 @@ static void __init serial8250_isa_init_ports(void) port->irqflags |= irqflag; if (serial8250_isa_config != NULL) serial8250_isa_config(i, &up->port, &up->capabilities); - - serial_base_add_isa_preferred_console(serial8250_reg.dev_name, i); } } diff --git a/drivers/tty/serial/serial_base.h b/drivers/tty/serial/serial_base.h index 40af7052b4b6..68a86df984da 100644 --- a/drivers/tty/serial/serial_base.h +++ b/drivers/tty/serial/serial_base.h @@ -55,8 +55,6 @@ void serial_core_unregister_port(struct uart_driver *drv, struct uart_port *port int serial_base_add_preferred_console(struct uart_driver *drv, struct uart_port *port); -int serial_base_add_isa_preferred_console(const char *name, int idx); - #else static inline @@ -66,10 +64,4 @@ int serial_base_add_preferred_console(struct uart_driver *drv, return 0; } -static inline -int serial_base_add_isa_preferred_console(const char *name, int idx) -{ - return 0; -} - #endif diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c index ae18871c0f36..1375be3315e6 100644 --- a/drivers/tty/serial/serial_base_bus.c +++ b/drivers/tty/serial/serial_base_bus.c @@ -317,27 +317,6 @@ int serial_base_add_preferred_console(struct uart_driver *drv, return serial_base_add_one_prefcon(port_match, drv->dev_name, port->line); } -#ifdef CONFIG_SERIAL_8250_CONSOLE - -/* - * Early ISA ports initialize the console before there is no struct device. - * This should be only called from serial8250_isa_init_preferred_console(), - * other callers are likely wrong and should rely on earlycon instead. - */ -int serial_base_add_isa_preferred_console(const char *name, int idx) -{ - return serial_base_add_prefcon(name, idx); -} - -#else - -int serial_base_add_isa_preferred_console(const char *name, int idx) -{ - return 0; -} - -#endif - #endif static int serial_base_init(void) From e406c56dd9e35cc0b456b592d6814b5d2d8791de Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Jun 2024 07:57:58 +0200 Subject: [PATCH 671/778] Revert "serial: core: Handle serial console options" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a0f32e2dd99867b164bfebcf36729c2a0d41b30b. Let's roll back all of the serial core and printk console changes that went into 6.10-rc1 as there still are problems with them that need to be sorted out. Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1 Reported-by: Petr Mladek Reported-by: Tony Lindgren Cc: Jiri Slaby Cc: John Ogness Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_base_bus.c | 49 ---------------------------- 1 file changed, 49 deletions(-) diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c index 1375be3315e6..4ef26a1b09b1 100644 --- a/drivers/tty/serial/serial_base_bus.c +++ b/drivers/tty/serial/serial_base_bus.c @@ -219,58 +219,9 @@ static int serial_base_add_one_prefcon(const char *match, const char *dev_name, return ret; } -#ifdef __sparc__ - -/* Handle Sparc ttya and ttyb options as done in console_setup() */ -static int serial_base_add_sparc_console(const char *dev_name, int idx) -{ - const char *name; - - switch (idx) { - case 0: - name = "ttya"; - break; - case 1: - name = "ttyb"; - break; - default: - return 0; - } - - return serial_base_add_one_prefcon(name, dev_name, idx); -} - -#else - -static inline int serial_base_add_sparc_console(const char *dev_name, int idx) -{ - return 0; -} - -#endif - static int serial_base_add_prefcon(const char *name, int idx) { const char *char_match __free(kfree) = NULL; - const char *nmbr_match __free(kfree) = NULL; - int ret; - - /* Handle ttyS specific options */ - if (strstarts(name, "ttyS")) { - /* No name, just a number */ - nmbr_match = kasprintf(GFP_KERNEL, "%i", idx); - if (!nmbr_match) - return -ENODEV; - - ret = serial_base_add_one_prefcon(nmbr_match, name, idx); - if (ret) - return ret; - - /* Sparc ttya and ttyb */ - ret = serial_base_add_sparc_console(name, idx); - if (ret) - return ret; - } /* Handle the traditional character device name style console=ttyS0 */ char_match = kasprintf(GFP_KERNEL, "%s%i", name, idx); From a5e4bb69ecddb4ad7895e21091f3657ad832e270 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Jun 2024 07:58:00 +0200 Subject: [PATCH 672/778] Revert "serial: core: Add support for DEVNAME:0.0 style naming for kernel console" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 787a1cabac01c99846070fcf702e53befaf89f79. Let's roll back all of the serial core and printk console changes that went into 6.10-rc1 as there still are problems with them that need to be sorted out. Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1 Reported-by: Petr Mladek Reported-by: Tony Lindgren Cc: Jiri Slaby Cc: John Ogness Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_base.h | 16 ------- drivers/tty/serial/serial_base_bus.c | 66 ---------------------------- drivers/tty/serial/serial_core.c | 4 -- 3 files changed, 86 deletions(-) diff --git a/drivers/tty/serial/serial_base.h b/drivers/tty/serial/serial_base.h index 68a86df984da..b6c38d2edfd4 100644 --- a/drivers/tty/serial/serial_base.h +++ b/drivers/tty/serial/serial_base.h @@ -49,19 +49,3 @@ void serial_ctrl_unregister_port(struct uart_driver *drv, struct uart_port *port int serial_core_register_port(struct uart_driver *drv, struct uart_port *port); void serial_core_unregister_port(struct uart_driver *drv, struct uart_port *port); - -#ifdef CONFIG_SERIAL_CORE_CONSOLE - -int serial_base_add_preferred_console(struct uart_driver *drv, - struct uart_port *port); - -#else - -static inline -int serial_base_add_preferred_console(struct uart_driver *drv, - struct uart_port *port) -{ - return 0; -} - -#endif diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c index 4ef26a1b09b1..4df2a4b10445 100644 --- a/drivers/tty/serial/serial_base_bus.c +++ b/drivers/tty/serial/serial_base_bus.c @@ -8,7 +8,6 @@ * The serial core bus manages the serial core controller instances. */ -#include #include #include #include @@ -205,71 +204,6 @@ void serial_base_port_device_remove(struct serial_port_device *port_dev) put_device(&port_dev->dev); } -#ifdef CONFIG_SERIAL_CORE_CONSOLE - -static int serial_base_add_one_prefcon(const char *match, const char *dev_name, - int port_id) -{ - int ret; - - ret = add_preferred_console_match(match, dev_name, port_id); - if (ret == -ENOENT) - return 0; - - return ret; -} - -static int serial_base_add_prefcon(const char *name, int idx) -{ - const char *char_match __free(kfree) = NULL; - - /* Handle the traditional character device name style console=ttyS0 */ - char_match = kasprintf(GFP_KERNEL, "%s%i", name, idx); - if (!char_match) - return -ENOMEM; - - return serial_base_add_one_prefcon(char_match, name, idx); -} - -/** - * serial_base_add_preferred_console - Adds a preferred console - * @drv: Serial port device driver - * @port: Serial port instance - * - * Tries to add a preferred console for a serial port if specified in the - * kernel command line. Supports both the traditional character device such - * as console=ttyS0, and a hardware addressing based console=DEVNAME:0.0 - * style name. - * - * Translates the kernel command line option using a hardware based addressing - * console=DEVNAME:0.0 to the serial port character device such as ttyS0. - * Cannot be called early for ISA ports, depends on struct device. - * - * Note that duplicates are ignored by add_preferred_console(). - * - * Return: 0 on success, negative error code on failure. - */ -int serial_base_add_preferred_console(struct uart_driver *drv, - struct uart_port *port) -{ - const char *port_match __free(kfree) = NULL; - int ret; - - ret = serial_base_add_prefcon(drv->dev_name, port->line); - if (ret) - return ret; - - port_match = kasprintf(GFP_KERNEL, "%s:%i.%i", dev_name(port->dev), - port->ctrl_id, port->port_id); - if (!port_match) - return -ENOMEM; - - /* Translate a hardware addressing style console=DEVNAME:0.0 */ - return serial_base_add_one_prefcon(port_match, drv->dev_name, port->line); -} - -#endif - static int serial_base_init(void) { int ret; diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 0c4d60976663..2a8006e3d687 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -3422,10 +3422,6 @@ int serial_core_register_port(struct uart_driver *drv, struct uart_port *port) if (ret) goto err_unregister_ctrl_dev; - ret = serial_base_add_preferred_console(drv, port); - if (ret) - goto err_unregister_port_dev; - ret = serial_core_add_one_port(drv, port); if (ret) goto err_unregister_port_dev; From deb091cb05a2b8555e15fcc2df5a0dcd9d06fea0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Jun 2024 07:58:03 +0200 Subject: [PATCH 673/778] Revert "printk: Flag register_console() if console is set on command line" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b73c9cbe4f1fc02645228aa575998dd54067f8ef. Let's roll back all of the serial core and printk console changes that went into 6.10-rc1 as there still are problems with them that need to be sorted out. Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1 Reported-by: Petr Mladek Reported-by: Tony Lindgren Cc: Jiri Slaby Cc: John Ogness Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 420fd310129d..e6e6a47acec8 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2504,9 +2504,6 @@ static int __init console_setup(char *str) if (console_opt_save(str, brl_options)) return 1; - /* Flag register_console() to not call try_enable_default_console() */ - console_set_on_cmdline = 1; - /* Don't attempt to parse a DEVNAME:0.0 style console */ if (strchr(str, ':')) return 1; @@ -3522,7 +3519,7 @@ void register_console(struct console *newcon) * Note that a console with tty binding will have CON_CONSDEV * flag set and will be first in the list. */ - if (preferred_console < 0 && !console_set_on_cmdline) { + if (preferred_console < 0) { if (hlist_empty(&console_list) || !console_first()->device || console_first()->flags & CON_BOOT) { try_enable_default_console(newcon); From 64f9f010c6177dd4f33e5023d2eab9af4af291e9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Jun 2024 07:58:07 +0200 Subject: [PATCH 674/778] Revert "printk: Don't try to parse DEVNAME:0.0 console options" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 8a831c584e6e80cf68f79893dc395c16cdf47dc8. Let's roll back all of the serial core and printk console changes that went into 6.10-rc1 as there still are problems with them that need to be sorted out. Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1 Reported-by: Petr Mladek Reported-by: Tony Lindgren Cc: Jiri Slaby Cc: John Ogness Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e6e6a47acec8..b582404cd29d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2504,10 +2504,6 @@ static int __init console_setup(char *str) if (console_opt_save(str, brl_options)) return 1; - /* Don't attempt to parse a DEVNAME:0.0 style console */ - if (strchr(str, ':')) - return 1; - /* * Decode str into name, index, options. */ From cc8d5a2f09a54405321769abfd6ec3395482336a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Jun 2024 07:58:10 +0200 Subject: [PATCH 675/778] Revert "printk: Save console options for add_preferred_console_match()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f03e8c1060f86c23eb49bafee99d9fcbd1c1bd77. Let's roll back all of the serial core and printk console changes that went into 6.10-rc1 as there still are problems with them that need to be sorted out. Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1 Reported-by: Petr Mladek Reported-by: Tony Lindgren Cc: Jiri Slaby Cc: John Ogness Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- include/linux/printk.h | 3 - kernel/printk/Makefile | 2 +- kernel/printk/conopt.c | 146 -------------------------------- kernel/printk/console_cmdline.h | 6 -- kernel/printk/printk.c | 14 +-- 5 files changed, 4 insertions(+), 167 deletions(-) delete mode 100644 kernel/printk/conopt.c diff --git a/include/linux/printk.h b/include/linux/printk.h index 40afab23881a..65c5184470f1 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -60,9 +60,6 @@ static inline const char *printk_skip_headers(const char *buffer) #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET -int add_preferred_console_match(const char *match, const char *name, - const short idx); - extern int console_printk[]; #define console_loglevel (console_printk[0]) diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 040fe7d1eda2..39a2b61c7232 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y = printk.o conopt.o +obj-y = printk.o obj-$(CONFIG_PRINTK) += printk_safe.o nbcon.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o obj-$(CONFIG_PRINTK_INDEX) += index.o diff --git a/kernel/printk/conopt.c b/kernel/printk/conopt.c deleted file mode 100644 index 9d507bac3657..000000000000 --- a/kernel/printk/conopt.c +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Kernel command line console options for hardware based addressing - * - * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/ - * Author: Tony Lindgren - */ - -#include -#include -#include -#include - -#include - -#include "console_cmdline.h" - -/* - * Allow longer DEVNAME:0.0 style console naming such as abcd0000.serial:0.0 - * in addition to the legacy ttyS0 style naming. - */ -#define CONSOLE_NAME_MAX 32 - -#define CONSOLE_OPT_MAX 16 -#define CONSOLE_BRL_OPT_MAX 16 - -struct console_option { - char name[CONSOLE_NAME_MAX]; - char opt[CONSOLE_OPT_MAX]; - char brl_opt[CONSOLE_BRL_OPT_MAX]; - u8 has_brl_opt:1; -}; - -/* Updated only at console_setup() time, no locking needed */ -static struct console_option conopt[MAX_CMDLINECONSOLES]; - -/** - * console_opt_save - Saves kernel command line console option for driver use - * @str: Kernel command line console name and option - * @brl_opt: Braille console options - * - * Saves a kernel command line console option for driver subsystems to use for - * adding a preferred console during init. Called from console_setup() only. - * - * Return: 0 on success, negative error code on failure. - */ -int __init console_opt_save(const char *str, const char *brl_opt) -{ - struct console_option *con; - size_t namelen, optlen; - const char *opt; - int i; - - namelen = strcspn(str, ","); - if (namelen == 0 || namelen >= CONSOLE_NAME_MAX) - return -EINVAL; - - opt = str + namelen; - if (*opt == ',') - opt++; - - optlen = strlen(opt); - if (optlen >= CONSOLE_OPT_MAX) - return -EINVAL; - - for (i = 0; i < MAX_CMDLINECONSOLES; i++) { - con = &conopt[i]; - - if (con->name[0]) { - if (!strncmp(str, con->name, namelen)) - return 0; - continue; - } - - /* - * The name isn't terminated, only opt is. Empty opt is fine, - * but brl_opt can be either empty or NULL. For more info, see - * _braille_console_setup(). - */ - strscpy(con->name, str, namelen + 1); - strscpy(con->opt, opt, CONSOLE_OPT_MAX); - if (brl_opt) { - strscpy(con->brl_opt, brl_opt, CONSOLE_BRL_OPT_MAX); - con->has_brl_opt = 1; - } - - return 0; - } - - return -ENOMEM; -} - -static struct console_option *console_opt_find(const char *name) -{ - struct console_option *con; - int i; - - for (i = 0; i < MAX_CMDLINECONSOLES; i++) { - con = &conopt[i]; - if (!strcmp(name, con->name)) - return con; - } - - return NULL; -} - -/** - * add_preferred_console_match - Adds a preferred console if a match is found - * @match: Expected console on kernel command line, such as console=DEVNAME:0.0 - * @name: Name of the console character device to add such as ttyS - * @idx: Index for the console - * - * Allows driver subsystems to add a console after translating the command - * line name to the character device name used for the console. Options are - * added automatically based on the kernel command line. Duplicate preferred - * consoles are ignored by __add_preferred_console(). - * - * Return: 0 on success, negative error code on failure. - */ -int add_preferred_console_match(const char *match, const char *name, - const short idx) -{ - struct console_option *con; - char *brl_opt = NULL; - - if (!match || !strlen(match) || !name || !strlen(name) || - idx < 0) - return -EINVAL; - - con = console_opt_find(match); - if (!con) - return -ENOENT; - - /* - * See __add_preferred_console(). It checks for NULL brl_options to set - * the preferred_console flag. Empty brl_opt instead of NULL leads into - * the preferred_console flag not set, and CON_CONSDEV not being set, - * and the boot console won't get disabled at the end of console_setup(). - */ - if (con->has_brl_opt) - brl_opt = con->brl_opt; - - console_opt_add_preferred_console(name, idx, con->opt, brl_opt); - - return 0; -} diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h index a125e0235589..3ca74ad391d6 100644 --- a/kernel/printk/console_cmdline.h +++ b/kernel/printk/console_cmdline.h @@ -2,12 +2,6 @@ #ifndef _CONSOLE_CMDLINE_H #define _CONSOLE_CMDLINE_H -#define MAX_CMDLINECONSOLES 8 - -int console_opt_save(const char *str, const char *brl_opt); -int console_opt_add_preferred_console(const char *name, const short idx, - char *options, char *brl_options); - struct console_cmdline { char name[16]; /* Name of the driver */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b582404cd29d..dddb15f48d59 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -383,6 +383,9 @@ static int console_locked; /* * Array of consoles built from command line options (console=) */ + +#define MAX_CMDLINECONSOLES 8 + static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int preferred_console = -1; @@ -2500,10 +2503,6 @@ static int __init console_setup(char *str) if (_braille_console_setup(&str, &brl_options)) return 1; - /* Save the console for driver subsystem use */ - if (console_opt_save(str, brl_options)) - return 1; - /* * Decode str into name, index, options. */ @@ -2534,13 +2533,6 @@ static int __init console_setup(char *str) } __setup("console=", console_setup); -/* Only called from add_preferred_console_match() */ -int console_opt_add_preferred_console(const char *name, const short idx, - char *options, char *brl_options) -{ - return __add_preferred_console(name, idx, options, brl_options, true); -} - /** * add_preferred_console - add a device to the list of preferred consoles. * @name: device name From 0983d288caf984de0202c66641577b739caad561 Mon Sep 17 00:00:00 2001 From: Nick Child Date: Thu, 20 Jun 2024 10:23:11 -0500 Subject: [PATCH 676/778] ibmvnic: Add tx check to prevent skb leak Below is a summary of how the driver stores a reference to an skb during transmit: tx_buff[free_map[consumer_index]]->skb = new_skb; free_map[consumer_index] = IBMVNIC_INVALID_MAP; consumer_index ++; Where variable data looks like this: free_map == [4, IBMVNIC_INVALID_MAP, IBMVNIC_INVALID_MAP, 0, 3] consumer_index^ tx_buff == [skb=null, skb=, skb=, skb=null, skb=null] The driver has checks to ensure that free_map[consumer_index] pointed to a valid index but there was no check to ensure that this index pointed to an unused/null skb address. So, if, by some chance, our free_map and tx_buff lists become out of sync then we were previously risking an skb memory leak. This could then cause tcp congestion control to stop sending packets, eventually leading to ETIMEDOUT. Therefore, add a conditional to ensure that the skb address is null. If not then warn the user (because this is still a bug that should be patched) and free the old pointer to prevent memleak/tcp problems. Signed-off-by: Nick Child Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ibm/ibmvnic.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5490f0f9c112..23ebeb143987 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2482,6 +2482,18 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) (tx_pool->consumer_index + 1) % tx_pool->num_buffers; tx_buff = &tx_pool->tx_buff[bufidx]; + + /* Sanity checks on our free map to make sure it points to an index + * that is not being occupied by another skb. If skb memory is + * not freed then we see congestion control kick in and halt tx. + */ + if (unlikely(tx_buff->skb)) { + dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n", + skb_is_gso(skb) ? "tso_pool" : "tx_pool", + queue_num, bufidx); + dev_kfree_skb_any(tx_buff->skb); + } + tx_buff->skb = skb; tx_buff->index = bufidx; tx_buff->pool_index = queue_num; From ff46e3b4421923937b7f6e44ffcd3549a074f321 Mon Sep 17 00:00:00 2001 From: luoxuanqiang Date: Fri, 21 Jun 2024 09:39:29 +0800 Subject: [PATCH 677/778] Fix race for duplicate reqsk on identical SYN When bonding is configured in BOND_MODE_BROADCAST mode, if two identical SYN packets are received at the same time and processed on different CPUs, it can potentially create the same sk (sock) but two different reqsk (request_sock) in tcp_conn_request(). These two different reqsk will respond with two SYNACK packets, and since the generation of the seq (ISN) incorporates a timestamp, the final two SYNACK packets will have different seq values. The consequence is that when the Client receives and replies with an ACK to the earlier SYNACK packet, we will reset(RST) it. ======================================================================== This behavior is consistently reproducible in my local setup, which comprises: | NETA1 ------ NETB1 | PC_A --- bond --- | | --- bond --- PC_B | NETA2 ------ NETB2 | - PC_A is the Server and has two network cards, NETA1 and NETA2. I have bonded these two cards using BOND_MODE_BROADCAST mode and configured them to be handled by different CPU. - PC_B is the Client, also equipped with two network cards, NETB1 and NETB2, which are also bonded and configured in BOND_MODE_BROADCAST mode. If the client attempts a TCP connection to the server, it might encounter a failure. Capturing packets from the server side reveals: 10.10.10.10.45182 > localhost: Flags [S], seq 320236027, 10.10.10.10.45182 > localhost: Flags [S], seq 320236027, localhost > 10.10.10.10.45182: Flags [S.], seq 2967855116, localhost > 10.10.10.10.45182: Flags [S.], seq 2967855123, <== 10.10.10.10.45182 > localhost: Flags [.], ack 4294967290, 10.10.10.10.45182 > localhost: Flags [.], ack 4294967290, localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, <== localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, Two SYNACKs with different seq numbers are sent by localhost, resulting in an anomaly. ======================================================================== The attempted solution is as follows: Add a return value to inet_csk_reqsk_queue_hash_add() to confirm if the ehash insertion is successful (Up to now, the reason for unsuccessful insertion is that a reqsk for the same connection has already been inserted). If the insertion fails, release the reqsk. Due to the refcnt, Kuniyuki suggests also adding a return value check for the DCCP module; if ehash insertion fails, indicating a successful insertion of the same connection, simply release the reqsk as well. Simultaneously, In the reqsk_queue_hash_req(), the start of the req->rsk_timer is adjusted to be after successful insertion. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: luoxuanqiang Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240621013929.1386815-1-luoxuanqiang@kylinos.cn Signed-off-by: Paolo Abeni --- include/net/inet_connection_sock.h | 2 +- net/dccp/ipv4.c | 7 +++++-- net/dccp/ipv6.c | 7 +++++-- net/ipv4/inet_connection_sock.c | 17 +++++++++++++---- net/ipv4/tcp_input.c | 7 ++++++- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 7d6b1254c92d..c0deaafebfdc 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -263,7 +263,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, struct sock *child); -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout); struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, struct request_sock *req, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ff41bd6f99c3..5926159a6f20 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -657,8 +657,11 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_v4_send_response(sk, req)) goto drop_and_free; - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - reqsk_put(req); + if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) + reqsk_free(req); + else + reqsk_put(req); + return 0; drop_and_free: diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 85f4b8fdbe5e..da5dba120bc9 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -400,8 +400,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_v6_send_response(sk, req)) goto drop_and_free; - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - reqsk_put(req); + if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) + reqsk_free(req); + else + reqsk_put(req); + return 0; drop_and_free: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d81f74ce0f02..d4f0eff8b20f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1122,25 +1122,34 @@ drop: inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); } -static void reqsk_queue_hash_req(struct request_sock *req, +static bool reqsk_queue_hash_req(struct request_sock *req, unsigned long timeout) { + bool found_dup_sk = false; + + if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk)) + return false; + + /* The timer needs to be setup after a successful insertion. */ timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); - inet_ehash_insert(req_to_sk(req), NULL, NULL); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. */ smp_wmb(); refcount_set(&req->rsk_refcnt, 2 + 1); + return true; } -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout) { - reqsk_queue_hash_req(req, timeout); + if (!reqsk_queue_hash_req(req, timeout)) + return false; + inet_csk_reqsk_queue_added(sk); + return true; } EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 01d208e0eef3..b6d7666ac912 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7257,7 +7257,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; if (!want_cookie) { req->timeout = tcp_timeout_init((struct sock *)req); - inet_csk_reqsk_queue_hash_add(sk, req, req->timeout); + if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, + req->timeout))) { + reqsk_free(req); + return 0; + } + } af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? TCP_SYNACK_NORMAL : From c5ab94ea280a9b4108723eecf0a636e22a5bb137 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 25 Jun 2024 11:51:58 +0200 Subject: [PATCH 678/778] ALSA: seq: Fix missing channel at encoding RPN/NRPN MIDI2 messages The conversion from the legacy event to MIDI2 UMP for RPN and NRPN missed the setup of the channel number, resulting in always the channel 0. Fix it. Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events") Link: https://patch.msgid.link/20240625095200.25745-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ump_convert.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index d81f776a4c3d..6687efdceea1 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -791,7 +791,8 @@ static int paf_ev_to_ump_midi2(const struct snd_seq_event *event, /* set up the MIDI2 RPN/NRPN packet data from the parsed info */ static void fill_rpn(struct snd_seq_ump_midi2_bank *cc, - union snd_ump_midi2_msg *data) + union snd_ump_midi2_msg *data, + unsigned char channel) { if (cc->rpn_set) { data->rpn.status = UMP_MSG_STATUS_RPN; @@ -808,6 +809,7 @@ static void fill_rpn(struct snd_seq_ump_midi2_bank *cc, } data->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) | cc->cc_data_lsb); + data->rpn.channel = channel; cc->cc_data_msb = cc->cc_data_lsb = 0; } @@ -855,7 +857,7 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_data_lsb = val; if (!(cc->rpn_set || cc->nrpn_set)) return 0; // skip - fill_rpn(cc, data); + fill_rpn(cc, data, channel); return 1; } @@ -957,7 +959,7 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_data_lsb = lsb; if (!(cc->rpn_set || cc->nrpn_set)) return 0; // skip - fill_rpn(cc, data); + fill_rpn(cc, data, channel); return 1; } From 1225675ca74c746f09211528588e83b3def1ff6a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 24 Jun 2024 14:54:34 +0200 Subject: [PATCH 679/778] ALSA: PCM: Allow resume only for suspended streams snd_pcm_resume() should bail out if the stream isn't in a suspended state. Otherwise it'd allow doubly resume. Link: https://patch.msgid.link/20240624125443.27808-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 521ba56392a0..c152ccf32214 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1775,6 +1775,8 @@ static int snd_pcm_pre_resume(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; + if (runtime->state != SNDRV_PCM_STATE_SUSPENDED) + return -EBADFD; if (!(runtime->info & SNDRV_PCM_INFO_RESUME)) return -ENOSYS; runtime->trigger_master = substream; From 1d091a98c399c17d0571fa1d91a7123a698446e4 Mon Sep 17 00:00:00 2001 From: Aivaz Latypov Date: Tue, 25 Jun 2024 13:12:02 +0500 Subject: [PATCH 680/778] ALSA: hda/relatek: Enable Mute LED on HP Laptop 15-gw0xxx This HP Laptop uses ALC236 codec with COEF 0x07 controlling the mute LED. Enable existing quirk for this device. Signed-off-by: Aivaz Latypov Link: https://patch.msgid.link/20240625081217.1049-1-reichaivaz@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f4454abadc8d..cb9b11da5581 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10069,6 +10069,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x87d3, "HP Laptop 15-gw0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f1, "HP ProBook 630 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), From 6a7db25aad8ce6512b366d2ce1d0e60bac00a09d Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 20 Jun 2024 10:40:18 +0800 Subject: [PATCH 681/778] ALSA: dmaengine_pcm: terminate dmaengine before synchronize When dmaengine supports pause function, in suspend state, dmaengine_pause() is called instead of dmaengine_terminate_async(), In end of playback stream, the runtime->state will go to SNDRV_PCM_STATE_DRAINING, if system suspend & resume happen at this time, application will not resume playback stream, the stream will be closed directly, the dmaengine_terminate_async() will not be called before the dmaengine_synchronize(), which violates the call sequence for dmaengine_synchronize(). This behavior also happens for capture streams, but there is no SNDRV_PCM_STATE_DRAINING state for capture. So use dmaengine_tx_status() to check the DMA status if the status is DMA_PAUSED, then call dmaengine_terminate_async() to terminate dmaengine before dmaengine_synchronize(). Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/1718851218-27803-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Takashi Iwai --- sound/core/pcm_dmaengine.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c index 12aa1cef11a1..7346c993a067 100644 --- a/sound/core/pcm_dmaengine.c +++ b/sound/core/pcm_dmaengine.c @@ -358,6 +358,12 @@ EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_open_request_chan); int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream) { struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream); + struct dma_tx_state state; + enum dma_status status; + + status = dmaengine_tx_status(prtd->dma_chan, prtd->cookie, &state); + if (status == DMA_PAUSED) + dmaengine_terminate_async(prtd->dma_chan); dmaengine_synchronize(prtd->dma_chan); kfree(prtd); @@ -378,6 +384,12 @@ EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_close); int snd_dmaengine_pcm_close_release_chan(struct snd_pcm_substream *substream) { struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream); + struct dma_tx_state state; + enum dma_status status; + + status = dmaengine_tx_status(prtd->dma_chan, prtd->cookie, &state); + if (status == DMA_PAUSED) + dmaengine_terminate_async(prtd->dma_chan); dmaengine_synchronize(prtd->dma_chan); dma_release_channel(prtd->dma_chan); From b1c4b4d45263241ec6c2405a8df8265d4b58e707 Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Fri, 21 Jun 2024 15:34:22 -0700 Subject: [PATCH 682/778] net: dsa: microchip: fix wrong register write when masking interrupt The switch global port interrupt mask, REG_SW_PORT_INT_MASK__4, is defined as 0x001C in ksz9477_reg.h. The designers used 32-bit value in anticipation for increase of port count in future product but currently the maximum port count is 7 and the effective value is 0x7F in register 0x001F. Each port has its own interrupt mask and is defined as 0x#01F. It uses only 4 bits for different interrupts. The developer who implemented the current interrupt mechanism in the switch driver noticed there are similarities between the mechanism to mask port interrupts in global interrupt and individual interrupts in each port and so used the same code to handle these interrupts. He updated the code to use the new macro REG_SW_PORT_INT_MASK__1 which is defined as 0x1F in ksz_common.h but he forgot to update the 32-bit write to 8-bit as now the mask registers are 0x1F and 0x#01F. In addition all KSZ switches other than the KSZ9897/KSZ9893 and LAN937X families use only 8-bit access and so this common code will eventually be changed to accommodate them. Fixes: e1add7dd6183 ("net: dsa: microchip: use common irq routines for girq and pirq") Signed-off-by: Tristram Ha Link: https://lore.kernel.org/r/1719009262-2948-1-git-send-email-Tristram.Ha@microchip.com Signed-off-by: Paolo Abeni --- drivers/net/dsa/microchip/ksz_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 0433109b42e5..0580b2fee21c 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2196,7 +2196,7 @@ static void ksz_irq_bus_sync_unlock(struct irq_data *d) struct ksz_device *dev = kirq->dev; int ret; - ret = ksz_write32(dev, kirq->reg_mask, kirq->masked); + ret = ksz_write8(dev, kirq->reg_mask, kirq->masked); if (ret) dev_err(dev->dev, "failed to change IRQ mask\n"); From 529038146ba189f7551d64faf4f4871e4ab97538 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 22 Jun 2024 14:26:33 +0200 Subject: [PATCH 683/778] thermal: gov_step_wise: Go straight to instance->lower when mitigation is over Commit b6846826982b ("thermal: gov_step_wise: Restore passive polling management") attempted to fix a Step-Wise thermal governor issue introduced by commit 042a3d80f118 ("thermal: core: Move passive polling management to the core"), which caused the governor to leave cooling devices in high states, by partially reverting that commit. However, this turns out to be insufficient on some systems due to interactions between the governor code restored by commit b6846826982b and the passive polling management in the thermal core. For this reason, revert commit b6846826982b and make the governor set the target cooling device state to the "lower" one as soon as the zone temperature falls below the threshold of the trip point corresponding to the given thermal instance, which means that thermal mitigation is not necessary any more. Before this change the "lower" cooling device state would be reached in steps through the passive polling mechanism which was questionable for three reasons: (1) cooling device were kept in high states when that was not necessary (and it could adversely impact performance), (2) it only worked for thermal zones with nonzero passive_delay_jiffies value, and (3) passive polling belongs to the core and should not be hijacked by governors for their internal purposes. Fixes: b6846826982b ("thermal: gov_step_wise: Restore passive polling management") Closes: https://lore.kernel.org/linux-pm/6759ce9f-281d-4fcd-bb4c-b784a1cc5f6e@oldschoolsolutions.biz Reported-by: Jens Glathe Tested-by: Jens Glathe Link: https://patch.msgid.link/12464461.O9o76ZdvQC@rjwysocki.net Signed-off-by: Rafael J. Wysocki Tested-by: Steev Klimaszewski Tested-by: Johan Hovold --- drivers/thermal/gov_step_wise.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c index 65974fe8be0d..fd5527188cf9 100644 --- a/drivers/thermal/gov_step_wise.c +++ b/drivers/thermal/gov_step_wise.c @@ -55,7 +55,11 @@ static unsigned long get_target_state(struct thermal_instance *instance, if (cur_state <= instance->lower) return THERMAL_NO_TARGET; - return clamp(cur_state - 1, instance->lower, instance->upper); + /* + * If 'throttle' is false, no mitigation is necessary, so + * request the lower state for this instance. + */ + return instance->lower; } return instance->target; @@ -93,23 +97,6 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, if (instance->initialized && old_target == instance->target) continue; - if (trip->type == THERMAL_TRIP_PASSIVE) { - /* - * If the target state for this thermal instance - * changes from THERMAL_NO_TARGET to something else, - * ensure that the zone temperature will be updated - * (assuming enabled passive cooling) until it becomes - * THERMAL_NO_TARGET again, or the cooling device may - * not be reset to its initial state. - */ - if (old_target == THERMAL_NO_TARGET && - instance->target != THERMAL_NO_TARGET) - tz->passive++; - else if (old_target != THERMAL_NO_TARGET && - instance->target == THERMAL_NO_TARGET) - tz->passive--; - } - instance->initialized = true; mutex_lock(&instance->cdev->lock); From cea5589e958f8aef301ce9d004bc73fa5bb3b304 Mon Sep 17 00:00:00 2001 From: Jens Remus Date: Thu, 20 Jun 2024 11:12:49 +0200 Subject: [PATCH 684/778] s390/boot: Do not adjust GOT entries for undef weak sym Since commit 778666df60f0 ("s390: compile relocatable kernel without -fPIE") and commit 00cda11d3b2e ("s390: Compile kernel with -fPIC and link with -no-pie") the kernel on s390x may have a Global Offset Table (GOT) whose entries are adjusted for KASLR in kaslr_adjust_got(). The GOT may contain entries for undefined weak symbols that resolved to zero. That is the resulting GOT entry value is zero. Adjusting those entries unconditionally in kaslr_adjust_got() is wrong. Otherwise the following sample code would erroneously assume foo to be defined, due to the adjustment changing the zero-value to a non-zero one: extern int foo __attribute__((weak)); if (*foo) /* foo is defined [or undefined and erroneously adjusted] */ The vmlinux build at commit 00cda11d3b2e ("s390: Compile kernel with -fPIC and link with -no-pie") with defconfig actually had two GOT entries for the undefined weak symbols __start_BTF and __stop_BTF: $ objdump -tw vmlinux | grep -F "*UND*" 0000000000000000 w *UND* 0000000000000000 __stop_BTF 0000000000000000 w *UND* 0000000000000000 __start_BTF $ readelf -rw vmlinux | grep -E "R_390_GOTENT +0{16}" 000000345760 2776a0000001a R_390_GOTENT 0000000000000000 __stop_BTF + 2 000000345766 2d5480000001a R_390_GOTENT 0000000000000000 __start_BTF + 2 The s390-specific vmlinux linker script sets the section start to __START_KERNEL, which is currently defined as 0x100000 on s390x. Access to lowcore is performed via a pointer of 0 and not a symbol in a section starting at 0. The first 64K are reserved for the loader on s390x. Thus it is safe to assume that __START_KERNEL will never be 0. As a result there cannot be any defined symbols resolving to zero in the kernel. Note that the first three GOT entries are reserved for the dynamic loader on s390x. [1] In the kernel they are zero. Therefore no extra handling is required to skip these. Skip adjusting GOT entries with a value of zero in kaslr_adjust_got(). While at it update the comment when a GOT exists on s390x. Since commit 00cda11d3b2e ("s390: Compile kernel with -fPIC and link with -no-pie") it no longer only exists when compiling with Clang, but also with GCC. [1]: s390x ELF ABI, section "Global Offset Table", https://github.com/IBM/s390x-abi/releases Fixes: 778666df60f0 ("s390: compile relocatable kernel without -fPIE") Reviewed-by: Ilya Leoshkevich Acked-by: Sumanth Korikkar Acked-by: Alexander Gordeev Signed-off-by: Jens Remus Signed-off-by: Alexander Gordeev --- arch/s390/boot/startup.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 48ef5fe5c08a..5a36d5538dae 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -170,11 +170,14 @@ static void kaslr_adjust_got(unsigned long offset) u64 *entry; /* - * Even without -fPIE, Clang still uses a global offset table for some - * reason. Adjust the GOT entries. + * Adjust GOT entries, except for ones for undefined weak symbols + * that resolved to zero. This also skips the first three reserved + * entries on s390x that are zero. */ - for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) - *entry += offset - __START_KERNEL; + for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) { + if (*entry) + *entry += offset - __START_KERNEL; + } } /* From d3882564a77c21eb746ba5364f3fa89b88de3d61 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Jun 2024 14:16:37 +0200 Subject: [PATCH 685/778] syscalls: fix compat_sys_io_pgetevents_time64 usage Using sys_io_pgetevents() as the entry point for compat mode tasks works almost correctly, but misses the sign extension for the min_nr and nr arguments. This was addressed on parisc by switching to compat_sys_io_pgetevents_time64() in commit 6431e92fc827 ("parisc: io_pgetevents_time64() needs compat syscall in 32-bit compat mode"), as well as by using more sophisticated system call wrappers on x86 and s390. However, arm64, mips, powerpc, sparc and riscv still have the same bug. Change all of them over to use compat_sys_io_pgetevents_time64() like parisc already does. This was clearly the intention when the function was originally added, but it got hooked up incorrectly in the tables. Cc: stable@vger.kernel.org Fixes: 48166e6ea47d ("y2038: add 64-bit time_t syscalls to all 32-bit architectures") Acked-by: Heiko Carstens # s390 Signed-off-by: Arnd Bergmann --- arch/arm64/include/asm/unistd32.h | 2 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- include/uapi/asm-generic/unistd.h | 2 +- kernel/sys_ni.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 266b96acc014..1386e8e751f2 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -840,7 +840,7 @@ __SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SYSCALL(__NR_io_pgetevents_time64, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index cc869f5d5693..953f5b7dc723 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -354,7 +354,7 @@ 412 n32 utimensat_time64 sys_utimensat 413 n32 pselect6_time64 compat_sys_pselect6_time64 414 n32 ppoll_time64 compat_sys_ppoll_time64 -416 n32 io_pgetevents_time64 sys_io_pgetevents +416 n32 io_pgetevents_time64 compat_sys_io_pgetevents_time64 417 n32 recvmmsg_time64 compat_sys_recvmmsg_time64 418 n32 mq_timedsend_time64 sys_mq_timedsend 419 n32 mq_timedreceive_time64 sys_mq_timedreceive diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 81428a2eb660..2439a2491cff 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -403,7 +403,7 @@ 412 o32 utimensat_time64 sys_utimensat sys_utimensat 413 o32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 o32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 o32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 o32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 o32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 o32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 o32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 3656f1ca7a21..c6b0546b284d 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -502,7 +502,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index bd0fee24ad10..01071182763e 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -418,7 +418,7 @@ 412 32 utimensat_time64 - sys_utimensat 413 32 pselect6_time64 - compat_sys_pselect6_time64 414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - sys_io_pgetevents +416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 - sys_mq_timedsend 419 32 mq_timedreceive_time64 - sys_mq_timedreceive diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index ac6c281ccfe0..b354139b40be 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -461,7 +461,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 7fd1f57ad3d3..d6ebcab1d8b2 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -420,7 +420,7 @@ 412 i386 utimensat_time64 sys_utimensat 413 i386 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 i386 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 i386 io_pgetevents_time64 sys_io_pgetevents +416 i386 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 i386 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 i386 mq_timedsend_time64 sys_mq_timedsend 419 i386 mq_timedreceive_time64 sys_mq_timedreceive diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index d983c48a3b6a..d4cc26932ff4 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index d7eee421d4bc..b696b85ac63e 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -46,8 +46,8 @@ COND_SYSCALL(io_getevents_time32); COND_SYSCALL(io_getevents); COND_SYSCALL(io_pgetevents_time32); COND_SYSCALL(io_pgetevents); -COND_SYSCALL_COMPAT(io_pgetevents_time32); COND_SYSCALL_COMPAT(io_pgetevents); +COND_SYSCALL_COMPAT(io_pgetevents_time64); COND_SYSCALL(io_uring_setup); COND_SYSCALL(io_uring_enter); COND_SYSCALL(io_uring_register); From bae6428a9fffb2023191b0723e276cf1377a7c9f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jun 2024 14:07:30 +0200 Subject: [PATCH 686/778] sparc: fix old compat_sys_select() sparc has two identical select syscalls at numbers 93 and 230, respectively. During the conversion to the modern syscall.tbl format, the older one of the two broke in compat mode, and now refers to the native 64-bit syscall. Restore the correct behavior. This has very little effect, as glibc has been using the newer number anyway. Fixes: 6ff645dd683a ("sparc: add system call table generation support") Signed-off-by: Arnd Bergmann --- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index b354139b40be..5e55f73f9880 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -117,7 +117,7 @@ 90 common dup2 sys_dup2 91 32 setfsuid32 sys_setfsuid 92 common fcntl sys_fcntl compat_sys_fcntl -93 common select sys_select +93 common select sys_select compat_sys_select 94 32 setfsgid32 sys_setfsgid 95 common fsync sys_fsync 96 common setpriority sys_setpriority From d6fbd26fb872ec518d25433a12e8ce8163e20909 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jun 2024 12:49:39 +0200 Subject: [PATCH 687/778] sparc: fix compat recv/recvfrom syscalls sparc has the wrong compat version of recv() and recvfrom() for both the direct syscalls and socketcall(). The direct syscalls just need to use the compat version. For socketcall, the same thing could be done, but it seems better to completely remove the custom assembler code for it and just use the same implementation that everyone else has. Fixes: 1dacc76d0014 ("net/compat/wext: send different messages to compat tasks") Signed-off-by: Arnd Bergmann --- arch/sparc/kernel/sys32.S | 221 ------------------------- arch/sparc/kernel/syscalls/syscall.tbl | 4 +- 2 files changed, 2 insertions(+), 223 deletions(-) diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index a45f0f31fe51..a3d308f2043e 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -18,224 +18,3 @@ sys32_mmap2: sethi %hi(sys_mmap), %g1 jmpl %g1 + %lo(sys_mmap), %g0 sllx %o5, 12, %o5 - - .align 32 - .globl sys32_socketcall -sys32_socketcall: /* %o0=call, %o1=args */ - cmp %o0, 1 - bl,pn %xcc, do_einval - cmp %o0, 18 - bg,pn %xcc, do_einval - sub %o0, 1, %o0 - sllx %o0, 5, %o0 - sethi %hi(__socketcall_table_begin), %g2 - or %g2, %lo(__socketcall_table_begin), %g2 - jmpl %g2 + %o0, %g0 - nop -do_einval: - retl - mov -EINVAL, %o0 - - .align 32 -__socketcall_table_begin: - - /* Each entry is exactly 32 bytes. */ -do_sys_socket: /* sys_socket(int, int, int) */ -1: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_socket), %g1 -2: ldswa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_socket), %g0 -3: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */ -4: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_bind), %g1 -5: ldswa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_bind), %g0 -6: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */ -7: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_connect), %g1 -8: ldswa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_connect), %g0 -9: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_listen: /* sys_listen(int, int) */ -10: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_listen), %g1 - jmpl %g1 + %lo(sys_listen), %g0 -11: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop - nop - nop -do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */ -12: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_accept), %g1 -13: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_accept), %g0 -14: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */ -15: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_getsockname), %g1 -16: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_getsockname), %g0 -17: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */ -18: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_getpeername), %g1 -19: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(sys_getpeername), %g0 -20: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */ -21: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_socketpair), %g1 -22: ldswa [%o1 + 0x8] %asi, %o2 -23: lduwa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_socketpair), %g0 -24: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop -do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */ -25: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_send), %g1 -26: lduwa [%o1 + 0x8] %asi, %o2 -27: lduwa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_send), %g0 -28: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop -do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */ -29: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_recv), %g1 -30: lduwa [%o1 + 0x8] %asi, %o2 -31: lduwa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_recv), %g0 -32: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop -do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */ -33: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_sendto), %g1 -34: lduwa [%o1 + 0x8] %asi, %o2 -35: lduwa [%o1 + 0xc] %asi, %o3 -36: lduwa [%o1 + 0x10] %asi, %o4 -37: ldswa [%o1 + 0x14] %asi, %o5 - jmpl %g1 + %lo(sys_sendto), %g0 -38: lduwa [%o1 + 0x4] %asi, %o1 -do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */ -39: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_recvfrom), %g1 -40: lduwa [%o1 + 0x8] %asi, %o2 -41: lduwa [%o1 + 0xc] %asi, %o3 -42: lduwa [%o1 + 0x10] %asi, %o4 -43: lduwa [%o1 + 0x14] %asi, %o5 - jmpl %g1 + %lo(sys_recvfrom), %g0 -44: lduwa [%o1 + 0x4] %asi, %o1 -do_sys_shutdown: /* sys_shutdown(int, int) */ -45: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_shutdown), %g1 - jmpl %g1 + %lo(sys_shutdown), %g0 -46: ldswa [%o1 + 0x4] %asi, %o1 - nop - nop - nop - nop -do_sys_setsockopt: /* sys_setsockopt(int, int, int, char *, int) */ -47: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_setsockopt), %g1 -48: ldswa [%o1 + 0x8] %asi, %o2 -49: lduwa [%o1 + 0xc] %asi, %o3 -50: ldswa [%o1 + 0x10] %asi, %o4 - jmpl %g1 + %lo(sys_setsockopt), %g0 -51: ldswa [%o1 + 0x4] %asi, %o1 - nop -do_sys_getsockopt: /* sys_getsockopt(int, int, int, u32, u32) */ -52: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_getsockopt), %g1 -53: ldswa [%o1 + 0x8] %asi, %o2 -54: lduwa [%o1 + 0xc] %asi, %o3 -55: lduwa [%o1 + 0x10] %asi, %o4 - jmpl %g1 + %lo(sys_getsockopt), %g0 -56: ldswa [%o1 + 0x4] %asi, %o1 - nop -do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */ -57: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(compat_sys_sendmsg), %g1 -58: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(compat_sys_sendmsg), %g0 -59: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */ -60: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(compat_sys_recvmsg), %g1 -61: lduwa [%o1 + 0x8] %asi, %o2 - jmpl %g1 + %lo(compat_sys_recvmsg), %g0 -62: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - nop -do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */ -63: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(sys_accept4), %g1 -64: lduwa [%o1 + 0x8] %asi, %o2 -65: ldswa [%o1 + 0xc] %asi, %o3 - jmpl %g1 + %lo(sys_accept4), %g0 -66: lduwa [%o1 + 0x4] %asi, %o1 - nop - nop - - .section __ex_table,"a" - .align 4 - .word 1b, __retl_efault, 2b, __retl_efault - .word 3b, __retl_efault, 4b, __retl_efault - .word 5b, __retl_efault, 6b, __retl_efault - .word 7b, __retl_efault, 8b, __retl_efault - .word 9b, __retl_efault, 10b, __retl_efault - .word 11b, __retl_efault, 12b, __retl_efault - .word 13b, __retl_efault, 14b, __retl_efault - .word 15b, __retl_efault, 16b, __retl_efault - .word 17b, __retl_efault, 18b, __retl_efault - .word 19b, __retl_efault, 20b, __retl_efault - .word 21b, __retl_efault, 22b, __retl_efault - .word 23b, __retl_efault, 24b, __retl_efault - .word 25b, __retl_efault, 26b, __retl_efault - .word 27b, __retl_efault, 28b, __retl_efault - .word 29b, __retl_efault, 30b, __retl_efault - .word 31b, __retl_efault, 32b, __retl_efault - .word 33b, __retl_efault, 34b, __retl_efault - .word 35b, __retl_efault, 36b, __retl_efault - .word 37b, __retl_efault, 38b, __retl_efault - .word 39b, __retl_efault, 40b, __retl_efault - .word 41b, __retl_efault, 42b, __retl_efault - .word 43b, __retl_efault, 44b, __retl_efault - .word 45b, __retl_efault, 46b, __retl_efault - .word 47b, __retl_efault, 48b, __retl_efault - .word 49b, __retl_efault, 50b, __retl_efault - .word 51b, __retl_efault, 52b, __retl_efault - .word 53b, __retl_efault, 54b, __retl_efault - .word 55b, __retl_efault, 56b, __retl_efault - .word 57b, __retl_efault, 58b, __retl_efault - .word 59b, __retl_efault, 60b, __retl_efault - .word 61b, __retl_efault, 62b, __retl_efault - .word 63b, __retl_efault, 64b, __retl_efault - .word 65b, __retl_efault, 66b, __retl_efault - .previous diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 5e55f73f9880..cfdfb3707c16 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -155,7 +155,7 @@ 123 32 fchown sys_fchown16 123 64 fchown sys_fchown 124 common fchmod sys_fchmod -125 common recvfrom sys_recvfrom +125 common recvfrom sys_recvfrom compat_sys_recvfrom 126 32 setreuid sys_setreuid16 126 64 setreuid sys_setreuid 127 32 setregid sys_setregid16 @@ -247,7 +247,7 @@ 204 32 readdir sys_old_readdir compat_sys_old_readdir 204 64 readdir sys_nis_syscall 205 common readahead sys_readahead compat_sys_readahead -206 common socketcall sys_socketcall sys32_socketcall +206 common socketcall sys_socketcall compat_sys_socketcall 207 common syslog sys_syslog 208 common lookup_dcookie sys_ni_syscall 209 common fadvise64 sys_fadvise64 compat_sys_fadvise64 From 20a50787349fadf66ac5c48f62e58d753878d2bb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jun 2024 14:27:55 +0200 Subject: [PATCH 688/778] parisc: use correct compat recv/recvfrom syscalls Johannes missed parisc back when he introduced the compat version of these syscalls, so receiving cmsg messages that require a compat conversion is still broken. Use the correct calls like the other architectures do. Fixes: 1dacc76d0014 ("net/compat/wext: send different messages to compat tasks") Acked-by: Helge Deller Signed-off-by: Arnd Bergmann --- arch/parisc/kernel/syscalls/syscall.tbl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index b13c21373974..39e67fab7515 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -108,7 +108,7 @@ 95 common fchown sys_fchown 96 common getpriority sys_getpriority 97 common setpriority sys_setpriority -98 common recv sys_recv +98 common recv sys_recv compat_sys_recv 99 common statfs sys_statfs compat_sys_statfs 100 common fstatfs sys_fstatfs compat_sys_fstatfs 101 common stat64 sys_stat64 @@ -135,7 +135,7 @@ 120 common clone sys_clone_wrapper 121 common setdomainname sys_setdomainname 122 common sendfile sys_sendfile compat_sys_sendfile -123 common recvfrom sys_recvfrom +123 common recvfrom sys_recvfrom compat_sys_recvfrom 124 32 adjtimex sys_adjtimex_time32 124 64 adjtimex sys_adjtimex 125 common mprotect sys_mprotect From 403f17a330732a666ae793f3b15bc75bb5540524 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 7 Jun 2024 13:40:45 +0200 Subject: [PATCH 689/778] parisc: use generic sys_fanotify_mark implementation The sys_fanotify_mark() syscall on parisc uses the reverse word order for the two halves of the 64-bit argument compared to all syscalls on all 32-bit architectures. As far as I can tell, the problem is that the function arguments on parisc are sorted backwards (26, 25, 24, 23, ...) compared to everyone else, so the calling conventions of using an even/odd register pair in native word order result in the lower word coming first in function arguments, matching the expected behavior on little-endian architectures. The system call conventions however ended up matching what the other 32-bit architectures do. A glibc cleanup in 2020 changed the userspace behavior in a way that handles all architectures consistently, but this inadvertently broke parisc32 by changing to the same method as everyone else. The change made it into glibc-2.35 and subsequently into debian 12 (bookworm), which is the latest stable release. This means we need to choose between reverting the glibc change or changing the kernel to match it again, but either hange will leave some systems broken. Pick the option that is more likely to help current and future users and change the kernel to match current glibc. This also means the behavior is now consistent across architectures, but it breaks running new kernels with old glibc builds before 2.35. Link: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=d150181d73d9 Link: https://git.kernel.org/pub/scm/linux/kernel/git/history/history.git/commit/arch/parisc/kernel/sys_parisc.c?h=57b1dfbd5b4a39d Cc: Adhemerval Zanella Tested-by: Helge Deller Acked-by: Helge Deller Signed-off-by: Arnd Bergmann --- I found this through code inspection, please double-check to make sure I got the bug and the fix right. The alternative is to fix this by reverting glibc back to the unusual behavior. --- arch/parisc/Kconfig | 1 + arch/parisc/kernel/sys_parisc32.c | 9 --------- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index daafeb20f993..dc9b902de8ea 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -16,6 +16,7 @@ config PARISC select ARCH_HAS_UBSAN select ARCH_HAS_PTE_SPECIAL select ARCH_NO_SG_CHAIN + select ARCH_SPLIT_ARG64 if !64BIT select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_STACKWALK diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 2a12a547b447..826c8e51b585 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -23,12 +23,3 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23, current->comm, current->pid, r20); return -ENOSYS; } - -asmlinkage long sys32_fanotify_mark(compat_int_t fanotify_fd, compat_uint_t flags, - compat_uint_t mask0, compat_uint_t mask1, compat_int_t dfd, - const char __user * pathname) -{ - return sys_fanotify_mark(fanotify_fd, flags, - ((__u64)mask1 << 32) | mask0, - dfd, pathname); -} diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 39e67fab7515..66dc406b12e4 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -364,7 +364,7 @@ 320 common accept4 sys_accept4 321 common prlimit64 sys_prlimit64 322 common fanotify_init sys_fanotify_init -323 common fanotify_mark sys_fanotify_mark sys32_fanotify_mark +323 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark 324 32 clock_adjtime sys_clock_adjtime32 324 64 clock_adjtime sys_clock_adjtime 325 common name_to_handle_at sys_name_to_handle_at From b1e31c134a8ab2e8f5fd62323b6b45a950ac704d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Apr 2024 16:36:13 +0200 Subject: [PATCH 690/778] powerpc: restore some missing spu syscalls A couple of system calls were inadventently removed from the table during a bugfix for 32-bit powerpc entry. Restore the original behavior. Fixes: e23750623835 ("powerpc/32: fix syscall wrappers with 64-bit arguments of unaligned register-pairs") Acked-by: Michael Ellerman Signed-off-by: Arnd Bergmann --- arch/powerpc/kernel/syscalls/syscall.tbl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index c6b0546b284d..ebae8415dfbb 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create From 30766f1105d6d2459c3b9fe34a3e52b637a72950 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 11 Jun 2024 22:12:43 +0200 Subject: [PATCH 691/778] sh: rework sync_file_range ABI The unusual function calling conventions on SuperH ended up causing sync_file_range to have the wrong argument order, with the 'flags' argument getting sorted before 'nbytes' by the compiler. In userspace, I found that musl, glibc, uclibc and strace all expect the normal calling conventions with 'nbytes' last, so changing the kernel to match them should make all of those work. In order to be able to also fix libc implementations to work with existing kernels, they need to be able to tell which ABI is used. An easy way to do this is to add yet another system call using the sync_file_range2 ABI that works the same on all architectures. Old user binaries can now work on new kernels, and new binaries can try the new sync_file_range2() to work with new kernels or fall back to the old sync_file_range() version if that doesn't exist. Cc: stable@vger.kernel.org Fixes: 75c92acdd5b1 ("sh: Wire up new syscalls.") Acked-by: John Paul Adrian Glaubitz Signed-off-by: Arnd Bergmann --- arch/sh/kernel/sys_sh32.c | 11 +++++++++++ arch/sh/kernel/syscalls/syscall.tbl | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index 9dca568509a5..d6f4afcb0e87 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -59,3 +59,14 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1, (u64)len0 << 32 | len1, advice); #endif } + +/* + * swap the arguments the way that libc wants them instead of + * moving flags ahead of the 64-bit nbytes argument + */ +SYSCALL_DEFINE6(sh_sync_file_range6, int, fd, SC_ARG64(offset), + SC_ARG64(nbytes), unsigned int, flags) +{ + return ksys_sync_file_range(fd, SC_VAL64(loff_t, offset), + SC_VAL64(loff_t, nbytes), flags); +} diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index bbf83a2db986..c55fd7696d40 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -321,7 +321,7 @@ 311 common set_robust_list sys_set_robust_list 312 common get_robust_list sys_get_robust_list 313 common splice sys_splice -314 common sync_file_range sys_sync_file_range +314 common sync_file_range sys_sh_sync_file_range6 315 common tee sys_tee 316 common vmsplice sys_vmsplice 317 common move_pages sys_move_pages @@ -395,6 +395,7 @@ 385 common pkey_alloc sys_pkey_alloc 386 common pkey_free sys_pkey_free 387 common rseq sys_rseq +388 common sync_file_range2 sys_sync_file_range2 # room for arch specific syscalls 393 common semget sys_semget 394 common semctl sys_semctl From 3339b99ef6fe38dac43b534cba3a8a0e29fb2eff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jun 2024 09:54:20 +0200 Subject: [PATCH 692/778] csky, hexagon: fix broken sys_sync_file_range Both of these architectures require u64 function arguments to be passed in even/odd pairs of registers or stack slots, which in case of sync_file_range would result in a seven-argument system call that is not currently possible. The system call is therefore incompatible with all existing binaries. While it would be possible to implement support for seven arguments like on mips, it seems better to use a six-argument version, either with the normal argument order but misaligned as on most architectures or with the reordered sync_file_range2() calling conventions as on arm and powerpc. Cc: stable@vger.kernel.org Acked-by: Guo Ren Signed-off-by: Arnd Bergmann --- arch/csky/include/uapi/asm/unistd.h | 1 + arch/hexagon/include/uapi/asm/unistd.h | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h index 7ff6a2466af1..e0594b6370a6 100644 --- a/arch/csky/include/uapi/asm/unistd.h +++ b/arch/csky/include/uapi/asm/unistd.h @@ -6,6 +6,7 @@ #define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYNC_FILE_RANGE2 #include #define __NR_set_thread_area (__NR_arch_specific_syscall + 0) diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h index 432c4db1b623..21ae22306b5d 100644 --- a/arch/hexagon/include/uapi/asm/unistd.h +++ b/arch/hexagon/include/uapi/asm/unistd.h @@ -36,5 +36,6 @@ #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYNC_FILE_RANGE2 #include From 896842284c6ccba25ec9d78b7b6e62cdd507c083 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Jun 2024 15:24:11 +0200 Subject: [PATCH 693/778] hexagon: fix fadvise64_64 calling conventions fadvise64_64() has two 64-bit arguments at the wrong alignment for hexagon, which turns them into a 7-argument syscall that is not supported by Linux. The downstream musl port for hexagon actually asks for a 6-argument version the same way we do it on arm, csky, powerpc, so make the kernel do it the same way to avoid having to change both. Link: https://github.com/quic/musl/blob/hexagon/arch/hexagon/syscall_arch.h#L78 Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann --- arch/hexagon/include/asm/syscalls.h | 6 ++++++ arch/hexagon/kernel/syscalltab.c | 7 +++++++ 2 files changed, 13 insertions(+) create mode 100644 arch/hexagon/include/asm/syscalls.h diff --git a/arch/hexagon/include/asm/syscalls.h b/arch/hexagon/include/asm/syscalls.h new file mode 100644 index 000000000000..40f2d08bec92 --- /dev/null +++ b/arch/hexagon/include/asm/syscalls.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +asmlinkage long sys_hexagon_fadvise64_64(int fd, int advice, + u32 a2, u32 a3, u32 a4, u32 a5); diff --git a/arch/hexagon/kernel/syscalltab.c b/arch/hexagon/kernel/syscalltab.c index 0fadd582cfc7..5d98bdc494ec 100644 --- a/arch/hexagon/kernel/syscalltab.c +++ b/arch/hexagon/kernel/syscalltab.c @@ -14,6 +14,13 @@ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), +SYSCALL_DEFINE6(hexagon_fadvise64_64, int, fd, int, advice, + SC_ARG64(offset), SC_ARG64(len)) +{ + return ksys_fadvise64_64(fd, SC_VAL64(loff_t, offset), SC_VAL64(loff_t, len), advice); +} +#define sys_fadvise64_64 sys_hexagon_fadvise64_64 + void *sys_call_table[__NR_syscalls] = { #include }; From 5daf62da52ecd5761d63cbb6489eb434645547df Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 4 Jun 2024 23:46:36 +0200 Subject: [PATCH 694/778] s390: remove native mmap2() syscall The mmap2() syscall has never been used on 64-bit s390x and should have been removed as part of 5a79859ae0f3 ("s390: remove 31 bit support"). Remove it now. Acked-by: Heiko Carstens Signed-off-by: Arnd Bergmann --- arch/s390/kernel/syscall.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index dc2355c623d6..50cbcbbaa03d 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -38,33 +38,6 @@ #include "entry.h" -/* - * Perform the mmap() system call. Linux for S/390 isn't able to handle more - * than 5 system call parameters, so this system call uses a memory block - * for parameter passing. - */ - -struct s390_mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) -{ - struct s390_mmap_arg_struct a; - int error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -out: - return error; -} - #ifdef CONFIG_SYSVIPC /* * sys_ipc() is the de-multiplexer for the SysV IPC calls. From 295f10061af024099440b46602bcc47364551db7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 4 Jun 2024 14:20:26 +0200 Subject: [PATCH 695/778] syscalls: mmap(): use unsigned offset type consistently Most architectures that implement the old-style mmap() with byte offset use 'unsigned long' as the type for that offset, but microblaze and riscv have the off_t type that is shared with userspace, matching the prototype in include/asm-generic/syscalls.h. Make this consistent by using an unsigned argument everywhere. This changes the behavior slightly, as the argument is shifted to a page number, and an user input with the top bit set would result in a negative page offset rather than a large one as we use elsewhere. For riscv, the 32-bit sys_mmap2() definition actually used a custom type that is different from the global declaration, but this was missed due to an incorrect type check. Signed-off-by: Arnd Bergmann --- arch/csky/kernel/syscall.c | 2 +- arch/loongarch/kernel/syscall.c | 2 +- arch/microblaze/kernel/sys_microblaze.c | 2 +- arch/riscv/kernel/sys_riscv.c | 4 ++-- include/asm-generic/syscalls.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/csky/kernel/syscall.c b/arch/csky/kernel/syscall.c index 3d30e58a45d2..4540a271ee39 100644 --- a/arch/csky/kernel/syscall.c +++ b/arch/csky/kernel/syscall.c @@ -20,7 +20,7 @@ SYSCALL_DEFINE6(mmap2, unsigned long, prot, unsigned long, flags, unsigned long, fd, - off_t, offset) + unsigned long, offset) { if (unlikely(offset & (~PAGE_MASK >> 12))) return -EINVAL; diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index b4c5acd7aa3b..8801611143ab 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -22,7 +22,7 @@ #define __SYSCALL(nr, call) [nr] = (call), SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, - prot, unsigned long, flags, unsigned long, fd, off_t, offset) + prot, unsigned long, flags, unsigned long, fd, unsigned long, offset) { if (offset & ~PAGE_MASK) return -EINVAL; diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index ed9f34da1a2a..0850b099f300 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -35,7 +35,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, - off_t, pgoff) + unsigned long, pgoff) { if (pgoff & ~PAGE_MASK) return -EINVAL; diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 64155323cc92..d77afe05578f 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -23,7 +23,7 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, #ifdef CONFIG_64BIT SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0); } @@ -32,7 +32,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { /* * Note that the shift for mmap2 is constant (12), diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index 933ca6581aba..fabcefe8a80a 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -19,7 +19,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, #ifndef sys_mmap asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long fd, off_t pgoff); + unsigned long fd, unsigned long off); #endif #ifndef sys_rt_sigreturn From 0fa8ab5f3533b307a7d0e438ab08ecd92725dad7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Jun 2024 14:47:27 +0200 Subject: [PATCH 696/778] linux/syscalls.h: add missing __user annotations A couple of declarations in linux/syscalls.h are missing __user annotations on their pointers, which can lead to warnings from sparse because these don't match the implementation that have the correct address space annotations. Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ba9337709878..63424af87bba 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -322,13 +322,13 @@ asmlinkage long sys_io_pgetevents(aio_context_t ctx_id, long nr, struct io_event __user *events, struct __kernel_timespec __user *timeout, - const struct __aio_sigset *sig); + const struct __aio_sigset __user *sig); asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id, long min_nr, long nr, struct io_event __user *events, struct old_timespec32 __user *timeout, - const struct __aio_sigset *sig); + const struct __aio_sigset __user *sig); asmlinkage long sys_io_uring_setup(u32 entries, struct io_uring_params __user *p); asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit, @@ -441,7 +441,7 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); asmlinkage long sys_openat2(int dfd, const char __user *filename, - struct open_how *how, size_t size); + struct open_how __user *how, size_t size); asmlinkage long sys_close(unsigned int fd); asmlinkage long sys_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); @@ -555,7 +555,7 @@ asmlinkage long sys_get_robust_list(int pid, asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); -asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, +asmlinkage long sys_futex_waitv(struct futex_waitv __user *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); @@ -907,7 +907,7 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); -asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); +asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size); asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); @@ -960,11 +960,11 @@ asmlinkage long sys_cachestat(unsigned int fd, struct cachestat_range __user *cstat_range, struct cachestat __user *cstat, unsigned int flags); asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); -asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, - u32 *size, u32 flags); -asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, +asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx __user *ctx, + u32 __user *size, u32 flags); +asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx __user *ctx, u32 size, u32 flags); -asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags); +asmlinkage long sys_lsm_list_modules(u64 __user *ids, u32 __user *size, u32 flags); /* * Architecture-specific system calls From e0011bca603c101f2a3c007bdb77f7006fa78fb1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 25 Jun 2024 09:04:56 +1000 Subject: [PATCH 697/778] nfsd: initialise nfsd_info.mutex early. nfsd_info.mutex can be dereferenced by svc_pool_stats_start() immediately after the new netns is created. Currently this can trigger an oops. Move the initialisation earlier before it can possibly be dereferenced. Fixes: 7b207ccd9833 ("svc: don't hold reference for poolstats, only mutex.") Reported-by: Sourabh Jain Closes: https://lore.kernel.org/all/c2e9f6de-1ec4-4d3a-b18d-d5a6ec0814a0@linux.ibm.com/ Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 2 ++ fs/nfsd/nfssvc.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 533b65057e18..c848ebe5d08f 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -2169,6 +2169,8 @@ static __net_init int nfsd_net_init(struct net *net) nn->nfsd_svcstats.program = &nfsd_program; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; + nn->nfsd_info.mutex = &nfsd_mutex; + nn->nfsd_serv = NULL; nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index cd9a6a1a9fc8..89d7918de7b1 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -672,7 +672,6 @@ int nfsd_create_serv(struct net *net) return error; } spin_lock(&nfsd_notifier_lock); - nn->nfsd_info.mutex = &nfsd_mutex; nn->nfsd_serv = serv; spin_unlock(&nfsd_notifier_lock); From ac03629b1612ad008ea6603a3d142e291e3de9bb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 25 Jun 2024 09:04:57 +1000 Subject: [PATCH 698/778] Revert "nfsd: fix oops when reading pool_stats before server is started" This reverts commit 8e948c365d9c10b685d1deb946bd833d6a9b43e0. The reverted commit moves a test on a field protected by a mutex outside of the protection of that mutex, and so is obviously racey. Depending on how the race goes, si->serv might be NULL when dereferenced in svc_pool_stats_start(), or svc_pool_stats_stop() might unlock a mutex that hadn't been locked. This bug that the commit tried to fix has been addressed by initialising ->mutex earlier. Fixes: 8e948c365d9c ("nfsd: fix oops when reading pool_stats before server is started") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 49a3bea33f9d..dd86d7f1e97e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1421,13 +1421,12 @@ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos) dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); - if (!si->serv) - return NULL; - mutex_lock(si->mutex); if (!pidx) return SEQ_START_TOKEN; + if (!si->serv) + return NULL; return pidx > si->serv->sv_nrpools ? NULL : &si->serv->sv_pools[pidx - 1]; } @@ -1459,8 +1458,7 @@ static void svc_pool_stats_stop(struct seq_file *m, void *p) { struct svc_info *si = m->private; - if (si->serv) - mutex_unlock(si->mutex); + mutex_unlock(si->mutex); } static int svc_pool_stats_show(struct seq_file *m, void *p) From 5a830bbce3af16833fe0092dec47b6dd30279825 Mon Sep 17 00:00:00 2001 From: Phil Chang Date: Mon, 10 Jun 2024 21:31:36 +0800 Subject: [PATCH 699/778] hrtimer: Prevent queuing of hrtimer without a function callback The hrtimer function callback must not be NULL. It has to be specified by the call side but it is not validated by the hrtimer code. When a hrtimer is queued without a function callback, the kernel crashes with a null pointer dereference when trying to execute the callback in __run_hrtimer(). Introduce a validation before queuing the hrtimer in hrtimer_start_range_ns(). [anna-maria: Rephrase commit message] Signed-off-by: Phil Chang Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Anna-Maria Behnsen --- kernel/time/hrtimer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 492c14aac642..b8ee320208d4 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1285,6 +1285,8 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, struct hrtimer_clock_base *base; unsigned long flags; + if (WARN_ON_ONCE(!timer->function)) + return; /* * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard From 9cc5f3bf63aa98bd7cc7ce8a8599077fde13283e Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Mon, 10 Jun 2024 07:21:12 +0200 Subject: [PATCH 700/778] kbuild: Install dtb files as 0644 in Makefile.dtbinst The compiled dtb files aren't executable, so install them with 0644 as their permission mode, instead of defaulting to 0755 for the permission mode and installing them with the executable bits set. Some Linux distributions, including Debian, [1][2][3] already include fixes in their kernel package build recipes to change the dtb file permissions to 0644 in their kernel packages. These changes, when additionally propagated into the long-term kernel versions, will allow such distributions to remove their downstream fixes. [1] https://salsa.debian.org/kernel-team/linux/-/merge_requests/642 [2] https://salsa.debian.org/kernel-team/linux/-/merge_requests/749 [3] https://salsa.debian.org/kernel-team/linux/-/blob/debian/6.8.12-1/debian/rules.real#L193 Cc: Diederik de Haas Cc: Fixes: aefd80307a05 ("kbuild: refactor Makefile.dtbinst more") Signed-off-by: Dragan Simic Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- scripts/Makefile.dtbinst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst index 67956f6496a5..9d920419a62c 100644 --- a/scripts/Makefile.dtbinst +++ b/scripts/Makefile.dtbinst @@ -17,7 +17,7 @@ include $(srctree)/scripts/Kbuild.include dst := $(INSTALL_DTBS_PATH) quiet_cmd_dtb_install = INSTALL $@ - cmd_dtb_install = install -D $< $@ + cmd_dtb_install = install -D -m 0644 $< $@ $(dst)/%: $(obj)/% $(call cmd,dtb_install) From 07d4cc2e7444356faac6552d0688a1670cc9d749 Mon Sep 17 00:00:00 2001 From: Mark-PK Tsai Date: Fri, 14 Jun 2024 15:15:02 +0800 Subject: [PATCH 701/778] kbuild: doc: Update default INSTALL_MOD_DIR from extra to updates The default INSTALL_MOD_DIR was changed from 'extra' to 'updates' in commit b74d7bb7ca24 ("kbuild: Modify default INSTALL_MOD_DIR from extra to updates"). This commit updates the documentation to align with the latest kernel. Fixes: b74d7bb7ca24 ("kbuild: Modify default INSTALL_MOD_DIR from extra to updates") Signed-off-by: Mark-PK Tsai Signed-off-by: Masahiro Yamada --- Documentation/kbuild/modules.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index a1f3eb7a43e2..131863142cbb 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -128,7 +128,7 @@ executed to make module versioning work. modules_install Install the external module(s). The default location is - /lib/modules//extra/, but a prefix may + /lib/modules//updates/, but a prefix may be added with INSTALL_MOD_PATH (discussed in section 5). clean @@ -417,7 +417,7 @@ directory: And external modules are installed in: - /lib/modules/$(KERNELRELEASE)/extra/ + /lib/modules/$(KERNELRELEASE)/updates/ 5.1 INSTALL_MOD_PATH -------------------- @@ -438,10 +438,10 @@ And external modules are installed in: ------------------- External modules are by default installed to a directory under - /lib/modules/$(KERNELRELEASE)/extra/, but you may wish to + /lib/modules/$(KERNELRELEASE)/updates/, but you may wish to locate modules for a specific functionality in a separate directory. For this purpose, use INSTALL_MOD_DIR to specify an - alternative name to "extra.":: + alternative name to "updates.":: $ make INSTALL_MOD_DIR=gandalf -C $KDIR \ M=$PWD modules_install From c61566538968ffb040acc411246fd7ad38c7e8c9 Mon Sep 17 00:00:00 2001 From: Thayne Harbaugh Date: Sat, 15 Jun 2024 23:34:54 -0600 Subject: [PATCH 702/778] kbuild: Fix build target deb-pkg: ln: failed to create hard link The make deb-pkg target calls debian-orig which attempts to either hard link the source .tar to the build-output location or copy the source .tar to the build-output location. The test to determine whether to ln or cp is incorrectly expanded by Make and consequently always attempts to ln the source .tar. This fix corrects the escaping of '$' so that the test is expanded by the shell rather than by Make and appropriately selects between ln and cp. Fixes: b44aa8c96e9e ("kbuild: deb-pkg: make .orig tarball a hard link if possible") Signed-off-by: Thayne Harbaugh Signed-off-by: Masahiro Yamada --- scripts/Makefile.package | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 38653f3e8108..bf016af8bf8a 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -103,7 +103,7 @@ debian-orig: private version = $(shell dpkg-parsechangelog -S Version | sed 's/- debian-orig: private orig-name = $(source)_$(version).orig.tar$(debian-orig-suffix) debian-orig: mkdebian-opts = --need-source debian-orig: linux.tar$(debian-orig-suffix) debian - $(Q)if [ "$(df --output=target .. 2>/dev/null)" = "$(df --output=target $< 2>/dev/null)" ]; then \ + $(Q)if [ "$$(df --output=target .. 2>/dev/null)" = "$$(df --output=target $< 2>/dev/null)" ]; then \ ln -f $< ../$(orig-name); \ else \ cp $< ../$(orig-name); \ From 8d1001f7bdd0553a796998f4fff07ee13e1c1cad Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 18 Jun 2024 20:08:43 +0900 Subject: [PATCH 703/778] kbuild: rpm-pkg: fix build error with CONFIG_MODULES=n When CONFIG_MODULES is disabled, 'make (bin)rpm-pkg' fails: $ make allnoconfig binrpm-pkg [ snip ] error: File not found: .../linux/rpmbuild/BUILDROOT/kernel-6.10.0_rc3-1.i386/lib/modules/6.10.0-rc3/kernel error: File not found: .../linux/rpmbuild/BUILDROOT/kernel-6.10.0_rc3-1.i386/lib/modules/6.10.0-rc3/modules.order To make it work irrespective of CONFIG_MODULES, this commit specifies the directory path, /lib/modules/%{KERNELRELEASE}, instead of individual files. However, doing so would cause new warnings: warning: File listed twice: /lib/modules/6.10.0-rc3-dirty/modules.alias warning: File listed twice: /lib/modules/6.10.0-rc3-dirty/modules.alias.bin warning: File listed twice: /lib/modules/6.10.0-rc3-dirty/modules.builtin.alias.bin warning: File listed twice: /lib/modules/6.10.0-rc3-dirty/modules.builtin.bin warning: File listed twice: /lib/modules/6.10.0-rc3-dirty/modules.dep warning: File listed twice: /lib/modules/6.10.0-rc3-dirty/modules.dep.bin warning: File listed twice: /lib/modules/6.10.0-rc3-dirty/modules.devname warning: File listed twice: /lib/modules/6.10.0-rc3-dirty/modules.softdep warning: File listed twice: /lib/modules/6.10.0-rc3-dirty/modules.symbols warning: File listed twice: /lib/modules/6.10.0-rc3-dirty/modules.symbols.bin These files exist in /lib/modules/%{KERNELRELEASE} and are also explicitly marked as %ghost. Suppress depmod because depmod-generated files are not packaged. Fixes: 615b3a3d2d41 ("kbuild: rpm-pkg: do not include depmod-generated files") Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- scripts/package/kernel.spec | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index e095eb1e290e..fffc8af8deb1 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -57,7 +57,8 @@ patch -p1 < %{SOURCE2} %install mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE} cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz -%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install +# DEPMOD=true makes depmod no-op. We do not package depmod-generated files. +%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} DEPMOD=true modules_install %{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE} cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config @@ -70,10 +71,7 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA %endif { - for x in System.map config kernel modules.builtin \ - modules.builtin.modinfo modules.order vmlinuz; do - echo "/lib/modules/%{KERNELRELEASE}/${x}" - done + echo "/lib/modules/%{KERNELRELEASE}" for x in alias alias.bin builtin.alias.bin builtin.bin dep dep.bin \ devname softdep symbols symbols.bin; do From 7ed9d1318c127b3aec77099802a9fdf2480250b4 Mon Sep 17 00:00:00 2001 From: Nicolas Schier Date: Mon, 24 Jun 2024 13:12:14 +0200 Subject: [PATCH 704/778] kbuild: Use $(obj)/%.cc to fix host C++ module builds Use $(obj)/ instead of $(src)/ prefix when building C++ modules for host, as explained in commit b1992c3772e6 ("kbuild: use $(src) instead of $(srctree)/$(src) for source directory"). This fixes build failures of 'xconfig': $ make O=build/ xconfig make[1]: Entering directory '/data/linux/kbuild-review/build' GEN Makefile make[3]: *** No rule to make target '../scripts/kconfig/qconf-moc.cc', needed by 'scripts/kconfig/qconf-moc.o'. Stop. Fixes: b1992c3772e6 ("kbuild: use $(src) instead of $(srctree)/$(src) for source directory") Reported-by: Rolf Eike Beer Signed-off-by: Nicolas Schier Tested-by: Rolf Eike Beer Signed-off-by: Masahiro Yamada --- scripts/Makefile.host | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.host b/scripts/Makefile.host index d35f55e0d141..e85be7721a48 100644 --- a/scripts/Makefile.host +++ b/scripts/Makefile.host @@ -146,7 +146,7 @@ $(call multi_depend, $(host-cxxmulti), , -objs -cxxobjs) # Create .o file from a single .cc (C++) file quiet_cmd_host-cxxobjs = HOSTCXX $@ cmd_host-cxxobjs = $(HOSTCXX) $(hostcxx_flags) -c -o $@ $< -$(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE +$(host-cxxobjs): $(obj)/%.o: $(obj)/%.cc FORCE $(call if_changed_dep,host-cxxobjs) # Create executable from a single Rust crate (which may consist of From 04a2aef59cfe192aa99020601d922359978cc72a Mon Sep 17 00:00:00 2001 From: Jesse Taube Date: Thu, 6 Jun 2024 14:28:00 -0400 Subject: [PATCH 705/778] RISC-V: fix vector insn load/store width mask RVFDQ_FL_FS_WIDTH_MASK should be 3 bits [14-12], shifted down by 12 bits. Replace GENMASK(3, 0) with GENMASK(2, 0). Fixes: cd054837243b ("riscv: Allocate user's vector context in the first-use trap") Signed-off-by: Jesse Taube Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240606182800.415831-1-jesse@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/insn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h index 06e439eeef9a..09fde95a5e8f 100644 --- a/arch/riscv/include/asm/insn.h +++ b/arch/riscv/include/asm/insn.h @@ -145,7 +145,7 @@ /* parts of opcode for RVF, RVD and RVQ */ #define RVFDQ_FL_FS_WIDTH_OFF 12 -#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(3, 0) +#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(2, 0) #define RVFDQ_FL_FS_WIDTH_W 2 #define RVFDQ_FL_FS_WIDTH_D 3 #define RVFDQ_LS_FS_WIDTH_Q 4 From c223376b3019a00a0241faea0bc8c966738d1cc5 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Thu, 6 Jun 2024 20:25:34 +0800 Subject: [PATCH 706/778] drm/amd/swsmu: add MALL init support workaround for smu_v14_0_1 [Why] SMU firmware has not supported MALL PG. [How] Disable MALL PG and make it always on until SMU firmware is ready. Signed-off-by: Li Ma Reviewed-by: Tim Huang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 13 ++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 5 ++ .../pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h | 4 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 4 +- .../drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c | 73 +++++++++++++++++++ 5 files changed, 96 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 7789b313285c..e1796ecf9c05 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -324,6 +324,18 @@ static int smu_dpm_set_umsch_mm_enable(struct smu_context *smu, return ret; } +static int smu_set_mall_enable(struct smu_context *smu) +{ + int ret = 0; + + if (!smu->ppt_funcs->set_mall_enable) + return 0; + + ret = smu->ppt_funcs->set_mall_enable(smu); + + return ret; +} + /** * smu_dpm_set_power_gate - power gate/ungate the specific IP block * @@ -1791,6 +1803,7 @@ static int smu_hw_init(void *handle) smu_dpm_set_jpeg_enable(smu, true); smu_dpm_set_vpe_enable(smu, true); smu_dpm_set_umsch_mm_enable(smu, true); + smu_set_mall_enable(smu); smu_set_gfx_cgpg(smu, true); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 0917dec8efe3..64ccdb5f14ea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1394,6 +1394,11 @@ struct pptable_funcs { */ int (*dpm_set_umsch_mm_enable)(struct smu_context *smu, bool enable); + /** + * @set_mall_enable: Init MALL power gating control. + */ + int (*set_mall_enable)(struct smu_context *smu); + /** * @notify_rlc_state: Notify RLC power state to SMU. */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h index c4dc5881d8df..e7f5ef49049f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h @@ -106,8 +106,8 @@ #define PPSMC_MSG_DisableLSdma 0x35 ///< Disable LSDMA #define PPSMC_MSG_SetSoftMaxVpe 0x36 ///< #define PPSMC_MSG_SetSoftMinVpe 0x37 ///< -#define PPSMC_MSG_AllocMALLCache 0x38 ///< Allocating MALL Cache -#define PPSMC_MSG_ReleaseMALLCache 0x39 ///< Releasing MALL Cache +#define PPSMC_MSG_MALLPowerController 0x38 ///< Set MALL control +#define PPSMC_MSG_MALLPowerState 0x39 ///< Enter/Exit MALL PG #define PPSMC_Message_Count 0x3A ///< Total number of PPSMC messages /** @}*/ diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index c48214e3dc8e..2e32b085824a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -272,7 +272,9 @@ __SMU_DUMMY_MAP(SetSoftMinVpe), \ __SMU_DUMMY_MAP(GetMetricsVersion), \ __SMU_DUMMY_MAP(EnableUCLKShadow), \ - __SMU_DUMMY_MAP(RmaDueToBadPageThreshold), + __SMU_DUMMY_MAP(RmaDueToBadPageThreshold), \ + __SMU_DUMMY_MAP(MALLPowerController), \ + __SMU_DUMMY_MAP(MALLPowerState), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index e4419e1561ef..18abfbd6d059 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -52,6 +52,19 @@ #define mmMP1_SMN_C2PMSG_90 0x029a #define mmMP1_SMN_C2PMSG_90_BASE_IDX 0 +/* MALLPowerController message arguments (Defines for the Cache mode control) */ +#define SMU_MALL_PMFW_CONTROL 0 +#define SMU_MALL_DRIVER_CONTROL 1 + +/* + * MALLPowerState message arguments + * (Defines for the Allocate/Release Cache mode if in driver mode) + */ +#define SMU_MALL_EXIT_PG 0 +#define SMU_MALL_ENTER_PG 1 + +#define SMU_MALL_PG_CONFIG_DEFAULT SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON + #define FEATURE_MASK(feature) (1ULL << feature) #define SMC_DPM_FEATURE ( \ FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \ @@ -66,6 +79,12 @@ FEATURE_MASK(FEATURE_GFX_DPM_BIT) | \ FEATURE_MASK(FEATURE_VPE_DPM_BIT)) +enum smu_mall_pg_config { + SMU_MALL_PG_CONFIG_PMFW_CONTROL = 0, + SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON = 1, + SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_OFF = 2, +}; + static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1), @@ -113,6 +132,8 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PowerDownUmsch, PPSMC_MSG_PowerDownUmsch, 1), MSG_MAP(SetSoftMaxVpe, PPSMC_MSG_SetSoftMaxVpe, 1), MSG_MAP(SetSoftMinVpe, PPSMC_MSG_SetSoftMinVpe, 1), + MSG_MAP(MALLPowerController, PPSMC_MSG_MALLPowerController, 1), + MSG_MAP(MALLPowerState, PPSMC_MSG_MALLPowerState, 1), }; static struct cmn2asic_mapping smu_v14_0_0_feature_mask_map[SMU_FEATURE_COUNT] = { @@ -1423,6 +1444,57 @@ static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_cl return 0; } +static int smu_v14_0_1_init_mall_power_gating(struct smu_context *smu, enum smu_mall_pg_config pg_config) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + + if (pg_config == SMU_MALL_PG_CONFIG_PMFW_CONTROL) { + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerController, + SMU_MALL_PMFW_CONTROL, NULL); + if (ret) { + dev_err(adev->dev, "Init MALL PMFW CONTROL Failure\n"); + return ret; + } + } else { + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerController, + SMU_MALL_DRIVER_CONTROL, NULL); + if (ret) { + dev_err(adev->dev, "Init MALL Driver CONTROL Failure\n"); + return ret; + } + + if (pg_config == SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON) { + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerState, + SMU_MALL_EXIT_PG, NULL); + if (ret) { + dev_err(adev->dev, "EXIT MALL PG Failure\n"); + return ret; + } + } else if (pg_config == SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_OFF) { + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerState, + SMU_MALL_ENTER_PG, NULL); + if (ret) { + dev_err(adev->dev, "Enter MALL PG Failure\n"); + return ret; + } + } + } + + return ret; +} + +static int smu_v14_0_common_set_mall_enable(struct smu_context *smu) +{ + enum smu_mall_pg_config pg_config = SMU_MALL_PG_CONFIG_DEFAULT; + int ret = 0; + + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) + ret = smu_v14_0_1_init_mall_power_gating(smu, pg_config); + + return ret; +} + static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .check_fw_status = smu_v14_0_check_fw_status, .check_fw_version = smu_v14_0_check_fw_version, @@ -1454,6 +1526,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable, .dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable, .get_dpm_clock_table = smu_v14_0_common_get_dpm_table, + .set_mall_enable = smu_v14_0_common_set_mall_enable, }; static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu) From f6f49dda49db72e7a0b4ca32c77391d5ff5ce232 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 14 Jun 2024 13:48:26 -0400 Subject: [PATCH 707/778] drm/amdgpu/atomfirmware: fix parsing of vram_info v3.x changed the how vram width was encoded. The previous implementation actually worked correctly for most boards. Fix the implementation to work correctly everywhere. This fixes the vram width reported in the kernel log on some boards. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 108003bdf1e9..2e13c7c4b2b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -400,7 +400,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, mem_channel_number = vram_info->v30.channel_num; mem_channel_width = vram_info->v30.channel_width; if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); + *vram_width = mem_channel_number * 16; break; default: return -EINVAL; From 74fa02c4a5ea1ade5156a6ce494d3ea83881c2d8 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 18 Jun 2024 14:04:38 +0530 Subject: [PATCH 708/778] drm/amdgpu: Fix pci state save during mode-1 reset Cache the PCI state before bus master is disabled. The saved state is later used for other cases like restoring config space after mode-2 reset. Fixes: 5c03e5843e6b ("drm/amdgpu:add smu mode1/2 support for aldebaran") Signed-off-by: Lijo Lazar Reviewed-by: Feifei Xu Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 932dc93b2e63..33f791d92ddf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5220,11 +5220,14 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) dev_info(adev->dev, "GPU mode1 reset\n"); + /* Cache the state before bus master disable. The saved config space + * values are used in other cases like restore after mode-2 reset. + */ + amdgpu_device_cache_pci_state(adev->pdev); + /* disable BM */ pci_clear_master(adev->pdev); - amdgpu_device_cache_pci_state(adev->pdev); - if (amdgpu_dpm_is_mode1_reset_supported(adev)) { dev_info(adev->dev, "GPU smu mode1 reset\n"); ret = amdgpu_dpm_mode1_reset(adev); From 2ec6c7f802332d1eff16f03e7c757f1543ee1183 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Tue, 28 Nov 2023 10:31:12 -0500 Subject: [PATCH 709/778] drm/amd/display: Send DP_TOTAL_LTTPR_CNT during detection if LTTPR is present [WHY] New register field added in DP2.1 SCR, needed for auxless ALPM [HOW] Echo value read from 0xF0007 back to sink Reviewed-by: Wenjing Liu Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/link/protocols/link_dp_capability.c | 10 +++++++++- drivers/gpu/drm/amd/display/include/dpcd_defs.h | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index a01d0842bf8e..d487dfcd219b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1590,9 +1590,17 @@ static bool retrieve_link_cap(struct dc_link *link) return false; } - if (dp_is_lttpr_present(link)) + if (dp_is_lttpr_present(link)) { configure_lttpr_mode_transparent(link); + // Echo TOTAL_LTTPR_CNT back downstream + core_link_write_dpcd( + link, + DP_TOTAL_LTTPR_CNT, + &link->dpcd_caps.lttpr_caps.phy_repeater_cnt, + sizeof(link->dpcd_caps.lttpr_caps.phy_repeater_cnt)); + } + /* Read DP tunneling information. */ status = dpcd_get_tunneling_device_data(link); diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h index 914f28e9f224..aee5170f5fb2 100644 --- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h +++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h @@ -177,4 +177,9 @@ enum dpcd_psr_sink_states { #define DP_SINK_PR_PIXEL_DEVIATION_PER_LINE 0x379 #define DP_SINK_PR_MAX_NUMBER_OF_DEVIATION_LINE 0x37A +/* Remove once drm_dp_helper.h is updated upstream */ +#ifndef DP_TOTAL_LTTPR_CNT +#define DP_TOTAL_LTTPR_CNT 0xF000A /* 2.1 */ +#endif + #endif /* __DAL_DPCD_DEFS_H__ */ From bcfa48ff785bd121316592b131ff6531e3e696bb Mon Sep 17 00:00:00 2001 From: Julia Zhang Date: Mon, 3 Jun 2024 19:31:09 +0800 Subject: [PATCH 710/778] drm/amdgpu: avoid using null object of framebuffer Instead of using state->fb->obj[0] directly, get object from framebuffer by calling drm_gem_fb_get_obj() and return error code when object is null to avoid using null object of framebuffer. Reported-by: Fusheng Huang Signed-off-by: Julia Zhang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index e30eecd02ae1..fde66225c481 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "amdgpu.h" @@ -314,7 +315,13 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, return 0; } afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; + + obj = drm_gem_fb_get_obj(new_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return -EINVAL; + } + rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); @@ -368,12 +375,19 @@ static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { struct amdgpu_bo *rbo; + struct drm_gem_object *obj; int r; if (!old_state->fb) return; - rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); + obj = drm_gem_fb_get_obj(old_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return; + } + + rbo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(rbo, false); if (unlikely(r)) { DRM_ERROR("failed to reserve rbo before unpin\n"); From 48880f9686b1ac2ea0831f65df953a63d1437fc0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 3 Jun 2024 12:12:18 +0530 Subject: [PATCH 711/778] drm/amdgpu: Don't show false warning for reg list If reg list is already loaded on PSP 13.0.2 SOCs, psp will give TEE_ERR_CANCEL response on second time load. Avoid printing warn message for it. Signed-off-by: Lijo Lazar Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 25 +++++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 5 +++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 4bd4602d11b1..cef9dd0a012b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -640,6 +640,20 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) } } +static bool psp_err_warn(struct psp_context *psp) +{ + struct psp_gfx_cmd_resp *cmd = psp->cmd_buf_mem; + + /* This response indicates reg list is already loaded */ + if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) && + cmd->cmd_id == GFX_CMD_ID_LOAD_IP_FW && + cmd->cmd.cmd_load_ip_fw.fw_type == GFX_FW_TYPE_REG_LIST && + cmd->resp.status == TEE_ERROR_CANCEL) + return false; + + return true; +} + static int psp_cmd_submit_buf(struct psp_context *psp, struct amdgpu_firmware_info *ucode, @@ -699,10 +713,13 @@ psp_cmd_submit_buf(struct psp_context *psp, dev_warn(psp->adev->dev, "failed to load ucode %s(0x%X) ", amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); - dev_warn(psp->adev->dev, - "psp gfx command %s(0x%X) failed and response status is (0x%X)\n", - psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id, - psp->cmd_buf_mem->resp.status); + if (psp_err_warn(psp)) + dev_warn( + psp->adev->dev, + "psp gfx command %s(0x%X) failed and response status is (0x%X)\n", + psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), + psp->cmd_buf_mem->cmd_id, + psp->cmd_buf_mem->resp.status); /* If any firmware (including CAP) load fails under SRIOV, it should * return failure to stop the VF from initializing. * Also return failure in case of timeout diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 7566973ed8f5..37b5ddd6f13b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -464,8 +464,9 @@ struct psp_gfx_rb_frame #define PSP_ERR_UNKNOWN_COMMAND 0x00000100 enum tee_error_code { - TEE_SUCCESS = 0x00000000, - TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A, + TEE_SUCCESS = 0x00000000, + TEE_ERROR_CANCEL = 0xFFFF0002, + TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A, }; #endif /* _PSP_TEE_GFX_IF_H_ */ From 6d411c8ccc0137a612e0044489030a194ff5c843 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 25 Jun 2024 16:10:29 +0800 Subject: [PATCH 712/778] drm/nouveau/dispnv04: fix null pointer dereference in nv17_tv_get_hd_modes In nv17_tv_get_hd_modes(), the return value of drm_mode_duplicate() is assigned to mode, which will lead to a possible NULL pointer dereference on failure of drm_mode_duplicate(). The same applies to drm_cvt_mode(). Add a check to avoid null pointer dereference. Cc: stable@vger.kernel.org Signed-off-by: Ma Ke Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20240625081029.2619437-1-make24@iscas.ac.cn --- drivers/gpu/drm/nouveau/dispnv04/tvnv17.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c index 670c9739e5e1..9c3dc9a5bb46 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c +++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c @@ -258,6 +258,8 @@ static int nv17_tv_get_hd_modes(struct drm_encoder *encoder, if (modes[i].hdisplay == output_mode->hdisplay && modes[i].vdisplay == output_mode->vdisplay) { mode = drm_mode_duplicate(encoder->dev, output_mode); + if (!mode) + continue; mode->type |= DRM_MODE_TYPE_PREFERRED; } else { @@ -265,6 +267,8 @@ static int nv17_tv_get_hd_modes(struct drm_encoder *encoder, modes[i].vdisplay, 60, false, (output_mode->flags & DRM_MODE_FLAG_INTERLACE), false); + if (!mode) + continue; } /* CVT modes are sometimes unsuitable... */ From 66edf3fb331b6c55439b10f9862987b0916b3726 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 25 Jun 2024 16:18:28 +0800 Subject: [PATCH 713/778] drm/nouveau/dispnv04: fix null pointer dereference in nv17_tv_get_ld_modes In nv17_tv_get_ld_modes(), the return value of drm_mode_duplicate() is assigned to mode, which will lead to a possible NULL pointer dereference on failure of drm_mode_duplicate(). Add a check to avoid npd. Cc: stable@vger.kernel.org Signed-off-by: Ma Ke Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20240625081828.2620794-1-make24@iscas.ac.cn --- drivers/gpu/drm/nouveau/dispnv04/tvnv17.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c index 9c3dc9a5bb46..2033214c4b78 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c +++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c @@ -209,6 +209,8 @@ static int nv17_tv_get_ld_modes(struct drm_encoder *encoder, struct drm_display_mode *mode; mode = drm_mode_duplicate(encoder->dev, tv_mode); + if (!mode) + continue; mode->clock = tv_norm->tv_enc_mode.vrefresh * mode->htotal / 1000 * From 211c581de28e7741898720b5f74da4e62f37f972 Mon Sep 17 00:00:00 2001 From: Pei Li Date: Tue, 25 Jun 2024 13:04:59 -0700 Subject: [PATCH 714/778] bcachefs: slab-use-after-free Read in bch2_sb_errors_from_cpu Acquire fsck_error_counts_lock before accessing the critical section protected by this lock. syzbot has tested the proposed patch and the reproducer did not trigger any issue. Reported-by: syzbot+a2bc0e838efd7663f4d9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a2bc0e838efd7663f4d9 Signed-off-by: Pei Li Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index bda33e59e226..c1270d790e43 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -110,19 +110,25 @@ out: void bch2_sb_errors_from_cpu(struct bch_fs *c) { bch_sb_errors_cpu *src = &c->fsck_error_counts; - struct bch_sb_field_errors *dst = - bch2_sb_field_resize(&c->disk_sb, errors, - bch2_sb_field_errors_u64s(src->nr)); + struct bch_sb_field_errors *dst; unsigned i; + mutex_lock(&c->fsck_error_counts_lock); + + dst = bch2_sb_field_resize(&c->disk_sb, errors, + bch2_sb_field_errors_u64s(src->nr)); + if (!dst) - return; + goto err; for (i = 0; i < src->nr; i++) { SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id); SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr); dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time); } + +err: + mutex_unlock(&c->fsck_error_counts_lock); } static int bch2_sb_errors_to_cpu(struct bch_fs *c) From 472237b69d071c877e97bf0bc3eab1be865fad29 Mon Sep 17 00:00:00 2001 From: Pei Li Date: Tue, 25 Jun 2024 11:41:29 -0700 Subject: [PATCH 715/778] bcachefs: Fix shift-out-of-bounds in bch2_blacklist_entries_gc This series fix the shift-out-of-bounds issue in bch2_blacklist_entries_gc(). Instead of passing 0 to eytzinger0_first() when iterating the entries, we explicitly check 0 and initialize i to be 0. syzbot has tested the proposed patch and the reproducer did not trigger any issue: Reported-and-tested-by: syzbot+835d255ad6bc7f29ee12@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=835d255ad6bc7f29ee12 Signed-off-by: Pei Li Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_seq_blacklist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index ed4846709611..1f25c111c54c 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -232,7 +232,7 @@ bool bch2_blacklist_entries_gc(struct bch_fs *c) BUG_ON(nr != t->nr); unsigned i; - for (src = bl->start, i = eytzinger0_first(t->nr); + for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr); src < bl->start + nr; src++, i = eytzinger0_next(i, nr)) { BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); From 64ee1431cc7d11e01a1007ead0afe737781cbbab Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 23 Jun 2024 00:53:44 -0400 Subject: [PATCH 716/778] bcachefs: Discard, invalidate workers are now per device There's no reason for discards to be single threaded across all devices; this will improve performance on multi device setups. Additionally, making them per-device simplifies the refcounting on bch_dev->io_ref; we now hold it for the duration that the discard path is running, which fixes a race between the discard path and device removal. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 263 +++++++++++++++++---------------- fs/bcachefs/alloc_background.h | 6 +- fs/bcachefs/alloc_foreground.c | 4 +- fs/bcachefs/bcachefs.h | 16 +- fs/bcachefs/super.c | 5 +- 5 files changed, 161 insertions(+), 133 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 8dec2c6cbb7e..1de9fac3bcf4 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -29,7 +29,7 @@ #include #include -static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket); +static void bch2_discard_one_bucket_fast(struct bch_dev *, u64); /* Persistent alloc info: */ @@ -893,12 +893,12 @@ int bch2_trigger_alloc(struct btree_trans *trans, if (statechange(a->data_type == BCH_DATA_need_discard) && !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) && bucket_flushed(new_a)) - bch2_discard_one_bucket_fast(c, new.k->p); + bch2_discard_one_bucket_fast(ca, new.k->p.offset); if (statechange(a->data_type == BCH_DATA_cached) && !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) && should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) - bch2_do_invalidates(c); + bch2_dev_do_invalidates(ca); if (statechange(a->data_type == BCH_DATA_need_gc_gens)) bch2_gc_gens_async(c); @@ -1636,34 +1636,38 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) return ret; } -static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket) +static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress) { int ret; - mutex_lock(&c->discard_buckets_in_flight_lock); - darray_for_each(c->discard_buckets_in_flight, i) - if (bkey_eq(*i, bucket)) { + mutex_lock(&ca->discard_buckets_in_flight_lock); + darray_for_each(ca->discard_buckets_in_flight, i) + if (i->bucket == bucket) { ret = -BCH_ERR_EEXIST_discard_in_flight_add; goto out; } - ret = darray_push(&c->discard_buckets_in_flight, bucket); + ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) { + .in_progress = in_progress, + .bucket = bucket, + })); out: - mutex_unlock(&c->discard_buckets_in_flight_lock); + mutex_unlock(&ca->discard_buckets_in_flight_lock); return ret; } -static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket) +static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket) { - mutex_lock(&c->discard_buckets_in_flight_lock); - darray_for_each(c->discard_buckets_in_flight, i) - if (bkey_eq(*i, bucket)) { - darray_remove_item(&c->discard_buckets_in_flight, i); + mutex_lock(&ca->discard_buckets_in_flight_lock); + darray_for_each(ca->discard_buckets_in_flight, i) + if (i->bucket == bucket) { + BUG_ON(!i->in_progress); + darray_remove_item(&ca->discard_buckets_in_flight, i); goto found; } BUG(); found: - mutex_unlock(&c->discard_buckets_in_flight_lock); + mutex_unlock(&ca->discard_buckets_in_flight_lock); } struct discard_buckets_state { @@ -1671,26 +1675,11 @@ struct discard_buckets_state { u64 open; u64 need_journal_commit; u64 discarded; - struct bch_dev *ca; u64 need_journal_commit_this_dev; }; -static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca) -{ - if (s->ca == ca) - return; - - if (s->ca && s->need_journal_commit_this_dev > - bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets) - bch2_journal_flush_async(&c->journal, NULL); - - if (s->ca) - percpu_ref_put(&s->ca->io_ref); - s->ca = ca; - s->need_journal_commit_this_dev = 0; -} - static int bch2_discard_one_bucket(struct btree_trans *trans, + struct bch_dev *ca, struct btree_iter *need_discard_iter, struct bpos *discard_pos_done, struct discard_buckets_state *s) @@ -1704,16 +1693,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, bool discard_locked = false; int ret = 0; - struct bch_dev *ca = s->ca && s->ca->dev_idx == pos.inode - ? s->ca - : bch2_dev_get_ioref(c, pos.inode, WRITE); - if (!ca) { - bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0)); - return 0; - } - - discard_buckets_next_dev(c, s, ca); - if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) { s->open++; goto out; @@ -1773,7 +1752,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, goto out; } - if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true))) + if (discard_in_flight_add(ca, iter.pos.offset, true)) goto out; discard_locked = true; @@ -1811,7 +1790,7 @@ write: s->discarded++; out: if (discard_locked) - discard_in_flight_remove(c, iter.pos); + discard_in_flight_remove(ca, iter.pos.offset); s->seen++; bch2_trans_iter_exit(trans, &iter); printbuf_exit(&buf); @@ -1820,7 +1799,8 @@ out: static void bch2_do_discards_work(struct work_struct *work) { - struct bch_fs *c = container_of(work, struct bch_fs, discard_work); + struct bch_dev *ca = container_of(work, struct bch_dev, discard_work); + struct bch_fs *c = ca->fs; struct discard_buckets_state s = {}; struct bpos discard_pos_done = POS_MAX; int ret; @@ -1831,23 +1811,41 @@ static void bch2_do_discards_work(struct work_struct *work) * successful commit: */ ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, - BTREE_ID_need_discard, POS_MIN, 0, k, - bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s))); - - discard_buckets_next_dev(c, &s, NULL); + for_each_btree_key_upto(trans, iter, + BTREE_ID_need_discard, + POS(ca->dev_idx, 0), + POS(ca->dev_idx, U64_MAX), 0, k, + bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s))); trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); bch2_write_ref_put(c, BCH_WRITE_REF_discard); + percpu_ref_put(&ca->io_ref); +} + +void bch2_dev_do_discards(struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + return; + + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) + goto put_ioref; + + if (queue_work(c->write_ref_wq, &ca->discard_work)) + return; + + bch2_write_ref_put(c, BCH_WRITE_REF_discard); +put_ioref: + percpu_ref_put(&ca->io_ref); } void bch2_do_discards(struct bch_fs *c) { - if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) && - !queue_work(c->write_ref_wq, &c->discard_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_discard); + for_each_member_device(c, ca) + bch2_dev_do_discards(ca); } static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket) @@ -1876,68 +1874,69 @@ err: static void bch2_do_discards_fast_work(struct work_struct *work) { - struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work); + struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work); + struct bch_fs *c = ca->fs; while (1) { bool got_bucket = false; - struct bpos bucket; - struct bch_dev *ca; + u64 bucket; - mutex_lock(&c->discard_buckets_in_flight_lock); - darray_for_each(c->discard_buckets_in_flight, i) { - if (i->snapshot) + mutex_lock(&ca->discard_buckets_in_flight_lock); + darray_for_each(ca->discard_buckets_in_flight, i) { + if (i->in_progress) continue; - ca = bch2_dev_get_ioref(c, i->inode, WRITE); - if (!ca) { - darray_remove_item(&c->discard_buckets_in_flight, i); - continue; - } - got_bucket = true; - bucket = *i; - i->snapshot = true; + bucket = i->bucket; + i->in_progress = true; break; } - mutex_unlock(&c->discard_buckets_in_flight_lock); + mutex_unlock(&ca->discard_buckets_in_flight_lock); if (!got_bucket) break; if (ca->mi.discard && !c->opts.nochanges) blkdev_issue_discard(ca->disk_sb.bdev, - bucket.offset * ca->mi.bucket_size, + bucket_to_sector(ca, bucket), ca->mi.bucket_size, GFP_KERNEL); int ret = bch2_trans_do(c, NULL, NULL, - BCH_WATERMARK_btree| - BCH_TRANS_COMMIT_no_enospc, - bch2_clear_bucket_needs_discard(trans, bucket)); + BCH_WATERMARK_btree| + BCH_TRANS_COMMIT_no_enospc, + bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket))); bch_err_fn(c, ret); - percpu_ref_put(&ca->io_ref); - discard_in_flight_remove(c, bucket); + discard_in_flight_remove(ca, bucket); if (ret) break; } bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); + percpu_ref_put(&ca->io_ref); } -static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket) +static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) { - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu(c, bucket.inode); - bool dead = !ca || percpu_ref_is_dying(&ca->io_ref); - rcu_read_unlock(); + struct bch_fs *c = ca->fs; - if (!dead && - !discard_in_flight_add(c, bucket) && - bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) && - !queue_work(c->write_ref_wq, &c->discard_fast_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); + if (discard_in_flight_add(ca, bucket, false)) + return; + + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + return; + + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast)) + goto put_ioref; + + if (queue_work(c->write_ref_wq, &ca->discard_fast_work)) + return; + + bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); +put_ioref: + percpu_ref_put(&ca->io_ref); } static int invalidate_one_bucket(struct btree_trans *trans, @@ -2038,7 +2037,8 @@ again: static void bch2_do_invalidates_work(struct work_struct *work) { - struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work); + struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work); + struct bch_fs *c = ca->fs; struct btree_trans *trans = bch2_trans_get(c); int ret = 0; @@ -2046,52 +2046,63 @@ static void bch2_do_invalidates_work(struct work_struct *work) if (ret) goto err; - for_each_member_device(c, ca) { - s64 nr_to_invalidate = - should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); - struct btree_iter iter; - bool wrapped = false; + s64 nr_to_invalidate = + should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); + struct btree_iter iter; + bool wrapped = false; - bch2_trans_iter_init(trans, &iter, BTREE_ID_lru, - lru_pos(ca->dev_idx, 0, - ((bch2_current_io_time(c, READ) + U32_MAX) & - LRU_TIME_MAX)), 0); + bch2_trans_iter_init(trans, &iter, BTREE_ID_lru, + lru_pos(ca->dev_idx, 0, + ((bch2_current_io_time(c, READ) + U32_MAX) & + LRU_TIME_MAX)), 0); - while (true) { - bch2_trans_begin(trans); + while (true) { + bch2_trans_begin(trans); - struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped); - ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - break; - if (!k.k) - break; - - ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate); - if (ret) - break; - - bch2_btree_iter_advance(&iter); - } - bch2_trans_iter_exit(trans, &iter); - - if (ret < 0) { - bch2_dev_put(ca); + struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped); + ret = bkey_err(k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) break; - } + if (!k.k) + break; + + ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate); + if (ret) + break; + + bch2_btree_iter_advance(&iter); } + bch2_trans_iter_exit(trans, &iter); err: bch2_trans_put(trans); bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); + percpu_ref_put(&ca->io_ref); +} + +void bch2_dev_do_invalidates(struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + return; + + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate)) + goto put_ioref; + + if (queue_work(c->write_ref_wq, &ca->invalidate_work)) + return; + + bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); +put_ioref: + percpu_ref_put(&ca->io_ref); } void bch2_do_invalidates(struct bch_fs *c) { - if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) && - !queue_work(c->write_ref_wq, &c->invalidate_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); + for_each_member_device(c, ca) + bch2_dev_do_invalidates(ca); } int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, @@ -2407,16 +2418,20 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) set_bit(ca->dev_idx, c->rw_devs[i].d); } -void bch2_fs_allocator_background_exit(struct bch_fs *c) +void bch2_dev_allocator_background_exit(struct bch_dev *ca) { - darray_exit(&c->discard_buckets_in_flight); + darray_exit(&ca->discard_buckets_in_flight); +} + +void bch2_dev_allocator_background_init(struct bch_dev *ca) +{ + mutex_init(&ca->discard_buckets_in_flight_lock); + INIT_WORK(&ca->discard_work, bch2_do_discards_work); + INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work); + INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work); } void bch2_fs_allocator_background_init(struct bch_fs *c) { spin_lock_init(&c->freelist_lock); - mutex_init(&c->discard_buckets_in_flight_lock); - INIT_WORK(&c->discard_work, bch2_do_discards_work); - INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work); - INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work); } diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index c3cc3c5ba5b6..ba2c5557a3f0 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -275,6 +275,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *); +void bch2_dev_do_discards(struct bch_dev *); void bch2_do_discards(struct bch_fs *); static inline u64 should_invalidate_buckets(struct bch_dev *ca, @@ -289,6 +290,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca, return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets); } +void bch2_dev_do_invalidates(struct bch_dev *); void bch2_do_invalidates(struct bch_fs *); static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a) @@ -312,7 +314,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *); void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); -void bch2_fs_allocator_background_exit(struct bch_fs *); +void bch2_dev_allocator_background_exit(struct bch_dev *); +void bch2_dev_allocator_background_init(struct bch_dev *); + void bch2_fs_allocator_background_init(struct bch_fs *); #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */ diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 927a5f300b30..9d3d64746a5b 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -621,13 +621,13 @@ again: avail = dev_buckets_free(ca, *usage, watermark); if (usage->d[BCH_DATA_need_discard].buckets > avail) - bch2_do_discards(c); + bch2_dev_do_discards(ca); if (usage->d[BCH_DATA_need_gc_gens].buckets > avail) bch2_gc_gens_async(c); if (should_invalidate_buckets(ca, *usage)) - bch2_do_invalidates(c); + bch2_dev_do_invalidates(ca); if (!avail) { if (cl && !waiting) { diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index a6b83ecab7ce..1106fec6e155 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -493,6 +493,11 @@ struct io_count { u64 sectors[2][BCH_DATA_NR]; }; +struct discard_in_flight { + bool in_progress:1; + u64 bucket:63; +}; + struct bch_dev { struct kobject kobj; #ifdef CONFIG_BCACHEFS_DEBUG @@ -554,6 +559,12 @@ struct bch_dev { size_t inc_gen_really_needs_gc; size_t buckets_waiting_on_journal; + struct work_struct invalidate_work; + struct work_struct discard_work; + struct mutex discard_buckets_in_flight_lock; + DARRAY(struct discard_in_flight) discard_buckets_in_flight; + struct work_struct discard_fast_work; + atomic64_t rebalance_work; struct journal_device journal; @@ -915,11 +926,6 @@ struct bch_fs { unsigned write_points_nr; struct buckets_waiting_for_journal buckets_waiting_for_journal; - struct work_struct invalidate_work; - struct work_struct discard_work; - struct mutex discard_buckets_in_flight_lock; - DARRAY(struct bpos) discard_buckets_in_flight; - struct work_struct discard_fast_work; /* GARBAGE COLLECTION */ struct work_struct gc_gens_work; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 641f2975177b..fb906467201e 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -536,7 +536,6 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); - bch2_fs_allocator_background_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); @@ -1195,6 +1194,7 @@ static void bch2_dev_free(struct bch_dev *ca) kfree(ca->buckets_nouse); bch2_free_super(&ca->disk_sb); + bch2_dev_allocator_background_exit(ca); bch2_dev_journal_exit(ca); free_percpu(ca->io_done); @@ -1317,6 +1317,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, atomic_long_set(&ca->ref, 1); #endif + bch2_dev_allocator_background_init(ca); + if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, PERCPU_REF_INIT_DEAD, GFP_KERNEL) || !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) || @@ -1541,6 +1543,7 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); + bch2_dev_do_discards(ca); } int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, From 84b767f9e34fdb143c09e66a2a20722fc2921821 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 24 Jun 2024 10:50:15 -0700 Subject: [PATCH 717/778] ionic: use dev_consume_skb_any outside of napi If we're not in a NAPI softirq context, we need to be careful about how we call napi_consume_skb(), specifically we need to call it with budget==0 to signal to it that we're not in a safe context. This was found while running some configuration stress testing of traffic and a change queue config loop running, and this curious note popped out: [ 4371.402645] BUG: using smp_processor_id() in preemptible [00000000] code: ethtool/20545 [ 4371.402897] caller is napi_skb_cache_put+0x16/0x80 [ 4371.403120] CPU: 25 PID: 20545 Comm: ethtool Kdump: loaded Tainted: G OE 6.10.0-rc3-netnext+ #8 [ 4371.403302] Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 01/23/2021 [ 4371.403460] Call Trace: [ 4371.403613] [ 4371.403758] dump_stack_lvl+0x4f/0x70 [ 4371.403904] check_preemption_disabled+0xc1/0xe0 [ 4371.404051] napi_skb_cache_put+0x16/0x80 [ 4371.404199] ionic_tx_clean+0x18a/0x240 [ionic] [ 4371.404354] ionic_tx_cq_service+0xc4/0x200 [ionic] [ 4371.404505] ionic_tx_flush+0x15/0x70 [ionic] [ 4371.404653] ? ionic_lif_qcq_deinit.isra.23+0x5b/0x70 [ionic] [ 4371.404805] ionic_txrx_deinit+0x71/0x190 [ionic] [ 4371.404956] ionic_reconfigure_queues+0x5f5/0xff0 [ionic] [ 4371.405111] ionic_set_ringparam+0x2e8/0x3e0 [ionic] [ 4371.405265] ethnl_set_rings+0x1f1/0x300 [ 4371.405418] ethnl_default_set_doit+0xbb/0x160 [ 4371.405571] genl_family_rcv_msg_doit+0xff/0x130 [...] I found that ionic_tx_clean() calls napi_consume_skb() which calls napi_skb_cache_put(), but before that last call is the note /* Zero budget indicate non-NAPI context called us, like netpoll */ and DEBUG_NET_WARN_ON_ONCE(!in_softirq()); Those are pretty big hints that we're doing it wrong. We can pass a context hint down through the calls to let ionic_tx_clean() know what we're doing so it can call napi_consume_skb() correctly. Fixes: 386e69865311 ("ionic: Make use napi_consume_skb") Signed-off-by: Shannon Nelson Link: https://patch.msgid.link/20240624175015.4520-1-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/pensando/ionic/ionic_dev.h | 4 ++- .../net/ethernet/pensando/ionic/ionic_lif.c | 2 +- .../net/ethernet/pensando/ionic/ionic_txrx.c | 28 +++++++++++-------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index f30eee4a5a80..b6c01a88098d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -375,7 +375,9 @@ typedef void (*ionic_cq_done_cb)(void *done_arg); unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do, ionic_cq_cb cb, ionic_cq_done_cb done_cb, void *done_arg); -unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do); +unsigned int ionic_tx_cq_service(struct ionic_cq *cq, + unsigned int work_to_do, + bool in_napi); int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev, struct ionic_queue *q, unsigned int index, const char *name, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 1934e9d6d9e4..1837a30ba08a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1189,7 +1189,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget) ionic_rx_service, NULL, NULL); if (lif->hwstamp_txq) - tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget); + tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget, !!budget); work_done = max(max(n_work, a_work), max(rx_work, tx_work)); if (work_done < budget && napi_complete_done(napi, work_done)) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index aed7d9cbce03..9fdd7cd3ef19 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -23,7 +23,8 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q, static void ionic_tx_clean(struct ionic_queue *q, struct ionic_tx_desc_info *desc_info, - struct ionic_txq_comp *comp); + struct ionic_txq_comp *comp, + bool in_napi); static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell) { @@ -944,7 +945,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) u32 work_done = 0; u32 flags = 0; - work_done = ionic_tx_cq_service(cq, budget); + work_done = ionic_tx_cq_service(cq, budget, !!budget); if (unlikely(!budget)) return budget; @@ -1028,7 +1029,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) txqcq = lif->txqcqs[qi]; txcq = &lif->txqcqs[qi]->cq; - tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT); + tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, !!budget); if (unlikely(!budget)) return budget; @@ -1161,7 +1162,8 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q, static void ionic_tx_clean(struct ionic_queue *q, struct ionic_tx_desc_info *desc_info, - struct ionic_txq_comp *comp) + struct ionic_txq_comp *comp, + bool in_napi) { struct ionic_tx_stats *stats = q_to_tx_stats(q); struct ionic_qcq *qcq = q_to_qcq(q); @@ -1213,11 +1215,13 @@ static void ionic_tx_clean(struct ionic_queue *q, desc_info->bytes = skb->len; stats->clean++; - napi_consume_skb(skb, 1); + napi_consume_skb(skb, likely(in_napi) ? 1 : 0); } static bool ionic_tx_service(struct ionic_cq *cq, - unsigned int *total_pkts, unsigned int *total_bytes) + unsigned int *total_pkts, + unsigned int *total_bytes, + bool in_napi) { struct ionic_tx_desc_info *desc_info; struct ionic_queue *q = cq->bound_q; @@ -1239,7 +1243,7 @@ static bool ionic_tx_service(struct ionic_cq *cq, desc_info->bytes = 0; index = q->tail_idx; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); - ionic_tx_clean(q, desc_info, comp); + ionic_tx_clean(q, desc_info, comp, in_napi); if (desc_info->skb) { pkts++; bytes += desc_info->bytes; @@ -1253,7 +1257,9 @@ static bool ionic_tx_service(struct ionic_cq *cq, return true; } -unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do) +unsigned int ionic_tx_cq_service(struct ionic_cq *cq, + unsigned int work_to_do, + bool in_napi) { unsigned int work_done = 0; unsigned int bytes = 0; @@ -1262,7 +1268,7 @@ unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do) if (work_to_do == 0) return 0; - while (ionic_tx_service(cq, &pkts, &bytes)) { + while (ionic_tx_service(cq, &pkts, &bytes, in_napi)) { if (cq->tail_idx == cq->num_descs - 1) cq->done_color = !cq->done_color; cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1); @@ -1288,7 +1294,7 @@ void ionic_tx_flush(struct ionic_cq *cq) { u32 work_done; - work_done = ionic_tx_cq_service(cq, cq->num_descs); + work_done = ionic_tx_cq_service(cq, cq->num_descs, false); if (work_done) ionic_intr_credits(cq->idev->intr_ctrl, cq->bound_intr->index, work_done, IONIC_INTR_CRED_RESET_COALESCE); @@ -1305,7 +1311,7 @@ void ionic_tx_empty(struct ionic_queue *q) desc_info = &q->tx_info[q->tail_idx]; desc_info->bytes = 0; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); - ionic_tx_clean(q, desc_info, NULL); + ionic_tx_clean(q, desc_info, NULL, false); if (desc_info->skb) { pkts++; bytes += desc_info->bytes; From 5dfe9d273932c647bdc9d664f939af9a5a398cbc Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 24 Jun 2024 14:43:23 +0000 Subject: [PATCH 718/778] tcp: fix tcp_rcv_fastopen_synack() to enter TCP_CA_Loss for failed TFO Testing determined that the recent commit 9e046bb111f1 ("tcp: clear tp->retrans_stamp in tcp_rcv_fastopen_synack()") has a race, and does not always ensure retrans_stamp is 0 after a TFO payload retransmit. If transmit completion for the SYN+data skb happens after the client TCP stack receives the SYNACK (which sometimes happens), then retrans_stamp can erroneously remain non-zero for the lifetime of the connection, causing a premature ETIMEDOUT later. Testing and tracing showed that the buggy scenario is the following somewhat tricky sequence: + Client attempts a TFO handshake. tcp_send_syn_data() sends SYN + TFO cookie + data in a single packet in the syn_data skb. It hands the syn_data skb to tcp_transmit_skb(), which makes a clone. Crucially, it then reuses the same original (non-clone) syn_data skb, transforming it by advancing the seq by one byte and removing the FIN bit, and enques the resulting payload-only skb in the sk->tcp_rtx_queue. + Client sets retrans_stamp to the start time of the three-way handshake. + Cookie mismatches or server has TFO disabled, and server only ACKs SYN. + tcp_ack() sees SYN is acked, tcp_clean_rtx_queue() clears retrans_stamp. + Since the client SYN was acked but not the payload, the TFO failure code path in tcp_rcv_fastopen_synack() tries to retransmit the payload skb. However, in some cases the transmit completion for the clone of the syn_data (which had SYN + TFO cookie + data) hasn't happened. In those cases, skb_still_in_host_queue() returns true for the retransmitted TFO payload, because the clone of the syn_data skb has not had its tx completetion. + Because skb_still_in_host_queue() finds skb_fclone_busy() is true, it sets the TSQ_THROTTLED bit and the retransmit does not happen in the tcp_rcv_fastopen_synack() call chain. + The tcp_rcv_fastopen_synack() code next implicitly assumes the retransmit process is finished, and sets retrans_stamp to 0 to clear it, but this is later overwritten (see below). + Later, upon tx completion, tcp_tsq_write() calls tcp_xmit_retransmit_queue(), which puts the retransmit in flight and sets retrans_stamp to a non-zero value. + The client receives an ACK for the retransmitted TFO payload data. + Since we're in CA_Open and there are no dupacks/SACKs/DSACKs/ECN to make tcp_ack_is_dubious() true and make us call tcp_fastretrans_alert() and reach a code path that clears retrans_stamp, retrans_stamp stays nonzero. + Later, if there is a TLP, RTO, RTO sequence, then the connection will suffer an early ETIMEDOUT due to the erroneously ancient retrans_stamp. The fix: this commit refactors the code to have tcp_rcv_fastopen_synack() retransmit by reusing the relevant parts of tcp_simple_retransmit() that enter CA_Loss (without changing cwnd) and call tcp_xmit_retransmit_queue(). We have tcp_simple_retransmit() and tcp_rcv_fastopen_synack() share code in this way because in both cases we get a packet indicating non-congestion loss (MTU reduction or TFO failure) and thus in both cases we want to retransmit as many packets as cwnd allows, without reducing cwnd. And given that retransmits will set retrans_stamp to a non-zero value (and may do so in a later calling context due to TSQ), we also want to enter CA_Loss so that we track when all retransmitted packets are ACked and clear retrans_stamp when that happens (to ensure later recurring RTOs are using the correct retrans_stamp and don't declare ETIMEDOUT prematurely). Fixes: 9e046bb111f1 ("tcp: clear tp->retrans_stamp in tcp_rcv_fastopen_synack()") Fixes: a7abf3cd76e1 ("tcp: consider using standard rtx logic in tcp_rcv_fastopen_synack()") Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Link: https://patch.msgid.link/20240624144323.2371403-1-ncardwell.sw@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b6d7666ac912..2e39cb881e20 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2782,13 +2782,37 @@ static void tcp_mtup_probe_success(struct sock *sk) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); } +/* Sometimes we deduce that packets have been dropped due to reasons other than + * congestion, like path MTU reductions or failed client TFO attempts. In these + * cases we call this function to retransmit as many packets as cwnd allows, + * without reducing cwnd. Given that retransmits will set retrans_stamp to a + * non-zero value (and may do so in a later calling context due to TSQ), we + * also enter CA_Loss so that we track when all retransmitted packets are ACKed + * and clear retrans_stamp when that happens (to ensure later recurring RTOs + * are using the correct retrans_stamp and don't declare ETIMEDOUT + * prematurely). + */ +static void tcp_non_congestion_loss_retransmit(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + + if (icsk->icsk_ca_state != TCP_CA_Loss) { + tp->high_seq = tp->snd_nxt; + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->prior_ssthresh = 0; + tp->undo_marker = 0; + tcp_set_ca_state(sk, TCP_CA_Loss); + } + tcp_xmit_retransmit_queue(sk); +} + /* Do a simple retransmit without using the backoff mechanisms in * tcp_timer. This is used for path mtu discovery. * The socket is already locked here. */ void tcp_simple_retransmit(struct sock *sk) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int mss; @@ -2828,14 +2852,7 @@ void tcp_simple_retransmit(struct sock *sk) * in network, but units changed and effective * cwnd/ssthresh really reduced now. */ - if (icsk->icsk_ca_state != TCP_CA_Loss) { - tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->prior_ssthresh = 0; - tp->undo_marker = 0; - tcp_set_ca_state(sk, TCP_CA_Loss); - } - tcp_xmit_retransmit_queue(sk); + tcp_non_congestion_loss_retransmit(sk); } EXPORT_SYMBOL(tcp_simple_retransmit); @@ -6295,8 +6312,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, tp->fastopen_client_fail = TFO_DATA_NOT_ACKED; skb_rbtree_walk_from(data) tcp_mark_skb_lost(sk, data); - tcp_xmit_retransmit_queue(sk); - tp->retrans_stamp = 0; + tcp_non_congestion_loss_retransmit(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); return true; From 64cd7de998f393e73981e2aa4ee13e4e887f01ea Mon Sep 17 00:00:00 2001 From: Pei Li Date: Tue, 25 Jun 2024 17:39:56 -0700 Subject: [PATCH 719/778] bcachefs: Fix kmalloc bug in __snapshot_t_mut When allocating too huge a snapshot table, we should fail gracefully in __snapshot_t_mut() instead of fail in kmalloc(). Reported-by: syzbot+770e99b65e26fa023ab1@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=770e99b65e26fa023ab1 Tested-by: syzbot+770e99b65e26fa023ab1@syzkaller.appspotmail.com Signed-off-by: Pei Li Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 4ef98e696673..24023d6a9698 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -168,6 +168,9 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1)); size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]); + if (unlikely(new_bytes > INT_MAX)) + return NULL; + new = kvzalloc(new_bytes, GFP_KERNEL); if (!new) return NULL; From d3710853fd4a7020904a16686986cf5541ad1c38 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 25 Jun 2024 17:52:12 +0200 Subject: [PATCH 720/778] ALSA: hda/realtek: Fix conflicting quirk for PCI SSID 17aa:3820 The recent fix for Lenovo IdeaPad 330-17IKB replaced the quirk entry, and this eventually breaks the existing quirk for Lenovo Yoga Duet 7 13ITL6 equipped with the same PCI SSID 17aa:3820. For applying a proper quirk for each model, check the codec SSID additionally. Fortunately Yoga Duet has a different codec SSID, 0x17aa3802. (Interestingly, 17aa:3802 has another conflict of SSID between another Yoga model vs 14IRP8 which we had to work around similarly.) Fixes: b1fd0d1285b1 ("ALSA: hda/realtek: Enable headset mic on IdeaPad 330-17IKB 81DM") Link: https://patch.msgid.link/20240625155217.18767-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cb9b11da5581..c901daf8fd45 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7525,6 +7525,7 @@ enum { ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318, ALC256_FIXUP_CHROME_BOOK, ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7, + ALC287_FIXUP_LENOVO_SSID_17AA3820, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -7596,6 +7597,20 @@ static void alc287_fixup_lenovo_legion_7(struct hda_codec *codec, __snd_hda_apply_fixup(codec, id, action, 0); } +/* Yet more conflicting PCI SSID (17aa:3820) on two Lenovo models */ +static void alc287_fixup_lenovo_ssid_17aa3820(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + int id; + + if (codec->core.subsystem_id == 0x17aa3820) + id = ALC269_FIXUP_ASPIRE_HEADSET_MIC; /* IdeaPad 330-17IKB 81DM */ + else /* 0x17aa3802 */ + id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* "Yoga Duet 7 13ITL6 */ + __snd_hda_apply_fixup(codec, id, action, 0); +} + static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_GPIO2] = { .type = HDA_FIXUP_FUNC, @@ -9832,6 +9847,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC225_FIXUP_HEADSET_JACK }, + [ALC287_FIXUP_LENOVO_SSID_17AA3820] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_lenovo_ssid_17aa3820, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -10531,7 +10550,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS), - SND_PCI_QUIRK(0x17aa, 0x3820, "IdeaPad 330-17IKB 81DM", ALC269_FIXUP_ASPIRE_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3820, "IdeaPad 330 / Yoga Duet 7", ALC287_FIXUP_LENOVO_SSID_17AA3820), SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), From 3cd59d8ef8df7d7a079f54d56502dae8f716b39b Mon Sep 17 00:00:00 2001 From: Dirk Su Date: Wed, 26 Jun 2024 10:14:36 +0800 Subject: [PATCH 721/778] ALSA: hda/realtek: fix mute/micmute LEDs don't work for EliteBook 645/665 G11. HP EliteBook 645/665 G11 needs ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make mic-mute/audio-mute working. Signed-off-by: Dirk Su Cc: Link: https://patch.msgid.link/20240626021437.77039-1-dirk.su@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c901daf8fd45..811e82474200 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10242,6 +10242,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c7c, "HP ProBook 445 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c7d, "HP ProBook 465 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c7e, "HP ProBook 465 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c7f, "HP EliteBook 645 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c80, "HP EliteBook 645 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c81, "HP EliteBook 665 G11", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c89, "HP ProBook 460 G11", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c8a, "HP EliteBook 630", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c8c, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), From 888119571b7c9518faeeeb2b431ffc4455e0028d Mon Sep 17 00:00:00 2001 From: Aapo Vienamo Date: Tue, 25 Jun 2024 16:53:43 +0300 Subject: [PATCH 722/778] gpio: graniterapids: Add missing raw_spinlock_init() Add the missing raw_spin_lock_init() call to gnr_gpio_probe(). Fixes: ecc4b1418e23 ("gpio: Add Intel Granite Rapids-D vGPIO driver") Signed-off-by: Aapo Vienamo Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20240625135343.673745-1-aapo.vienamo@linux.intel.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-graniterapids.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-graniterapids.c b/drivers/gpio/gpio-graniterapids.c index c693fe05d50f..f2e911a3d2ca 100644 --- a/drivers/gpio/gpio-graniterapids.c +++ b/drivers/gpio/gpio-graniterapids.c @@ -296,6 +296,8 @@ static int gnr_gpio_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; + raw_spin_lock_init(&priv->lock); + regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(regs)) return PTR_ERR(regs); From 610b29161b0aa9feb59b78dc867553274f17fb01 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 10:32:43 -0700 Subject: [PATCH 723/778] xfs: fix freeing speculative preallocations for preallocated files xfs_can_free_eofblocks returns false for files that have persistent preallocations unless the force flag is passed and there are delayed blocks. This means it won't free delalloc reservations for files with persistent preallocations unless the force flag is set, and it will also free the persistent preallocations if the force flag is set and the file happens to have delayed allocations. Both of these are bad, so do away with the force flag and always free only post-EOF delayed allocations for files with the XFS_DIFLAG_PREALLOC or APPEND flags set. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Chandan Babu R --- fs/xfs/xfs_bmap_util.c | 30 ++++++++++++++++++++++-------- fs/xfs/xfs_bmap_util.h | 2 +- fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_inode.c | 14 ++++---------- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index ac2e77ebb54c..a4d9fbc21b83 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -486,13 +486,11 @@ out_unlock: /* * Test whether it is appropriate to check an inode for and free post EOF - * blocks. The 'force' parameter determines whether we should also consider - * regular files that are marked preallocated or append-only. + * blocks. */ bool xfs_can_free_eofblocks( - struct xfs_inode *ip, - bool force) + struct xfs_inode *ip) { struct xfs_bmbt_irec imap; struct xfs_mount *mp = ip->i_mount; @@ -526,11 +524,11 @@ xfs_can_free_eofblocks( return false; /* - * Do not free real preallocated or append-only files unless the file - * has delalloc blocks and we are forced to remove them. + * Only free real extents for inodes with persistent preallocations or + * the append-only flag. */ if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) - if (!force || ip->i_delayed_blks == 0) + if (ip->i_delayed_blks == 0) return false; /* @@ -584,6 +582,22 @@ xfs_free_eofblocks( /* Wait on dio to ensure i_size has settled. */ inode_dio_wait(VFS_I(ip)); + /* + * For preallocated files only free delayed allocations. + * + * Note that this means we also leave speculative preallocations in + * place for preallocated files. + */ + if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) { + if (ip->i_delayed_blks) { + xfs_bmap_punch_delalloc_range(ip, + round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize), + LLONG_MAX); + } + xfs_inode_clear_eofblocks_tag(ip); + return 0; + } + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { ASSERT(xfs_is_shutdown(mp)); @@ -891,7 +905,7 @@ xfs_prepare_shift( * Trim eofblocks to avoid shifting uninitialized post-eof preallocation * into the accessible region of the file. */ - if (xfs_can_free_eofblocks(ip, true)) { + if (xfs_can_free_eofblocks(ip)) { error = xfs_free_eofblocks(ip); if (error) return error; diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 51f84d8ff372..eb0895bfb9da 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -63,7 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, xfs_off_t len); /* EOF block manipulation functions */ -bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); +bool xfs_can_free_eofblocks(struct xfs_inode *ip); int xfs_free_eofblocks(struct xfs_inode *ip); int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 0953163a2d84..9967334ea99f 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1155,7 +1155,7 @@ xfs_inode_free_eofblocks( } *lockflags |= XFS_IOLOCK_EXCL; - if (xfs_can_free_eofblocks(ip, false)) + if (xfs_can_free_eofblocks(ip)) return xfs_free_eofblocks(ip); /* inode could be preallocated or append-only */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f36091e1e7f5..38f946e3be2d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1595,7 +1595,7 @@ xfs_release( if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) return 0; - if (xfs_can_free_eofblocks(ip, false)) { + if (xfs_can_free_eofblocks(ip)) { /* * Check if the inode is being opened, written and closed * frequently and we have delayed allocation blocks outstanding @@ -1856,15 +1856,13 @@ xfs_inode_needs_inactive( /* * This file isn't being freed, so check if there are post-eof blocks - * to free. @force is true because we are evicting an inode from the - * cache. Post-eof blocks must be freed, lest we end up with broken - * free space accounting. + * to free. * * Note: don't bother with iolock here since lockdep complains about * acquiring it in reclaim context. We have the only reference to the * inode at this point anyways. */ - return xfs_can_free_eofblocks(ip, true); + return xfs_can_free_eofblocks(ip); } /* @@ -1947,15 +1945,11 @@ xfs_inactive( if (VFS_I(ip)->i_nlink != 0) { /* - * force is true because we are evicting an inode from the - * cache. Post-eof blocks must be freed, lest we end up with - * broken free space accounting. - * * Note: don't bother with iolock here since lockdep complains * about acquiring it in reclaim context. We have the only * reference to the inode at this point anyways. */ - if (xfs_can_free_eofblocks(ip, true)) + if (xfs_can_free_eofblocks(ip)) error = xfs_free_eofblocks(ip); goto out; From 288e1f693f04e66be99f27e7cbe4a45936a66745 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 19 Jun 2024 10:32:44 -0700 Subject: [PATCH 724/778] xfs: restrict when we try to align cow fork delalloc to cowextsz hints xfs/205 produces the following failure when always_cow is enabled: --- a/tests/xfs/205.out 2024-02-28 16:20:24.437887970 -0800 +++ b/tests/xfs/205.out.bad 2024-06-03 21:13:40.584000000 -0700 @@ -1,4 +1,5 @@ QA output created by 205 *** one file + !!! disk full (expected) *** one file, a few bytes at a time *** done This is the result of overly aggressive attempts to align cow fork delalloc reservations to the CoW extent size hint. Looking at the trace data, we're trying to append a single fsblock to the "fred" file. Trying to create a speculative post-eof reservation fails because there's not enough space. We then set @prealloc_blocks to zero and try again, but the cowextsz alignment code triggers, which expands our request for a 1-fsblock reservation into a 39-block reservation. There's not enough space for that, so the whole write fails with ENOSPC even though there's sufficient space in the filesystem to allocate the single block that we need to land the write. There are two things wrong here -- first, we shouldn't be attempting speculative preallocations beyond what was requested when we're low on space. Second, if we've already computed a posteof preallocation, we shouldn't bother trying to align that to the cowextsize hint. Fix both of these problems by adding a flag that only enables the expansion of the delalloc reservation to the cowextsize if we're doing a non-extending write, and only if we're not doing an ENOSPC retry. This requires us to move the ENOSPC retry logic to xfs_bmapi_reserve_delalloc. I probably should have caught this six years ago when 6ca30729c206d was being reviewed, but oh well. Update the comments to reflect what the code does now. Fixes: 6ca30729c206d ("xfs: bmap code cleanup") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_bmap.c | 31 +++++++++++++++++++++++++++---- fs/xfs/xfs_iomap.c | 34 ++++++++++++---------------------- 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index c101cf266bc4..6af6f744fdd6 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4058,20 +4058,32 @@ xfs_bmapi_reserve_delalloc( xfs_extlen_t indlen; uint64_t fdblocks; int error; - xfs_fileoff_t aoff = off; + xfs_fileoff_t aoff; + bool use_cowextszhint = + whichfork == XFS_COW_FORK && !prealloc; +retry: /* * Cap the alloc length. Keep track of prealloc so we know whether to * tag the inode before we return. */ + aoff = off; alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN); if (!eof) alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); if (prealloc && alen >= len) prealloc = alen - len; - /* Figure out the extent size, adjust alen */ - if (whichfork == XFS_COW_FORK) { + /* + * If we're targetting the COW fork but aren't creating a speculative + * posteof preallocation, try to expand the reservation to align with + * the COW extent size hint if there's sufficient free space. + * + * Unlike the data fork, the CoW cancellation functions will free all + * the reservations at inactivation, so we don't require that every + * delalloc reservation have a dirty pagecache. + */ + if (use_cowextszhint) { struct xfs_bmbt_irec prev; xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); @@ -4090,7 +4102,7 @@ xfs_bmapi_reserve_delalloc( */ error = xfs_quota_reserve_blkres(ip, alen); if (error) - return error; + goto out; /* * Split changing sb for alen and indlen since they could be coming @@ -4140,6 +4152,17 @@ out_unreserve_frextents: out_unreserve_quota: if (XFS_IS_QUOTA_ON(mp)) xfs_quota_unreserve_blkres(ip, alen); +out: + if (error == -ENOSPC || error == -EDQUOT) { + trace_xfs_delalloc_enospc(ip, off, len); + + if (prealloc || use_cowextszhint) { + /* retry without any preallocation */ + use_cowextszhint = false; + prealloc = 0; + goto retry; + } + } return error; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 378342673925..414903885ab9 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1148,33 +1148,23 @@ xfs_buffered_write_iomap_begin( } } -retry: - error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, - end_fsb - offset_fsb, prealloc_blocks, - allocfork == XFS_DATA_FORK ? &imap : &cmap, - allocfork == XFS_DATA_FORK ? &icur : &ccur, - allocfork == XFS_DATA_FORK ? eof : cow_eof); - switch (error) { - case 0: - break; - case -ENOSPC: - case -EDQUOT: - /* retry without any preallocation */ - trace_xfs_delalloc_enospc(ip, offset, count); - if (prealloc_blocks) { - prealloc_blocks = 0; - goto retry; - } - fallthrough; - default: - goto out_unlock; - } - if (allocfork == XFS_COW_FORK) { + error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, + end_fsb - offset_fsb, prealloc_blocks, &cmap, + &ccur, cow_eof); + if (error) + goto out_unlock; + trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap); goto found_cow; } + error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, + end_fsb - offset_fsb, prealloc_blocks, &imap, &icur, + eof); + if (error) + goto out_unlock; + /* * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch * them out if the write happens to fail. From 1ec9307fc066dd8a140d5430f8a7576aa9d78cd3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 19 Jun 2024 10:32:45 -0700 Subject: [PATCH 725/778] xfs: allow unlinked symlinks and dirs with zero size For a very very long time, inode inactivation has set the inode size to zero before unmapping the extents associated with the data fork. Unfortunately, commit 3c6f46eacd876 changed the inode verifier to prohibit zero-length symlinks and directories. If an inode happens to get logged in this state and the system crashes before freeing the inode, log recovery will also fail on the broken inode. Therefore, allow zero-size symlinks and directories as long as the link count is zero; nobody will be able to open these files by handle so there isn't any risk of data exposure. Fixes: 3c6f46eacd876 ("xfs: sanity check directory inode di_size") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_inode_buf.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index e7a7bfbe75b4..513b50da6215 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -379,10 +379,13 @@ xfs_dinode_verify_fork( /* * A directory small enough to fit in the inode must be stored * in local format. The directory sf <-> extents conversion - * code updates the directory size accordingly. + * code updates the directory size accordingly. Directories + * being truncated have zero size and are not subject to this + * check. */ if (S_ISDIR(mode)) { - if (be64_to_cpu(dip->di_size) <= fork_size && + if (dip->di_size && + be64_to_cpu(dip->di_size) <= fork_size && fork_format != XFS_DINODE_FMT_LOCAL) return __this_address; } @@ -528,9 +531,19 @@ xfs_dinode_verify( if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) return __this_address; - /* No zero-length symlinks/dirs. */ - if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) - return __this_address; + /* + * No zero-length symlinks/dirs unless they're unlinked and hence being + * inactivated. + */ + if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) { + if (dip->di_version > 1) { + if (dip->di_nlink) + return __this_address; + } else { + if (dip->di_onlink) + return __this_address; + } + } fa = xfs_dinode_verify_nrext64(mp, dip); if (fa) From dc5e1cbae270b625dcb978f8ea762eb16a93a016 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Jun 2024 15:05:26 -0700 Subject: [PATCH 726/778] xfs: fix direction in XFS_IOC_EXCHANGE_RANGE The kernel reads userspace's buffer but does not write it back. Therefore this is really an _IOW ioctl. Change this before 6.10 final releases. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 97996cb79aaa..454b63ef7201 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -996,7 +996,7 @@ struct xfs_getparents_by_handle { #define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom) #define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) #define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) -#define XFS_IOC_EXCHANGE_RANGE _IOWR('X', 129, struct xfs_exchange_range) +#define XFS_IOC_EXCHANGE_RANGE _IOW ('X', 129, struct xfs_exchange_range) /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ From 673cd885bbbfd873aa6983ce2363a813b7826425 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 19 Jun 2024 10:32:46 -0700 Subject: [PATCH 727/778] xfs: honor init_xattrs in xfs_init_new_inode for !ATTR fs xfs_init_new_inode ignores the init_xattrs parameter for filesystems that do not have ATTR enabled. As a result, the first init_xattrs file to be created by the kernel will not have an attr fork created to store acls. Storing that first acl will add ATTR to the superblock flags, so subsequent files will be created with attr forks. The overhead of this is so small that chances are that nobody has noticed this behavior. However, this is disastrous on a filesystem with parent pointers because it requires that a new linkable file /must/ have a pre-existing attr fork, and the parent pointers code uses init_xattrs to create that fork. The preproduction version of mkfs.xfs used to set this, but the V5 sb verifier only requires ATTR2, not ATTR. There is no guard for filesystems with (PARENT && !ATTR). It turns out that I misunderstood the two flags -- ATTR means that we at some point created an attr fork to store xattrs in a file; ATTR2 apparently means only that inodes have dynamic fork offsets or that the filesystem was mounted with the "attr2" option. Fixes: 2442ee15bb1e ("xfs: eager inode attr fork init needs attr feature awareness") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 38f946e3be2d..a4e3cd8971fc 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -42,6 +42,7 @@ #include "xfs_pnfs.h" #include "xfs_parent.h" #include "xfs_xattr.h" +#include "xfs_sb.h" struct kmem_cache *xfs_inode_cache; @@ -870,9 +871,16 @@ xfs_init_new_inode( * this saves us from needing to run a separate transaction to set the * fork offset in the immediate future. */ - if (init_xattrs && xfs_has_attr(mp)) { + if (init_xattrs) { ip->i_forkoff = xfs_default_attroffset(ip) >> 3; xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); + + if (!xfs_has_attr(mp)) { + spin_lock(&mp->m_sb_lock); + xfs_add_attr(mp); + spin_unlock(&mp->m_sb_lock); + xfs_log_sb(tp); + } } /* From 63b47f026cc841bd3d3438dd6fccbc394dfead87 Mon Sep 17 00:00:00 2001 From: Vyacheslav Frantsishko Date: Wed, 26 Jun 2024 10:03:34 +0300 Subject: [PATCH 728/778] ASoC: amd: yc: Fix non-functional mic on ASUS M5602RA The Vivobook S 16X IPS needs a quirks-table entry for the internal microphone to function properly. Signed-off-by: Vyacheslav Frantsishko Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/20240626070334.45633-1-itmymaill@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 1760b5d42460..4e3a8ce690a4 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -283,6 +283,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "M5402RA"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "M5602RA"), + } + }, { .driver_data = &acp6x_card, .matches = { From 103458874baca0bbc8ae0b66d50201d5faa8c17b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 May 2024 14:06:30 +0200 Subject: [PATCH 729/778] i2c: viai2c: turn common code into a proper module The i2c-viai2c-common.c file is used by two drivers, but is not a proper abstraction and can get linked into both modules in the same configuration, which results in a warning: scripts/Makefile.build:236: drivers/i2c/busses/Makefile: i2c-viai2c-common.o is added to multiple modules: i2c-wmt i2c-zhaoxin The other problems with this include the incorrect use of a __weak function when both are built-in, and the fact that the "common" module is sprinked with 'if (i2c->plat == ...)' checks that have knowledge about the differences between the drivers using it. Avoid the link time warning by making the common driver a proper module with MODULE_LICENCE()/MODULE_AUTHOR() tags, and remove the __weak function by slightly rearranging the code. This adds a little more duplication between the two main drivers, but those versions get more readable in the process. Fixes: a06b80e83011 ("i2c: add zhaoxin i2c controller driver") Signed-off-by: Arnd Bergmann Tested-by: Hans Hu Signed-off-by: Andi Shyti --- drivers/i2c/busses/Makefile | 6 +- drivers/i2c/busses/i2c-viai2c-common.c | 71 ++------------- drivers/i2c/busses/i2c-viai2c-common.h | 2 +- drivers/i2c/busses/i2c-viai2c-wmt.c | 36 ++++++++ drivers/i2c/busses/i2c-viai2c-zhaoxin.c | 115 +++++++++++++++++++----- 5 files changed, 140 insertions(+), 90 deletions(-) diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 3d65934f5eb4..78d0561339e5 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -29,8 +29,7 @@ obj-$(CONFIG_I2C_SIS630) += i2c-sis630.o obj-$(CONFIG_I2C_SIS96X) += i2c-sis96x.o obj-$(CONFIG_I2C_VIA) += i2c-via.o obj-$(CONFIG_I2C_VIAPRO) += i2c-viapro.o -i2c-zhaoxin-objs := i2c-viai2c-zhaoxin.o i2c-viai2c-common.o -obj-$(CONFIG_I2C_ZHAOXIN) += i2c-zhaoxin.o +obj-$(CONFIG_I2C_ZHAOXIN) += i2c-viai2c-zhaoxin.o i2c-viai2c-common.o # Mac SMBus host controller drivers obj-$(CONFIG_I2C_HYDRA) += i2c-hydra.o @@ -120,8 +119,7 @@ obj-$(CONFIG_I2C_TEGRA_BPMP) += i2c-tegra-bpmp.o obj-$(CONFIG_I2C_UNIPHIER) += i2c-uniphier.o obj-$(CONFIG_I2C_UNIPHIER_F) += i2c-uniphier-f.o obj-$(CONFIG_I2C_VERSATILE) += i2c-versatile.o -i2c-wmt-objs := i2c-viai2c-wmt.o i2c-viai2c-common.o -obj-$(CONFIG_I2C_WMT) += i2c-wmt.o +obj-$(CONFIG_I2C_WMT) += i2c-viai2c-wmt.o i2c-viai2c-common.o i2c-octeon-objs := i2c-octeon-core.o i2c-octeon-platdrv.o obj-$(CONFIG_I2C_OCTEON) += i2c-octeon.o i2c-thunderx-objs := i2c-octeon-core.o i2c-thunderx-pcidrv.o diff --git a/drivers/i2c/busses/i2c-viai2c-common.c b/drivers/i2c/busses/i2c-viai2c-common.c index 1844d13f1f79..162b31306cba 100644 --- a/drivers/i2c/busses/i2c-viai2c-common.c +++ b/drivers/i2c/busses/i2c-viai2c-common.c @@ -17,6 +17,7 @@ int viai2c_wait_bus_not_busy(struct viai2c *i2c) return 0; } +EXPORT_SYMBOL_GPL(viai2c_wait_bus_not_busy); static int viai2c_write(struct viai2c *i2c, struct i2c_msg *pmsg, int last) { @@ -121,6 +122,7 @@ int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) return (ret < 0) ? ret : i; } +EXPORT_SYMBOL_GPL(viai2c_xfer); /* * Main process of the byte mode xfer @@ -130,7 +132,7 @@ int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) * 0: there is still data that needs to be transferred * -EIO: error occurred */ -static int viai2c_irq_xfer(struct viai2c *i2c) +int viai2c_irq_xfer(struct viai2c *i2c) { u16 val; struct i2c_msg *msg = i2c->msg; @@ -171,51 +173,11 @@ static int viai2c_irq_xfer(struct viai2c *i2c) return i2c->xfered_len == msg->len; } - -int __weak viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq) -{ - return 0; -} - -static irqreturn_t viai2c_isr(int irq, void *data) -{ - struct viai2c *i2c = data; - u8 status; - - /* save the status and write-clear it */ - status = readw(i2c->base + VIAI2C_REG_ISR); - if (!status && i2c->platform == VIAI2C_PLAT_ZHAOXIN) - return IRQ_NONE; - - writew(status, i2c->base + VIAI2C_REG_ISR); - - i2c->ret = 0; - if (status & VIAI2C_ISR_NACK_ADDR) - i2c->ret = -EIO; - - if (i2c->platform == VIAI2C_PLAT_WMT && (status & VIAI2C_ISR_SCL_TIMEOUT)) - i2c->ret = -ETIMEDOUT; - - if (!i2c->ret) { - if (i2c->mode == VIAI2C_BYTE_MODE) - i2c->ret = viai2c_irq_xfer(i2c); - else - i2c->ret = viai2c_fifo_irq_xfer(i2c, true); - } - - /* All the data has been successfully transferred or error occurred */ - if (i2c->ret) - complete(&i2c->complete); - - return IRQ_HANDLED; -} +EXPORT_SYMBOL_GPL(viai2c_irq_xfer); int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat) { - int err; - int irq_flags; struct viai2c *i2c; - struct device_node *np = pdev->dev.of_node; i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL); if (!i2c) @@ -225,28 +187,8 @@ int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat) if (IS_ERR(i2c->base)) return PTR_ERR(i2c->base); - if (plat == VIAI2C_PLAT_WMT) { - irq_flags = 0; - i2c->irq = irq_of_parse_and_map(np, 0); - if (!i2c->irq) - return -EINVAL; - } else if (plat == VIAI2C_PLAT_ZHAOXIN) { - irq_flags = IRQF_SHARED; - i2c->irq = platform_get_irq(pdev, 0); - if (i2c->irq < 0) - return i2c->irq; - } else { - return dev_err_probe(&pdev->dev, -EINVAL, "wrong platform type\n"); - } - i2c->platform = plat; - err = devm_request_irq(&pdev->dev, i2c->irq, viai2c_isr, - irq_flags, pdev->name, i2c); - if (err) - return dev_err_probe(&pdev->dev, err, - "failed to request irq %i\n", i2c->irq); - i2c->dev = &pdev->dev; init_completion(&i2c->complete); platform_set_drvdata(pdev, i2c); @@ -254,3 +196,8 @@ int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat) *pi2c = i2c; return 0; } +EXPORT_SYMBOL_GPL(viai2c_init); + +MODULE_DESCRIPTION("Via/Wondermedia/Zhaoxin I2C master-mode bus adapter"); +MODULE_AUTHOR("Tony Prisk "); +MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-viai2c-common.h b/drivers/i2c/busses/i2c-viai2c-common.h index 81e827c54434..00f17733223c 100644 --- a/drivers/i2c/busses/i2c-viai2c-common.h +++ b/drivers/i2c/busses/i2c-viai2c-common.h @@ -80,6 +80,6 @@ struct viai2c { int viai2c_wait_bus_not_busy(struct viai2c *i2c); int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num); int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat); -int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq); +int viai2c_irq_xfer(struct viai2c *i2c); #endif diff --git a/drivers/i2c/busses/i2c-viai2c-wmt.c b/drivers/i2c/busses/i2c-viai2c-wmt.c index e1988f946026..420fd10fe3aa 100644 --- a/drivers/i2c/busses/i2c-viai2c-wmt.c +++ b/drivers/i2c/busses/i2c-viai2c-wmt.c @@ -72,6 +72,32 @@ static int wmt_i2c_reset_hardware(struct viai2c *i2c) return 0; } +static irqreturn_t wmt_i2c_isr(int irq, void *data) +{ + struct viai2c *i2c = data; + u8 status; + + /* save the status and write-clear it */ + status = readw(i2c->base + VIAI2C_REG_ISR); + writew(status, i2c->base + VIAI2C_REG_ISR); + + i2c->ret = 0; + if (status & VIAI2C_ISR_NACK_ADDR) + i2c->ret = -EIO; + + if (status & VIAI2C_ISR_SCL_TIMEOUT) + i2c->ret = -ETIMEDOUT; + + if (!i2c->ret) + i2c->ret = viai2c_irq_xfer(i2c); + + /* All the data has been successfully transferred or error occurred */ + if (i2c->ret) + complete(&i2c->complete); + + return IRQ_HANDLED; +} + static int wmt_i2c_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -84,6 +110,16 @@ static int wmt_i2c_probe(struct platform_device *pdev) if (err) return err; + i2c->irq = platform_get_irq(pdev, 0); + if (i2c->irq < 0) + return i2c->irq; + + err = devm_request_irq(&pdev->dev, i2c->irq, wmt_i2c_isr, + 0, pdev->name, i2c); + if (err) + return dev_err_probe(&pdev->dev, err, + "failed to request irq %i\n", i2c->irq); + i2c->clk = of_clk_get(np, 0); if (IS_ERR(i2c->clk)) { dev_err(&pdev->dev, "unable to request clock\n"); diff --git a/drivers/i2c/busses/i2c-viai2c-zhaoxin.c b/drivers/i2c/busses/i2c-viai2c-zhaoxin.c index 7e3ac2a3e1fd..ab3e44e147e9 100644 --- a/drivers/i2c/busses/i2c-viai2c-zhaoxin.c +++ b/drivers/i2c/busses/i2c-viai2c-zhaoxin.c @@ -49,8 +49,7 @@ struct viai2c_zhaoxin { u16 xfer_len; }; -/* 'irq == true' means in interrupt context */ -int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq) +static int viai2c_fifo_xfer(struct viai2c *i2c) { u16 i; u8 tmp; @@ -59,17 +58,6 @@ int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq) bool read = !!(msg->flags & I2C_M_RD); struct viai2c_zhaoxin *priv = i2c->pltfm_priv; - if (irq) { - /* get the received data */ - if (read) - for (i = 0; i < priv->xfer_len; i++) - msg->buf[i2c->xfered_len + i] = ioread8(base + ZXI2C_REG_HRDR); - - i2c->xfered_len += priv->xfer_len; - if (i2c->xfered_len == msg->len) - return 1; - } - /* reset fifo buffer */ tmp = ioread8(base + ZXI2C_REG_HCR); iowrite8(tmp | ZXI2C_HCR_RST_FIFO, base + ZXI2C_REG_HCR); @@ -92,18 +80,59 @@ int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq) iowrite8(tmp, base + VIAI2C_REG_CR); } - if (irq) { - /* continue transmission */ - tmp = ioread8(base + VIAI2C_REG_CR); - iowrite8(tmp |= VIAI2C_CR_CPU_RDY, base + VIAI2C_REG_CR); - } else { - u16 tcr_val = i2c->tcr; + u16 tcr_val = i2c->tcr; - /* start transmission */ - tcr_val |= read ? VIAI2C_TCR_READ : 0; - writew(tcr_val | msg->addr, base + VIAI2C_REG_TCR); + /* start transmission */ + tcr_val |= read ? VIAI2C_TCR_READ : 0; + writew(tcr_val | msg->addr, base + VIAI2C_REG_TCR); + + return 0; +} + +static int viai2c_fifo_irq_xfer(struct viai2c *i2c) +{ + u16 i; + u8 tmp; + struct i2c_msg *msg = i2c->msg; + void __iomem *base = i2c->base; + bool read = !!(msg->flags & I2C_M_RD); + struct viai2c_zhaoxin *priv = i2c->pltfm_priv; + + /* get the received data */ + if (read) + for (i = 0; i < priv->xfer_len; i++) + msg->buf[i2c->xfered_len + i] = ioread8(base + ZXI2C_REG_HRDR); + + i2c->xfered_len += priv->xfer_len; + if (i2c->xfered_len == msg->len) + return 1; + + /* reset fifo buffer */ + tmp = ioread8(base + ZXI2C_REG_HCR); + iowrite8(tmp | ZXI2C_HCR_RST_FIFO, base + ZXI2C_REG_HCR); + + /* set xfer len */ + priv->xfer_len = min_t(u16, msg->len - i2c->xfered_len, ZXI2C_FIFO_SIZE); + if (read) { + iowrite8(priv->xfer_len - 1, base + ZXI2C_REG_HRLR); + } else { + iowrite8(priv->xfer_len - 1, base + ZXI2C_REG_HTLR); + /* set write data */ + for (i = 0; i < priv->xfer_len; i++) + iowrite8(msg->buf[i2c->xfered_len + i], base + ZXI2C_REG_HTDR); } + /* prepare to stop transmission */ + if (priv->hrv && msg->len == (i2c->xfered_len + priv->xfer_len)) { + tmp = ioread8(base + VIAI2C_REG_CR); + tmp |= read ? VIAI2C_CR_RX_END : VIAI2C_CR_TX_END; + iowrite8(tmp, base + VIAI2C_REG_CR); + } + + /* continue transmission */ + tmp = ioread8(base + VIAI2C_REG_CR); + iowrite8(tmp |= VIAI2C_CR_CPU_RDY, base + VIAI2C_REG_CR); + return 0; } @@ -135,7 +164,7 @@ static int zxi2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int priv->xfer_len = 0; i2c->xfered_len = 0; - viai2c_fifo_irq_xfer(i2c, 0); + viai2c_fifo_xfer(i2c); if (!wait_for_completion_timeout(&i2c->complete, VIAI2C_TIMEOUT)) return -ETIMEDOUT; @@ -228,6 +257,36 @@ static void zxi2c_get_bus_speed(struct viai2c *i2c) dev_info(i2c->dev, "speed mode is %s\n", i2c_freq_mode_string(params[0])); } +static irqreturn_t zxi2c_isr(int irq, void *data) +{ + struct viai2c *i2c = data; + u8 status; + + /* save the status and write-clear it */ + status = readw(i2c->base + VIAI2C_REG_ISR); + if (!status) + return IRQ_NONE; + + writew(status, i2c->base + VIAI2C_REG_ISR); + + i2c->ret = 0; + if (status & VIAI2C_ISR_NACK_ADDR) + i2c->ret = -EIO; + + if (!i2c->ret) { + if (i2c->mode == VIAI2C_BYTE_MODE) + i2c->ret = viai2c_irq_xfer(i2c); + else + i2c->ret = viai2c_fifo_irq_xfer(i2c); + } + + /* All the data has been successfully transferred or error occurred */ + if (i2c->ret) + complete(&i2c->complete); + + return IRQ_HANDLED; +} + static int zxi2c_probe(struct platform_device *pdev) { int error; @@ -239,6 +298,16 @@ static int zxi2c_probe(struct platform_device *pdev) if (error) return error; + i2c->irq = platform_get_irq(pdev, 0); + if (i2c->irq < 0) + return i2c->irq; + + error = devm_request_irq(&pdev->dev, i2c->irq, zxi2c_isr, + IRQF_SHARED, pdev->name, i2c); + if (error) + return dev_err_probe(&pdev->dev, error, + "failed to request irq %i\n", i2c->irq); + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; From 77453e2b015b5ced5b3f45364dd5a72dfc3bdecb Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Tue, 25 Jun 2024 12:22:36 +0200 Subject: [PATCH 730/778] net: usb: qmi_wwan: add Telit FN912 compositions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following Telit FN912 compositions: 0x3000: rmnet + tty (AT/NMEA) + tty (AT) + tty (diag) T: Bus=03 Lev=01 Prnt=03 Port=07 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=3000 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN912 S: SerialNumber=92c4c4d8 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x3001: rmnet + tty (AT) + tty (diag) + DPL (data packet logging) + adb T: Bus=03 Lev=01 Prnt=03 Port=07 Cnt=01 Dev#= 7 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=3001 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN912 S: SerialNumber=92c4c4d8 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Link: https://patch.msgid.link/20240625102236.69539-1-dnlplm@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 663e46348ce3..386d62769ded 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1372,6 +1372,8 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x3000, 0)}, /* Telit FN912 series */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x3001, 0)}, /* Telit FN912 series */ {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9803, 4)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ From edf2d546bfd6f5c4d143715cef1b1e7ce5718c4e Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 24 Jun 2024 10:21:41 +0200 Subject: [PATCH 731/778] riscv: patch: Flush the icache right after patching to avoid illegal insns We cannot delay the icache flush after patching some functions as we may have patched a function that will get called before the icache flush. The only way to completely avoid such scenario is by flushing the icache as soon as we patch a function. This will probably be costly as we don't batch the icache maintenance anymore. Fixes: 6ca445d8af0e ("riscv: Fix early ftrace nop patching") Reported-by: Conor Dooley Closes: https://lore.kernel.org/linux-riscv/20240613-lubricant-breath-061192a9489a@wendy/ Signed-off-by: Alexandre Ghiti Reviewed-by: Andy Chiu Link: https://lore.kernel.org/r/20240624082141.153871-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/ftrace.c | 7 ++----- arch/riscv/kernel/patch.c | 26 ++++++++++++++++++-------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 87cbd86576b2..4b95c574fd04 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -120,9 +120,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); mutex_unlock(&text_mutex); - if (!mod) - local_flush_icache_range(rec->ip, rec->ip + MCOUNT_INSN_SIZE); - return out; } @@ -156,9 +153,9 @@ static int __ftrace_modify_code(void *data) } else { while (atomic_read(¶m->cpu_count) <= num_online_cpus()) cpu_relax(); - } - local_flush_icache_all(); + local_flush_icache_all(); + } return 0; } diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 4007563fb607..ab03732d06c4 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -89,6 +89,14 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) memset(waddr, c, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. + */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) @@ -135,6 +143,14 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) ret = copy_to_kernel_nofault(waddr, insn, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. + */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) @@ -189,9 +205,6 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) ret = patch_insn_set(tp, c, len); - if (!ret) - flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len); - return ret; } NOKPROBE_SYMBOL(patch_text_set_nosync); @@ -224,9 +237,6 @@ int patch_text_nosync(void *addr, const void *insns, size_t len) ret = patch_insn_write(tp, insns, len); - if (!ret) - flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); - return ret; } NOKPROBE_SYMBOL(patch_text_nosync); @@ -253,9 +263,9 @@ static int patch_text_cb(void *data) } else { while (atomic_read(&patch->cpu_count) <= num_online_cpus()) cpu_relax(); - } - local_flush_icache_all(); + local_flush_icache_all(); + } return ret; } From 23b2188920a25e88d447dd7d819a0b0f62fb4455 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Thu, 13 Jun 2024 15:11:06 +0800 Subject: [PATCH 732/778] riscv: stacktrace: convert arch_stack_walk() to noinstr arch_stack_walk() is called intensively in function_graph when the kernel is compiled with CONFIG_TRACE_IRQFLAGS. As a result, the kernel logs a lot of arch_stack_walk and its sub-functions into the ftrace buffer. However, these functions should not appear on the trace log because they are part of the ftrace itself. This patch references what arm64 does for the smae function. So it further prevent the re-enter kprobe issue, which is also possible on riscv. Related-to: commit 0fbcd8abf337 ("arm64: Prohibit instrumentation on arch_stack_walk()") Fixes: 680341382da5 ("riscv: add CALLER_ADDRx support") Signed-off-by: Andy Chiu Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240613-dev-andyc-dyn-ftrace-v4-v1-1-1a538e12c01e@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 528ec7cc9a62..0d3f00eb0bae 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -156,7 +156,7 @@ unsigned long __get_wchan(struct task_struct *task) return pc; } -noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, +noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); From 9d65ab6050d25f17c13f4195aa8e160c6ac638f6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 26 Jun 2024 16:51:13 +0200 Subject: [PATCH 733/778] ALSA: seq: Fix missing MSB in MIDI2 SPP conversion The conversion of SPP to MIDI2 UMP called a wrong function, and the secondary argument wasn't taken. As a result, MSB of SPP was always zero. Fix to call the right function. Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events") Link: https://patch.msgid.link/20240626145141.16648-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ump_convert.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index 6687efdceea1..e90b27a135e6 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -1020,7 +1020,7 @@ static int system_2p_ev_to_ump_midi2(const struct snd_seq_event *event, union snd_ump_midi2_msg *data, unsigned char status) { - return system_1p_ev_to_ump_midi1(event, dest_port, + return system_2p_ev_to_ump_midi1(event, dest_port, (union snd_ump_midi1_msg *)data, status); } From 68f97fe330e01450ace63da0ce5cab676fc97f9a Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Wed, 26 Jun 2024 08:25:34 +0000 Subject: [PATCH 734/778] ASoC: rt5645: fix issue of random interrupt from push-button Modify register setting sequence of enabling inline command to fix issue of random interrupt from push-button. Signed-off-by: Jack Yu Link: https://patch.msgid.link/9a7a3a66cbcb426487ca6f558f45e922@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 24 ++++++++++++++++++------ sound/soc/codecs/rt5645.h | 6 ++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index cdb7ff7020e9..51187b1e0ed2 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -81,7 +81,7 @@ static const struct reg_sequence init_list[] = { static const struct reg_sequence rt5650_init_list[] = { {0xf6, 0x0100}, {RT5645_PWR_ANLG1, 0x02}, - {RT5645_IL_CMD3, 0x0018}, + {RT5645_IL_CMD3, 0x6728}, }; static const struct reg_default rt5645_reg[] = { @@ -3130,20 +3130,32 @@ static void rt5645_enable_push_button_irq(struct snd_soc_component *component, bool enable) { struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component); + int ret; if (enable) { snd_soc_dapm_force_enable_pin(dapm, "ADC L power"); snd_soc_dapm_force_enable_pin(dapm, "ADC R power"); snd_soc_dapm_sync(dapm); + snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD2, + RT5645_EN_4BTN_IL_MASK | RT5645_RST_4BTN_IL_MASK, + RT5645_EN_4BTN_IL_EN | RT5645_RST_4BTN_IL_RST); + usleep_range(10000, 15000); + snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD2, + RT5645_EN_4BTN_IL_MASK | RT5645_RST_4BTN_IL_MASK, + RT5645_EN_4BTN_IL_EN | RT5645_RST_4BTN_IL_NORM); + msleep(50); + ret = snd_soc_component_read(component, RT5645_INT_IRQ_ST); + pr_debug("%s read %x = %x\n", __func__, RT5645_INT_IRQ_ST, + snd_soc_component_read(component, RT5645_INT_IRQ_ST)); + snd_soc_component_write(component, RT5645_INT_IRQ_ST, ret); + ret = snd_soc_component_read(component, RT5650_4BTN_IL_CMD1); + pr_debug("%s read %x = %x\n", __func__, RT5650_4BTN_IL_CMD1, + snd_soc_component_read(component, RT5650_4BTN_IL_CMD1)); + snd_soc_component_write(component, RT5650_4BTN_IL_CMD1, ret); snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD1, 0x3, 0x3); snd_soc_component_update_bits(component, RT5645_INT_IRQ_ST, 0x8, 0x8); - snd_soc_component_update_bits(component, - RT5650_4BTN_IL_CMD2, 0x8000, 0x8000); - snd_soc_component_read(component, RT5650_4BTN_IL_CMD1); - pr_debug("%s read %x = %x\n", __func__, RT5650_4BTN_IL_CMD1, - snd_soc_component_read(component, RT5650_4BTN_IL_CMD1)); } else { snd_soc_component_update_bits(component, RT5650_4BTN_IL_CMD2, 0x8000, 0x0); snd_soc_component_update_bits(component, RT5645_INT_IRQ_ST, 0x8, 0x0); diff --git a/sound/soc/codecs/rt5645.h b/sound/soc/codecs/rt5645.h index 90816b2c5489..bef74b29fd54 100644 --- a/sound/soc/codecs/rt5645.h +++ b/sound/soc/codecs/rt5645.h @@ -2011,6 +2011,12 @@ #define RT5645_ZCD_HP_DIS (0x0 << 15) #define RT5645_ZCD_HP_EN (0x1 << 15) +/* Buttons Inline Command Function 2 (0xe0) */ +#define RT5645_EN_4BTN_IL_MASK (0x1 << 15) +#define RT5645_EN_4BTN_IL_EN (0x1 << 15) +#define RT5645_RST_4BTN_IL_MASK (0x1 << 14) +#define RT5645_RST_4BTN_IL_RST (0x0 << 14) +#define RT5645_RST_4BTN_IL_NORM (0x1 << 14) /* Codec Private Register definition */ /* DAC ADC Digital Volume (0x00) */ From 440e2051c577896275c0e0513ec26964e04c7810 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 29 May 2024 14:42:40 -0700 Subject: [PATCH 735/778] nvmet-fc: Remove __counted_by from nvmet_fc_tgt_queue.fod[] Work for __counted_by on generic pointers in structures (not just flexible array members) has started landing in Clang 19 (current tip of tree). During the development of this feature, a restriction was added to __counted_by to prevent the flexible array member's element type from including a flexible array member itself such as: struct foo { int count; char buf[]; }; struct bar { int count; struct foo data[] __counted_by(count); }; because the size of data cannot be calculated with the standard array size formula: sizeof(struct foo) * count This restriction was downgraded to a warning but due to CONFIG_WERROR, it can still break the build. The application of __counted_by on the fod member of 'struct nvmet_fc_tgt_queue' triggers this restriction, resulting in: drivers/nvme/target/fc.c:151:2: error: 'counted_by' should not be applied to an array with element of unknown size because 'struct nvmet_fc_fcp_iod' is a struct type with a flexible array member. This will be an error in a future compiler version [-Werror,-Wbounds-safety-counted-by-elt-type-unknown-size] 151 | struct nvmet_fc_fcp_iod fod[] __counted_by(sqsize); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. Remove this use of __counted_by to fix the warning/error. However, rather than remove it altogether, leave it commented, as it may be possible to support this in future compiler releases. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2027 Fixes: ccd3129aca28 ("nvmet-fc: Annotate struct nvmet_fc_tgt_queue with __counted_by") Signed-off-by: Nathan Chancellor Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 337ee1cb09ae..381b4394731f 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -148,7 +148,7 @@ struct nvmet_fc_tgt_queue { struct workqueue_struct *work_q; struct kref ref; /* array of fcp_iods */ - struct nvmet_fc_fcp_iod fod[] __counted_by(sqsize); + struct nvmet_fc_fcp_iod fod[] /* __counted_by(sqsize) */; } __aligned(sizeof(unsigned long long)); struct nvmet_fc_hostport { From a11aaf6d0bb4282ce1989e388b13f8d87154ba75 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Wed, 26 Jun 2024 14:06:16 +0200 Subject: [PATCH 736/778] kbuild: scripts/gdb: bring the "abspath" back Use the "abspath" call when symlinking the gdb python scripts in scripts/gdb/linux. This call is needed to avoid broken links when running the scripts_gdb target on a build directory located directly under the source tree (e.g., O=builddir). Fixes: 659bbf7e1b08 ("kbuild: scripts/gdb: Replace missed $(srctree)/$(src) w/ $(src)") Signed-off-by: Joel Granados Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Signed-off-by: Masahiro Yamada --- scripts/gdb/linux/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gdb/linux/Makefile b/scripts/gdb/linux/Makefile index fd1402c0a1a1..fcd32fcf3ae0 100644 --- a/scripts/gdb/linux/Makefile +++ b/scripts/gdb/linux/Makefile @@ -5,7 +5,7 @@ ifdef building_out_of_srctree symlinks := $(patsubst $(src)/%,%,$(wildcard $(src)/*.py)) quiet_cmd_symlink = SYMLINK $@ - cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(src)/%,$@) $@ + cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(abspath $(src))/%,$@) $@ always-y += $(symlinks) $(addprefix $(obj)/, $(symlinks)): FORCE From 7931d32955e09d0a11b1fe0b6aac1bfa061c005c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Jun 2024 23:15:38 +0200 Subject: [PATCH 737/778] netfilter: nf_tables: fully validate NFT_DATA_VALUE on store to data registers register store validation for NFT_DATA_VALUE is conditional, however, the datatype is always either NFT_DATA_VALUE or NFT_DATA_VERDICT. This only requires a new helper function to infer the register type from the set datatype so this conditional check can be removed. Otherwise, pointer to chain object can be leaked through the registers. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Linus Torvalds Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++++ net/netfilter/nf_tables_api.c | 8 ++++---- net/netfilter/nft_lookup.c | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2796153b03da..188d41da1a40 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -619,6 +619,11 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } +static inline enum nft_data_types nft_set_datatype(const struct nft_set *set) +{ + return set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; +} + static inline bool nft_set_gc_is_pending(const struct nft_set *s) { return refcount_read(&s->refs) != 1; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index be3b4c90d2ed..e8dcf41d360d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5740,8 +5740,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext), - set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE, - set->dlen) < 0) + nft_set_datatype(set), set->dlen) < 0) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) && @@ -11073,6 +11072,9 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, return 0; default: + if (type != NFT_DATA_VALUE) + return -EINVAL; + if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; if (len == 0) @@ -11081,8 +11083,6 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, sizeof_field(struct nft_regs, data)) return -ERANGE; - if (data != NULL && type != NFT_DATA_VALUE) - return -EINVAL; return 0; } } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index b314ca728a29..f3080fa1b226 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -132,7 +132,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return -EINVAL; err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG], - &priv->dreg, NULL, set->dtype, + &priv->dreg, NULL, + nft_set_datatype(set), set->dlen); if (err < 0) return err; From bab4923132feb3e439ae45962979c5d9d5c7c1f1 Mon Sep 17 00:00:00 2001 From: Yunseong Kim Date: Tue, 25 Jun 2024 02:33:23 +0900 Subject: [PATCH 738/778] tracing/net_sched: NULL pointer dereference in perf_trace_qdisc_reset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the TRACE_EVENT(qdisc_reset) NULL dereference occurred from qdisc->dev_queue->dev ->name This situation simulated from bunch of veths and Bluetooth disconnection and reconnection. During qdisc initialization, qdisc was being set to noop_queue. In veth_init_queue, the initial tx_num was reduced back to one, causing the qdisc reset to be called with noop, which led to the kernel panic. I've attached the GitHub gist link that C converted syz-execprogram source code and 3 log of reproduced vmcore-dmesg. https://gist.github.com/yskelg/cc64562873ce249cdd0d5a358b77d740 Yeoreum and I use two fuzzing tool simultaneously. One process with syz-executor : https://github.com/google/syzkaller $ ./syz-execprog -executor=./syz-executor -repeat=1 -sandbox=setuid \ -enable=none -collide=false log1 The other process with perf fuzzer: https://github.com/deater/perf_event_tests/tree/master/fuzzer $ perf_event_tests/fuzzer/perf_fuzzer I think this will happen on the kernel version. Linux kernel version +v6.7.10, +v6.8, +v6.9 and it could happen in v6.10. This occurred from 51270d573a8d. I think this patch is absolutely necessary. Previously, It was showing not intended string value of name. I've reproduced 3 time from my fedora 40 Debug Kernel with any other module or patched. version: 6.10.0-0.rc2.20240608gitdc772f8237f9.29.fc41.aarch64+debug [ 5287.164555] veth0_vlan: left promiscuous mode [ 5287.164929] veth1_macvtap: left promiscuous mode [ 5287.164950] veth0_macvtap: left promiscuous mode [ 5287.164983] veth1_vlan: left promiscuous mode [ 5287.165008] veth0_vlan: left promiscuous mode [ 5287.165450] veth1_macvtap: left promiscuous mode [ 5287.165472] veth0_macvtap: left promiscuous mode [ 5287.165502] veth1_vlan: left promiscuous mode … [ 5297.598240] bridge0: port 2(bridge_slave_1) entered blocking state [ 5297.598262] bridge0: port 2(bridge_slave_1) entered forwarding state [ 5297.598296] bridge0: port 1(bridge_slave_0) entered blocking state [ 5297.598313] bridge0: port 1(bridge_slave_0) entered forwarding state [ 5297.616090] 8021q: adding VLAN 0 to HW filter on device bond0 [ 5297.620405] bridge0: port 1(bridge_slave_0) entered disabled state [ 5297.620730] bridge0: port 2(bridge_slave_1) entered disabled state [ 5297.627247] 8021q: adding VLAN 0 to HW filter on device team0 [ 5297.629636] bridge0: port 1(bridge_slave_0) entered blocking state … [ 5298.002798] bridge_slave_0: left promiscuous mode [ 5298.002869] bridge0: port 1(bridge_slave_0) entered disabled state [ 5298.309444] bond0 (unregistering): (slave bond_slave_0): Releasing backup interface [ 5298.315206] bond0 (unregistering): (slave bond_slave_1): Releasing backup interface [ 5298.320207] bond0 (unregistering): Released all slaves [ 5298.354296] hsr_slave_0: left promiscuous mode [ 5298.360750] hsr_slave_1: left promiscuous mode [ 5298.374889] veth1_macvtap: left promiscuous mode [ 5298.374931] veth0_macvtap: left promiscuous mode [ 5298.374988] veth1_vlan: left promiscuous mode [ 5298.375024] veth0_vlan: left promiscuous mode [ 5299.109741] team0 (unregistering): Port device team_slave_1 removed [ 5299.185870] team0 (unregistering): Port device team_slave_0 removed … [ 5300.155443] Bluetooth: hci3: unexpected cc 0x0c03 length: 249 > 1 [ 5300.155724] Bluetooth: hci3: unexpected cc 0x1003 length: 249 > 9 [ 5300.155988] Bluetooth: hci3: unexpected cc 0x1001 length: 249 > 9 …. [ 5301.075531] team0: Port device team_slave_1 added [ 5301.085515] bridge0: port 1(bridge_slave_0) entered blocking state [ 5301.085531] bridge0: port 1(bridge_slave_0) entered disabled state [ 5301.085588] bridge_slave_0: entered allmulticast mode [ 5301.085800] bridge_slave_0: entered promiscuous mode [ 5301.095617] bridge0: port 1(bridge_slave_0) entered blocking state [ 5301.095633] bridge0: port 1(bridge_slave_0) entered disabled state … [ 5301.149734] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link [ 5301.173234] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link [ 5301.180517] bond0: (slave bond_slave_1): Enslaving as an active interface with an up link [ 5301.193481] hsr_slave_0: entered promiscuous mode [ 5301.204425] hsr_slave_1: entered promiscuous mode [ 5301.210172] debugfs: Directory 'hsr0' with parent 'hsr' already present! [ 5301.210185] Cannot create hsr debugfs directory [ 5301.224061] bond0: (slave bond_slave_1): Enslaving as an active interface with an up link [ 5301.246901] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link [ 5301.255934] team0: Port device team_slave_0 added [ 5301.256480] team0: Port device team_slave_1 added [ 5301.256948] team0: Port device team_slave_0 added … [ 5301.435928] hsr_slave_0: entered promiscuous mode [ 5301.446029] hsr_slave_1: entered promiscuous mode [ 5301.455872] debugfs: Directory 'hsr0' with parent 'hsr' already present! [ 5301.455884] Cannot create hsr debugfs directory [ 5301.502664] hsr_slave_0: entered promiscuous mode [ 5301.513675] hsr_slave_1: entered promiscuous mode [ 5301.526155] debugfs: Directory 'hsr0' with parent 'hsr' already present! [ 5301.526164] Cannot create hsr debugfs directory [ 5301.563662] hsr_slave_0: entered promiscuous mode [ 5301.576129] hsr_slave_1: entered promiscuous mode [ 5301.580259] debugfs: Directory 'hsr0' with parent 'hsr' already present! [ 5301.580270] Cannot create hsr debugfs directory [ 5301.590269] 8021q: adding VLAN 0 to HW filter on device bond0 [ 5301.595872] KASAN: null-ptr-deref in range [0x0000000000000130-0x0000000000000137] [ 5301.595877] Mem abort info: [ 5301.595881] ESR = 0x0000000096000006 [ 5301.595885] EC = 0x25: DABT (current EL), IL = 32 bits [ 5301.595889] SET = 0, FnV = 0 [ 5301.595893] EA = 0, S1PTW = 0 [ 5301.595896] FSC = 0x06: level 2 translation fault [ 5301.595900] Data abort info: [ 5301.595903] ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 [ 5301.595907] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 5301.595911] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 5301.595915] [dfff800000000026] address between user and kernel address ranges [ 5301.595971] Internal error: Oops: 0000000096000006 [#1] SMP … [ 5301.596076] CPU: 2 PID: 102769 Comm: syz-executor.3 Kdump: loaded Tainted: G W ------- --- 6.10.0-0.rc2.20240608gitdc772f8237f9.29.fc41.aarch64+debug #1 [ 5301.596080] Hardware name: VMware, Inc. VMware20,1/VBSA, BIOS VMW201.00V.21805430.BA64.2305221830 05/22/2023 [ 5301.596082] pstate: 01400005 (nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 5301.596085] pc : strnlen+0x40/0x88 [ 5301.596114] lr : trace_event_get_offsets_qdisc_reset+0x6c/0x2b0 [ 5301.596124] sp : ffff8000beef6b40 [ 5301.596126] x29: ffff8000beef6b40 x28: dfff800000000000 x27: 0000000000000001 [ 5301.596131] x26: 6de1800082c62bd0 x25: 1ffff000110aa9e0 x24: ffff800088554f00 [ 5301.596136] x23: ffff800088554ec0 x22: 0000000000000130 x21: 0000000000000140 [ 5301.596140] x20: dfff800000000000 x19: ffff8000beef6c60 x18: ffff7000115106d8 [ 5301.596143] x17: ffff800121bad000 x16: ffff800080020000 x15: 0000000000000006 [ 5301.596147] x14: 0000000000000002 x13: ffff0001f3ed8d14 x12: ffff700017ddeda5 [ 5301.596151] x11: 1ffff00017ddeda4 x10: ffff700017ddeda4 x9 : ffff800082cc5eec [ 5301.596155] x8 : 0000000000000004 x7 : 00000000f1f1f1f1 x6 : 00000000f2f2f200 [ 5301.596158] x5 : 00000000f3f3f3f3 x4 : ffff700017dded80 x3 : 00000000f204f1f1 [ 5301.596162] x2 : 0000000000000026 x1 : 0000000000000000 x0 : 0000000000000130 [ 5301.596166] Call trace: [ 5301.596175] strnlen+0x40/0x88 [ 5301.596179] trace_event_get_offsets_qdisc_reset+0x6c/0x2b0 [ 5301.596182] perf_trace_qdisc_reset+0xb0/0x538 [ 5301.596184] __traceiter_qdisc_reset+0x68/0xc0 [ 5301.596188] qdisc_reset+0x43c/0x5e8 [ 5301.596190] netif_set_real_num_tx_queues+0x288/0x770 [ 5301.596194] veth_init_queues+0xfc/0x130 [veth] [ 5301.596198] veth_newlink+0x45c/0x850 [veth] [ 5301.596202] rtnl_newlink_create+0x2c8/0x798 [ 5301.596205] __rtnl_newlink+0x92c/0xb60 [ 5301.596208] rtnl_newlink+0xd8/0x130 [ 5301.596211] rtnetlink_rcv_msg+0x2e0/0x890 [ 5301.596214] netlink_rcv_skb+0x1c4/0x380 [ 5301.596225] rtnetlink_rcv+0x20/0x38 [ 5301.596227] netlink_unicast+0x3c8/0x640 [ 5301.596231] netlink_sendmsg+0x658/0xa60 [ 5301.596234] __sock_sendmsg+0xd0/0x180 [ 5301.596243] __sys_sendto+0x1c0/0x280 [ 5301.596246] __arm64_sys_sendto+0xc8/0x150 [ 5301.596249] invoke_syscall+0xdc/0x268 [ 5301.596256] el0_svc_common.constprop.0+0x16c/0x240 [ 5301.596259] do_el0_svc+0x48/0x68 [ 5301.596261] el0_svc+0x50/0x188 [ 5301.596265] el0t_64_sync_handler+0x120/0x130 [ 5301.596268] el0t_64_sync+0x194/0x198 [ 5301.596272] Code: eb15001f 54000120 d343fc02 12000801 (38f46842) [ 5301.596285] SMP: stopping secondary CPUs [ 5301.597053] Starting crashdump kernel... [ 5301.597057] Bye! After applying our patch, I didn't find any kernel panic errors. We've found a simple reproducer # echo 1 > /sys/kernel/debug/tracing/events/qdisc/qdisc_reset/enable # ip link add veth0 type veth peer name veth1 Error: Unknown device type. However, without our patch applied, I tested upstream 6.10.0-rc3 kernel using the qdisc_reset event and the ip command on my qemu virtual machine. This 2 commands makes always kernel panic. Linux version: 6.10.0-rc3 [ 0.000000] Linux version 6.10.0-rc3-00164-g44ef20baed8e-dirty (paran@fedora) (gcc (GCC) 14.1.1 20240522 (Red Hat 14.1.1-4), GNU ld version 2.41-34.fc40) #20 SMP PREEMPT Sat Jun 15 16:51:25 KST 2024 Kernel panic message: [ 615.236484] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP [ 615.237250] Dumping ftrace buffer: [ 615.237679] (ftrace buffer empty) [ 615.238097] Modules linked in: veth crct10dif_ce virtio_gpu virtio_dma_buf drm_shmem_helper drm_kms_helper zynqmp_fpga xilinx_can xilinx_spi xilinx_selectmap xilinx_core xilinx_pr_decoupler versal_fpga uvcvideo uvc videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videodev videobuf2_common mc usbnet deflate zstd ubifs ubi rcar_canfd rcar_can omap_mailbox ntb_msi_test ntb_hw_epf lattice_sysconfig_spi lattice_sysconfig ice40_spi gpio_xilinx dwmac_altr_socfpga mdio_regmap stmmac_platform stmmac pcs_xpcs dfl_fme_region dfl_fme_mgr dfl_fme_br dfl_afu dfl fpga_region fpga_bridge can can_dev br_netfilter bridge stp llc atl1c ath11k_pci mhi ath11k_ahb ath11k qmi_helpers ath10k_sdio ath10k_pci ath10k_core ath mac80211 libarc4 cfg80211 drm fuse backlight ipv6 Jun 22 02:36:5[3 6k152.62-4sm98k4-0k]v kCePUr:n e1l :P IUDn:a b4le6 8t oC ohmma: nidpl eN oketr nteali nptaedg i6n.g1 0re.0q-urecs3t- 0at0 1v6i4r-tgu4a4le fa2d0dbraeeds0se-dir tyd f#f2f08 615.252376] Hardware name: linux,dummy-virt (DT) [ 615.253220] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 615.254433] pc : strnlen+0x6c/0xe0 [ 615.255096] lr : trace_event_get_offsets_qdisc_reset+0x94/0x3d0 [ 615.256088] sp : ffff800080b269a0 [ 615.256615] x29: ffff800080b269a0 x28: ffffc070f3f98500 x27: 0000000000000001 [ 615.257831] x26: 0000000000000010 x25: ffffc070f3f98540 x24: ffffc070f619cf60 [ 615.259020] x23: 0000000000000128 x22: 0000000000000138 x21: dfff800000000000 [ 615.260241] x20: ffffc070f631ad00 x19: 0000000000000128 x18: ffffc070f448b800 [ 615.261454] x17: 0000000000000000 x16: 0000000000000001 x15: ffffc070f4ba2a90 [ 615.262635] x14: ffff700010164d73 x13: 1ffff80e1e8d5eb3 x12: 1ffff00010164d72 [ 615.263877] x11: ffff700010164d72 x10: dfff800000000000 x9 : ffffc070e85d6184 [ 615.265047] x8 : ffffc070e4402070 x7 : 000000000000f1f1 x6 : 000000001504a6d3 [ 615.266336] x5 : ffff28ca21122140 x4 : ffffc070f5043ea8 x3 : 0000000000000000 [ 615.267528] x2 : 0000000000000025 x1 : 0000000000000000 x0 : 0000000000000000 [ 615.268747] Call trace: [ 615.269180] strnlen+0x6c/0xe0 [ 615.269767] trace_event_get_offsets_qdisc_reset+0x94/0x3d0 [ 615.270716] trace_event_raw_event_qdisc_reset+0xe8/0x4e8 [ 615.271667] __traceiter_qdisc_reset+0xa0/0x140 [ 615.272499] qdisc_reset+0x554/0x848 [ 615.273134] netif_set_real_num_tx_queues+0x360/0x9a8 [ 615.274050] veth_init_queues+0x110/0x220 [veth] [ 615.275110] veth_newlink+0x538/0xa50 [veth] [ 615.276172] __rtnl_newlink+0x11e4/0x1bc8 [ 615.276944] rtnl_newlink+0xac/0x120 [ 615.277657] rtnetlink_rcv_msg+0x4e4/0x1370 [ 615.278409] netlink_rcv_skb+0x25c/0x4f0 [ 615.279122] rtnetlink_rcv+0x48/0x70 [ 615.279769] netlink_unicast+0x5a8/0x7b8 [ 615.280462] netlink_sendmsg+0xa70/0x1190 Yeoreum and I don't know if the patch we wrote will fix the underlying cause, but we think that priority is to prevent kernel panic happening. So, we're sending this patch. Fixes: 51270d573a8d ("tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string") Link: https://lore.kernel.org/lkml/20240229143432.273b4871@gandalf.local.home/t/ Cc: netdev@vger.kernel.org Tested-by: Yunseong Kim Signed-off-by: Yunseong Kim Signed-off-by: Yeoreum Yun Link: https://lore.kernel.org/r/20240624173320.24945-4-yskelg@gmail.com Signed-off-by: Paolo Abeni --- include/trace/events/qdisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index f1b5e816e7e5..ff33f41a9db7 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -81,7 +81,7 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q)->name ) + __string( dev, qdisc_dev(q) ? qdisc_dev(q)->name : "(null)" ) __string( kind, q->ops->id ) __field( u32, parent ) __field( u32, handle ) From 7d139181a8912d8bf0ede4f38d37688449b9af23 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:35 -0700 Subject: [PATCH 739/778] selftest: af_unix: Remove test_unix_oob.c. test_unix_oob.c does not fully cover AF_UNIX's MSG_OOB functionality, thus there are discrepancies between TCP behaviour. Also, the test uses fork() to create message producer, and it's not easy to understand and add more test cases. Let's remove test_unix_oob.c and rewrite a new test. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/.gitignore | 1 - tools/testing/selftests/net/af_unix/Makefile | 2 +- .../selftests/net/af_unix/test_unix_oob.c | 436 ------------------ 3 files changed, 1 insertion(+), 438 deletions(-) delete mode 100644 tools/testing/selftests/net/af_unix/test_unix_oob.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 49a56eb5d036..666ab7d9390b 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -43,7 +43,6 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap -test_unix_oob timestamping tls toeplitz diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 3b83c797650d..a25845251eed 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd scm_rights +TEST_GEN_PROGS := diag_uid scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c deleted file mode 100644 index a7c51889acd5..000000000000 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ /dev/null @@ -1,436 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int pipefd[2]; -static int signal_recvd; -static pid_t producer_id; -static char sock_name[32]; - -static void sig_hand(int sn, siginfo_t *si, void *p) -{ - signal_recvd = sn; -} - -static int set_sig_handler(int signal) -{ - struct sigaction sa; - - sa.sa_sigaction = sig_hand; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_SIGINFO | SA_RESTART; - - return sigaction(signal, &sa, NULL); -} - -static void set_filemode(int fd, int set) -{ - int flags = fcntl(fd, F_GETFL, 0); - - if (set) - flags &= ~O_NONBLOCK; - else - flags |= O_NONBLOCK; - fcntl(fd, F_SETFL, flags); -} - -static void signal_producer(int fd) -{ - char cmd; - - cmd = 'S'; - write(fd, &cmd, sizeof(cmd)); -} - -static void wait_for_signal(int fd) -{ - char buf[5]; - - read(fd, buf, 5); -} - -static void die(int status) -{ - fflush(NULL); - unlink(sock_name); - kill(producer_id, SIGTERM); - exit(status); -} - -int is_sioctatmark(int fd) -{ - int ans = -1; - - if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) { -#ifdef DEBUG - perror("SIOCATMARK Failed"); -#endif - } - return ans; -} - -void read_oob(int fd, char *c) -{ - - *c = ' '; - if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) { -#ifdef DEBUG - perror("Reading MSG_OOB Failed"); -#endif - } -} - -int read_data(int pfd, char *buf, int size) -{ - int len = 0; - - memset(buf, size, '0'); - len = read(pfd, buf, size); -#ifdef DEBUG - if (len < 0) - perror("read failed"); -#endif - return len; -} - -static void wait_for_data(int pfd, int event) -{ - struct pollfd pfds[1]; - - pfds[0].fd = pfd; - pfds[0].events = event; - poll(pfds, 1, -1); -} - -void producer(struct sockaddr_un *consumer_addr) -{ - int cfd; - char buf[64]; - int i; - - memset(buf, 'x', sizeof(buf)); - cfd = socket(AF_UNIX, SOCK_STREAM, 0); - - wait_for_signal(pipefd[0]); - if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(*consumer_addr)) != 0) { - perror("Connect failed"); - kill(0, SIGTERM); - exit(1); - } - - for (i = 0; i < 2; i++) { - /* Test 1: Test for SIGURG and OOB */ - wait_for_signal(pipefd[0]); - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 2: Test for OOB being overwitten */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '#'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 3: Test for SIOCATMARK */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - send(cfd, buf, sizeof(buf), 0); - - wait_for_signal(pipefd[0]); - - /* Test 4: Test for 1byte OOB msg */ - memset(buf, 'x', sizeof(buf)); - buf[0] = '@'; - send(cfd, buf, 1, MSG_OOB); - } -} - -int -main(int argc, char **argv) -{ - int lfd, pfd; - struct sockaddr_un consumer_addr, paddr; - socklen_t len = sizeof(consumer_addr); - char buf[1024]; - int on = 0; - char oob; - int atmark; - - lfd = socket(AF_UNIX, SOCK_STREAM, 0); - memset(&consumer_addr, 0, sizeof(consumer_addr)); - consumer_addr.sun_family = AF_UNIX; - sprintf(sock_name, "unix_oob_%d", getpid()); - unlink(sock_name); - strcpy(consumer_addr.sun_path, sock_name); - - if ((bind(lfd, (struct sockaddr *)&consumer_addr, - sizeof(consumer_addr))) != 0) { - perror("socket bind failed"); - exit(1); - } - - pipe(pipefd); - - listen(lfd, 1); - - producer_id = fork(); - if (producer_id == 0) { - producer(&consumer_addr); - exit(0); - } - - set_sig_handler(SIGURG); - signal_producer(pipefd[1]); - - pfd = accept(lfd, (struct sockaddr *) &paddr, &len); - fcntl(pfd, F_SETOWN, getpid()); - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1: - * veriyf that SIGURG is - * delivered, 63 bytes are - * read, oob is '@', and POLLPRI works. - */ - wait_for_data(pfd, POLLPRI); - read_oob(pfd, &oob); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 63 || oob != '@') { - fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2: - * Verify that the first OOB is over written by - * the 2nd one and the first OOB is returned as - * part of the read, and sigurg is received. - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = 0; - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - read_oob(pfd, &oob); - if (!signal_recvd || len != 127 || oob != '#') { - fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and atmark - * is set. - * oob is '%' and second read returns - * 64 bytes. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 150) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - read_oob(pfd, &oob); - - if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { - fprintf(stderr, - "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n", - signal_recvd, len, oob, atmark); - die(1); - } - - signal_recvd = 0; - - len = read_data(pfd, buf, 1024); - if (len != 64) { - fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4: - * verify that a single byte - * oob message is delivered. - * set non blocking mode and - * check proper error is - * returned and sigurg is - * received and correct - * oob is read. - */ - - set_filemode(pfd, 0); - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if ((len == -1) && (errno == 11)) - len = 0; - - read_oob(pfd, &oob); - - if (!signal_recvd || len != 0 || oob != '@') { - fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - set_filemode(pfd, 1); - - /* Inline Testing */ - - on = 1; - if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) { - perror("SO_OOBINLINE"); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1 -- Inline: - * Check that SIGURG is - * delivered and 63 bytes are - * read and oob is '@' - */ - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - - if (!signal_recvd || len != 63) { - fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n", - signal_recvd, len); - die(1); - } - - len = read_data(pfd, buf, 1024); - - if (len != 1) { - fprintf(stderr, - "Test 1.1 Inline failed, sigurg %d len %d oob %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2 -- Inline: - * Verify that the first OOB is over written by - * the 2nd one and read breaks correctly on - * 2nd OOB boundary with the first OOB returned as - * part of the read, and sigurg is delivered and - * siocatmark returns true. - * next read returns one byte, the oob byte - * and siocatmark returns false. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 127 || atmark != 1 || !signal_recvd) { - fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n", - len, atmark); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 1 || buf[0] != '#' || atmark == 1) { - fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3 -- Inline: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and siocatmark - * is true after the read. - * subsequent read returns 65 bytes - * because of oob which should be '%'. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 126) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (!signal_recvd || len != 127 || !atmark) { - fprintf(stderr, - "Test 3 Inline failed, sigurg %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 65 || buf[0] != '%' || atmark != 0) { - fprintf(stderr, - "Test 3.1 Inline failed, len %d oob %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4 -- Inline: - * verify that a single - * byte oob message is delivered - * and read returns one byte, the oob - * byte and sigurg is received - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 1 || buf[0] != '@') { - fprintf(stderr, - "Test 4 Inline failed, signal %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - die(0); -} From d098d77232c375cb2cded4a7099f0a763016ee0d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:36 -0700 Subject: [PATCH 740/778] selftest: af_unix: Add msg_oob.c. AF_UNIX's MSG_OOB functionality lacked thorough testing, and we found some bizarre behaviour. The new selftest validates every MSG_OOB operation against TCP as a reference implementation. This patch adds only a few tests with basic send() and recv() that do not fail. The following patches will add more test cases for SO_OOBINLINE, SIGURG, EPOLLPRI, and SIOCATMARK. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/af_unix/Makefile | 2 +- tools/testing/selftests/net/af_unix/msg_oob.c | 220 ++++++++++++++++++ 2 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/af_unix/msg_oob.c diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index a25845251eed..50584479540b 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid scm_pidfd scm_rights unix_connect +TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c new file mode 100644 index 000000000000..d427d39d0806 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include +#include +#include + +#include +#include + +#include "../../kselftest_harness.h" + +#define BUF_SZ 32 + +FIXTURE(msg_oob) +{ + int fd[4]; /* 0: AF_UNIX sender + * 1: AF_UNIX receiver + * 2: TCP sender + * 3: TCP receiver + */ +}; + +static void create_unix_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int ret; + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(ret, 0); +} + +static void create_tcp_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct sockaddr_in addr; + socklen_t addrlen; + int listen_fd; + int ret; + + listen_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(listen_fd, 0); + + ret = listen(listen_fd, -1); + ASSERT_EQ(ret, 0); + + addrlen = sizeof(addr); + ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_EQ(ret, 0); + + self->fd[2] = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(self->fd[2], 0); + + ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(ret, 0); + + self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_GE(self->fd[3], 0); + + ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK); + ASSERT_EQ(ret, 0); +} + +static void close_sockets(FIXTURE_DATA(msg_oob) *self) +{ + int i; + + for (i = 0; i < 4; i++) + close(self->fd[i]); +} + +FIXTURE_SETUP(msg_oob) +{ + create_unix_socketpair(_metadata, self); + create_tcp_socketpair(_metadata, self); +} + +FIXTURE_TEARDOWN(msg_oob) +{ + close_sockets(self); +} + +static void __sendpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *buf, size_t len, int flags) +{ + int i, ret[2]; + + for (i = 0; i < 2; i++) + ret[i] = send(self->fd[i * 2], buf, len, flags); + + ASSERT_EQ(ret[0], len); + ASSERT_EQ(ret[0], ret[1]); +} + +static void __recvpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *expected_buf, int expected_len, + int buf_len, int flags) +{ + int i, ret[2], recv_errno[2], expected_errno = 0; + char recv_buf[2][BUF_SZ] = {}; + + ASSERT_GE(BUF_SZ, buf_len); + + errno = 0; + + for (i = 0; i < 2; i++) { + ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + recv_errno[i] = errno; + } + + if (expected_len < 0) { + expected_errno = -expected_len; + expected_len = -1; + } + + if (ret[0] != expected_len || recv_errno[0] != expected_errno) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(ret[0], expected_len); + ASSERT_EQ(recv_errno[0], expected_errno); + } + + if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + + ASSERT_EQ(ret[0], ret[1]); + ASSERT_EQ(recv_errno[0], recv_errno[1]); + } + + if (expected_len >= 0) { + int cmp; + + cmp = strncmp(expected_buf, recv_buf[0], expected_len); + if (cmp) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(cmp, 0); + } + + cmp = strncmp(recv_buf[0], recv_buf[1], expected_len); + if (cmp) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + + ASSERT_EQ(cmp, 0); + } + } +} + +#define sendpair(buf, len, flags) \ + __sendpair(_metadata, self, buf, len, flags) + +#define recvpair(expected_buf, expected_len, buf_len, flags) \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, buf_len, flags) + +TEST_F(msg_oob, non_oob) +{ + sendpair("x", 1, 0); + + recvpair("", -EINVAL, 1, MSG_OOB); +} + +TEST_F(msg_oob, oob) +{ + sendpair("x", 1, MSG_OOB); + + recvpair("x", 1, 1, MSG_OOB); +} + +TEST_F(msg_oob, oob_drop) +{ + sendpair("x", 1, MSG_OOB); + + recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */ + recvpair("", -EINVAL, 1, MSG_OOB); +} + +TEST_F(msg_oob, oob_ahead) +{ + sendpair("hello", 5, MSG_OOB); + + recvpair("o", 1, 1, MSG_OOB); + recvpair("hell", 4, 4, 0); +} + +TEST_F(msg_oob, oob_break) +{ + sendpair("hello", 5, MSG_OOB); + + recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */ + recvpair("o", 1, 1, MSG_OOB); +} + +TEST_F(msg_oob, oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + sendpair("world", 5, 0); + + recvpair("o", 1, 1, MSG_OOB); + recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */ + recvpair("world", 5, 5, 0); +} + +TEST_F(msg_oob, oob_break_drop) +{ + sendpair("hello", 5, MSG_OOB); + sendpair("world", 5, 0); + + recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */ + recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */ + recvpair("", -EINVAL, 1, MSG_OOB); +} + +TEST_HARNESS_MAIN From b94038d841a91d0e3f59cfe4d073e210910366ee Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:37 -0700 Subject: [PATCH 741/778] af_unix: Stop recv(MSG_PEEK) at consumed OOB skb. After consuming OOB data, recv() reading the preceding data must break at the OOB skb regardless of MSG_PEEK. Currently, MSG_PEEK does not stop recv() for AF_UNIX, and the behaviour is not compliant with TCP. >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX) >>> c1.send(b'hello', MSG_OOB) 5 >>> c1.send(b'world') 5 >>> c2.recv(1, MSG_OOB) b'o' >>> c2.recv(9, MSG_PEEK) # This should return b'hell' b'hellworld' # even with enough buffer. Let's fix it by returning NULL for consumed skb and unlinking it only if MSG_PEEK is not specified. This patch also adds test cases that add recv(MSG_PEEK) before each recv(). Without fix: # RUN msg_oob.peek.oob_ahead_break ... # msg_oob.c:134:oob_ahead_break:AF_UNIX :hellworld # msg_oob.c:135:oob_ahead_break:Expected:hell # msg_oob.c:137:oob_ahead_break:Expected ret[0] (9) == expected_len (4) # oob_ahead_break: Test terminated by assertion # FAIL msg_oob.peek.oob_ahead_break not ok 13 msg_oob.peek.oob_ahead_break With fix: # RUN msg_oob.peek.oob_ahead_break ... # OK msg_oob.peek.oob_ahead_break ok 13 msg_oob.peek.oob_ahead_break Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 9 ++++--- tools/testing/selftests/net/af_unix/msg_oob.c | 25 +++++++++++++++++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5e695a9a609c..2eaecf9d78a4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2613,9 +2613,12 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, { struct unix_sock *u = unix_sk(sk); - if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { - skb_unlink(skb, &sk->sk_receive_queue); - consume_skb(skb); + if (!unix_skb_len(skb)) { + if (!(flags & MSG_PEEK)) { + skb_unlink(skb, &sk->sk_receive_queue); + consume_skb(skb); + } + skb = NULL; } else { struct sk_buff *unlinked_skb = NULL; diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index d427d39d0806..de8d1fcde883 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -21,6 +21,21 @@ FIXTURE(msg_oob) */ }; +FIXTURE_VARIANT(msg_oob) +{ + bool peek; +}; + +FIXTURE_VARIANT_ADD(msg_oob, no_peek) +{ + .peek = false, +}; + +FIXTURE_VARIANT_ADD(msg_oob, peek) +{ + .peek = true +}; + static void create_unix_socketpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self) { @@ -156,8 +171,14 @@ static void __recvpair(struct __test_metadata *_metadata, __sendpair(_metadata, self, buf, len, flags) #define recvpair(expected_buf, expected_len, buf_len, flags) \ - __recvpair(_metadata, self, \ - expected_buf, expected_len, buf_len, flags) + do { \ + if (variant->peek) \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, \ + buf_len, (flags) | MSG_PEEK); \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, buf_len, flags); \ + } while (0) TEST_F(msg_oob, non_oob) { From 93c99f21db360957d49853e5666b5c147f593bda Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:38 -0700 Subject: [PATCH 742/778] af_unix: Don't stop recv(MSG_DONTWAIT) if consumed OOB skb is at the head. Let's say a socket send()s "hello" with MSG_OOB and "world" without flags, >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX) >>> c1.send(b'hello', MSG_OOB) 5 >>> c1.send(b'world') 5 and its peer recv()s "hell" and "o". >>> c2.recv(10) b'hell' >>> c2.recv(1, MSG_OOB) b'o' Now the consumed OOB skb stays at the head of recvq to return a correct value for ioctl(SIOCATMARK), which is broken now and fixed by a later patch. Then, if peer issues recv() with MSG_DONTWAIT, manage_oob() returns NULL, so recv() ends up with -EAGAIN. >>> c2.setblocking(False) # This causes -EAGAIN even with available data >>> c2.recv(5) Traceback (most recent call last): File "", line 1, in BlockingIOError: [Errno 11] Resource temporarily unavailable However, next recv() will return the following available data, "world". >>> c2.recv(5) b'world' When the consumed OOB skb is at the head of the queue, we need to fetch the next skb to fix the weird behaviour. Note that the issue does not happen without MSG_DONTWAIT because we can retry after manage_oob(). This patch also adds a test case that covers the issue. Without fix: # RUN msg_oob.no_peek.ex_oob_break ... # msg_oob.c:134:ex_oob_break:AF_UNIX :Resource temporarily unavailable # msg_oob.c:135:ex_oob_break:Expected:ld # msg_oob.c:137:ex_oob_break:Expected ret[0] (-1) == expected_len (2) # ex_oob_break: Test terminated by assertion # FAIL msg_oob.no_peek.ex_oob_break not ok 8 msg_oob.no_peek.ex_oob_break With fix: # RUN msg_oob.no_peek.ex_oob_break ... # OK msg_oob.no_peek.ex_oob_break ok 8 msg_oob.no_peek.ex_oob_break Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 19 +++++++++++++++---- tools/testing/selftests/net/af_unix/msg_oob.c | 11 +++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2eaecf9d78a4..b0b97f8d0d09 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2614,12 +2614,23 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, struct unix_sock *u = unix_sk(sk); if (!unix_skb_len(skb)) { - if (!(flags & MSG_PEEK)) { - skb_unlink(skb, &sk->sk_receive_queue); - consume_skb(skb); + struct sk_buff *unlinked_skb = NULL; + + spin_lock(&sk->sk_receive_queue.lock); + + if (copied) { + skb = NULL; + } else if (flags & MSG_PEEK) { + skb = skb_peek_next(skb, &sk->sk_receive_queue); + } else { + unlinked_skb = skb; + skb = skb_peek_next(skb, &sk->sk_receive_queue); + __skb_unlink(unlinked_skb, &sk->sk_receive_queue); } - skb = NULL; + spin_unlock(&sk->sk_receive_queue.lock); + + consume_skb(unlinked_skb); } else { struct sk_buff *unlinked_skb = NULL; diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index de8d1fcde883..b5226ccec3ec 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -238,4 +238,15 @@ TEST_F(msg_oob, oob_break_drop) recvpair("", -EINVAL, 1, MSG_OOB); } +TEST_F(msg_oob, ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + sendpair("wor", 3, MSG_OOB); + sendpair("ld", 2, 0); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + recvpair("r", 1, 1, MSG_OOB); + recvpair("ld", 2, 2, 0); +} + TEST_HARNESS_MAIN From f5ea0768a2554152cac0a6202fcefb597b77486d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:39 -0700 Subject: [PATCH 743/778] selftest: af_unix: Add non-TCP-compliant test cases in msg_oob.c. While testing, I found some weird behaviour on the TCP side as well. For example, TCP drops the preceding OOB data when queueing a new OOB data if the old OOB data is at the head of recvq. # RUN msg_oob.no_peek.ex_oob_drop ... # msg_oob.c:146:ex_oob_drop:AF_UNIX :x # msg_oob.c:147:ex_oob_drop:TCP :Resource temporarily unavailable # msg_oob.c:146:ex_oob_drop:AF_UNIX :y # msg_oob.c:147:ex_oob_drop:TCP :Invalid argument # OK msg_oob.no_peek.ex_oob_drop ok 9 msg_oob.no_peek.ex_oob_drop # RUN msg_oob.no_peek.ex_oob_drop_2 ... # msg_oob.c:146:ex_oob_drop_2:AF_UNIX :x # msg_oob.c:147:ex_oob_drop_2:TCP :Resource temporarily unavailable # OK msg_oob.no_peek.ex_oob_drop_2 ok 10 msg_oob.no_peek.ex_oob_drop_2 This patch allows AF_UNIX's MSG_OOB implementation to produce different results from TCP when operations are guarded with tcp_incompliant{}. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/af_unix/msg_oob.c | 49 +++++++++++++++++-- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index b5226ccec3ec..46e92d06b0a3 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -19,6 +19,7 @@ FIXTURE(msg_oob) * 2: TCP sender * 3: TCP receiver */ + bool tcp_compliant; }; FIXTURE_VARIANT(msg_oob) @@ -88,6 +89,8 @@ FIXTURE_SETUP(msg_oob) { create_unix_socketpair(_metadata, self); create_tcp_socketpair(_metadata, self); + + self->tcp_compliant = true; } FIXTURE_TEARDOWN(msg_oob) @@ -115,6 +118,7 @@ static void __recvpair(struct __test_metadata *_metadata, { int i, ret[2], recv_errno[2], expected_errno = 0; char recv_buf[2][BUF_SZ] = {}; + bool printed = false; ASSERT_GE(BUF_SZ, buf_len); @@ -142,8 +146,12 @@ static void __recvpair(struct __test_metadata *_metadata, TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); - ASSERT_EQ(ret[0], ret[1]); - ASSERT_EQ(recv_errno[0], recv_errno[1]); + printed = true; + + if (self->tcp_compliant) { + ASSERT_EQ(ret[0], ret[1]); + ASSERT_EQ(recv_errno[0], recv_errno[1]); + } } if (expected_len >= 0) { @@ -159,10 +167,13 @@ static void __recvpair(struct __test_metadata *_metadata, cmp = strncmp(recv_buf[0], recv_buf[1], expected_len); if (cmp) { - TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); - TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + if (!printed) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + } - ASSERT_EQ(cmp, 0); + if (self->tcp_compliant) + ASSERT_EQ(cmp, 0); } } } @@ -180,6 +191,11 @@ static void __recvpair(struct __test_metadata *_metadata, expected_buf, expected_len, buf_len, flags); \ } while (0) +#define tcp_incompliant \ + for (self->tcp_compliant = false; \ + self->tcp_compliant == false; \ + self->tcp_compliant = true) + TEST_F(msg_oob, non_oob) { sendpair("x", 1, 0); @@ -249,4 +265,27 @@ TEST_F(msg_oob, ex_oob_break) recvpair("ld", 2, 2, 0); } +TEST_F(msg_oob, ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + + tcp_incompliant { + recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */ + recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */ + } +} + +TEST_F(msg_oob, ex_oob_drop_2) +{ + sendpair("x", 1, MSG_OOB); + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + + recvpair("y", 1, 1, MSG_OOB); + + tcp_incompliant { + recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */ + } +} + TEST_HARNESS_MAIN From 36893ef0b661671ee64eb37bf5f345f33d2cabb7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:40 -0700 Subject: [PATCH 744/778] af_unix: Don't stop recv() at consumed ex-OOB skb. Currently, recv() is stopped at a consumed OOB skb even if a new OOB skb is queued and we can ignore the old OOB skb. >>> from socket import * >>> c1, c2 = socket(AF_UNIX, SOCK_STREAM) >>> c1.send(b'hellowor', MSG_OOB) 8 >>> c2.recv(1, MSG_OOB) # consume OOB data stays at middle of recvq. b'r' >>> c1.send(b'ld', MSG_OOB) 2 >>> c2.recv(10) # recv() stops at the old consumed OOB b'hellowo' # should be 'hellowol' manage_oob() should not stop recv() at the old consumed OOB skb if there is a new OOB data queued. Note that TCP behaviour is apparently wrong in this test case because we can recv() the same OOB data twice. Without fix: # RUN msg_oob.no_peek.ex_oob_ahead_break ... # msg_oob.c:138:ex_oob_ahead_break:AF_UNIX :hellowo # msg_oob.c:139:ex_oob_ahead_break:Expected:hellowol # msg_oob.c:141:ex_oob_ahead_break:Expected ret[0] (7) == expected_len (8) # ex_oob_ahead_break: Test terminated by assertion # FAIL msg_oob.no_peek.ex_oob_ahead_break not ok 11 msg_oob.no_peek.ex_oob_ahead_break With fix: # RUN msg_oob.no_peek.ex_oob_ahead_break ... # msg_oob.c:146:ex_oob_ahead_break:AF_UNIX :hellowol # msg_oob.c:147:ex_oob_ahead_break:TCP :helloworl # OK msg_oob.no_peek.ex_oob_ahead_break ok 11 msg_oob.no_peek.ex_oob_ahead_break Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 2 +- tools/testing/selftests/net/af_unix/msg_oob.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b0b97f8d0d09..07f5eaa04b5b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2618,7 +2618,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, spin_lock(&sk->sk_receive_queue.lock); - if (copied) { + if (copied && (!u->oob_skb || skb == u->oob_skb)) { skb = NULL; } else if (flags & MSG_PEEK) { skb = skb_peek_next(skb, &sk->sk_receive_queue); diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 46e92d06b0a3..acf4bd0afe17 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -288,4 +288,20 @@ TEST_F(msg_oob, ex_oob_drop_2) } } +TEST_F(msg_oob, ex_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + sendpair("wor", 3, MSG_OOB); + + recvpair("r", 1, 1, MSG_OOB); + + sendpair("ld", 2, MSG_OOB); + + tcp_incompliant { + recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */ + } + + recvpair("d", 1, 1, MSG_OOB); +} + TEST_HARNESS_MAIN From 436352e8e57e219aae83d28568d9bd9857311e5a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:41 -0700 Subject: [PATCH 745/778] selftest: af_unix: Add SO_OOBINLINE test cases in msg_oob.c When SO_OOBINLINE is enabled on a socket, MSG_OOB can be recv()ed without MSG_OOB flag, and ioctl(SIOCATMARK) will behaves differently. This patch adds some test cases for SO_OOBINLINE. Note the new test cases found two bugs in TCP. 1) After reading OOB data with non-inline mode, we can re-read the data by setting SO_OOBINLINE. # RUN msg_oob.no_peek.inline_oob_ahead_break ... # msg_oob.c:146:inline_oob_ahead_break:AF_UNIX :world # msg_oob.c:147:inline_oob_ahead_break:TCP :oworld # OK msg_oob.no_peek.inline_oob_ahead_break ok 14 msg_oob.no_peek.inline_oob_ahead_break 2) The head OOB data is dropped if SO_OOBINLINE is disabled if a new OOB data is queued. # RUN msg_oob.no_peek.inline_ex_oob_drop ... # msg_oob.c:171:inline_ex_oob_drop:AF_UNIX :x # msg_oob.c:172:inline_ex_oob_drop:TCP :y # msg_oob.c:146:inline_ex_oob_drop:AF_UNIX :y # msg_oob.c:147:inline_ex_oob_drop:TCP :Resource temporarily unavailable # OK msg_oob.no_peek.inline_ex_oob_drop ok 17 msg_oob.no_peek.inline_ex_oob_drop Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/af_unix/msg_oob.c | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index acf4bd0afe17..62361b5e98c3 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -178,6 +178,20 @@ static void __recvpair(struct __test_metadata *_metadata, } } +static void __setinlinepair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int i, oob_inline = 1; + + for (i = 0; i < 2; i++) { + int ret; + + ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE, + &oob_inline, sizeof(oob_inline)); + ASSERT_EQ(ret, 0); + } +} + #define sendpair(buf, len, flags) \ __sendpair(_metadata, self, buf, len, flags) @@ -191,6 +205,9 @@ static void __recvpair(struct __test_metadata *_metadata, expected_buf, expected_len, buf_len, flags); \ } while (0) +#define setinlinepair() \ + __setinlinepair(_metadata, self) + #define tcp_incompliant \ for (self->tcp_compliant = false; \ self->tcp_compliant == false; \ @@ -304,4 +321,78 @@ TEST_F(msg_oob, ex_oob_ahead_break) recvpair("d", 1, 1, MSG_OOB); } +TEST_F(msg_oob, inline_oob) +{ + setinlinepair(); + + sendpair("x", 1, MSG_OOB); + + recvpair("", -EINVAL, 1, MSG_OOB); + recvpair("x", 1, 1, 0); +} + +TEST_F(msg_oob, inline_oob_break) +{ + setinlinepair(); + + sendpair("hello", 5, MSG_OOB); + + recvpair("", -EINVAL, 1, MSG_OOB); + recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */ + recvpair("o", 1, 1, 0); +} + +TEST_F(msg_oob, inline_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + sendpair("world", 5, 0); + + recvpair("o", 1, 1, MSG_OOB); + + setinlinepair(); + + recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */ + + tcp_incompliant { + recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */ + } +} + +TEST_F(msg_oob, inline_ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + sendpair("wor", 3, MSG_OOB); + sendpair("ld", 2, 0); + + setinlinepair(); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + recvpair("rld", 3, 3, 0); +} + +TEST_F(msg_oob, inline_ex_oob_no_drop) +{ + sendpair("x", 1, MSG_OOB); + + setinlinepair(); + + sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */ + + recvpair("x", 1, 1, 0); + recvpair("y", 1, 1, 0); +} + +TEST_F(msg_oob, inline_ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + + setinlinepair(); + + tcp_incompliant { + recvpair("x", 1, 1, 0); /* TCP recv()s "y". */ + recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */ + } +} + TEST_HARNESS_MAIN From d02689e6860df0f7eff066f268bfb53ef6993ea7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:42 -0700 Subject: [PATCH 746/778] selftest: af_unix: Check SIGURG after every send() in msg_oob.c When data is sent with MSG_OOB, SIGURG is sent to a process if the receiver socket has set its owner to the process by ioctl(FIOSETOWN) or fcntl(F_SETOWN). This patch adds SIGURG check after every send(MSG_OOB) call. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/af_unix/msg_oob.c | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 62361b5e98c3..123dee0b6739 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -6,6 +6,8 @@ #include #include +#include +#include #include #include "../../kselftest_harness.h" @@ -19,6 +21,7 @@ FIXTURE(msg_oob) * 2: TCP sender * 3: TCP receiver */ + int signal_fd; bool tcp_compliant; }; @@ -77,6 +80,35 @@ static void create_tcp_socketpair(struct __test_metadata *_metadata, ASSERT_EQ(ret, 0); } +static void setup_sigurg(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct signalfd_siginfo siginfo; + int pid = getpid(); + sigset_t mask; + int i, ret; + + for (i = 0; i < 2; i++) { + ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid); + ASSERT_EQ(ret, 0); + } + + ret = sigemptyset(&mask); + ASSERT_EQ(ret, 0); + + ret = sigaddset(&mask, SIGURG); + ASSERT_EQ(ret, 0); + + ret = sigprocmask(SIG_BLOCK, &mask, NULL); + ASSERT_EQ(ret, 0); + + self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK); + ASSERT_GE(self->signal_fd, 0); + + ret = read(self->signal_fd, &siginfo, sizeof(siginfo)); + ASSERT_EQ(ret, -1); +} + static void close_sockets(FIXTURE_DATA(msg_oob) *self) { int i; @@ -90,6 +122,8 @@ FIXTURE_SETUP(msg_oob) create_unix_socketpair(_metadata, self); create_tcp_socketpair(_metadata, self); + setup_sigurg(_metadata, self); + self->tcp_compliant = true; } @@ -104,9 +138,24 @@ static void __sendpair(struct __test_metadata *_metadata, { int i, ret[2]; - for (i = 0; i < 2; i++) + for (i = 0; i < 2; i++) { + struct signalfd_siginfo siginfo = {}; + int bytes; + ret[i] = send(self->fd[i * 2], buf, len, flags); + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + + if (flags & MSG_OOB) { + ASSERT_EQ(bytes, sizeof(siginfo)); + ASSERT_EQ(siginfo.ssi_signo, SIGURG); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + } + + ASSERT_EQ(bytes, -1); + } + ASSERT_EQ(ret[0], len); ASSERT_EQ(ret[0], ret[1]); } From 48a998373090f33e558a20a976c6028e11e93184 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:43 -0700 Subject: [PATCH 747/778] selftest: af_unix: Check EPOLLPRI after every send()/recv() in msg_oob.c When OOB data is in recvq, we can detect it with epoll by checking EPOLLPRI. This patch add checks for EPOLLPRI after every send() and recv() in all test cases. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/af_unix/msg_oob.c | 147 ++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 123dee0b6739..28b09b36a2f1 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -22,6 +23,9 @@ FIXTURE(msg_oob) * 3: TCP receiver */ int signal_fd; + int epoll_fd[2]; /* 0: AF_UNIX receiver + * 1: TCP receiver + */ bool tcp_compliant; }; @@ -109,6 +113,25 @@ static void setup_sigurg(struct __test_metadata *_metadata, ASSERT_EQ(ret, -1); } +static void setup_epollpri(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct epoll_event event = { + .events = EPOLLPRI, + }; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + self->epoll_fd[i] = epoll_create1(0); + ASSERT_GE(self->epoll_fd[i], 0); + + ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event); + ASSERT_EQ(ret, 0); + } +} + static void close_sockets(FIXTURE_DATA(msg_oob) *self) { int i; @@ -123,6 +146,7 @@ FIXTURE_SETUP(msg_oob) create_tcp_socketpair(_metadata, self); setup_sigurg(_metadata, self); + setup_epollpri(_metadata, self); self->tcp_compliant = true; } @@ -132,6 +156,29 @@ FIXTURE_TEARDOWN(msg_oob) close_sockets(self); } +static void __epollpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_remaining) +{ + struct epoll_event event[2] = {}; + int i, ret[2]; + + for (i = 0; i < 2; i++) + ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0); + + ASSERT_EQ(ret[0], oob_remaining); + + if (self->tcp_compliant) + ASSERT_EQ(ret[0], ret[1]); + + if (oob_remaining) { + ASSERT_EQ(event[0].events, EPOLLPRI); + + if (self->tcp_compliant) + ASSERT_EQ(event[0].events, event[1].events); + } +} + static void __sendpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, const void *buf, size_t len, int flags) @@ -254,6 +301,9 @@ static void __setinlinepair(struct __test_metadata *_metadata, expected_buf, expected_len, buf_len, flags); \ } while (0) +#define epollpair(oob_remaining) \ + __epollpair(_metadata, self, oob_remaining) + #define setinlinepair() \ __setinlinepair(_metadata, self) @@ -265,109 +315,170 @@ static void __setinlinepair(struct __test_metadata *_metadata, TEST_F(msg_oob, non_oob) { sendpair("x", 1, 0); + epollpair(false); recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); } TEST_F(msg_oob, oob) { sendpair("x", 1, MSG_OOB); + epollpair(true); recvpair("x", 1, 1, MSG_OOB); + epollpair(false); } TEST_F(msg_oob, oob_drop) { sendpair("x", 1, MSG_OOB); + epollpair(true); recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */ + epollpair(false); + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); } TEST_F(msg_oob, oob_ahead) { sendpair("hello", 5, MSG_OOB); + epollpair(true); recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + recvpair("hell", 4, 4, 0); + epollpair(false); } TEST_F(msg_oob, oob_break) { sendpair("hello", 5, MSG_OOB); + epollpair(true); recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); } TEST_F(msg_oob, oob_ahead_break) { sendpair("hello", 5, MSG_OOB); + epollpair(true); + sendpair("world", 5, 0); + epollpair(true); recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */ + epollpair(false); + recvpair("world", 5, 5, 0); + epollpair(false); } TEST_F(msg_oob, oob_break_drop) { sendpair("hello", 5, MSG_OOB); + epollpair(true); + sendpair("world", 5, 0); + epollpair(true); recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */ + epollpair(false); + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); } TEST_F(msg_oob, ex_oob_break) { sendpair("hello", 5, MSG_OOB); + epollpair(true); + sendpair("wor", 3, MSG_OOB); + epollpair(true); + sendpair("ld", 2, 0); + epollpair(true); recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + recvpair("ld", 2, 2, 0); + epollpair(false); } TEST_F(msg_oob, ex_oob_drop) { sendpair("x", 1, MSG_OOB); + epollpair(true); + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); tcp_incompliant { recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */ + epollpair(true); + recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */ + epollpair(false); } } TEST_F(msg_oob, ex_oob_drop_2) { sendpair("x", 1, MSG_OOB); + epollpair(true); + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); recvpair("y", 1, 1, MSG_OOB); + epollpair(false); tcp_incompliant { recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */ + epollpair(false); } } TEST_F(msg_oob, ex_oob_ahead_break) { sendpair("hello", 5, MSG_OOB); + epollpair(true); + sendpair("wor", 3, MSG_OOB); + epollpair(true); recvpair("r", 1, 1, MSG_OOB); + epollpair(false); sendpair("ld", 2, MSG_OOB); + epollpair(true); tcp_incompliant { recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */ } + epollpair(true); + recvpair("d", 1, 1, MSG_OOB); + epollpair(false); } TEST_F(msg_oob, inline_oob) @@ -375,9 +486,13 @@ TEST_F(msg_oob, inline_oob) setinlinepair(); sendpair("x", 1, MSG_OOB); + epollpair(true); recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + recvpair("x", 1, 1, 0); + epollpair(false); } TEST_F(msg_oob, inline_oob_break) @@ -385,62 +500,94 @@ TEST_F(msg_oob, inline_oob_break) setinlinepair(); sendpair("hello", 5, MSG_OOB); + epollpair(true); recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + recvpair("o", 1, 1, 0); + epollpair(false); } TEST_F(msg_oob, inline_oob_ahead_break) { sendpair("hello", 5, MSG_OOB); + epollpair(true); + sendpair("world", 5, 0); + epollpair(true); recvpair("o", 1, 1, MSG_OOB); + epollpair(false); setinlinepair(); recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */ + epollpair(false); tcp_incompliant { recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */ } + + epollpair(false); } TEST_F(msg_oob, inline_ex_oob_break) { sendpair("hello", 5, MSG_OOB); + epollpair(true); + sendpair("wor", 3, MSG_OOB); + epollpair(true); + sendpair("ld", 2, 0); + epollpair(true); setinlinepair(); recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + recvpair("rld", 3, 3, 0); + epollpair(false); } TEST_F(msg_oob, inline_ex_oob_no_drop) { sendpair("x", 1, MSG_OOB); + epollpair(true); setinlinepair(); sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */ + epollpair(true); recvpair("x", 1, 1, 0); + epollpair(true); + recvpair("y", 1, 1, 0); + epollpair(false); } TEST_F(msg_oob, inline_ex_oob_drop) { sendpair("x", 1, MSG_OOB); + epollpair(true); + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); setinlinepair(); tcp_incompliant { recvpair("x", 1, 1, 0); /* TCP recv()s "y". */ + epollpair(true); + recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */ + epollpair(false); } } From e400cfa38bb0419cf1313e5494ea2b7d114e86d7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:44 -0700 Subject: [PATCH 748/778] af_unix: Fix wrong ioctl(SIOCATMARK) when consumed OOB skb is at the head. Even if OOB data is recv()ed, ioctl(SIOCATMARK) must return 1 when the OOB skb is at the head of the receive queue and no new OOB data is queued. Without fix: # RUN msg_oob.no_peek.oob ... # msg_oob.c:305:oob:Expected answ[0] (0) == oob_head (1) # oob: Test terminated by assertion # FAIL msg_oob.no_peek.oob not ok 2 msg_oob.no_peek.oob With fix: # RUN msg_oob.no_peek.oob ... # OK msg_oob.no_peek.oob ok 2 msg_oob.no_peek.oob Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 15 +++- tools/testing/selftests/net/af_unix/msg_oob.c | 68 +++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 07f5eaa04b5b..142f56770b77 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3107,12 +3107,23 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) #if IS_ENABLED(CONFIG_AF_UNIX_OOB) case SIOCATMARK: { + struct unix_sock *u = unix_sk(sk); struct sk_buff *skb; int answ = 0; + mutex_lock(&u->iolock); + skb = skb_peek(&sk->sk_receive_queue); - if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) - answ = 1; + if (skb) { + struct sk_buff *oob_skb = READ_ONCE(u->oob_skb); + + if (skb == oob_skb || + (!oob_skb && !unix_skb_len(skb))) + answ = 1; + } + + mutex_unlock(&u->iolock); + err = put_user(answ, (int __user *)arg); } break; diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 28b09b36a2f1..2d0024329437 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -288,6 +288,26 @@ static void __setinlinepair(struct __test_metadata *_metadata, } } +static void __siocatmarkpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_head) +{ + int answ[2] = {}; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]); + ASSERT_EQ(ret, 0); + } + + ASSERT_EQ(answ[0], oob_head); + + if (self->tcp_compliant) + ASSERT_EQ(answ[0], answ[1]); +} + #define sendpair(buf, len, flags) \ __sendpair(_metadata, self, buf, len, flags) @@ -304,6 +324,9 @@ static void __setinlinepair(struct __test_metadata *_metadata, #define epollpair(oob_remaining) \ __epollpair(_metadata, self, oob_remaining) +#define siocatmarkpair(oob_head) \ + __siocatmarkpair(_metadata, self, oob_head) + #define setinlinepair() \ __setinlinepair(_metadata, self) @@ -325,9 +348,11 @@ TEST_F(msg_oob, oob) { sendpair("x", 1, MSG_OOB); epollpair(true); + siocatmarkpair(true); recvpair("x", 1, 1, MSG_OOB); epollpair(false); + siocatmarkpair(true); } TEST_F(msg_oob, oob_drop) @@ -481,18 +506,40 @@ TEST_F(msg_oob, ex_oob_ahead_break) epollpair(false); } +TEST_F(msg_oob, ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + TEST_F(msg_oob, inline_oob) { setinlinepair(); sendpair("x", 1, MSG_OOB); epollpair(true); + siocatmarkpair(true); recvpair("", -EINVAL, 1, MSG_OOB); epollpair(true); + siocatmarkpair(true); recvpair("x", 1, 1, 0); epollpair(false); + siocatmarkpair(false); } TEST_F(msg_oob, inline_oob_break) @@ -591,4 +638,25 @@ TEST_F(msg_oob, inline_ex_oob_drop) } } +TEST_F(msg_oob, inline_ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + TEST_HARNESS_MAIN From 91b7186c8d141fb89412930d6b9974c9cba24af7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 24 Jun 2024 18:36:45 -0700 Subject: [PATCH 749/778] selftest: af_unix: Check SIOCATMARK after every send()/recv() in msg_oob.c. To catch regression, let's check ioctl(SIOCATMARK) after every send() and recv() calls. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/af_unix/msg_oob.c | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 2d0024329437..16d0c172eaeb 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -339,9 +339,11 @@ TEST_F(msg_oob, non_oob) { sendpair("x", 1, 0); epollpair(false); + siocatmarkpair(false); recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); + siocatmarkpair(false); } TEST_F(msg_oob, oob) @@ -359,109 +361,142 @@ TEST_F(msg_oob, oob_drop) { sendpair("x", 1, MSG_OOB); epollpair(true); + siocatmarkpair(true); recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */ epollpair(false); + siocatmarkpair(false); recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); + siocatmarkpair(false); } TEST_F(msg_oob, oob_ahead) { sendpair("hello", 5, MSG_OOB); epollpair(true); + siocatmarkpair(false); recvpair("o", 1, 1, MSG_OOB); epollpair(false); + siocatmarkpair(false); recvpair("hell", 4, 4, 0); epollpair(false); + siocatmarkpair(true); } TEST_F(msg_oob, oob_break) { sendpair("hello", 5, MSG_OOB); epollpair(true); + siocatmarkpair(false); recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */ epollpair(true); + siocatmarkpair(true); recvpair("o", 1, 1, MSG_OOB); epollpair(false); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + siocatmarkpair(false); } TEST_F(msg_oob, oob_ahead_break) { sendpair("hello", 5, MSG_OOB); epollpair(true); + siocatmarkpair(false); sendpair("world", 5, 0); epollpair(true); + siocatmarkpair(false); recvpair("o", 1, 1, MSG_OOB); epollpair(false); + siocatmarkpair(false); recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */ epollpair(false); + siocatmarkpair(true); recvpair("world", 5, 5, 0); epollpair(false); + siocatmarkpair(false); } TEST_F(msg_oob, oob_break_drop) { sendpair("hello", 5, MSG_OOB); epollpair(true); + siocatmarkpair(false); sendpair("world", 5, 0); epollpair(true); + siocatmarkpair(false); recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */ epollpair(true); + siocatmarkpair(true); recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */ epollpair(false); + siocatmarkpair(false); recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); + siocatmarkpair(false); } TEST_F(msg_oob, ex_oob_break) { sendpair("hello", 5, MSG_OOB); epollpair(true); + siocatmarkpair(false); sendpair("wor", 3, MSG_OOB); epollpair(true); + siocatmarkpair(false); sendpair("ld", 2, 0); epollpair(true); + siocatmarkpair(false); recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ epollpair(true); + siocatmarkpair(true); recvpair("r", 1, 1, MSG_OOB); epollpair(false); + siocatmarkpair(true); recvpair("ld", 2, 2, 0); epollpair(false); + siocatmarkpair(false); } TEST_F(msg_oob, ex_oob_drop) { sendpair("x", 1, MSG_OOB); epollpair(true); + siocatmarkpair(true); sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ epollpair(true); tcp_incompliant { + siocatmarkpair(false); + recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */ epollpair(true); + siocatmarkpair(true); recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */ epollpair(false); + siocatmarkpair(true); } } @@ -469,16 +504,24 @@ TEST_F(msg_oob, ex_oob_drop_2) { sendpair("x", 1, MSG_OOB); epollpair(true); + siocatmarkpair(true); sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ epollpair(true); + tcp_incompliant { + siocatmarkpair(false); + } + recvpair("y", 1, 1, MSG_OOB); epollpair(false); tcp_incompliant { + siocatmarkpair(false); + recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */ epollpair(false); + siocatmarkpair(true); } } @@ -486,24 +529,30 @@ TEST_F(msg_oob, ex_oob_ahead_break) { sendpair("hello", 5, MSG_OOB); epollpair(true); + siocatmarkpair(false); sendpair("wor", 3, MSG_OOB); epollpair(true); + siocatmarkpair(false); recvpair("r", 1, 1, MSG_OOB); epollpair(false); + siocatmarkpair(false); sendpair("ld", 2, MSG_OOB); epollpair(true); + siocatmarkpair(false); tcp_incompliant { recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */ } epollpair(true); + siocatmarkpair(true); recvpair("d", 1, 1, MSG_OOB); epollpair(false); + siocatmarkpair(true); } TEST_F(msg_oob, ex_oob_siocatmark) @@ -548,81 +597,100 @@ TEST_F(msg_oob, inline_oob_break) sendpair("hello", 5, MSG_OOB); epollpair(true); + siocatmarkpair(false); recvpair("", -EINVAL, 1, MSG_OOB); epollpair(true); + siocatmarkpair(false); recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */ epollpair(true); + siocatmarkpair(true); recvpair("o", 1, 1, 0); epollpair(false); + siocatmarkpair(false); } TEST_F(msg_oob, inline_oob_ahead_break) { sendpair("hello", 5, MSG_OOB); epollpair(true); + siocatmarkpair(false); sendpair("world", 5, 0); epollpair(true); + siocatmarkpair(false); recvpair("o", 1, 1, MSG_OOB); epollpair(false); + siocatmarkpair(false); setinlinepair(); recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */ epollpair(false); + siocatmarkpair(true); tcp_incompliant { recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */ } epollpair(false); + siocatmarkpair(false); } TEST_F(msg_oob, inline_ex_oob_break) { sendpair("hello", 5, MSG_OOB); epollpair(true); + siocatmarkpair(false); sendpair("wor", 3, MSG_OOB); epollpair(true); + siocatmarkpair(false); sendpair("ld", 2, 0); epollpair(true); + siocatmarkpair(false); setinlinepair(); recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ epollpair(true); + siocatmarkpair(true); recvpair("rld", 3, 3, 0); epollpair(false); + siocatmarkpair(false); } TEST_F(msg_oob, inline_ex_oob_no_drop) { sendpair("x", 1, MSG_OOB); epollpair(true); + siocatmarkpair(true); setinlinepair(); sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */ epollpair(true); + siocatmarkpair(false); recvpair("x", 1, 1, 0); epollpair(true); + siocatmarkpair(true); recvpair("y", 1, 1, 0); epollpair(false); + siocatmarkpair(false); } TEST_F(msg_oob, inline_ex_oob_drop) { sendpair("x", 1, MSG_OOB); epollpair(true); + siocatmarkpair(true); sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ epollpair(true); @@ -630,11 +698,15 @@ TEST_F(msg_oob, inline_ex_oob_drop) setinlinepair(); tcp_incompliant { + siocatmarkpair(false); + recvpair("x", 1, 1, 0); /* TCP recv()s "y". */ epollpair(true); + siocatmarkpair(true); recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */ epollpair(false); + siocatmarkpair(false); } } From c362f32a59a84fe4453abecc6b53f5f70894a6d5 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 20 Jun 2024 06:05:52 +0000 Subject: [PATCH 750/778] iommu/amd: Invalidate cache before removing device from domain list Commit 87a6f1f22c97 ("iommu/amd: Introduce per-device domain ID to fix potential TLB aliasing issue") introduced per device domain ID when domain is configured with v2 page table. And in invalidation path, it uses per device structure (dev_data->gcr3_info.domid) to get the domain ID. In detach_device() path, current code tries to invalidate IOMMU cache after removing dev_data from domain device list. This means when domain is configured with v2 page table, amd_iommu_domain_flush_all() will not be able to invalidate cache as device is already removed from domain device list. This is causing change domain tests (changing domain type from identity to DMA) to fail with IO_PAGE_FAULT issue. Hence invalidate cache and update DTE before updating data structures. Reported-by: FahHean Lee Reported-by: Dheeraj Kumar Srivastava Fixes: 87a6f1f22c97 ("iommu/amd: Introduce per-device domain ID to fix potential TLB aliasing issue") Tested-by: Dheeraj Kumar Srivastava Tested-by: Sairaj Arun Kodilkar Tested-by: FahHean Lee Signed-off-by: Vasant Hegde Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20240620060552.13984-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index c2703599bb16..b19e8c0f48fa 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2061,6 +2061,12 @@ static void do_detach(struct iommu_dev_data *dev_data) struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); + /* Clear DTE and flush the entry */ + amd_iommu_dev_update_dte(dev_data, false); + + /* Flush IOTLB and wait for the flushes to finish */ + amd_iommu_domain_flush_all(domain); + /* Clear GCR3 table */ if (pdom_is_sva_capable(domain)) destroy_gcr3_table(dev_data, domain); @@ -2069,12 +2075,6 @@ static void do_detach(struct iommu_dev_data *dev_data) dev_data->domain = NULL; list_del(&dev_data->list); - /* Clear DTE and flush the entry */ - amd_iommu_dev_update_dte(dev_data, false); - - /* Flush IOTLB and wait for the flushes to finish */ - amd_iommu_domain_flush_all(domain); - /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; domain->dev_cnt -= 1; From 041be2717b198dd65032f726648401ba293c1bba Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 20 Jun 2024 14:29:40 +0800 Subject: [PATCH 751/778] iommu/vt-d: Fix missed device TLB cache tag When a domain is attached to a device, the required cache tags are assigned to the domain so that the related caches can be flushed whenever it is needed. The device TLB cache tag is created based on whether the ats_enabled field of the device's iommu data is set. This creates an ordered dependency between cache tag assignment and ATS enabling. The device TLB cache tag would not be created if device's ATS is enabled after the cache tag assignment. This causes devices with PCI ATS support to malfunction. The ATS control is exclusively owned by the iommu driver. Hence, move cache_tag_assign_domain() after PCI ATS enabling to make sure that the device TLB cache tag is created for the domain. Fixes: 3b1d9e2b2d68 ("iommu/vt-d: Add cache tag assignment interface") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240620062940.201786-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2e9811bf2a4e..fd11a080380c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2114,12 +2114,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) return ret; - ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); - if (ret) { - domain_detach_iommu(domain, iommu); - return ret; - } - info->domain = domain; spin_lock_irqsave(&domain->lock, flags); list_add(&info->link, &domain->devices); @@ -2137,15 +2131,21 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, else ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID); - if (ret) { - device_block_translation(dev); - return ret; - } + if (ret) + goto out_block_translation; if (sm_supported(info->iommu) || !domain_type_is_si(info->domain)) iommu_enable_pci_caps(info); + ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); + if (ret) + goto out_block_translation; + return 0; + +out_block_translation: + device_block_translation(dev); + return ret; } /** From 150bdf5f8d8f805d70bebbbfd07697bd2416771a Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Fri, 21 Jun 2024 10:15:33 +0000 Subject: [PATCH 752/778] iommu/amd: Fix GT feature enablement again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current code configures GCR3 even when device is attached to identity domain. So that we can support SVA with identity domain. This means in attach device path it updates Guest Translation related bits in DTE. Commit de111f6b4f6a ("iommu/amd: Enable Guest Translation after reading IOMMU feature register") missed to enable Control[GT] bit in resume path. Its causing certain laptop to fail to resume after suspend. This is because we have inconsistency between between control register (GT is disabled) and DTE (where we have enabled guest translation related bits) in resume path. And IOMMU hardware throws ILLEGAL_DEV_TABLE_ENTRY. Fix it by enabling GT bit in resume path. Reported-by: Błażej Szczygieł Link: https://bugzilla.kernel.org/show_bug.cgi?id=218975 Fixes: de111f6b4f6a ("iommu/amd: Enable Guest Translation after reading IOMMU feature register") Tested-by: Błażej Szczygieł Signed-off-by: Vasant Hegde Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20240621101533.20216-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 161248067776..c89d85b54a1a 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2743,6 +2743,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); + iommu_enable_gt(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); From 1864b8224195d0e43ddb92a8151f54f6562090cc Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 25 Jun 2024 21:03:14 +0800 Subject: [PATCH 753/778] net: mana: Fix possible double free in error handling path When auxiliary_device_add() returns error and then calls auxiliary_device_uninit(), callback function adev_release calls kfree(madev). We shouldn't call kfree(madev) again in the error handling path. Set 'madev' to NULL. Fixes: a69839d4327d ("net: mana: Add support for auxiliary device") Signed-off-by: Ma Ke Link: https://patch.msgid.link/20240625130314.2661257-1-make24@iscas.ac.cn Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d087cf954f75..608ad31a9702 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2798,6 +2798,8 @@ static int add_adev(struct gdma_dev *gd) if (ret) goto init_fail; + /* madev is owned by the auxiliary device */ + madev = NULL; ret = auxiliary_device_add(adev); if (ret) goto add_fail; From fcdd7b7bda3c21d1ba1247419e4a1eb8e2d0bfbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20Rosenkr=C3=A4nzer?= Date: Thu, 27 Jun 2024 14:44:19 +0200 Subject: [PATCH 754/778] staging: vchiq_debugfs: Fix build if CONFIG_DEBUG_FS is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 42a2f6664e18 ("staging: vc04_services: Move global g_state to vchiq_state") adds a parameter to vchiq_debugfs_init, but leaves the dummy implementation in the !CONFIG_DEBUG_FS case untouched, causing a compile time error. Fixes: c3552ab19aeb ("staging: vchiq_debugfs: Fix NPD in vchiq_dump_state") Signed-off-by: Bernhard Rosenkränzer Reviewed-by: Stefan Wahren Link: https://lore.kernel.org/r/20240627124419.2498642-1-bero@baylibre.com Signed-off-by: Greg Kroah-Hartman --- .../staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c index 1f74d0bb33ba..d5f7f61c5626 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c @@ -138,7 +138,7 @@ void vchiq_debugfs_deinit(void) #else /* CONFIG_DEBUG_FS */ -void vchiq_debugfs_init(void) +void vchiq_debugfs_init(struct vchiq_state *state) { } From 24bf27b92b1c6a322faa88977de2207aa8c26509 Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Thu, 20 Jun 2024 22:46:41 +0200 Subject: [PATCH 755/778] Revert "usb: gadget: u_ether: Re-attach netif device to mirror detachment" This reverts commit 76c945730cdffb572c7767073cc6515fd3f646b4. Prerequisite revert for the reverting of the original commit f49449fbc21e. Fixes: 76c945730cdf ("usb: gadget: u_ether: Re-attach netif device to mirror detachment") Fixes: f49449fbc21e ("usb: gadget: u_ether: Replace netif_stop_queue with netif_device_detach") Reported-by: Ferry Toth Cc: stable@vger.kernel.org Signed-off-by: Ferry Toth Link: https://lore.kernel.org/r/20240620204832.24518-2-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 11dd0b9e847f..aa0511c3a62c 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -1163,8 +1163,6 @@ struct net_device *gether_connect(struct gether *link) if (netif_running(dev->net)) eth_start(dev, GFP_ATOMIC); - netif_device_attach(dev->net); - /* on error, disable any endpoints */ } else { (void) usb_ep_disable(link->out_ep); From c50814a288dcee687285abc0cf935e9fe8928e59 Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Thu, 20 Jun 2024 22:46:42 +0200 Subject: [PATCH 756/778] Revert "usb: gadget: u_ether: Replace netif_stop_queue with netif_device_detach" This reverts commit f49449fbc21e7e9550a5203902d69c8ae7dfd918. This commit breaks u_ether on some setups (at least Merrifield). The fix "usb: gadget: u_ether: Re-attach netif device to mirror detachment" party restores u-ether. However the netif usb: remains up even usb is switched from device to host mode. This creates problems for user space as the interface remains in the routing table while not realy present and network managers (connman) not detecting a network change. Various attempts to find the root cause were unsuccesful up to now. Therefore revert until a solution is found. Link: https://lore.kernel.org/linux-usb/20231006141231.7220-1-hgajjar@de.adit-jv.com/ Reported-by: Andy Shevchenko Reported-by: Ferry Toth Fixes: f49449fbc21e ("usb: gadget: u_ether: Replace netif_stop_queue with netif_device_detach") Cc: stable@vger.kernel.org Signed-off-by: Ferry Toth Link: https://lore.kernel.org/r/20240620204832.24518-3-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index aa0511c3a62c..95191083b455 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -1200,7 +1200,7 @@ void gether_disconnect(struct gether *link) DBG(dev, "%s\n", __func__); - netif_device_detach(dev->net); + netif_stop_queue(dev->net); netif_carrier_off(dev->net); /* disable endpoints, forcing (synchronous) completion From fc1d1a712b517bbcb383b1f1f7ef478e7d0579f2 Mon Sep 17 00:00:00 2001 From: Jos Wang Date: Wed, 19 Jun 2024 19:45:29 +0800 Subject: [PATCH 757/778] usb: dwc3: core: Workaround for CSR read timeout This is a workaround for STAR 4846132, which only affects DWC_usb31 version2.00a operating in host mode. There is a problem in DWC_usb31 version 2.00a operating in host mode that would cause a CSR read timeout When CSR read coincides with RAM Clock Gating Entry. By disable Clock Gating, sacrificing power consumption for normal operation. Cc: stable # 5.10.x: 1e43c86d: usb: dwc3: core: Add DWC31 version 2.00a controller Signed-off-by: Jos Wang Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240619114529.3441-1-joswang1221@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 9d47c3aa5777..cb82557678dd 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -957,12 +957,16 @@ static bool dwc3_core_is_valid(struct dwc3 *dwc) static void dwc3_core_setup_global_control(struct dwc3 *dwc) { + unsigned int power_opt; + unsigned int hw_mode; u32 reg; reg = dwc3_readl(dwc->regs, DWC3_GCTL); reg &= ~DWC3_GCTL_SCALEDOWN_MASK; + hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); + power_opt = DWC3_GHWPARAMS1_EN_PWROPT(dwc->hwparams.hwparams1); - switch (DWC3_GHWPARAMS1_EN_PWROPT(dwc->hwparams.hwparams1)) { + switch (power_opt) { case DWC3_GHWPARAMS1_EN_PWROPT_CLK: /** * WORKAROUND: DWC3 revisions between 2.10a and 2.50a have an @@ -995,6 +999,20 @@ static void dwc3_core_setup_global_control(struct dwc3 *dwc) break; } + /* + * This is a workaround for STAR#4846132, which only affects + * DWC_usb31 version2.00a operating in host mode. + * + * There is a problem in DWC_usb31 version 2.00a operating + * in host mode that would cause a CSR read timeout When CSR + * read coincides with RAM Clock Gating Entry. By disable + * Clock Gating, sacrificing power consumption for normal + * operation. + */ + if (power_opt != DWC3_GHWPARAMS1_EN_PWROPT_NO && + hw_mode != DWC3_GHWPARAMS0_MODE_GADGET && DWC3_VER_IS(DWC31, 200A)) + reg |= DWC3_GCTL_DSBLCLKGTNG; + /* check if current dwc3 is on simulation board */ if (dwc->hwparams.hwparams6 & DWC3_GHWPARAMS6_EN_FPGA) { dev_info(dwc->dev, "Running with FPGA optimizations\n"); From 9919cce62f68e6ab68dc2a975b5dc670f8ca7d40 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Wed, 26 Jun 2024 13:29:22 +0800 Subject: [PATCH 758/778] gpiolib: cdev: Disallow reconfiguration without direction (uAPI v1) linehandle_set_config() behaves badly when direction is not set. The configuration validation is borrowed from linehandle_create(), where, to verify the intent of the user, the direction must be set to in order to effect a change to the electrical configuration of a line. But, when applied to reconfiguration, that validation does not allow for the unset direction case, making it possible to clear flags set previously without specifying the line direction. Adding to the inconsistency, those changes are not immediately applied by linehandle_set_config(), but will take effect when the line value is next get or set. For example, by requesting a configuration with no flags set, an output line with GPIOHANDLE_REQUEST_ACTIVE_LOW and GPIOHANDLE_REQUEST_OPEN_DRAIN requested could have those flags cleared, inverting the sense of the line and changing the line drive to push-pull on the next line value set. Ensure the intent of the user by disallowing configurations which do not have direction set, returning an error to userspace to indicate that the configuration is invalid. And, for clarity, use lflags, a local copy of gcnf.flags, throughout when dealing with the requested flags, rather than a mixture of both. Fixes: e588bb1eae31 ("gpio: add new SET_CONFIG ioctl() to gpio chardev") Signed-off-by: Kent Gibson Link: https://lore.kernel.org/r/20240626052925.174272-2-warthog618@gmail.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-cdev.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 9dad67ea2597..04261adf320b 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -89,6 +89,10 @@ struct linehandle_state { GPIOHANDLE_REQUEST_OPEN_DRAIN | \ GPIOHANDLE_REQUEST_OPEN_SOURCE) +#define GPIOHANDLE_REQUEST_DIRECTION_FLAGS \ + (GPIOHANDLE_REQUEST_INPUT | \ + GPIOHANDLE_REQUEST_OUTPUT) + static int linehandle_validate_flags(u32 flags) { /* Return an error if an unknown flag is set */ @@ -169,21 +173,21 @@ static long linehandle_set_config(struct linehandle_state *lh, if (ret) return ret; + /* Lines must be reconfigured explicitly as input or output. */ + if (!(lflags & GPIOHANDLE_REQUEST_DIRECTION_FLAGS)) + return -EINVAL; + for (i = 0; i < lh->num_descs; i++) { desc = lh->descs[i]; - linehandle_flags_to_desc_flags(gcnf.flags, &desc->flags); + linehandle_flags_to_desc_flags(lflags, &desc->flags); - /* - * Lines have to be requested explicitly for input - * or output, else the line will be treated "as is". - */ if (lflags & GPIOHANDLE_REQUEST_OUTPUT) { int val = !!gcnf.default_values[i]; ret = gpiod_direction_output(desc, val); if (ret) return ret; - } else if (lflags & GPIOHANDLE_REQUEST_INPUT) { + } else { ret = gpiod_direction_input(desc); if (ret) return ret; From b440396387418fe2feaacd41ca16080e7a8bc9ad Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Wed, 26 Jun 2024 13:29:23 +0800 Subject: [PATCH 759/778] gpiolib: cdev: Ignore reconfiguration without direction linereq_set_config() behaves badly when direction is not set. The configuration validation is borrowed from linereq_create(), where, to verify the intent of the user, the direction must be set to in order to effect a change to the electrical configuration of a line. But, when applied to reconfiguration, that validation does not allow for the unset direction case, making it possible to clear flags set previously without specifying the line direction. Adding to the inconsistency, those changes are not immediately applied by linereq_set_config(), but will take effect when the line value is next get or set. For example, by requesting a configuration with no flags set, an output line with GPIO_V2_LINE_FLAG_ACTIVE_LOW and GPIO_V2_LINE_FLAG_OPEN_DRAIN set could have those flags cleared, inverting the sense of the line and changing the line drive to push-pull on the next line value set. Skip the reconfiguration of lines for which the direction is not set, and only reconfigure the lines for which direction is set. Fixes: a54756cb24ea ("gpiolib: cdev: support GPIO_V2_LINE_SET_CONFIG_IOCTL") Signed-off-by: Kent Gibson Link: https://lore.kernel.org/r/20240626052925.174272-3-warthog618@gmail.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-cdev.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 04261adf320b..5639abce6ec5 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -1534,12 +1534,14 @@ static long linereq_set_config(struct linereq *lr, void __user *ip) line = &lr->lines[i]; desc = lr->lines[i].desc; flags = gpio_v2_line_config_flags(&lc, i); + /* + * Lines not explicitly reconfigured as input or output + * are left unchanged. + */ + if (!(flags & GPIO_V2_LINE_DIRECTION_FLAGS)) + continue; gpio_v2_line_config_flags_to_desc_flags(flags, &desc->flags); edflags = flags & GPIO_V2_LINE_EDGE_DETECTOR_FLAGS; - /* - * Lines have to be requested explicitly for input - * or output, else the line will be treated "as is". - */ if (flags & GPIO_V2_LINE_FLAG_OUTPUT) { int val = gpio_v2_line_config_output_value(&lc, i); @@ -1547,7 +1549,7 @@ static long linereq_set_config(struct linereq *lr, void __user *ip) ret = gpiod_direction_output(desc, val); if (ret) return ret; - } else if (flags & GPIO_V2_LINE_FLAG_INPUT) { + } else { ret = gpiod_direction_input(desc); if (ret) return ret; From 7e1f4eb9a60d40dd17a97d9b76818682a024a127 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Apr 2024 12:04:54 +0200 Subject: [PATCH 760/778] kallsyms: rework symbol lookup return codes Building with W=1 in some configurations produces a false positive warning for kallsyms: kernel/kallsyms.c: In function '__sprint_symbol.isra': kernel/kallsyms.c:503:17: error: 'strcpy' source argument is the same as destination [-Werror=restrict] 503 | strcpy(buffer, name); | ^~~~~~~~~~~~~~~~~~~~ This originally showed up while building with -O3, but later started happening in other configurations as well, depending on inlining decisions. The underlying issue is that the local 'name' variable is always initialized to the be the same as 'buffer' in the called functions that fill the buffer, which gcc notices while inlining, though it could see that the address check always skips the copy. The calling conventions here are rather unusual, as all of the internal lookup functions (bpf_address_lookup, ftrace_mod_address_lookup, ftrace_func_address_lookup, module_address_lookup and kallsyms_lookup_buildid) already use the provided buffer and either return the address of that buffer to indicate success, or NULL for failure, but the callers are written to also expect an arbitrary other buffer to be returned. Rework the calling conventions to return the length of the filled buffer instead of its address, which is simpler and easier to follow as well as avoiding the warning. Leave only the kallsyms_lookup() calling conventions unchanged, since that is called from 16 different functions and adapting this would be a much bigger change. Link: https://lore.kernel.org/lkml/20200107214042.855757-1-arnd@arndb.de/ Link: https://lore.kernel.org/lkml/20240326130647.7bfb1d92@gandalf.local.home/ Tested-by: Geert Uytterhoeven Reviewed-by: Luis Chamberlain Acked-by: Steven Rostedt (Google) Signed-off-by: Arnd Bergmann --- include/linux/filter.h | 14 +++++++------- include/linux/ftrace.h | 6 +++--- include/linux/module.h | 14 +++++++------- kernel/bpf/core.c | 7 +++---- kernel/kallsyms.c | 23 ++++++++++++----------- kernel/module/kallsyms.c | 25 ++++++++++++------------- kernel/trace/ftrace.c | 13 +++++-------- 7 files changed, 49 insertions(+), 53 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 0f12cf01070e..5669da513cd7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1208,18 +1208,18 @@ static inline bool bpf_jit_kallsyms_enabled(void) return false; } -const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, +int __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym); bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); -static inline const char * +static inline int bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { - const char *ret = __bpf_address_lookup(addr, size, off, sym); + int ret = __bpf_address_lookup(addr, size, off, sym); if (ret && modname) *modname = NULL; @@ -1263,11 +1263,11 @@ static inline bool bpf_jit_kallsyms_enabled(void) return false; } -static inline const char * +static inline int __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym) { - return NULL; + return 0; } static inline bool is_bpf_text_address(unsigned long addr) @@ -1286,11 +1286,11 @@ static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) return NULL; } -static inline const char * +static inline int bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { - return NULL; + return 0; } static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 800995c425e0..b792274189a3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -86,15 +86,15 @@ struct ftrace_hash; #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \ defined(CONFIG_DYNAMIC_FTRACE) -const char * +int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym); #else -static inline const char * +static inline int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { - return NULL; + return 0; } #endif diff --git a/include/linux/module.h b/include/linux/module.h index ffa1c603163c..330ffb59efe5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -931,11 +931,11 @@ int module_kallsyms_on_each_symbol(const char *modname, * least KSYM_NAME_LEN long: a pointer to namebuf is returned if * found, otherwise NULL. */ -const char *module_address_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, const unsigned char **modbuildid, - char *namebuf); +int module_address_lookup(unsigned long addr, + unsigned long *symbolsize, + unsigned long *offset, + char **modname, const unsigned char **modbuildid, + char *namebuf); int lookup_module_symbol_name(unsigned long addr, char *symname); int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, @@ -964,14 +964,14 @@ static inline int module_kallsyms_on_each_symbol(const char *modname, } /* For kallsyms to ask for address resolution. NULL means not found. */ -static inline const char *module_address_lookup(unsigned long addr, +static inline int module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, const unsigned char **modbuildid, char *namebuf) { - return NULL; + return 0; } static inline int lookup_module_symbol_name(unsigned long addr, char *symname) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1a6c3faa6e4a..695a0fb2cd4d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -736,11 +736,11 @@ static struct bpf_ksym *bpf_ksym_find(unsigned long addr) return n ? container_of(n, struct bpf_ksym, tnode) : NULL; } -const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, +int __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym) { struct bpf_ksym *ksym; - char *ret = NULL; + int ret = 0; rcu_read_lock(); ksym = bpf_ksym_find(addr); @@ -748,9 +748,8 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long symbol_start = ksym->start; unsigned long symbol_end = ksym->end; - strscpy(sym, ksym->name, KSYM_NAME_LEN); + ret = strscpy(sym, ksym->name, KSYM_NAME_LEN); - ret = sym; if (size) *size = symbol_end - symbol_start; if (off) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 22ea19a36e6e..98b9622d372e 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -388,12 +388,12 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); } -static const char *kallsyms_lookup_buildid(unsigned long addr, +static int kallsyms_lookup_buildid(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, const unsigned char **modbuildid, char *namebuf) { - const char *ret; + int ret; namebuf[KSYM_NAME_LEN - 1] = 0; namebuf[0] = 0; @@ -410,7 +410,7 @@ static const char *kallsyms_lookup_buildid(unsigned long addr, if (modbuildid) *modbuildid = NULL; - ret = namebuf; + ret = strlen(namebuf); goto found; } @@ -442,8 +442,13 @@ const char *kallsyms_lookup(unsigned long addr, unsigned long *offset, char **modname, char *namebuf) { - return kallsyms_lookup_buildid(addr, symbolsize, offset, modname, - NULL, namebuf); + int ret = kallsyms_lookup_buildid(addr, symbolsize, offset, modname, + NULL, namebuf); + + if (!ret) + return NULL; + + return namebuf; } int lookup_symbol_name(unsigned long addr, char *symname) @@ -478,19 +483,15 @@ static int __sprint_symbol(char *buffer, unsigned long address, { char *modname; const unsigned char *buildid; - const char *name; unsigned long offset, size; int len; address += symbol_offset; - name = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid, + len = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid, buffer); - if (!name) + if (!len) return sprintf(buffer, "0x%lx", address - symbol_offset); - if (name != buffer) - strcpy(buffer, name); - len = strlen(buffer); offset -= symbol_offset; if (add_offset) diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 62fb57bb9f16..bf65e0c3c86f 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -321,14 +321,15 @@ void * __weak dereference_module_function_descriptor(struct module *mod, * For kallsyms to ask for address resolution. NULL means not found. Careful * not to lock to avoid deadlock on oopses, simply disable preemption. */ -const char *module_address_lookup(unsigned long addr, - unsigned long *size, - unsigned long *offset, - char **modname, - const unsigned char **modbuildid, - char *namebuf) +int module_address_lookup(unsigned long addr, + unsigned long *size, + unsigned long *offset, + char **modname, + const unsigned char **modbuildid, + char *namebuf) { - const char *ret = NULL; + const char *sym; + int ret = 0; struct module *mod; preempt_disable(); @@ -344,12 +345,10 @@ const char *module_address_lookup(unsigned long addr, #endif } - ret = find_kallsyms_symbol(mod, addr, size, offset); - } - /* Make a copy in here where it's safe */ - if (ret) { - strscpy(namebuf, ret, KSYM_NAME_LEN); - ret = namebuf; + sym = find_kallsyms_symbol(mod, addr, size, offset); + + if (sym) + ret = strscpy(namebuf, sym, KSYM_NAME_LEN); } preempt_enable(); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 65208d3b5ed9..eacab4020508 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6969,7 +6969,7 @@ allocate_ftrace_mod_map(struct module *mod, return mod_map; } -static const char * +static int ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, unsigned long addr, unsigned long *size, unsigned long *off, char *sym) @@ -6990,21 +6990,18 @@ ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, *size = found_func->size; if (off) *off = addr - found_func->ip; - if (sym) - strscpy(sym, found_func->name, KSYM_NAME_LEN); - - return found_func->name; + return strscpy(sym, found_func->name, KSYM_NAME_LEN); } - return NULL; + return 0; } -const char * +int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { struct ftrace_mod_map *mod_map; - const char *ret = NULL; + int ret = 0; /* mod_map is freed via call_rcu() */ preempt_disable(); From 4f2a129b33a2054e62273edd5a051c34c08d96e9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 27 Jun 2024 11:26:00 +1000 Subject: [PATCH 761/778] drm/drm_file: Fix pid refcounting race , Maxime Ripard , Thomas Zimmermann filp->pid is supposed to be a refcounted pointer; however, before this patch, drm_file_update_pid() only increments the refcount of a struct pid after storing a pointer to it in filp->pid and dropping the dev->filelist_mutex, making the following race possible: process A process B ========= ========= begin drm_file_update_pid mutex_lock(&dev->filelist_mutex) rcu_replace_pointer(filp->pid, , 1) mutex_unlock(&dev->filelist_mutex) begin drm_file_update_pid mutex_lock(&dev->filelist_mutex) rcu_replace_pointer(filp->pid, , 1) mutex_unlock(&dev->filelist_mutex) get_pid() synchronize_rcu() put_pid() *** pid B reaches refcount 0 and is freed here *** get_pid() *** UAF *** synchronize_rcu() put_pid() As far as I know, this race can only occur with CONFIG_PREEMPT_RCU=y because it requires RCU to detect a quiescent state in code that is not explicitly calling into the scheduler. This race leads to use-after-free of a "struct pid". It is probably somewhat hard to hit because process A has to pass through a synchronize_rcu() operation while process B is between mutex_unlock() and get_pid(). Fix it by ensuring that by the time a pointer to the current task's pid is stored in the file, an extra reference to the pid has been taken. This fix also removes the condition for synchronize_rcu(); I think that optimization is unnecessary complexity, since in that case we would usually have bailed out on the lockless check above. Fixes: 1c7a387ffef8 ("drm: Update file owner during use") Cc: Signed-off-by: Jann Horn Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_file.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 638ffa4444f5..714e42b05108 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -469,14 +469,12 @@ void drm_file_update_pid(struct drm_file *filp) dev = filp->minor->dev; mutex_lock(&dev->filelist_mutex); + get_pid(pid); old = rcu_replace_pointer(filp->pid, pid, 1); mutex_unlock(&dev->filelist_mutex); - if (pid != old) { - get_pid(pid); - synchronize_rcu(); - put_pid(old); - } + synchronize_rcu(); + put_pid(old); } /** From ebb5b260af67c677700cd51be6845c2cab3edfbd Mon Sep 17 00:00:00 2001 From: David Arcari Date: Mon, 20 May 2024 14:57:49 -0400 Subject: [PATCH 762/778] tools/power turbostat: option '-n' is ambiguous In some cases specifying the '-n' command line argument will cause turbostat to fail. For instance 'turbostat -n 1' works fine; however, 'turbostat -n 1 -d' will fail. This is the result of the first call to getopt_long_only() where "MP" is specified as the optstring. This can be easily fixed by changing the optstring from "MP" to "MPn:" to remove ambiguity between the arguments. tools/power turbostat: option '-n' is ambiguous; possibilities: '-num_iterations' '-no-msr' '-no-perf' Fixes: a0e86c90b83c ("tools/power turbostat: Add --no-perf option") Signed-off-by: David Arcari Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8cdf41906e98..12c1872aa42e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -8424,7 +8424,7 @@ void cmdline(int argc, char **argv) * Parse some options early, because they may make other options invalid, * like adding the MSR counter with --add and at the same time using --no-msr. */ - while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) { + while ((opt = getopt_long_only(argc, argv, "MPn:", long_options, &option_index)) != -1) { switch (opt) { case 'M': no_msr = 1; From c5120a3356755f9e9d2d592c1347f3b9ff4022a7 Mon Sep 17 00:00:00 2001 From: Adam Hawley Date: Wed, 22 May 2024 16:27:21 +0300 Subject: [PATCH 763/778] tools/power turbostat: Fix unc freq columns not showing with '-q' or '-l' Commit 78464d7681f7 ("tools/power turbostat: Add columns for clustered uncore frequency") introduced 'probe_intel_uncore_frequency_cluster()' in a way which prevents printing uncore frequency columns if either of the '-q' or '-l' options are used. Systems which do not have multiple uncore frequencies per package are unaffected by this regression. Fix the function so that uncore frequency columns are shown when either the '-l' or '-q' option is used by checking if 'quiet' is true after adding counters for the uncore frequency columns. Fixes: 78464d7681f7 ("tools/power turbostat: Add columns for clustered uncore frequency") Signed-off-by: Adam Hawley Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 12c1872aa42e..ad10feb9fafd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5695,9 +5695,6 @@ static void probe_intel_uncore_frequency_cluster(void) if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) return; - if (quiet) - return; - for (uncore_max_id = 0;; ++uncore_max_id) { sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id); @@ -5727,6 +5724,14 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/fabric_cluster_id", path_base); cluster_id = read_sysfs_int(path); + sprintf(path, "%s/current_freq_khz", path_base); + sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); + + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); + + if (quiet) + continue; + sprintf(path, "%s/min_freq_khz", path_base); k = read_sysfs_int(path); sprintf(path, "%s/max_freq_khz", path_base); @@ -5743,11 +5748,6 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/current_freq_khz", path_base); k = read_sysfs_int(path); fprintf(outf, " %d MHz\n", k / 1000); - - sprintf(path, "%s/current_freq_khz", path_base); - sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); - - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); } } From b15943c4b3351173d5f3b0d87362d2994a89bb66 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Thu, 30 May 2024 09:16:39 +0200 Subject: [PATCH 764/778] tools/power turbostat: Add local build_bug.h header for snapshot target Fixes compilation errors for Makefile snapshot target described in: commit 231ce08b662a ("tools/power turbostat: Add "snapshot:" Makefile target") Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 6 +++++- tools/power/x86/turbostat/turbostat.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 2d6dce2c8f77..b1e6817f1e54 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -14,6 +14,7 @@ turbostat : turbostat.c override CFLAGS += -O2 -Wall -Wextra -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' +override CFLAGS += -DBUILD_BUG_HEADER='"../../../../include/linux/build_bug.h"' override CFLAGS += -D_FILE_OFFSET_BITS=64 override CFLAGS += -D_FORTIFY_SOURCE=2 @@ -44,10 +45,13 @@ snapshot: turbostat @echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h @echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h + @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h + @echo PWD=. > $(SNAPSHOT)/Makefile @echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile @echo "CFLAGS += -DINTEL_FAMILY_HEADER='\"intel-family.h\"'" >> $(SNAPSHOT)/Makefile - @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile + @echo "CFLAGS += -DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile + @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' -e's/.*BUILD_BUG_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile @rm -f $(SNAPSHOT).tar.gz tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ad10feb9fafd..9f5d053d4bc6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -10,6 +10,7 @@ #define _GNU_SOURCE #include MSRHEADER #include INTEL_FAMILY_HEADER +#include BUILD_BUG_HEADER #include #include #include @@ -38,7 +39,6 @@ #include #include #include -#include #define UNUSED(x) (void)(x) From 09aaa2d0642359fddae607b6950b2ca7bd1cf04f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 28 Jun 2024 14:28:06 +0200 Subject: [PATCH 765/778] MAINTAINERS: Update IOMMU tree location Update the maintainers entries to the new location of the IOMMU tree. Signed-off-by: Joerg Roedel --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2ca8f35dfe03..932caa703e83 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1044,7 +1044,7 @@ M: Joerg Roedel R: Suravee Suthikulpanit L: iommu@lists.linux.dev S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: drivers/iommu/amd/ F: include/linux/amd-iommu.h @@ -11156,7 +11156,7 @@ M: David Woodhouse M: Lu Baolu L: iommu@lists.linux.dev S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: drivers/iommu/intel/ INTEL IPU3 CSI-2 CIO2 DRIVER @@ -11529,7 +11529,7 @@ IOMMU DMA-API LAYER M: Robin Murphy L: iommu@lists.linux.dev S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: drivers/iommu/dma-iommu.c F: drivers/iommu/dma-iommu.h F: drivers/iommu/iova.c @@ -11541,7 +11541,7 @@ M: Will Deacon R: Robin Murphy L: iommu@lists.linux.dev S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: Documentation/devicetree/bindings/iommu/ F: Documentation/userspace-api/iommu.rst F: drivers/iommu/ From 1066fe825987da007669d7c25306b4dbb50bd7dd Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 27 Jun 2024 12:55:52 +0200 Subject: [PATCH 766/778] ata: libata-core: Add ATA_HORKAGE_NOLPM for all Crucial BX SSD1 models We got another report that CT1000BX500SSD1 does not work with LPM. If you look in libata-core.c, we have six different Crucial devices that are marked with ATA_HORKAGE_NOLPM. This model would have been the seventh. (This quirk is used on Crucial models starting with both CT* and Crucial_CT*) It is obvious that this vendor does not have a great history of supporting LPM properly, therefore, add the ATA_HORKAGE_NOLPM quirk for all Crucial BX SSD1 models. Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type") Cc: stable@vger.kernel.org Reported-by: Alessandro Maggio Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218832 Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240627105551.4159447-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index e1bf8a19b3c8..efb5195da60c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4137,8 +4137,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER BD-RW BDR-205", NULL, ATA_HORKAGE_NOLPM }, /* Crucial devices with broken LPM support */ - { "CT500BX100SSD1", NULL, ATA_HORKAGE_NOLPM }, - { "CT240BX500SSD1", NULL, ATA_HORKAGE_NOLPM }, + { "CT*0BX*00SSD1", NULL, ATA_HORKAGE_NOLPM }, /* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */ { "Crucial_CT512MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | From 6a4805b2f51a2e5dc346651e0d0cd8abcc2937c8 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 31 May 2024 16:07:26 -0700 Subject: [PATCH 767/778] string: kunit: add missing MODULE_DESCRIPTION() macros make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/string_kunit.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/string_helpers_kunit.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240531-md-lib-string-v1-1-2738cf057d94@quicinc.com Signed-off-by: Kees Cook --- lib/string_helpers_kunit.c | 1 + lib/string_kunit.c | 1 + 2 files changed, 2 insertions(+) diff --git a/lib/string_helpers_kunit.c b/lib/string_helpers_kunit.c index f88e39fd68d6..c853046183d2 100644 --- a/lib/string_helpers_kunit.c +++ b/lib/string_helpers_kunit.c @@ -625,4 +625,5 @@ static struct kunit_suite string_helpers_test_suite = { kunit_test_suites(&string_helpers_test_suite); +MODULE_DESCRIPTION("Test cases for string helpers module"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/string_kunit.c b/lib/string_kunit.c index 2a812decf14b..c919e3293da6 100644 --- a/lib/string_kunit.c +++ b/lib/string_kunit.c @@ -633,4 +633,5 @@ static struct kunit_suite string_test_suite = { kunit_test_suites(&string_test_suite); +MODULE_DESCRIPTION("Test cases for string functions"); MODULE_LICENSE("GPL v2"); From 6db1208bf95b4c091897b597c415e11edeab2e2d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Jun 2024 14:47:15 -0700 Subject: [PATCH 768/778] randomize_kstack: Remove non-functional per-arch entropy filtering An unintended consequence of commit 9c573cd31343 ("randomize_kstack: Improve entropy diffusion") was that the per-architecture entropy size filtering reduced how many bits were being added to the mix, rather than how many bits were being used during the offsetting. All architectures fell back to the existing default of 0x3FF (10 bits), which will consume at most 1KiB of stack space. It seems that this is working just fine, so let's avoid the confusion and update everything to use the default. The prior intent of the per-architecture limits were: arm64: capped at 0x1FF (9 bits), 5 bits effective powerpc: uncapped (10 bits), 6 or 7 bits effective riscv: uncapped (10 bits), 6 bits effective x86: capped at 0xFF (8 bits), 5 (x86_64) or 6 (ia32) bits effective s390: capped at 0xFF (8 bits), undocumented effective entropy Current discussion has led to just dropping the original per-architecture filters. The additional entropy appears to be safe for arm64, x86, and s390. Quoting Arnd, "There is no point pretending that 15.75KB is somehow safe to use while 15.00KB is not." Co-developed-by: Yuntao Liu Signed-off-by: Yuntao Liu Fixes: 9c573cd31343 ("randomize_kstack: Improve entropy diffusion") Link: https://lore.kernel.org/r/20240617133721.377540-1-liuyuntao12@huawei.com Reviewed-by: Arnd Bergmann Acked-by: Mark Rutland Acked-by: Heiko Carstens # s390 Link: https://lore.kernel.org/r/20240619214711.work.953-kees@kernel.org Signed-off-by: Kees Cook --- arch/arm64/kernel/syscall.c | 16 +++++++--------- arch/s390/include/asm/entry-common.h | 2 +- arch/x86/include/asm/entry-common.h | 15 ++++++--------- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index ad198262b981..7230f6e20ab8 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -53,17 +53,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, syscall_set_return_value(current, regs, 0, ret); /* - * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), - * but not enough for arm64 stack utilization comfort. To keep - * reasonable stack head room, reduce the maximum offset to 9 bits. + * This value will get limited by KSTACK_OFFSET_MAX(), which is 10 + * bits. The actual entropy will be further reduced by the compiler + * when applying stack alignment constraints: the AAPCS mandates a + * 16-byte aligned SP at function boundaries, which will remove the + * 4 low bits from any entropy chosen here. * - * The actual entropy will be further reduced by the compiler when - * applying stack alignment constraints: the AAPCS mandates a - * 16-byte (i.e. 4-bit) aligned SP at function boundaries. - * - * The resulting 5 bits of entropy is seen in SP[8:4]. + * The resulting 6 bits of entropy is seen in SP[9:4]. */ - choose_random_kstack_offset(get_random_u16() & 0x1FF); + choose_random_kstack_offset(get_random_u16()); } static inline bool has_syscall_work(unsigned long flags) diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index 7f5004065e8a..35555c944630 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -54,7 +54,7 @@ static __always_inline void arch_exit_to_user_mode(void) static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, unsigned long ti_work) { - choose_random_kstack_offset(get_tod_clock_fast() & 0xff); + choose_random_kstack_offset(get_tod_clock_fast()); } #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 7e523bb3d2d3..fb2809b20b0a 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -73,19 +73,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, #endif /* - * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), - * but not enough for x86 stack utilization comfort. To keep - * reasonable stack head room, reduce the maximum offset to 8 bits. - * - * The actual entropy will be further reduced by the compiler when - * applying stack alignment constraints (see cc_stack_align4/8 in + * This value will get limited by KSTACK_OFFSET_MAX(), which is 10 + * bits. The actual entropy will be further reduced by the compiler + * when applying stack alignment constraints (see cc_stack_align4/8 in * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32) * low bits from any entropy chosen here. * - * Therefore, final stack offset entropy will be 5 (x86_64) or - * 6 (ia32) bits. + * Therefore, final stack offset entropy will be 7 (x86_64) or + * 8 (ia32) bits. */ - choose_random_kstack_offset(rdtsc() & 0xFF); + choose_random_kstack_offset(rdtsc()); } #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare From 1c07c9be87dd3dd0634033bf08728b32465f08fb Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 29 May 2024 14:29:42 -0700 Subject: [PATCH 769/778] tty: mxser: Remove __counted_by from mxser_board.ports[] Work for __counted_by on generic pointers in structures (not just flexible array members) has started landing in Clang 19 (current tip of tree). During the development of this feature, a restriction was added to __counted_by to prevent the flexible array member's element type from including a flexible array member itself such as: struct foo { int count; char buf[]; }; struct bar { int count; struct foo data[] __counted_by(count); }; because the size of data cannot be calculated with the standard array size formula: sizeof(struct foo) * count This restriction was downgraded to a warning but due to CONFIG_WERROR, it can still break the build. The application of __counted_by on the ports member of 'struct mxser_board' triggers this restriction, resulting in: drivers/tty/mxser.c:291:2: error: 'counted_by' should not be applied to an array with element of unknown size because 'struct mxser_port' is a struct type with a flexible array member. This will be an error in a future compiler version [-Werror,-Wbounds-safety-counted-by-elt-type-unknown-size] 291 | struct mxser_port ports[] __counted_by(nports); | ^~~~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. Remove this use of __counted_by to fix the warning/error. However, rather than remove it altogether, leave it commented, as it may be possible to support this in future compiler releases. Cc: Closes: https://github.com/ClangBuiltLinux/linux/issues/2026 Fixes: f34907ecca71 ("mxser: Annotate struct mxser_board with __counted_by") Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240529-drop-counted-by-ports-mxser-board-v1-1-0ab217f4da6d@kernel.org Signed-off-by: Kees Cook --- drivers/tty/mxser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c index 458bb1280ebf..5b97e420a95f 100644 --- a/drivers/tty/mxser.c +++ b/drivers/tty/mxser.c @@ -288,7 +288,7 @@ struct mxser_board { enum mxser_must_hwid must_hwid; speed_t max_baud; - struct mxser_port ports[] __counted_by(nports); + struct mxser_port ports[] /* __counted_by(nports) */; }; static DECLARE_BITMAP(mxser_boards, MXSER_BOARDS); From c422b6a630240f706063e0ecbb894aa8491b1fa1 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 27 Jun 2024 13:14:47 +0200 Subject: [PATCH 770/778] i2c: testunit: don't erase registers after STOP STOP fallsthrough to WRITE_REQUESTED but this became problematic when clearing the testunit registers was added to the latter. Actually, there is no reason to clear the testunit state after STOP. Doing it when a new WRITE_REQUESTED arrives is enough. So, no need to fallthrough, at all. Fixes: b39ab96aa894 ("i2c: testunit: add support for block process calls") Signed-off-by: Wolfram Sang Reviewed-by: Andi Shyti --- drivers/i2c/i2c-slave-testunit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c index a49642bbae4b..a5dcbc3c2c14 100644 --- a/drivers/i2c/i2c-slave-testunit.c +++ b/drivers/i2c/i2c-slave-testunit.c @@ -118,7 +118,7 @@ static int i2c_slave_testunit_slave_cb(struct i2c_client *client, queue_delayed_work(system_long_wq, &tu->worker, msecs_to_jiffies(10 * tu->regs[TU_REG_DELAY])); } - fallthrough; + break; case I2C_SLAVE_WRITE_REQUESTED: memset(tu->regs, 0, TU_NUM_REGS); From c116deafd1a5cc1e9739099eb32114e90623209c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 27 Jun 2024 13:14:48 +0200 Subject: [PATCH 771/778] i2c: testunit: discard write requests while old command is running When clearing registers on new write requests was added, the protection for currently running commands was missed leading to concurrent access to the testunit registers. Check the flag beforehand. Fixes: b39ab96aa894 ("i2c: testunit: add support for block process calls") Signed-off-by: Wolfram Sang Reviewed-by: Andi Shyti --- drivers/i2c/i2c-slave-testunit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c index a5dcbc3c2c14..ca43e98cae1b 100644 --- a/drivers/i2c/i2c-slave-testunit.c +++ b/drivers/i2c/i2c-slave-testunit.c @@ -121,6 +121,9 @@ static int i2c_slave_testunit_slave_cb(struct i2c_client *client, break; case I2C_SLAVE_WRITE_REQUESTED: + if (test_bit(TU_FLAG_IN_PROCESS, &tu->flags)) + return -EBUSY; + memset(tu->regs, 0, TU_NUM_REGS); tu->reg_idx = 0; break; From 093d9603b60093a9aaae942db56107f6432a5dca Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 28 Jun 2024 14:27:22 -0700 Subject: [PATCH 772/778] x86: stop playing stack games in profile_pc() The 'profile_pc()' function is used for timer-based profiling, which isn't really all that relevant any more to begin with, but it also ends up making assumptions based on the stack layout that aren't necessarily valid. Basically, the code tries to account the time spent in spinlocks to the caller rather than the spinlock, and while I support that as a concept, it's not worth the code complexity or the KASAN warnings when no serious profiling is done using timers anyway these days. And the code really does depend on stack layout that is only true in the simplest of cases. We've lost the comment at some point (I think when the 32-bit and 64-bit code was unified), but it used to say: Assume the lock function has either no stack frame or a copy of eflags from PUSHF. which explains why it just blindly loads a word or two straight off the stack pointer and then takes a minimal look at the values to just check if they might be eflags or the return pc: Eflags always has bits 22 and up cleared unlike kernel addresses but that basic stack layout assumption assumes that there isn't any lock debugging etc going on that would complicate the code and cause a stack frame. It causes KASAN unhappiness reported for years by syzkaller [1] and others [2]. With no real practical reason for this any more, just remove the code. Just for historical interest, here's some background commits relating to this code from 2006: 0cb91a229364 ("i386: Account spinlocks to the caller during profiling for !FP kernels") 31679f38d886 ("Simplify profile_pc on x86-64") and a code unification from 2009: ef4512882dbe ("x86: time_32/64.c unify profile_pc") but the basics of this thing actually goes back to before the git tree. Link: https://syzkaller.appspot.com/bug?extid=84fe685c02cd112a2ac3 [1] Link: https://lore.kernel.org/all/CAK55_s7Xyq=nh97=K=G1sxueOFrJDAvPOJAL4TPTCAYvmxO9_A@mail.gmail.com/ [2] Signed-off-by: Linus Torvalds --- arch/x86/kernel/time.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index e42faa792c07..52e1f3f0b361 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -27,25 +27,7 @@ unsigned long profile_pc(struct pt_regs *regs) { - unsigned long pc = instruction_pointer(regs); - - if (!user_mode(regs) && in_lock_functions(pc)) { -#ifdef CONFIG_FRAME_POINTER - return *(unsigned long *)(regs->bp + sizeof(long)); -#else - unsigned long *sp = (unsigned long *)regs->sp; - /* - * Return address is either directly at stack pointer - * or above a saved flags. Eflags has bits 22-31 zero, - * kernel addresses don't. - */ - if (sp[0] >> 22) - return sp[0]; - if (sp[1] >> 22) - return sp[1]; -#endif - } - return pc; + return instruction_pointer(regs); } EXPORT_SYMBOL(profile_pc); From 769327258a141ba80ac8b96fce35c68631228370 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 25 Jun 2024 17:50:04 -0700 Subject: [PATCH 773/778] x86-32: fix cmpxchg8b_emu build error with clang The kernel test robot reported that clang no longer compiles the 32-bit x86 kernel in some configurations due to commit 95ece48165c1 ("locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions"). The build fails with arch/x86/include/asm/cmpxchg_32.h:149:9: error: inline assembly requires more registers than available and the reason seems to be that not only does the cmpxchg8b instruction need four fixed registers (EDX:EAX and ECX:EBX), with the emulation fallback the inline asm also wants a fifth fixed register for the address (it uses %esi for that, but that's just a software convention with cmpxchg8b_emu). Avoiding using another pointer input to the asm (and just forcing it to use the "0(%esi)" addressing that we end up requiring for the sw fallback) seems to fix the issue. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202406230912.F6XFIyA6-lkp@intel.com/ Fixes: 95ece48165c1 ("locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions") Link: https://lore.kernel.org/all/202406230912.F6XFIyA6-lkp@intel.com/ Suggested-by: Uros Bizjak Reviewed-and-Tested-by: Uros Bizjak Signed-off-by: Linus Torvalds --- arch/x86/include/asm/cmpxchg_32.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ed2797f132ce..62cef2113ca7 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -93,10 +93,9 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, \ asm volatile(ALTERNATIVE(_lock_loc \ "call cmpxchg8b_emu", \ - _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \ - : [ptr] "+m" (*(_ptr)), \ - "+a" (o.low), "+d" (o.high) \ - : "b" (n.low), "c" (n.high), "S" (_ptr) \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + : "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ : "memory"); \ \ o.full; \ @@ -122,12 +121,11 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 \ asm volatile(ALTERNATIVE(_lock_loc \ "call cmpxchg8b_emu", \ - _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ CC_SET(e) \ : CC_OUT(e) (ret), \ - [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ - : "b" (n.low), "c" (n.high), "S" (_ptr) \ + : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ : "memory"); \ \ if (unlikely(!ret)) \ From 5d92c7c566dc76d96e0e19e481d926bbe6631c1e Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 29 Jun 2024 14:42:11 +0200 Subject: [PATCH 774/778] ata: libata-core: Fix null pointer dereference on error If the ata_port_alloc() call in ata_host_alloc() fails, ata_host_release() will get called. However, the code in ata_host_release() tries to free ata_port struct members unconditionally, which can lead to the following: BUG: unable to handle page fault for address: 0000000000003990 PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 10 PID: 594 Comm: (udev-worker) Not tainted 6.10.0-rc5 #44 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/2014 RIP: 0010:ata_host_release.cold+0x2f/0x6e [libata] Code: e4 4d 63 f4 44 89 e2 48 c7 c6 90 ad 32 c0 48 c7 c7 d0 70 33 c0 49 83 c6 0e 41 RSP: 0018:ffffc90000ebb968 EFLAGS: 00010246 RAX: 0000000000000041 RBX: ffff88810fb52e78 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff88813b3218c0 RDI: ffff88813b3218c0 RBP: ffff88810fb52e40 R08: 0000000000000000 R09: 6c65725f74736f68 R10: ffffc90000ebb738 R11: 73692033203a746e R12: 0000000000000004 R13: 0000000000000000 R14: 0000000000000011 R15: 0000000000000006 FS: 00007f6cc55b9980(0000) GS:ffff88813b300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000003990 CR3: 00000001122a2000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? __die_body.cold+0x19/0x27 ? page_fault_oops+0x15a/0x2f0 ? exc_page_fault+0x7e/0x180 ? asm_exc_page_fault+0x26/0x30 ? ata_host_release.cold+0x2f/0x6e [libata] ? ata_host_release.cold+0x2f/0x6e [libata] release_nodes+0x35/0xb0 devres_release_group+0x113/0x140 ata_host_alloc+0xed/0x120 [libata] ata_host_alloc_pinfo+0x14/0xa0 [libata] ahci_init_one+0x6c9/0xd20 [ahci] Do not access ata_port struct members unconditionally. Fixes: 633273a3ed1c ("libata-pmp: hook PMP support and enable it") Cc: stable@vger.kernel.org Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: John Garry Link: https://lore.kernel.org/r/20240629124210.181537-7-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index efb5195da60c..bdccf4ea251a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5517,6 +5517,9 @@ static void ata_host_release(struct kref *kref) for (i = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; + if (!ap) + continue; + kfree(ap->pmp_link); kfree(ap->slave_link); kfree(ap->ncq_sense_buf); From f6549f538fe0b2c389e1a7037f4e21039e25137a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 29 Jun 2024 14:42:12 +0200 Subject: [PATCH 775/778] ata,scsi: libata-core: Do not leak memory for ata_port struct members libsas is currently not freeing all the struct ata_port struct members, e.g. ncq_sense_buf for a driver supporting Command Duration Limits (CDL). Add a function, ata_port_free(), that is used to free a ata_port, including its struct members. It makes sense to keep the code related to freeing a ata_port in its own function, which will also free all the struct members of struct ata_port. Fixes: 18bd7718b5c4 ("scsi: ata: libata: Handle completion of CDL commands using policy 0xD") Reviewed-by: John Garry Link: https://lore.kernel.org/r/20240629124210.181537-8-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 24 ++++++++++++++---------- drivers/scsi/libsas/sas_ata.c | 6 +++--- drivers/scsi/libsas/sas_discover.c | 2 +- include/linux/libata.h | 1 + 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index bdccf4ea251a..27d22bc43a95 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5489,6 +5489,18 @@ struct ata_port *ata_port_alloc(struct ata_host *host) return ap; } +void ata_port_free(struct ata_port *ap) +{ + if (!ap) + return; + + kfree(ap->pmp_link); + kfree(ap->slave_link); + kfree(ap->ncq_sense_buf); + kfree(ap); +} +EXPORT_SYMBOL_GPL(ata_port_free); + static void ata_devres_release(struct device *gendev, void *res) { struct ata_host *host = dev_get_drvdata(gendev); @@ -5515,15 +5527,7 @@ static void ata_host_release(struct kref *kref) int i; for (i = 0; i < host->n_ports; i++) { - struct ata_port *ap = host->ports[i]; - - if (!ap) - continue; - - kfree(ap->pmp_link); - kfree(ap->slave_link); - kfree(ap->ncq_sense_buf); - kfree(ap); + ata_port_free(host->ports[i]); host->ports[i] = NULL; } kfree(host); @@ -5906,7 +5910,7 @@ int ata_host_register(struct ata_host *host, const struct scsi_host_template *sh * allocation time. */ for (i = host->n_ports; host->ports[i]; i++) - kfree(host->ports[i]); + ata_port_free(host->ports[i]); /* give ports names and add SCSI hosts */ for (i = 0; i < host->n_ports; i++) { diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 4c69fc63c119..cbbe43d8ef87 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -610,15 +610,15 @@ int sas_ata_init(struct domain_device *found_dev) rc = ata_sas_tport_add(ata_host->dev, ap); if (rc) - goto destroy_port; + goto free_port; found_dev->sata_dev.ata_host = ata_host; found_dev->sata_dev.ap = ap; return 0; -destroy_port: - kfree(ap); +free_port: + ata_port_free(ap); free_host: ata_host_put(ata_host); return rc; diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 8fb7c41c0962..48d975c6dbf2 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -301,7 +301,7 @@ void sas_free_device(struct kref *kref) if (dev_is_sata(dev) && dev->sata_dev.ap) { ata_sas_tport_delete(dev->sata_dev.ap); - kfree(dev->sata_dev.ap); + ata_port_free(dev->sata_dev.ap); ata_host_put(dev->sata_dev.ata_host); dev->sata_dev.ata_host = NULL; dev->sata_dev.ap = NULL; diff --git a/include/linux/libata.h b/include/linux/libata.h index 13fb41d25da6..7d3bd7c9664a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1249,6 +1249,7 @@ extern int ata_slave_link_init(struct ata_port *ap); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_port_probe(struct ata_port *ap); +extern void ata_port_free(struct ata_port *ap); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim, From ab9e0c529eb7cafebdd31fe1644524e80a48b05d Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 29 Jun 2024 14:42:13 +0200 Subject: [PATCH 776/778] ata: libata-core: Fix double free on error If e.g. the ata_port_alloc() call in ata_host_alloc() fails, we will jump to the err_out label, which will call devres_release_group(). devres_release_group() will trigger a call to ata_host_release(). ata_host_release() calls kfree(host), so executing the kfree(host) in ata_host_alloc() will lead to a double free: kernel BUG at mm/slub.c:553! Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 11 PID: 599 Comm: (udev-worker) Not tainted 6.10.0-rc5 #47 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/2014 RIP: 0010:kfree+0x2cf/0x2f0 Code: 5d 41 5e 41 5f 5d e9 80 d6 ff ff 4d 89 f1 41 b8 01 00 00 00 48 89 d9 48 89 da RSP: 0018:ffffc90000f377f0 EFLAGS: 00010246 RAX: ffff888112b1f2c0 RBX: ffff888112b1f2c0 RCX: ffff888112b1f320 RDX: 000000000000400b RSI: ffffffffc02c9de5 RDI: ffff888112b1f2c0 RBP: ffffc90000f37830 R08: 0000000000000000 R09: 0000000000000000 R10: ffffc90000f37610 R11: 617461203a736b6e R12: ffffea00044ac780 R13: ffff888100046400 R14: ffffffffc02c9de5 R15: 0000000000000006 FS: 00007f2f1cabe980(0000) GS:ffff88813b380000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2f1c3acf75 CR3: 0000000111724000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? __die_body.cold+0x19/0x27 ? die+0x2e/0x50 ? do_trap+0xca/0x110 ? do_error_trap+0x6a/0x90 ? kfree+0x2cf/0x2f0 ? exc_invalid_op+0x50/0x70 ? kfree+0x2cf/0x2f0 ? asm_exc_invalid_op+0x1a/0x20 ? ata_host_alloc+0xf5/0x120 [libata] ? ata_host_alloc+0xf5/0x120 [libata] ? kfree+0x2cf/0x2f0 ata_host_alloc+0xf5/0x120 [libata] ata_host_alloc_pinfo+0x14/0xa0 [libata] ahci_init_one+0x6c9/0xd20 [ahci] Ensure that we will not call kfree(host) twice, by performing the kfree() only if the devres_open_group() call failed. Fixes: dafd6c496381 ("libata: ensure host is free'd on error exit paths") Cc: stable@vger.kernel.org Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240629124210.181537-9-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 27d22bc43a95..74b59b78d278 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5577,8 +5577,10 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports) if (!host) return NULL; - if (!devres_open_group(dev, NULL, GFP_KERNEL)) - goto err_free; + if (!devres_open_group(dev, NULL, GFP_KERNEL)) { + kfree(host); + return NULL; + } dr = devres_alloc(ata_devres_release, 0, GFP_KERNEL); if (!dr) @@ -5610,8 +5612,6 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports) err_out: devres_release_group(dev, NULL); - err_free: - kfree(host); return NULL; } EXPORT_SYMBOL_GPL(ata_host_alloc); From eeb25a09c5e0805d92e4ebd12c4b0ad0df1b0295 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 29 Jun 2024 14:42:14 +0200 Subject: [PATCH 777/778] ata: ahci: Clean up sysfs file on error .probe() (ahci_init_one()) calls sysfs_add_file_to_group(), however, if probe() fails after this call, we currently never call sysfs_remove_file_from_group(). (The sysfs_remove_file_from_group() call in .remove() (ahci_remove_one()) does not help, as .remove() is not called on .probe() error.) Thus, if probe() fails after the sysfs_add_file_to_group() call, the next time we insmod the module we will get: sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:04.0/remapped_nvme' CPU: 11 PID: 954 Comm: modprobe Not tainted 6.10.0-rc5 #43 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/2014 Call Trace: dump_stack_lvl+0x5d/0x80 sysfs_warn_dup.cold+0x17/0x23 sysfs_add_file_mode_ns+0x11a/0x130 sysfs_add_file_to_group+0x7e/0xc0 ahci_init_one+0x31f/0xd40 [ahci] Fixes: 894fba7f434a ("ata: ahci: Add sysfs attribute to show remapped NVMe device count") Cc: stable@vger.kernel.org Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240629124210.181537-10-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 5eb38fbbbecd..fc6fd583faf8 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1975,8 +1975,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) n_ports = max(ahci_nr_ports(hpriv->cap), fls(hpriv->port_map)); host = ata_host_alloc_pinfo(&pdev->dev, ppi, n_ports); - if (!host) - return -ENOMEM; + if (!host) { + rc = -ENOMEM; + goto err_rm_sysfs_file; + } host->private_data = hpriv; if (ahci_init_msi(pdev, n_ports, hpriv) < 0) { @@ -2031,11 +2033,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* initialize adapter */ rc = ahci_configure_dma_masks(pdev, hpriv); if (rc) - return rc; + goto err_rm_sysfs_file; rc = ahci_pci_reset_controller(host); if (rc) - return rc; + goto err_rm_sysfs_file; ahci_pci_init_controller(host); ahci_pci_print_info(host); @@ -2044,10 +2046,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc = ahci_host_activate(host, &ahci_sht); if (rc) - return rc; + goto err_rm_sysfs_file; pm_runtime_put_noidle(&pdev->dev); return 0; + +err_rm_sysfs_file: + sysfs_remove_file_from_group(&pdev->dev.kobj, + &dev_attr_remapped_nvme.attr, NULL); + return rc; } static void ahci_shutdown_one(struct pci_dev *pdev) From 22a40d14b572deb80c0648557f4bd502d7e83826 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 30 Jun 2024 14:40:44 -0700 Subject: [PATCH 778/778] Linux 6.10-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4d36f943b3b1..06aa6402b385 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION*