From c066fafc595eef5ae3c83ae3a8305956b8c3ef15 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 14 Aug 2018 20:37:45 +1000 Subject: [PATCH 001/269] KVM: PPC: Book3S HV: Use correct pagesize in kvm_unmap_radix() Since commit e641a317830b ("KVM: PPC: Book3S HV: Unify dirty page map between HPT and radix", 2017-10-26), kvm_unmap_radix() computes the number of PAGE_SIZEd pages being unmapped and passes it to kvmppc_update_dirty_map(), which expects to be passed the page size instead. Consequently it will only mark one system page dirty even when a large page (for example a THP page) is being unmapped. The consequence of this is that part of the THP page might not get copied during live migration, resulting in memory corruption for the guest. This fixes it by computing and passing the page size in kvm_unmap_radix(). Cc: stable@vger.kernel.org # v4.15+ Fixes: e641a317830b (KVM: PPC: Book3S HV: Unify dirty page map between HPT and radix) Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 176f911ee983..7efc42538ccf 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -738,10 +738,10 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift); if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) { - unsigned long npages = 1; + unsigned long psize = PAGE_SIZE; if (shift) - npages = 1ul << (shift - PAGE_SHIFT); - kvmppc_update_dirty_map(memslot, gfn, npages); + psize = 1ul << shift; + kvmppc_update_dirty_map(memslot, gfn, psize); } } return 0; From 8ecebf4d767e2307a946c8905278d6358eda35c3 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Mon, 6 Aug 2018 10:30:30 +0800 Subject: [PATCH 002/269] Btrfs: fix unexpected failure of nocow buffered writes after snapshotting when low on space Commit e9894fd3e3b3 ("Btrfs: fix snapshot vs nocow writting") forced nocow writes to fallback to COW, during writeback, when a snapshot is created. This resulted in writes made before creating the snapshot to unexpectedly fail with ENOSPC during writeback when success (0) was returned to user space through the write system call. The steps leading to this problem are: 1. When it's not possible to allocate data space for a write, the buffered write path checks if a NOCOW write is possible. If it is, it will not reserve space and success (0) is returned to user space. 2. Then when a snapshot is created, the root's will_be_snapshotted atomic is incremented and writeback is triggered for all inode's that belong to the root being snapshotted. Incrementing that atomic forces all previous writes to fallback to COW during writeback (running delalloc). 3. This results in the writeback for the inodes to fail and therefore setting the ENOSPC error in their mappings, so that a subsequent fsync on them will report the error to user space. So it's not a completely silent data loss (since fsync will report ENOSPC) but it's a very unexpected and undesirable behaviour, because if a clean shutdown/unmount of the filesystem happens without previous calls to fsync, it is expected to have the data present in the files after mounting the filesystem again. So fix this by adding a new atomic named snapshot_force_cow to the root structure which prevents this behaviour and works the following way: 1. It is incremented when we start to create a snapshot after triggering writeback and before waiting for writeback to finish. 2. This new atomic is now what is used by writeback (running delalloc) to decide whether we need to fallback to COW or not. Because we incremented this new atomic after triggering writeback in the snapshot creation ioctl, we ensure that all buffered writes that happened before snapshot creation will succeed and not fallback to COW (which would make them fail with ENOSPC). 3. The existing atomic, will_be_snapshotted, is kept because it is used to force new buffered writes, that start after we started snapshotting, to reserve data space even when NOCOW is possible. This makes these writes fail early with ENOSPC when there's no available space to allocate, preventing the unexpected behaviour of writeback later failing with ENOSPC due to a fallback to COW mode. Fixes: e9894fd3e3b3 ("Btrfs: fix snapshot vs nocow writting") Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/inode.c | 25 ++++--------------------- fs/btrfs/ioctl.c | 16 ++++++++++++++++ 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 318be7864072..a67cc190a84b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1280,6 +1280,7 @@ struct btrfs_root { int send_in_progress; struct btrfs_subvolume_writers *subv_writers; atomic_t will_be_snapshotted; + atomic_t snapshot_force_cow; /* For qgroup metadata reserved space */ spinlock_t qgroup_meta_rsv_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5124c15705ce..05dc3c17cb62 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1187,6 +1187,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, atomic_set(&root->log_batch, 0); refcount_set(&root->refs, 1); atomic_set(&root->will_be_snapshotted, 0); + atomic_set(&root->snapshot_force_cow, 0); root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3f51ddc18f98..c6d8c5d19ff0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1271,7 +1271,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 disk_num_bytes; u64 ram_bytes; int extent_type; - int ret, err; + int ret; int type; int nocow; int check_prev = 1; @@ -1403,11 +1403,8 @@ next_slot: * if there are pending snapshots for this root, * we fall into common COW way. */ - if (!nolock) { - err = btrfs_start_write_no_snapshotting(root); - if (!err) - goto out_check; - } + if (!nolock && atomic_read(&root->snapshot_force_cow)) + goto out_check; /* * force cow if csum exists in the range. * this ensure that csum for a given extent are @@ -1416,9 +1413,6 @@ next_slot: ret = csum_exist_in_range(fs_info, disk_bytenr, num_bytes); if (ret) { - if (!nolock) - btrfs_end_write_no_snapshotting(root); - /* * ret could be -EIO if the above fails to read * metadata. @@ -1431,11 +1425,8 @@ next_slot: WARN_ON_ONCE(nolock); goto out_check; } - if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) { - if (!nolock) - btrfs_end_write_no_snapshotting(root); + if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) goto out_check; - } nocow = 1; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = found_key.offset + @@ -1448,8 +1439,6 @@ next_slot: out_check: if (extent_end <= start) { path->slots[0]++; - if (!nolock && nocow) - btrfs_end_write_no_snapshotting(root); if (nocow) btrfs_dec_nocow_writers(fs_info, disk_bytenr); goto next_slot; @@ -1471,8 +1460,6 @@ out_check: end, page_started, nr_written, 1, NULL); if (ret) { - if (!nolock && nocow) - btrfs_end_write_no_snapshotting(root); if (nocow) btrfs_dec_nocow_writers(fs_info, disk_bytenr); @@ -1492,8 +1479,6 @@ out_check: ram_bytes, BTRFS_COMPRESS_NONE, BTRFS_ORDERED_PREALLOC); if (IS_ERR(em)) { - if (!nolock && nocow) - btrfs_end_write_no_snapshotting(root); if (nocow) btrfs_dec_nocow_writers(fs_info, disk_bytenr); @@ -1532,8 +1517,6 @@ out_check: EXTENT_CLEAR_DATA_RESV, PAGE_UNLOCK | PAGE_SET_PRIVATE2); - if (!nolock && nocow) - btrfs_end_write_no_snapshotting(root); cur_offset = extent_end; /* diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d3a5d2a41e5f..85c4284bb2cf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -747,6 +747,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; int ret; + bool snapshot_force_cow = false; if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) return -EINVAL; @@ -763,6 +764,11 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, goto free_pending; } + /* + * Force new buffered writes to reserve space even when NOCOW is + * possible. This is to avoid later writeback (running dealloc) to + * fallback to COW mode and unexpectedly fail with ENOSPC. + */ atomic_inc(&root->will_be_snapshotted); smp_mb__after_atomic(); /* wait for no snapshot writes */ @@ -773,6 +779,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (ret) goto dec_and_free; + /* + * All previous writes have started writeback in NOCOW mode, so now + * we force future writes to fallback to COW mode during snapshot + * creation. + */ + atomic_inc(&root->snapshot_force_cow); + snapshot_force_cow = true; + btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); btrfs_init_block_rsv(&pending_snapshot->block_rsv, @@ -837,6 +851,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, fail: btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv); dec_and_free: + if (snapshot_force_cow) + atomic_dec(&root->snapshot_force_cow); if (atomic_dec_and_test(&root->will_be_snapshotted)) wake_up_var(&root->will_be_snapshotted); free_pending: From 46dec40fb741f00f1864580130779aeeaf24fb3d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 20 Aug 2018 16:05:45 +1000 Subject: [PATCH 003/269] KVM: PPC: Book3S HV: Don't truncate HPTE index in xlate function This fixes a bug which causes guest virtual addresses to get translated to guest real addresses incorrectly when the guest is using the HPT MMU and has more than 256GB of RAM, or more specifically has a HPT larger than 2GB. This has showed up in testing as a failure of the host to emulate doorbell instructions correctly on POWER9 for HPT guests with more than 256GB of RAM. The bug is that the HPTE index in kvmppc_mmu_book3s_64_hv_xlate() is stored as an int, and in forming the HPTE address, the index gets shifted left 4 bits as an int before being signed-extended to 64 bits. The simple fix is to make the variable a long int, matching the return type of kvmppc_hv_find_lock_hpte(), which is what calculates the index. Fixes: 697d3899dcb4 ("KVM: PPC: Implement MMIO emulation support for Book3S HV guests") Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 7f3a8cf5d66f..4c08f42f6406 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -359,7 +359,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long pp, key; unsigned long v, orig_v, gr; __be64 *hptep; - int index; + long int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); if (kvm_is_radix(vcpu->kvm)) From d4682ba03ef618b6ef4be7cedc7aacaf505d3a58 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 11 Jun 2018 19:24:28 +0100 Subject: [PATCH 004/269] Btrfs: sync log after logging new name When we add a new name for an inode which was logged in the current transaction, we update the inode in the log so that its new name and ancestors are added to the log. However when we do this we do not persist the log, so the changes remain in memory only, and as a consequence, any ancestors that were created in the current transaction are updated such that future calls to btrfs_inode_in_log() return true. This leads to a subsequent fsync against such new ancestor directories returning immediately, without persisting the log, therefore after a power failure the new ancestor directories do not exist, despite fsync being called against them explicitly. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/A $ mkdir /mnt/B $ mkdir /mnt/A/C $ touch /mnt/B/foo $ xfs_io -c "fsync" /mnt/B/foo $ ln /mnt/B/foo /mnt/A/C/foo $ xfs_io -c "fsync" /mnt/A After the power failure, directory "A" does not exist, despite the explicit fsync on it. Instead of fixing this by changing the behaviour of the explicit fsync on directory "A" to persist the log instead of doing nothing, make the logging of the new file name (which happens when creating a hard link or renaming) persist the log. This approach not only is simpler, not requiring addition of new fields to the inode in memory structure, but also gives us the same behaviour as ext4, xfs and f2fs (possibly other filesystems too). A test case for fstests follows soon. Fixes: 12fcfd22fe5b ("Btrfs: tree logging unlink/rename fixes") Reported-by: Vijay Chidambaram Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 92 +++++++++++++++++++++++++++++++++++++++------ fs/btrfs/tree-log.c | 48 ++++++++++++++++++++--- fs/btrfs/tree-log.h | 10 ++++- 3 files changed, 131 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c6d8c5d19ff0..cb09873ad270 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6634,6 +6634,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; } else { struct dentry *parent = dentry->d_parent; + int ret; + err = btrfs_update_inode(trans, root, inode); if (err) goto fail; @@ -6647,7 +6649,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, goto fail; } d_instantiate(dentry, inode); - btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent); + ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent, + true, NULL); + if (ret == BTRFS_NEED_TRANS_COMMIT) { + err = btrfs_commit_transaction(trans); + trans = NULL; + } } fail: @@ -9386,14 +9393,21 @@ static int btrfs_rename_exchange(struct inode *old_dir, u64 new_idx = 0; u64 root_objectid; int ret; - int ret2; bool root_log_pinned = false; bool dest_log_pinned = false; + struct btrfs_log_ctx ctx_root; + struct btrfs_log_ctx ctx_dest; + bool sync_log_root = false; + bool sync_log_dest = false; + bool commit_transaction = false; /* we only allow rename subvolume link between subvolumes */ if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) return -EXDEV; + btrfs_init_log_ctx(&ctx_root, old_inode); + btrfs_init_log_ctx(&ctx_dest, new_inode); + /* close the race window with snapshot create/destroy ioctl */ if (old_ino == BTRFS_FIRST_FREE_OBJECTID) down_read(&fs_info->subvol_sem); @@ -9540,15 +9554,29 @@ static int btrfs_rename_exchange(struct inode *old_dir, if (root_log_pinned) { parent = new_dentry->d_parent; - btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), - parent); + ret = btrfs_log_new_name(trans, BTRFS_I(old_inode), + BTRFS_I(old_dir), parent, + false, &ctx_root); + if (ret == BTRFS_NEED_LOG_SYNC) + sync_log_root = true; + else if (ret == BTRFS_NEED_TRANS_COMMIT) + commit_transaction = true; + ret = 0; btrfs_end_log_trans(root); root_log_pinned = false; } if (dest_log_pinned) { - parent = old_dentry->d_parent; - btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir), - parent); + if (!commit_transaction) { + parent = old_dentry->d_parent; + ret = btrfs_log_new_name(trans, BTRFS_I(new_inode), + BTRFS_I(new_dir), parent, + false, &ctx_dest); + if (ret == BTRFS_NEED_LOG_SYNC) + sync_log_dest = true; + else if (ret == BTRFS_NEED_TRANS_COMMIT) + commit_transaction = true; + ret = 0; + } btrfs_end_log_trans(dest); dest_log_pinned = false; } @@ -9581,8 +9609,26 @@ out_fail: dest_log_pinned = false; } } - ret2 = btrfs_end_transaction(trans); - ret = ret ? ret : ret2; + if (!ret && sync_log_root && !commit_transaction) { + ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, + &ctx_root); + if (ret) + commit_transaction = true; + } + if (!ret && sync_log_dest && !commit_transaction) { + ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root, + &ctx_dest); + if (ret) + commit_transaction = true; + } + if (commit_transaction) { + ret = btrfs_commit_transaction(trans); + } else { + int ret2; + + ret2 = btrfs_end_transaction(trans); + ret = ret ? ret : ret2; + } out_notrans: if (new_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&fs_info->subvol_sem); @@ -9659,6 +9705,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, int ret; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); bool log_pinned = false; + struct btrfs_log_ctx ctx; + bool sync_log = false; + bool commit_transaction = false; if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; @@ -9816,8 +9865,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (log_pinned) { struct dentry *parent = new_dentry->d_parent; - btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), - parent); + btrfs_init_log_ctx(&ctx, old_inode); + ret = btrfs_log_new_name(trans, BTRFS_I(old_inode), + BTRFS_I(old_dir), parent, + false, &ctx); + if (ret == BTRFS_NEED_LOG_SYNC) + sync_log = true; + else if (ret == BTRFS_NEED_TRANS_COMMIT) + commit_transaction = true; + ret = 0; btrfs_end_log_trans(root); log_pinned = false; } @@ -9854,7 +9910,19 @@ out_fail: btrfs_end_log_trans(root); log_pinned = false; } - btrfs_end_transaction(trans); + if (!ret && sync_log) { + ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx); + if (ret) + commit_transaction = true; + } + if (commit_transaction) { + ret = btrfs_commit_transaction(trans); + } else { + int ret2; + + ret2 = btrfs_end_transaction(trans); + ret = ret ? ret : ret2; + } out_notrans: if (old_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&fs_info->subvol_sem); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1650dc44a5e3..3c2ae0e4f25a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6025,14 +6025,25 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, * Call this after adding a new name for a file and it will properly * update the log to reflect the new name. * - * It will return zero if all goes well, and it will return 1 if a - * full transaction commit is required. + * @ctx can not be NULL when @sync_log is false, and should be NULL when it's + * true (because it's not used). + * + * Return value depends on whether @sync_log is true or false. + * When true: returns BTRFS_NEED_TRANS_COMMIT if the transaction needs to be + * committed by the caller, and BTRFS_DONT_NEED_TRANS_COMMIT + * otherwise. + * When false: returns BTRFS_DONT_NEED_LOG_SYNC if the caller does not need to + * to sync the log, BTRFS_NEED_LOG_SYNC if it needs to sync the log, + * or BTRFS_NEED_TRANS_COMMIT if the transaction needs to be + * committed (without attempting to sync the log). */ int btrfs_log_new_name(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_inode *old_dir, - struct dentry *parent) + struct dentry *parent, + bool sync_log, struct btrfs_log_ctx *ctx) { struct btrfs_fs_info *fs_info = trans->fs_info; + int ret; /* * this will force the logging code to walk the dentry chain @@ -6047,9 +6058,34 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, */ if (inode->logged_trans <= fs_info->last_trans_committed && (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed)) - return 0; + return sync_log ? BTRFS_DONT_NEED_TRANS_COMMIT : + BTRFS_DONT_NEED_LOG_SYNC; - return btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX, - LOG_INODE_EXISTS, NULL); + if (sync_log) { + struct btrfs_log_ctx ctx2; + + btrfs_init_log_ctx(&ctx2, &inode->vfs_inode); + ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX, + LOG_INODE_EXISTS, &ctx2); + if (ret == BTRFS_NO_LOG_SYNC) + return BTRFS_DONT_NEED_TRANS_COMMIT; + else if (ret) + return BTRFS_NEED_TRANS_COMMIT; + + ret = btrfs_sync_log(trans, inode->root, &ctx2); + if (ret) + return BTRFS_NEED_TRANS_COMMIT; + return BTRFS_DONT_NEED_TRANS_COMMIT; + } + + ASSERT(ctx); + ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX, + LOG_INODE_EXISTS, ctx); + if (ret == BTRFS_NO_LOG_SYNC) + return BTRFS_DONT_NEED_LOG_SYNC; + else if (ret) + return BTRFS_NEED_TRANS_COMMIT; + + return BTRFS_NEED_LOG_SYNC; } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 122e68b89a5a..7ab9bb88a639 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -71,8 +71,16 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, int for_rename); void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, struct btrfs_inode *dir); +/* Return values for btrfs_log_new_name() */ +enum { + BTRFS_DONT_NEED_TRANS_COMMIT, + BTRFS_NEED_TRANS_COMMIT, + BTRFS_DONT_NEED_LOG_SYNC, + BTRFS_NEED_LOG_SYNC, +}; int btrfs_log_new_name(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_inode *old_dir, - struct dentry *parent); + struct dentry *parent, + bool sync_log, struct btrfs_log_ctx *ctx); #endif From de02b9f6bb65a6a1848f346f7a3617b7a9b930c0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Aug 2018 09:38:59 +0100 Subject: [PATCH 005/269] Btrfs: fix data corruption when deduplicating between different files If we deduplicate extents between two different files we can end up corrupting data if the source range ends at the size of the source file, the source file's size is not aligned to the filesystem's block size and the destination range does not go past the size of the destination file size. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0x6b 0 2518890" /mnt/foo # The first byte with a value of 0xae starts at an offset (2518890) # which is not a multiple of the sector size. $ xfs_io -c "pwrite -S 0xae 2518890 102398" /mnt/foo # Confirm the file content is full of bytes with values 0x6b and 0xae. $ od -t x1 /mnt/foo 0000000 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b * 11467540 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ae ae ae ae ae ae 11467560 ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae * 11777540 ae ae ae ae ae ae ae ae 11777550 # Create a second file with a length not aligned to the sector size, # whose bytes all have the value 0x6b, so that its extent(s) can be # deduplicated with the first file. $ xfs_io -f -c "pwrite -S 0x6b 0 557771" /mnt/bar # Now deduplicate the entire second file into a range of the first file # that also has all bytes with the value 0x6b. The destination range's # end offset must not be aligned to the sector size and must be less # then the offset of the first byte with the value 0xae (byte at offset # 2518890). $ xfs_io -c "dedupe /mnt/bar 0 1957888 557771" /mnt/foo # The bytes in the range starting at offset 2515659 (end of the # deduplication range) and ending at offset 2519040 (start offset # rounded up to the block size) must all have the value 0xae (and not # replaced with 0x00 values). In other words, we should have exactly # the same data we had before we asked for deduplication. $ od -t x1 /mnt/foo 0000000 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b * 11467540 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ae ae ae ae ae ae 11467560 ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae * 11777540 ae ae ae ae ae ae ae ae 11777550 # Unmount the filesystem and mount it again. This guarantees any file # data in the page cache is dropped. $ umount /dev/sdb $ mount /dev/sdb /mnt $ od -t x1 /mnt/foo 0000000 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b * 11461300 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 00 11461320 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 11470000 ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae * 11777540 ae ae ae ae ae ae ae ae 11777550 # The bytes in range 2515659 to 2519040 have a value of 0x00 and not a # value of 0xae, data corruption happened due to the deduplication # operation. So fix this by rounding down, to the sector size, the length used for the deduplication when the following conditions are met: 1) Source file's range ends at its i_size; 2) Source file's i_size is not aligned to the sector size; 3) Destination range does not cross the i_size of the destination file. Fixes: e1d227a42ea2 ("btrfs: Handle unaligned length in extent_same") CC: stable@vger.kernel.org # 4.2+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 85c4284bb2cf..011ddfcc96e2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3469,6 +3469,25 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, same_lock_start = min_t(u64, loff, dst_loff); same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; + } else { + /* + * If the source and destination inodes are different, the + * source's range end offset matches the source's i_size, that + * i_size is not a multiple of the sector size, and the + * destination range does not go past the destination's i_size, + * we must round down the length to the nearest sector size + * multiple. If we don't do this adjustment we end replacing + * with zeroes the bytes in the range that starts at the + * deduplication range's end offset and ends at the next sector + * size multiple. + */ + if (loff + olen == i_size_read(src) && + dst_loff + len < i_size_read(dst)) { + const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize; + + len = round_down(i_size_read(src), sz) - loff; + olen = len; + } } again: From a5b7f4295eeae8b05ca91f6d145cd8773b08de9e Mon Sep 17 00:00:00 2001 From: Lu Fengqi Date: Thu, 9 Aug 2018 09:46:04 +0800 Subject: [PATCH 006/269] btrfs: fix qgroup_free wrong num_bytes in btrfs_subvolume_reserve_metadata After btrfs_qgroup_reserve_meta_prealloc(), num_bytes will be assigned again by btrfs_calc_trans_metadata_size(). Once block_rsv fails, we can't properly free the num_bytes of the previous qgroup_reserve. Use a separate variable to store the num_bytes of the qgroup_reserve. Delete the comment for the qgroup_reserved that does not exist and add a comment about use_global_rsv. Fixes: c4c129db5da8 ("btrfs: drop unused parameter qgroup_reserved") CC: stable@vger.kernel.org # 4.18+ Signed-off-by: Lu Fengqi Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index de6f75f5547b..2d9074295d7f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5800,7 +5800,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) * root: the root of the parent directory * rsv: block reservation * items: the number of items that we need do reservation - * qgroup_reserved: used to return the reserved size in qgroup + * use_global_rsv: allow fallback to the global block reservation * * This function is used to reserve the space for snapshot/subvolume * creation and deletion. Those operations are different with the @@ -5810,10 +5810,10 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) * the space reservation mechanism in start_transaction(). */ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, - struct btrfs_block_rsv *rsv, - int items, + struct btrfs_block_rsv *rsv, int items, bool use_global_rsv) { + u64 qgroup_num_bytes = 0; u64 num_bytes; int ret; struct btrfs_fs_info *fs_info = root->fs_info; @@ -5821,12 +5821,11 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { /* One for parent inode, two for dir entries */ - num_bytes = 3 * fs_info->nodesize; - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); + qgroup_num_bytes = 3 * fs_info->nodesize; + ret = btrfs_qgroup_reserve_meta_prealloc(root, + qgroup_num_bytes, true); if (ret) return ret; - } else { - num_bytes = 0; } num_bytes = btrfs_calc_trans_metadata_size(fs_info, items); @@ -5838,8 +5837,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, if (ret == -ENOSPC && use_global_rsv) ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1); - if (ret && num_bytes) - btrfs_qgroup_free_meta_prealloc(root, num_bytes); + if (ret && qgroup_num_bytes) + btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); return ret; } From 801660b040d132f67fac6a95910ad307c5929b49 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 6 Aug 2018 18:12:37 +0800 Subject: [PATCH 007/269] btrfs: btrfs_shrink_device should call commit transaction at the end Test case btrfs/164 reports use-after-free: [ 6712.084324] general protection fault: 0000 [#1] PREEMPT SMP .. [ 6712.195423] btrfs_update_commit_device_size+0x75/0xf0 [btrfs] [ 6712.201424] btrfs_commit_transaction+0x57d/0xa90 [btrfs] [ 6712.206999] btrfs_rm_device+0x627/0x850 [btrfs] [ 6712.211800] btrfs_ioctl+0x2b03/0x3120 [btrfs] Reason for this is that btrfs_shrink_device adds the resized device to the fs_devices::resized_devices after it has called the last commit transaction. So the list fs_devices::resized_devices is not empty when btrfs_shrink_device returns. Now the parent function btrfs_rm_device calls: btrfs_close_bdev(device); call_rcu(&device->rcu, free_device_rcu); and then does the transactio ncommit. It goes through the fs_devices::resized_devices in btrfs_update_commit_device_size and leads to use-after-free. Fix this by making sure btrfs_shrink_device calls the last needed btrfs_commit_transaction before the return. This is consistent with what the grow counterpart does and this makes sure the on-disk state is persistent when the function returns. Reported-by: Lu Fengqi Tested-by: Lu Fengqi Signed-off-by: Anand Jain Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index da86706123ff..f4405e430da6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4491,7 +4491,12 @@ again: /* Now btrfs_update_device() will change the on-disk size. */ ret = btrfs_update_device(trans, device); - btrfs_end_transaction(trans); + if (ret < 0) { + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + } else { + ret = btrfs_commit_transaction(trans); + } done: btrfs_free_path(path); if (ret) { From b9b8a41adeff5666b402996020b698504c927353 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 20 Aug 2018 11:25:33 +0300 Subject: [PATCH 008/269] btrfs: use after free in btrfs_quota_enable The issue here is that btrfs_commit_transaction() frees "trans" on both the error and the success path. So the problem would be if btrfs_commit_transaction() succeeds, and then qgroup_rescan_init() fails. That means that "ret" is non-zero and "trans" is non-NULL and it leads to a use after free inside the btrfs_end_transaction() macro. Fixes: 340f1aa27f36 ("btrfs: qgroups: Move transaction management inside btrfs_quota_enable/disable") Signed-off-by: Dan Carpenter Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 4353bb69bb86..d4917c0cddf5 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1019,10 +1019,9 @@ out_add_root: spin_unlock(&fs_info->qgroup_lock); ret = btrfs_commit_transaction(trans); - if (ret) { - trans = NULL; + trans = NULL; + if (ret) goto out_free_path; - } ret = qgroup_rescan_init(fs_info, 0, 1); if (!ret) { From 444c8263151afc06c01ac8ddcd1204624a7d4bb3 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 21 Aug 2018 14:03:04 +0000 Subject: [PATCH 009/269] netfilter: conntrack: remove duplicated include from nf_conntrack_proto_udp.c Remove duplicated include. Fixes: c779e849608a ("netfilter: conntrack: remove get_timeout() indirection") Signed-off-by: Yue Haibing Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_udp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 7a1b8988a931..9272a2c525a8 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -393,4 +393,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); #endif -#include From c1dc2912059901f97345d9e10c96b841215fdc0f Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 22 Aug 2018 10:27:17 +0200 Subject: [PATCH 010/269] netfilter: xt_cluster: add dependency on conntrack module The cluster match requires conntrack for matching packets. If the netns does not have conntrack hooks registered, the match does not work at all. Implicitly load the conntrack hook for the family, exactly as many other extensions do. This ensures that the match works even if the hooks have not been registered by other means. Signed-off-by: Martin Willi Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_cluster.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index dfbdbb2fc0ed..51d0c257e7a5 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -125,6 +125,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; + int ret; if (info->total_nodes > XT_CLUSTER_NODES_MAX) { pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n", @@ -135,7 +136,17 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) pr_info_ratelimited("node mask cannot exceed total number of nodes\n"); return -EDOM; } - return 0; + + ret = nf_ct_netns_get(par->net, par->family); + if (ret < 0) + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); + return ret; +} + +static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par) +{ + nf_ct_netns_put(par->net, par->family); } static struct xt_match xt_cluster_match __read_mostly = { @@ -144,6 +155,7 @@ static struct xt_match xt_cluster_match __read_mostly = { .match = xt_cluster_mt, .checkentry = xt_cluster_mt_checkentry, .matchsize = sizeof(struct xt_cluster_match_info), + .destroy = xt_cluster_mt_destroy, .me = THIS_MODULE, }; From 10568f6c5761db24249c610c94d6e44d5505a0ba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Aug 2018 11:33:27 +0200 Subject: [PATCH 011/269] netfilter: xt_checksum: ignore gso skbs Satish Patel reports a skb_warn_bad_offload() splat caused by -j CHECKSUM rules: -A POSTROUTING -p tcp -m tcp --sport 80 -j CHECKSUM The CHECKSUM target has never worked with GSO skbs, and the above rule makes no sense as kernel will handle checksum updates on transmit. Unfortunately, there are 3rd party tools that install such rules, so we cannot reject this from the config plane without potential breakage. Amend Kconfig text to clarify that the CHECKSUM target is only useful in virtualized environments, where old dhcp clients that use AF_PACKET used to discard UDP packets with a 'bad' header checksum and add a one-time warning in case such rule isn't restricted to UDP. v2: check IP6T_F_PROTO flag before cmp (Michal Kubecek) Reported-by: Satish Patel Reported-by: Markos Chandras Reported-by: Michal Kubecek Signed-off-by: Florian Westphal Reviewed-by: Michal Kubecek Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 12 ++++++------ net/netfilter/xt_CHECKSUM.c | 22 +++++++++++++++++++++- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 71709c104081..f61c306de1d0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -771,13 +771,13 @@ config NETFILTER_XT_TARGET_CHECKSUM depends on NETFILTER_ADVANCED ---help--- This option adds a `CHECKSUM' target, which can be used in the iptables mangle - table. + table to work around buggy DHCP clients in virtualized environments. - You can use this target to compute and fill in the checksum in - a packet that lacks a checksum. This is particularly useful, - if you need to work around old applications such as dhcp clients, - that do not work well with checksum offloads, but don't want to disable - checksum offload in your device. + Some old DHCP clients drop packets because they are not aware + that the checksum would normally be offloaded to hardware and + thus should be considered valid. + This target can be used to fill in the checksum using iptables + when such packets are sent via a virtual network device. To compile it as a module, choose M here. If unsure, say N. diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c index 9f4151ec3e06..6c7aa6a0a0d2 100644 --- a/net/netfilter/xt_CHECKSUM.c +++ b/net/netfilter/xt_CHECKSUM.c @@ -16,6 +16,9 @@ #include #include +#include +#include + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michael S. Tsirkin "); MODULE_DESCRIPTION("Xtables: checksum modification"); @@ -25,7 +28,7 @@ MODULE_ALIAS("ip6t_CHECKSUM"); static unsigned int checksum_tg(struct sk_buff *skb, const struct xt_action_param *par) { - if (skb->ip_summed == CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_PARTIAL && !skb_is_gso(skb)) skb_checksum_help(skb); return XT_CONTINUE; @@ -34,6 +37,8 @@ checksum_tg(struct sk_buff *skb, const struct xt_action_param *par) static int checksum_tg_check(const struct xt_tgchk_param *par) { const struct xt_CHECKSUM_info *einfo = par->targinfo; + const struct ip6t_ip6 *i6 = par->entryinfo; + const struct ipt_ip *i4 = par->entryinfo; if (einfo->operation & ~XT_CHECKSUM_OP_FILL) { pr_info_ratelimited("unsupported CHECKSUM operation %x\n", @@ -43,6 +48,21 @@ static int checksum_tg_check(const struct xt_tgchk_param *par) if (!einfo->operation) return -EINVAL; + switch (par->family) { + case NFPROTO_IPV4: + if (i4->proto == IPPROTO_UDP && + (i4->invflags & XT_INV_PROTO) == 0) + return 0; + break; + case NFPROTO_IPV6: + if ((i6->flags & IP6T_F_PROTO) && + i6->proto == IPPROTO_UDP && + (i6->invflags & XT_INV_PROTO) == 0) + return 0; + break; + } + + pr_warn_once("CHECKSUM should be avoided. If really needed, restrict with \"-p udp\" and only use in OUTPUT\n"); return 0; } From b6fdfbff078975c53383fc146a2a54985eab6b6d Mon Sep 17 00:00:00 2001 From: Misono Tomohiro Date: Fri, 24 Aug 2018 11:35:28 +0900 Subject: [PATCH 012/269] btrfs: Fix suspicious RCU usage warning in btrfs_debug_in_rcu Commit 672d599041c8 ("btrfs: Use wrapper macro for rcu string to remove duplicate code") replaces some open coded RCU string handling with macro. It turns out that btrfs_debug_in_rcu() is used for the first time and the macro lacks lock/unlock of RCU string for non-debug case (i.e. when the message is not printed), leading to suspicious RCU usage warning when CONFIG_PROVE_RCU is on. Fix this by adding a wrapper to call lock/unlock for the non-debug case too. Fixes: 672d599041c8 ("btrfs: Use wrapper macro for rcu string to remove duplicate code") Reported-by: David Howells Tested-by: David Howells Signed-off-by: Misono Tomohiro Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a67cc190a84b..0b856da2fd3b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3390,9 +3390,9 @@ do { \ #define btrfs_debug(fs_info, fmt, args...) \ btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_in_rcu(fs_info, fmt, args...) \ - btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) + btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ - btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) + btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_rl(fs_info, fmt, args...) \ btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) #endif @@ -3404,6 +3404,13 @@ do { \ rcu_read_unlock(); \ } while (0) +#define btrfs_no_printk_in_rcu(fs_info, fmt, args...) \ +do { \ + rcu_read_lock(); \ + btrfs_no_printk(fs_info, fmt, ##args); \ + rcu_read_unlock(); \ +} while (0) + #define btrfs_printk_ratelimited(fs_info, fmt, args...) \ do { \ static DEFINE_RATELIMIT_STATE(_rs, \ From d177c8b61d6b4ef360b1c2682e4d8e3bae01738b Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 26 Jul 2018 12:48:05 +0800 Subject: [PATCH 013/269] arm64: allwinner: dts: h6: fix Pine H64 MMC bus width Currently the enabled MMC controllers on Pine H64 do not have bus-width set, which make them fall back to 1-bit mode and become quite slow. Fix this by add the corresponding bus-width properties. Fixes: ecbd611882a1 ("arm64: allwinner: h6: enable MMC0/2 on Pine H64") Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard --- arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts index ceffc40810ee..48daec7f78ba 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts @@ -46,6 +46,7 @@ pinctrl-0 = <&mmc0_pins>; vmmc-supply = <®_cldo1>; cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; + bus-width = <4>; status = "okay"; }; @@ -56,6 +57,7 @@ vqmmc-supply = <®_bldo2>; non-removable; cap-mmc-hw-reset; + bus-width = <8>; status = "okay"; }; From 4051c323c59b535cfb7dc10b349544b4ad49c21e Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 2 Aug 2018 11:41:07 +0300 Subject: [PATCH 014/269] ARC: configs: cleanup - Remove CONFIG_DEFAULT_HOSTNAME from defconfigs There's no reason to set the same hostname to all ARC boards by default. It usually gets overwritten by init scripts anyways. - Remove disabled CONFIG_DEVKMEM from defconfigs It is disabled by default Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/configs/axs101_defconfig | 2 -- arch/arc/configs/axs103_defconfig | 2 -- arch/arc/configs/axs103_smp_defconfig | 2 -- arch/arc/configs/haps_hs_defconfig | 2 -- arch/arc/configs/haps_hs_smp_defconfig | 2 -- arch/arc/configs/hsdk_defconfig | 1 - arch/arc/configs/nps_defconfig | 1 - arch/arc/configs/nsim_700_defconfig | 2 -- arch/arc/configs/nsim_hs_defconfig | 2 -- arch/arc/configs/nsim_hs_smp_defconfig | 2 -- arch/arc/configs/nsimosci_defconfig | 2 -- arch/arc/configs/nsimosci_hs_defconfig | 2 -- arch/arc/configs/nsimosci_hs_smp_defconfig | 2 -- arch/arc/configs/tb10x_defconfig | 1 - arch/arc/configs/vdk_hs38_defconfig | 2 -- arch/arc/configs/vdk_hs38_smp_defconfig | 1 - 16 files changed, 28 deletions(-) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index a635ea972304..41a97eb7598d 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -63,7 +62,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y CONFIG_MOUSE_SERIAL=y CONFIG_MOUSE_SYNAPTICS_USB=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index aa507e423075..d8e2ca2385cc 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -64,7 +63,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y CONFIG_MOUSE_SERIAL=y CONFIG_MOUSE_SYNAPTICS_USB=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index eba07f468654..1e729b9726cd 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -65,7 +64,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y CONFIG_MOUSE_SERIAL=y CONFIG_MOUSE_SYNAPTICS_USB=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig index 098b19fbaa51..240dd2cd5148 100644 --- a/arch/arc/configs/haps_hs_defconfig +++ b/arch/arc/configs/haps_hs_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -57,7 +56,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index 0104c404d897..14ae7e5acc7c 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -60,7 +59,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 6491be0ddbc9..1dec2b4bc5e6 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" CONFIG_SYSVIPC=y # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_NO_HZ_IDLE=y diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index 7c9c706ae7f6..31ba224bbfb4 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -59,7 +59,6 @@ CONFIG_NETCONSOLE=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 99e05cf63fca..8e0b8b134cd9 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -44,7 +43,6 @@ CONFIG_LXT_PHY=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_ARC=y CONFIG_SERIAL_ARC_CONSOLE=y # CONFIG_HW_RANDOM is not set diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index 0dc4f9b737e7..739b90e5e893 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -45,7 +44,6 @@ CONFIG_DEVTMPFS=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_ARC=y CONFIG_SERIAL_ARC_CONSOLE=y # CONFIG_HW_RANDOM is not set diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index be3c30a15e54..b5895bdf3a93 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_HIGH_RES_TIMERS=y @@ -44,7 +43,6 @@ CONFIG_DEVTMPFS=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_ARC=y CONFIG_SERIAL_ARC_CONSOLE=y # CONFIG_HW_RANDOM is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 3a74b9b21772..f14eeff7d308 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y # CONFIG_CROSS_MEMORY_ATTACH is not set @@ -48,7 +47,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index ea2834b4dc1d..025298a48305 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y # CONFIG_CROSS_MEMORY_ATTACH is not set @@ -47,7 +46,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index 80a5a1b4924b..df7b77b13b82 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y # CONFIG_CROSS_MEMORY_ATTACH is not set @@ -58,7 +57,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 2cc87f909747..a7f65313f84a 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -57,7 +57,6 @@ CONFIG_STMMAC_ETH=y # CONFIG_SERIO is not set # CONFIG_VT is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index f629493929ea..db47c3541f15 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y @@ -53,7 +52,6 @@ CONFIG_NATIONAL_PHY=y CONFIG_MOUSE_PS2_TOUCHKIT=y CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index 21f0ca26a05d..a8ac5e917d9a 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y From 5c0920897af59779546e9ea0e89c5db45c8aff33 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 2 Aug 2018 13:19:37 +0300 Subject: [PATCH 015/269] ARC: [plat-axs*/plat-hsdk]: Allow U-Boot to pass MAC-address to the kernel Otherwise kernel uses random MAC which is not very conveniet. With that change in place use might set desired MAC in U-Boot with "setenv ethaddr 11:22:33:44:55:66", save environment and then from boot to boot the same MAC will be used by the kernel. One other note for this to happen it's required to pass board's .dtb in U-Boot's "bootm" command like that: ------------------->8----------------- bootm 0x82000000 - 0x84000000 ------------------->8----------------- Here 0x82000000 is location of uImage while 0x80000000 is location of either axs10x.dtb or hsdk.dtb previously loaded from SD-card, USB storage or TFTP server. Signed-off-by: Alexey Brodkin Cc: Rob Herring Cc: stable@vger.kernel.org # 4.14 Cc: devicetree@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/axs10x_mb.dtsi | 7 ++++++- arch/arc/boot/dts/hsdk.dts | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index 47b74fbc403c..37bafd44e36d 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -9,6 +9,10 @@ */ / { + aliases { + ethernet = &gmac; + }; + axs10x_mb { compatible = "simple-bus"; #address-cells = <1>; @@ -68,7 +72,7 @@ }; }; - ethernet@0x18000 { + gmac: ethernet@0x18000 { #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = < 0x18000 0x2000 >; @@ -81,6 +85,7 @@ max-speed = <100>; resets = <&creg_rst 5>; reset-names = "stmmaceth"; + mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */ }; ehci@0x40000 { diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 006aa3de5348..d00f283094d3 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -25,6 +25,10 @@ bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; }; + aliases { + ethernet = &gmac; + }; + cpus { #address-cells = <1>; #size-cells = <0>; @@ -163,7 +167,7 @@ #clock-cells = <0>; }; - ethernet@8000 { + gmac: ethernet@8000 { #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = <0x8000 0x2000>; @@ -176,6 +180,7 @@ phy-handle = <&phy0>; resets = <&cgu_rst HSDK_ETH_RESET>; reset-names = "stmmaceth"; + mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */ mdio { #address-cells = <1>; From c83532fb0fe053d2e43e9387354cb1b52ba26427 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 2 Aug 2018 11:50:16 +0300 Subject: [PATCH 016/269] ARC: [plat-axs*]: Enable SWAP SWAP support on ARC was fixed earlier by commit 6e3761145a9b ("ARC: Fix CONFIG_SWAP") so now we may safely enable it on platforms that have external media like USB and SD-card. Note: it was already allowed for HSDK Signed-off-by: Alexey Brodkin Cc: stable@vger.kernel.org # 6e3761145a9b: ARC: Fix CONFIG_SWAP Signed-off-by: Vineet Gupta --- arch/arc/configs/axs101_defconfig | 1 - arch/arc/configs/axs103_defconfig | 1 - arch/arc/configs/axs103_smp_defconfig | 1 - 3 files changed, 3 deletions(-) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 41a97eb7598d..41bc08be6a3b 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -1,4 +1,3 @@ -# CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_CROSS_MEMORY_ATTACH is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index d8e2ca2385cc..1e1c4a8011b5 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -1,4 +1,3 @@ -# CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_CROSS_MEMORY_ATTACH is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 1e729b9726cd..6b0c0cfd5c30 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -1,4 +1,3 @@ -# CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_CROSS_MEMORY_ATTACH is not set From 1e3bece2ded71eb85ac297a43002a942964e381d Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 6 Aug 2018 19:44:23 +0300 Subject: [PATCH 017/269] ARC: cleanup show_faulting_vma() - Remove unused variables - check return value of file_path Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/kernel/troubleshoot.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 783b20354f8b..e8d9fb452346 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -83,9 +83,6 @@ done: static void show_faulting_vma(unsigned long address, char *buf) { struct vm_area_struct *vma; - struct inode *inode; - unsigned long ino = 0; - dev_t dev = 0; char *nm = buf; struct mm_struct *active_mm = current->active_mm; @@ -99,12 +96,10 @@ static void show_faulting_vma(unsigned long address, char *buf) * if the container VMA is not found */ if (vma && (vma->vm_start <= address)) { - struct file *file = vma->vm_file; - if (file) { - nm = file_path(file, buf, PAGE_SIZE - 1); - inode = file_inode(vma->vm_file); - dev = inode->i_sb->s_dev; - ino = inode->i_ino; + if (vma->vm_file) { + nm = file_path(vma->vm_file, buf, PAGE_SIZE - 1); + if (IS_ERR(nm)) + nm = "?"; } pr_info(" @off 0x%lx in [%s]\n" " VMA: 0x%08lx to 0x%08lx\n", From c27d0e9045bbbabffdde2bdba74e9971c4194ac4 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 16 Aug 2018 10:20:33 -0700 Subject: [PATCH 018/269] ARC: sort Kconfig Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 6d5eb8267e42..b4441b0764d7 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -9,6 +9,7 @@ config ARC def_bool y select ARC_TIMERS + select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SG_CHAIN @@ -28,8 +29,12 @@ config ARC select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select HAVE_DEBUG_STACKOVERFLOW select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_GENERIC_DMA_COHERENT select HAVE_IOREMAP_PROT + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZMA select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_MEMBLOCK @@ -44,11 +49,6 @@ config ARC select OF_EARLY_FLATTREE select OF_RESERVED_MEM select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_GENERIC_DMA_COHERENT - select HAVE_KERNEL_GZIP - select HAVE_KERNEL_LZMA - select ARCH_HAS_PTE_SPECIAL config ARCH_HAS_CACHE_LINE_SIZE def_bool y From 3fba68fa35a2cbda157c6f49f26eefccb2e10043 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 21 Aug 2018 13:44:07 +0200 Subject: [PATCH 019/269] scsi: core: Update SCSI_MQ_DEFAULT help text to match default The default was changed, but the help text was not updated. Fix grammar (s/the option/this option/) while at it. [mkp: drop "new" as suggested by John Garry] Fixes: d5038a13eca72fb2 ("scsi: core: switch to scsi-mq by default") Signed-off-by: Geert Uytterhoeven Reviewed-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 8fc851a9e116..7c097006c54d 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -52,12 +52,12 @@ config SCSI_MQ_DEFAULT default y depends on SCSI ---help--- - This option enables the new blk-mq based I/O path for SCSI - devices by default. With the option the scsi_mod.use_blk_mq - module/boot option defaults to Y, without it to N, but it can - still be overridden either way. + This option enables the blk-mq based I/O path for SCSI devices by + default. With this option the scsi_mod.use_blk_mq module/boot + option defaults to Y, without it to N, but it can still be + overridden either way. - If unsure say N. + If unsure say Y. config SCSI_PROC_FS bool "legacy /proc/scsi/ support" From a7ccd92c8d2ac4eb168b621e086be2dc9b8344f6 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Thu, 23 Aug 2018 15:49:18 -0400 Subject: [PATCH 020/269] scsi: documentation: add scsi_mod.use_blk_mq to scsi-parameters Kernel line argument scsi_mod.use_blk_mq is missing from file Documentation/scsi/scsi-parameters.txt. Add this option, providing mention of config setting and format. [mkp: clarified where to look] Signed-off-by: John Pittman Signed-off-by: Martin K. Petersen --- Documentation/scsi/scsi-parameters.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/scsi/scsi-parameters.txt b/Documentation/scsi/scsi-parameters.txt index 25a4b4cf04a6..92999d4e0cb8 100644 --- a/Documentation/scsi/scsi-parameters.txt +++ b/Documentation/scsi/scsi-parameters.txt @@ -97,6 +97,11 @@ parameters may be changed at runtime by the command allowing boot to proceed. none ignores them, expecting user space to do the scan. + scsi_mod.use_blk_mq= + [SCSI] use blk-mq I/O path by default + See SCSI_MQ_DEFAULT in drivers/scsi/Kconfig. + Format: + sim710= [SCSI,HW] See header of drivers/scsi/sim710.c. From 89809b028b6f54187b7d81a0c69b35d394c52e62 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sat, 11 Aug 2018 21:03:58 +0530 Subject: [PATCH 021/269] scsi: csiostor: add a check for NULL pointer after kmalloc() Reported-by: Colin Ian King Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/csiostor/csio_hw.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 23d07e9f87d0..6ff7c5580fcb 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -2364,8 +2364,8 @@ bye: } /* - * Returns -EINVAL if attempts to flash the firmware failed - * else returns 0, + * Returns -EINVAL if attempts to flash the firmware failed, + * -ENOMEM if memory allocation failed else returns 0, * if flashing was not attempted because the card had the * latest firmware ECANCELED is returned */ @@ -2393,6 +2393,13 @@ csio_hw_flash_fw(struct csio_hw *hw, int *reset) return -EINVAL; } + /* allocate memory to read the header of the firmware on the + * card + */ + card_fw = kmalloc(sizeof(*card_fw), GFP_KERNEL); + if (!card_fw) + return -ENOMEM; + if (csio_is_t5(pci_dev->device & CSIO_HW_CHIP_MASK)) fw_bin_file = FW_FNAME_T5; else @@ -2406,11 +2413,6 @@ csio_hw_flash_fw(struct csio_hw *hw, int *reset) fw_size = fw->size; } - /* allocate memory to read the header of the firmware on the - * card - */ - card_fw = kmalloc(sizeof(*card_fw), GFP_KERNEL); - /* upgrade FW logic */ ret = csio_hw_prep_fw(hw, fw_info, fw_data, fw_size, card_fw, hw->fw_state, reset); From 68bdc630721c40e908d22cffe07b5ca225a69f6e Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sat, 11 Aug 2018 21:14:08 +0530 Subject: [PATCH 022/269] scsi: csiostor: fix incorrect port capabilities - use be32_to_cpu() instead of ntohs() for 32 bit port capabilities. - add a new function fwcaps32_to_caps16() to convert 32 bit port capabilities to 16 bit port capabilities. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/csiostor/csio_hw.c | 55 ++++++++++++++++++++++++++------- drivers/scsi/csiostor/csio_hw.h | 1 + drivers/scsi/csiostor/csio_mb.c | 6 ++-- 3 files changed, 48 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 6ff7c5580fcb..e51923886475 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -1601,6 +1601,46 @@ fw_port_cap32_t fwcaps16_to_caps32(fw_port_cap16_t caps16) return caps32; } +/** + * fwcaps32_to_caps16 - convert 32-bit Port Capabilities to 16-bits + * @caps32: a 32-bit Port Capabilities value + * + * Returns the equivalent 16-bit Port Capabilities value. Note that + * not all 32-bit Port Capabilities can be represented in the 16-bit + * Port Capabilities and some fields/values may not make it. + */ +fw_port_cap16_t fwcaps32_to_caps16(fw_port_cap32_t caps32) +{ + fw_port_cap16_t caps16 = 0; + + #define CAP32_TO_CAP16(__cap) \ + do { \ + if (caps32 & FW_PORT_CAP32_##__cap) \ + caps16 |= FW_PORT_CAP_##__cap; \ + } while (0) + + CAP32_TO_CAP16(SPEED_100M); + CAP32_TO_CAP16(SPEED_1G); + CAP32_TO_CAP16(SPEED_10G); + CAP32_TO_CAP16(SPEED_25G); + CAP32_TO_CAP16(SPEED_40G); + CAP32_TO_CAP16(SPEED_100G); + CAP32_TO_CAP16(FC_RX); + CAP32_TO_CAP16(FC_TX); + CAP32_TO_CAP16(802_3_PAUSE); + CAP32_TO_CAP16(802_3_ASM_DIR); + CAP32_TO_CAP16(ANEG); + CAP32_TO_CAP16(FORCE_PAUSE); + CAP32_TO_CAP16(MDIAUTO); + CAP32_TO_CAP16(MDISTRAIGHT); + CAP32_TO_CAP16(FEC_RS); + CAP32_TO_CAP16(FEC_BASER_RS); + + #undef CAP32_TO_CAP16 + + return caps16; +} + /** * lstatus_to_fwcap - translate old lstatus to 32-bit Port Capabilities * @lstatus: old FW_PORT_ACTION_GET_PORT_INFO lstatus value @@ -1759,7 +1799,7 @@ csio_enable_ports(struct csio_hw *hw) val = 1; csio_mb_params(hw, mbp, CSIO_MB_DEFAULT_TMO, - hw->pfn, 0, 1, ¶m, &val, false, + hw->pfn, 0, 1, ¶m, &val, true, NULL); if (csio_mb_issue(hw, mbp)) { @@ -1769,16 +1809,9 @@ csio_enable_ports(struct csio_hw *hw) return -EINVAL; } - csio_mb_process_read_params_rsp(hw, mbp, &retval, 1, - &val); - if (retval != FW_SUCCESS) { - csio_err(hw, "FW_PARAMS_CMD(r) port:%d failed: 0x%x\n", - portid, retval); - mempool_free(mbp, hw->mb_mempool); - return -EINVAL; - } - - fw_caps = val; + csio_mb_process_read_params_rsp(hw, mbp, &retval, + 0, NULL); + fw_caps = retval ? FW_CAPS16 : FW_CAPS32; } /* Read PORT information */ diff --git a/drivers/scsi/csiostor/csio_hw.h b/drivers/scsi/csiostor/csio_hw.h index 9e73ef771eb7..e351af6e7c81 100644 --- a/drivers/scsi/csiostor/csio_hw.h +++ b/drivers/scsi/csiostor/csio_hw.h @@ -639,6 +639,7 @@ int csio_handle_intr_status(struct csio_hw *, unsigned int, fw_port_cap32_t fwcap_to_fwspeed(fw_port_cap32_t acaps); fw_port_cap32_t fwcaps16_to_caps32(fw_port_cap16_t caps16); +fw_port_cap16_t fwcaps32_to_caps16(fw_port_cap32_t caps32); fw_port_cap32_t lstatus_to_fwcap(u32 lstatus); int csio_hw_start(struct csio_hw *); diff --git a/drivers/scsi/csiostor/csio_mb.c b/drivers/scsi/csiostor/csio_mb.c index c026417269c3..6f13673d6aa0 100644 --- a/drivers/scsi/csiostor/csio_mb.c +++ b/drivers/scsi/csiostor/csio_mb.c @@ -368,7 +368,7 @@ csio_mb_port(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo, FW_CMD_LEN16_V(sizeof(*cmdp) / 16)); if (fw_caps == FW_CAPS16) - cmdp->u.l1cfg.rcap = cpu_to_be32(fc); + cmdp->u.l1cfg.rcap = cpu_to_be32(fwcaps32_to_caps16(fc)); else cmdp->u.l1cfg32.rcap32 = cpu_to_be32(fc); } @@ -395,8 +395,8 @@ csio_mb_process_read_port_rsp(struct csio_hw *hw, struct csio_mb *mbp, *pcaps = fwcaps16_to_caps32(ntohs(rsp->u.info.pcap)); *acaps = fwcaps16_to_caps32(ntohs(rsp->u.info.acap)); } else { - *pcaps = ntohs(rsp->u.info32.pcaps32); - *acaps = ntohs(rsp->u.info32.acaps32); + *pcaps = be32_to_cpu(rsp->u.info32.pcaps32); + *acaps = be32_to_cpu(rsp->u.info32.acaps32); } } } From 9abd9990e9779dc9c548c3599aaca7e3505ab19d Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 Aug 2018 12:55:05 -0700 Subject: [PATCH 023/269] scsi: lpfc: Default fdmi_on to on Change default behavior for fdmi registration to on. [mkp: patch was mangled] Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 5a25553415f8..057a60abe664 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -5122,16 +5122,16 @@ LPFC_ATTR_R(enable_SmartSAN, 0, 0, 1, "Enable SmartSAN functionality"); /* # lpfc_fdmi_on: Controls FDMI support. -# 0 No FDMI support (default) -# 1 Traditional FDMI support +# 0 No FDMI support +# 1 Traditional FDMI support (default) # Traditional FDMI support means the driver will assume FDMI-2 support; # however, if that fails, it will fallback to FDMI-1. # If lpfc_enable_SmartSAN is set to 1, the driver ignores lpfc_fdmi_on. # If lpfc_enable_SmartSAN is set 0, the driver uses the current value of # lpfc_fdmi_on. -# Value range [0,1]. Default value is 0. +# Value range [0,1]. Default value is 1. */ -LPFC_ATTR_R(fdmi_on, 0, 0, 1, "Enable FDMI support"); +LPFC_ATTR_R(fdmi_on, 1, 0, 1, "Enable FDMI support"); /* # Specifies the maximum number of ELS cmds we can have outstanding (for From 53e13ee087a80e8d4fc95436318436e5c2c1f8c2 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 16 Aug 2018 16:04:05 -0700 Subject: [PATCH 024/269] scsi: lpfc: Correct MDS diag and nvmet configuration A recent change added some MDS processing in the lpfc_drain_txq routine that relies on the fcp_wq being allocated. For nvmet operation the fcp_wq is not allocated because it can only be an nvme-target. When the original MDS support was added LS_MDS_LOOPBACK was defined wrong, (0x16) it should have been 0x10 (decimal value used for hex setting). This incorrect value allowed MDS_LOOPBACK to be set simultaneously with LS_NPIV_FAB_SUPPORTED, causing the driver to crash when it accesses the non-existent fcp_wq. Correct the bad value setting for LS_MDS_LOOPBACK. Fixes: ae9e28f36a6c ("lpfc: Add MDS Diagnostic support.") Cc: # v4.12+ Signed-off-by: Dick Kennedy Signed-off-by: James Smart Tested-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index e0d0da5f43d6..43732e8d1347 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -672,7 +672,7 @@ struct lpfc_hba { #define LS_NPIV_FAB_SUPPORTED 0x2 /* Fabric supports NPIV */ #define LS_IGNORE_ERATT 0x4 /* intr handler should ignore ERATT */ #define LS_MDS_LINK_DOWN 0x8 /* MDS Diagnostics Link Down */ -#define LS_MDS_LOOPBACK 0x16 /* MDS Diagnostics Link Up (Loopback) */ +#define LS_MDS_LOOPBACK 0x10 /* MDS Diagnostics Link Up (Loopback) */ uint32_t hba_flag; /* hba generic flags */ #define HBA_ERATT_HANDLED 0x1 /* This flag is set when eratt handled */ From eb53a3ea3e009578a388f106620b22b1707cf2f6 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 22 Aug 2018 13:25:44 +0200 Subject: [PATCH 025/269] scsi: hpsa: limit transfer length to 1MB, not 512kB e2c7b43 was supposed to limit transfer length to 1MB, but got the unit of max_sectors wrong. Fixes: e2c7b433f729 ("scsi: hpsa: limit transfer length to 1MB") Signed-off-by: Martin Wilck Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 58bb70b886d7..c120929d4ffe 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -976,7 +976,7 @@ static struct scsi_host_template hpsa_driver_template = { #endif .sdev_attrs = hpsa_sdev_attrs, .shost_attrs = hpsa_shost_attrs, - .max_sectors = 1024, + .max_sectors = 2048, .no_write_same = 1, }; From cedefa8544c6be216b4710575065e3a11065f8d0 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sat, 11 Aug 2018 21:10:29 +0530 Subject: [PATCH 026/269] scsi: target: iscsi: cxgbit: use pr_debug() instead of pr_info() DDP programming happens in data path and it can fail because of lack of resources so use pr_debug() instead of pr_info() for this case. Signed-off-by: Varun Prakash Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/cxgbit/cxgbit_ddp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c index 768cce0ccb80..76a262674c8d 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c @@ -207,8 +207,8 @@ cxgbit_ddp_reserve(struct cxgbit_sock *csk, struct cxgbi_task_tag_info *ttinfo, ret = dma_map_sg(&ppm->pdev->dev, sgl, sgcnt, DMA_FROM_DEVICE); sgl->offset = sg_offset; if (!ret) { - pr_info("%s: 0x%x, xfer %u, sgl %u dma mapping err.\n", - __func__, 0, xferlen, sgcnt); + pr_debug("%s: 0x%x, xfer %u, sgl %u dma mapping err.\n", + __func__, 0, xferlen, sgcnt); goto rel_ppods; } @@ -250,8 +250,8 @@ cxgbit_get_r2t_ttt(struct iscsi_conn *conn, struct iscsi_cmd *cmd, ret = cxgbit_ddp_reserve(csk, ttinfo, cmd->se_cmd.data_length); if (ret < 0) { - pr_info("csk 0x%p, cmd 0x%p, xfer len %u, sgcnt %u no ddp.\n", - csk, cmd, cmd->se_cmd.data_length, ttinfo->nents); + pr_debug("csk 0x%p, cmd 0x%p, xfer len %u, sgcnt %u no ddp.\n", + csk, cmd, cmd->se_cmd.data_length, ttinfo->nents); ttinfo->sgl = NULL; ttinfo->nents = 0; From 4e8065aa6c6f50765290be27ab8a64a4e44cb009 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 23 Aug 2018 23:23:06 +0200 Subject: [PATCH 027/269] scsi: libata: Add missing newline at end of file With gcc 4.1.2: drivers/ata/libata-core.c:7396:33: warning: no newline at end of file Fixes: 2fa4a32613c9182b ("scsi: libsas: dynamically allocate and free ata host") Signed-off-by: Geert Uytterhoeven Signed-off-by: Martin K. Petersen --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 172e32840256..599e01bcdef2 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -7394,4 +7394,4 @@ EXPORT_SYMBOL_GPL(ata_cable_unknown); EXPORT_SYMBOL_GPL(ata_cable_ignore); EXPORT_SYMBOL_GPL(ata_cable_sata); EXPORT_SYMBOL_GPL(ata_host_get); -EXPORT_SYMBOL_GPL(ata_host_put); \ No newline at end of file +EXPORT_SYMBOL_GPL(ata_host_put); From 23aa8e69f2c6ceb0bdca52f4450ad1f45675ca73 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 27 Aug 2018 15:24:42 +0800 Subject: [PATCH 028/269] Revert "scsi: core: fix scsi_host_queue_ready" This reverts commit 265d59aacbce7e50bdc1f5d25033c38dd70b3767. There is fundamental issue in commit 328728630d9f2bf1 (scsi: core: avoid host-wide host_busy counter for scsi_mq) because SCSI's host busy counter may not be same with counter of blk-mq's inflight tags, especially in case of none io scheduler. So revert this commit first. Cc: Omar Sandoval , Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Don Brace Cc: Kashyap Desai Cc: Mike Snitzer Cc: Hannes Reinecke Cc: Laurence Oberman Cc: Bart Van Assche Cc: Guenter Roeck Cc: Jens Axboe Reported-by: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0adfb3bce0fd..1046679f5473 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1611,7 +1611,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q, else busy = 0; if (atomic_read(&shost->host_blocked) > 0) { - if (busy) + if (busy || scsi_host_busy(shost)) goto starved; /* From d772a65d8a6c45c376a8200a38f7f82fb480af6a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 27 Aug 2018 15:24:43 +0800 Subject: [PATCH 029/269] Revert "scsi: core: avoid host-wide host_busy counter for scsi_mq" This reverts commit 328728630d9f2bf14b82ca30b5e47489beefe361. There is fundamental issue in commit 328728630d9f2bf1 (scsi: core: avoid host-wide host_busy counter for scsi_mq) because SCSI's host busy counter may not be same with counter of blk-mq's inflight tags, especially in case of none io scheduler. We may switch to other approach for addressing this scsi_mq's performance issue, such as percpu counter or kind of ways, so revert this commit first for fixing this kind of issue in EH path, as reported by Jens. Cc: Omar Sandoval , Cc: "Martin K. Petersen" , Cc: James Bottomley , Cc: Christoph Hellwig , Cc: Don Brace Cc: Kashyap Desai Cc: Mike Snitzer Cc: Hannes Reinecke Cc: Laurence Oberman Cc: Bart Van Assche Cc: Jens Axboe Reported-by: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 24 +----------------------- drivers/scsi/scsi_lib.c | 23 ++++++----------------- 2 files changed, 7 insertions(+), 40 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index f02dcc875a09..ea4b0bb0c1cd 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -563,35 +563,13 @@ struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost) } EXPORT_SYMBOL(scsi_host_get); -struct scsi_host_mq_in_flight { - int cnt; -}; - -static void scsi_host_check_in_flight(struct request *rq, void *data, - bool reserved) -{ - struct scsi_host_mq_in_flight *in_flight = data; - - if (blk_mq_request_started(rq)) - in_flight->cnt++; -} - /** * scsi_host_busy - Return the host busy counter * @shost: Pointer to Scsi_Host to inc. **/ int scsi_host_busy(struct Scsi_Host *shost) { - struct scsi_host_mq_in_flight in_flight = { - .cnt = 0, - }; - - if (!shost->use_blk_mq) - return atomic_read(&shost->host_busy); - - blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight, - &in_flight); - return in_flight.cnt; + return atomic_read(&shost->host_busy); } EXPORT_SYMBOL(scsi_host_busy); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 1046679f5473..eb97d2dd3651 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -345,8 +345,7 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost) unsigned long flags; rcu_read_lock(); - if (!shost->use_blk_mq) - atomic_dec(&shost->host_busy); + atomic_dec(&shost->host_busy); if (unlikely(scsi_host_in_recovery(shost))) { spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) @@ -445,12 +444,7 @@ static inline bool scsi_target_is_busy(struct scsi_target *starget) static inline bool scsi_host_is_busy(struct Scsi_Host *shost) { - /* - * blk-mq can handle host queue busy efficiently via host-wide driver - * tag allocation - */ - - if (!shost->use_blk_mq && shost->can_queue > 0 && + if (shost->can_queue > 0 && atomic_read(&shost->host_busy) >= shost->can_queue) return true; if (atomic_read(&shost->host_blocked) > 0) @@ -1606,12 +1600,9 @@ static inline int scsi_host_queue_ready(struct request_queue *q, if (scsi_host_in_recovery(shost)) return 0; - if (!shost->use_blk_mq) - busy = atomic_inc_return(&shost->host_busy) - 1; - else - busy = 0; + busy = atomic_inc_return(&shost->host_busy) - 1; if (atomic_read(&shost->host_blocked) > 0) { - if (busy || scsi_host_busy(shost)) + if (busy) goto starved; /* @@ -1625,7 +1616,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q, "unblocking host at zero depth\n")); } - if (!shost->use_blk_mq && shost->can_queue > 0 && busy >= shost->can_queue) + if (shost->can_queue > 0 && busy >= shost->can_queue) goto starved; if (shost->host_self_blocked) goto starved; @@ -1711,9 +1702,7 @@ static void scsi_kill_request(struct request *req, struct request_queue *q) * with the locks as normal issue path does. */ atomic_inc(&sdev->device_busy); - - if (!shost->use_blk_mq) - atomic_inc(&shost->host_busy); + atomic_inc(&shost->host_busy); if (starget->can_queue > 0) atomic_inc(&starget->target_busy); From b9eb3b14f1dbf16bf27b6c1ffe6b8c00ec945c9b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Aug 2018 12:23:01 +0300 Subject: [PATCH 030/269] scsi: aacraid: fix a signedness bug The problem is that ->reset_state is a u8 but it can be set to -1 or -2 in aac_tmf_callback() and the error handling in aac_eh_target_reset() relies on it to be signed. [mkp: fixed typo] Fixes: 0d643ff3c353 ("scsi: aacraid: use aac_tmf_callback for reset fib") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aacraid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 29bf1e60f542..39eb415987fc 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1346,7 +1346,7 @@ struct fib { struct aac_hba_map_info { __le32 rmw_nexus; /* nexus for native HBA devices */ u8 devtype; /* device type */ - u8 reset_state; /* 0 - no reset, 1..x - */ + s8 reset_state; /* 0 - no reset, 1..x - */ /* after xth TM LUN reset */ u16 qd_limit; u32 scan_counter; From e0ab8b26aa9661df0541a657e2b2416d90488809 Mon Sep 17 00:00:00 2001 From: Andreas Bosch Date: Fri, 17 Aug 2018 22:16:00 +0200 Subject: [PATCH 031/269] HID: intel-ish-hid: Enable Sunrise Point-H ish driver Added PCI ID for Sunrise Point-H ISH. Signed-off-by: Andreas Bosch Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/hw-ish.h | 1 + drivers/hid/intel-ish-hid/ipc/pci-ish.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index 97869b7410eb..da133716bed0 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -29,6 +29,7 @@ #define CNL_Ax_DEVICE_ID 0x9DFC #define GLK_Ax_DEVICE_ID 0x31A2 #define CNL_H_DEVICE_ID 0xA37C +#define SPT_H_DEVICE_ID 0xA135 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 050f9872f5c0..a1125a5c7965 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -38,6 +38,7 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, CNL_Ax_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, GLK_Ax_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, CNL_H_DEVICE_ID)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, SPT_H_DEVICE_ID)}, {0, } }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl); From fb6acf76c3fdd97fea6995e64e2c665725f00fc5 Mon Sep 17 00:00:00 2001 From: AceLan Kao Date: Tue, 21 Aug 2018 16:55:13 +0800 Subject: [PATCH 032/269] HID: i2c-hid: Fix flooded incomplete report after S3 on Rayd touchscreen The incomplete report flooded after S3 and touchscreen becomes malfunctioned. [ 1367.646244] i2c_hid i2c-CUST0000:00: i2c_hid_get_input: incomplete report (58/18785) [ 1367.649471] i2c_hid i2c-CUST0000:00: i2c_hid_get_input: incomplete report (58/28743) [ 1367.651092] i2c_hid i2c-CUST0000:00: i2c_hid_get_input: incomplete report (58/26757) [ 1367.652658] i2c_hid i2c-CUST0000:00: i2c_hid_get_input: incomplete report (58/52280) [ 1367.654287] i2c_hid i2c-CUST0000:00: i2c_hid_get_input: incomplete report (58/56059) Adding device ID, 04F3:30CC, to the quirk to re-send report description after resume. Cc: stable@vger.kernel.org Signed-off-by: AceLan Kao Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/i2c-hid/i2c-hid.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 79bdf0c7e351..34367df61b28 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -530,6 +530,7 @@ #define I2C_VENDOR_ID_RAYD 0x2386 #define I2C_PRODUCT_ID_RAYD_3118 0x3118 +#define I2C_PRODUCT_ID_RAYD_4B33 0x4B33 #define USB_VENDOR_ID_HANWANG 0x0b57 #define USB_DEVICE_ID_HANWANG_TABLET_FIRST 0x5000 diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 2ce194a84868..57126f6837bb 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -174,6 +174,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_RESEND_REPORT_DESCR }, { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS10FB_TOUCH, I2C_HID_QUIRK_RESEND_REPORT_DESCR }, + { I2C_VENDOR_ID_RAYD, I2C_PRODUCT_ID_RAYD_4B33, + I2C_HID_QUIRK_RESEND_REPORT_DESCR }, { 0, 0 } }; From ee345492437043a79db058a3d4f029ebcb52089a Mon Sep 17 00:00:00 2001 From: Sean O'Brien Date: Mon, 27 Aug 2018 13:02:15 -0700 Subject: [PATCH 033/269] HID: add support for Apple Magic Keyboards USB device Vendor 05ac (Apple) Device 026c (Magic Keyboard with Numeric Keypad) Bluetooth devices Vendor 004c (Apple) Device 0267 (Magic Keyboard) Device 026c (Magic Keyboard with Numeric Keypad) Support already exists for the Magic Keyboard over USB connection. Add support for the Magic Keyboard over Bluetooth connection, and for the Magic Keyboard with Numeric Keypad over Bluetooth and USB connection. Signed-off-by: Sean O'Brien Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 9 ++++++++- drivers/hid/hid-ids.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 25b7bd56ae11..1cb41992aaa1 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -335,7 +335,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { - if (usage->hid == (HID_UP_CUSTOM | 0x0003)) { + if (usage->hid == (HID_UP_CUSTOM | 0x0003) || + usage->hid == (HID_UP_MSVENDOR | 0x0003)) { /* The fn key on Apple USB keyboards */ set_bit(EV_REP, hi->input->evbit); hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN); @@ -472,6 +473,12 @@ static const struct hid_device_id apple_devices[] = { .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI), .driver_data = APPLE_HAS_FN }, + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI), + .driver_data = APPLE_HAS_FN }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_ANSI), + .driver_data = APPLE_HAS_FN }, + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_ANSI), + .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO), diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 34367df61b28..cb2d3170d9dc 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -88,6 +88,7 @@ #define USB_DEVICE_ID_ANTON_TOUCH_PAD 0x3101 #define USB_VENDOR_ID_APPLE 0x05ac +#define BT_VENDOR_ID_APPLE 0x004c #define USB_DEVICE_ID_APPLE_MIGHTYMOUSE 0x0304 #define USB_DEVICE_ID_APPLE_MAGICMOUSE 0x030d #define USB_DEVICE_ID_APPLE_MAGICTRACKPAD 0x030e @@ -157,6 +158,7 @@ #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO 0x0256 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_JIS 0x0257 #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI 0x0267 +#define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_ANSI 0x026c #define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0290 #define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0291 #define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0292 From e38c0ac55ee67cf3626cfbc2283f8873dc44d370 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 28 Aug 2018 13:29:55 +0200 Subject: [PATCH 034/269] HID: input: fix leaking custom input node name Make sure to free the custom input node name on disconnect. Cc: stable@vger.kernel.org # v4.18+ Fixes: c554bb045511 ("HID: input: append a suffix matching the application") Signed-off-by: Stefan Agner Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 4e94ea3e280a..ac201817a2dd 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1815,6 +1815,7 @@ void hidinput_disconnect(struct hid_device *hid) input_unregister_device(hidinput->input); else input_free_device(hidinput->input); + kfree(hidinput->name); kfree(hidinput); } From b2dd9f2e5a8a4a6afa9d41411cdbfc2f5ceeba71 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 28 Aug 2018 13:29:54 +0200 Subject: [PATCH 035/269] HID: core: fix memory leak on probe The dynamically allocted collection stack does not get freed in all situations. Make sure to also free the collection stack when using the parser in hid_open_report(). Fixes: 08a8a7cf1459 ("HID: core: do not upper bound the collection stack") Signed-off-by: Stefan Agner Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 3da354af7a0a..44a465db3f96 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1039,6 +1039,7 @@ int hid_open_report(struct hid_device *device) hid_err(device, "unbalanced delimiter at end of report description\n"); goto err; } + kfree(parser->collection_stack); vfree(parser); device->status |= HID_STAT_PARSED; return 0; @@ -1047,6 +1048,7 @@ int hid_open_report(struct hid_device *device) hid_err(device, "item fetching failed at offset %d\n", (int)(end - start)); err: + kfree(parser->collection_stack); vfree(parser); hid_close_report(device); return ret; From 6537886cdc9a637711fd6da980dbb87c2c87c9aa Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Mon, 13 Aug 2018 15:57:44 +0200 Subject: [PATCH 036/269] gpio: adp5588: Fix sleep-in-atomic-context bug This fixes: [BUG] gpio: gpio-adp5588: A possible sleep-in-atomic-context bug in adp5588_gpio_write() [BUG] gpio: gpio-adp5588: A possible sleep-in-atomic-context bug in adp5588_gpio_direction_input() Reported-by: Jia-Ju Bai Signed-off-by: Michael Hennerich Signed-off-by: Linus Walleij --- drivers/gpio/gpio-adp5588.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-adp5588.c b/drivers/gpio/gpio-adp5588.c index 3530ccd17e04..da9781a2ef4a 100644 --- a/drivers/gpio/gpio-adp5588.c +++ b/drivers/gpio/gpio-adp5588.c @@ -41,6 +41,8 @@ struct adp5588_gpio { uint8_t int_en[3]; uint8_t irq_mask[3]; uint8_t irq_stat[3]; + uint8_t int_input_en[3]; + uint8_t int_lvl_cached[3]; }; static int adp5588_gpio_read(struct i2c_client *client, u8 reg) @@ -173,12 +175,28 @@ static void adp5588_irq_bus_sync_unlock(struct irq_data *d) struct adp5588_gpio *dev = irq_data_get_irq_chip_data(d); int i; - for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) + for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) { + if (dev->int_input_en[i]) { + mutex_lock(&dev->lock); + dev->dir[i] &= ~dev->int_input_en[i]; + dev->int_input_en[i] = 0; + adp5588_gpio_write(dev->client, GPIO_DIR1 + i, + dev->dir[i]); + mutex_unlock(&dev->lock); + } + + if (dev->int_lvl_cached[i] != dev->int_lvl[i]) { + dev->int_lvl_cached[i] = dev->int_lvl[i]; + adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + i, + dev->int_lvl[i]); + } + if (dev->int_en[i] ^ dev->irq_mask[i]) { dev->int_en[i] = dev->irq_mask[i]; adp5588_gpio_write(dev->client, GPIO_INT_EN1 + i, dev->int_en[i]); } + } mutex_unlock(&dev->irq_lock); } @@ -221,9 +239,7 @@ static int adp5588_irq_set_type(struct irq_data *d, unsigned int type) else return -EINVAL; - adp5588_gpio_direction_input(&dev->gpio_chip, gpio); - adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + bank, - dev->int_lvl[bank]); + dev->int_input_en[bank] |= bit; return 0; } From ef39078d6342deaddacdd550c4197421bd83fb76 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 24 Aug 2018 08:43:35 +0200 Subject: [PATCH 037/269] netfilter: conntrack: place 'new' timeout in first location too tcp, sctp and dccp trackers re-use the userspace ctnetlink states to index their timeout arrays, which means timeout[0] is never used. Copy the 'new' state (syn-sent, dccp-request, ..) to 0 as well so external users can simply read it off timeouts[0] without need to differentiate dccp/sctp/tcp and udp/icmp/gre/generic. The alternative is to map all array accesses to 'i - 1', but that is a much more intrusive change. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_dccp.c | 7 +++++++ net/netfilter/nf_conntrack_proto_sctp.c | 7 +++++++ net/netfilter/nf_conntrack_proto_tcp.c | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 8c58f96b59e7..b81f70039828 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -697,6 +697,8 @@ static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; } } + + timeouts[CTA_TIMEOUT_DCCP_UNSPEC] = timeouts[CTA_TIMEOUT_DCCP_REQUEST]; return 0; } @@ -827,6 +829,11 @@ static int dccp_init_net(struct net *net, u_int16_t proto) dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; + + /* timeouts[0] is unused, make it same as SYN_SENT so + * ->timeouts[0] contains 'new' timeout, like udp or icmp. + */ + dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST]; } return dccp_kmemdup_sysctl_table(net, pn, dn); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 8d1e085fc14a..5eddfd32b852 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -613,6 +613,8 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; } } + + timeouts[CTA_TIMEOUT_SCTP_UNSPEC] = timeouts[CTA_TIMEOUT_SCTP_CLOSED]; return 0; } @@ -743,6 +745,11 @@ static int sctp_init_net(struct net *net, u_int16_t proto) for (i = 0; i < SCTP_CONNTRACK_MAX; i++) sn->timeouts[i] = sctp_timeouts[i]; + + /* timeouts[0] is unused, init it so ->timeouts[0] contains + * 'new' timeout, like udp or icmp. + */ + sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED]; } return sctp_kmemdup_sysctl_table(pn, sn); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index d80d322b9d8b..3e2dc56a96c3 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1301,6 +1301,7 @@ static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], timeouts[TCP_CONNTRACK_SYN_SENT] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ; } + if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) { timeouts[TCP_CONNTRACK_SYN_RECV] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ; @@ -1341,6 +1342,8 @@ static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], timeouts[TCP_CONNTRACK_UNACK] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ; } + + timeouts[CTA_TIMEOUT_TCP_UNSPEC] = timeouts[CTA_TIMEOUT_TCP_SYN_SENT]; return 0; } @@ -1518,6 +1521,10 @@ static int tcp_init_net(struct net *net, u_int16_t proto) for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) tn->timeouts[i] = tcp_timeouts[i]; + /* timeouts[0] is unused, make it same as SYN_SENT so + * ->timeouts[0] contains 'new' timeout, like udp or icmp. + */ + tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT]; tn->tcp_loose = nf_ct_tcp_loose; tn->tcp_be_liberal = nf_ct_tcp_be_liberal; tn->tcp_max_retrans = nf_ct_tcp_max_retrans; From 0434ccdcf883e53ec7156a6843943e940dc1feb8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 24 Aug 2018 08:43:36 +0200 Subject: [PATCH 038/269] netfilter: nf_tables: rework ct timeout set support Using a private template is problematic: 1. We can't assign both a zone and a timeout policy (zone assigns a conntrack template, so we hit problem 1) 2. Using a template needs to take care of ct refcount, else we'll eventually free the private template due to ->use underflow. This patch reworks template policy to instead work with existing conntrack. As long as such conntrack has not yet been placed into the hash table (unconfirmed) we can still add the timeout extension. The only caveat is that we now need to update/correct ct->timeout to reflect the initial/new state, otherwise the conntrack entry retains the default 'new' timeout. Side effect of this change is that setting the policy must now occur from chains that are evaluated *after* the conntrack lookup has taken place. No released kernel contains the timeout policy feature yet, so this change should be ok. Changes since v2: - don't handle 'ct is confirmed case' - after previous patch, no need to special-case tcp/dccp/sctp timeout anymore Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_timeout.h | 2 +- net/netfilter/nft_ct.c | 59 ++++++++++---------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index d5f62cc6c2ae..3394d75e1c80 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -30,7 +30,7 @@ struct nf_conn_timeout { }; static inline unsigned int * -nf_ct_timeout_data(struct nf_conn_timeout *t) +nf_ct_timeout_data(const struct nf_conn_timeout *t) { struct nf_ct_timeout *timeout; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 26a8baebd072..5dd87748afa8 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -799,7 +799,7 @@ err: } struct nft_ct_timeout_obj { - struct nf_conn *tmpl; + struct nf_ct_timeout *timeout; u8 l4proto; }; @@ -809,26 +809,42 @@ static void nft_ct_timeout_obj_eval(struct nft_object *obj, { const struct nft_ct_timeout_obj *priv = nft_obj_data(obj); struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb); - struct sk_buff *skb = pkt->skb; + struct nf_conn_timeout *timeout; + const unsigned int *values; - if (ct || - priv->l4proto != pkt->tprot) + if (priv->l4proto != pkt->tprot) return; - nf_ct_set(skb, priv->tmpl, IP_CT_NEW); + if (!ct || nf_ct_is_template(ct) || nf_ct_is_confirmed(ct)) + return; + + timeout = nf_ct_timeout_find(ct); + if (!timeout) { + timeout = nf_ct_timeout_ext_add(ct, priv->timeout, GFP_ATOMIC); + if (!timeout) { + regs->verdict.code = NF_DROP; + return; + } + } + + rcu_assign_pointer(timeout->timeout, priv->timeout); + + /* adjust the timeout as per 'new' state. ct is unconfirmed, + * so the current timestamp must not be added. + */ + values = nf_ct_timeout_data(timeout); + if (values) + nf_ct_refresh(ct, pkt->skb, values[0]); } static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], struct nft_object *obj) { - const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; struct nft_ct_timeout_obj *priv = nft_obj_data(obj); const struct nf_conntrack_l4proto *l4proto; - struct nf_conn_timeout *timeout_ext; struct nf_ct_timeout *timeout; int l3num = ctx->family; - struct nf_conn *tmpl; __u8 l4num; int ret; @@ -863,28 +879,14 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx, timeout->l3num = l3num; timeout->l4proto = l4proto; - tmpl = nf_ct_tmpl_alloc(ctx->net, zone, GFP_ATOMIC); - if (!tmpl) { - ret = -ENOMEM; - goto err_free_timeout; - } - - timeout_ext = nf_ct_timeout_ext_add(tmpl, timeout, GFP_ATOMIC); - if (!timeout_ext) { - ret = -ENOMEM; - goto err_free_tmpl; - } ret = nf_ct_netns_get(ctx->net, ctx->family); if (ret < 0) - goto err_free_tmpl; - - priv->tmpl = tmpl; + goto err_free_timeout; + priv->timeout = timeout; return 0; -err_free_tmpl: - nf_ct_tmpl_free(tmpl); err_free_timeout: kfree(timeout); err_proto_put: @@ -896,22 +898,19 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) { struct nft_ct_timeout_obj *priv = nft_obj_data(obj); - struct nf_conn_timeout *t = nf_ct_timeout_find(priv->tmpl); - struct nf_ct_timeout *timeout; + struct nf_ct_timeout *timeout = priv->timeout; - timeout = rcu_dereference_raw(t->timeout); nf_ct_untimeout(ctx->net, timeout); nf_ct_l4proto_put(timeout->l4proto); nf_ct_netns_put(ctx->net, ctx->family); - nf_ct_tmpl_free(priv->tmpl); + kfree(priv->timeout); } static int nft_ct_timeout_obj_dump(struct sk_buff *skb, struct nft_object *obj, bool reset) { const struct nft_ct_timeout_obj *priv = nft_obj_data(obj); - const struct nf_conn_timeout *t = nf_ct_timeout_find(priv->tmpl); - const struct nf_ct_timeout *timeout = rcu_dereference_raw(t->timeout); + const struct nf_ct_timeout *timeout = priv->timeout; struct nlattr *nest_params; int ret; From 993b9bc5c47fda86f8ab4e53d68c6fea5ff2764a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Aug 2018 19:00:27 +0300 Subject: [PATCH 039/269] gpiolib: acpi: Switch to cansleep version of GPIO library call The commit ca876c7483b6 ("gpiolib-acpi: make sure we trigger edge events at least once on boot") added a initial value check for pin which is about to be locked as IRQ. Unfortunately, not all GPIO drivers can do that atomically. Thus, switch to cansleep version of the call. Otherwise we have a warning: ... WARNING: CPU: 2 PID: 1408 at drivers/gpio/gpiolib.c:2883 gpiod_get_value+0x46/0x50 ... RIP: 0010:gpiod_get_value+0x46/0x50 ... The change tested on Intel Broxton with Whiskey Cove PMIC GPIO controller. Fixes: ca876c7483b6 ("gpiolib-acpi: make sure we trigger edge events at least once on boot") Signed-off-by: Andy Shevchenko Cc: Hans de Goede Cc: Benjamin Tissoires Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index c48ed9d89ff5..f9134f23c7e8 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -186,7 +186,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, gpiod_direction_input(desc); - value = gpiod_get_value(desc); + value = gpiod_get_value_cansleep(desc); ret = gpiochip_lock_as_irq(chip, pin); if (ret) { From 78d3a92edbfb02e8cb83173cad84c3f2d5e1f070 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Aug 2018 16:07:03 +0200 Subject: [PATCH 040/269] gpiolib-acpi: Register GpioInt ACPI event handlers from a late_initcall GpioInt ACPI event handlers may see there IRQ triggered immediately after requesting the IRQ (esp. level triggered ones). This means that they may run before any other (builtin) drivers have had a chance to register their OpRegion handlers, leading to errors like this: [ 1.133274] ACPI Error: No handler for Region [PMOP] ((____ptrval____)) [UserDefinedRegion] (20180531/evregion-132) [ 1.133286] ACPI Error: Region UserDefinedRegion (ID=141) has no handler (20180531/exfldio-265) [ 1.133297] ACPI Error: Method parse/execution failed \_SB.GPO2._L01, AE_NOT_EXIST (20180531/psparse-516) We already defer the manual initial trigger of edge triggered interrupts by running it from a late_initcall handler, this commit replaces this with deferring the entire acpi_gpiochip_request_interrupts() call till then, fixing the problem of some OpRegions not being registered yet. Note that this removes the need to have a list of edge triggered handlers which need to run, since the entire acpi_gpiochip_request_interrupts() call is now delayed, acpi_gpiochip_request_interrupt() can call these directly now. Acked-by: Mika Westerberg Signed-off-by: Hans de Goede Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 84 +++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index f9134f23c7e8..8b9d7e42c600 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -25,7 +25,6 @@ struct acpi_gpio_event { struct list_head node; - struct list_head initial_sync_list; acpi_handle handle; unsigned int pin; unsigned int irq; @@ -49,10 +48,19 @@ struct acpi_gpio_chip { struct mutex conn_lock; struct gpio_chip *chip; struct list_head events; + struct list_head deferred_req_irqs_list_entry; }; -static LIST_HEAD(acpi_gpio_initial_sync_list); -static DEFINE_MUTEX(acpi_gpio_initial_sync_list_lock); +/* + * For gpiochips which call acpi_gpiochip_request_interrupts() before late_init + * (so builtin drivers) we register the ACPI GpioInt event handlers from a + * late_initcall_sync handler, so that other builtin drivers can register their + * OpRegions before the event handlers can run. This list contains gpiochips + * for which the acpi_gpiochip_request_interrupts() has been deferred. + */ +static DEFINE_MUTEX(acpi_gpio_deferred_req_irqs_lock); +static LIST_HEAD(acpi_gpio_deferred_req_irqs_list); +static bool acpi_gpio_deferred_req_irqs_done; static int acpi_gpiochip_find(struct gpio_chip *gc, void *data) { @@ -89,21 +97,6 @@ static struct gpio_desc *acpi_get_gpiod(char *path, int pin) return gpiochip_get_desc(chip, pin); } -static void acpi_gpio_add_to_initial_sync_list(struct acpi_gpio_event *event) -{ - mutex_lock(&acpi_gpio_initial_sync_list_lock); - list_add(&event->initial_sync_list, &acpi_gpio_initial_sync_list); - mutex_unlock(&acpi_gpio_initial_sync_list_lock); -} - -static void acpi_gpio_del_from_initial_sync_list(struct acpi_gpio_event *event) -{ - mutex_lock(&acpi_gpio_initial_sync_list_lock); - if (!list_empty(&event->initial_sync_list)) - list_del_init(&event->initial_sync_list); - mutex_unlock(&acpi_gpio_initial_sync_list_lock); -} - static irqreturn_t acpi_gpio_irq_handler(int irq, void *data) { struct acpi_gpio_event *event = data; @@ -229,7 +222,6 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, event->irq = irq; event->pin = pin; event->desc = desc; - INIT_LIST_HEAD(&event->initial_sync_list); ret = request_threaded_irq(event->irq, NULL, handler, irqflags, "ACPI:Event", event); @@ -251,10 +243,9 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, * may refer to OperationRegions from other (builtin) drivers which * may be probed after us. */ - if (handler == acpi_gpio_irq_handler && - (((irqflags & IRQF_TRIGGER_RISING) && value == 1) || - ((irqflags & IRQF_TRIGGER_FALLING) && value == 0))) - acpi_gpio_add_to_initial_sync_list(event); + if (((irqflags & IRQF_TRIGGER_RISING) && value == 1) || + ((irqflags & IRQF_TRIGGER_FALLING) && value == 0)) + handler(event->irq, event); return AE_OK; @@ -283,6 +274,7 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) struct acpi_gpio_chip *acpi_gpio; acpi_handle handle; acpi_status status; + bool defer; if (!chip->parent || !chip->to_irq) return; @@ -295,6 +287,16 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) if (ACPI_FAILURE(status)) return; + mutex_lock(&acpi_gpio_deferred_req_irqs_lock); + defer = !acpi_gpio_deferred_req_irqs_done; + if (defer) + list_add(&acpi_gpio->deferred_req_irqs_list_entry, + &acpi_gpio_deferred_req_irqs_list); + mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); + + if (defer) + return; + acpi_walk_resources(handle, "_AEI", acpi_gpiochip_request_interrupt, acpi_gpio); } @@ -325,11 +327,14 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) if (ACPI_FAILURE(status)) return; + mutex_lock(&acpi_gpio_deferred_req_irqs_lock); + if (!list_empty(&acpi_gpio->deferred_req_irqs_list_entry)) + list_del_init(&acpi_gpio->deferred_req_irqs_list_entry); + mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); + list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) { struct gpio_desc *desc; - acpi_gpio_del_from_initial_sync_list(event); - if (irqd_is_wakeup_set(irq_get_irq_data(event->irq))) disable_irq_wake(event->irq); @@ -1052,6 +1057,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip) acpi_gpio->chip = chip; INIT_LIST_HEAD(&acpi_gpio->events); + INIT_LIST_HEAD(&acpi_gpio->deferred_req_irqs_list_entry); status = acpi_attach_data(handle, acpi_gpio_chip_dh, acpi_gpio); if (ACPI_FAILURE(status)) { @@ -1198,20 +1204,28 @@ bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id) return con_id == NULL; } -/* Sync the initial state of handlers after all builtin drivers have probed */ -static int acpi_gpio_initial_sync(void) +/* Run deferred acpi_gpiochip_request_interrupts() */ +static int acpi_gpio_handle_deferred_request_interrupts(void) { - struct acpi_gpio_event *event, *ep; + struct acpi_gpio_chip *acpi_gpio, *tmp; - mutex_lock(&acpi_gpio_initial_sync_list_lock); - list_for_each_entry_safe(event, ep, &acpi_gpio_initial_sync_list, - initial_sync_list) { - acpi_evaluate_object(event->handle, NULL, NULL, NULL); - list_del_init(&event->initial_sync_list); + mutex_lock(&acpi_gpio_deferred_req_irqs_lock); + list_for_each_entry_safe(acpi_gpio, tmp, + &acpi_gpio_deferred_req_irqs_list, + deferred_req_irqs_list_entry) { + acpi_handle handle; + + handle = ACPI_HANDLE(acpi_gpio->chip->parent); + acpi_walk_resources(handle, "_AEI", + acpi_gpiochip_request_interrupt, acpi_gpio); + + list_del_init(&acpi_gpio->deferred_req_irqs_list_entry); } - mutex_unlock(&acpi_gpio_initial_sync_list_lock); + + acpi_gpio_deferred_req_irqs_done = true; + mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); return 0; } /* We must use _sync so that this runs after the first deferred_probe run */ -late_initcall_sync(acpi_gpio_initial_sync); +late_initcall_sync(acpi_gpio_handle_deferred_request_interrupts); From a618cf4800970d260871c159b7eec014a1da2e81 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Tue, 28 Aug 2018 23:40:26 +0300 Subject: [PATCH 041/269] gpio: dwapb: Fix error handling in dwapb_gpio_probe() If dwapb_gpio_add_port() fails in dwapb_gpio_probe(), gpio->clk is left undisabled. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Linus Walleij --- drivers/gpio/gpio-dwapb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 28da700f5f52..044888fd96a1 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -728,6 +728,7 @@ static int dwapb_gpio_probe(struct platform_device *pdev) out_unregister: dwapb_gpio_unregister(gpio); dwapb_irq_teardown(gpio); + clk_disable_unprepare(gpio->clk); return err; } From 9174c1d6196d612799808009ec2796df021ab625 Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Tue, 7 Aug 2018 20:39:16 +0800 Subject: [PATCH 042/269] drm/i915/gvt: emulate gen9 dbuf ctl register access there is below call track at boot time when booting guest with kabylake vgpu with specifal configuration and this try to fix it. [drm:gen9_dbuf_enable [i915]] *ERROR* DBuf power enable timeout ------------[ cut here ]------------ WARNING: gen9_dc_off_power_well_enable+0x224/0x230 [i915] Unexpected DBuf power power state (0x8000000a) Hardware name: Red Hat KVM, BIOS 1.11.0-2.el7 04/01/2014 Call Trace: [] dump_stack+0x19/0x1b [] __warn+0xd8/0x100 [] warn_slowpath_fmt+0x5f/0x80 [] gen9_dc_off_power_well_enable+0x224/0x230 [i915] [] intel_power_well_enable+0x42/0x50 [i915] [] __intel_display_power_get_domain+0x8a/0xb0 [i915] [] intel_display_power_get+0x33/0x50 [i915] [] intel_display_set_init_power+0x45/0x50 [i915] [] intel_power_domains_init_hw+0x63/0x8a0 [i915] [] i915_driver_load+0xae3/0x1760 [i915] [] ? nvmem_register+0x500/0x500 [] i915_pci_probe+0x2c/0x50 [i915] [] local_pci_probe+0x4a/0xb0 [] pci_device_probe+0x109/0x160 [] driver_probe_device+0xc5/0x3e0 [] __driver_attach+0x93/0xa0 [] ? __device_attach+0x50/0x50 [] bus_for_each_dev+0x75/0xc0 [] driver_attach+0x1e/0x20 [] bus_add_driver+0x200/0x2d0 [] driver_register+0x64/0xf0 [] __pci_register_driver+0xa5/0xc0 [] ? 0xffffffffc0928fff [] i915_init+0x59/0x5c [i915] [] do_one_initcall+0xba/0x240 [] load_module+0x272c/0x2bc0 [] ? ddebug_proc_write+0xf0/0xf0 [] SyS_init_module+0xc5/0x110 [] system_call_fastpath+0x1c/0x21 Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 7a58ca555197..450e730743a1 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1296,6 +1296,19 @@ static int power_well_ctl_mmio_write(struct intel_vgpu *vgpu, return 0; } +static int gen9_dbuf_ctl_mmio_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + write_vreg(vgpu, offset, p_data, bytes); + + if (vgpu_vreg(vgpu, offset) & DBUF_POWER_REQUEST) + vgpu_vreg(vgpu, offset) |= DBUF_POWER_STATE; + else + vgpu_vreg(vgpu, offset) &= ~DBUF_POWER_STATE; + + return 0; +} + static int fpga_dbg_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2812,6 +2825,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(HSW_PWR_WELL_CTL_DRIVER(SKL_DISP_PW_MISC_IO), D_SKL_PLUS, NULL, skl_power_well_ctl_write); + MMIO_DH(DBUF_CTL, D_SKL_PLUS, NULL, gen9_dbuf_ctl_mmio_write); + MMIO_D(_MMIO(0xa210), D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); @@ -2987,8 +3002,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) NULL, gen9_trtte_write); MMIO_DH(_MMIO(0x4dfc), D_SKL_PLUS, NULL, gen9_trtt_chicken_write); - MMIO_D(_MMIO(0x45008), D_SKL_PLUS); - MMIO_D(_MMIO(0x46430), D_SKL_PLUS); MMIO_D(_MMIO(0x46520), D_SKL_PLUS); From c8ab5ac30ccc20a31672ab0f8938a6271dfe4122 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Mon, 20 Aug 2018 16:46:34 +0800 Subject: [PATCH 043/269] drm/i915/gvt: Make correct handling to vreg BXT_PHY_CTL_FAMILY Guest kernel will write to BXT_PHY_CTL_FAMILY to reset DDI PHY and pull BXT_PHY_CTL to check PHY status. Previous handling will set/reset BXT_PHY_CTL of all PHYs at same time on receiving vreg write to some BXT_PHY_CTL_FAMILY. If some BXT_PHY_CTL is already enabled, following reset to another BXT_PHY_CTL_FAMILY will clear the enabled BXT_PHY_CTL, which result in guest kernel print: ----------------------------------- [drm:intel_ddi_get_hw_state [i915]] *ERROR* Port B enabled but PHY powered down? (PHY_CTL 00000000) ----------------------------------- The correct handling should operate BXT_PHY_CTL_FAMILY and BXT_PHY_CTL on the same DDI. v2: Use correct reg define. The naming looks confusing, however current i915_reg.h bind DPIO_PHY0 to _PHY_CTL_FAMILY_DDI and bind DPIO_PHY1 to _PHY_CTL_FAMILY_EDP, pairing to _BXT_PHY_CTL_DDI_A and _BXT_PHY_CTL_DDI_B respectively. v3: v2 incorrectly map _PHY_CTL_FAMILY_EDP to _BXT_PHY_CTL_DDI_A. BXT_PHY_CTL() looks up DDI using PORTx but not PHYx. Based on DPIO_PHY to DDI mapping, make correct vreg handle to BXT_PHY_CTL on receiving vreg write to BXT_PHY_CTL_FAMILY. (He, Min) Current mapping according to bxt_power_wells: dpio-common-a: >>> DPIO_PHY1 >>> BXT_DPIO_CMN_A_POWER_DOMAINS >>> POWER_DOMAIN_PORT_DDI_A_LANES >>> PORT_A dpio-common-bc: >>> DPIO_PHY0 >>> BXT_DPIO_CMN_BC_POWER_DOMAINS >>> POWER_DOMAIN_PORT_DDI_B_LANES | POWER_DOMAIN_PORT_DDI_C_LANES >>> PORT_B or PORT_C Signed-off-by: Colin Xu Reviewed-by: He, Min Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 450e730743a1..d0db55a79627 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1538,9 +1538,15 @@ static int bxt_phy_ctl_family_write(struct intel_vgpu *vgpu, u32 v = *(u32 *)p_data; u32 data = v & COMMON_RESET_DIS ? BXT_PHY_LANE_ENABLED : 0; - vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_A) = data; - vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_B) = data; - vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_C) = data; + switch (offset) { + case _PHY_CTL_FAMILY_EDP: + vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_A) = data; + break; + case _PHY_CTL_FAMILY_DDI: + vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_B) = data; + vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_C) = data; + break; + } vgpu_vreg(vgpu, offset) = v; From b9b824a55876275f8506c1c187558ab22d879f73 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Fri, 17 Aug 2018 16:42:24 +0800 Subject: [PATCH 044/269] drm/i915/gvt: Handle GEN9_WM_CHICKEN3 with F_CMD_ACCESS. Recent patch introduce strict check on scanning cmd: Commit 8d458ea0ec33 ("drm/i915/gvt: return error on cmd access") Before 8d458ea0ec33, if cmd_reg_handler() checks that a cmd access a mmio that not marked as F_CMD_ACCESS, it simply returns 0 and log an error. Now it will return -EBADRQC which will cause the workload fail to submit. On BXT, i915 applies WaClearHIZ_WM_CHICKEN3 which will program GEN9_WM_CHICKEN3 by LRI when init wa ctx. If it has no F_CMD_ACCESS flag, vgpu will fail to start. Also add F_MODE_MASK since it's mode mask reg. v2: Refresh commit message to elaborate issue symptom in detail. v3: Make SKL_PLUS share same handling since GEN9_WM_CHICKEN3 should be F_CMD_ACCESS from HW aspect. (yan, zhenyu) Signed-off-by: Colin Xu Acked-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index d0db55a79627..72afa518edd9 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3044,7 +3044,9 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_MMIO(0x44500), D_SKL_PLUS); MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, - NULL, NULL); + NULL, NULL); + MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_D(_MMIO(0x4ab8), D_KBL); MMIO_D(_MMIO(0x2248), D_KBL | D_SKL); From b2b599fb54f90ae395ddc51f0d49e4f28244a8f8 Mon Sep 17 00:00:00 2001 From: Hang Yuan Date: Wed, 29 Aug 2018 17:15:56 +0800 Subject: [PATCH 045/269] drm/i915/gvt: move intel_runtime_pm_get out of spin_lock in stop_schedule pm_runtime_get_sync in intel_runtime_pm_get might sleep if i915 device is not active. When stop vgpu schedule, the device may be inactive. So need to move runtime_pm_get out of spin_lock/unlock. Fixes: b24881e0b0b6("drm/i915/gvt: Add runtime_pm_get/put into gvt_switch_mmio Cc: Signed-off-by: Hang Yuan Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio_context.c | 2 -- drivers/gpu/drm/i915/gvt/sched_policy.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 42e1e6bdcc2c..e872f4847fbe 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -562,11 +562,9 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, * performace for batch mmio read/write, so we need * handle forcewake mannually. */ - intel_runtime_pm_get(dev_priv); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); switch_mmio(pre, next, ring_id); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(dev_priv); } /** diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 09d7bb72b4ff..985fe81794dd 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -426,6 +426,7 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) &vgpu->gvt->scheduler; int ring_id; struct vgpu_sched_data *vgpu_data = vgpu->sched_data; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; if (!vgpu_data->active) return; @@ -444,6 +445,7 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) scheduler->current_vgpu = NULL; } + intel_runtime_pm_get(dev_priv); spin_lock_bh(&scheduler->mmio_context_lock); for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { if (scheduler->engine_owner[ring_id] == vgpu) { @@ -452,5 +454,6 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) } } spin_unlock_bh(&scheduler->mmio_context_lock); + intel_runtime_pm_put(dev_priv); mutex_unlock(&vgpu->gvt->sched_lock); } From b244ffa15c8b1aabdc117c0b6008086df7b668b7 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 30 Aug 2018 10:50:36 +0800 Subject: [PATCH 046/269] drm/i915/gvt: Fix drm_format_mod value for vGPU plane Physical plane's tiling mode value is given directly as drm_format_mod for plane query, which is not correct fourcc code. Fix it by using correct intel tiling fourcc mod definition. Current qemu seems also doesn't correctly utilize drm_format_mod for plane object setting. Anyway this is required to fix the usage. v3: use DRM_FORMAT_MOD_LINEAR, fix comment v2: Fix missed old 'tiled' use for stride calculation Fixes: e546e281d33d ("drm/i915/gvt: Dmabuf support for GVT-g") Cc: Tina Zhang Cc: Gerd Hoffmann Cc: Colin Xu Reviewed-by: Colin Xu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 33 +++++++++++++++++++++------ drivers/gpu/drm/i915/gvt/fb_decoder.c | 5 ++-- drivers/gpu/drm/i915/gvt/fb_decoder.h | 2 +- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 6e3f56684f4e..51ed99a37803 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -170,20 +170,22 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, unsigned int tiling_mode = 0; unsigned int stride = 0; - switch (info->drm_format_mod << 10) { - case PLANE_CTL_TILED_LINEAR: + switch (info->drm_format_mod) { + case DRM_FORMAT_MOD_LINEAR: tiling_mode = I915_TILING_NONE; break; - case PLANE_CTL_TILED_X: + case I915_FORMAT_MOD_X_TILED: tiling_mode = I915_TILING_X; stride = info->stride; break; - case PLANE_CTL_TILED_Y: + case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_Yf_TILED: tiling_mode = I915_TILING_Y; stride = info->stride; break; default: - gvt_dbg_core("not supported tiling mode\n"); + gvt_dbg_core("invalid drm_format_mod %llx for tiling\n", + info->drm_format_mod); } obj->tiling_and_stride = tiling_mode | stride; } else { @@ -222,9 +224,26 @@ static int vgpu_get_plane_info(struct drm_device *dev, info->height = p.height; info->stride = p.stride; info->drm_format = p.drm_format; - info->drm_format_mod = p.tiled; + + switch (p.tiled) { + case PLANE_CTL_TILED_LINEAR: + info->drm_format_mod = DRM_FORMAT_MOD_LINEAR; + break; + case PLANE_CTL_TILED_X: + info->drm_format_mod = I915_FORMAT_MOD_X_TILED; + break; + case PLANE_CTL_TILED_Y: + info->drm_format_mod = I915_FORMAT_MOD_Y_TILED; + break; + case PLANE_CTL_TILED_YF: + info->drm_format_mod = I915_FORMAT_MOD_Yf_TILED; + break; + default: + gvt_vgpu_err("invalid tiling mode: %x\n", p.tiled); + } + info->size = (((p.stride * p.height * p.bpp) / 8) + - (PAGE_SIZE - 1)) >> PAGE_SHIFT; + (PAGE_SIZE - 1)) >> PAGE_SHIFT; } else if (plane_id == DRM_PLANE_TYPE_CURSOR) { ret = intel_vgpu_decode_cursor_plane(vgpu, &c); if (ret) diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index face664be3e8..481896fb712a 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -220,8 +220,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)) { - plane->tiled = (val & PLANE_CTL_TILED_MASK) >> - _PLANE_CTL_TILED_SHIFT; + plane->tiled = val & PLANE_CTL_TILED_MASK; fmt = skl_format_to_drm( val & PLANE_CTL_FORMAT_MASK, val & PLANE_CTL_ORDER_RGBX, @@ -260,7 +259,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, return -EINVAL; } - plane->stride = intel_vgpu_get_stride(vgpu, pipe, (plane->tiled << 10), + plane->stride = intel_vgpu_get_stride(vgpu, pipe, plane->tiled, (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)) ? diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.h b/drivers/gpu/drm/i915/gvt/fb_decoder.h index cb055f3c81a2..60c155085029 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.h +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.h @@ -101,7 +101,7 @@ struct intel_gvt; /* color space conversion and gamma correction are not included */ struct intel_vgpu_primary_plane_format { u8 enabled; /* plane is enabled */ - u8 tiled; /* X-tiled */ + u32 tiled; /* tiling mode: linear, X-tiled, Y tiled, etc */ u8 bpp; /* bits per pixel */ u32 hw_format; /* format field in the PRI_CTL register */ u32 drm_format; /* format in DRM definition */ From 43822c98f2ebb2cbd5e467ab72bbcdae7f0caa22 Mon Sep 17 00:00:00 2001 From: Harry Mallon Date: Tue, 28 Aug 2018 22:51:29 +0100 Subject: [PATCH 047/269] HID: hid-saitek: Add device ID for RAT 7 Contagion Signed-off-by: Harry Mallon Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-saitek.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index cb2d3170d9dc..19a66ceca217 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -953,6 +953,7 @@ #define USB_DEVICE_ID_SAITEK_RUMBLEPAD 0xff17 #define USB_DEVICE_ID_SAITEK_PS1000 0x0621 #define USB_DEVICE_ID_SAITEK_RAT7_OLD 0x0ccb +#define USB_DEVICE_ID_SAITEK_RAT7_CONTAGION 0x0ccd #define USB_DEVICE_ID_SAITEK_RAT7 0x0cd7 #define USB_DEVICE_ID_SAITEK_RAT9 0x0cfa #define USB_DEVICE_ID_SAITEK_MMO7 0x0cd0 diff --git a/drivers/hid/hid-saitek.c b/drivers/hid/hid-saitek.c index 39e642686ff0..683861f324e3 100644 --- a/drivers/hid/hid-saitek.c +++ b/drivers/hid/hid-saitek.c @@ -183,6 +183,8 @@ static const struct hid_device_id saitek_devices[] = { .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT7), .driver_data = SAITEK_RELEASE_MODE_RAT7 }, + { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT7_CONTAGION), + .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT9), .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9), From 7915919bb94e12460c58e27c708472e6f85f6699 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Mon, 27 Aug 2018 14:45:15 -0500 Subject: [PATCH 048/269] scsi: iscsi: target: Set conn->sess to NULL when iscsi_login_set_conn_values fails Fixes a use-after-free reported by KASAN when later iscsi_target_login_sess_out gets called and it tries to access conn->sess->se_sess: Disabling lock debugging due to kernel taint iSCSI Login timeout on Network Portal [::]:3260 iSCSI Login negotiation failed. ================================================================== BUG: KASAN: use-after-free in iscsi_target_login_sess_out.cold.12+0x58/0xff [iscsi_target_mod] Read of size 8 at addr ffff880109d070c8 by task iscsi_np/980 CPU: 1 PID: 980 Comm: iscsi_np Tainted: G O 4.17.8kasan.sess.connops+ #4 Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 05/19/2014 Call Trace: dump_stack+0x71/0xac print_address_description+0x65/0x22e ? iscsi_target_login_sess_out.cold.12+0x58/0xff [iscsi_target_mod] kasan_report.cold.6+0x241/0x2fd iscsi_target_login_sess_out.cold.12+0x58/0xff [iscsi_target_mod] iscsi_target_login_thread+0x1086/0x1710 [iscsi_target_mod] ? __sched_text_start+0x8/0x8 ? iscsi_target_login_sess_out+0x250/0x250 [iscsi_target_mod] ? __kthread_parkme+0xcc/0x100 ? parse_args.cold.14+0xd3/0xd3 ? iscsi_target_login_sess_out+0x250/0x250 [iscsi_target_mod] kthread+0x1a0/0x1c0 ? kthread_bind+0x30/0x30 ret_from_fork+0x35/0x40 Allocated by task 980: kasan_kmalloc+0xbf/0xe0 kmem_cache_alloc_trace+0x112/0x210 iscsi_target_login_thread+0x816/0x1710 [iscsi_target_mod] kthread+0x1a0/0x1c0 ret_from_fork+0x35/0x40 Freed by task 980: __kasan_slab_free+0x125/0x170 kfree+0x90/0x1d0 iscsi_target_login_thread+0x1577/0x1710 [iscsi_target_mod] kthread+0x1a0/0x1c0 ret_from_fork+0x35/0x40 The buggy address belongs to the object at ffff880109d06f00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 456 bytes inside of 512-byte region [ffff880109d06f00, ffff880109d07100) The buggy address belongs to the page: page:ffffea0004274180 count:1 mapcount:0 mapping:0000000000000000 index:0x0 compound_mapcount: 0 flags: 0x17fffc000008100(slab|head) raw: 017fffc000008100 0000000000000000 0000000000000000 00000001000c000c raw: dead000000000100 dead000000000200 ffff88011b002e00 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff880109d06f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880109d07000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff880109d07080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880109d07100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880109d07180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Signed-off-by: Vincent Pelletier [rebased against idr/ida changes and to handle ret review comments from Matthew] Signed-off-by: Mike Christie Cc: Matthew Wilcox Reviewed-by: Matthew Wilcox Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_login.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 9e74f8bc2963..f58b9c1d6fd4 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -310,11 +310,9 @@ static int iscsi_login_zero_tsih_s1( return -ENOMEM; } - ret = iscsi_login_set_conn_values(sess, conn, pdu->cid); - if (unlikely(ret)) { - kfree(sess); - return ret; - } + if (iscsi_login_set_conn_values(sess, conn, pdu->cid)) + goto free_sess; + sess->init_task_tag = pdu->itt; memcpy(&sess->isid, pdu->isid, 6); sess->exp_cmd_sn = be32_to_cpu(pdu->cmdsn); From 05a86e78ea9823ec25b3515db078dd8a76fc263c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 27 Aug 2018 14:45:16 -0500 Subject: [PATCH 049/269] scsi: iscsi: target: Fix conn_ops double free If iscsi_login_init_conn fails it can free conn_ops. __iscsi_target_login_thread will then call iscsi_target_login_sess_out which will also free it. This fixes the problem by organizing conn allocation/setup into parts that are needed through the life of the conn and parts that are only needed for the login. The free functions then release what was allocated in the alloc functions. With this patch we have: iscsit_alloc_conn/iscsit_free_conn - allocs/frees the conn we need for the entire life of the conn. iscsi_login_init_conn/iscsi_target_nego_release - allocs/frees the parts of the conn that are only needed during login. Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target.c | 9 +- drivers/target/iscsi/iscsi_target_login.c | 141 ++++++++++++---------- drivers/target/iscsi/iscsi_target_login.h | 2 +- 3 files changed, 77 insertions(+), 75 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 94bad43c41ff..9cdfccbdd06f 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4208,22 +4208,15 @@ int iscsit_close_connection( crypto_free_ahash(tfm); } - free_cpumask_var(conn->conn_cpumask); - - kfree(conn->conn_ops); - conn->conn_ops = NULL; - if (conn->sock) sock_release(conn->sock); if (conn->conn_transport->iscsit_free_conn) conn->conn_transport->iscsit_free_conn(conn); - iscsit_put_transport(conn->conn_transport); - pr_debug("Moving to TARG_CONN_STATE_FREE.\n"); conn->conn_state = TARG_CONN_STATE_FREE; - kfree(conn); + iscsit_free_conn(conn); spin_lock_bh(&sess->conn_lock); atomic_dec(&sess->nconn); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index f58b9c1d6fd4..bb90c80ff388 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -67,45 +67,10 @@ static struct iscsi_login *iscsi_login_init_conn(struct iscsi_conn *conn) goto out_req_buf; } - conn->conn_ops = kzalloc(sizeof(struct iscsi_conn_ops), GFP_KERNEL); - if (!conn->conn_ops) { - pr_err("Unable to allocate memory for" - " struct iscsi_conn_ops.\n"); - goto out_rsp_buf; - } - - init_waitqueue_head(&conn->queues_wq); - INIT_LIST_HEAD(&conn->conn_list); - INIT_LIST_HEAD(&conn->conn_cmd_list); - INIT_LIST_HEAD(&conn->immed_queue_list); - INIT_LIST_HEAD(&conn->response_queue_list); - init_completion(&conn->conn_post_wait_comp); - init_completion(&conn->conn_wait_comp); - init_completion(&conn->conn_wait_rcfr_comp); - init_completion(&conn->conn_waiting_on_uc_comp); - init_completion(&conn->conn_logout_comp); - init_completion(&conn->rx_half_close_comp); - init_completion(&conn->tx_half_close_comp); - init_completion(&conn->rx_login_comp); - spin_lock_init(&conn->cmd_lock); - spin_lock_init(&conn->conn_usage_lock); - spin_lock_init(&conn->immed_queue_lock); - spin_lock_init(&conn->nopin_timer_lock); - spin_lock_init(&conn->response_queue_lock); - spin_lock_init(&conn->state_lock); - - if (!zalloc_cpumask_var(&conn->conn_cpumask, GFP_KERNEL)) { - pr_err("Unable to allocate conn->conn_cpumask\n"); - goto out_conn_ops; - } conn->conn_login = login; return login; -out_conn_ops: - kfree(conn->conn_ops); -out_rsp_buf: - kfree(login->rsp_buf); out_req_buf: kfree(login->req_buf); out_login: @@ -1147,6 +1112,75 @@ iscsit_conn_set_transport(struct iscsi_conn *conn, struct iscsit_transport *t) return 0; } +static struct iscsi_conn *iscsit_alloc_conn(struct iscsi_np *np) +{ + struct iscsi_conn *conn; + + conn = kzalloc(sizeof(struct iscsi_conn), GFP_KERNEL); + if (!conn) { + pr_err("Could not allocate memory for new connection\n"); + return NULL; + } + pr_debug("Moving to TARG_CONN_STATE_FREE.\n"); + conn->conn_state = TARG_CONN_STATE_FREE; + + init_waitqueue_head(&conn->queues_wq); + INIT_LIST_HEAD(&conn->conn_list); + INIT_LIST_HEAD(&conn->conn_cmd_list); + INIT_LIST_HEAD(&conn->immed_queue_list); + INIT_LIST_HEAD(&conn->response_queue_list); + init_completion(&conn->conn_post_wait_comp); + init_completion(&conn->conn_wait_comp); + init_completion(&conn->conn_wait_rcfr_comp); + init_completion(&conn->conn_waiting_on_uc_comp); + init_completion(&conn->conn_logout_comp); + init_completion(&conn->rx_half_close_comp); + init_completion(&conn->tx_half_close_comp); + init_completion(&conn->rx_login_comp); + spin_lock_init(&conn->cmd_lock); + spin_lock_init(&conn->conn_usage_lock); + spin_lock_init(&conn->immed_queue_lock); + spin_lock_init(&conn->nopin_timer_lock); + spin_lock_init(&conn->response_queue_lock); + spin_lock_init(&conn->state_lock); + + timer_setup(&conn->nopin_response_timer, + iscsit_handle_nopin_response_timeout, 0); + timer_setup(&conn->nopin_timer, iscsit_handle_nopin_timeout, 0); + + if (iscsit_conn_set_transport(conn, np->np_transport) < 0) + goto free_conn; + + conn->conn_ops = kzalloc(sizeof(struct iscsi_conn_ops), GFP_KERNEL); + if (!conn->conn_ops) { + pr_err("Unable to allocate memory for struct iscsi_conn_ops.\n"); + goto put_transport; + } + + if (!zalloc_cpumask_var(&conn->conn_cpumask, GFP_KERNEL)) { + pr_err("Unable to allocate conn->conn_cpumask\n"); + goto free_mask; + } + + return conn; + +free_mask: + free_cpumask_var(conn->conn_cpumask); +put_transport: + iscsit_put_transport(conn->conn_transport); +free_conn: + kfree(conn); + return NULL; +} + +void iscsit_free_conn(struct iscsi_conn *conn) +{ + free_cpumask_var(conn->conn_cpumask); + kfree(conn->conn_ops); + iscsit_put_transport(conn->conn_transport); + kfree(conn); +} + void iscsi_target_login_sess_out(struct iscsi_conn *conn, struct iscsi_np *np, bool zero_tsih, bool new_sess) { @@ -1196,10 +1230,6 @@ old_sess_out: crypto_free_ahash(tfm); } - free_cpumask_var(conn->conn_cpumask); - - kfree(conn->conn_ops); - if (conn->param_list) { iscsi_release_param_list(conn->param_list); conn->param_list = NULL; @@ -1217,8 +1247,7 @@ old_sess_out: if (conn->conn_transport->iscsit_free_conn) conn->conn_transport->iscsit_free_conn(conn); - iscsit_put_transport(conn->conn_transport); - kfree(conn); + iscsit_free_conn(conn); } static int __iscsi_target_login_thread(struct iscsi_np *np) @@ -1248,31 +1277,16 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) } spin_unlock_bh(&np->np_thread_lock); - conn = kzalloc(sizeof(struct iscsi_conn), GFP_KERNEL); + conn = iscsit_alloc_conn(np); if (!conn) { - pr_err("Could not allocate memory for" - " new connection\n"); /* Get another socket */ return 1; } - pr_debug("Moving to TARG_CONN_STATE_FREE.\n"); - conn->conn_state = TARG_CONN_STATE_FREE; - - timer_setup(&conn->nopin_response_timer, - iscsit_handle_nopin_response_timeout, 0); - timer_setup(&conn->nopin_timer, iscsit_handle_nopin_timeout, 0); - - if (iscsit_conn_set_transport(conn, np->np_transport) < 0) { - kfree(conn); - return 1; - } rc = np->np_transport->iscsit_accept_np(np, conn); if (rc == -ENOSYS) { complete(&np->np_restart_comp); - iscsit_put_transport(conn->conn_transport); - kfree(conn); - conn = NULL; + iscsit_free_conn(conn); goto exit; } else if (rc < 0) { spin_lock_bh(&np->np_thread_lock); @@ -1280,17 +1294,13 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); - iscsit_put_transport(conn->conn_transport); - kfree(conn); - conn = NULL; + iscsit_free_conn(conn); /* Get another socket */ return 1; } spin_unlock_bh(&np->np_thread_lock); - iscsit_put_transport(conn->conn_transport); - kfree(conn); - conn = NULL; - goto out; + iscsit_free_conn(conn); + return 1; } /* * Perform the remaining iSCSI connection initialization items.. @@ -1440,7 +1450,6 @@ old_sess_out: tpg_np = NULL; } -out: return 1; exit: diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 74ac3abc44a0..3b8e3639ff5d 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -19,7 +19,7 @@ extern int iscsi_target_setup_login_socket(struct iscsi_np *, extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *); extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *); extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); -extern void iscsit_free_conn(struct iscsi_np *, struct iscsi_conn *); +extern void iscsit_free_conn(struct iscsi_conn *); extern int iscsit_start_kthreads(struct iscsi_conn *); extern void iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, From c77a2fa3ff8f73d1a485e67e6f81c64823739d59 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Wed, 29 Aug 2018 23:55:53 -0700 Subject: [PATCH 050/269] scsi: qedi: Add the CRC size within iSCSI NVM image The QED driver commit, 1ac4329a1cff ("qed: Add configuration information to register dump and debug data"), removes the CRC length validation causing nvm_get_image failure while loading qedi driver: [qed_mcp_get_nvm_image:2700(host_10-0)]Image [0] is too big - 00006008 bytes where only 00006004 are available [qedi_get_boot_info:2253]:10: Could not get NVM image. ret = -12 Hence add and adjust the CRC size to iSCSI NVM image to read boot info at qedi load time. Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi.h | 7 ++++++- drivers/scsi/qedi/qedi_main.c | 28 +++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h index fc3babc15fa3..a6f96b35e971 100644 --- a/drivers/scsi/qedi/qedi.h +++ b/drivers/scsi/qedi/qedi.h @@ -77,6 +77,11 @@ enum qedi_nvm_tgts { QEDI_NVM_TGT_SEC, }; +struct qedi_nvm_iscsi_image { + struct nvm_iscsi_cfg iscsi_cfg; + u32 crc; +}; + struct qedi_uio_ctrl { /* meta data */ u32 uio_hsi_version; @@ -294,7 +299,7 @@ struct qedi_ctx { void *bdq_pbl_list; dma_addr_t bdq_pbl_list_dma; u8 bdq_pbl_list_num_entries; - struct nvm_iscsi_cfg *iscsi_cfg; + struct qedi_nvm_iscsi_image *iscsi_image; dma_addr_t nvm_buf_dma; void __iomem *bdq_primary_prod; void __iomem *bdq_secondary_prod; diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index aa96bccb5a96..cc8e64dc65ad 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1346,23 +1346,26 @@ exit_setup_int: static void qedi_free_nvm_iscsi_cfg(struct qedi_ctx *qedi) { - if (qedi->iscsi_cfg) + if (qedi->iscsi_image) dma_free_coherent(&qedi->pdev->dev, - sizeof(struct nvm_iscsi_cfg), - qedi->iscsi_cfg, qedi->nvm_buf_dma); + sizeof(struct qedi_nvm_iscsi_image), + qedi->iscsi_image, qedi->nvm_buf_dma); } static int qedi_alloc_nvm_iscsi_cfg(struct qedi_ctx *qedi) { - qedi->iscsi_cfg = dma_zalloc_coherent(&qedi->pdev->dev, - sizeof(struct nvm_iscsi_cfg), - &qedi->nvm_buf_dma, GFP_KERNEL); - if (!qedi->iscsi_cfg) { + struct qedi_nvm_iscsi_image nvm_image; + + qedi->iscsi_image = dma_zalloc_coherent(&qedi->pdev->dev, + sizeof(nvm_image), + &qedi->nvm_buf_dma, + GFP_KERNEL); + if (!qedi->iscsi_image) { QEDI_ERR(&qedi->dbg_ctx, "Could not allocate NVM BUF.\n"); return -ENOMEM; } QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, - "NVM BUF addr=0x%p dma=0x%llx.\n", qedi->iscsi_cfg, + "NVM BUF addr=0x%p dma=0x%llx.\n", qedi->iscsi_image, qedi->nvm_buf_dma); return 0; @@ -1905,7 +1908,7 @@ qedi_get_nvram_block(struct qedi_ctx *qedi) struct nvm_iscsi_block *block; pf = qedi->dev_info.common.abs_pf_id; - block = &qedi->iscsi_cfg->block[0]; + block = &qedi->iscsi_image->iscsi_cfg.block[0]; for (i = 0; i < NUM_OF_ISCSI_PF_SUPPORTED; i++, block++) { flags = ((block->id) & NVM_ISCSI_CFG_BLK_CTRL_FLAG_MASK) >> NVM_ISCSI_CFG_BLK_CTRL_FLAG_OFFSET; @@ -2194,15 +2197,14 @@ static void qedi_boot_release(void *data) static int qedi_get_boot_info(struct qedi_ctx *qedi) { int ret = 1; - u16 len; - - len = sizeof(struct nvm_iscsi_cfg); + struct qedi_nvm_iscsi_image nvm_image; QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Get NVM iSCSI CFG image\n"); ret = qedi_ops->common->nvm_get_image(qedi->cdev, QED_NVM_IMAGE_ISCSI_CFG, - (char *)qedi->iscsi_cfg, len); + (char *)qedi->iscsi_image, + sizeof(nvm_image)); if (ret) QEDI_ERR(&qedi->dbg_ctx, "Could not get NVM image. ret = %d\n", ret); From 16037643969e095509cd8446a3f8e406a6dc3a2c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 30 Aug 2018 15:13:16 +0200 Subject: [PATCH 051/269] ALSA: hda - Fix cancel_work_sync() stall from jackpoll work On AMD/ATI controllers, the HD-audio controller driver allows a bus reset upon the error recovery, and its procedure includes the cancellation of pending jack polling work as found in snd_hda_bus_codec_reset(). This works usually fine, but it becomes a problem when the reset happens from the jack poll work itself; then calling cancel_work_sync() from the work being processed tries to wait the finish endlessly. As a workaround, this patch adds the check of current_work() and applies the cancel_work_sync() only when it's not from the jackpoll_work. This doesn't fix the root cause of the reported error below, but at least, it eases the unexpected stall of the whole system. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200937 Cc: Cc: Lukas Wunner Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 0a5085537034..26d348b47867 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3935,7 +3935,8 @@ void snd_hda_bus_reset_codecs(struct hda_bus *bus) list_for_each_codec(codec, bus) { /* FIXME: maybe a better way needed for forced reset */ - cancel_delayed_work_sync(&codec->jackpoll_work); + if (current_work() != &codec->jackpoll_work.work) + cancel_delayed_work_sync(&codec->jackpoll_work); #ifdef CONFIG_PM if (hda_codec_is_power_on(codec)) { hda_call_codec_suspend(codec); From b871da4a7778eda2e700ae8bf5f86e74a004c1ec Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 23 Aug 2018 18:24:24 +0200 Subject: [PATCH 052/269] KVM: nVMX: avoid redundant double assignment of nested_run_pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nested_run_pending is set 20 lines above and check_vmentry_prereqs()/ check_vmentry_postreqs() don't seem to be resetting it (the later, however, checks it). Signed-off-by: Vitaly Kuznetsov Reviewed-by: Paolo Bonzini Reviewed-by: Jim Mattson Reviewed-by: Eduardo Valentin Reviewed-by: Krish Sadhukhan Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1d26f3c4985b..92f027745842 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -13988,9 +13988,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) return -EINVAL; - if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING) - vmx->nested.nested_run_pending = 1; - vmx->nested.dirty_vmcs12 = true; ret = enter_vmx_non_root_mode(vcpu, NULL); if (ret) From 0186ec823280f5db55ab2e6291933bebe2e92772 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Aug 2018 16:22:28 +0100 Subject: [PATCH 053/269] KVM: SVM: remove unused variable dst_vaddr_end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variable dst_vaddr_end is being assigned but is never used hence it is redundant and can be removed. Cleans up clang warning: variable 'dst_vaddr_end' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6276140044d0..4339fc4715fa 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6747,7 +6747,7 @@ e_free: static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) { unsigned long vaddr, vaddr_end, next_vaddr; - unsigned long dst_vaddr, dst_vaddr_end; + unsigned long dst_vaddr; struct page **src_p, **dst_p; struct kvm_sev_dbg debug; unsigned long n; @@ -6763,7 +6763,6 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) size = debug.len; vaddr_end = vaddr + size; dst_vaddr = debug.dst_uaddr; - dst_vaddr_end = dst_vaddr + size; for (; vaddr < vaddr_end; vaddr = next_vaddr) { int len, s_off, d_off; From c4409905cd6eb42cfd06126e9226b0150e05a715 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 23 Aug 2018 13:56:46 -0700 Subject: [PATCH 054/269] KVM: VMX: Do not allow reexecute_instruction() when skipping MMIO instr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-execution after an emulation decode failure is only intended to handle a case where two or vCPUs race to write a shadowed page, i.e. we should never re-execute an instruction as part of MMIO emulation. As handle_ept_misconfig() is only used for MMIO emulation, it should pass EMULTYPE_NO_REEXECUTE when using the emulator to skip an instr in the fast-MMIO case where VM_EXIT_INSTRUCTION_LEN is invalid. And because the cr2 value passed to x86_emulate_instruction() is only destined for use when retrying or reexecuting, we can simply call emulate_instruction(). Fixes: d391f1207067 ("x86/kvm/vmx: do not use vm-exit instruction length for fast MMIO when running nested") Cc: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 92f027745842..b345ad91809c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7704,8 +7704,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return kvm_skip_emulated_instruction(vcpu); else - return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP, - NULL, 0) == EMULATE_DONE; + return emulate_instruction(vcpu, EMULTYPE_SKIP) == + EMULATE_DONE; } return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); From 35be0aded76b54a24dc8aa678a71bca22273e8d8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 23 Aug 2018 13:56:47 -0700 Subject: [PATCH 055/269] KVM: x86: SVM: Set EMULTYPE_NO_REEXECUTE for RSM emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-execution after an emulation decode failure is only intended to handle a case where two or vCPUs race to write a shadowed page, i.e. we should never re-execute an instruction as part of RSM emulation. Add a new helper, kvm_emulate_instruction_from_buffer(), to support emulating from a pre-defined buffer. This eliminates the last direct call to x86_emulate_instruction() outside of kvm_mmu_page_fault(), which means x86_emulate_instruction() can be unexported in a future patch. Fixes: 7607b7174405 ("KVM: SVM: install RSM intercept") Cc: Brijesh Singh Signed-off-by: Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/svm.c | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 00ddb0c9e612..3b220b86c8e4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1251,6 +1251,13 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); } +static inline int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, + void *insn, int insn_len) +{ + return x86_emulate_instruction(vcpu, 0, EMULTYPE_NO_REEXECUTE, + insn, insn_len); +} + void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4339fc4715fa..b3cdfde8ff5e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3874,8 +3874,8 @@ static int emulate_on_interception(struct vcpu_svm *svm) static int rsm_interception(struct vcpu_svm *svm) { - return x86_emulate_instruction(&svm->vcpu, 0, 0, - rsm_ins_bytes, 2) == EMULATE_DONE; + return kvm_emulate_instruction_from_buffer(&svm->vcpu, + rsm_ins_bytes, 2) == EMULATE_DONE; } static int rdpmc_interception(struct vcpu_svm *svm) From 8065dbd1ee0ef04321d80da7999b4f0086e0a407 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 23 Aug 2018 13:56:48 -0700 Subject: [PATCH 056/269] KVM: x86: Invert emulation re-execute behavior to make it opt-in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-execution of an instruction after emulation decode failure is intended to be used only when emulating shadow page accesses. Invert the flag to make allowing re-execution opt-in since that behavior is by far in the minority. Signed-off-by: Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 8 +++----- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/x86.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3b220b86c8e4..a69ea11f3bab 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1238,7 +1238,7 @@ enum emulation_result { #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_RETRY (1 << 3) -#define EMULTYPE_NO_REEXECUTE (1 << 4) +#define EMULTYPE_ALLOW_REEXECUTE (1 << 4) #define EMULTYPE_NO_UD_ON_FAIL (1 << 5) #define EMULTYPE_VMWARE (1 << 6) int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, @@ -1247,15 +1247,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, - emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); + return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } static inline int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len) { - return x86_emulate_instruction(vcpu, 0, EMULTYPE_NO_REEXECUTE, - insn, insn_len); + return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); } void kvm_enable_efer_bits(u64); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a282321329b5..4508c34eef20 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5217,7 +5217,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, void *insn, int insn_len) { - int r, emulation_type = EMULTYPE_RETRY; + int r, emulation_type = EMULTYPE_RETRY | EMULTYPE_ALLOW_REEXECUTE; enum emulation_result er; bool direct = vcpu->arch.mmu.direct_map; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 506bd2b4b8bb..d6f85ea23101 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5870,7 +5870,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, gpa_t gpa = cr2; kvm_pfn_t pfn; - if (emulation_type & EMULTYPE_NO_REEXECUTE) + if (!(emulation_type & EMULTYPE_ALLOW_REEXECUTE)) return false; if (!vcpu->arch.mmu.direct_map) { From 384bf2218e96f57118270945b1841e4dbbe9e352 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 23 Aug 2018 13:56:49 -0700 Subject: [PATCH 057/269] KVM: x86: Merge EMULTYPE_RETRY and EMULTYPE_ALLOW_REEXECUTE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit retry_instruction() and reexecute_instruction() are a package deal, i.e. there is no scenario where one is allowed and the other is not. Merge their controlling emulation type flags to enforce this in code. Name the combined flag EMULTYPE_ALLOW_RETRY to make it abundantly clear that we are allowing re{try,execute} to occur, as opposed to explicitly requesting retry of a previously failed instruction. Signed-off-by: Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 7 +++---- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/x86.c | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a69ea11f3bab..35e03b13edcb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1237,10 +1237,9 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -#define EMULTYPE_RETRY (1 << 3) -#define EMULTYPE_ALLOW_REEXECUTE (1 << 4) -#define EMULTYPE_NO_UD_ON_FAIL (1 << 5) -#define EMULTYPE_VMWARE (1 << 6) +#define EMULTYPE_ALLOW_RETRY (1 << 3) +#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) +#define EMULTYPE_VMWARE (1 << 5) int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, void *insn, int insn_len); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4508c34eef20..0246a1ea7f55 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5217,7 +5217,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, void *insn, int insn_len) { - int r, emulation_type = EMULTYPE_RETRY | EMULTYPE_ALLOW_REEXECUTE; + int r, emulation_type = EMULTYPE_ALLOW_RETRY; enum emulation_result er; bool direct = vcpu->arch.mmu.direct_map; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d6f85ea23101..924ce28723c4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5870,7 +5870,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, gpa_t gpa = cr2; kvm_pfn_t pfn; - if (!(emulation_type & EMULTYPE_ALLOW_REEXECUTE)) + if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) return false; if (!vcpu->arch.mmu.direct_map) { @@ -5958,7 +5958,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, */ vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; - if (!(emulation_type & EMULTYPE_RETRY)) + if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) return false; if (x86_page_table_writing_insn(ctxt)) From 472faffacd9032164f611f56329d0025ddca55b5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 23 Aug 2018 13:56:50 -0700 Subject: [PATCH 058/269] KVM: x86: Default to not allowing emulation retry in kvm_mmu_page_fault MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Effectively force kvm_mmu_page_fault() to opt-in to allowing retry to make it more obvious when and why it allows emulation to be retried. Previously this approach was less convenient due to retry and re-execute behavior being controlled by separate flags that were also inverted in their implementations (opt-in versus opt-out). Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/mmu.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 0246a1ea7f55..01fb8701ccd0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5217,7 +5217,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, void *insn, int insn_len) { - int r, emulation_type = EMULTYPE_ALLOW_RETRY; + int r, emulation_type = 0; enum emulation_result er; bool direct = vcpu->arch.mmu.direct_map; @@ -5230,10 +5230,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { r = handle_mmio_page_fault(vcpu, cr2, direct); - if (r == RET_PF_EMULATE) { - emulation_type = 0; + if (r == RET_PF_EMULATE) goto emulate; - } } if (r == RET_PF_INVALID) { @@ -5260,8 +5258,16 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, return 1; } - if (mmio_info_in_cache(vcpu, cr2, direct)) - emulation_type = 0; + /* + * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still + * optimistically try to just unprotect the page and let the processor + * re-execute the instruction that caused the page fault. Do not allow + * retrying MMIO emulation, as it's not only pointless but could also + * cause us to enter an infinite loop because the processor will keep + * faulting on the non-existent MMIO address. + */ + if (!mmio_info_in_cache(vcpu, cr2, direct)) + emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* * On AMD platforms, under certain conditions insn_len may be zero on #NPF. From 6c3dfeb6a48b1562bd5b8ec5f3317ef34d0134ef Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 23 Aug 2018 13:56:51 -0700 Subject: [PATCH 059/269] KVM: x86: Do not re-{try,execute} after failed emulation in L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a6f177efaa58 ("KVM: Reenter guest after emulation failure if due to access to non-mmio address") added reexecute_instruction() to handle the scenario where two (or more) vCPUS race to write a shadowed page, i.e. reexecute_instruction() is intended to return true if and only if the instruction being emulated was accessing a shadowed page. As L0 is only explicitly shadowing L1 tables, an emulation failure of a nested VM instruction cannot be due to a race to write a shadowed page and so should never be re-executed. This fixes an issue where an "MMIO" emulation failure[1] in L2 is all but guaranteed to result in an infinite loop when TDP is enabled. Because "cr2" is actually an L2 GPA when TDP is enabled, calling kvm_mmu_gva_to_gpa_write() to translate cr2 in the non-direct mapped case (L2 is never direct mapped) will almost always yield UNMAPPED_GVA and cause reexecute_instruction() to immediately return true. The !mmio_info_in_cache() check in kvm_mmu_page_fault() doesn't catch this case because mmio_info_in_cache() returns false for a nested MMU (the MMIO caching currently handles L1 only, e.g. to cache nested guests' GPAs we'd have to manually flush the cache when switching between VMs and when L1 updated its page tables controlling the nested guest). Way back when, commit 68be0803456b ("KVM: x86: never re-execute instruction with enabled tdp") changed reexecute_instruction() to always return false when using TDP under the assumption that KVM would only get into the emulator for MMIO. Commit 95b3cf69bdf8 ("KVM: x86: let reexecute_instruction work for tdp") effectively reverted that behavior in order to handle the scenario where emulation failed due to an access from L1 to the shadow page tables for L2, but it didn't account for the case where emulation failed in L2 with TDP enabled. All of the above logic also applies to retry_instruction(), added by commit 1cb3f3ae5a38 ("KVM: x86: retry non-page-table writing instructions"). An indefinite loop in retry_instruction() should be impossible as it protects against retrying the same instruction over and over, but it's still correct to not retry an L2 instruction in the first place. Fix the immediate issue by adding a check for a nested guest when determining whether or not to allow retry in kvm_mmu_page_fault(). In addition to fixing the immediate bug, add WARN_ON_ONCE in the retry functions since they are not designed to handle nested cases, i.e. they need to be modified even if there is some scenario in the future where we want to allow retrying a nested guest. [1] This issue was encountered after commit 3a2936dedd20 ("kvm: mmu: Don't expose private memslots to L2") changed the page fault path to return KVM_PFN_NOSLOT when translating an L2 access to a prive memslot. Returning KVM_PFN_NOSLOT is semantically correct when we want to hide a memslot from L2, i.e. there effectively is no defined memory region for L2, but it has the unfortunate side effect of making KVM think the GFN is a MMIO page, thus triggering emulation. The failure occurred with in-development code that deliberately exposed a private memslot to L2, which L2 accessed with an instruction that is not emulated by KVM. Fixes: 95b3cf69bdf8 ("KVM: x86: let reexecute_instruction work for tdp") Fixes: 1cb3f3ae5a38 ("KVM: x86: retry non-page-table writing instructions") Signed-off-by: Sean Christopherson Cc: Jim Mattson Cc: Krish Sadhukhan Cc: Xiao Guangrong Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/mmu.c | 7 +++++-- arch/x86/kvm/x86.c | 6 ++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 01fb8701ccd0..f7e83b1e0eb2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5264,9 +5264,12 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * re-execute the instruction that caused the page fault. Do not allow * retrying MMIO emulation, as it's not only pointless but could also * cause us to enter an infinite loop because the processor will keep - * faulting on the non-existent MMIO address. + * faulting on the non-existent MMIO address. Retrying an instruction + * from a nested guest is also pointless and dangerous as we are only + * explicitly shadowing L1's page tables, i.e. unprotecting something + * for L1 isn't going to magically fix whatever issue cause L2 to fail. */ - if (!mmio_info_in_cache(vcpu, cr2, direct)) + if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 924ce28723c4..cbe2921e972b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5873,6 +5873,9 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) return false; + if (WARN_ON_ONCE(is_guest_mode(vcpu))) + return false; + if (!vcpu->arch.mmu.direct_map) { /* * Write permission should be allowed since only @@ -5961,6 +5964,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) return false; + if (WARN_ON_ONCE(is_guest_mode(vcpu))) + return false; + if (x86_page_table_writing_insn(ctxt)) return false; From 0ce97a2b627c5e26347aee298f571ddf925e5fe4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 23 Aug 2018 13:56:52 -0700 Subject: [PATCH 060/269] KVM: x86: Rename emulate_instruction() to kvm_emulate_instruction() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lack of the kvm_ prefix gives the impression that it's a VMX or SVM specific function, and there's no conflict that prevents adding the kvm_ prefix. Signed-off-by: Sean Christopherson Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 12 ++++++------ arch/x86/kvm/vmx.c | 16 ++++++++-------- arch/x86/kvm/x86.c | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35e03b13edcb..8c9023661351 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1243,7 +1243,7 @@ enum emulation_result { int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, void *insn, int insn_len); -static inline int emulate_instruction(struct kvm_vcpu *vcpu, +static inline int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3cdfde8ff5e..89c4c5aa15f1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -776,7 +776,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) } if (!svm->next_rip) { - if (emulate_instruction(vcpu, EMULTYPE_SKIP) != + if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) != EMULATE_DONE) printk(KERN_DEBUG "%s: NOP\n", __func__); return; @@ -2715,7 +2715,7 @@ static int gp_interception(struct vcpu_svm *svm) WARN_ON_ONCE(!enable_vmware_backdoor); - er = emulate_instruction(vcpu, + er = kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); if (er == EMULATE_USER_EXIT) return 0; @@ -2819,7 +2819,7 @@ static int io_interception(struct vcpu_svm *svm) string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; if (string) - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; @@ -3861,7 +3861,7 @@ static int iret_interception(struct vcpu_svm *svm) static int invlpg_interception(struct vcpu_svm *svm) { if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) - return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); return kvm_skip_emulated_instruction(&svm->vcpu); @@ -3869,7 +3869,7 @@ static int invlpg_interception(struct vcpu_svm *svm) static int emulate_on_interception(struct vcpu_svm *svm) { - return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } static int rsm_interception(struct vcpu_svm *svm) @@ -4700,7 +4700,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) ret = avic_unaccel_trap_write(svm); } else { /* Handling Fault */ - ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); + ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); } return ret; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b345ad91809c..f910d33858d9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6983,7 +6983,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * Cause the #SS fault with 0 error code in VM86 mode. */ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { + if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; return kvm_vcpu_halt(vcpu); @@ -7054,7 +7054,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { WARN_ON_ONCE(!enable_vmware_backdoor); - er = emulate_instruction(vcpu, + er = kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); if (er == EMULATE_USER_EXIT) return 0; @@ -7157,7 +7157,7 @@ static int handle_io(struct kvm_vcpu *vcpu) ++vcpu->stat.io_exits; if (string) - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -7231,7 +7231,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) static int handle_desc(struct kvm_vcpu *vcpu) { WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_cr(struct kvm_vcpu *vcpu) @@ -7480,7 +7480,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu) static int handle_invd(struct kvm_vcpu *vcpu) { - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_invlpg(struct kvm_vcpu *vcpu) @@ -7547,7 +7547,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } } - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) @@ -7704,7 +7704,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return kvm_skip_emulated_instruction(vcpu); else - return emulate_instruction(vcpu, EMULTYPE_SKIP) == + return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == EMULATE_DONE; } @@ -7748,7 +7748,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = emulate_instruction(vcpu, 0); + err = kvm_emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cbe2921e972b..915002c7f07e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4987,7 +4987,7 @@ int handle_ud(struct kvm_vcpu *vcpu) emul_type = 0; } - er = emulate_instruction(vcpu, emul_type); + er = kvm_emulate_instruction(vcpu, emul_type); if (er == EMULATE_USER_EXIT) return 0; if (er != EMULATE_DONE) @@ -7740,7 +7740,7 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu) { int r; vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); + r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (r != EMULATE_DONE) return 0; From c60658d1d983641fcdbb16f86bc2f3806d88bab4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 23 Aug 2018 13:56:53 -0700 Subject: [PATCH 061/269] KVM: x86: Unexport x86_emulate_instruction() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allowing x86_emulate_instruction() to be called directly has led to subtle bugs being introduced, e.g. not setting EMULTYPE_NO_REEXECUTE in the emulation type. While most of the blame lies on re-execute being opt-out, exporting x86_emulate_instruction() also exposes its cr2 parameter, which may have contributed to commit d391f1207067 ("x86/kvm/vmx: do not use vm-exit instruction length for fast MMIO when running nested") using x86_emulate_instruction() instead of emulate_instruction() because "hey, I have a cr2!", which in turn introduced its EMULTYPE_NO_REEXECUTE bug. Signed-off-by: Sean Christopherson Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 17 +++-------------- arch/x86/kvm/x86.c | 14 +++++++++++++- arch/x86/kvm/x86.h | 2 ++ 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8c9023661351..e12916e7c2fb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1240,20 +1240,9 @@ enum emulation_result { #define EMULTYPE_ALLOW_RETRY (1 << 3) #define EMULTYPE_NO_UD_ON_FAIL (1 << 4) #define EMULTYPE_VMWARE (1 << 5) -int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, - int emulation_type, void *insn, int insn_len); - -static inline int kvm_emulate_instruction(struct kvm_vcpu *vcpu, - int emulation_type) -{ - return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); -} - -static inline int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, - void *insn, int insn_len) -{ - return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); -} +int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); +int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, + void *insn, int insn_len); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 915002c7f07e..542f6315444d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6282,7 +6282,19 @@ restart: return r; } -EXPORT_SYMBOL_GPL(x86_emulate_instruction); + +int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) +{ + return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); +} +EXPORT_SYMBOL_GPL(kvm_emulate_instruction); + +int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, + void *insn, int insn_len) +{ + return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); +} +EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 257f27620bc2..67b9568613f3 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -274,6 +274,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); bool kvm_vector_hashing_enabled(void); +int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, + int emulation_type, void *insn, int insn_len); #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ From 58f33cfe73076b6497bada4f7b5bda961ed68083 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 24 Aug 2018 14:03:55 +0200 Subject: [PATCH 062/269] tools/kvm_stat: fix python3 issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python3 returns a float for a regular division - switch to a division operator that returns an integer. Furthermore, filters return a generator object instead of the actual list - wrap result in yet another list, which makes it still work in both, Python2 and 3. Signed-off-by: Stefan Raspl Signed-off-by: Radim Krčmář --- tools/kvm/kvm_stat/kvm_stat | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 56c4b3f8a01b..e10b90a8917a 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -759,7 +759,7 @@ class DebugfsProvider(Provider): if len(vms) == 0: self.do_read = False - self.paths = filter(lambda x: "{}-".format(pid) in x, vms) + self.paths = list(filter(lambda x: "{}-".format(pid) in x, vms)) else: self.paths = [] @@ -1219,10 +1219,10 @@ class Tui(object): (x, term_width) = self.screen.getmaxyx() row = 2 for line in text: - start = (term_width - len(line)) / 2 + start = (term_width - len(line)) // 2 self.screen.addstr(row, start, line) row += 1 - self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint, + self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint, curses.A_STANDOUT) self.screen.getkey() From 617c66b9f236d20f11cecbb3f45e6d5675b2fae1 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 24 Aug 2018 14:03:56 +0200 Subject: [PATCH 063/269] tools/kvm_stat: fix handling of invalid paths in debugfs provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When filtering by guest, kvm_stat displays garbage when the guest is destroyed - see sample output below. We add code to remove the invalid paths from the providers, so at least no more garbage is displayed. Here's a sample output to illustrate: kvm statistics - pid 13986 (foo) Event Total %Total CurAvg/s diagnose_258 -2 0.0 0 deliver_program_interruption -3 0.0 0 diagnose_308 -4 0.0 0 halt_poll_invalid -91 0.0 -6 deliver_service_signal -244 0.0 -16 halt_successful_poll -250 0.1 -17 exit_pei -285 0.1 -19 exit_external_request -312 0.1 -21 diagnose_9c -328 0.1 -22 userspace_handled -713 0.1 -47 halt_attempted_poll -939 0.2 -62 deliver_emergency_signal -3126 0.6 -208 halt_wakeup -7199 1.5 -481 exit_wait_state -7379 1.5 -493 diagnose_500 -56499 11.5 -3757 exit_null -85491 17.4 -5685 diagnose_44 -133300 27.1 -8874 exit_instruction -195898 39.8 -13037 Total -492063 Signed-off-by: Stefan Raspl Signed-off-by: Radim Krčmář --- tools/kvm/kvm_stat/kvm_stat | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index e10b90a8917a..b9e8d0def1ab 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -766,6 +766,13 @@ class DebugfsProvider(Provider): self.do_read = True self.reset() + def _verify_paths(self): + """Remove invalid paths""" + for path in self.paths: + if not os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path)): + self.paths.remove(path) + continue + def read(self, reset=0, by_guest=0): """Returns a dict with format:'file name / field -> current value'. @@ -780,6 +787,7 @@ class DebugfsProvider(Provider): # If no debugfs filtering support is available, then don't read. if not self.do_read: return results + self._verify_paths() paths = self.paths if self._pid == 0: From 710ab11ad9329d2d4b044405e328c994b19a2aa9 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 24 Aug 2018 14:03:57 +0200 Subject: [PATCH 064/269] tools/kvm_stat: fix updates for dead guests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With pid filtering active, when a guest is removed e.g. via virsh shutdown, successive updates produce garbage. Therefore, we add code to detect this case and prevent further body updates. Note that when displaying the help dialog via 'h' in this case, once we exit we're stuck with the 'Collecting data...' message till we remove the filter. Signed-off-by: Stefan Raspl Signed-off-by: Radim Krčmář --- tools/kvm/kvm_stat/kvm_stat | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index b9e8d0def1ab..7c92545931e3 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1170,6 +1170,9 @@ class Tui(object): return sorted_items + if not self._is_running_guest(self.stats.pid_filter): + # leave final data on screen + return row = 3 self.screen.move(row, 0) self.screen.clrtobot() @@ -1327,6 +1330,12 @@ class Tui(object): msg = '"' + str(val) + '": Invalid value' self._refresh_header() + def _is_running_guest(self, pid): + """Check if pid is still a running process.""" + if not pid: + return True + return os.path.isdir(os.path.join('/proc/', str(pid))) + def _show_vm_selection_by_guest(self): """Draws guest selection mask. @@ -1354,7 +1363,7 @@ class Tui(object): if not guest or guest == '0': break if guest.isdigit(): - if not os.path.isdir(os.path.join('/proc/', guest)): + if not self._is_running_guest(guest): msg = '"' + guest + '": Not a running process' continue pid = int(guest) From 0db8b3102368755242b44f2b30f93302c70e8e82 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 24 Aug 2018 14:03:58 +0200 Subject: [PATCH 065/269] tools/kvm_stat: don't reset stats when setting PID filter for debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When setting a PID filter in debugfs, we unnecessarily reset the statistics, although there is no reason to do so. This behavior was merely introduced with commit 9f114a03c6854f "tools/kvm_stat: add interactive command 'r'", most likely to mimic the behavior of the tracepoints provider in this respect. However, there are plenty of differences between the two providers, so there is no reason not to take advantage of the possibility to filter by PID without resetting the statistics. Signed-off-by: Stefan Raspl Signed-off-by: Radim Krčmář --- tools/kvm/kvm_stat/kvm_stat | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 7c92545931e3..62fbb8802f60 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -764,7 +764,6 @@ class DebugfsProvider(Provider): else: self.paths = [] self.do_read = True - self.reset() def _verify_paths(self): """Remove invalid paths""" From 29c39f38e4e8dbf0497e81db6c985ee59259f002 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 24 Aug 2018 14:03:59 +0200 Subject: [PATCH 066/269] tools/kvm_stat: handle guest removals more gracefully MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running with the DebugFS provider, removal of a guest can result in a negative CurAvg/s, which looks rather confusing. If so, suppress the body refresh and print a message instead. To reproduce, have at least one guest A completely booted. Then start another guest B (which generates a huge amount of events), then destroy B. On the next refresh, kvm_stat should display a whole lot of negative values in the CurAvg/s column. Signed-off-by: Stefan Raspl Signed-off-by: Radim Krčmář --- tools/kvm/kvm_stat/kvm_stat | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 62fbb8802f60..bd620579eb6f 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1194,6 +1194,7 @@ class Tui(object): # print events tavg = 0 tcur = 0 + guest_removed = False for key, values in get_sorted_events(self, stats): if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0): break @@ -1201,7 +1202,10 @@ class Tui(object): key = self.get_gname_from_pid(key) if not key: continue - cur = int(round(values.delta / sleeptime)) if values.delta else '' + cur = int(round(values.delta / sleeptime)) if values.delta else 0 + if cur < 0: + guest_removed = True + continue if key[0] != ' ': if values.delta: tcur += values.delta @@ -1214,7 +1218,10 @@ class Tui(object): values.value * 100 / float(ltotal), cur)) row += 1 if row == 3: - self.screen.addstr(4, 1, 'No matching events reported yet') + if guest_removed: + self.screen.addstr(4, 1, 'Guest removed, updating...') + else: + self.screen.addstr(4, 1, 'No matching events reported yet') if row > 4: tavg = int(round(tcur / sleeptime)) if tcur > 0 else '' self.screen.addstr(row, 1, '%-40s %10d %8s' % From 404517e40867aef60554ef497d5cf8d089a5b9cf Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 24 Aug 2018 14:04:00 +0200 Subject: [PATCH 067/269] tools/kvm_stat: indicate dead guests as such MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For destroyed guests, kvm_stat essentially freezes with the last data displayed. This is acceptable for users, in case they want to inspect the final data. But it looks a bit irritating. Therefore, detect this situation and display a respective indicator in the header. Signed-off-by: Stefan Raspl Signed-off-by: Radim Krčmář --- tools/kvm/kvm_stat/kvm_stat | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index bd620579eb6f..5c2422b0f2f8 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1108,10 +1108,10 @@ class Tui(object): if len(gname) > MAX_GUEST_NAME_LEN else gname)) if pid > 0: - self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}' - .format(pid, gname), curses.A_BOLD) + self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname) else: - self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) + self._headline = 'kvm statistics - summary' + self.screen.addstr(0, 0, self._headline, curses.A_BOLD) if self.stats.fields_filter: regex = self.stats.fields_filter if len(regex) > MAX_REGEX_LEN: @@ -1170,6 +1170,7 @@ class Tui(object): return sorted_items if not self._is_running_guest(self.stats.pid_filter): + self._display_guest_dead() # leave final data on screen return row = 3 @@ -1228,6 +1229,11 @@ class Tui(object): ('Total', total, tavg), curses.A_BOLD) self.screen.refresh() + def _display_guest_dead(self): + marker = ' Guest is DEAD ' + y = min(len(self._headline), 80 - len(marker)) + self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT) + def _show_msg(self, text): """Display message centered text and exit on key press""" hint = 'Press any key to continue' From c012a0f2677529a0ae8f53a15bd7c61dc4ca5b5e Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 24 Aug 2018 14:04:01 +0200 Subject: [PATCH 068/269] tools/kvm_stat: re-animate display of dead guests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When filtering by guest (interactive commands 'p'/'g'), and the respective guest was destroyed, detect when the guest is up again through the guest name if possible. I.e. when displaying events for a specific guest, it is not necessary anymore to restart kvm_stat in case the guest is restarted. Signed-off-by: Stefan Raspl Signed-off-by: Radim Krčmář --- tools/kvm/kvm_stat/kvm_stat | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 5c2422b0f2f8..439b8a27488d 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1103,6 +1103,7 @@ class Tui(object): pid = self.stats.pid_filter self.screen.erase() gname = self.get_gname_from_pid(pid) + self._gname = gname if gname: gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...' if len(gname) > MAX_GUEST_NAME_LEN @@ -1170,6 +1171,15 @@ class Tui(object): return sorted_items if not self._is_running_guest(self.stats.pid_filter): + if self._gname: + try: # ...to identify the guest by name in case it's back + pids = self.get_pid_from_gname(self._gname) + if len(pids) == 1: + self._refresh_header(pids[0]) + self._update_pid(pids[0]) + return + except: + pass self._display_guest_dead() # leave final data on screen return From 2b52e2a67c86b0714eeae0d2030cb7fc14737626 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Aug 2018 12:07:37 +0900 Subject: [PATCH 069/269] arc: remove redundant GCC version checks Commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6") bumped the minimum GCC version to 4.6 for all architectures. With GCC >= 4.6 assumed, 'upto_gcc44' is empty, 'atleast_gcc44' is y. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index fb026196aaab..99cce77ab98f 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -43,10 +43,7 @@ ifdef CONFIG_ARC_CURR_IN_REG LINUXINCLUDE += -include ${src}/arch/arc/include/asm/current.h endif -upto_gcc44 := $(call cc-ifversion, -le, 0404, y) -atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y) - -cflags-$(atleast_gcc44) += -fsection-anchors +cflags-y += -fsection-anchors cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape @@ -82,11 +79,6 @@ cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB -# STAR 9000518362: (fixed with binutils shipping with gcc 4.8) -# arc-linux-uclibc-ld (buildroot) or arceb-elf32-ld (EZChip) don't accept -# --build-id w/o "-marclinux". Default arc-elf32-ld is OK -ldflags-$(upto_gcc44) += -marclinux - LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name) # Modules with short calls might break for calls into builtin-kernel From d49b48f088c323dbacae44dfbe56d9c985c8a2a1 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 31 Aug 2018 09:04:18 +0200 Subject: [PATCH 070/269] gpio: Fix crash due to registration race gpiochip_add_data_with_key() adds the gpiochip to the gpio_devices list before of_gpiochip_add() is called, but it's only the latter which sets the ->of_xlate function pointer. gpiochip_find() can be called by someone else between these two actions, and it can find the chip and call of_gpiochip_match_node_and_xlate() which leads to the following crash due to a NULL ->of_xlate(). Unhandled prefetch abort: page domain fault (0x01b) at 0x00000000 Modules linked in: leds_gpio(+) gpio_generic(+) CPU: 0 PID: 830 Comm: insmod Not tainted 4.18.0+ #43 Hardware name: ARM-Versatile Express PC is at (null) LR is at of_gpiochip_match_node_and_xlate+0x2c/0x38 Process insmod (pid: 830, stack limit = 0x(ptrval)) (of_gpiochip_match_node_and_xlate) from (gpiochip_find+0x48/0x84) (gpiochip_find) from (of_get_named_gpiod_flags+0xa8/0x238) (of_get_named_gpiod_flags) from (gpiod_get_from_of_node+0x2c/0xc8) (gpiod_get_from_of_node) from (devm_fwnode_get_index_gpiod_from_child+0xb8/0x144) (devm_fwnode_get_index_gpiod_from_child) from (gpio_led_probe+0x208/0x3c4 [leds_gpio]) (gpio_led_probe [leds_gpio]) from (platform_drv_probe+0x48/0x9c) (platform_drv_probe) from (really_probe+0x1d0/0x3d4) (really_probe) from (driver_probe_device+0x78/0x1c0) (driver_probe_device) from (__driver_attach+0x120/0x13c) (__driver_attach) from (bus_for_each_dev+0x68/0xb4) (bus_for_each_dev) from (bus_add_driver+0x1a8/0x268) (bus_add_driver) from (driver_register+0x78/0x10c) (driver_register) from (do_one_initcall+0x54/0x1fc) (do_one_initcall) from (do_init_module+0x64/0x1f4) (do_init_module) from (load_module+0x2198/0x26ac) (load_module) from (sys_finit_module+0xe0/0x110) (sys_finit_module) from (ret_fast_syscall+0x0/0x54) One way to fix this would be to rework the hairy registration sequence in gpiochip_add_data_with_key(), but since I'd probably introduce a couple of new bugs if I attempted that, simply add a check for a non-NULL of_xlate function pointer in of_gpiochip_match_node_and_xlate(). This works since the driver looking for the gpio will simply fail to find the gpio and defer its probe and be reprobed when the driver which is registering the gpiochip has fully completed its probe. Signed-off-by: Vincent Whitchurch Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index a4f1157d6aa0..d4e7a09598fa 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -31,6 +31,7 @@ static int of_gpiochip_match_node_and_xlate(struct gpio_chip *chip, void *data) struct of_phandle_args *gpiospec = data; return chip->gpiodev->dev.of_node == gpiospec->np && + chip->of_xlate && chip->of_xlate(chip, gpiospec, NULL) >= 0; } From 6147b1cf19651c7de297e69108b141fb30aa2349 Mon Sep 17 00:00:00 2001 From: Genki Sky Date: Tue, 28 Aug 2018 23:26:24 -0400 Subject: [PATCH 071/269] scripts/setlocalversion: git: Make -dirty check more robust $(git diff-index) relies on the index being refreshed. This refreshing of the index used to happen, but was removed in cdf2bc632ebc ("scripts/setlocalversion on write-protected source tree", 2013-06-14) due to issues with a read-only filesystem. If the index is not refreshed, one runs into problems. E.g. as described in [0], git stores the uid in its index, so even if just the uid has changed (or git is tricked into thinking so), then we will think the tree is dirty. So as in [1], if you package linux-git with a system that uses fakeroot(1), you get a "-dirty" version. Unless you manually $(git update-index --refresh) themselves. The simplest solution seems to be $(git status --porcelain), with an additional flag saying "ignore untracked files". It seems clearer about what it does, and avoids issues regarding cached indexes and writable filesystems, but still has stable output for scripting. [0]: https://public-inbox.org/git/0190ae30-b6c8-2a8b-b1fb-fd9d84e6dfdf@oracle.com/ [1]: https://bbs.archlinux.org/viewtopic.php?id=236702 Signed-off-by: Genki Sky Signed-off-by: Masahiro Yamada --- scripts/setlocalversion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 71f39410691b..79f7dd57d571 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -74,7 +74,7 @@ scm_version() fi # Check for uncommitted changes - if git diff-index --name-only HEAD | grep -qv "^scripts/package"; then + if git status -uno --porcelain | grep -qv '^.. scripts/package'; then printf '%s' -dirty fi From bc8d2e20a3eb2f8d268ad7bbca878cf3acdcf389 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 30 Aug 2018 11:18:42 +0200 Subject: [PATCH 072/269] kconfig: remove a spurious self-assignment The self assignment was probably introduced by an automated code refactoring in commit 694c49a7c01c ("kconfig: drop localization support"). The issue was identified by a self-assign warning when running make menuconfig with clang. Fixes: 694c49a7c01c ("kconfig: drop localization support") Signed-off-by: Lukas Bulwahn Signed-off-by: Masahiro Yamada --- scripts/kconfig/mconf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index 83b5836615fb..143c05fec161 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -490,7 +490,6 @@ static void build_conf(struct menu *menu) switch (prop->type) { case P_MENU: child_count++; - prompt = prompt; if (single_menu_mode) { item_make("%s%*c%s", menu->data ? "-->" : "++>", From e0758412208960be9de11e6d2350c81ffd88410f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 25 Aug 2018 01:14:46 +0200 Subject: [PATCH 073/269] netfilter: kconfig: nat related expression depend on nftables core NF_TABLES_IPV4 is now boolean so it is possible to set NF_TABLES=m NF_TABLES_IPV4=y NFT_CHAIN_NAT_IPV4=y which causes: nft_chain_nat_ipv4.c:(.text+0x6d): undefined reference to `nft_do_chain' Wrap NFT_CHAIN_NAT_IPV4 and related nat expressions with NF_TABLES to restore the dependency. Reported-by: Randy Dunlap Fixes: 02c7b25e5f54 ("netfilter: nf_tables: build-in filter chain type") Signed-off-by: Florian Westphal Acked-by: Randy Dunlap Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d9504adc47b3..184bf2e0a1ed 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -106,6 +106,10 @@ config NF_NAT_IPV4 if NF_NAT_IPV4 +config NF_NAT_MASQUERADE_IPV4 + bool + +if NF_TABLES config NFT_CHAIN_NAT_IPV4 depends on NF_TABLES_IPV4 tristate "IPv4 nf_tables nat chain support" @@ -115,9 +119,6 @@ config NFT_CHAIN_NAT_IPV4 packet transformations such as the source, destination address and source and destination ports. -config NF_NAT_MASQUERADE_IPV4 - bool - config NFT_MASQ_IPV4 tristate "IPv4 masquerading support for nf_tables" depends on NF_TABLES_IPV4 @@ -135,6 +136,7 @@ config NFT_REDIR_IPV4 help This is the expression that provides IPv4 redirect support for nf_tables. +endif # NF_TABLES config NF_NAT_SNMP_BASIC tristate "Basic SNMP-ALG support" From 7acfda539c0b9636a58bfee56abfb3aeee806d96 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 26 Aug 2018 02:35:44 +0900 Subject: [PATCH 074/269] netfilter: nf_tables: release chain in flushing set When element of verdict map is deleted, the delete routine should release chain. however, flush element of verdict map routine doesn't release chain. test commands: %nft add table ip filter %nft add chain ip filter c1 %nft add map ip filter map1 { type ipv4_addr : verdict \; } %nft add element ip filter map1 { 1 : jump c1 } %nft flush map ip filter map1 %nft flush ruleset splat looks like: [ 4895.170899] kernel BUG at net/netfilter/nf_tables_api.c:1415! [ 4895.178114] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 4895.178880] CPU: 0 PID: 1670 Comm: nft Not tainted 4.18.0+ #55 [ 4895.178880] RIP: 0010:nf_tables_chain_destroy.isra.28+0x39/0x220 [nf_tables] [ 4895.178880] Code: fc ff df 53 48 89 fb 48 83 c7 50 48 89 fa 48 c1 ea 03 0f b6 04 02 84 c0 74 09 3c 03 7f 05 e8 3e 4c 25 e1 8b 43 50 85 c0 74 02 <0f> 0b 48 89 da 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 80 3c 02 [ 4895.228342] RSP: 0018:ffff88010b98f4c0 EFLAGS: 00010202 [ 4895.234841] RAX: 0000000000000001 RBX: ffff8801131c6968 RCX: ffff8801146585b0 [ 4895.234841] RDX: 1ffff10022638d37 RSI: ffff8801191a9348 RDI: ffff8801131c69b8 [ 4895.234841] RBP: ffff8801146585a8 R08: 1ffff1002323526a R09: 0000000000000000 [ 4895.234841] R10: 0000000000000000 R11: 0000000000000000 R12: dead000000000200 [ 4895.234841] R13: dead000000000100 R14: ffffffffa3638af8 R15: dffffc0000000000 [ 4895.234841] FS: 00007f6d188e6700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000 [ 4895.234841] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4895.234841] CR2: 00007ffe72b8df88 CR3: 000000010e2d4000 CR4: 00000000001006f0 [ 4895.234841] Call Trace: [ 4895.234841] nf_tables_commit+0x2704/0x2c70 [nf_tables] [ 4895.234841] ? nfnetlink_rcv_batch+0xa4f/0x11b0 [nfnetlink] [ 4895.234841] ? nf_tables_setelem_notify.constprop.48+0x1a0/0x1a0 [nf_tables] [ 4895.323824] ? __lock_is_held+0x9d/0x130 [ 4895.323824] ? kasan_unpoison_shadow+0x30/0x40 [ 4895.333299] ? kasan_kmalloc+0xa9/0xc0 [ 4895.333299] ? kmem_cache_alloc_trace+0x2c0/0x310 [ 4895.333299] ? nfnetlink_rcv_batch+0xa4f/0x11b0 [nfnetlink] [ 4895.333299] nfnetlink_rcv_batch+0xdb9/0x11b0 [nfnetlink] [ 4895.333299] ? debug_show_all_locks+0x290/0x290 [ 4895.333299] ? nfnetlink_net_init+0x150/0x150 [nfnetlink] [ 4895.333299] ? sched_clock_cpu+0xe5/0x170 [ 4895.333299] ? sched_clock_local+0xff/0x130 [ 4895.333299] ? sched_clock_cpu+0xe5/0x170 [ 4895.333299] ? find_held_lock+0x39/0x1b0 [ 4895.333299] ? sched_clock_local+0xff/0x130 [ 4895.333299] ? memset+0x1f/0x40 [ 4895.333299] ? nla_parse+0x33/0x260 [ 4895.333299] ? ns_capable_common+0x6e/0x110 [ 4895.333299] nfnetlink_rcv+0x2c0/0x310 [nfnetlink] [ ... ] Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1dca5683f59f..2cfb173cd0b2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4637,6 +4637,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, } set->ndeact++; + nft_set_elem_deactivate(ctx->net, set, elem); nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); From 0f02cfbc3d9e413d450d8d0fd660077c23f67eff Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 30 Aug 2018 11:01:21 -0700 Subject: [PATCH 075/269] MIPS: VDSO: Match data page cache colouring when D$ aliases When a system suffers from dcache aliasing a user program may observe stale VDSO data from an aliased cache line. Notably this can break the expectation that clock_gettime(CLOCK_MONOTONIC, ...) is, as its name suggests, monotonic. In order to ensure that users observe updates to the VDSO data page as intended, align the user mappings of the VDSO data page such that their cache colouring matches that of the virtual address range which the kernel will use to update the data page - typically its unmapped address within kseg0. This ensures that we don't introduce aliasing cache lines for the VDSO data page, and therefore that userland will observe updates without requiring cache invalidation. Signed-off-by: Paul Burton Reported-by: Hauke Mehrtens Reported-by: Rene Nielsen Reported-by: Alexandre Belloni Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO") Patchwork: https://patchwork.linux-mips.org/patch/20344/ Tested-by: Alexandre Belloni Tested-by: Hauke Mehrtens Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.4+ --- arch/mips/kernel/vdso.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 019035d7225c..8f845f6e5f42 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include +#include #include /* Kernel-provided data used by the VDSO. */ @@ -128,12 +130,30 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vvar_size = gic_size + PAGE_SIZE; size = vvar_size + image->size; + /* + * Find a region that's large enough for us to perform the + * colour-matching alignment below. + */ + if (cpu_has_dc_aliases) + size += shm_align_mask + 1; + base = get_unmapped_area(NULL, 0, size, 0, 0); if (IS_ERR_VALUE(base)) { ret = base; goto out; } + /* + * If we suffer from dcache aliasing, ensure that the VDSO data page + * mapping is coloured the same as the kernel's mapping of that memory. + * This ensures that when the kernel updates the VDSO data userland + * will observe it without requiring cache invalidations. + */ + if (cpu_has_dc_aliases) { + base = __ALIGN_MASK(base, shm_align_mask); + base += ((unsigned long)&vdso_data - gic_size) & shm_align_mask; + } + data_addr = base + gic_size; vdso_addr = data_addr + PAGE_SIZE; From 3fcbb8260a87efb691d837e8cd24e81f65b3eb70 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 30 Aug 2018 13:52:38 -0700 Subject: [PATCH 076/269] ARC: atomics: unbork atomic_fetch_##op() In 4.19-rc1, Eugeniy reported weird boot and IO errors on ARC HSDK | INFO: task syslogd:77 blocked for more than 10 seconds. | Not tainted 4.19.0-rc1-00007-gf213acea4e88 #40 | "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this | message. | syslogd D 0 77 76 0x00000000 | | Stack Trace: | __switch_to+0x0/0xac | __schedule+0x1b2/0x730 | io_schedule+0x5c/0xc0 | __lock_page+0x98/0xdc | find_lock_entry+0x38/0x100 | shmem_getpage_gfp.isra.3+0x82/0xbfc | shmem_fault+0x46/0x138 | handle_mm_fault+0x5bc/0x924 | do_page_fault+0x100/0x2b8 | ret_from_exception+0x0/0x8 He bisected to 84c6591103db ("locking/atomics, asm-generic/bitops/lock.h: Rewrite using atomic_fetch_*()") This commit however only unmasked the real issue introduced by commit 4aef66c8ae9 ("locking/atomic, arch/arc: Fix build") which missed the retry-if-scond-failed branch in atomic_fetch_##op() macros. The bisected commit started using atomic_fetch_##op() macros for building the rest of atomics. Fixes: 4aef66c8ae9 ("locking/atomic, arch/arc: Fix build") Reported-by: Eugeniy Paltsev Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Vineet Gupta [vgupta: wrote changelog] --- arch/arc/include/asm/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 4e0072730241..158af079838d 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -84,7 +84,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ "1: llock %[orig], [%[ctr]] \n" \ " " #asm_op " %[val], %[orig], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ + " bnz 1b \n" \ : [val] "=&r" (val), \ [orig] "=&r" (orig) \ : [ctr] "r" (&v->counter), \ From 678c8110d23c0ac7d69fda558992c31be64e7507 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 30 Jul 2018 19:26:33 +0300 Subject: [PATCH 077/269] ARC: dma [IOC]: mark DMA devices connected as dma-coherent Mark DMA devices on AXS103 and HSDK boards connected through IOC port as dma-coherent. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/axc003.dtsi | 26 ++++++++++++++++++++++++++ arch/arc/boot/dts/axc003_idu.dtsi | 26 ++++++++++++++++++++++++++ arch/arc/boot/dts/hsdk.dts | 4 ++++ 3 files changed, 56 insertions(+) diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index dc91c663bcc0..d75d65ddf8e3 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -93,6 +93,32 @@ }; }; + /* + * Mark DMA peripherals connected via IOC port as dma-coherent. We do + * it via overlay because peripherals defined in axs10x_mb.dtsi are + * used for both AXS101 and AXS103 boards and only AXS103 has IOC (so + * only AXS103 board has HW-coherent DMA peripherals) + * We don't need to mark pgu@17000 as dma-coherent because it uses + * external DMA buffer located outside of IOC aperture. + */ + axs10x_mb { + ethernet@0x18000 { + dma-coherent; + }; + + ehci@0x40000 { + dma-coherent; + }; + + ohci@0x60000 { + dma-coherent; + }; + + mmc@0x15000 { + dma-coherent; + }; + }; + /* * The DW APB ICTL intc on MB is connected to CPU intc via a * DT "invisible" DW APB GPIO block, configured to simply pass thru diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 69ff4895f2ba..a05bb737ea63 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -100,6 +100,32 @@ }; }; + /* + * Mark DMA peripherals connected via IOC port as dma-coherent. We do + * it via overlay because peripherals defined in axs10x_mb.dtsi are + * used for both AXS101 and AXS103 boards and only AXS103 has IOC (so + * only AXS103 board has HW-coherent DMA peripherals) + * We don't need to mark pgu@17000 as dma-coherent because it uses + * external DMA buffer located outside of IOC aperture. + */ + axs10x_mb { + ethernet@0x18000 { + dma-coherent; + }; + + ehci@0x40000 { + dma-coherent; + }; + + ohci@0x60000 { + dma-coherent; + }; + + mmc@0x15000 { + dma-coherent; + }; + }; + /* * This INTC is actually connected to DW APB GPIO * which acts as a wire between MB INTC and CPU INTC. diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index d00f283094d3..ef149f59929a 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -181,6 +181,7 @@ resets = <&cgu_rst HSDK_ETH_RESET>; reset-names = "stmmaceth"; mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */ + dma-coherent; mdio { #address-cells = <1>; @@ -199,12 +200,14 @@ compatible = "snps,hsdk-v1.0-ohci", "generic-ohci"; reg = <0x60000 0x100>; interrupts = <15>; + dma-coherent; }; ehci@40000 { compatible = "snps,hsdk-v1.0-ehci", "generic-ehci"; reg = <0x40000 0x100>; interrupts = <15>; + dma-coherent; }; mmc@a000 { @@ -217,6 +220,7 @@ clock-names = "biu", "ciu"; interrupts = <12>; bus-width = <4>; + dma-coherent; }; }; From 6b06546206868f723f2061d703a3c3c378dcbf4c Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Fri, 31 Aug 2018 16:22:42 -0400 Subject: [PATCH 078/269] Revert "blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir()" This reverts commit 4c6994806f708559c2812b73501406e21ae5dcd0. Destroying blkgs is tricky because of the nature of the relationship. A blkg should go away when either a blkcg or a request_queue goes away. However, blkg's pin the blkcg to ensure they remain valid. To break this cycle, when a blkcg is offlined, blkgs put back their css ref. This eventually lets css_free() get called which frees the blkcg. The above commit (4c6994806f70) breaks this order of events by trying to destroy blkgs in css_free(). As the blkgs still hold references to the blkcg, css_free() is never called. The race between blkcg_bio_issue_check() and cgroup_rmdir() will be addressed in the following patch by delaying destruction of a blkg until all writeback associated with the blkcg has been finished. Fixes: 4c6994806f70 ("blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir()") Reviewed-by: Josef Bacik Signed-off-by: Dennis Zhou Cc: Jiufei Xue Cc: Joseph Qi Cc: Tejun Heo Cc: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 78 ++++++++------------------------------ include/linux/blk-cgroup.h | 1 - 2 files changed, 16 insertions(+), 63 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 694595b29b8f..2998e4f095d1 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -310,28 +310,11 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, } } -static void blkg_pd_offline(struct blkcg_gq *blkg) -{ - int i; - - lockdep_assert_held(blkg->q->queue_lock); - lockdep_assert_held(&blkg->blkcg->lock); - - for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkcg_policy *pol = blkcg_policy[i]; - - if (blkg->pd[i] && !blkg->pd[i]->offline && - pol->pd_offline_fn) { - pol->pd_offline_fn(blkg->pd[i]); - blkg->pd[i]->offline = true; - } - } -} - static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; struct blkcg_gq *parent = blkg->parent; + int i; lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); @@ -340,6 +323,13 @@ static void blkg_destroy(struct blkcg_gq *blkg) WARN_ON_ONCE(list_empty(&blkg->q_node)); WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_offline_fn) + pol->pd_offline_fn(blkg->pd[i]); + } + if (parent) { blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes); blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios); @@ -382,7 +372,6 @@ static void blkg_destroy_all(struct request_queue *q) struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); - blkg_pd_offline(blkg); blkg_destroy(blkg); spin_unlock(&blkcg->lock); } @@ -1058,25 +1047,25 @@ static struct cftype blkcg_legacy_files[] = { * @css: css of interest * * This function is called when @css is about to go away and responsible - * for offlining all blkgs pd and killing all wbs associated with @css. - * blkgs pd offline should be done while holding both q and blkcg locks. - * As blkcg lock is nested inside q lock, this function performs reverse - * double lock dancing. + * for shooting down all blkgs associated with @css. blkgs should be + * removed while holding both q and blkcg locks. As blkcg lock is nested + * inside q lock, this function performs reverse double lock dancing. * * This is the blkcg counterpart of ioc_release_fn(). */ static void blkcg_css_offline(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); - struct blkcg_gq *blkg; spin_lock_irq(&blkcg->lock); - hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { + while (!hlist_empty(&blkcg->blkg_list)) { + struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, + struct blkcg_gq, blkcg_node); struct request_queue *q = blkg->q; if (spin_trylock(q->queue_lock)) { - blkg_pd_offline(blkg); + blkg_destroy(blkg); spin_unlock(q->queue_lock); } else { spin_unlock_irq(&blkcg->lock); @@ -1090,43 +1079,11 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) wb_blkcg_offline(blkcg); } -/** - * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg - * @blkcg: blkcg of interest - * - * This function is called when blkcg css is about to free and responsible for - * destroying all blkgs associated with @blkcg. - * blkgs should be removed while holding both q and blkcg locks. As blkcg lock - * is nested inside q lock, this function performs reverse double lock dancing. - */ -static void blkcg_destroy_all_blkgs(struct blkcg *blkcg) -{ - spin_lock_irq(&blkcg->lock); - while (!hlist_empty(&blkcg->blkg_list)) { - struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, - struct blkcg_gq, - blkcg_node); - struct request_queue *q = blkg->q; - - if (spin_trylock(q->queue_lock)) { - blkg_destroy(blkg); - spin_unlock(q->queue_lock); - } else { - spin_unlock_irq(&blkcg->lock); - cpu_relax(); - spin_lock_irq(&blkcg->lock); - } - } - spin_unlock_irq(&blkcg->lock); -} - static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); int i; - blkcg_destroy_all_blkgs(blkcg); - mutex_lock(&blkcg_pol_mutex); list_del(&blkcg->all_blkcgs_node); @@ -1480,11 +1437,8 @@ void blkcg_deactivate_policy(struct request_queue *q, list_for_each_entry(blkg, &q->blkg_list, q_node) { if (blkg->pd[pol->plid]) { - if (!blkg->pd[pol->plid]->offline && - pol->pd_offline_fn) { + if (pol->pd_offline_fn) pol->pd_offline_fn(blkg->pd[pol->plid]); - blkg->pd[pol->plid]->offline = true; - } pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 34aec30e06c7..1615cdd4c797 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -89,7 +89,6 @@ struct blkg_policy_data { /* the blkg and policy id this per-policy data belongs to */ struct blkcg_gq *blkg; int plid; - bool offline; }; /* From 59b57717fff8b562825d9d25e0180ad7e8048ca9 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Fri, 31 Aug 2018 16:22:43 -0400 Subject: [PATCH 079/269] blkcg: delay blkg destruction until after writeback has finished Currently, blkcg destruction relies on a sequence of events: 1. Destruction starts. blkcg_css_offline() is called and blkgs release their reference to the blkcg. This immediately destroys the cgwbs (writeback). 2. With blkgs giving up their reference, the blkcg ref count should become zero and eventually call blkcg_css_free() which finally frees the blkcg. Jiufei Xue reported that there is a race between blkcg_bio_issue_check() and cgroup_rmdir(). To remedy this, blkg destruction becomes contingent on the completion of all writeback associated with the blkcg. A count of the number of cgwbs is maintained and once that goes to zero, blkg destruction can follow. This should prevent premature blkg destruction related to writeback. The new process for blkcg cleanup is as follows: 1. Destruction starts. blkcg_css_offline() is called which offlines writeback. Blkg destruction is delayed on the cgwb_refcnt count to avoid punting potentially large amounts of outstanding writeback to root while maintaining any ongoing policies. Here, the base cgwb_refcnt is put back. 2. When the cgwb_refcnt becomes zero, blkcg_destroy_blkgs() is called and handles destruction of blkgs. This is where the css reference held by each blkg is released. 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called. This finally frees the blkg. It seems in the past blk-throttle didn't do the most understandable things with taking data from a blkg while associating with current. So, the simplification and unification of what blk-throttle is doing caused this. Fixes: 08e18eab0c579 ("block: add bi_blkg to the bio for cgroups") Reviewed-by: Josef Bacik Signed-off-by: Dennis Zhou Cc: Jiufei Xue Cc: Joseph Qi Cc: Tejun Heo Cc: Josef Bacik Cc: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 53 ++++++++++++++++++++++++++++++++------ include/linux/blk-cgroup.h | 44 +++++++++++++++++++++++++++++++ mm/backing-dev.c | 5 ++++ 3 files changed, 94 insertions(+), 8 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2998e4f095d1..c19f9078da1e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1042,21 +1042,59 @@ static struct cftype blkcg_legacy_files[] = { { } /* terminate */ }; +/* + * blkcg destruction is a three-stage process. + * + * 1. Destruction starts. The blkcg_css_offline() callback is invoked + * which offlines writeback. Here we tie the next stage of blkg destruction + * to the completion of writeback associated with the blkcg. This lets us + * avoid punting potentially large amounts of outstanding writeback to root + * while maintaining any ongoing policies. The next stage is triggered when + * the nr_cgwbs count goes to zero. + * + * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called + * and handles the destruction of blkgs. Here the css reference held by + * the blkg is put back eventually allowing blkcg_css_free() to be called. + * This work may occur in cgwb_release_workfn() on the cgwb_release + * workqueue. Any submitted ios that fail to get the blkg ref will be + * punted to the root_blkg. + * + * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called. + * This finally frees the blkcg. + */ + /** * blkcg_css_offline - cgroup css_offline callback * @css: css of interest * - * This function is called when @css is about to go away and responsible - * for shooting down all blkgs associated with @css. blkgs should be - * removed while holding both q and blkcg locks. As blkcg lock is nested - * inside q lock, this function performs reverse double lock dancing. - * - * This is the blkcg counterpart of ioc_release_fn(). + * This function is called when @css is about to go away. Here the cgwbs are + * offlined first and only once writeback associated with the blkcg has + * finished do we start step 2 (see above). */ static void blkcg_css_offline(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); + /* this prevents anyone from attaching or migrating to this blkcg */ + wb_blkcg_offline(blkcg); + + /* put the base cgwb reference allowing step 2 to be triggered */ + blkcg_cgwb_put(blkcg); +} + +/** + * blkcg_destroy_blkgs - responsible for shooting down blkgs + * @blkcg: blkcg of interest + * + * blkgs should be removed while holding both q and blkcg locks. As blkcg lock + * is nested inside q lock, this function performs reverse double lock dancing. + * Destroying the blkgs releases the reference held on the blkcg's css allowing + * blkcg_css_free to eventually be called. + * + * This is the blkcg counterpart of ioc_release_fn(). + */ +void blkcg_destroy_blkgs(struct blkcg *blkcg) +{ spin_lock_irq(&blkcg->lock); while (!hlist_empty(&blkcg->blkg_list)) { @@ -1075,8 +1113,6 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) } spin_unlock_irq(&blkcg->lock); - - wb_blkcg_offline(blkcg); } static void blkcg_css_free(struct cgroup_subsys_state *css) @@ -1146,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) INIT_HLIST_HEAD(&blkcg->blkg_list); #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); + refcount_set(&blkcg->cgwb_refcnt, 1); #endif list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1615cdd4c797..6d766a19f2bb 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -56,6 +56,7 @@ struct blkcg { struct list_head all_blkcgs_node; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; + refcount_t cgwb_refcnt; #endif }; @@ -386,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) return cpd ? cpd->blkcg : NULL; } +extern void blkcg_destroy_blkgs(struct blkcg *blkcg); + +#ifdef CONFIG_CGROUP_WRITEBACK + +/** + * blkcg_cgwb_get - get a reference for blkcg->cgwb_list + * @blkcg: blkcg of interest + * + * This is used to track the number of active wb's related to a blkcg. + */ +static inline void blkcg_cgwb_get(struct blkcg *blkcg) +{ + refcount_inc(&blkcg->cgwb_refcnt); +} + +/** + * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list + * @blkcg: blkcg of interest + * + * This is used to track the number of active wb's related to a blkcg. + * When this count goes to zero, all active wb has finished so the + * blkcg can continue destruction by calling blkcg_destroy_blkgs(). + * This work may occur in cgwb_release_workfn() on the cgwb_release + * workqueue. + */ +static inline void blkcg_cgwb_put(struct blkcg *blkcg) +{ + if (refcount_dec_and_test(&blkcg->cgwb_refcnt)) + blkcg_destroy_blkgs(blkcg); +} + +#else + +static inline void blkcg_cgwb_get(struct blkcg *blkcg) { } + +static inline void blkcg_cgwb_put(struct blkcg *blkcg) +{ + /* wb isn't being accounted, so trigger destruction right away */ + blkcg_destroy_blkgs(blkcg); +} + +#endif + /** * blkg_path - format cgroup path of blkg * @blkg: blkg of interest diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f5981e9d6ae2..8a8bb8796c6c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -491,6 +491,7 @@ static void cgwb_release_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(work, struct bdi_writeback, release_work); + struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css); mutex_lock(&wb->bdi->cgwb_release_mutex); wb_shutdown(wb); @@ -499,6 +500,9 @@ static void cgwb_release_workfn(struct work_struct *work) css_put(wb->blkcg_css); mutex_unlock(&wb->bdi->cgwb_release_mutex); + /* triggers blkg destruction if cgwb_refcnt becomes zero */ + blkcg_cgwb_put(blkcg); + fprop_local_destroy_percpu(&wb->memcg_completions); percpu_ref_exit(&wb->refcnt); wb_exit(wb); @@ -597,6 +601,7 @@ static int cgwb_create(struct backing_dev_info *bdi, list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); + blkcg_cgwb_get(blkcg); css_get(memcg_css); css_get(blkcg_css); } From 3111885015b458c97b4cf272e2a87f1d6f0ed06a Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Fri, 31 Aug 2018 16:22:44 -0400 Subject: [PATCH 080/269] blkcg: use tryget logic when associating a blkg with a bio There is a very small change a bio gets caught up in a really unfortunate race between a task migration, cgroup exiting, and itself trying to associate with a blkg. This is due to css offlining being performed after the css->refcnt is killed which triggers removal of blkgs that reach their blkg->refcnt of 0. To avoid this, association with a blkg should use tryget and fallback to using the root_blkg. Fixes: 08e18eab0c579 ("block: add bi_blkg to the bio for cgroups") Reviewed-by: Josef Bacik Signed-off-by: Dennis Zhou Cc: Jiufei Xue Cc: Joseph Qi Cc: Tejun Heo Cc: Josef Bacik Cc: Jens Axboe Signed-off-by: Jens Axboe --- block/bio.c | 3 ++- block/blk-throttle.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/block/bio.c b/block/bio.c index b12966e415d3..8c680a776171 100644 --- a/block/bio.c +++ b/block/bio.c @@ -2015,7 +2015,8 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) { if (unlikely(bio->bi_blkg)) return -EBUSY; - blkg_get(blkg); + if (!blkg_try_get(blkg)) + return -ENODEV; bio->bi_blkg = blkg; return 0; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a3eede00d302..01d0620a4e4a 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2129,8 +2129,9 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) { #ifdef CONFIG_BLK_DEV_THROTTLING_LOW - if (bio->bi_css) - bio_associate_blkg(bio, tg_to_blkg(tg)); + /* fallback to root_blkg if we fail to get a blkg ref */ + if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV)) + bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg); bio_issue_init(&bio->bi_issue, bio_sectors(bio)); #endif } From e254de6bcf3f5b6e78a92ac95fb91acef8adfe1a Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 29 Aug 2018 11:05:42 -0700 Subject: [PATCH 081/269] md/raid5-cache: disable reshape completely We don't support reshape yet if an array supports log device. Previously we determine the fact by checking ->log. However, ->log could be NULL after a log device is removed, but the array is still marked to support log device. Don't allow reshape in this case too. User can disable log device support by setting 'consistency_policy' to 'resync' then do reshape. Reported-by: Xiao Ni Tested-by: Xiao Ni Signed-off-by: Shaohua Li --- drivers/md/raid5-log.h | 5 +++++ drivers/md/raid5.c | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h index a001808a2b77..bfb811407061 100644 --- a/drivers/md/raid5-log.h +++ b/drivers/md/raid5-log.h @@ -46,6 +46,11 @@ extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add); extern void ppl_quiesce(struct r5conf *conf, int quiesce); extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio); +static inline bool raid5_has_log(struct r5conf *conf) +{ + return test_bit(MD_HAS_JOURNAL, &conf->mddev->flags); +} + static inline bool raid5_has_ppl(struct r5conf *conf) { return test_bit(MD_HAS_PPL, &conf->mddev->flags); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4ce0d7502fad..e4e98f47865d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -733,7 +733,7 @@ static bool stripe_can_batch(struct stripe_head *sh) { struct r5conf *conf = sh->raid_conf; - if (conf->log || raid5_has_ppl(conf)) + if (raid5_has_log(conf) || raid5_has_ppl(conf)) return false; return test_bit(STRIPE_BATCH_READY, &sh->state) && !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && @@ -7737,7 +7737,7 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) sector_t newsize; struct r5conf *conf = mddev->private; - if (conf->log || raid5_has_ppl(conf)) + if (raid5_has_log(conf) || raid5_has_ppl(conf)) return -EINVAL; sectors &= ~((sector_t)conf->chunk_sectors - 1); newsize = raid5_size(mddev, sectors, mddev->raid_disks); @@ -7788,7 +7788,7 @@ static int check_reshape(struct mddev *mddev) { struct r5conf *conf = mddev->private; - if (conf->log || raid5_has_ppl(conf)) + if (raid5_has_log(conf) || raid5_has_ppl(conf)) return -EINVAL; if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && From 1d0ffd264204eba1861865560f1f7f7a92919384 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Thu, 30 Aug 2018 15:57:09 +0800 Subject: [PATCH 082/269] RAID10 BUG_ON in raise_barrier when force is true and conf->barrier is 0 In raid10 reshape_request it gets max_sectors in read_balance. If the underlayer disks have bad blocks, the max_sectors is less than last. It will call goto read_more many times. It calls raise_barrier(conf, sectors_done != 0) every time. In this condition sectors_done is not 0. So the value passed to the argument force of raise_barrier is true. In raise_barrier it checks conf->barrier when force is true. If force is true and conf->barrier is 0, it panic. In this case reshape_request submits bio to under layer disks. And in the callback function of the bio it calls lower_barrier. If the bio finishes before calling raise_barrier again, it can trigger the BUG_ON. Add one pair of raise_barrier/lower_barrier to fix this bug. Signed-off-by: Xiao Ni Suggested-by: Neil Brown Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 981898049491..d6f7978b4449 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4529,11 +4529,12 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, allow_barrier(conf); } + raise_barrier(conf, 0); read_more: /* Now schedule reads for blocks from sector_nr to last */ r10_bio = raid10_alloc_init_r10buf(conf); r10_bio->state = 0; - raise_barrier(conf, sectors_done != 0); + raise_barrier(conf, 1); atomic_set(&r10_bio->remaining, 0); r10_bio->mddev = mddev; r10_bio->sector = sector_nr; @@ -4629,6 +4630,8 @@ read_more: if (sector_nr <= last) goto read_more; + lower_barrier(conf); + /* Now that we have done the whole section we can * update reshape_progress */ From 41a95041126522a921fb73df22cbdd520dfdebad Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 31 Aug 2018 10:05:57 +0800 Subject: [PATCH 083/269] md-cluster: release RESYNC lock after the last resync message All the RESYNC messages are sent with resync lock held, the only exception is resync_finish which releases resync_lockres before send the last resync message, this should be changed as well. Otherwise, we can see deadlock issue as follows: clustermd2-gqjiang2:~ # cat /proc/mdstat Personalities : [raid10] [raid1] md0 : active raid1 sdg[0] sdf[1] 134144 blocks super 1.2 [2/2] [UU] [===================>.] resync = 99.6% (134144/134144) finish=0.0min speed=26K/sec bitmap: 1/1 pages [4KB], 65536KB chunk unused devices: clustermd2-gqjiang2:~ # ps aux|grep md|grep D root 20497 0.0 0.0 0 0 ? D 16:00 0:00 [md0_raid1] clustermd2-gqjiang2:~ # cat /proc/20497/stack [] dlm_lock_sync+0x8e/0xc0 [md_cluster] [] __sendmsg+0x98/0x130 [md_cluster] [] sendmsg+0x20/0x30 [md_cluster] [] resync_info_update+0xb5/0xc0 [md_cluster] [] md_reap_sync_thread+0x134/0x170 [md_mod] [] md_check_recovery+0x28c/0x510 [md_mod] [] raid1d+0x42/0x800 [raid1] [] md_thread+0x121/0x150 [md_mod] [] kthread+0xff/0x140 [] ret_from_fork+0x35/0x40 [] 0xffffffffffffffff clustermd-gqjiang1:~ # ps aux|grep md|grep D root 20531 0.0 0.0 0 0 ? D 16:00 0:00 [md0_raid1] root 20537 0.0 0.0 0 0 ? D 16:00 0:00 [md0_cluster_rec] root 20676 0.0 0.0 0 0 ? D 16:01 0:00 [md0_resync] clustermd-gqjiang1:~ # cat /proc/mdstat Personalities : [raid10] [raid1] md0 : active raid1 sdf[1] sdg[0] 134144 blocks super 1.2 [2/2] [UU] [===================>.] resync = 97.3% (131072/134144) finish=8076.8min speed=0K/sec bitmap: 1/1 pages [4KB], 65536KB chunk unused devices: clustermd-gqjiang1:~ # cat /proc/20531/stack [] metadata_update_start+0xcd/0xd0 [md_cluster] [] md_update_sb.part.61+0x97/0x820 [md_mod] [] md_check_recovery+0x29b/0x510 [md_mod] [] raid1d+0x42/0x800 [raid1] [] md_thread+0x121/0x150 [md_mod] [] kthread+0xff/0x140 [] ret_from_fork+0x35/0x40 [] 0xffffffffffffffff clustermd-gqjiang1:~ # cat /proc/20537/stack [] freeze_array+0xf2/0x140 [raid1] [] recv_daemon+0x41e/0x580 [md_cluster] [] md_thread+0x121/0x150 [md_mod] [] kthread+0xff/0x140 [] ret_from_fork+0x35/0x40 [] 0xffffffffffffffff clustermd-gqjiang1:~ # cat /proc/20676/stack [] dlm_lock_sync+0x8e/0xc0 [md_cluster] [] lock_token+0x2f/0xa0 [md_cluster] [] lock_comm+0x32/0x90 [md_cluster] [] sendmsg+0x15/0x30 [md_cluster] [] resync_info_update+0x8a/0xc0 [md_cluster] [] raid1_sync_request+0xa9a/0xb10 [raid1] [] md_do_sync+0xbaa/0xf90 [md_mod] [] md_thread+0x121/0x150 [md_mod] [] kthread+0xff/0x140 [] ret_from_fork+0x35/0x40 [] 0xffffffffffffffff Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md-cluster.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 94329e03001e..0b2af6e74fc3 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -1276,18 +1276,18 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) static int resync_finish(struct mddev *mddev) { struct md_cluster_info *cinfo = mddev->cluster_info; + int ret = 0; clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery); - dlm_unlock_sync(cinfo->resync_lockres); /* * If resync thread is interrupted so we can't say resync is finished, * another node will launch resync thread to continue. */ - if (test_bit(MD_CLOSING, &mddev->flags)) - return 0; - else - return resync_info_update(mddev, 0, 0); + if (!test_bit(MD_CLOSING, &mddev->flags)) + ret = resync_info_update(mddev, 0, 0); + dlm_unlock_sync(cinfo->resync_lockres); + return ret; } static int area_resyncing(struct mddev *mddev, int direction, From 9b25436662d5fb4c66eb527ead53cab15f596ee0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 27 Aug 2018 14:51:54 -0700 Subject: [PATCH 084/269] random: make CPU trust a boot parameter Instead of forcing a distro or other system builder to choose at build time whether the CPU is trusted for CRNG seeding via CONFIG_RANDOM_TRUST_CPU, provide a boot-time parameter for end users to control the choice. The CONFIG will set the default state instead. Signed-off-by: Kees Cook Signed-off-by: Theodore Ts'o --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ drivers/char/Kconfig | 4 ++-- drivers/char/random.c | 11 ++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0c8f7889efa1..227c5c6fa4c1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3390,6 +3390,12 @@ ramdisk_size= [RAM] Sizes of RAM disks in kilobytes See Documentation/blockdev/ramdisk.txt. + random.trust_cpu={on,off} + [KNL] Enable or disable trusting the use of the + CPU's random number generator (if available) to + fully seed the kernel's CRNG. Default is controlled + by CONFIG_RANDOM_TRUST_CPU. + ras=option[,option,...] [KNL] RAS-specific options cec_disable [X86] diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index ce277ee0a28a..40728491f37b 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -566,5 +566,5 @@ config RANDOM_TRUST_CPU that CPU manufacturer (perhaps with the insistence or mandate of a Nation State's intelligence or law enforcement agencies) has not installed a hidden back door to compromise the CPU's - random number generation facilities. - + random number generation facilities. This can also be configured + at boot with "random.trust_cpu=on/off". diff --git a/drivers/char/random.c b/drivers/char/random.c index bf5f99fc36f1..c75b6cdf0053 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -779,6 +779,13 @@ static struct crng_state **crng_node_pool __read_mostly; static void invalidate_batched_entropy(void); +static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static int __init parse_trust_cpu(char *arg) +{ + return kstrtobool(arg, &trust_cpu); +} +early_param("random.trust_cpu", parse_trust_cpu); + static void crng_initialize(struct crng_state *crng) { int i; @@ -799,12 +806,10 @@ static void crng_initialize(struct crng_state *crng) } crng->state[i] ^= rv; } -#ifdef CONFIG_RANDOM_TRUST_CPU - if (arch_init) { + if (trust_cpu && arch_init) { crng_init = 2; pr_notice("random: crng done (trusting CPU's manufacturer)\n"); } -#endif crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } From 370a132bb2227ff76278f98370e0e701d86ff752 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 31 Jul 2018 07:27:39 -0400 Subject: [PATCH 085/269] x86/microcode: Make sure boot_cpu_data.microcode is up-to-date When preparing an MCE record for logging, boot_cpu_data.microcode is used to read out the microcode revision on the box. However, on systems where late microcode update has happened, the microcode revision output in a MCE log record is wrong because boot_cpu_data.microcode is not updated when the microcode gets updated. But, the microcode revision saved in boot_cpu_data's microcode member should be kept up-to-date, regardless, for consistency. Make it so. Fixes: fa94d0c6e0f3 ("x86/MCE: Save microcode revision in machine check records") Signed-off-by: Prarit Bhargava Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tony Luck Cc: sironi@amazon.de Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180731112739.32338-1-prarit@redhat.com --- arch/x86/kernel/cpu/microcode/amd.c | 4 ++++ arch/x86/kernel/cpu/microcode/intel.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 0624957aa068..602f17134103 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -537,6 +537,10 @@ static enum ucode_state apply_microcode_amd(int cpu) uci->cpu_sig.rev = mc_amd->hdr.patch_id; c->microcode = mc_amd->hdr.patch_id; + /* Update boot_cpu_data's revision too, if we're on the BSP: */ + if (c->cpu_index == boot_cpu_data.cpu_index) + boot_cpu_data.microcode = mc_amd->hdr.patch_id; + return UCODE_UPDATED; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 97ccf4c3b45b..256d336cbc04 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -851,6 +851,10 @@ static enum ucode_state apply_microcode_intel(int cpu) uci->cpu_sig.rev = rev; c->microcode = rev; + /* Update boot_cpu_data's revision too, if we're on the BSP: */ + if (c->cpu_index == boot_cpu_data.cpu_index) + boot_cpu_data.microcode = rev; + return UCODE_UPDATED; } From 8da38ebaad23fe1b0c4a205438676f6356607cfc Mon Sep 17 00:00:00 2001 From: Filippo Sironi Date: Tue, 31 Jul 2018 17:29:30 +0200 Subject: [PATCH 086/269] x86/microcode: Update the new microcode revision unconditionally Handle the case where microcode gets loaded on the BSP's hyperthread sibling first and the boot_cpu_data's microcode revision doesn't get updated because of early exit due to the siblings sharing a microcode engine. For that, simply write the updated revision on all CPUs unconditionally. Signed-off-by: Filippo Sironi Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: prarit@redhat.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1533050970-14385-1-git-send-email-sironi@amazon.de --- arch/x86/kernel/cpu/microcode/amd.c | 22 +++++++++++++--------- arch/x86/kernel/cpu/microcode/intel.c | 13 ++++++++----- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 602f17134103..07b5fc00b188 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -504,6 +504,7 @@ static enum ucode_state apply_microcode_amd(int cpu) struct microcode_amd *mc_amd; struct ucode_cpu_info *uci; struct ucode_patch *p; + enum ucode_state ret; u32 rev, dummy; BUG_ON(raw_smp_processor_id() != cpu); @@ -521,9 +522,8 @@ static enum ucode_state apply_microcode_amd(int cpu) /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { - c->microcode = rev; - uci->cpu_sig.rev = rev; - return UCODE_OK; + ret = UCODE_OK; + goto out; } if (__apply_microcode_amd(mc_amd)) { @@ -531,17 +531,21 @@ static enum ucode_state apply_microcode_amd(int cpu) cpu, mc_amd->hdr.patch_id); return UCODE_ERROR; } - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, - mc_amd->hdr.patch_id); - uci->cpu_sig.rev = mc_amd->hdr.patch_id; - c->microcode = mc_amd->hdr.patch_id; + rev = mc_amd->hdr.patch_id; + ret = UCODE_UPDATED; + + pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); + +out: + uci->cpu_sig.rev = rev; + c->microcode = rev; /* Update boot_cpu_data's revision too, if we're on the BSP: */ if (c->cpu_index == boot_cpu_data.cpu_index) - boot_cpu_data.microcode = mc_amd->hdr.patch_id; + boot_cpu_data.microcode = rev; - return UCODE_UPDATED; + return ret; } static int install_equiv_cpu_table(const u8 *buf) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 256d336cbc04..16936a24795c 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -795,6 +795,7 @@ static enum ucode_state apply_microcode_intel(int cpu) struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_intel *mc; + enum ucode_state ret; static int prev_rev; u32 rev; @@ -817,9 +818,8 @@ static enum ucode_state apply_microcode_intel(int cpu) */ rev = intel_get_microcode_revision(); if (rev >= mc->hdr.rev) { - uci->cpu_sig.rev = rev; - c->microcode = rev; - return UCODE_OK; + ret = UCODE_OK; + goto out; } /* @@ -848,14 +848,17 @@ static enum ucode_state apply_microcode_intel(int cpu) prev_rev = rev; } + ret = UCODE_UPDATED; + +out: uci->cpu_sig.rev = rev; - c->microcode = rev; + c->microcode = rev; /* Update boot_cpu_data's revision too, if we're on the BSP: */ if (c->cpu_index == boot_cpu_data.cpu_index) boot_cpu_data.microcode = rev; - return UCODE_UPDATED; + return ret; } static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, From fd65465b7016dc6d9fa5c2f39cc706c231c9a089 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 31 Aug 2018 18:34:55 +0900 Subject: [PATCH 087/269] kconfig: do not require pkg-config on make {menu,n}config Meelis Roos reported a {menu,n}config regression: "I have libncurses devel package installed in the default system location (as do 99%+ on actual developers probably) and in this case, pkg-config is useless. pkg-config is needed only when libraries and headers are installed in non-default locations but it is bad to require installation of pkg-config on all the machines where make menuconfig would be possibly run." For {menu,n}config, do not use pkg-config if it is not installed. For {g,x}config, keep checking pkg-config since we really rely on it for finding the installation paths of the required packages. Fixes: 4ab3b80159d4 ("kconfig: check for pkg-config on make {menu,n,g,x}config") Reported-by: Meelis Roos Signed-off-by: Masahiro Yamada Tested-by: Meelis Roos Tested-by: Randy Dunlap --- Documentation/process/changes.rst | 2 +- scripts/kconfig/Makefile | 1 - scripts/kconfig/check-pkgconfig.sh | 8 -------- scripts/kconfig/gconf-cfg.sh | 7 +++++++ scripts/kconfig/mconf-cfg.sh | 27 +++++++++++++++------------ scripts/kconfig/nconf-cfg.sh | 27 +++++++++++++++------------ scripts/kconfig/qconf-cfg.sh | 7 +++++++ 7 files changed, 45 insertions(+), 34 deletions(-) delete mode 100644 scripts/kconfig/check-pkgconfig.sh diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 61f918b10a0c..d1bf143b446f 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -86,7 +86,7 @@ pkg-config The build system, as of 4.18, requires pkg-config to check for installed kconfig tools and to determine flags settings for use in -'make {menu,n,g,x}config'. Previously pkg-config was being used but not +'make {g,x}config'. Previously pkg-config was being used but not verified or documented. Flex diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 4a7bd2192073..67ed9f6ccdf8 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -221,7 +221,6 @@ $(obj)/zconf.tab.o: $(obj)/zconf.lex.c # check if necessary packages are available, and configure build flags define filechk_conf_cfg - $(CONFIG_SHELL) $(srctree)/scripts/kconfig/check-pkgconfig.sh; \ $(CONFIG_SHELL) $< endef diff --git a/scripts/kconfig/check-pkgconfig.sh b/scripts/kconfig/check-pkgconfig.sh deleted file mode 100644 index 7a1c40bfb58c..000000000000 --- a/scripts/kconfig/check-pkgconfig.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Check for pkg-config presence - -if [ -z $(command -v pkg-config) ]; then - echo "'make *config' requires 'pkg-config'. Please install it." 1>&2 - exit 1 -fi diff --git a/scripts/kconfig/gconf-cfg.sh b/scripts/kconfig/gconf-cfg.sh index 533b3d8f8f08..480ecd8b9f41 100755 --- a/scripts/kconfig/gconf-cfg.sh +++ b/scripts/kconfig/gconf-cfg.sh @@ -3,6 +3,13 @@ PKG="gtk+-2.0 gmodule-2.0 libglade-2.0" +if [ -z "$(command -v pkg-config)" ]; then + echo >&2 "*" + echo >&2 "* 'make gconfig' requires 'pkg-config'. Please install it." + echo >&2 "*" + exit 1 +fi + if ! pkg-config --exists $PKG; then echo >&2 "*" echo >&2 "* Unable to find the GTK+ installation. Please make sure that" diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh index e6f9facd0077..c812872d7f9d 100755 --- a/scripts/kconfig/mconf-cfg.sh +++ b/scripts/kconfig/mconf-cfg.sh @@ -4,20 +4,23 @@ PKG="ncursesw" PKG2="ncurses" -if pkg-config --exists $PKG; then - echo cflags=\"$(pkg-config --cflags $PKG)\" - echo libs=\"$(pkg-config --libs $PKG)\" - exit 0 +if [ -n "$(command -v pkg-config)" ]; then + if pkg-config --exists $PKG; then + echo cflags=\"$(pkg-config --cflags $PKG)\" + echo libs=\"$(pkg-config --libs $PKG)\" + exit 0 + fi + + if pkg-config --exists $PKG2; then + echo cflags=\"$(pkg-config --cflags $PKG2)\" + echo libs=\"$(pkg-config --libs $PKG2)\" + exit 0 + fi fi -if pkg-config --exists $PKG2; then - echo cflags=\"$(pkg-config --cflags $PKG2)\" - echo libs=\"$(pkg-config --libs $PKG2)\" - exit 0 -fi - -# Unfortunately, some distributions (e.g. openSUSE) cannot find ncurses -# by pkg-config. +# Check the default paths in case pkg-config is not installed. +# (Even if it is installed, some distributions such as openSUSE cannot +# find ncurses by pkg-config.) if [ -f /usr/include/ncursesw/ncurses.h ]; then echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\" echo libs=\"-lncursesw\" diff --git a/scripts/kconfig/nconf-cfg.sh b/scripts/kconfig/nconf-cfg.sh index 42f5ac73548e..001559ef0a60 100644 --- a/scripts/kconfig/nconf-cfg.sh +++ b/scripts/kconfig/nconf-cfg.sh @@ -4,20 +4,23 @@ PKG="ncursesw menuw panelw" PKG2="ncurses menu panel" -if pkg-config --exists $PKG; then - echo cflags=\"$(pkg-config --cflags $PKG)\" - echo libs=\"$(pkg-config --libs $PKG)\" - exit 0 +if [ -n "$(command -v pkg-config)" ]; then + if pkg-config --exists $PKG; then + echo cflags=\"$(pkg-config --cflags $PKG)\" + echo libs=\"$(pkg-config --libs $PKG)\" + exit 0 + fi + + if pkg-config --exists $PKG2; then + echo cflags=\"$(pkg-config --cflags $PKG2)\" + echo libs=\"$(pkg-config --libs $PKG2)\" + exit 0 + fi fi -if pkg-config --exists $PKG2; then - echo cflags=\"$(pkg-config --cflags $PKG2)\" - echo libs=\"$(pkg-config --libs $PKG2)\" - exit 0 -fi - -# Unfortunately, some distributions (e.g. openSUSE) cannot find ncurses -# by pkg-config. +# Check the default paths in case pkg-config is not installed. +# (Even if it is installed, some distributions such as openSUSE cannot +# find ncurses by pkg-config.) if [ -f /usr/include/ncursesw/ncurses.h ]; then echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\" echo libs=\"-lncursesw -lmenuw -lpanelw\" diff --git a/scripts/kconfig/qconf-cfg.sh b/scripts/kconfig/qconf-cfg.sh index 0862e1562536..02ccc0ae1031 100755 --- a/scripts/kconfig/qconf-cfg.sh +++ b/scripts/kconfig/qconf-cfg.sh @@ -4,6 +4,13 @@ PKG="Qt5Core Qt5Gui Qt5Widgets" PKG2="QtCore QtGui" +if [ -z "$(command -v pkg-config)" ]; then + echo >&2 "*" + echo >&2 "* 'make xconfig' requires 'pkg-config'. Please install it." + echo >&2 "*" + exit 1 +fi + if pkg-config --exists $PKG; then echo cflags=\"-std=c++11 -fPIC $(pkg-config --cflags Qt5Core Qt5Gui Qt5Widgets)\" echo libs=\"$(pkg-config --libs $PKG)\" From 914b087ff9e0e9a399a4927fa30793064afc0178 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 28 Aug 2018 12:59:10 -0700 Subject: [PATCH 088/269] kbuild: make missing $DEPMOD a Warning instead of an Error When $DEPMOD is not found, only print a warning instead of exiting with an error message and error status: Warning: 'make modules_install' requires /sbin/depmod. Please install it. This is probably in the kmod package. Change the Error to a Warning because "not all build hosts for cross compiling Linux are Linux systems and are able to provide a working port of depmod, especially at the file patch /sbin/depmod." I.e., "make modules_install" may be used to copy/install the loadable modules files to a target directory on a build system and then transferred to an embedded device where /sbin/depmod is run instead of it being run on the build system. Fixes: 934193a654c1 ("kbuild: verify that $DEPMOD is installed") Signed-off-by: Randy Dunlap Reported-by: H. Nikolaus Schaller Cc: stable@vger.kernel.org Cc: Lucas De Marchi Cc: Lucas De Marchi Cc: Michal Marek Cc: Jessica Yu Cc: Chih-Wei Huang Signed-off-by: Masahiro Yamada --- scripts/depmod.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/depmod.sh b/scripts/depmod.sh index 999d585eaa73..e5f0aad75b96 100755 --- a/scripts/depmod.sh +++ b/scripts/depmod.sh @@ -15,9 +15,9 @@ if ! test -r System.map ; then fi if [ -z $(command -v $DEPMOD) ]; then - echo "'make modules_install' requires $DEPMOD. Please install it." >&2 + echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2 echo "This is probably in the kmod package." >&2 - exit 1 + exit 0 fi # older versions of depmod require the version string to start with three From 38f5d8d8cbb2ffa2b54315118185332329ec891c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 31 Aug 2018 23:30:47 +0900 Subject: [PATCH 089/269] i2c: uniphier: issue STOP only for last message or I2C_M_STOP This driver currently emits a STOP if the next message is not I2C_MD_RD. It should not do it because it disturbs the I2C_RDWR ioctl, where read/write transactions are combined without STOP between. Issue STOP only when the message is the last one _or_ flagged with I2C_M_STOP. Fixes: dd6fd4a32793 ("i2c: uniphier: add UniPhier FIFO-less I2C driver") Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-uniphier.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c index bb181b088291..454f914ae66d 100644 --- a/drivers/i2c/busses/i2c-uniphier.c +++ b/drivers/i2c/busses/i2c-uniphier.c @@ -248,11 +248,8 @@ static int uniphier_i2c_master_xfer(struct i2c_adapter *adap, return ret; for (msg = msgs; msg < emsg; msg++) { - /* If next message is read, skip the stop condition */ - bool stop = !(msg + 1 < emsg && msg[1].flags & I2C_M_RD); - /* but, force it if I2C_M_STOP is set */ - if (msg->flags & I2C_M_STOP) - stop = true; + /* Emit STOP if it is the last message or I2C_M_STOP is set. */ + bool stop = (msg + 1 == emsg) || (msg->flags & I2C_M_STOP); ret = uniphier_i2c_master_xfer_one(adap, msg, stop); if (ret) From 4c85609b08c4761eca0a40fd7beb06bc650f252d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 31 Aug 2018 23:30:48 +0900 Subject: [PATCH 090/269] i2c: uniphier-f: issue STOP only for last message or I2C_M_STOP This driver currently emits a STOP if the next message is not I2C_MD_RD. It should not do it because it disturbs the I2C_RDWR ioctl, where read/write transactions are combined without STOP between. Issue STOP only when the message is the last one _or_ flagged with I2C_M_STOP. Fixes: 6a62974b667f ("i2c: uniphier_f: add UniPhier FIFO-builtin I2C driver") Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-uniphier-f.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c index 9918bdd81619..a403e8579b65 100644 --- a/drivers/i2c/busses/i2c-uniphier-f.c +++ b/drivers/i2c/busses/i2c-uniphier-f.c @@ -401,11 +401,8 @@ static int uniphier_fi2c_master_xfer(struct i2c_adapter *adap, return ret; for (msg = msgs; msg < emsg; msg++) { - /* If next message is read, skip the stop condition */ - bool stop = !(msg + 1 < emsg && msg[1].flags & I2C_M_RD); - /* but, force it if I2C_M_STOP is set */ - if (msg->flags & I2C_M_STOP) - stop = true; + /* Emit STOP if it is the last message or I2C_M_STOP is set. */ + bool stop = (msg + 1 == emsg) || (msg->flags & I2C_M_STOP); ret = uniphier_fi2c_master_xfer_one(adap, msg, stop); if (ret) From f6eb89349078b2ca3e42dbb272ab444bab1b9f42 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 31 Aug 2018 10:24:14 -0300 Subject: [PATCH 091/269] dt-bindings: imx-lpi2c: Remove mx8dv compatible entry mx8dv never entered into production and there is no other place in the kernel referring to this SoC, so remove it from the dt bindings documentation. Signed-off-by: Fabio Estevam Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt index 00e4365d7206..091c8dfd3229 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt @@ -3,7 +3,6 @@ Required properties: - compatible : - "fsl,imx7ulp-lpi2c" for LPI2C compatible with the one integrated on i.MX7ULP soc - - "fsl,imx8dv-lpi2c" for LPI2C compatible with the one integrated on i.MX8DV soc - reg : address and length of the lpi2c master registers - interrupts : lpi2c interrupt - clocks : lpi2c clock specifier @@ -11,7 +10,7 @@ Required properties: Examples: lpi2c7: lpi2c7@40a50000 { - compatible = "fsl,imx8dv-lpi2c"; + compatible = "fsl,imx7ulp-lpi2c"; reg = <0x40A50000 0x10000>; interrupt-parent = <&intc>; interrupts = ; From 20fdcd760a63ea66335c58bceb175e7712ab18ff Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 31 Aug 2018 10:24:15 -0300 Subject: [PATCH 092/269] i2c: imx-lpi2c: Remove mx8dv compatible entry mx8dv never entered into production and there is no other place in the kernel referring to this SoC, so remove it from the driver's compatible entry. Signed-off-by: Fabio Estevam Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx-lpi2c.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 6d975f5221ca..06c4c767af32 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -538,7 +538,6 @@ static const struct i2c_algorithm lpi2c_imx_algo = { static const struct of_device_id lpi2c_imx_of_match[] = { { .compatible = "fsl,imx7ulp-lpi2c" }, - { .compatible = "fsl,imx8dv-lpi2c" }, { }, }; MODULE_DEVICE_TABLE(of, lpi2c_imx_of_match); From c90bbce9eedd14f9428388d9442b5b3e98a2a6dd Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 20 Aug 2018 08:25:37 +0300 Subject: [PATCH 093/269] m68k: fix early memory reservation for ColdFire MMU systems The bootmem to memblock conversion introduced by the commit 1008a11590b9 ("m68k: switch to MEMBLOCK + NO_BOOTMEM") made reservation of kernel code and data to start from a wrong address. Fix it. Signed-off-by: Mike Rapoport Tested-by: Angelo Dureghello Signed-off-by: Greg Ungerer --- arch/m68k/mm/mcfmmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 70dde040779b..f5453d944ff5 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -172,7 +172,7 @@ void __init cf_bootmem_alloc(void) high_memory = (void *)_ramend; /* Reserve kernel text/data/bss */ - memblock_reserve(memstart, memstart - _rambase); + memblock_reserve(_rambase, memstart - _rambase); m68k_virt_to_node_shift = fls(_ramend - 1) - 6; module_fixup(NULL, __start_fixup, __stop_fixup); From c15e3f19a6d5c89b1209dc94b40e568177cb0921 Mon Sep 17 00:00:00 2001 From: Jon Kuhn Date: Mon, 9 Jul 2018 14:33:14 +0000 Subject: [PATCH 094/269] fs/cifs: don't translate SFM_SLASH (U+F026) to backslash When a Mac client saves an item containing a backslash to a file server the backslash is represented in the CIFS/SMB protocol as as U+F026. Before this change, listing a directory containing an item with a backslash in its name will return that item with the backslash represented with a true backslash character (U+005C) because convert_sfm_character mapped U+F026 to U+005C when interpretting the CIFS/SMB protocol response. However, attempting to open or stat the path using a true backslash will result in an error because convert_to_sfm_char does not map U+005C back to U+F026 causing the CIFS/SMB request to be made with the backslash represented as U+005C. This change simply prevents the U+F026 to U+005C conversion from happenning. This is analogous to how the code does not do any translation of UNI_SLASH (U+F000). Signed-off-by: Jon Kuhn Signed-off-by: Steve French --- fs/cifs/cifs_unicode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index b380e0871372..a2b2355e7f01 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -105,9 +105,6 @@ convert_sfm_char(const __u16 src_char, char *target) case SFM_LESSTHAN: *target = '<'; break; - case SFM_SLASH: - *target = '\\'; - break; case SFM_SPACE: *target = ' '; break; From 5e19697b56a64004e2d0ff1bb952ea05493c088f Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 27 Aug 2018 17:04:13 -0500 Subject: [PATCH 095/269] SMB3: Backup intent flag missing for directory opens with backupuid mounts When "backup intent" is requested on the mount (e.g. backupuid or backupgid mount options), the corresponding flag needs to be set on opens of directories (and files) but was missing in some places causing access denied trying to enumerate and backup servers. Fixes kernel bugzilla #200953 https://bugzilla.kernel.org/show_bug.cgi?id=200953 Reported-and-tested-by: Signed-off-by: Steve French CC: Stable Reviewed-by: Pavel Shilovsky --- fs/cifs/inode.c | 2 ++ fs/cifs/smb2ops.c | 25 ++++++++++++++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d32eaa4b2437..6e8765f44508 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -467,6 +467,8 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; oparms.create_options = CREATE_NOT_DIR; + if (backup_cred(cifs_sb)) + oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 247a98e6c856..b6fada244527 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -630,7 +630,10 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; oparms.fid = &fid; oparms.reconnect = false; @@ -779,7 +782,10 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_EA; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; oparms.fid = &fid; oparms.reconnect = false; @@ -858,7 +864,10 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_WRITE_EA; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; oparms.fid = &fid; oparms.reconnect = false; @@ -1453,7 +1462,10 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; oparms.fid = fid; oparms.reconnect = false; @@ -1857,7 +1869,10 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; oparms.fid = &fid; oparms.reconnect = false; From 25f2573512d7b38bca4c0878109db9600b8b711f Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 29 Aug 2018 09:22:22 -0500 Subject: [PATCH 096/269] smb3: minor debugging clarifications in rfc1001 len processing I ran into some cases where server was returning the wrong length on frames but I couldn't easily match them to the command in the network trace (or server logs) since I need the command and/or multiplex id to find the offending SMB2/SMB3 command. Add these two fields to the log message. In the case of padding too much it may not be a problem in all cases but might have correlated to a network disconnect case in some problems we have been looking at. In the case of frame too short is even more important. Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/smb2misc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index db0453660ff6..6a9c47541c53 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -248,16 +248,20 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr) * MacOS server pads after SMB2.1 write response with 3 bytes * of junk. Other servers match RFC1001 len to actual * SMB2/SMB3 frame length (header + smb2 response specific data) - * Some windows servers do too when compounding is used. - * Log the server error (once), but allow it and continue + * Some windows servers also pad up to 8 bytes when compounding. + * If pad is longer than eight bytes, log the server behavior + * (once), since may indicate a problem but allow it and continue * since the frame is parseable. */ if (clc_len < len) { - printk_once(KERN_WARNING - "SMB2 server sent bad RFC1001 len %d not %d\n", - len, clc_len); + pr_warn_once( + "srv rsp padded more than expected. Length %d not %d for cmd:%d mid:%llu\n", + len, clc_len, command, mid); return 0; } + pr_warn_once( + "srv rsp too short, len %d not %d. cmd:%d mid:%llu\n", + len, clc_len, command, mid); return 1; } From f801568332321e2b1e7a8bd26c3e4913a312a2ec Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 31 Aug 2018 15:12:10 -0500 Subject: [PATCH 097/269] smb3: check for and properly advertise directory lease support Although servers will typically ignore unsupported features, we should advertise the support for directory leases (as Windows e.g. does) in the negotiate protocol capabilities we pass to the server, and should check for the server capability (CAP_DIRECTORY_LEASING) before sending a lease request for an open of a directory. This will prevent us from accidentally sending directory leases to SMB2.1 or SMB2 server for example. Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg --- fs/cifs/smb2ops.c | 10 +++++----- fs/cifs/smb2pdu.c | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b6fada244527..d954ce36b473 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3654,7 +3654,7 @@ struct smb_version_values smb21_values = { struct smb_version_values smb3any_values = { .version_string = SMB3ANY_VERSION_STRING, .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */ - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -3675,7 +3675,7 @@ struct smb_version_values smb3any_values = { struct smb_version_values smbdefault_values = { .version_string = SMBDEFAULT_VERSION_STRING, .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */ - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -3696,7 +3696,7 @@ struct smb_version_values smbdefault_values = { struct smb_version_values smb30_values = { .version_string = SMB30_VERSION_STRING, .protocol_id = SMB30_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -3717,7 +3717,7 @@ struct smb_version_values smb30_values = { struct smb_version_values smb302_values = { .version_string = SMB302_VERSION_STRING, .protocol_id = SMB302_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -3738,7 +3738,7 @@ struct smb_version_values smb302_values = { struct smb_version_values smb311_values = { .version_string = SMB311_VERSION_STRING, .protocol_id = SMB311_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5740aa809be6..c08acfc77abc 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2178,6 +2178,9 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock, if (!(server->capabilities & SMB2_GLOBAL_CAP_LEASING) || *oplock == SMB2_OPLOCK_LEVEL_NONE) req->RequestedOplockLevel = *oplock; + else if (!(server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING) && + (oparms->create_options & CREATE_NOT_FILE)) + req->RequestedOplockLevel = *oplock; /* no srv lease support */ else { rc = add_lease_context(server, iov, &n_iov, oparms->fid->lease_key, oplock); From 395a2076b4064f97d3fce03af15210ff2a7bb7f9 Mon Sep 17 00:00:00 2001 From: Thomas Werschlein Date: Thu, 30 Aug 2018 18:29:20 +0200 Subject: [PATCH 098/269] cifs: connect to servername instead of IP for IPC$ share MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is required allows access to a Microsoft fileserver failover cluster behind a 1:1 NAT firewall. The change also provides stronger context for authentication and share connection (see MS-SMB2 3.3.5.7 and MS-SRVS 3.1.6.8) as noted by Tom Talpey, and addresses comments about the buffer size for the UNC made by Aurélien Aptel. Signed-off-by: Thomas Werschlein Signed-off-by: Steve French CC: Tom Talpey Reviewed-by: Aurelien Aptel CC: Stable --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c832a8a1970a..7aa08dba4719 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2547,7 +2547,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info) if (tcon == NULL) return -ENOMEM; - snprintf(unc, sizeof(unc), "\\\\%s\\IPC$", ses->serverName); + snprintf(unc, sizeof(unc), "\\\\%s\\IPC$", ses->server->hostname); /* cannot fail */ nls_codepage = load_nls_default(); From 54ff01fd0d44b9681615f77c15fe9ea6dfadb501 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 30 Aug 2018 11:33:43 +0800 Subject: [PATCH 099/269] drm/i915/gvt: Give new born vGPU higher scheduling chance This trys to give new born vGPU with higher scheduling chance not only with adding to sched list head and also have higher priority for workload sched for 2 seconds after starting to schedule it. In order for fast GPU execution during VM boot, and ensure guest driver setup with required state given in time. This fixes recent failure seen on one VM with multiple linux VMs running on kernel with commit 2621cefaa42b3("drm/i915: Provide a timeout to i915_gem_wait_for_idle() on setup"), which had shorter setup timeout that caused context state init failed. v2: change to 2s for higher scheduling period Cc: Yuan Hang Reviewed-by: Hang Yuan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/sched_policy.c | 34 ++++++++++++++++++++----- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 985fe81794dd..c32e7d5e8629 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -47,11 +47,15 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu) return false; } +/* We give 2 seconds higher prio for vGPU during start */ +#define GVT_SCHED_VGPU_PRI_TIME 2 + struct vgpu_sched_data { struct list_head lru_list; struct intel_vgpu *vgpu; bool active; - + bool pri_sched; + ktime_t pri_time; ktime_t sched_in_time; ktime_t sched_time; ktime_t left_ts; @@ -183,6 +187,14 @@ static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data) if (!vgpu_has_pending_workload(vgpu_data->vgpu)) continue; + if (vgpu_data->pri_sched) { + if (ktime_before(ktime_get(), vgpu_data->pri_time)) { + vgpu = vgpu_data->vgpu; + break; + } else + vgpu_data->pri_sched = false; + } + /* Return the vGPU only if it has time slice left */ if (vgpu_data->left_ts > 0) { vgpu = vgpu_data->vgpu; @@ -202,6 +214,7 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data) struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct vgpu_sched_data *vgpu_data; struct intel_vgpu *vgpu = NULL; + /* no active vgpu or has already had a target */ if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu) goto out; @@ -209,12 +222,13 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data) vgpu = find_busy_vgpu(sched_data); if (vgpu) { scheduler->next_vgpu = vgpu; - - /* Move the last used vGPU to the tail of lru_list */ vgpu_data = vgpu->sched_data; - list_del_init(&vgpu_data->lru_list); - list_add_tail(&vgpu_data->lru_list, - &sched_data->lru_runq_head); + if (!vgpu_data->pri_sched) { + /* Move the last used vGPU to the tail of lru_list */ + list_del_init(&vgpu_data->lru_list); + list_add_tail(&vgpu_data->lru_list, + &sched_data->lru_runq_head); + } } else { scheduler->next_vgpu = gvt->idle_vgpu; } @@ -328,11 +342,17 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) { struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data; struct vgpu_sched_data *vgpu_data = vgpu->sched_data; + ktime_t now; if (!list_empty(&vgpu_data->lru_list)) return; - list_add_tail(&vgpu_data->lru_list, &sched_data->lru_runq_head); + now = ktime_get(); + vgpu_data->pri_time = ktime_add(now, + ktime_set(GVT_SCHED_VGPU_PRI_TIME, 0)); + vgpu_data->pri_sched = true; + + list_add(&vgpu_data->lru_list, &sched_data->lru_runq_head); if (!hrtimer_active(&sched_data->timer)) hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(), From 4331f4d5ada5684fc77fa16e3f6177f077c9e6ec Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 2 Sep 2018 19:30:53 -0700 Subject: [PATCH 100/269] x86: Fix kernel-doc atomic.h warnings Fix kernel-doc warnings in arch/x86/include/asm/atomic.h that are caused by having a #define macro between the kernel-doc notation and the function name. Fixed by moving the #define macro to after the function implementation. Make the same change for atomic64_{32,64}.h for consistency even though there were no kernel-doc warnings found in these header files, but there would be if they were used in generation of documentation. Fixes these kernel-doc warnings: ../arch/x86/include/asm/atomic.h:84: warning: Excess function parameter 'i' description in 'arch_atomic_sub_and_test' ../arch/x86/include/asm/atomic.h:84: warning: Excess function parameter 'v' description in 'arch_atomic_sub_and_test' ../arch/x86/include/asm/atomic.h:96: warning: Excess function parameter 'v' description in 'arch_atomic_inc' ../arch/x86/include/asm/atomic.h:109: warning: Excess function parameter 'v' description in 'arch_atomic_dec' ../arch/x86/include/asm/atomic.h:124: warning: Excess function parameter 'v' description in 'arch_atomic_dec_and_test' ../arch/x86/include/asm/atomic.h:138: warning: Excess function parameter 'v' description in 'arch_atomic_inc_and_test' ../arch/x86/include/asm/atomic.h:153: warning: Excess function parameter 'i' description in 'arch_atomic_add_negative' ../arch/x86/include/asm/atomic.h:153: warning: Excess function parameter 'v' description in 'arch_atomic_add_negative' Fixes: 18cc1814d4e7 ("atomics/treewide: Make test ops optional") Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner Cc: Mark Rutland Link: https://lkml.kernel.org/r/0a1e678d-c8c5-b32c-2640-ed4e94d399d2@infradead.org --- arch/x86/include/asm/atomic.h | 12 ++++++------ arch/x86/include/asm/atomic64_32.h | 8 ++++---- arch/x86/include/asm/atomic64_64.h | 12 ++++++------ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index b143717b92b3..ce84388e540c 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -80,11 +80,11 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -#define arch_atomic_sub_and_test arch_atomic_sub_and_test static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); } +#define arch_atomic_sub_and_test arch_atomic_sub_and_test /** * arch_atomic_inc - increment atomic variable @@ -92,12 +92,12 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) * * Atomically increments @v by 1. */ -#define arch_atomic_inc arch_atomic_inc static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); } +#define arch_atomic_inc arch_atomic_inc /** * arch_atomic_dec - decrement atomic variable @@ -105,12 +105,12 @@ static __always_inline void arch_atomic_inc(atomic_t *v) * * Atomically decrements @v by 1. */ -#define arch_atomic_dec arch_atomic_dec static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); } +#define arch_atomic_dec arch_atomic_dec /** * arch_atomic_dec_and_test - decrement and test @@ -120,11 +120,11 @@ static __always_inline void arch_atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -#define arch_atomic_dec_and_test arch_atomic_dec_and_test static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); } +#define arch_atomic_dec_and_test arch_atomic_dec_and_test /** * arch_atomic_inc_and_test - increment and test @@ -134,11 +134,11 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -#define arch_atomic_inc_and_test arch_atomic_inc_and_test static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); } +#define arch_atomic_inc_and_test arch_atomic_inc_and_test /** * arch_atomic_add_negative - add and test if negative @@ -149,11 +149,11 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -#define arch_atomic_add_negative arch_atomic_add_negative static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); } +#define arch_atomic_add_negative arch_atomic_add_negative /** * arch_atomic_add_return - add integer and return diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index ef959f02d070..6a5b0ec460da 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -205,12 +205,12 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v) * * Atomically increments @v by 1. */ -#define arch_atomic64_inc arch_atomic64_inc static inline void arch_atomic64_inc(atomic64_t *v) { __alternative_atomic64(inc, inc_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); } +#define arch_atomic64_inc arch_atomic64_inc /** * arch_atomic64_dec - decrement atomic64 variable @@ -218,12 +218,12 @@ static inline void arch_atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. */ -#define arch_atomic64_dec arch_atomic64_dec static inline void arch_atomic64_dec(atomic64_t *v) { __alternative_atomic64(dec, dec_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); } +#define arch_atomic64_dec arch_atomic64_dec /** * arch_atomic64_add_unless - add unless the number is a given value @@ -245,7 +245,6 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a, return (int)a; } -#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero static inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; @@ -253,8 +252,8 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v) "S" (v) : "ecx", "edx", "memory"); return r; } +#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero -#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) { long long r; @@ -262,6 +261,7 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) "S" (v) : "ecx", "memory"); return r; } +#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive #undef alternative_atomic64 #undef __alternative_atomic64 diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 4343d9b4f30e..5f851d92eecd 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -71,11 +71,11 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); } +#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test /** * arch_atomic64_inc - increment atomic64 variable @@ -83,13 +83,13 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) * * Atomically increments @v by 1. */ -#define arch_atomic64_inc arch_atomic64_inc static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) : "m" (v->counter)); } +#define arch_atomic64_inc arch_atomic64_inc /** * arch_atomic64_dec - decrement atomic64 variable @@ -97,13 +97,13 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. */ -#define arch_atomic64_dec arch_atomic64_dec static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) : "m" (v->counter)); } +#define arch_atomic64_dec arch_atomic64_dec /** * arch_atomic64_dec_and_test - decrement and test @@ -113,11 +113,11 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) * returns true if the result is 0, or false for all other * cases. */ -#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test static inline bool arch_atomic64_dec_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); } +#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test /** * arch_atomic64_inc_and_test - increment and test @@ -127,11 +127,11 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test static inline bool arch_atomic64_inc_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); } +#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test /** * arch_atomic64_add_negative - add and test if negative @@ -142,11 +142,11 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -#define arch_atomic64_add_negative arch_atomic64_add_negative static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); } +#define arch_atomic64_add_negative arch_atomic64_add_negative /** * arch_atomic64_add_return - add and return From c43c5e9f524ec914e7e202f9c5ab91779629ccc6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 3 Sep 2018 10:15:33 +0200 Subject: [PATCH 101/269] timekeeping: Fix declaration of read_persistent_wall_and_boot_offset() It is read_persistent_wall_and_boot_offset() and not read_persistent_clock_and_boot_offset() Fixes: 3eca993740b8eb40f51 ("timekeeping: Replace read_boot_clock64() with read_persistent_wall_and_boot_offset()") Signed-off-by: Christian Borntraeger Signed-off-by: Thomas Gleixner Cc: Pavel Tatashin Link: https://lkml.kernel.org/r/20180903081533.34366-1-borntraeger@de.ibm.com --- include/linux/timekeeping.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 5d738804e3d6..a5a3cfc3c2fa 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -258,8 +258,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); extern int persistent_clock_is_local; extern void read_persistent_clock64(struct timespec64 *ts); -void read_persistent_clock_and_boot_offset(struct timespec64 *wall_clock, - struct timespec64 *boot_offset); +void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock, + struct timespec64 *boot_offset); extern int update_persistent_clock64(struct timespec64 now); /* From 9bdda4e9cf2dcecb60a0683b10ffb8cd7e5f2f45 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 1 Sep 2018 09:40:01 +0300 Subject: [PATCH 102/269] fsnotify: fix ignore mask logic in fsnotify() Commit 92183a42898d ("fsnotify: fix ignore mask logic in send_to_group()") acknoledges the use case of ignoring an event on an inode mark, because of an ignore mask on a mount mark of the same group (i.e. I want to get all events on this file, except for the events that came from that mount). This change depends on correctly merging the inode marks and mount marks group lists, so that the mount mark ignore mask would be tested in send_to_group(). Alas, the merging of the lists did not take into account the case where event in question is not in the mask of any of the mount marks. To fix this, completely remove the tests for inode and mount event masks from the lists merging code. Fixes: 92183a42898d ("fsnotify: fix ignore mask logic in send_to_group") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index f174397b63a0..ababdbfab537 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -351,16 +351,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - if ((mask & FS_MODIFY) || - (test_mask & to_tell->i_fsnotify_mask)) { - iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = - fsnotify_first_mark(&to_tell->i_fsnotify_marks); - } - - if (mnt && ((mask & FS_MODIFY) || - (test_mask & mnt->mnt_fsnotify_mask))) { - iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = - fsnotify_first_mark(&to_tell->i_fsnotify_marks); + iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = + fsnotify_first_mark(&to_tell->i_fsnotify_marks); + if (mnt) { iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } From 5a7b44a8df822e0667fc76ed7130252523993bda Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Sep 2018 15:16:43 +0200 Subject: [PATCH 103/269] ALSA: rawmidi: Initialize allocated buffers syzbot reported the uninitialized value exposure in certain situations using virmidi loop. It's likely a very small race at writing and reading, and the influence is almost negligible. But it's safer to paper over this just by replacing the existing kvmalloc() with kvzalloc(). Reported-by: syzbot+194dffdb8b22fc5d207a@syzkaller.appspotmail.com Signed-off-by: Takashi Iwai --- sound/core/rawmidi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 69517e18ef07..08d5662039e3 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -129,7 +129,7 @@ static int snd_rawmidi_runtime_create(struct snd_rawmidi_substream *substream) runtime->avail = 0; else runtime->avail = runtime->buffer_size; - runtime->buffer = kvmalloc(runtime->buffer_size, GFP_KERNEL); + runtime->buffer = kvzalloc(runtime->buffer_size, GFP_KERNEL); if (!runtime->buffer) { kfree(runtime); return -ENOMEM; @@ -655,7 +655,7 @@ static int resize_runtime_buffer(struct snd_rawmidi_runtime *runtime, if (params->avail_min < 1 || params->avail_min > params->buffer_size) return -EINVAL; if (params->buffer_size != runtime->buffer_size) { - newbuf = kvmalloc(params->buffer_size, GFP_KERNEL); + newbuf = kvzalloc(params->buffer_size, GFP_KERNEL); if (!newbuf) return -ENOMEM; spin_lock_irq(&runtime->lock); From edf4e7b7b9104b58fddfcd073bd7dcc1585d5326 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sat, 1 Sep 2018 01:57:52 -0700 Subject: [PATCH 104/269] apparmor: fix bad debug check in apparmor_secid_to_secctx() apparmor_secid_to_secctx() has a bad debug statement tripping on a condition handle by the code. When kconfig SECURITY_APPARMOR_DEBUG is enabled the debug WARN_ON will trip when **secdata is NULL resulting in the following trace. ------------[ cut here ]------------ AppArmor WARN apparmor_secid_to_secctx: ((!secdata)): WARNING: CPU: 0 PID: 14826 at security/apparmor/secid.c:82 apparmor_secid_to_secctx+0x2b5/0x2f0 security/apparmor/secid.c:82 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 14826 Comm: syz-executor1 Not tainted 4.19.0-rc1+ #193 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 panic+0x238/0x4e7 kernel/panic.c:184 __warn.cold.8+0x163/0x1ba kernel/panic.c:536 report_bug+0x252/0x2d0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:178 [inline] do_error_trap+0x1fc/0x4d0 arch/x86/kernel/traps.c:296 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:316 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:993 RIP: 0010:apparmor_secid_to_secctx+0x2b5/0x2f0 security/apparmor/secid.c:82 Code: c7 c7 40 66 58 87 e8 6a 6d 0f fe 0f 0b e9 6c fe ff ff e8 3e aa 44 fe 48 c7 c6 80 67 58 87 48 c7 c7 a0 65 58 87 e8 4b 6d 0f fe <0f> 0b e9 3f fe ff ff 48 89 df e8 fc a7 83 fe e9 ed fe ff ff bb f4 RSP: 0018:ffff8801ba1bed10 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8801ba1beed0 RCX: ffffc9000227e000 RDX: 0000000000018482 RSI: ffffffff8163ac01 RDI: 0000000000000001 RBP: ffff8801ba1bed30 R08: ffff8801b80ec080 R09: ffffed003b603eca R10: ffffed003b603eca R11: ffff8801db01f657 R12: 0000000000000001 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8801ba1beed0 security_secid_to_secctx+0x63/0xc0 security/security.c:1314 ctnetlink_secctx_size net/netfilter/nf_conntrack_netlink.c:621 [inline] ctnetlink_nlmsg_size net/netfilter/nf_conntrack_netlink.c:659 [inline] ctnetlink_conntrack_event+0x303/0x1470 net/netfilter/nf_conntrack_netlink.c:706 nf_conntrack_eventmask_report+0x55f/0x930 net/netfilter/nf_conntrack_ecache.c:151 nf_conntrack_event_report include/net/netfilter/nf_conntrack_ecache.h:112 [inline] nf_ct_delete+0x33c/0x5d0 net/netfilter/nf_conntrack_core.c:601 nf_ct_iterate_cleanup+0x48c/0x5e0 net/netfilter/nf_conntrack_core.c:1892 nf_ct_iterate_cleanup_net+0x23c/0x2d0 net/netfilter/nf_conntrack_core.c:1974 ctnetlink_flush_conntrack net/netfilter/nf_conntrack_netlink.c:1226 [inline] ctnetlink_del_conntrack+0x66c/0x850 net/netfilter/nf_conntrack_netlink.c:1258 nfnetlink_rcv_msg+0xd88/0x1070 net/netfilter/nfnetlink.c:228 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 nfnetlink_rcv+0x1c0/0x4d0 net/netfilter/nfnetlink.c:560 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:631 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 __sys_sendmsg+0x11d/0x290 net/socket.c:2152 __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg net/socket.c:2159 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457089 Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f7bc6e03c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f7bc6e046d4 RCX: 0000000000457089 RDX: 0000000000000000 RSI: 0000000020d65000 RDI: 0000000000000003 RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000004d4588 R14: 00000000004c8d5c R15: 0000000000000000 Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled Rebooting in 86400 seconds.. CC: #4.18 Fixes: c092921219d2 ("apparmor: add support for mapping secids and using secctxes") Reported-by: syzbot+21016130b0580a9de3b5@syzkaller.appspotmail.com Signed-off-by: John Johansen --- security/apparmor/secid.c | 1 - 1 file changed, 1 deletion(-) diff --git a/security/apparmor/secid.c b/security/apparmor/secid.c index f2f22d00db18..4ccec1bcf6f5 100644 --- a/security/apparmor/secid.c +++ b/security/apparmor/secid.c @@ -79,7 +79,6 @@ int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) struct aa_label *label = aa_secid_to_label(secid); int len; - AA_BUG(!secdata); AA_BUG(!seclen); if (!label) From f7c50fa636f72490baceb1664ba64973137266f2 Mon Sep 17 00:00:00 2001 From: Keyon Jie Date: Mon, 3 Sep 2018 10:47:09 +0800 Subject: [PATCH 105/269] ALSA: hda: Fix several mismatch for register mask and value E.g. for snd_hdac_ext_link_clear_stream_id(), we should set (1 << stream) as mask, and 0 as value, here correct it and several similar mismatches. And, here also remove unreadable register_mask usage for those mask value updating. Signed-off-by: Keyon Jie Signed-off-by: Takashi Iwai --- sound/hda/ext/hdac_ext_stream.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/sound/hda/ext/hdac_ext_stream.c b/sound/hda/ext/hdac_ext_stream.c index 1bd27576db98..a835558ddbc9 100644 --- a/sound/hda/ext/hdac_ext_stream.c +++ b/sound/hda/ext/hdac_ext_stream.c @@ -146,7 +146,8 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple); */ void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *stream) { - snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL, 0, AZX_PPLCCTL_RUN); + snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL, + AZX_PPLCCTL_RUN, AZX_PPLCCTL_RUN); } EXPORT_SYMBOL_GPL(snd_hdac_ext_link_stream_start); @@ -171,7 +172,8 @@ void snd_hdac_ext_link_stream_reset(struct hdac_ext_stream *stream) snd_hdac_ext_link_stream_clear(stream); - snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL, 0, AZX_PPLCCTL_STRST); + snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL, + AZX_PPLCCTL_STRST, AZX_PPLCCTL_STRST); udelay(3); timeout = 50; do { @@ -242,7 +244,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_link_set_stream_id); void snd_hdac_ext_link_clear_stream_id(struct hdac_ext_link *link, int stream) { - snd_hdac_updatew(link->ml_addr, AZX_REG_ML_LOSIDV, 0, (1 << stream)); + snd_hdac_updatew(link->ml_addr, AZX_REG_ML_LOSIDV, (1 << stream), 0); } EXPORT_SYMBOL_GPL(snd_hdac_ext_link_clear_stream_id); @@ -415,7 +417,6 @@ void snd_hdac_ext_stream_spbcap_enable(struct hdac_bus *bus, bool enable, int index) { u32 mask = 0; - u32 register_mask = 0; if (!bus->spbcap) { dev_err(bus->dev, "Address of SPB capability is NULL\n"); @@ -424,12 +425,8 @@ void snd_hdac_ext_stream_spbcap_enable(struct hdac_bus *bus, mask |= (1 << index); - register_mask = readl(bus->spbcap + AZX_REG_SPB_SPBFCCTL); - - mask |= register_mask; - if (enable) - snd_hdac_updatel(bus->spbcap, AZX_REG_SPB_SPBFCCTL, 0, mask); + snd_hdac_updatel(bus->spbcap, AZX_REG_SPB_SPBFCCTL, mask, mask); else snd_hdac_updatel(bus->spbcap, AZX_REG_SPB_SPBFCCTL, mask, 0); } @@ -503,7 +500,6 @@ void snd_hdac_ext_stream_drsm_enable(struct hdac_bus *bus, bool enable, int index) { u32 mask = 0; - u32 register_mask = 0; if (!bus->drsmcap) { dev_err(bus->dev, "Address of DRSM capability is NULL\n"); @@ -512,12 +508,8 @@ void snd_hdac_ext_stream_drsm_enable(struct hdac_bus *bus, mask |= (1 << index); - register_mask = readl(bus->drsmcap + AZX_REG_SPB_SPBFCCTL); - - mask |= register_mask; - if (enable) - snd_hdac_updatel(bus->drsmcap, AZX_REG_DRSM_CTL, 0, mask); + snd_hdac_updatel(bus->drsmcap, AZX_REG_DRSM_CTL, mask, mask); else snd_hdac_updatel(bus->drsmcap, AZX_REG_DRSM_CTL, mask, 0); } From 399334708b4f07b107094e5db4a390f0f25d2d4f Mon Sep 17 00:00:00 2001 From: Jan-Marek Glogowski Date: Sat, 25 Aug 2018 15:10:35 -0400 Subject: [PATCH 106/269] drm/i915: Re-apply "Perform link quality check, unconditionally during long pulse" This re-applies the workaround for "some DP sinks, [which] are a little nuts" from commit 1a36147bb939 ("drm/i915: Perform link quality check unconditionally during long pulse"). It makes the secondary AOC E2460P monitor connected via DP to an acer Veriton N4640G usable again. This hunk was dropped in commit c85d200e8321 ("drm/i915: Move SST DP link retraining into the ->post_hotplug() hook") Fixes: c85d200e8321 ("drm/i915: Move SST DP link retraining into the ->post_hotplug() hook") [Cleaned up commit message, added stable cc] Signed-off-by: Lyude Paul Signed-off-by: Jan-Marek Glogowski Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20180825191035.3945-1-lyude@redhat.com (cherry picked from commit 3cf71bc9904d7ee4a25a822c5dcb54c7804ea388) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_dp.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index cd0f649b57a5..1193202766a2 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4160,18 +4160,6 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp) return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count); } -/* - * If display is now connected check links status, - * there has been known issues of link loss triggering - * long pulse. - * - * Some sinks (eg. ASUS PB287Q) seem to perform some - * weird HPD ping pong during modesets. So we can apparently - * end up with HPD going low during a modeset, and then - * going back up soon after. And once that happens we must - * retrain the link to get a picture. That's in case no - * userspace component reacted to intermittent HPD dip. - */ int intel_dp_retrain_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx) { @@ -4661,7 +4649,8 @@ intel_dp_unset_edid(struct intel_dp *intel_dp) } static int -intel_dp_long_pulse(struct intel_connector *connector) +intel_dp_long_pulse(struct intel_connector *connector, + struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_dp *intel_dp = intel_attached_dp(&connector->base); @@ -4720,6 +4709,22 @@ intel_dp_long_pulse(struct intel_connector *connector) */ status = connector_status_disconnected; goto out; + } else { + /* + * If display is now connected check links status, + * there has been known issues of link loss triggering + * long pulse. + * + * Some sinks (eg. ASUS PB287Q) seem to perform some + * weird HPD ping pong during modesets. So we can apparently + * end up with HPD going low during a modeset, and then + * going back up soon after. And once that happens we must + * retrain the link to get a picture. That's in case no + * userspace component reacted to intermittent HPD dip. + */ + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + + intel_dp_retrain_link(encoder, ctx); } /* @@ -4781,7 +4786,7 @@ intel_dp_detect(struct drm_connector *connector, return ret; } - status = intel_dp_long_pulse(intel_dp->attached_connector); + status = intel_dp_long_pulse(intel_dp->attached_connector, ctx); } intel_dp->detect_done = false; From 4fe967912ee83048beb45a6b4f0f6774fddcfa0a Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Thu, 23 Aug 2018 18:48:07 -0700 Subject: [PATCH 107/269] drm/i915/dsc: Fix PPS register definition macros for 2nd VDSC engine This patch fixes the PPS4 and PPS5 register definition macros that were resulting into an incorect MMIO address. Fixes: 2efbb2f099fb ("i915/dp/dsc: Add DSC PPS register definitions") Cc: Anusha Srivatsa Signed-off-by: Manasi Navare Reviewed-by: Rodrigo Vivi Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20180824014807.14681-1-manasi.d.navare@intel.com (cherry picked from commit 5df52391ddbed869c7d67b00fbb013bd64334115) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 08ec7446282e..9e63cd47b60f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10422,7 +10422,7 @@ enum skl_power_gate { _ICL_DSC0_PICTURE_PARAMETER_SET_4_PB, \ _ICL_DSC0_PICTURE_PARAMETER_SET_4_PC) #define ICL_DSC1_PICTURE_PARAMETER_SET_4(pipe) _MMIO_PIPE((pipe) - PIPE_B, \ - _ICL_DSC0_PICTURE_PARAMETER_SET_4_PB, \ + _ICL_DSC1_PICTURE_PARAMETER_SET_4_PB, \ _ICL_DSC1_PICTURE_PARAMETER_SET_4_PC) #define DSC_INITIAL_DEC_DELAY(dec_delay) ((dec_delay) << 16) #define DSC_INITIAL_XMIT_DELAY(xmit_delay) ((xmit_delay) << 0) @@ -10437,7 +10437,7 @@ enum skl_power_gate { _ICL_DSC0_PICTURE_PARAMETER_SET_5_PB, \ _ICL_DSC0_PICTURE_PARAMETER_SET_5_PC) #define ICL_DSC1_PICTURE_PARAMETER_SET_5(pipe) _MMIO_PIPE((pipe) - PIPE_B, \ - _ICL_DSC1_PICTURE_PARAMETER_SET_5_PC, \ + _ICL_DSC1_PICTURE_PARAMETER_SET_5_PB, \ _ICL_DSC1_PICTURE_PARAMETER_SET_5_PC) #define DSC_SCALE_DEC_INTINT(scale_dec) ((scale_dec) << 16) #define DSC_SCALE_INC_INT(scale_inc) ((scale_inc) << 0) From 2b82435cb90bed2c5f8398730d964dd11602217c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 31 Aug 2018 20:47:39 +0300 Subject: [PATCH 108/269] drm/i915/dp_mst: Fix enabling pipe clock for all streams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit afb2c4437dae ("drm/i915/ddi: Push pipe clock enabling to encoders") inadvertently stopped enabling the pipe clock for any DP-MST stream after the first one. It also rearranged the pipe clock enabling wrt. initial MST payload allocation step (which may or may not be a problem, but it's contrary to the spec.). Fix things by making the above commit truly a non-functional change. Fixes: afb2c4437dae ("drm/i915/ddi: Push pipe clock enabling to encoders") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107365 Reported-by: Lyude Paul Reported-by: dmummenschanz@web.de Tested-by: dmummenschanz@web.de Tested-by: Lyude Paul Cc: Lyude Paul Cc: dmummenschanz@web.de Cc: Ville Syrjälä Cc: Rodrigo Vivi Cc: Chris Wilson Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Reviewed-by: Lyude Paul Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180831174739.30387-1-imre.deak@intel.com (cherry picked from commit 2b5cf4ef541f1b2facaca58cae5e8e0b5f19ad4c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_ddi.c | 17 +++++++++-------- drivers/gpu/drm/i915/intel_dp_mst.c | 4 ++++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 8761513f3532..c9af34861d9e 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2708,7 +2708,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) intel_dp_stop_link_train(intel_dp); - intel_ddi_enable_pipe_clock(crtc_state); + if (!is_mst) + intel_ddi_enable_pipe_clock(crtc_state); } static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, @@ -2810,14 +2811,14 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST); - intel_ddi_disable_pipe_clock(old_crtc_state); - - /* - * Power down sink before disabling the port, otherwise we end - * up getting interrupts from the sink on detecting link loss. - */ - if (!is_mst) + if (!is_mst) { + intel_ddi_disable_pipe_clock(old_crtc_state); + /* + * Power down sink before disabling the port, otherwise we end + * up getting interrupts from the sink on detecting link loss. + */ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + } intel_disable_ddi_buf(encoder); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 7e3e01607643..4ecd65375603 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -166,6 +166,8 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, struct intel_connector *connector = to_intel_connector(old_conn_state->connector); + intel_ddi_disable_pipe_clock(old_crtc_state); + /* this can fail */ drm_dp_check_act_status(&intel_dp->mst_mgr); /* and this can also fail */ @@ -252,6 +254,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, I915_WRITE(DP_TP_STATUS(port), temp); ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); + + intel_ddi_enable_pipe_clock(pipe_config); } static void intel_mst_enable_dp(struct intel_encoder *encoder, From 1dfdf99106668679b0de5a62fd4f42c1a11c9445 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Wed, 18 Jul 2018 09:54:55 +0800 Subject: [PATCH 109/269] nds32: fix logic for module This bug is report by Dan Carpenter. We shall use ~loc_mask instead of !loc_mask because we need to and(&) the bits of ~loc_mask. Reported-by: Dan Carpenter Fixes: c9a4a8da6baa ("nds32: Loadable modules") Signed-off-by: Greentime Hu --- arch/nds32/kernel/module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nds32/kernel/module.c b/arch/nds32/kernel/module.c index 4167283d8293..1e31829cbc2a 100644 --- a/arch/nds32/kernel/module.c +++ b/arch/nds32/kernel/module.c @@ -40,7 +40,7 @@ void do_reloc16(unsigned int val, unsigned int *loc, unsigned int val_mask, tmp2 = tmp & loc_mask; if (partial_in_place) { - tmp &= (!loc_mask); + tmp &= (~loc_mask); tmp = tmp2 | ((tmp + ((val & val_mask) >> val_shift)) & val_mask); } else { @@ -70,7 +70,7 @@ void do_reloc32(unsigned int val, unsigned int *loc, unsigned int val_mask, tmp2 = tmp & loc_mask; if (partial_in_place) { - tmp &= (!loc_mask); + tmp &= (~loc_mask); tmp = tmp2 | ((tmp + ((val & val_mask) >> val_shift)) & val_mask); } else { From 1944a50859ec2b570b42b459ac25d607fc7c31f0 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 7 Aug 2018 12:03:13 +0800 Subject: [PATCH 110/269] nds32: add NULL entry to the end of_device_id array Make sure of_device_id tables are NULL terminated. Found by coccinelle spatch "misc/of_table.cocci" Signed-off-by: YueHaibing Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/kernel/atl2c.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/nds32/kernel/atl2c.c b/arch/nds32/kernel/atl2c.c index 0c6d031a1c4a..0c5386e72098 100644 --- a/arch/nds32/kernel/atl2c.c +++ b/arch/nds32/kernel/atl2c.c @@ -9,7 +9,8 @@ void __iomem *atl2c_base; static const struct of_device_id atl2c_ids[] __initconst = { - {.compatible = "andestech,atl2c",} + {.compatible = "andestech,atl2c",}, + {} }; static int __init atl2c_of_init(void) From c17df7960534357fb74074c2f514c831d4a9cf5a Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 13 Aug 2018 13:28:23 +0800 Subject: [PATCH 111/269] nds32: Fix empty call trace The compiler predefined macro 'NDS32_ABI_2' had been removed, it should use the '__NDS32_ABI_2' here. Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index a6205fd4db52..f0e974347c26 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -137,7 +137,7 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg) !((unsigned long)base_reg & 0x3) && ((unsigned long)base_reg >= TASK_SIZE)) { unsigned long next_fp; -#if !defined(NDS32_ABI_2) +#if !defined(__NDS32_ABI_2) ret_addr = base_reg[0]; next_fp = base_reg[1]; #else From 6cce95a6c7d288ac2126eee4b95df448b9015b84 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 13 Aug 2018 14:48:49 +0800 Subject: [PATCH 112/269] nds32: Fix get_user/put_user macro expand pointer problem The pointer argument of macro need to be taken out once first, and then use the new pointer in the macro body. In kernel/trace/trace.c, get_user(ch, ubuf++) causes the unexpected increment after expand the macro. Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/include/asm/uaccess.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index 18a009f3804d..3f771e0595e8 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -78,8 +78,9 @@ static inline void set_fs(mm_segment_t fs) #define get_user(x,p) \ ({ \ long __e = -EFAULT; \ - if(likely(access_ok(VERIFY_READ, p, sizeof(*p)))) { \ - __e = __get_user(x,p); \ + const __typeof__(*(p)) __user *__p = (p); \ + if(likely(access_ok(VERIFY_READ, __p, sizeof(*__p)))) { \ + __e = __get_user(x, __p); \ } else \ x = 0; \ __e; \ @@ -99,10 +100,10 @@ static inline void set_fs(mm_segment_t fs) #define __get_user_err(x,ptr,err) \ do { \ - unsigned long __gu_addr = (unsigned long)(ptr); \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ unsigned long __gu_val; \ - __chk_user_ptr(ptr); \ - switch (sizeof(*(ptr))) { \ + __chk_user_ptr(__gu_addr); \ + switch (sizeof(*(__gu_addr))) { \ case 1: \ __get_user_asm("lbi",__gu_val,__gu_addr,err); \ break; \ @@ -119,7 +120,7 @@ do { \ BUILD_BUG(); \ break; \ } \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__typeof__(*(__gu_addr)))__gu_val; \ } while (0) #define __get_user_asm(inst,x,addr,err) \ @@ -169,8 +170,9 @@ do { \ #define put_user(x,p) \ ({ \ long __e = -EFAULT; \ - if(likely(access_ok(VERIFY_WRITE, p, sizeof(*p)))) { \ - __e = __put_user(x,p); \ + __typeof__(*(p)) __user *__p = (p); \ + if(likely(access_ok(VERIFY_WRITE, __p, sizeof(*__p)))) { \ + __e = __put_user(x, __p); \ } \ __e; \ }) @@ -189,10 +191,10 @@ do { \ #define __put_user_err(x,ptr,err) \ do { \ - unsigned long __pu_addr = (unsigned long)(ptr); \ - __typeof__(*(ptr)) __pu_val = (x); \ - __chk_user_ptr(ptr); \ - switch (sizeof(*(ptr))) { \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + __typeof__(*(__pu_addr)) __pu_val = (x); \ + __chk_user_ptr(__pu_addr); \ + switch (sizeof(*(__pu_addr))) { \ case 1: \ __put_user_asm("sbi",__pu_val,__pu_addr,err); \ break; \ From 7ef39548df8cdb6406e3b4b7255e7f8cd3fe3b13 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 13 Aug 2018 15:08:52 +0800 Subject: [PATCH 113/269] nds32: Clean up the coding style 1. Adjust indentation. 2. Unify argument name of each macro. 3. Add space after comma in parameters list. 4. Add space after 'if' keyword. 5. Replace space by tab. 6. Change asm volatile to __asm__ __volatile__ Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/include/asm/uaccess.h | 201 ++++++++++++++++--------------- 1 file changed, 103 insertions(+), 98 deletions(-) diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index 3f771e0595e8..e1a2b5b749b9 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -38,7 +38,7 @@ struct exception_table_entry { extern int fixup_exception(struct pt_regs *regs); #define KERNEL_DS ((mm_segment_t) { ~0UL }) -#define USER_DS ((mm_segment_t) {TASK_SIZE - 1}) +#define USER_DS ((mm_segment_t) {TASK_SIZE - 1}) #define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) @@ -49,11 +49,11 @@ static inline void set_fs(mm_segment_t fs) current_thread_info()->addr_limit = fs; } -#define segment_eq(a, b) ((a) == (b)) +#define segment_eq(a, b) ((a) == (b)) #define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size)) -#define access_ok(type, addr, size) \ +#define access_ok(type, addr, size) \ __range_ok((unsigned long)addr, (unsigned long)size) /* * Single-value transfer routines. They automatically use the right @@ -75,46 +75,48 @@ static inline void set_fs(mm_segment_t fs) * versions are void (ie, don't return a value as such). */ -#define get_user(x,p) \ +#define get_user(x, ptr) \ ({ \ long __e = -EFAULT; \ - const __typeof__(*(p)) __user *__p = (p); \ - if(likely(access_ok(VERIFY_READ, __p, sizeof(*__p)))) { \ + const __typeof__(*(ptr)) __user *__p = (ptr); \ + if (likely(access_ok(VERIFY_READ, __p, sizeof(*__p)))) { \ __e = __get_user(x, __p); \ - } else \ - x = 0; \ + } else { \ + (x) = 0; \ + } \ __e; \ }) -#define __get_user(x,ptr) \ + +#define __get_user(x, ptr) \ ({ \ long __gu_err = 0; \ - __get_user_err((x),(ptr),__gu_err); \ + __get_user_err((x), (ptr), __gu_err); \ __gu_err; \ }) -#define __get_user_error(x,ptr,err) \ +#define __get_user_error(x, ptr, err) \ ({ \ - __get_user_err((x),(ptr),err); \ - (void) 0; \ + __get_user_err((x), (ptr), err); \ + (void)0; \ }) -#define __get_user_err(x,ptr,err) \ +#define __get_user_err(x, ptr, err) \ do { \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ unsigned long __gu_val; \ __chk_user_ptr(__gu_addr); \ switch (sizeof(*(__gu_addr))) { \ case 1: \ - __get_user_asm("lbi",__gu_val,__gu_addr,err); \ + __get_user_asm("lbi", __gu_val, __gu_addr, (err)); \ break; \ case 2: \ - __get_user_asm("lhi",__gu_val,__gu_addr,err); \ + __get_user_asm("lhi", __gu_val, __gu_addr, (err)); \ break; \ case 4: \ - __get_user_asm("lwi",__gu_val,__gu_addr,err); \ + __get_user_asm("lwi", __gu_val, __gu_addr, (err)); \ break; \ case 8: \ - __get_user_asm_dword(__gu_val,__gu_addr,err); \ + __get_user_asm_dword(__gu_val, __gu_addr, (err)); \ break; \ default: \ BUILD_BUG(); \ @@ -123,23 +125,23 @@ do { \ (x) = (__typeof__(*(__gu_addr)))__gu_val; \ } while (0) -#define __get_user_asm(inst,x,addr,err) \ - asm volatile( \ - "1: "inst" %1,[%2]\n" \ - "2:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: move %0, %3\n" \ - " move %1, #0\n" \ - " b 2b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 3b\n" \ - " .previous" \ - : "+r" (err), "=&r" (x) \ - : "r" (addr), "i" (-EFAULT) \ - : "cc") +#define __get_user_asm(inst, x, addr, err) \ + __asm__ __volatile__ ( \ + "1: "inst" %1,[%2]\n" \ + "2:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: move %0, %3\n" \ + " move %1, #0\n" \ + " b 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 3b\n" \ + " .previous" \ + : "+r" (err), "=&r" (x) \ + : "r" (addr), "i" (-EFAULT) \ + : "cc") #ifdef __NDS32_EB__ #define __gu_reg_oper0 "%H1" @@ -150,62 +152,64 @@ do { \ #endif #define __get_user_asm_dword(x, addr, err) \ - asm volatile( \ - "\n1:\tlwi " __gu_reg_oper0 ",[%2]\n" \ - "\n2:\tlwi " __gu_reg_oper1 ",[%2+4]\n" \ - "3:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "4: move %0, %3\n" \ - " b 3b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 4b\n" \ - " .long 2b, 4b\n" \ - " .previous" \ - : "+r"(err), "=&r"(x) \ - : "r"(addr), "i"(-EFAULT) \ - : "cc") -#define put_user(x,p) \ + __asm__ __volatile__ ( \ + "\n1:\tlwi " __gu_reg_oper0 ",[%2]\n" \ + "\n2:\tlwi " __gu_reg_oper1 ",[%2+4]\n" \ + "3:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "4: move %0, %3\n" \ + " b 3b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 4b\n" \ + " .long 2b, 4b\n" \ + " .previous" \ + : "+r"(err), "=&r"(x) \ + : "r"(addr), "i"(-EFAULT) \ + : "cc") + +#define put_user(x, ptr) \ ({ \ long __e = -EFAULT; \ - __typeof__(*(p)) __user *__p = (p); \ - if(likely(access_ok(VERIFY_WRITE, __p, sizeof(*__p)))) { \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + if (likely(access_ok(VERIFY_WRITE, __p, sizeof(*__p)))) { \ __e = __put_user(x, __p); \ } \ __e; \ }) -#define __put_user(x,ptr) \ + +#define __put_user(x, ptr) \ ({ \ long __pu_err = 0; \ - __put_user_err((x),(ptr),__pu_err); \ + __put_user_err((x), (ptr), __pu_err); \ __pu_err; \ }) -#define __put_user_error(x,ptr,err) \ +#define __put_user_error(x, ptr, err) \ ({ \ - __put_user_err((x),(ptr),err); \ - (void) 0; \ + __put_user_err((x), (ptr), err); \ + (void)0; \ }) -#define __put_user_err(x,ptr,err) \ +#define __put_user_err(x, ptr, err) \ do { \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ __typeof__(*(__pu_addr)) __pu_val = (x); \ __chk_user_ptr(__pu_addr); \ switch (sizeof(*(__pu_addr))) { \ case 1: \ - __put_user_asm("sbi",__pu_val,__pu_addr,err); \ + __put_user_asm("sbi", __pu_val, __pu_addr, (err)); \ break; \ case 2: \ - __put_user_asm("shi",__pu_val,__pu_addr,err); \ + __put_user_asm("shi", __pu_val, __pu_addr, (err)); \ break; \ case 4: \ - __put_user_asm("swi",__pu_val,__pu_addr,err); \ + __put_user_asm("swi", __pu_val, __pu_addr, (err)); \ break; \ case 8: \ - __put_user_asm_dword(__pu_val,__pu_addr,err); \ + __put_user_asm_dword(__pu_val, __pu_addr, (err)); \ break; \ default: \ BUILD_BUG(); \ @@ -213,22 +217,22 @@ do { \ } \ } while (0) -#define __put_user_asm(inst,x,addr,err) \ - asm volatile( \ - "1: "inst" %1,[%2]\n" \ - "2:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: move %0, %3\n" \ - " b 2b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 3b\n" \ - " .previous" \ - : "+r" (err) \ - : "r" (x), "r" (addr), "i" (-EFAULT) \ - : "cc") +#define __put_user_asm(inst, x, addr, err) \ + __asm__ __volatile__ ( \ + "1: "inst" %1,[%2]\n" \ + "2:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: move %0, %3\n" \ + " b 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 3b\n" \ + " .previous" \ + : "+r" (err) \ + : "r" (x), "r" (addr), "i" (-EFAULT) \ + : "cc") #ifdef __NDS32_EB__ #define __pu_reg_oper0 "%H2" @@ -239,23 +243,24 @@ do { \ #endif #define __put_user_asm_dword(x, addr, err) \ - asm volatile( \ - "\n1:\tswi " __pu_reg_oper0 ",[%1]\n" \ - "\n2:\tswi " __pu_reg_oper1 ",[%1+4]\n" \ - "3:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "4: move %0, %3\n" \ - " b 3b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 4b\n" \ - " .long 2b, 4b\n" \ - " .previous" \ - : "+r"(err) \ - : "r"(addr), "r"(x), "i"(-EFAULT) \ - : "cc") + __asm__ __volatile__ ( \ + "\n1:\tswi " __pu_reg_oper0 ",[%1]\n" \ + "\n2:\tswi " __pu_reg_oper1 ",[%1+4]\n" \ + "3:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "4: move %0, %3\n" \ + " b 3b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 4b\n" \ + " .long 2b, 4b\n" \ + " .previous" \ + : "+r"(err) \ + : "r"(addr), "r"(x), "i"(-EFAULT) \ + : "cc") + extern unsigned long __arch_clear_user(void __user * addr, unsigned long n); extern long strncpy_from_user(char *dest, const char __user * src, long count); extern __must_check long strlen_user(const char __user * str); From 487913ab18c215b06611c4c91c7e905fc0960eb8 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 13 Aug 2018 16:02:53 +0800 Subject: [PATCH 114/269] nds32: Extract the checking and getting pointer to a macro Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/include/asm/uaccess.h | 80 ++++++++++++++++---------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index e1a2b5b749b9..362a32d9bd16 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -75,54 +75,54 @@ static inline void set_fs(mm_segment_t fs) * versions are void (ie, don't return a value as such). */ -#define get_user(x, ptr) \ -({ \ - long __e = -EFAULT; \ - const __typeof__(*(ptr)) __user *__p = (ptr); \ - if (likely(access_ok(VERIFY_READ, __p, sizeof(*__p)))) { \ - __e = __get_user(x, __p); \ - } else { \ - (x) = 0; \ - } \ - __e; \ -}) +#define get_user __get_user \ #define __get_user(x, ptr) \ ({ \ long __gu_err = 0; \ - __get_user_err((x), (ptr), __gu_err); \ + __get_user_check((x), (ptr), __gu_err); \ __gu_err; \ }) #define __get_user_error(x, ptr, err) \ ({ \ - __get_user_err((x), (ptr), err); \ + __get_user_check((x), (ptr), (err)); \ (void)0; \ }) +#define __get_user_check(x, ptr, err) \ +({ \ + const __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ + __get_user_err((x), __p, (err)); \ + } else { \ + (x) = 0; (err) = -EFAULT; \ + } \ +}) + #define __get_user_err(x, ptr, err) \ do { \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ unsigned long __gu_val; \ - __chk_user_ptr(__gu_addr); \ - switch (sizeof(*(__gu_addr))) { \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("lbi", __gu_val, __gu_addr, (err)); \ + __get_user_asm("lbi", __gu_val, (ptr), (err)); \ break; \ case 2: \ - __get_user_asm("lhi", __gu_val, __gu_addr, (err)); \ + __get_user_asm("lhi", __gu_val, (ptr), (err)); \ break; \ case 4: \ - __get_user_asm("lwi", __gu_val, __gu_addr, (err)); \ + __get_user_asm("lwi", __gu_val, (ptr), (err)); \ break; \ case 8: \ - __get_user_asm_dword(__gu_val, __gu_addr, (err)); \ + __get_user_asm_dword(__gu_val, (ptr), (err)); \ break; \ default: \ BUILD_BUG(); \ break; \ } \ - (x) = (__typeof__(*(__gu_addr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) #define __get_user_asm(inst, x, addr, err) \ @@ -170,15 +170,7 @@ do { \ : "r"(addr), "i"(-EFAULT) \ : "cc") -#define put_user(x, ptr) \ -({ \ - long __e = -EFAULT; \ - __typeof__(*(ptr)) __user *__p = (ptr); \ - if (likely(access_ok(VERIFY_WRITE, __p, sizeof(*__p)))) { \ - __e = __put_user(x, __p); \ - } \ - __e; \ -}) +#define put_user __put_user \ #define __put_user(x, ptr) \ ({ \ @@ -189,27 +181,37 @@ do { \ #define __put_user_error(x, ptr, err) \ ({ \ - __put_user_err((x), (ptr), err); \ + __put_user_err((x), (ptr), (err)); \ (void)0; \ }) +#define __put_user_check(x, ptr, err) \ +({ \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ + __put_user_err((x), __p, (err)); \ + } else { \ + (err) = -EFAULT; \ + } \ +}) + #define __put_user_err(x, ptr, err) \ do { \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(__pu_addr)) __pu_val = (x); \ - __chk_user_ptr(__pu_addr); \ - switch (sizeof(*(__pu_addr))) { \ + __typeof__(*(ptr)) __pu_val = (x); \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ case 1: \ - __put_user_asm("sbi", __pu_val, __pu_addr, (err)); \ + __put_user_asm("sbi", __pu_val, (ptr), (err)); \ break; \ case 2: \ - __put_user_asm("shi", __pu_val, __pu_addr, (err)); \ + __put_user_asm("shi", __pu_val, (ptr), (err)); \ break; \ case 4: \ - __put_user_asm("swi", __pu_val, __pu_addr, (err)); \ + __put_user_asm("swi", __pu_val, (ptr), (err)); \ break; \ case 8: \ - __put_user_asm_dword(__pu_val, __pu_addr, (err)); \ + __put_user_asm_dword(__pu_val, (ptr), (err)); \ break; \ default: \ BUILD_BUG(); \ From a18082575c664847d36c6ca030b09ce8d93aec2f Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 15 Aug 2018 10:45:59 +0800 Subject: [PATCH 115/269] nds32/ftrace: Support static function tracer This patch support the static function tracer. On nds32 ABI, we need to always push return address to stack for __builtin_return_address can work correctly, otherwise, it will get the wrong value of $lp at leaf function. Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/Kconfig | 1 + arch/nds32/Makefile | 4 ++++ arch/nds32/include/asm/ftrace.h | 20 ++++++++++++++++++++ arch/nds32/kernel/Makefile | 6 ++++++ arch/nds32/kernel/ftrace.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+) create mode 100644 arch/nds32/include/asm/ftrace.h create mode 100644 arch/nds32/kernel/ftrace.c diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 1d4248fa55e9..853497fe4266 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -40,6 +40,7 @@ config NDS32 select NO_IOPORT_MAP select RTC_LIB select THREAD_INFO_IN_TASK + select HAVE_FUNCTION_TRACER help Andes(nds32) Linux support. diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index 63f4f173e5f4..3509fac10491 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -5,6 +5,10 @@ KBUILD_DEFCONFIG := defconfig comma = , +ifdef CONFIG_FUNCTION_TRACER +arch-y += -malways-save-lp -mno-relax +endif + KBUILD_CFLAGS += $(call cc-option, -mno-sched-prolog-epilog) KBUILD_CFLAGS += -mcmodel=large diff --git a/arch/nds32/include/asm/ftrace.h b/arch/nds32/include/asm/ftrace.h new file mode 100644 index 000000000000..bac7657f576a --- /dev/null +++ b/arch/nds32/include/asm/ftrace.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_NDS32_FTRACE_H +#define __ASM_NDS32_FTRACE_H + +#ifdef CONFIG_FUNCTION_TRACER + +#define HAVE_FUNCTION_GRAPH_FP_TEST + +#define MCOUNT_ADDR ((unsigned long)(_mcount)) +/* mcount call is composed of three instructions: + * sethi + ori + jral + */ +#define MCOUNT_INSN_SIZE 12 + +extern void _mcount(unsigned long parent_ip); + +#endif /* CONFIG_FUNCTION_TRACER */ + +#endif /* __ASM_NDS32_FTRACE_H */ diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile index 42792743e8b9..27cded39fa66 100644 --- a/arch/nds32/kernel/Makefile +++ b/arch/nds32/kernel/Makefile @@ -21,3 +21,9 @@ extra-y := head.o vmlinux.lds obj-y += vdso/ + +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +endif diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c new file mode 100644 index 000000000000..563f64c070b3 --- /dev/null +++ b/arch/nds32/kernel/ftrace.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +extern void (*ftrace_trace_function)(unsigned long, unsigned long, + struct ftrace_ops*, struct pt_regs*); + +noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) +{ + __asm__ (""); /* avoid to optimize as pure function */ +} + +noinline void _mcount(unsigned long parent_ip) +{ + /* save all state by the compiler prologue */ + + unsigned long ip = (unsigned long)__builtin_return_address(0); + + if (ftrace_trace_function != ftrace_stub) + ftrace_trace_function(ip - MCOUNT_INSN_SIZE, parent_ip, + NULL, NULL); + + /* restore all state by the compiler epilogue */ +} +EXPORT_SYMBOL(_mcount); From 1e9b14c0d92b61a0979fd5ee24d5e7f080f11030 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 15 Aug 2018 10:53:04 +0800 Subject: [PATCH 116/269] nds32/ftrace: Support static function graph tracer This patch contains implementation of static function graph tracer. Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/Kconfig | 1 + arch/nds32/kernel/ftrace.c | 69 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 853497fe4266..ea171a00327c 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -41,6 +41,7 @@ config NDS32 select RTC_LIB select THREAD_INFO_IN_TASK select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER help Andes(nds32) Linux support. diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 563f64c070b3..707fce76522e 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -6,6 +6,8 @@ extern void (*ftrace_trace_function)(unsigned long, unsigned long, struct ftrace_ops*, struct pt_regs*); +extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); +extern void ftrace_graph_caller(void); noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs) @@ -23,6 +25,73 @@ noinline void _mcount(unsigned long parent_ip) ftrace_trace_function(ip - MCOUNT_INSN_SIZE, parent_ip, NULL, NULL); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (ftrace_graph_return != (trace_func_graph_ret_t)ftrace_stub + || ftrace_graph_entry != ftrace_graph_entry_stub) + ftrace_graph_caller(); +#endif + /* restore all state by the compiler epilogue */ } EXPORT_SYMBOL(_mcount); + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) + return; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer, NULL); + + if (err == -EBUSY) + return; + + *parent = return_hooker; +} + +noinline void ftrace_graph_caller(void) +{ + unsigned long *parent_ip = + (unsigned long *)(__builtin_frame_address(2) - 4); + + unsigned long selfpc = + (unsigned long)(__builtin_return_address(1) - MCOUNT_INSN_SIZE); + + unsigned long frame_pointer = + (unsigned long)__builtin_frame_address(3); + + prepare_ftrace_return(parent_ip, selfpc, frame_pointer); +} + +extern unsigned long ftrace_return_to_handler(unsigned long frame_pointer); +void __naked return_to_handler(void) +{ + __asm__ __volatile__ ( + /* save state needed by the ABI */ + "smw.adm $r0,[$sp],$r1,#0x0 \n\t" + + /* get original return address */ + "move $r0, $fp \n\t" + "bal ftrace_return_to_handler\n\t" + "move $lp, $r0 \n\t" + + /* restore state nedded by the ABI */ + "lmw.bim $r0,[$sp],$r1,#0x0 \n\t"); +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From fbf58a52ac088669dfa930e557d0303a9fbb7e17 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 15 Aug 2018 10:57:16 +0800 Subject: [PATCH 117/269] nds32/ftrace: Add RECORD_MCOUNT support Recognize NDS32 object files in recordmcount.pl. Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/Kconfig | 1 + scripts/recordmcount.pl | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index ea171a00327c..48d92171ea20 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -42,6 +42,7 @@ config NDS32 select THREAD_INFO_IN_TASK select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FTRACE_MCOUNT_RECORD help Andes(nds32) Linux support. diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index fe06e77c15eb..f599031260d5 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -389,6 +389,9 @@ if ($arch eq "x86_64") { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL\\s_mcount\$"; $type = ".quad"; $alignment = 2; +} elsif ($arch eq "nds32") { + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_NDS32_HI20_RELA\\s+_mcount\$"; + $alignment = 2; } else { die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; } From 6b1d6d2fba37129f690ee7e9164f225c55626cac Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 15 Aug 2018 11:00:08 +0800 Subject: [PATCH 118/269] nds32/ftrace: Support dynamic function tracer This patch contains the implementation of dynamic function tracer. The mcount call is composed of three instructions, so there are three nop for enough placeholder. Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/Kconfig | 1 + arch/nds32/include/asm/ftrace.h | 26 +++++ arch/nds32/kernel/ftrace.c | 164 ++++++++++++++++++++++++++++++++ 3 files changed, 191 insertions(+) diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 48d92171ea20..7068f341133d 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -43,6 +43,7 @@ config NDS32 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_DYNAMIC_FTRACE help Andes(nds32) Linux support. diff --git a/arch/nds32/include/asm/ftrace.h b/arch/nds32/include/asm/ftrace.h index bac7657f576a..2f96cc96aa35 100644 --- a/arch/nds32/include/asm/ftrace.h +++ b/arch/nds32/include/asm/ftrace.h @@ -15,6 +15,32 @@ extern void _mcount(unsigned long parent_ip); +#ifdef CONFIG_DYNAMIC_FTRACE + +#define FTRACE_ADDR ((unsigned long)_ftrace_caller) + +#ifdef __NDS32_EL__ +#define INSN_NOP 0x09000040 +#define INSN_SIZE(insn) (((insn & 0x00000080) == 0) ? 4 : 2) +#define IS_SETHI(insn) ((insn & 0x000000fe) == 0x00000046) +#define ENDIAN_CONVERT(insn) be32_to_cpu(insn) +#else /* __NDS32_EB__ */ +#define INSN_NOP 0x40000009 +#define INSN_SIZE(insn) (((insn & 0x80000000) == 0) ? 4 : 2) +#define IS_SETHI(insn) ((insn & 0xfe000000) == 0x46000000) +#define ENDIAN_CONVERT(insn) (insn) +#endif + +extern void _ftrace_caller(unsigned long parent_ip); +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} +struct dyn_arch_ftrace { +}; + +#endif /* CONFIG_DYNAMIC_FTRACE */ + #endif /* CONFIG_FUNCTION_TRACER */ #endif /* __ASM_NDS32_FTRACE_H */ diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 707fce76522e..3ca676b75d97 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -4,6 +4,7 @@ #include #include +#ifndef CONFIG_DYNAMIC_FTRACE extern void (*ftrace_trace_function)(unsigned long, unsigned long, struct ftrace_ops*, struct pt_regs*); extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); @@ -35,6 +36,168 @@ noinline void _mcount(unsigned long parent_ip) } EXPORT_SYMBOL(_mcount); +#else /* CONFIG_DYNAMIC_FTRACE */ + +noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) +{ + __asm__ (""); /* avoid to optimize as pure function */ +} + +noinline void __naked _mcount(unsigned long parent_ip) +{ + __asm__ (""); /* avoid to optimize as pure function */ +} +EXPORT_SYMBOL(_mcount); + +#define XSTR(s) STR(s) +#define STR(s) #s +void _ftrace_caller(unsigned long parent_ip) +{ + /* save all state needed by the compiler prologue */ + + /* + * prepare arguments for real tracing function + * first arg : __builtin_return_address(0) - MCOUNT_INSN_SIZE + * second arg : parent_ip + */ + __asm__ __volatile__ ( + "move $r1, %0 \n\t" + "addi $r0, %1, #-" XSTR(MCOUNT_INSN_SIZE) "\n\t" + : + : "r" (parent_ip), "r" (__builtin_return_address(0))); + + /* a placeholder for the call to a real tracing function */ + __asm__ __volatile__ ( + "ftrace_call: \n\t" + "nop \n\t" + "nop \n\t" + "nop \n\t"); + + /* restore all state needed by the compiler epilogue */ +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} + +int ftrace_arch_code_modify_prepare(void) +{ + set_all_modules_text_rw(); + return 0; +} + +int ftrace_arch_code_modify_post_process(void) +{ + set_all_modules_text_ro(); + return 0; +} + +static unsigned long gen_sethi_insn(unsigned long addr) +{ + unsigned long opcode = 0x46000000; + unsigned long imm = addr >> 12; + unsigned long rt_num = 0xf << 20; + + return ENDIAN_CONVERT(opcode | rt_num | imm); +} + +static unsigned long gen_ori_insn(unsigned long addr) +{ + unsigned long opcode = 0x58000000; + unsigned long imm = addr & 0x0000fff; + unsigned long rt_num = 0xf << 20; + unsigned long ra_num = 0xf << 15; + + return ENDIAN_CONVERT(opcode | rt_num | ra_num | imm); +} + +static unsigned long gen_jral_insn(unsigned long addr) +{ + unsigned long opcode = 0x4a000001; + unsigned long rt_num = 0x1e << 20; + unsigned long rb_num = 0xf << 10; + + return ENDIAN_CONVERT(opcode | rt_num | rb_num); +} + +static void ftrace_gen_call_insn(unsigned long *call_insns, + unsigned long addr) +{ + call_insns[0] = gen_sethi_insn(addr); /* sethi $r15, imm20u */ + call_insns[1] = gen_ori_insn(addr); /* ori $r15, $r15, imm15u */ + call_insns[2] = gen_jral_insn(addr); /* jral $lp, $r15 */ +} + +static int __ftrace_modify_code(unsigned long pc, unsigned long *old_insn, + unsigned long *new_insn, bool validate) +{ + unsigned long orig_insn[3]; + + if (validate) { + if (probe_kernel_read(orig_insn, (void *)pc, MCOUNT_INSN_SIZE)) + return -EFAULT; + if (memcmp(orig_insn, old_insn, MCOUNT_INSN_SIZE)) + return -EINVAL; + } + + if (probe_kernel_write((void *)pc, new_insn, MCOUNT_INSN_SIZE)) + return -EPERM; + + return 0; +} + +static int ftrace_modify_code(unsigned long pc, unsigned long *old_insn, + unsigned long *new_insn, bool validate) +{ + int ret; + + ret = __ftrace_modify_code(pc, old_insn, new_insn, validate); + if (ret) + return ret; + + flush_icache_range(pc, pc + MCOUNT_INSN_SIZE); + + return ret; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc = (unsigned long)&ftrace_call; + unsigned long old_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + unsigned long new_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + + if (func != ftrace_stub) + ftrace_gen_call_insn(new_insn, (unsigned long)func); + + return ftrace_modify_code(pc, old_insn, new_insn, false); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + + ftrace_gen_call_insn(call_insn, addr); + + return ftrace_modify_code(pc, nop_insn, call_insn, true); +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc = rec->ip; + unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + + ftrace_gen_call_insn(call_insn, addr); + + return ftrace_modify_code(pc, call_insn, nop_insn, true); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + #ifdef CONFIG_FUNCTION_GRAPH_TRACER void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long frame_pointer) @@ -94,4 +257,5 @@ void __naked return_to_handler(void) /* restore state nedded by the ABI */ "lmw.bim $r0,[$sp],$r1,#0x0 \n\t"); } + #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From 95cd2f7bce9aa712473bba1b5b3f4fdec148baee Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 15 Aug 2018 11:01:10 +0800 Subject: [PATCH 119/269] nds32/ftrace: Support dynamic function graph tracer This patch contains the implementation of dynamic function graph tracer. Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/kernel/ftrace.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 3ca676b75d97..a646a8339052 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -74,6 +74,14 @@ void _ftrace_caller(unsigned long parent_ip) "nop \n\t" "nop \n\t"); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* a placeholder for the call to ftrace_graph_caller */ + __asm__ __volatile__ ( + "ftrace_graph_call: \n\t" + "nop \n\t" + "nop \n\t" + "nop \n\t"); +#endif /* restore all state needed by the compiler epilogue */ } @@ -258,4 +266,32 @@ void __naked return_to_handler(void) "lmw.bim $r0,[$sp],$r1,#0x0 \n\t"); } +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; + +static int ftrace_modify_graph_caller(bool enable) +{ + unsigned long pc = (unsigned long)&ftrace_graph_call; + unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + + ftrace_gen_call_insn(call_insn, (unsigned long)ftrace_graph_caller); + + if (enable) + return ftrace_modify_code(pc, nop_insn, call_insn, true); + else + return ftrace_modify_code(pc, call_insn, nop_insn, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From 1e377ae9b04aef4dc531fa4c5f81b65d440ebcba Mon Sep 17 00:00:00 2001 From: Zong Li Date: Wed, 15 Aug 2018 11:05:40 +0800 Subject: [PATCH 120/269] nds32/stack: Get real return address by using ftrace_graph_ret_addr Function graph tracer has modified the return address to 'return_to_handler' on stack, and provide the 'ftrace_graph_ret_addr' to get the real return address. Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/kernel/stacktrace.c | 4 ++++ arch/nds32/kernel/traps.c | 30 ++++++------------------------ 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c index 8b231e910ea6..36bc87003e83 100644 --- a/arch/nds32/kernel/stacktrace.c +++ b/arch/nds32/kernel/stacktrace.c @@ -4,6 +4,7 @@ #include #include #include +#include void save_stack_trace(struct stack_trace *trace) { @@ -16,6 +17,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) unsigned long *fpn; int skip = trace->skip; int savesched; + int graph_idx = 0; if (tsk == current) { __asm__ __volatile__("\tori\t%0, $fp, #0\n":"=r"(fpn)); @@ -33,6 +35,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) fpp = fpn[FP_OFFSET]; if (!__kernel_text_address(lpp)) break; + else + lpp = ftrace_graph_ret_addr(tsk, &graph_idx, lpp, NULL); if (savesched || !in_sched_functions(lpp)) { if (skip) { diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index f0e974347c26..7684c8f597ed 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -94,28 +95,6 @@ static void dump_instr(struct pt_regs *regs) set_fs(fs); } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -#include -static void -get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph) -{ - if (*addr == (unsigned long)return_to_handler) { - int index = tsk->curr_ret_stack; - - if (tsk->ret_stack && index >= *graph) { - index -= *graph; - *addr = tsk->ret_stack[index].ret; - (*graph)++; - } - } -} -#else -static inline void -get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph) -{ -} -#endif - #define LOOP_TIMES (100) static void __dump(struct task_struct *tsk, unsigned long *base_reg) { @@ -126,7 +105,8 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg) while (!kstack_end(base_reg)) { ret_addr = *base_reg++; if (__kernel_text_address(ret_addr)) { - get_real_ret_addr(&ret_addr, tsk, &graph); + ret_addr = ftrace_graph_ret_addr( + tsk, &graph, ret_addr, NULL); print_ip_sym(ret_addr); } if (--cnt < 0) @@ -145,7 +125,9 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg) next_fp = base_reg[FP_OFFSET]; #endif if (__kernel_text_address(ret_addr)) { - get_real_ret_addr(&ret_addr, tsk, &graph); + + ret_addr = ftrace_graph_ret_addr( + tsk, &graph, ret_addr, NULL); print_ip_sym(ret_addr); } if (--cnt < 0) From c5fdf7e00d490dc094a97d287e0fa27e253cca84 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 20 Aug 2018 09:40:08 +0800 Subject: [PATCH 121/269] nds32: Remove the deprecated ABI implementation We are not using NDS32 ABI 2 for now, just remove the preprocessor directives __NDS32_ABI_2. Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/kernel/traps.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index 7684c8f597ed..f432310f3d02 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -117,13 +117,8 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg) !((unsigned long)base_reg & 0x3) && ((unsigned long)base_reg >= TASK_SIZE)) { unsigned long next_fp; -#if !defined(__NDS32_ABI_2) - ret_addr = base_reg[0]; - next_fp = base_reg[1]; -#else ret_addr = base_reg[-1]; next_fp = base_reg[FP_OFFSET]; -#endif if (__kernel_text_address(ret_addr)) { ret_addr = ftrace_graph_ret_addr( From 95f93ed7fe92c16f5346e477491d91e4fa8e92b8 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 20 Aug 2018 09:51:29 +0800 Subject: [PATCH 122/269] nds32: Add macro definition for offset of lp register on stack Use macro to replace the magic number. Signed-off-by: Zong Li Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/include/asm/nds32.h | 1 + arch/nds32/kernel/stacktrace.c | 2 +- arch/nds32/kernel/traps.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/nds32/include/asm/nds32.h b/arch/nds32/include/asm/nds32.h index 19b19394a936..68c38151c3e4 100644 --- a/arch/nds32/include/asm/nds32.h +++ b/arch/nds32/include/asm/nds32.h @@ -17,6 +17,7 @@ #else #define FP_OFFSET (-2) #endif +#define LP_OFFSET (-1) extern void __init early_trap_init(void); static inline void GIE_ENABLE(void) diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c index 36bc87003e83..d974c0c1c65f 100644 --- a/arch/nds32/kernel/stacktrace.c +++ b/arch/nds32/kernel/stacktrace.c @@ -31,7 +31,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) && (fpn >= (unsigned long *)TASK_SIZE)) { unsigned long lpp, fpp; - lpp = fpn[-1]; + lpp = fpn[LP_OFFSET]; fpp = fpn[FP_OFFSET]; if (!__kernel_text_address(lpp)) break; diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index f432310f3d02..b0b85b7ab079 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -117,7 +117,7 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg) !((unsigned long)base_reg & 0x3) && ((unsigned long)base_reg >= TASK_SIZE)) { unsigned long next_fp; - ret_addr = base_reg[-1]; + ret_addr = base_reg[LP_OFFSET]; next_fp = base_reg[FP_OFFSET]; if (__kernel_text_address(ret_addr)) { From 487c4b2323b26cfb5fd4d77d3605a92c182b6288 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 23 Aug 2018 14:47:43 +0800 Subject: [PATCH 123/269] nds32: Only print one page of stack when die to prevent printing too much information. It may print too much information sometimes if the stack is wrong or too big. This patch can limit the debug information in a page of stack. Signed-off-by: Greentime Hu --- arch/nds32/kernel/traps.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index b0b85b7ab079..1496aab48998 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -173,11 +173,10 @@ void die(const char *str, struct pt_regs *regs, int err) pr_emerg("CPU: %i\n", smp_processor_id()); show_regs(regs); pr_emerg("Process %s (pid: %d, stack limit = 0x%p)\n", - tsk->comm, tsk->pid, task_thread_info(tsk) + 1); + tsk->comm, tsk->pid, end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { - dump_mem("Stack: ", regs->sp, - THREAD_SIZE + (unsigned long)task_thread_info(tsk)); + dump_mem("Stack: ", regs->sp, (regs->sp + PAGE_SIZE) & PAGE_MASK); dump_instr(regs); dump_stack(); } From 0cde56e0280d70ce26b54d22131944c2fe584b38 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 23 Aug 2018 15:05:46 +0800 Subject: [PATCH 124/269] nds32: Fix a kernel panic issue because of wrong frame pointer access. It can make sure that trace_hardirqs_off/trace_hardirqs_on can get a correct return address by frame pointer through __builtin_return_address() in this fix. Unable to handle kernel paging request at virtual address fffffffc pgd = 3c42e9cf [fffffffc] *pgd=02a9c000 Internal error: Oops: 1 [#1] Modules linked in: CPU: 0 PC is at trace_hardirqs_off+0x78/0xec LP is at common_exception_handler+0xda/0xf4 pc : [] lp : [] Tainted: G W sp : ada60ab0 fp : efcaff48 gp : 3a020490 r25: efcb0000 r24: 00000000 r23: 00000000 r22: 00000000 r21: 00000000 r20: 000700c1 r19: 000700ca r18: 3a21b018 r17: 00000001 r16: 00000002 r15: 00000001 r14: 0000002a r13: 3a00a804 r12: ada60ab0 r11: 3a113af8 r10: 3a01c530 r9 : 3a124404 r8 : 00120f9c r7 : b2352eba r6 : 00000000 r5 : 3a126b58 r4 : 00000000 r3 : 3a1726a8 r2 : b2921000 r1 : 00000000 r0 : 00000000 IRQs off Segment user Process init (pid: 1, stack limit = 0x069d7f15) Stack: (0xada60ab0 to 0xada61000) Stack: 0aa0: 00000000 00000003 3a110000 0011f000 Stack: 0ac0: 00000005 00000000 00000000 00000000 ada60b10 3a01fe68 ada60b0c ada60b08 Stack: 0ae0: 00000000 ada60ab8 ada60b30 3a020550 00000000 00000001 3a11c2f8 3a01c6e8 Stack: 0b00: 3a01cb80 fffffba8 3a113af8 3a21b018 3a122c28 00003ec4 00000165 00000000 Stack: 0b20: 3a126aec 0000006c 00000000 00000001 3a01fe68 00000000 00000003 00000000 Stack: 0b40: 00000001 000003f8 3a020930 3a01c530 00000008 ada60c18 3a020490 3a003120 Stack: 0b60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0b80: 00000000 00000000 00000000 00000000 ffff8000 00000000 00000000 00000000 Stack: 0ba0: 00000000 00000001 3a020550 00000000 3a01d020 00000000 fffff000 fffff000 Stack: 0bc0: 00000000 00000000 00000000 00000000 ada60f2c 00000000 00000001 00000000 Stack: 0be0: 00000000 00000000 3a01fe68 fffffab0 00008034 00000008 3a0010cc 3a01fe68 Stack: 0c00: 00000000 00000000 00000001 ada60c88 3a020490 3a0139d4 0009dc6f 00000000 Stack: 0c20: 00000000 00000000 ada60fce fffff000 00000000 0000ebe0 3a020038 3a020550 Stack: 0c40: ada60f20 ada60c90 3a0007f0 3a0002a8 ada60c8c 00000000 00000000 ada60c88 Stack: 0c60: 3a020490 3a004570 00000000 00000000 ada60f20 3a0007f0 3a000000 00000000 Stack: 0c80: 3a020490 3a004850 00000000 3a013f24 3a000000 00000000 3a01ff44 00000000 Stack: 0ca0: 00000000 00000000 00000000 00000000 00000000 00000000 3a01ff84 3a01ff7c Stack: 0cc0: 3a01ff4c 3a01ff5c 3a01ff64 3a01ff9c 3a01ffa4 3a01ffac 3a01ff6c 3a01ff74 Stack: 0ce0: 00000000 00000000 3a01ff44 00000000 00000000 00000000 00000000 00000000 Stack: 0d00: 3a01ff8c 00000000 00000000 3a01ff94 00000000 00000000 00000000 00000000 Stack: 0d20: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0d40: 3a01ffbc 3a01ffb4 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0d60: 00000000 00000000 00000000 00000000 00000000 3a01ffc4 00000000 00000000 Stack: 0d80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0da0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0dc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 3a01ff54 Stack: 0de0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0e00: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0e20: 00000000 00000004 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0e40: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0e60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0e80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0ea0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Stack: 0ec0: 00000000 00000000 00000000 00000000 ffffffff 00000000 00000000 00000000 Stack: 0ee0: 00000000 00000000 00000000 00000000 ada60f20 00000000 00000000 00000000 Stack: 0f00: 00000000 00000000 00000000 00000000 00000000 00000000 3a020490 3a000b24 Stack: 0f20: 00000001 ada60fde 00000000 ada60fe4 ada60feb 00000000 00000021 3a038000 Stack: 0f40: 00000010 0009dc6f 00000006 00001000 00000011 00000064 00000003 00008034 Stack: 0f60: 00000004 00000020 00000005 00000008 00000007 3a000000 00000008 00000000 Stack: 0f80: 00000009 0000ebe0 0000000b 00000000 0000000c 00000000 0000000d 00000000 Stack: 0fa0: 0000000e 00000000 00000017 00000000 00000019 ada60fce 0000001f ada60ff6 Stack: 0fc0: 00000000 00000000 00000000 b5010000 fa839914 23b5dd89 a2aea540 692fc82e Stack: 0fe0: 0074696e 454d4f48 54002f3d 3d4d5245 756e696c 692f0078 0074696e 00000000 CPU: 0 PID: 1 Comm: init Tainted: G W 4.18.0-00015-g1888b64a2558-dirty #112 Hardware name: andestech,ae3xx (DT) Call Trace: [] dump_stack+0x2c/0x38 [] die+0x128/0x18c [] do_page_fault+0x3b8/0x4e0 [] ret_from_exception+0x0/0x10 [] common_exception_handler+0xda/0xf4 Signed-off-by: Greentime Hu --- arch/nds32/kernel/ex-entry.S | 2 +- arch/nds32/kernel/ex-exit.S | 4 ++-- arch/nds32/kernel/ftrace.c | 12 ++++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S index b8ae4e9a6b93..21a144071566 100644 --- a/arch/nds32/kernel/ex-entry.S +++ b/arch/nds32/kernel/ex-entry.S @@ -118,7 +118,7 @@ common_exception_handler: /* interrupt */ 2: #ifdef CONFIG_TRACE_IRQFLAGS - jal trace_hardirqs_off + jal __trace_hardirqs_off #endif move $r0, $sp sethi $lp, hi20(ret_from_intr) diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S index 03e4f7788a18..f00af92f7e22 100644 --- a/arch/nds32/kernel/ex-exit.S +++ b/arch/nds32/kernel/ex-exit.S @@ -138,8 +138,8 @@ no_work_pending: #ifdef CONFIG_TRACE_IRQFLAGS lwi $p0, [$sp+(#IPSW_OFFSET)] andi $p0, $p0, #0x1 - la $r10, trace_hardirqs_off - la $r9, trace_hardirqs_on + la $r10, __trace_hardirqs_off + la $r9, __trace_hardirqs_on cmovz $r9, $p0, $r10 jral $r9 #endif diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index a646a8339052..a0a9679ad5de 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -295,3 +295,15 @@ int ftrace_disable_ftrace_graph_caller(void) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + + +#ifdef CONFIG_TRACE_IRQFLAGS +noinline void __trace_hardirqs_off(void) +{ + trace_hardirqs_off(); +} +noinline void __trace_hardirqs_on(void) +{ + trace_hardirqs_on(); +} +#endif /* CONFIG_TRACE_IRQFLAGS */ From ec865393292f5ad8d52da20788b3685ebce44c48 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Tue, 28 Aug 2018 16:07:39 +0800 Subject: [PATCH 125/269] nds32: fix build error because of wrong semicolon It shall be removed in the define usage. We shall not put a semicolon there. /kisskb/src/arch/nds32/include/asm/elf.h:126:29: error: expected '}' before ';' token #define ELF_DATA ELFDATA2LSB; ^ /kisskb/src/fs/proc/kcore.c:318:17: note: in expansion of macro 'ELF_DATA' [EI_DATA] = ELF_DATA, ^~~~~~~~ /kisskb/src/fs/proc/kcore.c:312:15: note: to match this '{' .e_ident = { ^ /kisskb/src/scripts/Makefile.build:307: recipe for target 'fs/proc/kcore.o' failed Signed-off-by: Greentime Hu --- arch/nds32/include/asm/elf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h index 56c479058802..f5f9cf7e0544 100644 --- a/arch/nds32/include/asm/elf.h +++ b/arch/nds32/include/asm/elf.h @@ -121,9 +121,9 @@ struct elf32_hdr; */ #define ELF_CLASS ELFCLASS32 #ifdef __NDS32_EB__ -#define ELF_DATA ELFDATA2MSB; +#define ELF_DATA ELFDATA2MSB #else -#define ELF_DATA ELFDATA2LSB; +#define ELF_DATA ELFDATA2LSB #endif #define ELF_ARCH EM_NDS32 #define USE_ELF_CORE_DUMP From a11bdb1a6b782ee97587f92fae798efc78c31093 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 30 Aug 2018 10:13:55 +0200 Subject: [PATCH 126/269] KVM: s390: Fix pfmf and conditional skey emulation We should not return with a lock. We also have to increase the address when we do page clearing. Fixes: bd096f644319 ("KVM: s390: Add skey emulation fault handling") Signed-off-by: Janosch Frank Message-Id: <20180830081355.59234-1-frankja@linux.ibm.com> Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d68f10441a16..8679bd74d337 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -280,9 +280,11 @@ retry: goto retry; } } - if (rc) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); up_read(¤t->mm->mmap_sem); + if (rc == -EFAULT) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (rc < 0) + return rc; vcpu->run->s.regs.gprs[reg1] &= ~0xff; vcpu->run->s.regs.gprs[reg1] |= key; return 0; @@ -324,9 +326,11 @@ retry: goto retry; } } - if (rc < 0) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); up_read(¤t->mm->mmap_sem); + if (rc == -EFAULT) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (rc < 0) + return rc; kvm_s390_set_psw_cc(vcpu, rc); return 0; } @@ -390,12 +394,12 @@ static int handle_sske(struct kvm_vcpu *vcpu) FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } + up_read(¤t->mm->mmap_sem); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - - up_read(¤t->mm->mmap_sem); - if (rc >= 0) - start += PAGE_SIZE; + if (rc < 0) + return rc; + start += PAGE_SIZE; } if (m3 & (SSKE_MC | SSKE_MR)) { @@ -1002,13 +1006,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } + up_read(¤t->mm->mmap_sem); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - - up_read(¤t->mm->mmap_sem); - if (rc >= 0) - start += PAGE_SIZE; + if (rc == -EAGAIN) + continue; + if (rc < 0) + return rc; } + start += PAGE_SIZE; } if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) { From 204c97245612b6c255edf4e21e24d417c4a0c008 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Thu, 23 Aug 2018 12:25:54 +0200 Subject: [PATCH 127/269] KVM: s390: vsie: copy wrapping keys to right place Copy the key mask to the right offset inside the shadow CRYCB Fixes: bbeaa58b3 ("KVM: s390: vsie: support aes dea wrapping keys") Signed-off-by: Pierre Morel Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Reviewed-by: Janosch Frank Cc: stable@vger.kernel.org # v4.8+ Message-Id: <1535019956-23539-2-git-send-email-pmorel@linux.ibm.com> Signed-off-by: Janosch Frank Signed-off-by: Christian Borntraeger --- arch/s390/kvm/vsie.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 63844b95c22c..a2b28cd1e3fe 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -173,7 +173,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) return set_validity_icpt(scb_s, 0x0039U); /* copy only the wrapping keys */ - if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56)) + if (read_guest_real(vcpu, crycb_addr + 72, + vsie_page->crycb.dea_wrapping_key_mask, 56)) return set_validity_icpt(scb_s, 0x0035U); scb_s->ecb3 |= ecb3_flags; From df88f3181f10565c6e3a89eb6f0f9e6afaaf15f1 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 30 Aug 2018 16:14:18 +0200 Subject: [PATCH 128/269] KVM: s390: Properly lock mm context allow_gmap_hpage_1m setting We have to do down_write on the mm semaphore to set a bitfield in the mm context. Signed-off-by: Janosch Frank Fixes: a4499382 ("KVM: s390: Add huge page enablement control") Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/mmu.h | 8 +++++++- arch/s390/kvm/kvm-s390.c | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index f31a15044c24..a8418e1379eb 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -16,7 +16,13 @@ typedef struct { unsigned long asce; unsigned long asce_limit; unsigned long vdso_base; - /* The mmu context allocates 4K page tables. */ + /* + * The following bitfields need a down_write on the mm + * semaphore when they are written to. As they are only + * written once, they can be read without a lock. + * + * The mmu context allocates 4K page tables. + */ unsigned int alloc_pgste:1; /* The mmu context uses extended page tables. */ unsigned int has_pgste:1; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 91ad4a9425c0..f69333fd2fa3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -695,7 +695,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = -EINVAL; else { r = 0; + down_write(&kvm->mm->mmap_sem); kvm->mm->context.allow_gmap_hpage_1m = 1; + up_write(&kvm->mm->mmap_sem); /* * We might have to create fake 4k page * tables. To avoid that the hardware works on From 851a15114895c5bce163a6f2d57e0aa4658a1be4 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 3 Sep 2018 11:24:57 +0300 Subject: [PATCH 129/269] i2c: i801: fix DNV's SMBCTRL register offset DNV's iTCO is slightly different with SMBCTRL sitting at a different offset when compared to all other devices. Let's fix so that we can properly use iTCO watchdog. Fixes: 84d7f2ebd70d ("i2c: i801: Add support for Intel DNV") Cc: # v4.4+ Signed-off-by: Felipe Balbi Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 04b60a349d7e..c91e145ef5a5 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -140,6 +140,7 @@ #define SBREG_BAR 0x10 #define SBREG_SMBCTRL 0xc6000c +#define SBREG_SMBCTRL_DNV 0xcf000c /* Host status bits for SMBPCISTS */ #define SMBPCISTS_INTS BIT(3) @@ -1399,7 +1400,11 @@ static void i801_add_tco(struct i801_priv *priv) spin_unlock(&p2sb_spinlock); res = &tco_res[ICH_RES_MEM_OFF]; - res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL; + if (pci_dev->device == PCI_DEVICE_ID_INTEL_DNV_SMBUS) + res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL_DNV; + else + res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL; + res->end = res->start + 3; res->flags = IORESOURCE_MEM; From bc811f05d77f47059c197a98b6ad242eb03999cb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 4 Sep 2018 11:52:34 -0600 Subject: [PATCH 130/269] nbd: don't allow invalid blocksize settings syzbot reports a divide-by-zero off the NBD_SET_BLKSIZE ioctl. We need proper validation of the input here. Not just if it's zero, but also if the value is a power-of-2 and in a valid range. Add that. Cc: stable@vger.kernel.org Reported-by: syzbot Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3863c00372bb..14a51254c3db 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1239,6 +1239,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_SOCK: return nbd_add_socket(nbd, arg, false); case NBD_SET_BLKSIZE: + if (!arg || !is_power_of_2(arg) || arg < 512 || + arg > PAGE_SIZE) + return -EINVAL; nbd_size_set(nbd, arg, div_s64(config->bytesize, arg)); return 0; From ec6adef5fbc3f140c70e7499fdad818acb3a46c6 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 4 Sep 2018 15:31:12 +0200 Subject: [PATCH 131/269] HID: multitouch: fix Elan panels with 2 input modes declaration When implementing commit 7f81c8db5489 ("HID: multitouch: simplify the settings of the various features"), I wrongly removed a test that made sure we never try to set the second InputMode feature to something else than 0. This broke badly some recent Elan panels that now forget to send the click button in some area of the touchpad. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200899 Fixes: 7f81c8db5489 ("HID: multitouch: simplify the settings of the various features") Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 40fbb7c52723..88da991ef256 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1375,7 +1375,8 @@ static bool mt_need_to_apply_feature(struct hid_device *hdev, struct hid_usage *usage, enum latency_mode latency, bool surface_switch, - bool button_switch) + bool button_switch, + bool *inputmode_found) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_class *cls = &td->mtclass; @@ -1387,6 +1388,14 @@ static bool mt_need_to_apply_feature(struct hid_device *hdev, switch (usage->hid) { case HID_DG_INPUTMODE: + /* + * Some elan panels wrongly declare 2 input mode features, + * and silently ignore when we set the value in the second + * field. Skip the second feature and hope for the best. + */ + if (*inputmode_found) + return false; + if (cls->quirks & MT_QUIRK_FORCE_GET_FEATURE) { report_len = hid_report_len(report); buf = hid_alloc_report_buf(report, GFP_KERNEL); @@ -1402,6 +1411,7 @@ static bool mt_need_to_apply_feature(struct hid_device *hdev, } field->value[index] = td->inputmode_value; + *inputmode_found = true; return true; case HID_DG_CONTACTMAX: @@ -1439,6 +1449,7 @@ static void mt_set_modes(struct hid_device *hdev, enum latency_mode latency, struct hid_usage *usage; int i, j; bool update_report; + bool inputmode_found = false; rep_enum = &hdev->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { @@ -1457,7 +1468,8 @@ static void mt_set_modes(struct hid_device *hdev, enum latency_mode latency, usage, latency, surface_switch, - button_switch)) + button_switch, + &inputmode_found)) update_report = true; } } From 0d6c3011409135ea84e2a231b013a22017ff999a Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 4 Sep 2018 15:31:14 +0200 Subject: [PATCH 132/269] HID: core: fix grouping by application commit f07b3c1da92d ("HID: generic: create one input report per application type") was effectively the same as MULTI_INPUT: hidinput->report was never set, so hidinput_match_application() always returned null. Fix that by testing against the real application. Note that this breaks some old eGalax touchscreens that expect MULTI_INPUT instead of HID_QUIRK_INPUT_PER_APP. Enable this quirk for backward compatibility on all non-Win8 touchscreens. link: https://bugzilla.kernel.org/show_bug.cgi?id=200847 link: https://bugzilla.kernel.org/show_bug.cgi?id=200849 link: https://bugs.archlinux.org/task/59699 link: https://github.com/NixOS/nixpkgs/issues/45165 Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 4 ++-- drivers/hid/hid-multitouch.c | 3 +++ include/linux/hid.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index ac201817a2dd..a481eaf39e88 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1582,6 +1582,7 @@ static struct hid_input *hidinput_allocate(struct hid_device *hid, input_dev->dev.parent = &hid->dev; hidinput->input = input_dev; + hidinput->application = application; list_add_tail(&hidinput->list, &hid->inputs); INIT_LIST_HEAD(&hidinput->reports); @@ -1677,8 +1678,7 @@ static struct hid_input *hidinput_match_application(struct hid_report *report) struct hid_input *hidinput; list_for_each_entry(hidinput, &hid->inputs, list) { - if (hidinput->report && - hidinput->report->application == report->application) + if (hidinput->application == report->application) return hidinput; } diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 88da991ef256..da954f3f4da7 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1697,6 +1697,9 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) */ hdev->quirks |= HID_QUIRK_INPUT_PER_APP; + if (id->group != HID_GROUP_MULTITOUCH_WIN_8) + hdev->quirks |= HID_QUIRK_MULTI_INPUT; + timer_setup(&td->release_timer, mt_expired_timeout, 0); ret = hid_parse(hdev); diff --git a/include/linux/hid.h b/include/linux/hid.h index 834e6461a690..d44a78362942 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -526,6 +526,7 @@ struct hid_input { const char *name; bool registered; struct list_head reports; /* the list of reports */ + unsigned int application; /* application usage for this input */ }; enum hid_type { From 2820a708d5a321342bef34e459fdc8679c30e20f Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 30 Jul 2018 19:26:34 +0300 Subject: [PATCH 133/269] ARC: dma [IOC] Enable per device io coherency So far the IOC treatment was global on ARC, being turned on (or off) for all devices in the system. With this patch, this can now be done per device using the "dma-coherent" DT property; IOW with this patch we can use both HW-coherent and regular DMA peripherals simultaneously. The changes involved are too many so enlisting the summary below: 1. common code calls ARC arch_setup_dma_ops() per device. 2. For coherent dma (IOC) it plugs in generic @dma_direct_ops which doesn't need any arch specific backend: No need for any explicit cache flushes or MMU mappings to provide for uncached access - dma_(map|sync)_single* return early as corresponding dma ops callbacks are NULL in generic code. So arch_sync_dma_*() -> dma_cache_*() need not handle the coherent dma case, hence drop ARC __dma_cache_*_ioc() which were no-op anyways 3. For noncoherent dma (non IOC) generic @dma_noncoherent_ops is used which in turns calls ARC specific routines - arch_dma_alloc() no longer checks for @ioc_enable since this is called only for !IOC case. Reviewed-by: Christoph Hellwig Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [vgupta: rewrote changelog] --- arch/arc/include/asm/dma-mapping.h | 13 +++++++ arch/arc/mm/cache.c | 23 +++++-------- arch/arc/mm/dma.c | 54 ++++++++++++++++-------------- 3 files changed, 50 insertions(+), 40 deletions(-) create mode 100644 arch/arc/include/asm/dma-mapping.h diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h new file mode 100644 index 000000000000..c946c0a83e76 --- /dev/null +++ b/arch/arc/include/asm/dma-mapping.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +// (C) 2018 Synopsys, Inc. (www.synopsys.com) + +#ifndef ASM_ARC_DMA_MAPPING_H +#define ASM_ARC_DMA_MAPPING_H + +#include + +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent); +#define arch_setup_dma_ops arch_setup_dma_ops + +#endif diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 25c631942500..2d389cab46ba 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -65,7 +65,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n", perip_base, - IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency ")); + IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) ")); return buf; } @@ -896,15 +896,6 @@ static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz) slc_op(start, sz, OP_FLUSH); } -/* - * DMA ops for systems with IOC - * IOC hardware snoops all DMA traffic keeping the caches consistent with - * memory - eliding need for any explicit cache maintenance of DMA buffers - */ -static void __dma_cache_wback_inv_ioc(phys_addr_t start, unsigned long sz) {} -static void __dma_cache_inv_ioc(phys_addr_t start, unsigned long sz) {} -static void __dma_cache_wback_ioc(phys_addr_t start, unsigned long sz) {} - /* * Exported DMA API */ @@ -1264,11 +1255,7 @@ void __init arc_cache_init_master(void) if (is_isa_arcv2() && ioc_enable) arc_ioc_setup(); - if (is_isa_arcv2() && ioc_enable) { - __dma_cache_wback_inv = __dma_cache_wback_inv_ioc; - __dma_cache_inv = __dma_cache_inv_ioc; - __dma_cache_wback = __dma_cache_wback_ioc; - } else if (is_isa_arcv2() && l2_line_sz && slc_enable) { + if (is_isa_arcv2() && l2_line_sz && slc_enable) { __dma_cache_wback_inv = __dma_cache_wback_inv_slc; __dma_cache_inv = __dma_cache_inv_slc; __dma_cache_wback = __dma_cache_wback_slc; @@ -1277,6 +1264,12 @@ void __init arc_cache_init_master(void) __dma_cache_inv = __dma_cache_inv_l1; __dma_cache_wback = __dma_cache_wback_l1; } + /* + * In case of IOC (say IOC+SLC case), pointers above could still be set + * but end up not being relevant as the first function in chain is not + * called at all for @dma_direct_ops + * arch_sync_dma_for_cpu() -> dma_cache_*() -> __dma_cache_*() + */ } void __ref arc_cache_init(void) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index ec47e6079f5d..c0b49399225d 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -6,20 +6,17 @@ * published by the Free Software Foundation. */ -/* - * DMA Coherent API Notes - * - * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is - * implemented by accessing it using a kernel virtual address, with - * Cache bit off in the TLB entry. - * - * The default DMA address == Phy address which is 0x8000_0000 based. - */ - #include #include #include +/* + * ARCH specific callbacks for generic noncoherent DMA ops (dma/noncoherent.c) + * - hardware IOC not available (or "dma-coherent" not set for device in DT) + * - But still handle both coherent and non-coherent requests from caller + * + * For DMA coherent hardware (IOC) generic code suffices + */ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { @@ -33,19 +30,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, if (!page) return NULL; - /* - * IOC relies on all data (even coherent DMA data) being in cache - * Thus allocate normal cached memory - * - * The gains with IOC are two pronged: - * -For streaming data, elides need for cache maintenance, saving - * cycles in flush code, and bus bandwidth as all the lines of a - * buffer need to be flushed out to memory - * -For coherent data, Read/Write to buffers terminate early in cache - * (vs. always going to memory - thus are faster) - */ - if ((is_isa_arcv2() && ioc_enable) || - (attrs & DMA_ATTR_NON_CONSISTENT)) + if (attrs & DMA_ATTR_NON_CONSISTENT) need_coh = 0; /* @@ -95,8 +80,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, struct page *page = virt_to_page(paddr); int is_non_coh = 1; - is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) || - (is_isa_arcv2() && ioc_enable); + is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT); if (PageHighMem(page) || !is_non_coh) iounmap((void __force __iomem *)vaddr); @@ -185,3 +169,23 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, break; } } + +/* + * Plug in coherent or noncoherent dma ops + */ +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ + /* + * IOC hardware snoops all DMA traffic keeping the caches consistent + * with memory - eliding need for any explicit cache maintenance of + * DMA buffers - so we can use dma_direct cache ops. + */ + if (is_isa_arcv2() && ioc_enable && coherent) { + set_dma_ops(dev, &dma_direct_ops); + dev_info(dev, "use dma_direct_ops cache ops\n"); + } else { + set_dma_ops(dev, &dma_noncoherent_ops); + dev_info(dev, "use dma_noncoherent_ops cache ops\n"); + } +} From 2b720e99a1297417b979bf4810a0f01d27133c48 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 30 Jul 2018 19:26:35 +0300 Subject: [PATCH 134/269] ARC: IOC: panic if both IOC and ZONE_HIGHMEM enabled Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 2d389cab46ba..f2701c13a66b 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1144,6 +1144,19 @@ noinline void __init arc_ioc_setup(void) { unsigned int ioc_base, mem_sz; + /* + * As for today we don't support both IOC and ZONE_HIGHMEM enabled + * simultaneously. This happens because as of today IOC aperture covers + * only ZONE_NORMAL (low mem) and any dma transactions outside this + * region won't be HW coherent. + * If we want to use both IOC and ZONE_HIGHMEM we can use + * bounce_buffer to handle dma transactions to HIGHMEM. + * Also it is possible to modify dma_direct cache ops or increase IOC + * aperture size if we are planning to use HIGHMEM without PAE. + */ + if (IS_ENABLED(CONFIG_HIGHMEM)) + panic("IOC and HIGHMEM can't be used simultaneously"); + /* Flush + invalidate + disable L1 dcache */ __dc_disable(); From dd45210b6dd4f1512eafcc41774154ebb762360f Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 30 Jul 2018 19:26:36 +0300 Subject: [PATCH 135/269] ARC: don't check for HIGHMEM pages in arch_dma_alloc __GFP_HIGHMEM flag is cleared by upper layer functions (in include/linux/dma-mapping.h) so we'll never get a __GFP_HIGHMEM flag in arch_dma_alloc gfp argument. That's why alloc_pages will never return highmem page here. Get rid of highmem pages handling and cleanup arch_dma_alloc and arch_dma_free functions. Reviewed-by: Christoph Hellwig Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/mm/dma.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index c0b49399225d..c75d5c3470e3 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -24,30 +24,29 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, struct page *page; phys_addr_t paddr; void *kvaddr; - int need_coh = 1, need_kvaddr = 0; + bool need_coh = !(attrs & DMA_ATTR_NON_CONSISTENT); + + /* + * __GFP_HIGHMEM flag is cleared by upper layer functions + * (in include/linux/dma-mapping.h) so we should never get a + * __GFP_HIGHMEM here. + */ + BUG_ON(gfp & __GFP_HIGHMEM); page = alloc_pages(gfp, order); if (!page) return NULL; - if (attrs & DMA_ATTR_NON_CONSISTENT) - need_coh = 0; - - /* - * - A coherent buffer needs MMU mapping to enforce non-cachability - * - A highmem page needs a virtual handle (hence MMU mapping) - * independent of cachability - */ - if (PageHighMem(page) || need_coh) - need_kvaddr = 1; - /* This is linear addr (0x8000_0000 based) */ paddr = page_to_phys(page); *dma_handle = paddr; - /* This is kernel Virtual address (0x7000_0000 based) */ - if (need_kvaddr) { + /* + * A coherent buffer needs MMU mapping to enforce non-cachability. + * kvaddr is kernel Virtual address (0x7000_0000 based). + */ + if (need_coh) { kvaddr = ioremap_nocache(paddr, size); if (kvaddr == NULL) { __free_pages(page, order); @@ -78,11 +77,8 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, { phys_addr_t paddr = dma_handle; struct page *page = virt_to_page(paddr); - int is_non_coh = 1; - is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT); - - if (PageHighMem(page) || !is_non_coh) + if (!(attrs & DMA_ATTR_NON_CONSISTENT)) iounmap((void __force __iomem *)vaddr); __free_pages(page, get_order(size)); From 0d23ba6034b9cf48b8918404367506da3e4b3ee5 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 3 Sep 2018 18:54:14 +0200 Subject: [PATCH 136/269] RDMA/ucma: check fd type in ucma_migrate_id() The current code grabs the private_data of whatever file descriptor userspace has supplied and implicitly casts it to a `struct ucma_file *`, potentially causing a type confusion. This is probably fine in practice because the pointer is only used for comparisons, it is never actually dereferenced; and even in the comparisons, it is unlikely that a file from another filesystem would have a ->private_data pointer that happens to also be valid in this context. But ->private_data is not always guaranteed to be a valid pointer to an object owned by the file's filesystem; for example, some filesystems just cram numbers in there. Check the type of the supplied file descriptor to be safe, analogous to how other places in the kernel do it. Fixes: 88314e4dda1e ("RDMA/cma: add support for rdma_migrate_id()") Signed-off-by: Jann Horn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ec8fb289621f..5f437d1570fb 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -124,6 +124,8 @@ static DEFINE_MUTEX(mut); static DEFINE_IDR(ctx_idr); static DEFINE_IDR(multicast_idr); +static const struct file_operations ucma_fops; + static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) { @@ -1581,6 +1583,10 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, f = fdget(cmd.fd); if (!f.file) return -ENOENT; + if (f.file->f_op != &ucma_fops) { + ret = -EINVAL; + goto file_put; + } /* Validate current fd and prevent destruction of id. */ ctx = ucma_get_ctx(f.file->private_data, cmd.id); From e4ff3d22c11dd505353896cdcad0ee8f3251be68 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Tue, 28 Aug 2018 14:40:32 +0300 Subject: [PATCH 137/269] IB/core: Release object lock if destroy failed The object lock was supposed to always be released during destroy, but when the destruction retry series was integrated with the destroy series it created a failure path that missed the unlock. Keep with convention, if destroy fails the caller must undo all locking. Fixes: 87ad80abc70d ("IB/uverbs: Consolidate uobject destruction") Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 6eb64c6f0802..c4118bcd5103 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -882,6 +882,8 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); if (!uverbs_destroy_uobject(obj, reason)) ret = 0; + else + atomic_set(&obj->usecnt, 0); } return ret; } From 308aa2b8f7b7db3332a7d41099fd37851fb793b2 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 31 Aug 2018 07:15:56 -0700 Subject: [PATCH 138/269] iw_cxgb4: only allow 1 flush on user qps Once the qp has been flushed, it cannot be flushed again. The user qp flush logic wasn't enforcing it however. The bug can cause touch-after-free crashes like: Unable to handle kernel paging request for data at address 0x000001ec Faulting instruction address: 0xc008000016069100 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [c008000016069100] flush_qp+0x80/0x480 [iw_cxgb4] LR [c00800001606cd6c] c4iw_modify_qp+0x71c/0x11d0 [iw_cxgb4] Call Trace: [c00800001606cd6c] c4iw_modify_qp+0x71c/0x11d0 [iw_cxgb4] [c00800001606e868] c4iw_ib_modify_qp+0x118/0x200 [iw_cxgb4] [c0080000119eae80] ib_security_modify_qp+0xd0/0x3d0 [ib_core] [c0080000119c4e24] ib_modify_qp+0xc4/0x2c0 [ib_core] [c008000011df0284] iwcm_modify_qp_err+0x44/0x70 [iw_cm] [c008000011df0fec] destroy_cm_id+0xcc/0x370 [iw_cm] [c008000011ed4358] rdma_destroy_id+0x3c8/0x520 [rdma_cm] [c0080000134b0540] ucma_close+0x90/0x1b0 [rdma_ucm] [c000000000444da4] __fput+0xe4/0x2f0 So fix flush_qp() to only flush the wq once. Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/qp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index b3203afa3b1d..347fe18b1a41 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1685,6 +1685,12 @@ static void flush_qp(struct c4iw_qp *qhp) schp = to_c4iw_cq(qhp->ibqp.send_cq); if (qhp->ibqp.uobject) { + + /* for user qps, qhp->wq.flushed is protected by qhp->mutex */ + if (qhp->wq.flushed) + return; + + qhp->wq.flushed = 1; t4_set_wq_in_error(&qhp->wq, 0); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); From 3100dab2aa09dc6e082956e306fc9f81b3cc0f7a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 4 Sep 2018 15:45:34 -0700 Subject: [PATCH 139/269] mm: memcontrol: print proper OOM header when no eligible victim left When the memcg OOM killer runs out of killable tasks, it currently prints a WARN with no further OOM context. This has caused some user confusion. Warnings indicate a kernel problem. In a reported case, however, the situation was triggered by a nonsensical memcg configuration (hard limit set to 0). But without any VM context this wasn't obvious from the report, and it took some back and forth on the mailing list to identify what is actually a trivial issue. Handle this OOM condition like we handle it in the global OOM killer: dump the full OOM context and tell the user we ran out of tasks. This way the user can identify misconfigurations easily by themselves and rectify the problem - without having to go through the hassle of running into an obscure but unsettling warning, finding the appropriate kernel mailing list and waiting for a kernel developer to remote-analyze that the memcg configuration caused this. If users cannot make sense of why the OOM killer was triggered or why it failed, they will still report it to the mailing list, we know that from experience. So in case there is an actual kernel bug causing this, kernel developers will very likely hear about it. Link: http://lkml.kernel.org/r/20180821160406.22578-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 -- mm/oom_kill.c | 13 ++++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4ead5a4817de..e79cb59552d9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1701,8 +1701,6 @@ static enum oom_status mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int if (mem_cgroup_out_of_memory(memcg, mask, order)) return OOM_SUCCESS; - WARN(1,"Memory cgroup charge failed because of no reclaimable memory! " - "This looks like a misconfiguration or a kernel bug."); return OOM_FAILED; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b5b25e4dcbbb..95fbbc46f68f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1103,10 +1103,17 @@ bool out_of_memory(struct oom_control *oc) } select_bad_process(oc); - /* Found nothing?!?! Either we hang forever, or we panic. */ - if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) { + /* Found nothing?!?! */ + if (!oc->chosen) { dump_header(oc, NULL); - panic("Out of memory and no killable processes...\n"); + pr_warn("Out of memory and no killable processes...\n"); + /* + * If we got here due to an actual allocation at the + * system level, we cannot survive this and will enter + * an endless loop in the allocator. Bail out now. + */ + if (!is_sysrq_oom(oc) && !is_memcg_oom(oc)) + panic("System is deadlocked on memory\n"); } if (oc->chosen && oc->chosen != (void *)-1UL) oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" : From 79cc81057eef7ad846588976296ab0f266c1a7a5 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 4 Sep 2018 15:45:37 -0700 Subject: [PATCH 140/269] mm, oom: fix missing tlb_finish_mmu() in __oom_reap_task_mm(). Commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers") has added an ability to skip over vmas with blockable mmu notifiers. This however didn't call tlb_finish_mmu as it should. As a result inc_tlb_flush_pending has been called without its pairing dec_tlb_flush_pending and all callers mm_tlb_flush_pending would flush even though this is not really needed. This alone is not harmful and it seems there shouldn't be any such callers for oom victims at all but there is no real reason to skip tlb_finish_mmu on early skip either so call it. [mhocko@suse.com: new changelog] Link: http://lkml.kernel.org/r/b752d1d5-81ad-7a35-2394-7870641be51c@i-love.sakura.ne.jp Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 95fbbc46f68f..f10aa5360616 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -522,6 +522,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm) tlb_gather_mmu(&tlb, mm, start, end); if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) { + tlb_finish_mmu(&tlb, start, end); ret = false; continue; } From 1ed0cc5a01a4d868d9907ce96468c4b4c6709556 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 4 Sep 2018 15:45:41 -0700 Subject: [PATCH 141/269] mm: respect arch_dup_mmap() return value Commit d70f2a14b72a ("include/linux/sched/mm.h: uninline mmdrop_async(), etc") ignored the return value of arch_dup_mmap(). As a result, on x86, a failure to duplicate the LDT (e.g. due to memory allocation error) would leave the duplicated memory mapping in an inconsistent state. Fix by using the return value, as it was before the change. Link: http://lkml.kernel.org/r/20180823051229.211856-1-namit@vmware.com Fixes: d70f2a14b72a4 ("include/linux/sched/mm.h: uninline mmdrop_async(), etc") Signed-off-by: Nadav Amit Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index d896e9ca38b0..f0b58479534f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -550,8 +550,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto out; } /* a new mm has just been created */ - arch_dup_mmap(oldmm, mm); - retval = 0; + retval = arch_dup_mmap(oldmm, mm); out: up_write(&mm->mmap_sem); flush_tlb_mm(oldmm); From b353756b2b71915e81ed41239292306622d08c9f Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 4 Sep 2018 15:45:44 -0700 Subject: [PATCH 142/269] kmemleak: always register debugfs file If kmemleak built in to the kernel, but is disabled by default, the debugfs file is never registered. Because of this, it is not possible to find out if the kernel is built with kmemleak support by checking for the presence of this file. To allow this, always register the file. After this patch, if the file doesn't exist, kmemleak is not available in the kernel. If writing "scan" or any other value than "clear" to this file results in EBUSY, then kmemleak is available but is disabled by default and can be activated via the kernel command line. Catalin: "that's also consistent with a late disabling of kmemleak when the debugfs entry sticks around." Link: http://lkml.kernel.org/r/20180824131220.19176-1-vincent.whitchurch@axis.com Signed-off-by: Vincent Whitchurch Acked-by: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 9a085d525bbc..17dd883198ae 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -2097,6 +2097,11 @@ static int __init kmemleak_late_init(void) kmemleak_initialized = 1; + dentry = debugfs_create_file("kmemleak", 0644, NULL, NULL, + &kmemleak_fops); + if (!dentry) + pr_warn("Failed to create the debugfs kmemleak file\n"); + if (kmemleak_error) { /* * Some error occurred and kmemleak was disabled. There is a @@ -2108,10 +2113,6 @@ static int __init kmemleak_late_init(void) return -ENOMEM; } - dentry = debugfs_create_file("kmemleak", 0644, NULL, NULL, - &kmemleak_fops); - if (!dentry) - pr_warn("Failed to create the debugfs kmemleak file\n"); mutex_lock(&scan_mutex); start_scan_thread(); mutex_unlock(&scan_mutex); From 904506562e0856f2535d876407d087c9459d345b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 4 Sep 2018 15:45:48 -0700 Subject: [PATCH 143/269] tools/vm/slabinfo.c: fix sign-compare warning Currently we get the following compiler warning: slabinfo.c:854:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] if (s->object_size < min_objsize) ^ due to the mismatch of signed/unsigned comparison. ->object_size and ->slab_size are never expected to be negative, so let's define them as unsigned int. [n-horiguchi@ah.jp.nec.com: convert everything - none of these can be negative] Link: http://lkml.kernel.org/r/20180826234947.GA9787@hori1.linux.bs1.fc.nec.co.jp Link: http://lkml.kernel.org/r/1535103134-20239-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Reviewed-by: Andrew Morton Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/vm/slabinfo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index f82c2eaa859d..334b16db0ebb 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -30,8 +30,8 @@ struct slabinfo { int alias; int refs; int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; - int hwcache_align, object_size, objs_per_slab; - int sanity_checks, slab_size, store_user, trace; + unsigned int hwcache_align, object_size, objs_per_slab; + unsigned int sanity_checks, slab_size, store_user, trace; int order, poison, reclaim_account, red_zone; unsigned long partial, objects, slabs, objects_partial, objects_total; unsigned long alloc_fastpath, alloc_slowpath; From 7ab660f8baecfe26c1c267fa8e64d2073feae2bb Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 4 Sep 2018 15:45:51 -0700 Subject: [PATCH 144/269] tools/vm/page-types.c: fix "defined but not used" warning debugfs_known_mountpoints[] is not used any more, so let's remove it. Link: http://lkml.kernel.org/r/1535102651-19418-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Reviewed-by: Andrew Morton Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/vm/page-types.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 30cb0a0713ff..37908a83ddc2 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -159,12 +159,6 @@ static const char * const page_flag_names[] = { }; -static const char * const debugfs_known_mountpoints[] = { - "/sys/kernel/debug", - "/debug", - 0, -}; - /* * data structures */ From 04b8e946075d4582093e84f54dc1a004b227794d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 4 Sep 2018 15:45:55 -0700 Subject: [PATCH 145/269] mm/util.c: improve kvfree() kerneldoc Scooped from an email from Matthew. Cc: Mike Rapoport Cc: Jonathan Corbet Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/util.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mm/util.c b/mm/util.c index d2890a407332..9e3ebd2ef65f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -435,11 +435,14 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) EXPORT_SYMBOL(kvmalloc_node); /** - * kvfree - free memory allocated with kvmalloc - * @addr: pointer returned by kvmalloc + * kvfree() - Free memory. + * @addr: Pointer to allocated memory. * - * If the memory is allocated from vmalloc area it is freed with vfree(). - * Otherwise kfree() is used. + * kvfree frees memory allocated by any of vmalloc(), kmalloc() or kvmalloc(). + * It is slightly more efficient to use kfree() or vfree() if you are certain + * that you know which one to use. + * + * Context: Any context except NMI. */ void kvfree(const void *addr) { From 464c7ffbcb164b2e5cebfa406b7fc6cdb7945344 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 4 Sep 2018 15:45:59 -0700 Subject: [PATCH 146/269] mm/hugetlb: filter out hugetlb pages if HUGEPAGE migration is not supported. When scanning for movable pages, filter out Hugetlb pages if hugepage migration is not supported. Without this we hit infinte loop in __offline_pages() where we do pfn = scan_movable_pages(start_pfn, end_pfn); if (pfn) { /* We have movable pages */ ret = do_migrate_range(pfn, end_pfn); goto repeat; } Fix this by checking hugepage_migration_supported both in has_unmovable_pages which is the primary backoff mechanism for page offlining and for consistency reasons also into scan_movable_pages because it doesn't make any sense to return a pfn to non-migrateable huge page. This issue was revealed by, but not caused by 72b39cfc4d75 ("mm, memory_hotplug: do not fail offlining too early"). Link: http://lkml.kernel.org/r/20180824063314.21981-1-aneesh.kumar@linux.ibm.com Fixes: 72b39cfc4d75 ("mm, memory_hotplug: do not fail offlining too early") Signed-off-by: Aneesh Kumar K.V Reported-by: Haren Myneni Acked-by: Michal Hocko Reviewed-by: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 3 ++- mm/page_alloc.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9eea6e809a4e..38d94b703e9d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1333,7 +1333,8 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end) if (__PageMovable(page)) return pfn; if (PageHuge(page)) { - if (page_huge_active(page)) + if (hugepage_migration_supported(page_hstate(page)) && + page_huge_active(page)) return pfn; else pfn = round_up(pfn + 1, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 05e983f42316..89d2a2ab3fe6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7708,6 +7708,10 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, * handle each tail page individually in migration. */ if (PageHuge(page)) { + + if (!hugepage_migration_supported(page_hstate(page))) + goto unmovable; + iter = round_up(iter + 1, 1< Date: Tue, 4 Sep 2018 15:46:02 -0700 Subject: [PATCH 147/269] ipc/shm: properly return EIDRM in shm_lock() When getting rid of the general ipc_lock(), this was missed furthermore, making the comment around the ipc object validity check bogus. Under EIDRM conditions, callers will in turn not see the error and continue with the operation. Link: http://lkml.kernel.org/r/20180824030920.GD3677@linux-r8p5 Link: http://lkml.kernel.org/r/20180823024051.GC13343@shao2-debian Fixes: 82061c57ce9 ("ipc: drop ipc_lock()") Signed-off-by: Davidlohr Bueso Reported-by: kernel test robot Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/shm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ipc/shm.c b/ipc/shm.c index b0eb3757ab89..4cd402e4cfeb 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -199,6 +199,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) } ipc_unlock_object(ipcp); + ipcp = ERR_PTR(-EIDRM); err: rcu_read_unlock(); /* From 328b5f417a4ac929e20d98b989a2babac66c8520 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 4 Sep 2018 15:46:06 -0700 Subject: [PATCH 148/269] checkpatch: add optional static const to blank line declarations test Using a static const struct definition as part of a series of declarations produces a false positive "Missing a blank line after declarations" for code like: WARNING: Missing a blank line after declarations #710: FILE: drivers/gpu/drm/tidss/tidss_scale_coefs.c:137: + int inc; + static const struct { So fix it. Link: http://lkml.kernel.org/r/5905126e70b0ed1781e49265fd5c49c5090d0223.camel@perches.com Signed-off-by: Joe Perches Reported-by: Jyri Sarha Cc: "Valkeinen, Tomi" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 5219280bf7ff..b4caee6e269c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3311,7 +3311,7 @@ sub process { # known declaration macros $sline =~ /^\+\s+$declaration_macros/ || # start of struct or union or enum - $sline =~ /^\+\s+(?:union|struct|enum|typedef)\b/ || + $sline =~ /^\+\s+(?:static\s+)?(?:const\s+)?(?:union|struct|enum|typedef)\b/ || # start or end of block or continuation of declaration $sline =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(\[])/ || # bitfield continuation From 4e8346d0be889c7ab5eb2d3deedc18111d741e99 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Tue, 4 Sep 2018 15:46:09 -0700 Subject: [PATCH 149/269] memory_hotplug: fix kernel_panic on offline page processing Within show_valid_zones() the function test_pages_in_a_zone() should be called for online memory blocks only. Otherwise it might lead to the VM_BUG_ON due to uninitialized struct pages (when CONFIG_DEBUG_VM_PGFLAGS kernel option is set): page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) ------------[ cut here ]------------ Call Trace: ([<000000000038f91e>] test_pages_in_a_zone+0xe6/0x168) [<0000000000923472>] show_valid_zones+0x5a/0x1a8 [<0000000000900284>] dev_attr_show+0x3c/0x78 [<000000000046f6f0>] sysfs_kf_seq_show+0xd0/0x150 [<00000000003ef662>] seq_read+0x212/0x4b8 [<00000000003bf202>] __vfs_read+0x3a/0x178 [<00000000003bf3ca>] vfs_read+0x8a/0x148 [<00000000003bfa3a>] ksys_read+0x62/0xb8 [<0000000000bc2220>] system_call+0xdc/0x2d8 That VM_BUG_ON was triggered by the page poisoning introduced in mm/sparse.c with the git commit d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug"). With the same commit the new 'nid' field has been added to the struct memory_block in order to store and later on derive the node id for offline pages (instead of accessing struct page which might be uninitialized). But one reference to nid in show_valid_zones() function has been overlooked. Fixed with current commit. Also, nr_pages will not be used any more after test_pages_in_a_zone() call, do not update it. Link: http://lkml.kernel.org/r/20180828090539.41491-1-zaslonko@linux.ibm.com Fixes: d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug") Signed-off-by: Mikhail Zaslonko Acked-by: Michal Hocko Reviewed-by: Pavel Tatashin Cc: [4.17+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c8a1cb0b6136..817320c7c4c1 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -416,26 +416,24 @@ static ssize_t show_valid_zones(struct device *dev, struct zone *default_zone; int nid; - /* - * The block contains more than one zone can not be offlined. - * This can happen e.g. for ZONE_DMA and ZONE_DMA32 - */ - if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, &valid_start_pfn, &valid_end_pfn)) - return sprintf(buf, "none\n"); - - start_pfn = valid_start_pfn; - nr_pages = valid_end_pfn - start_pfn; - /* * Check the existing zone. Make sure that we do that only on the * online nodes otherwise the page_zone is not reliable */ if (mem->state == MEM_ONLINE) { + /* + * The block contains more than one zone can not be offlined. + * This can happen e.g. for ZONE_DMA and ZONE_DMA32 + */ + if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, + &valid_start_pfn, &valid_end_pfn)) + return sprintf(buf, "none\n"); + start_pfn = valid_start_pfn; strcat(buf, page_zone(pfn_to_page(start_pfn))->name); goto out; } - nid = pfn_to_nid(start_pfn); + nid = mem->nid; default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages); strcat(buf, default_zone->name); From 8a2336e549d385bb0b46880435b411df8d8200e8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Sep 2018 15:46:13 -0700 Subject: [PATCH 150/269] uapi/linux/keyctl.h: don't use C++ reserved keyword as a struct member name Since this header is in "include/uapi/linux/", apparently people want to use it in userspace programs -- even in C++ ones. However, the header uses a C++ reserved keyword ("private"), so change that to "dh_private" instead to allow the header file to be used in C++ userspace. Fixes https://bugzilla.kernel.org/show_bug.cgi?id=191051 Link: http://lkml.kernel.org/r/0db6c314-1ef4-9bfa-1baa-7214dd2ee061@infradead.org Fixes: ddbb41148724 ("KEYS: Add KEYCTL_DH_COMPUTE command") Signed-off-by: Randy Dunlap Reviewed-by: Andrew Morton Cc: David Howells Cc: James Morris Cc: "Serge E. Hallyn" Cc: Mat Martineau Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/keyctl.h | 2 +- security/keys/dh.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 7b8c9e19bad1..910cc4334b21 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -65,7 +65,7 @@ /* keyctl structures */ struct keyctl_dh_params { - __s32 private; + __s32 dh_private; __s32 prime; __s32 base; }; diff --git a/security/keys/dh.c b/security/keys/dh.c index 711e89d8c415..3b602a1e27fa 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -300,7 +300,7 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params, } dh_inputs.g_size = dlen; - dlen = dh_data_from_key(pcopy.private, &dh_inputs.key); + dlen = dh_data_from_key(pcopy.dh_private, &dh_inputs.key); if (dlen < 0) { ret = dlen; goto out2; From 62ec0d8c4f332dedf19d6fad15ddea639044d5fe Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 4 Sep 2018 15:46:16 -0700 Subject: [PATCH 151/269] mm: fix BUG_ON() in vmf_insert_pfn_pud() from VM_MIXEDMAP removal It looks like I missed the PUD path when doing VM_MIXEDMAP removal. This can be triggered by: 1. Boot with memmap=4G!8G 2. build ndctl with destructive flag on 3. make TESTS=device-dax check [ +0.000675] kernel BUG at mm/huge_memory.c:824! Applying the same change that was applied to vmf_insert_pfn_pmd() in the original patch. Link: http://lkml.kernel.org/r/153565957352.35524.1005746906902065126.stgit@djiang5-desk3.ch.intel.com Fixes: e1fb4a08649 ("dax: remove VM_MIXEDMAP for fsdax and device dax") Signed-off-by: Dave Jiang Reported-by: Vishal Verma Tested-by: Vishal Verma Acked-by: Jeff Moyer Reviewed-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c3bc7e9c9a2a..533f9b00147d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -821,11 +821,11 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, * but we need to be consistent with PTEs and architectures that * can't support a 'special' bit. */ - BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); + BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && + !pfn_t_devmap(pfn)); BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == (VM_PFNMAP|VM_MIXEDMAP)); BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); - BUG_ON(!pfn_t_devmap(pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return VM_FAULT_SIGBUS; From c5967e989f1fe702e75d7405b9251ec7e490d847 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 4 Sep 2018 15:46:20 -0700 Subject: [PATCH 152/269] checkpatch: add __ro_after_init to known $Attribute __ro_after_init is a specific __attribute__ that checkpatch does currently not understand. Add it to the known $Attribute types so that code that uses variables declared with __ro_after_init are not thought to be a modifier type. This appears as a defect in checkpatch output of code like: static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); [...] if (trust_cpu && arch_init) { where checkpatch reports: ERROR: space prohibited after that '&&' (ctx:WxW) if (trust_cpu && arch_init) { Link: http://lkml.kernel.org/r/0fa8a2cb83ade4c525e18261ecf6cfede3015983.camel@perches.com Signed-off-by: Joe Perches Reported-by: Kees Cook Tested-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b4caee6e269c..161b0224d6ae 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -380,6 +380,7 @@ our $Attribute = qr{ __noclone| __deprecated| __read_mostly| + __ro_after_init| __kprobes| $InitAttribute| ____cacheline_aligned| From 4c5d114ea04d5b6c7009d46895ec26109aa654f3 Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Tue, 4 Sep 2018 15:46:23 -0700 Subject: [PATCH 153/269] lib/Kconfig.debug: fix three typos in help text Fix three typos in CONFIG_WARN_ALL_UNSEEDED_RANDOM help text. Link: http://lkml.kernel.org/r/20180830194505.4778-1-thibaut@sautereau.fr Signed-off-by: Thibaut Sautereau Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 613316724c6a..4966c4fbe7f7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1277,13 +1277,13 @@ config WARN_ALL_UNSEEDED_RANDOM time. This is really bad from a security perspective, and so architecture maintainers really need to do what they can to get the CRNG seeded sooner after the system is booted. - However, since users can not do anything actionble to + However, since users cannot do anything actionable to address this, by default the kernel will issue only a single warning for the first use of unseeded randomness. Say Y here if you want to receive warnings for all uses of unseeded randomness. This will be of use primarily for - those developers interersted in improving the security of + those developers interested in improving the security of Linux kernels running on their architecture (or subarchitecture). From 36bdac1e674debd2714cb3e80eaa18266c2426e4 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Tue, 4 Sep 2018 15:46:26 -0700 Subject: [PATCH 154/269] drivers/dax/device.c: convert variable to vm_fault_t type As part of 226ab561075f ("device-dax: Convert to vmf_insert_mixed and vm_fault_t") in 4.19-rc1, 'rc' was not converted to vm_fault_t. Now converted. Link: http://lkml.kernel.org/r/20180830153813.GA26059@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Cc: Dan Williams Cc: Dave Jiang Cc: Ross Zwisler Cc: Vishal Verma Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dax/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 6fd46083e629..bbe4d72ca105 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -392,7 +392,8 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, { struct file *filp = vmf->vma->vm_file; unsigned long fault_size; - int rc, id; + vm_fault_t rc = VM_FAULT_SIGBUS; + int id; pfn_t pfn; struct dev_dax *dev_dax = filp->private_data; From ae98043f5f7fa45b65084f70e3ada3209873ebb4 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 4 Sep 2018 15:46:30 -0700 Subject: [PATCH 155/269] nilfs2: convert to SPDX license tags Remove the verbose license text from NILFS2 files and replace them with SPDX tags. This does not change the license of any of the code. Link: http://lkml.kernel.org/r/1535624528-5982-1-git-send-email-konishi.ryusuke@lab.ntt.co.jp Signed-off-by: Ryusuke Konishi Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.c | 11 +---------- fs/nilfs2/alloc.h | 11 +---------- fs/nilfs2/bmap.c | 11 +---------- fs/nilfs2/bmap.h | 11 +---------- fs/nilfs2/btnode.c | 11 +---------- fs/nilfs2/btnode.h | 11 +---------- fs/nilfs2/btree.c | 11 +---------- fs/nilfs2/btree.h | 11 +---------- fs/nilfs2/cpfile.c | 11 +---------- fs/nilfs2/cpfile.h | 11 +---------- fs/nilfs2/dat.c | 11 +---------- fs/nilfs2/dat.h | 11 +---------- fs/nilfs2/dir.c | 11 +---------- fs/nilfs2/direct.c | 11 +---------- fs/nilfs2/direct.h | 11 +---------- fs/nilfs2/file.c | 11 +---------- fs/nilfs2/gcinode.c | 11 +---------- fs/nilfs2/ifile.c | 11 +---------- fs/nilfs2/ifile.h | 11 +---------- fs/nilfs2/inode.c | 11 +---------- fs/nilfs2/ioctl.c | 11 +---------- fs/nilfs2/mdt.c | 11 +---------- fs/nilfs2/mdt.h | 11 +---------- fs/nilfs2/namei.c | 11 +---------- fs/nilfs2/nilfs.h | 11 +---------- fs/nilfs2/page.c | 11 +---------- fs/nilfs2/page.h | 11 +---------- fs/nilfs2/recovery.c | 11 +---------- fs/nilfs2/segbuf.c | 11 +---------- fs/nilfs2/segbuf.h | 11 +---------- fs/nilfs2/segment.c | 11 +---------- fs/nilfs2/segment.h | 11 +---------- fs/nilfs2/sufile.c | 11 +---------- fs/nilfs2/sufile.h | 11 +---------- fs/nilfs2/super.c | 11 +---------- fs/nilfs2/sysfs.c | 11 +---------- fs/nilfs2/sysfs.h | 11 +---------- fs/nilfs2/the_nilfs.c | 11 +---------- fs/nilfs2/the_nilfs.h | 11 +---------- 39 files changed, 39 insertions(+), 390 deletions(-) diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 03b8ba933eb2..235b959fc2b3 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * alloc.c - NILFS dat/inode allocator * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Originally written by Koji Sato. * Two allocators were unified by Ryusuke Konishi and Amagai Yoshiji. */ diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 05149e606a78..0303c3968cee 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * alloc.h - persistent object (dat entry/disk inode) allocator/deallocator * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Originally written by Koji Sato. * Two allocators were unified by Ryusuke Konishi and Amagai Yoshiji. */ diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 01fb1831ca25..fb5a9a8a13cf 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * bmap.c - NILFS block mapping. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 2b6ffbe5997a..2c63858e81c9 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * bmap.h - NILFS block mapping. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index dec98cab729d..ebb24a314f43 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * btnode.c - NILFS B-tree node cache * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Originally written by Seiji Kihara. * Fully revised by Ryusuke Konishi for stabilization and simplification. * diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 4e8aaa1aeb65..0f88dbc9bcb3 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * btnode.h - NILFS B-tree node cache * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Seiji Kihara. * Revised by Ryusuke Konishi. */ diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 16a7a67a11c9..23e043eca237 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * btree.c - NILFS B-tree. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 2184e47fa4bf..d1421b646ce4 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * btree.h - NILFS B-tree. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index a15a1601e931..8d41311b5db4 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * cpfile.c - NILFS checkpoint file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index 6eca972f9673..6336222df24a 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * cpfile.h - NILFS checkpoint file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index dffedb2f8817..6f4066636be9 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * dat.c - NILFS disk address translation. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index 57dc6cf466d0..b17ee34580ae 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * dat.h - NILFS disk address translation. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 582831ab3eb9..81394e22d0a0 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * dir.c - NILFS directory entry operations * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Modified for NILFS by Amagai Yoshiji. */ /* diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 96e3ed0d9652..533e24ea3a88 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * direct.c - NILFS direct block pointer. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h index cfe85e848bba..ec9a23c77994 100644 --- a/fs/nilfs2/direct.h +++ b/fs/nilfs2/direct.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * direct.h - NILFS direct block pointer. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 7da0fac71dc2..64bc81363c6c 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * file.c - NILFS regular file handling primitives including fsync(). * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Amagai Yoshiji and Ryusuke Konishi. */ diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 853a831dcde0..aa3c328ee189 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * gcinode.c - dummy inodes to buffer blocks for garbage collection * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Seiji Kihara, Amagai Yoshiji, and Ryusuke Konishi. * Revised by Ryusuke Konishi. * diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index b8fa45c20c63..4140d232cadc 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * ifile.c - NILFS inode file * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Amagai Yoshiji. * Revised by Ryusuke Konishi. * diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 188b94fe0ec5..a1e1e5711a05 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * ifile.h - NILFS inode file * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Amagai Yoshiji. * Revised by Ryusuke Konishi. * diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 6a612d832e7d..671085512e0f 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * inode.c - NILFS inode operations. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. * */ diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 1d2c3d7711fe..9b96d79eea6c 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * ioctl.c - NILFS ioctl operations. * * Copyright (C) 2007, 2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index c6bc1033e7d2..700870a92bc4 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * mdt.c - meta data file for NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. */ diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index 3f67f3932097..e77aea4bb921 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * mdt.h - NILFS meta data file prototype and definitions * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. */ diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index dd52d3f82e8d..9fe6d4ab74f0 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * namei.c - NILFS pathname lookup operations. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Modified for NILFS by Amagai Yoshiji and Ryusuke Konishi. */ /* diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 33f8c8fc96e8..a2f247b6a209 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * nilfs.h - NILFS local header file. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato and Ryusuke Konishi. */ diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 4cb850a6f1c2..329a056b73b1 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * page.c - buffer/page management specific to NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi and Seiji Kihara. */ diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index f3687c958fa8..62b9bb469e92 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * page.h - buffer/page management specific to NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi and Seiji Kihara. */ diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 5139efed1888..140b663e91c7 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * recovery.c - NILFS recovery logic * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. */ diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 68cb9e4740b4..20c479b5e41b 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * segbuf.c - NILFS segment buffer * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. * */ diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 10e16935fff6..9bea1bd59041 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * segbuf.h - NILFS Segment buffer prototypes and definitions * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. * */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0953635e7d48..445eef41bfaf 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * segment.c - NILFS segment constructor. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. * */ diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 04634e3e3d58..f5cf5308f3fc 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * segment.h - NILFS Segment constructor prototypes and definitions * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. * */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index c7fa139d50e8..bf3f8f05c89b 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * sufile.c - NILFS segment usage file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. * Revised by Ryusuke Konishi. */ diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 673a891350f4..c4e2c7a7add1 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * sufile.h - NILFS segment usage file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Koji Sato. */ diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1b9067cf4511..26290aa1023f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * super.c - NILFS module and super block management. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. */ /* diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 4b25837e7724..e60be7bb55b0 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -1,19 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * sysfs.c - sysfs support implementation. * * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation. * Copyright (C) 2014 HGST, Inc., a Western Digital Company. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Vyacheslav Dubeyko */ diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h index 648cedf9c06e..d001eb862dae 100644 --- a/fs/nilfs2/sysfs.h +++ b/fs/nilfs2/sysfs.h @@ -1,19 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * sysfs.h - sysfs support declarations. * * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation. * Copyright (C) 2014 HGST, Inc., a Western Digital Company. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Vyacheslav Dubeyko */ diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 1a85317e83f0..484785cdf96e 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * the_nilfs.c - the_nilfs shared structure. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. * */ diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 36da1779f976..380a543c5b19 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -1,18 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * the_nilfs.h - the_nilfs shared structure. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * Written by Ryusuke Konishi. * */ From e866d3e84eb7c9588afb77604d417e8cc49fe216 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 28 Aug 2018 17:33:46 -0700 Subject: [PATCH 156/269] riscv: Do not overwrite initrd_start and initrd_end setup_initrd() overwrites initrd_start and initrd_end if __initramfs_size is larger than 0, which is always true even if there is no embedded initramfs. This prevents booting qemu with "-initrd" parameter. Overwriting initrd_start and initrd_end is not necessary since __initramfs_start and __initramfs_size are used directly in populate_rootfs() to load the built-in initramfs, so just drop that code. Signed-off-by: Guenter Roeck Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index db20dc630e7e..aee603123030 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -85,15 +85,8 @@ atomic_t hart_lottery; #ifdef CONFIG_BLK_DEV_INITRD static void __init setup_initrd(void) { - extern char __initramfs_start[]; - extern unsigned long __initramfs_size; unsigned long size; - if (__initramfs_size > 0) { - initrd_start = (unsigned long)(&__initramfs_start); - initrd_end = initrd_start + __initramfs_size; - } - if (initrd_start >= initrd_end) { printk(KERN_INFO "initrd not found or empty"); goto disable; From 3350139c0ff3c95724b784f7109987d533cb3ecd Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Tue, 4 Sep 2018 14:25:57 +0800 Subject: [PATCH 157/269] nds32: linker script: GCOV kernel may refers data in __exit This patch is used to fix nds32 allmodconfig/allyesconfig build error because GCOV kernel embeds counters in the kernel for each line and a part of that embed in __exit text. So we need to keep the EXIT_TEXT and EXIT_DATA if CONFIG_GCOV_KERNEL=y. Link: https://lkml.org/lkml/2018/9/1/125 Signed-off-by: Greentime Hu Reviewed-by: Masami Hiramatsu --- arch/nds32/kernel/vmlinux.lds.S | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/nds32/kernel/vmlinux.lds.S b/arch/nds32/kernel/vmlinux.lds.S index 288313b886ef..9e90f30a181d 100644 --- a/arch/nds32/kernel/vmlinux.lds.S +++ b/arch/nds32/kernel/vmlinux.lds.S @@ -13,14 +13,26 @@ OUTPUT_ARCH(nds32) ENTRY(_stext_lma) jiffies = jiffies_64; +#if defined(CONFIG_GCOV_KERNEL) +#define NDS32_EXIT_KEEP(x) x +#else +#define NDS32_EXIT_KEEP(x) +#endif + SECTIONS { _stext_lma = TEXTADDR - LOAD_OFFSET; . = TEXTADDR; __init_begin = .; HEAD_TEXT_SECTION + .exit.text : { + NDS32_EXIT_KEEP(EXIT_TEXT) + } INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) + .exit.data : { + NDS32_EXIT_KEEP(EXIT_DATA) + } PERCPU_SECTION(L1_CACHE_BYTES) __init_end = .; From b034ed50a2bb517c4b76e84f7723cb6bf60a4edd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 29 Aug 2018 10:22:09 -0500 Subject: [PATCH 158/269] HID: core: fix NULL pointer dereference There is a NULL pointer dereference in case memory resources for *parse* are not successfully allocated. Fix this by adding a new goto label and make the execution path jump to it in case vzalloc() fails. Addresses-Coverity-ID: 1473081 ("Dereference after null check") Fixes: b2dd9f2e5a8a ("HID: core: fix memory leak on probe") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Stefan Agner Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 44a465db3f96..44564f61e9cc 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1000,7 +1000,7 @@ int hid_open_report(struct hid_device *device) parser = vzalloc(sizeof(struct hid_parser)); if (!parser) { ret = -ENOMEM; - goto err; + goto alloc_err; } parser->device = device; @@ -1049,6 +1049,7 @@ int hid_open_report(struct hid_device *device) hid_err(device, "item fetching failed at offset %d\n", (int)(end - start)); err: kfree(parser->collection_stack); +alloc_err: vfree(parser); hid_close_report(device); return ret; From ade573eb1e03d1ee5abcb3359b1259469ab6e8ed Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 18 Aug 2018 10:12:08 +0200 Subject: [PATCH 159/269] HID: sensor-hub: Restore fixup for Lenovo ThinkPad Helix 2 sensor hub report Commit b0f847e16c1e ("HID: hid-sensor-hub: Force logical minimum to 1 for power and report state") not only replaced the descriptor fixup done for devices with the HID_SENSOR_HUB_ENUM_QUIRK with a generic fix, but also accidentally removed the unrelated descriptor fixup for the Lenovo ThinkPad Helix 2 sensor hub. This commit restores this fixup. Restoring this fixup not only fixes the Lenovo ThinkPad Helix 2's sensors, but also the Lenovo ThinkPad 8's sensors. Fixes: b0f847e16c1e ("HID: hid-sensor-hub: Force logical minimum ...") Cc: Srinivas Pandruvada Cc: Fernando D S Lima Acked-by: Srinivas Pandruvada Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-hub.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 50af72baa5ca..2b63487057c2 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -579,6 +579,28 @@ void sensor_hub_device_close(struct hid_sensor_hub_device *hsdev) } EXPORT_SYMBOL_GPL(sensor_hub_device_close); +static __u8 *sensor_hub_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + /* + * Checks if the report descriptor of Thinkpad Helix 2 has a logical + * minimum for magnetic flux axis greater than the maximum. + */ + if (hdev->product == USB_DEVICE_ID_TEXAS_INSTRUMENTS_LENOVO_YOGA && + *rsize == 2558 && rdesc[913] == 0x17 && rdesc[914] == 0x40 && + rdesc[915] == 0x81 && rdesc[916] == 0x08 && + rdesc[917] == 0x00 && rdesc[918] == 0x27 && + rdesc[921] == 0x07 && rdesc[922] == 0x00) { + /* Sets negative logical minimum for mag x, y and z */ + rdesc[914] = rdesc[935] = rdesc[956] = 0xc0; + rdesc[915] = rdesc[936] = rdesc[957] = 0x7e; + rdesc[916] = rdesc[937] = rdesc[958] = 0xf7; + rdesc[917] = rdesc[938] = rdesc[959] = 0xff; + } + + return rdesc; +} + static int sensor_hub_probe(struct hid_device *hdev, const struct hid_device_id *id) { @@ -743,6 +765,7 @@ static struct hid_driver sensor_hub_driver = { .probe = sensor_hub_probe, .remove = sensor_hub_remove, .raw_event = sensor_hub_raw_event, + .report_fixup = sensor_hub_report_fixup, #ifdef CONFIG_PM .suspend = sensor_hub_suspend, .resume = sensor_hub_resume, From 3a3539cd36327c6f9e0ffd9f3fd3dea7ff8b3567 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 5 Sep 2018 12:16:00 +0200 Subject: [PATCH 160/269] mlxsw: spectrum_buffers: Set up a dedicated pool for BUM traffic MC-aware mode was recently enabled by mlxsw on Spectrum switches in commit 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports"). Unfortunately, testing has shown that the fix is incomplete and in the presented form actually makes the problem even worse, because any amount of MC traffic causes UC disruption. The reason for this is that currently, mlxsw configures the MC-specific TCs (8..15) to map to pool 0. It also configures a maximum buffer size of 0, but for MC traffic that maximum is disregarded and not part of the quota. Therefore MC traffic is always admitted to the egress buffer. Fix the configuration by directing the MC TCs into pool 15, which is dedicated to MC traffic and recognized as such by the silicon. Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports") Signed-off-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_buffers.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 4327487553c5..3589432d1643 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -337,14 +337,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { MLXSW_SP_SB_CM(1500, 9, 0), MLXSW_SP_SB_CM(1500, 9, 0), MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(0, 0, 0), - MLXSW_SP_SB_CM(0, 0, 0), - MLXSW_SP_SB_CM(0, 0, 0), - MLXSW_SP_SB_CM(0, 0, 0), - MLXSW_SP_SB_CM(0, 0, 0), - MLXSW_SP_SB_CM(0, 0, 0), - MLXSW_SP_SB_CM(0, 0, 0), - MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 140000, 15), + MLXSW_SP_SB_CM(0, 140000, 15), + MLXSW_SP_SB_CM(0, 140000, 15), + MLXSW_SP_SB_CM(0, 140000, 15), + MLXSW_SP_SB_CM(0, 140000, 15), + MLXSW_SP_SB_CM(0, 140000, 15), + MLXSW_SP_SB_CM(0, 140000, 15), + MLXSW_SP_SB_CM(0, 140000, 15), MLXSW_SP_SB_CM(1, 0xff, 0), }; From 9d7f19dc4673fbafebfcbf30eb90e09fa7d1c037 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Wed, 5 Sep 2018 14:37:45 +0200 Subject: [PATCH 161/269] be2net: Fix memory leak in be_cmd_get_profile_config() DMA allocated memory is lost in be_cmd_get_profile_config() when we call it with non-NULL port_res parameter. Signed-off-by: Petr Oros Reviewed-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index ff92ab1daeb8..1e9d882c04ef 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -4500,7 +4500,7 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, port_res->max_vfs += le16_to_cpu(pcie->num_vfs); } } - return status; + goto err; } pcie = be_get_pcie_desc(resp->func_param, desc_count, From e65a9e480e91ddf9e15155454d370cead64689c8 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 5 Sep 2018 15:23:18 +0200 Subject: [PATCH 162/269] net: qca_spi: Fix race condition in spi transfers With performance optimization the spi transfer and messages of basic register operations like qcaspi_read_register moved into the private driver structure. But they weren't protected against mutual access (e.g. between driver kthread and ethtool). So dumping the QCA7000 registers via ethtool during network traffic could make spi_sync hang forever, because the completion in spi_message is overwritten. So revert the optimization completely. Fixes: 291ab06ecf676 ("net: qualcomm: new Ethernet over SPI driver for QCA700") Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/qca_7k.c | 76 ++++++++-------- drivers/net/ethernet/qualcomm/qca_spi.c | 112 ++++++++++++------------ drivers/net/ethernet/qualcomm/qca_spi.h | 5 -- 3 files changed, 94 insertions(+), 99 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_7k.c b/drivers/net/ethernet/qualcomm/qca_7k.c index ffe7a16bdfc8..6c8543fb90c0 100644 --- a/drivers/net/ethernet/qualcomm/qca_7k.c +++ b/drivers/net/ethernet/qualcomm/qca_7k.c @@ -45,34 +45,33 @@ qcaspi_read_register(struct qcaspi *qca, u16 reg, u16 *result) { __be16 rx_data; __be16 tx_data; - struct spi_transfer *transfer; - struct spi_message *msg; + struct spi_transfer transfer[2]; + struct spi_message msg; int ret; + memset(transfer, 0, sizeof(transfer)); + + spi_message_init(&msg); + tx_data = cpu_to_be16(QCA7K_SPI_READ | QCA7K_SPI_INTERNAL | reg); + *result = 0; + + transfer[0].tx_buf = &tx_data; + transfer[0].len = QCASPI_CMD_LEN; + transfer[1].rx_buf = &rx_data; + transfer[1].len = QCASPI_CMD_LEN; + + spi_message_add_tail(&transfer[0], &msg); if (qca->legacy_mode) { - msg = &qca->spi_msg1; - transfer = &qca->spi_xfer1; - transfer->tx_buf = &tx_data; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - spi_sync(qca->spi_dev, msg); - } else { - msg = &qca->spi_msg2; - transfer = &qca->spi_xfer2[0]; - transfer->tx_buf = &tx_data; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - transfer = &qca->spi_xfer2[1]; + spi_sync(qca->spi_dev, &msg); + spi_message_init(&msg); } - transfer->tx_buf = NULL; - transfer->rx_buf = &rx_data; - transfer->len = QCASPI_CMD_LEN; - ret = spi_sync(qca->spi_dev, msg); + spi_message_add_tail(&transfer[1], &msg); + ret = spi_sync(qca->spi_dev, &msg); if (!ret) - ret = msg->status; + ret = msg.status; if (ret) qcaspi_spi_error(qca); @@ -86,35 +85,32 @@ int qcaspi_write_register(struct qcaspi *qca, u16 reg, u16 value) { __be16 tx_data[2]; - struct spi_transfer *transfer; - struct spi_message *msg; + struct spi_transfer transfer[2]; + struct spi_message msg; int ret; + memset(&transfer, 0, sizeof(transfer)); + + spi_message_init(&msg); + tx_data[0] = cpu_to_be16(QCA7K_SPI_WRITE | QCA7K_SPI_INTERNAL | reg); tx_data[1] = cpu_to_be16(value); + transfer[0].tx_buf = &tx_data[0]; + transfer[0].len = QCASPI_CMD_LEN; + transfer[1].tx_buf = &tx_data[1]; + transfer[1].len = QCASPI_CMD_LEN; + + spi_message_add_tail(&transfer[0], &msg); if (qca->legacy_mode) { - msg = &qca->spi_msg1; - transfer = &qca->spi_xfer1; - transfer->tx_buf = &tx_data[0]; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - spi_sync(qca->spi_dev, msg); - } else { - msg = &qca->spi_msg2; - transfer = &qca->spi_xfer2[0]; - transfer->tx_buf = &tx_data[0]; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - transfer = &qca->spi_xfer2[1]; + spi_sync(qca->spi_dev, &msg); + spi_message_init(&msg); } - transfer->tx_buf = &tx_data[1]; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - ret = spi_sync(qca->spi_dev, msg); + spi_message_add_tail(&transfer[1], &msg); + ret = spi_sync(qca->spi_dev, &msg); if (!ret) - ret = msg->status; + ret = msg.status; if (ret) qcaspi_spi_error(qca); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 206f0266463e..66b775d462fd 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -99,22 +99,24 @@ static u32 qcaspi_write_burst(struct qcaspi *qca, u8 *src, u32 len) { __be16 cmd; - struct spi_message *msg = &qca->spi_msg2; - struct spi_transfer *transfer = &qca->spi_xfer2[0]; + struct spi_message msg; + struct spi_transfer transfer[2]; int ret; + memset(&transfer, 0, sizeof(transfer)); + spi_message_init(&msg); + cmd = cpu_to_be16(QCA7K_SPI_WRITE | QCA7K_SPI_EXTERNAL); - transfer->tx_buf = &cmd; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - transfer = &qca->spi_xfer2[1]; - transfer->tx_buf = src; - transfer->rx_buf = NULL; - transfer->len = len; + transfer[0].tx_buf = &cmd; + transfer[0].len = QCASPI_CMD_LEN; + transfer[1].tx_buf = src; + transfer[1].len = len; - ret = spi_sync(qca->spi_dev, msg); + spi_message_add_tail(&transfer[0], &msg); + spi_message_add_tail(&transfer[1], &msg); + ret = spi_sync(qca->spi_dev, &msg); - if (ret || (msg->actual_length != QCASPI_CMD_LEN + len)) { + if (ret || (msg.actual_length != QCASPI_CMD_LEN + len)) { qcaspi_spi_error(qca); return 0; } @@ -125,17 +127,20 @@ qcaspi_write_burst(struct qcaspi *qca, u8 *src, u32 len) static u32 qcaspi_write_legacy(struct qcaspi *qca, u8 *src, u32 len) { - struct spi_message *msg = &qca->spi_msg1; - struct spi_transfer *transfer = &qca->spi_xfer1; + struct spi_message msg; + struct spi_transfer transfer; int ret; - transfer->tx_buf = src; - transfer->rx_buf = NULL; - transfer->len = len; + memset(&transfer, 0, sizeof(transfer)); + spi_message_init(&msg); - ret = spi_sync(qca->spi_dev, msg); + transfer.tx_buf = src; + transfer.len = len; - if (ret || (msg->actual_length != len)) { + spi_message_add_tail(&transfer, &msg); + ret = spi_sync(qca->spi_dev, &msg); + + if (ret || (msg.actual_length != len)) { qcaspi_spi_error(qca); return 0; } @@ -146,23 +151,25 @@ qcaspi_write_legacy(struct qcaspi *qca, u8 *src, u32 len) static u32 qcaspi_read_burst(struct qcaspi *qca, u8 *dst, u32 len) { - struct spi_message *msg = &qca->spi_msg2; + struct spi_message msg; __be16 cmd; - struct spi_transfer *transfer = &qca->spi_xfer2[0]; + struct spi_transfer transfer[2]; int ret; + memset(&transfer, 0, sizeof(transfer)); + spi_message_init(&msg); + cmd = cpu_to_be16(QCA7K_SPI_READ | QCA7K_SPI_EXTERNAL); - transfer->tx_buf = &cmd; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - transfer = &qca->spi_xfer2[1]; - transfer->tx_buf = NULL; - transfer->rx_buf = dst; - transfer->len = len; + transfer[0].tx_buf = &cmd; + transfer[0].len = QCASPI_CMD_LEN; + transfer[1].rx_buf = dst; + transfer[1].len = len; - ret = spi_sync(qca->spi_dev, msg); + spi_message_add_tail(&transfer[0], &msg); + spi_message_add_tail(&transfer[1], &msg); + ret = spi_sync(qca->spi_dev, &msg); - if (ret || (msg->actual_length != QCASPI_CMD_LEN + len)) { + if (ret || (msg.actual_length != QCASPI_CMD_LEN + len)) { qcaspi_spi_error(qca); return 0; } @@ -173,17 +180,20 @@ qcaspi_read_burst(struct qcaspi *qca, u8 *dst, u32 len) static u32 qcaspi_read_legacy(struct qcaspi *qca, u8 *dst, u32 len) { - struct spi_message *msg = &qca->spi_msg1; - struct spi_transfer *transfer = &qca->spi_xfer1; + struct spi_message msg; + struct spi_transfer transfer; int ret; - transfer->tx_buf = NULL; - transfer->rx_buf = dst; - transfer->len = len; + memset(&transfer, 0, sizeof(transfer)); + spi_message_init(&msg); - ret = spi_sync(qca->spi_dev, msg); + transfer.rx_buf = dst; + transfer.len = len; - if (ret || (msg->actual_length != len)) { + spi_message_add_tail(&transfer, &msg); + ret = spi_sync(qca->spi_dev, &msg); + + if (ret || (msg.actual_length != len)) { qcaspi_spi_error(qca); return 0; } @@ -195,19 +205,23 @@ static int qcaspi_tx_cmd(struct qcaspi *qca, u16 cmd) { __be16 tx_data; - struct spi_message *msg = &qca->spi_msg1; - struct spi_transfer *transfer = &qca->spi_xfer1; + struct spi_message msg; + struct spi_transfer transfer; int ret; - tx_data = cpu_to_be16(cmd); - transfer->len = sizeof(tx_data); - transfer->tx_buf = &tx_data; - transfer->rx_buf = NULL; + memset(&transfer, 0, sizeof(transfer)); - ret = spi_sync(qca->spi_dev, msg); + spi_message_init(&msg); + + tx_data = cpu_to_be16(cmd); + transfer.len = sizeof(cmd); + transfer.tx_buf = &tx_data; + spi_message_add_tail(&transfer, &msg); + + ret = spi_sync(qca->spi_dev, &msg); if (!ret) - ret = msg->status; + ret = msg.status; if (ret) qcaspi_spi_error(qca); @@ -835,16 +849,6 @@ qcaspi_netdev_setup(struct net_device *dev) qca = netdev_priv(dev); memset(qca, 0, sizeof(struct qcaspi)); - memset(&qca->spi_xfer1, 0, sizeof(struct spi_transfer)); - memset(&qca->spi_xfer2, 0, sizeof(struct spi_transfer) * 2); - - spi_message_init(&qca->spi_msg1); - spi_message_add_tail(&qca->spi_xfer1, &qca->spi_msg1); - - spi_message_init(&qca->spi_msg2); - spi_message_add_tail(&qca->spi_xfer2[0], &qca->spi_msg2); - spi_message_add_tail(&qca->spi_xfer2[1], &qca->spi_msg2); - memset(&qca->txr, 0, sizeof(qca->txr)); qca->txr.count = TX_RING_MAX_LEN; } diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h index fc4beb1b32d1..fc0e98726b36 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.h +++ b/drivers/net/ethernet/qualcomm/qca_spi.h @@ -83,11 +83,6 @@ struct qcaspi { struct tx_ring txr; struct qcaspi_stats stats; - struct spi_message spi_msg1; - struct spi_message spi_msg2; - struct spi_transfer spi_xfer1; - struct spi_transfer spi_xfer2[2]; - u8 *rx_buffer; u32 buffer_size; u8 sync; From 865e63b04e9b2a658d7f26bd13a71dcd964a9118 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Sep 2018 16:26:11 -0400 Subject: [PATCH 163/269] tracing: Add back in rcu_irq_enter/exit_irqson() for rcuidle tracepoints Borislav reported the following splat: ============================= WARNING: suspicious RCU usage 4.19.0-rc1+ #1 Not tainted ----------------------------- ./include/linux/rcupdate.h:631 rcu_read_lock() used illegally while idle! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 2, debug_locks = 1 RCU used illegally from extended quiescent state! 1 lock held by swapper/0/0: #0: 000000004557ee0e (rcu_read_lock){....}, at: perf_event_output_forward+0x0/0x130 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0-rc1+ #1 Hardware name: LENOVO 2320CTO/2320CTO, BIOS G2ET86WW (2.06 ) 11/13/2012 Call Trace: dump_stack+0x85/0xcb perf_event_output_forward+0xf6/0x130 __perf_event_overflow+0x52/0xe0 perf_swevent_overflow+0x91/0xb0 perf_tp_event+0x11a/0x350 ? find_held_lock+0x2d/0x90 ? __lock_acquire+0x2ce/0x1350 ? __lock_acquire+0x2ce/0x1350 ? retint_kernel+0x2d/0x2d ? find_held_lock+0x2d/0x90 ? tick_nohz_get_sleep_length+0x83/0xb0 ? perf_trace_cpu+0xbb/0xd0 ? perf_trace_buf_alloc+0x5a/0xa0 perf_trace_cpu+0xbb/0xd0 cpuidle_enter_state+0x185/0x340 do_idle+0x1eb/0x260 cpu_startup_entry+0x5f/0x70 start_kernel+0x49b/0x4a6 secondary_startup_64+0xa4/0xb0 This is due to the tracepoints moving to SRCU usage which does not require RCU to be "watching". But perf uses these tracepoints with RCU and expects it to be. Hence, we still need to add in the rcu_irq_enter/exit_irqson() calls for "rcuidle" tracepoints. This is a temporary fix until we have SRCU working in NMI context, and then perf can be converted to use that instead of normal RCU. Link: http://lkml.kernel.org/r/20180904162611.6a120068@gandalf.local.home Cc: x86-ml Cc: Peter Zijlstra Reported-by: Borislav Petkov Tested-by: Borislav Petkov Reviewed-by: "Paul E. McKenney" Fixes: e6753f23d961d ("tracepoint: Make rcuidle tracepoint callers use SRCU") Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7f2e16e76ac4..041f7e56a289 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -158,8 +158,10 @@ extern void syscall_unregfunc(void); * For rcuidle callers, use srcu since sched-rcu \ * doesn't work from the idle path. \ */ \ - if (rcuidle) \ + if (rcuidle) { \ idx = srcu_read_lock_notrace(&tracepoint_srcu); \ + rcu_irq_enter_irqson(); \ + } \ \ it_func_ptr = rcu_dereference_raw((tp)->funcs); \ \ @@ -171,8 +173,10 @@ extern void syscall_unregfunc(void); } while ((++it_func_ptr)->func); \ } \ \ - if (rcuidle) \ + if (rcuidle) { \ + rcu_irq_exit_irqson(); \ srcu_read_unlock_notrace(&tracepoint_srcu, idx);\ + } \ \ preempt_enable_notrace(); \ } while (0) From d07f05fb86439c41dd6967c94be3ba3837b21567 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 21 Jul 2018 00:02:12 +0100 Subject: [PATCH 164/269] hwmon: rpi: add module alias to raspberrypi-hwmon The raspberrypi-hwmon driver doesn't automatically load, although it does work when loaded, by adding the alias it auto loads as expected when built as a module. Tested on RPi2/RPi3 on 32 bit kernel and RPi3B+ on aarch64 with Fedora 28 and a patched 4.18 RC kernel. Fixes: 3c493c885cf ("hwmon: Add support for RPi voltage sensor") Signed-off-by: Peter Robinson CC: Stefan Wahren CC: Eric Anholt Acked-by: Guenter Roeck Tested-by: Stefan Wahren Reviewed-by: Eric Anholt Signed-off-by: Florian Fainelli --- drivers/hwmon/raspberrypi-hwmon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/raspberrypi-hwmon.c b/drivers/hwmon/raspberrypi-hwmon.c index fb4e4a6bb1f6..be5ba4690895 100644 --- a/drivers/hwmon/raspberrypi-hwmon.c +++ b/drivers/hwmon/raspberrypi-hwmon.c @@ -164,3 +164,4 @@ module_platform_driver(rpi_hwmon_driver); MODULE_AUTHOR("Stefan Wahren "); MODULE_DESCRIPTION("Raspberry Pi voltage sensor driver"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:raspberrypi-hwmon"); From 816e846c2eb9129a3e0afa5f920c8bbc71efecaa Mon Sep 17 00:00:00 2001 From: Aaron Knister Date: Fri, 24 Aug 2018 08:42:46 -0400 Subject: [PATCH 165/269] IB/ipoib: Avoid a race condition between start_xmit and cm_rep_handler Inside of start_xmit() the call to check if the connection is up and the queueing of the packets for later transmission is not atomic which leaves a window where cm_rep_handler can run, set the connection up, dequeue pending packets and leave the subsequently queued packets by start_xmit() sitting on neigh->queue until they're dropped when the connection is torn down. This only applies to connected mode. These dropped packets can really upset TCP, for example, and cause multi-minute delays in transmission for open connections. Here's the code in start_xmit where we check to see if the connection is up: if (ipoib_cm_get(neigh)) { if (ipoib_cm_up(neigh)) { ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); goto unref; } } The race occurs if cm_rep_handler execution occurs after the above connection check (specifically if it gets to the point where it acquires priv->lock to dequeue pending skb's) but before the below code snippet in start_xmit where packets are queued. if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { push_pseudo_header(skb, phdr->hwaddr); spin_lock_irqsave(&priv->lock, flags); __skb_queue_tail(&neigh->queue, skb); spin_unlock_irqrestore(&priv->lock, flags); } else { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); } The patch acquires the netif tx lock in cm_rep_handler for the section where it sets the connection up and dequeues and retransmits deferred skb's. Fixes: 839fcaba355a ("IPoIB: Connected mode experimental support") Cc: stable@vger.kernel.org Signed-off-by: Aaron Knister Tested-by: Ira Weiny Reviewed-by: Ira Weiny Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index ea01b8dd2be6..3d5424f335cb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1027,12 +1027,14 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, skb_queue_head_init(&skqueue); + netif_tx_lock_bh(p->dev); spin_lock_irq(&priv->lock); set_bit(IPOIB_FLAG_OPER_UP, &p->flags); if (p->neigh) while ((skb = __skb_dequeue(&p->neigh->queue))) __skb_queue_tail(&skqueue, skb); spin_unlock_irq(&priv->lock); + netif_tx_unlock_bh(p->dev); while ((skb = __skb_dequeue(&skqueue))) { skb->dev = p->dev; From f40f299bbe806a2e2c8b0d7cdda822fa3bdd171b Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 5 Sep 2018 13:20:34 +0530 Subject: [PATCH 166/269] bnxt_re: Fix couple of memory leaks that could lead to IOMMU call traces 1. DMA-able memory allocated for Shadow QP was not being freed. 2. bnxt_qplib_alloc_qp_hdr_buf() had a bug wherein the SQ pointer was erroneously pointing to the RQ. But since the corresponding free_qp_hdr_buf() was correct, memory being free was less than what was allocated. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Somnath Kotur Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 ++ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index bbfb86eb2d24..bc2b9e038439 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -833,6 +833,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) "Failed to destroy Shadow QP"); return rc; } + bnxt_qplib_free_qp_res(&rdev->qplib_res, + &rdev->qp1_sqp->qplib_qp); mutex_lock(&rdev->qp_lock); list_del(&rdev->qp1_sqp->list); atomic_dec(&rdev->qp_count); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index e426b990c1dd..6ad0d46ab879 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -196,7 +196,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - struct bnxt_qplib_q *sq = &qp->rq; + struct bnxt_qplib_q *sq = &qp->sq; int rc = 0; if (qp->sq_hdr_buf_size && sq->hwq.max_elements) { From 8b2ded1c94c06f841f8c1612bcfa33c85012a36b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 5 Sep 2018 16:14:36 -0600 Subject: [PATCH 167/269] block: don't warn when doing fsync on read-only devices It is possible to call fsync on a read-only handle (for example, fsck.ext2 does it when doing read-only check), and this call results in kernel warning. The patch b089cfd95d32 ("block: don't warn for flush on read-only device") attempted to disable the warning, but it is buggy and it doesn't (op_is_flush tests flags, but bio_op strips off the flags). Signed-off-by: Mikulas Patocka Fixes: 721c7fc701c7 ("block: fail op_is_write() requests to read-only partitions") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Jens Axboe --- block/blk-core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index dee56c282efb..4dbc93f43b38 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2163,9 +2163,12 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) { const int op = bio_op(bio); - if (part->policy && (op_is_write(op) && !op_is_flush(op))) { + if (part->policy && op_is_write(op)) { char b[BDEVNAME_SIZE]; + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) + return false; + WARN_ONCE(1, "generic_make_request: Trying to write " "to read-only block-device %s (partno %d)\n", From 08e74be103051861eb2c1ee52a2dcf119cde264f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 5 Sep 2018 09:47:57 +0300 Subject: [PATCH 168/269] RDMA/uverbs: Fix error cleanup path of ib_uverbs_add_one() If ib_uverbs_create_uapi() fails, dev_num should be freed from the bitmap. Fixes: 7d96c9b17636 ("IB/uverbs: Have the core code create the uverbs_root_spec") Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 823beca448e1..6d974e2363df 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1050,7 +1050,7 @@ static void ib_uverbs_add_one(struct ib_device *device) uverbs_dev->num_comp_vectors = device->num_comp_vectors; if (ib_uverbs_create_uapi(device, uverbs_dev)) - goto err; + goto err_uapi; cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; @@ -1077,11 +1077,10 @@ static void ib_uverbs_add_one(struct ib_device *device) err_class: device_destroy(uverbs_class, uverbs_dev->cdev.dev); - err_cdev: cdev_del(&uverbs_dev->cdev); +err_uapi: clear_bit(devnum, dev_map); - err: if (atomic_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); From 76d5581c870454be5f1f1a106c57985902e7ea20 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 5 Aug 2018 09:19:33 +0300 Subject: [PATCH 169/269] net/mlx5: Fix use-after-free in self-healing flow When the mlx5 health mechanism detects a problem while the driver is in the middle of init_one or remove_one, the driver needs to prevent the health mechanism from scheduling future work; if future work is scheduled, there is a problem with use-after-free: the system WQ tries to run the work item (which has been freed) at the scheduled future time. Prevent this by disabling work item scheduling in the health mechanism when the driver is in the middle of init_one() or remove_one(). Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Jack Morgenstein Reviewed-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 10 +++++++++- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 +++--- include/linux/mlx5/driver.h | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d39b0b7011b2..9f39aeca863f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -331,9 +331,17 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) add_timer(&health->timer); } -void mlx5_stop_health_poll(struct mlx5_core_dev *dev) +void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) { struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + if (disable_health) { + spin_lock_irqsave(&health->wq_lock, flags); + set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + spin_unlock_irqrestore(&health->wq_lock, flags); + } del_timer_sync(&health->timer); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index cf3e4a659052..739aad0a0b35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1286,7 +1286,7 @@ err_cleanup_once: mlx5_cleanup_once(dev); err_stop_poll: - mlx5_stop_health_poll(dev); + mlx5_stop_health_poll(dev, boot); if (mlx5_cmd_teardown_hca(dev)) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); goto out_err; @@ -1346,7 +1346,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_free_irq_vectors(dev); if (cleanup) mlx5_cleanup_once(dev); - mlx5_stop_health_poll(dev); + mlx5_stop_health_poll(dev, cleanup); err = mlx5_cmd_teardown_hca(dev); if (err) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); @@ -1608,7 +1608,7 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) * with the HCA, so the health polll is no longer needed. */ mlx5_drain_health_wq(dev); - mlx5_stop_health_poll(dev); + mlx5_stop_health_poll(dev, false); ret = mlx5_cmd_force_teardown_hca(dev); if (ret) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7a452716de4b..aa65f58c6610 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1052,7 +1052,7 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); -void mlx5_stop_health_poll(struct mlx5_core_dev *dev); +void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); void mlx5_drain_health_recovery(struct mlx5_core_dev *dev); From 5df816e7f43f1297c40021ef17ec6e722b45c82f Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 7 Aug 2018 09:59:03 +0300 Subject: [PATCH 170/269] net/mlx5: Fix debugfs cleanup in the device init/remove flow When initializing the device (procedure init_one), the driver calls mlx5_pci_init to perform pci initialization. As part of this initialization, mlx5_pci_init creates a debugfs directory. If this creation fails, init_one aborts, returning failure to the caller (which is the probe method caller). The main reason for such a failure to occur is if the debugfs directory already exists. This can happen if the last time mlx5_pci_close was called, debugfs_remove (silently) failed due to the debugfs directory not being empty. Guarantee that such a debugfs_remove failure will not occur by instead calling debugfs_remove_recursive in procedure mlx5_pci_close. Fixes: 59211bd3b632 ("net/mlx5: Split the load/unload flow into hardware and software flows") Signed-off-by: Jack Morgenstein Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 739aad0a0b35..b5e9f664fc66 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -878,8 +878,10 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) priv->numa_node = dev_to_node(&dev->pdev->dev); priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); - if (!priv->dbg_root) + if (!priv->dbg_root) { + dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n"); return -ENOMEM; + } err = mlx5_pci_enable_device(dev); if (err) { @@ -928,7 +930,7 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) pci_clear_master(dev->pdev); release_bar(dev->pdev); mlx5_pci_disable_device(dev); - debugfs_remove(priv->dbg_root); + debugfs_remove_recursive(priv->dbg_root); } static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) From 8d71e818506718e8d7032ce824b5c74a17d4f7a5 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 21 Aug 2018 16:04:41 +0300 Subject: [PATCH 171/269] net/mlx5: Use u16 for Work Queue buffer fragment size Minimal stride size is 16. Hence, the number of strides in a fragment (of PAGE_SIZE) is <= PAGE_SIZE / 16 <= 4K. u16 is sufficient to represent this. Fixes: 388ca8be0037 ("IB/mlx5: Implement fragmented completion queue (CQ)") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/wq.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/wq.h | 2 +- include/linux/mlx5/driver.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index c8c315eb5128..d838af9539b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -39,9 +39,9 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) return (u32)wq->fbc.sz_m1 + 1; } -u32 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq) +u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq) { - return (u32)wq->fbc.frag_sz_m1 + 1; + return wq->fbc.frag_sz_m1 + 1; } u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 2bd4c3184eba..3a1a170bb2d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -80,7 +80,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl); u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq); -u32 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq); +u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq); int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index aa65f58c6610..3a1258fd8ac3 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -362,7 +362,7 @@ struct mlx5_frag_buf { struct mlx5_frag_buf_ctrl { struct mlx5_frag_buf frag_buf; u32 sz_m1; - u32 frag_sz_m1; + u16 frag_sz_m1; u32 strides_offset; u8 log_sz; u8 log_stride; From a09036221092989b88c55d24d1f12ceb1d7d361f Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 21 Aug 2018 16:07:58 +0300 Subject: [PATCH 172/269] net/mlx5: Use u16 for Work Queue buffer strides offset Minimal stride size is 16. Hence, the number of strides in a fragment (of PAGE_SIZE) is <= PAGE_SIZE / 16 <= 4K. u16 is sufficient to represent this. Fixes: d7037ad73daa ("net/mlx5: Fix QP fragmented buffer allocation") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/wq.c | 2 +- include/linux/mlx5/driver.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index d838af9539b1..68e7f8df2a6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -138,7 +138,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, struct mlx5_wq_ctrl *wq_ctrl) { - u32 sq_strides_offset; + u16 sq_strides_offset; u32 rq_pg_remainder; int err; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3a1258fd8ac3..66d94b4557cf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -363,7 +363,7 @@ struct mlx5_frag_buf_ctrl { struct mlx5_frag_buf frag_buf; u32 sz_m1; u16 frag_sz_m1; - u32 strides_offset; + u16 strides_offset; u8 log_sz; u8 log_stride; u8 log_frag_strides; @@ -995,7 +995,7 @@ static inline u32 mlx5_base_mkey(const u32 key) } static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz, - u32 strides_offset, + u16 strides_offset, struct mlx5_frag_buf_ctrl *fbc) { fbc->log_stride = log_stride; From c88a026e01219488e745f4f0267fd76c2bb68421 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Tue, 21 Aug 2018 15:22:42 +0300 Subject: [PATCH 173/269] net/mlx5: E-Switch, Fix memory leak when creating switchdev mode FDB tables The memory allocated for the slow path table flow group input structure was not freed upon successful return, fix that. Fixes: 1967ce6ea5c8 ("net/mlx5: E-Switch, Refactor fast path FDB table creation in switchdev mode") Signed-off-by: Raed Salem Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f72b5c9dcfe9..3028e8d90920 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -663,6 +663,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) if (err) goto miss_rule_err; + kvfree(flow_group_in); return 0; miss_rule_err: From 071304772fc747d5df13c51f1cf48a4b922a5e0d Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 19 Aug 2018 08:56:09 +0300 Subject: [PATCH 174/269] net/mlx5: Fix not releasing read lock when adding flow rules If building match list fg fails and we never jumped to search_again_locked label then the function returned without unlocking the read lock. Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Roi Dayan Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f418541af7cf..384b560f2a93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1726,6 +1726,8 @@ search_again_locked: if (err) { if (take_write) up_write_ref_node(&ft->node); + else + up_read_ref_node(&ft->node); return ERR_PTR(err); } From df7ddb2396cd162e64aaff9401be05e31e438961 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Mon, 27 Aug 2018 09:09:46 -0500 Subject: [PATCH 175/269] net/mlx5: Consider PCI domain in search for next dev The PCI BDF is not unique. PCI domain must also be considered when searching for the next physical device during lag setup. Example below: mlx5_core 0000:01:00.0: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0) mlx5_core 0000:01:00.1: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0) mlx5_core 0001:01:00.0: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0) mlx5_core 0001:01:00.1: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0) Signed-off-by: Daniel Jurgens Reviewed-by: Aviv Heller Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index b994b80d5714..ada723bd91b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -391,16 +391,17 @@ void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) } } -static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev) +static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev) { - return (u16)((dev->pdev->bus->number << 8) | + return (u32)((pci_domain_nr(dev->pdev->bus) << 16) | + (dev->pdev->bus->number << 8) | PCI_SLOT(dev->pdev->devfn)); } /* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { - u16 pci_id = mlx5_gen_pci_id(dev); + u32 pci_id = mlx5_gen_pci_id(dev); struct mlx5_core_dev *res = NULL; struct mlx5_core_dev *tmp_dev; struct mlx5_priv *priv; From 47bc94b82291e007da61ee1b3d18c77871f3e158 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 15 Aug 2018 11:08:48 -0500 Subject: [PATCH 176/269] net/mlx5: Check for error in mlx5_attach_interface Currently, mlx5_attach_interface does not check for error after calling intf->attach or intf->add. When these two calls fails, the client is not initialized and will cause issues such as kernel panic on invalid address in the teardown path (mlx5_detach_interface) Fixes: 737a234bb638 ("net/mlx5: Introduce attach/detach to interface API") Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ada723bd91b6..37ba7c78859d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -132,11 +132,11 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) delayed_event_start(priv); dev_ctx->context = intf->add(dev); - set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); - if (intf->attach) - set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); - if (dev_ctx->context) { + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + if (intf->attach) + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); @@ -211,12 +211,17 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv if (intf->attach) { if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) goto out; - intf->attach(dev, dev_ctx->context); + if (intf->attach(dev, dev_ctx->context)) + goto out; + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); } else { if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) goto out; dev_ctx->context = intf->add(dev); + if (!dev_ctx->context) + goto out; + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); } From fc433829f9a29530d492f0eb20804ac5e6967204 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 24 Aug 2018 12:24:10 -0700 Subject: [PATCH 177/269] net/mlx5e: Ethtool steering, fix udp source port value Copy and paste bug was introduced in the offending patch. We need to write udp source port value into the headers value and not headers criteria "mask". Fixes: 142644f8a1f8 ("net/mlx5e: Ethtool steering flow parsing refactoring") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 75bb981e00b7..41cde926cdab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -191,7 +191,7 @@ set_udp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v, { if (psrc_m) { MLX5E_FTE_SET(headers_c, udp_sport, 0xffff); - MLX5E_FTE_SET(headers_c, udp_sport, ntohs(psrc_v)); + MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v)); } if (pdst_m) { From ad9421e36a77056a4f095d49b9605e80b4d216ed Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 20 Aug 2018 11:43:03 +0300 Subject: [PATCH 178/269] net/mlx5: Fix possible deadlock from lockdep when adding fte to fg This is a false positive report due to incorrect nested lock annotations as we lock multiple fgs with the same subclass. Instead of locking all fgs only lock the one being used as was done before. Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 384b560f2a93..37d114c668b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1578,6 +1578,33 @@ static u64 matched_fgs_get_version(struct list_head *match_head) return version; } +static struct fs_fte * +lookup_fte_locked(struct mlx5_flow_group *g, + u32 *match_value, + bool take_write) +{ + struct fs_fte *fte_tmp; + + if (take_write) + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + else + nested_down_read_ref_node(&g->node, FS_LOCK_PARENT); + fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value, + rhash_fte); + if (!fte_tmp || !tree_get_node(&fte_tmp->node)) { + fte_tmp = NULL; + goto out; + } + + nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); +out: + if (take_write) + up_write_ref_node(&g->node); + else + up_read_ref_node(&g->node); + return fte_tmp; +} + static struct mlx5_flow_handle * try_add_to_existing_fg(struct mlx5_flow_table *ft, struct list_head *match_head, @@ -1600,10 +1627,6 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, if (IS_ERR(fte)) return ERR_PTR(-ENOMEM); - list_for_each_entry(iter, match_head, list) { - nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT); - } - search_again_locked: version = matched_fgs_get_version(match_head); /* Try to find a fg that already contains a matching fte */ @@ -1611,20 +1634,9 @@ search_again_locked: struct fs_fte *fte_tmp; g = iter->g; - fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, - rhash_fte); - if (!fte_tmp || !tree_get_node(&fte_tmp->node)) + fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); + if (!fte_tmp) continue; - - nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); - if (!take_write) { - list_for_each_entry(iter, match_head, list) - up_read_ref_node(&iter->g->node); - } else { - list_for_each_entry(iter, match_head, list) - up_write_ref_node(&iter->g->node); - } - rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node); @@ -1633,19 +1645,6 @@ search_again_locked: return rule; } - /* No group with matching fte found. Try to add a new fte to any - * matching fg. - */ - - if (!take_write) { - list_for_each_entry(iter, match_head, list) - up_read_ref_node(&iter->g->node); - list_for_each_entry(iter, match_head, list) - nested_down_write_ref_node(&iter->g->node, - FS_LOCK_PARENT); - take_write = true; - } - /* Check the ft version, for case that new flow group * was added while the fgs weren't locked */ @@ -1657,27 +1656,30 @@ search_again_locked: /* Check the fgs version, for case the new FTE with the * same values was added while the fgs weren't locked */ - if (version != matched_fgs_get_version(match_head)) + if (version != matched_fgs_get_version(match_head)) { + take_write = true; goto search_again_locked; + } list_for_each_entry(iter, match_head, list) { g = iter->g; if (!g->node.active) continue; + + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + err = insert_fte(g, fte); if (err) { + up_write_ref_node(&g->node); if (err == -ENOSPC) continue; - list_for_each_entry(iter, match_head, list) - up_write_ref_node(&iter->g->node); kmem_cache_free(steering->ftes_cache, fte); return ERR_PTR(err); } nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); - list_for_each_entry(iter, match_head, list) - up_write_ref_node(&iter->g->node); + up_write_ref_node(&g->node); rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node); @@ -1686,8 +1688,6 @@ search_again_locked: } rule = ERR_PTR(-ENOENT); out: - list_for_each_entry(iter, match_head, list) - up_write_ref_node(&iter->g->node); kmem_cache_free(steering->ftes_cache, fte); return rule; } From 0a3b8b2b215f9e84b82ae97df71292ccfd92b1e7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 3 Sep 2018 19:12:41 -0700 Subject: [PATCH 179/269] tipc: orphan sock in tipc_release() Before we unlock the sock in tipc_release(), we have to detach sk->sk_socket from sk, otherwise a parallel tipc_sk_fill_sock_diag() could stil read it after we free this socket. Fixes: c30b70deb5f4 ("tipc: implement socket diagnostics for AF_TIPC") Reported-and-tested-by: syzbot+48804b87c16588ad491d@syzkaller.appspotmail.com Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Cong Wang Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ab7a2a7178f7..a0ff8bffc96b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -576,6 +576,7 @@ static int tipc_release(struct socket *sock) sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); + sock_orphan(sk); /* Reject any messages that accumulated in backlog queue */ release_sock(sk); tipc_dest_list_purge(&tsk->cong_links); From ee28bb56ac5b4c0c08ef10d33cc7adb749bbf4c6 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 4 Sep 2018 19:00:19 +0200 Subject: [PATCH 180/269] net/sched: fix memory leak in act_tunnel_key_init() If users try to install act_tunnel_key 'set' rules with duplicate values of 'index', the tunnel metadata are allocated, but never released. Then, kmemleak complains as follows: # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111 # echo clear > /sys/kernel/debug/kmemleak # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111 Error: TC IDR already exists. We have an error talking to the kernel # echo scan > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff8800574e6c80 (size 256): comm "tc", pid 5617, jiffies 4298118009 (age 57.990s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 1c e8 b0 ff ff ff ff ................ 81 24 c2 ad ff ff ff ff 00 00 00 00 00 00 00 00 .$.............. backtrace: [<00000000b7afbf4e>] tunnel_key_init+0x8a5/0x1800 [act_tunnel_key] [<000000007d98fccd>] tcf_action_init_1+0x698/0xac0 [<0000000099b8f7cc>] tcf_action_init+0x15c/0x590 [<00000000dc60eebe>] tc_ctl_action+0x336/0x5c2 [<000000002f5a2f7d>] rtnetlink_rcv_msg+0x357/0x8e0 [<000000000bfe7575>] netlink_rcv_skb+0x124/0x350 [<00000000edab656f>] netlink_unicast+0x40f/0x5d0 [<00000000b322cdcb>] netlink_sendmsg+0x6e8/0xba0 [<0000000063d9d490>] sock_sendmsg+0xb3/0xf0 [<00000000f0d3315a>] ___sys_sendmsg+0x654/0x960 [<00000000c06cbd42>] __sys_sendmsg+0xd3/0x170 [<00000000ce72e4b0>] do_syscall_64+0xa5/0x470 [<000000005caa2d97>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000fac1b476>] 0xffffffffffffffff This problem theoretically happens also in case users attempt to setup a geneve rule having wrong configuration data, or when the kernel fails to allocate 'params_new'. Ensure that tunnel_key_init() releases the tunnel metadata also in the above conditions. Addresses-Coverity-ID: 1373974 ("Resource leak") Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Fixes: 0ed5269f9e41f ("net/sched: add tunnel option support to act_tunnel_key") Signed-off-by: Davide Caratti Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 420759153d5f..28d58bbc953e 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -317,7 +317,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, &metadata->u.tun_info, opts_len, extack); if (ret < 0) - goto err_out; + goto release_tun_meta; } metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; @@ -333,23 +333,24 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, &act_tunnel_key_ops, bind, true); if (ret) { NL_SET_ERR_MSG(extack, "Cannot create TC IDR"); - goto err_out; + goto release_tun_meta; } ret = ACT_P_CREATED; } else if (!ovr) { - tcf_idr_release(*a, bind); NL_SET_ERR_MSG(extack, "TC IDR already exists"); - return -EEXIST; + ret = -EEXIST; + goto release_tun_meta; } t = to_tunnel_key(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { - tcf_idr_release(*a, bind); NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters"); - return -ENOMEM; + ret = -ENOMEM; + exists = true; + goto release_tun_meta; } params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; @@ -367,6 +368,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return ret; +release_tun_meta: + dst_release(&metadata->dst); + err_out: if (exists) tcf_idr_release(*a, bind); From 222440996d6daf635bed6cb35041be22ede3e8a0 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 5 Sep 2018 16:55:10 +0200 Subject: [PATCH 181/269] net/af_iucv: drop inbound packets with invalid flags Inbound packets may have any combination of flag bits set in their iucv header. If we don't know how to handle a specific combination, drop the skb instead of leaking it. To clarify what error is returned in this case, replace the hard-coded 0 with the corresponding macro. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a21d8ed0a325..01000c14417f 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2155,8 +2155,8 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, struct sock *sk; struct iucv_sock *iucv; struct af_iucv_trans_hdr *trans_hdr; + int err = NET_RX_SUCCESS; char nullstring[8]; - int err = 0; if (skb->len < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr))) { WARN_ONCE(1, "AF_IUCV too short skb, len=%d, min=%d", @@ -2254,7 +2254,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, err = afiucv_hs_callback_rx(sk, skb); break; default: - ; + kfree_skb(skb); } return err; From b2f543949acd1ba64313fdad9e672ef47550d773 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 5 Sep 2018 16:55:11 +0200 Subject: [PATCH 182/269] net/af_iucv: fix skb handling on HiperTransport xmit error When sending an skb, afiucv_hs_send() bails out on various error conditions. But currently the caller has no way of telling whether the skb was freed or not - resulting in potentially either a) leaked skbs from iucv_send_ctrl(), or b) double-free's from iucv_sock_sendmsg(). As dev_queue_xmit() will always consume the skb (even on error), be consistent and also free the skb from all other error paths. This way callers no longer need to care about managing the skb. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 01000c14417f..e2f16a0173a9 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -351,20 +351,28 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message)); skb->dev = iucv->hs_dev; - if (!skb->dev) - return -ENODEV; - if (!(skb->dev->flags & IFF_UP) || !netif_carrier_ok(skb->dev)) - return -ENETDOWN; + if (!skb->dev) { + err = -ENODEV; + goto err_free; + } + if (!(skb->dev->flags & IFF_UP) || !netif_carrier_ok(skb->dev)) { + err = -ENETDOWN; + goto err_free; + } if (skb->len > skb->dev->mtu) { - if (sock->sk_type == SOCK_SEQPACKET) - return -EMSGSIZE; - else - skb_trim(skb, skb->dev->mtu); + if (sock->sk_type == SOCK_SEQPACKET) { + err = -EMSGSIZE; + goto err_free; + } + skb_trim(skb, skb->dev->mtu); } skb->protocol = cpu_to_be16(ETH_P_AF_IUCV); nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - return -ENOMEM; + if (!nskb) { + err = -ENOMEM; + goto err_free; + } + skb_queue_tail(&iucv->send_skb_q, nskb); err = dev_queue_xmit(skb); if (net_xmit_eval(err)) { @@ -375,6 +383,10 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, WARN_ON(atomic_read(&iucv->msg_recv) < 0); } return net_xmit_eval(err); + +err_free: + kfree_skb(skb); + return err; } static struct sock *__iucv_get_sock_by_name(char *nm) @@ -1167,7 +1179,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, err = afiucv_hs_send(&txmsg, sk, skb, 0); if (err) { atomic_dec(&iucv->msg_sent); - goto fail; + goto out; } } else { /* Classic VM IUCV transport */ skb_queue_tail(&iucv->send_skb_q, skb); From b7f41565546d393747fd554f9526c1187c6bf652 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 5 Sep 2018 16:55:12 +0200 Subject: [PATCH 183/269] net/iucv: declare iucv_path_table_empty() as static Fixes a compile warning. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- net/iucv/iucv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 8f7ef167c45a..eb502c6290c2 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1874,7 +1874,7 @@ static void iucv_pm_complete(struct device *dev) * Returns 0 if there are still iucv pathes defined * 1 if there are no iucv pathes defined */ -int iucv_path_table_empty(void) +static int iucv_path_table_empty(void) { int i; From 5d128fbd8b20f8a48cb13c3eced789d1f9573ecd Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 4 Sep 2018 14:55:26 +0200 Subject: [PATCH 184/269] ACPI / bus: Only call dmi_check_system() on X86 Calling dmi_check_system() early only works on X86. Other architectures initialize the DMI subsystem later so it's not ready yet when ACPI itself gets initialized. In the best case it results in a useless call to a function which will do nothing. But depending on the dmi implementation, it could also result in warnings. Best is to not call the function when it can't work and isn't needed. Additionally, if anyone ever needs to add non-x86 quirks, it would surprisingly not work, so document the limitation to avoid confusion. Signed-off-by: Jean Delvare Fixes: cce4f632db20 (ACPI: fix early DSDT dmi check warnings on ia64) Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 292088fcc624..d2e29a19890d 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -35,11 +35,11 @@ #include #ifdef CONFIG_X86 #include +#include #endif #include #include #include -#include #include #include "internal.h" @@ -82,10 +82,6 @@ static const struct dmi_system_id dsdt_dmi_table[] __initconst = { }, {} }; -#else -static const struct dmi_system_id dsdt_dmi_table[] __initconst = { - {} -}; #endif /* -------------------------------------------------------------------------- @@ -1033,11 +1029,16 @@ void __init acpi_early_init(void) acpi_permanent_mmap = true; +#ifdef CONFIG_X86 /* * If the machine falls into the DMI check table, - * DSDT will be copied to memory + * DSDT will be copied to memory. + * Note that calling dmi_check_system() here on other architectures + * would not be OK because only x86 initializes dmi early enough. + * Thankfully only x86 systems need such quirks for now. */ dmi_check_system(dsdt_dmi_table); +#endif status = acpi_reallocate_root_table(); if (ACPI_FAILURE(status)) { From f11fc4bc669b8622510c1039499f5a9d24248fec Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 3 Sep 2018 10:00:07 +0800 Subject: [PATCH 185/269] ACPI / LPSS: Force LPSS quirks on boot Commit 12864ff8545f (ACPI / LPSS: Avoid PM quirks on suspend and resume from hibernation) bypasses lpss quirks for S3 and S4, by setting a flag for S3/S4 in acpi_lpss_suspend(), and check that flag in acpi_lpss_resume(). But this overlooks the boot case where acpi_lpss_resume() may get called without a corresponding acpi_lpss_suspend() having been called. Thus force setting the flag during boot. Fixes: 12864ff8545f (ACPI / LPSS: Avoid PM quirks on suspend and resume from hibernation) Link: https://bugzilla.kernel.org/show_bug.cgi?id=200989 Reported-and-tested-by: William Lieurance Signed-off-by: Zhang Rui Cc: 4.15+ # 4.15+: 12864ff8545f (ACPI / LPSS: Avoid ...) Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_lpss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 9706613eecf9..bf64cfa30feb 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -879,7 +879,7 @@ static void acpi_lpss_dismiss(struct device *dev) #define LPSS_GPIODEF0_DMA_LLP BIT(13) static DEFINE_MUTEX(lpss_iosf_mutex); -static bool lpss_iosf_d3_entered; +static bool lpss_iosf_d3_entered = true; static void lpss_iosf_enter_d3_state(void) { From 17f6bac2249356c795339e03a0742cd79be3cab8 Mon Sep 17 00:00:00 2001 From: Chuanhua Lei Date: Thu, 6 Sep 2018 18:03:23 +0800 Subject: [PATCH 186/269] x86/tsc: Prevent result truncation on 32bit Loops per jiffy is calculated by multiplying tsc_khz with 1e3 and then dividing it by HZ. Both tsc_khz and the temporary variable holding the multiplication result are of type unsigned long, so on 32bit the result is truncated to the lower 32bit. Use u64 as type for the temporary variable and cast tsc_khz to it before multiplying. [ tglx: Massaged changelog and removed pointless braces ] Fixes: cf7a63ef4e02 ("x86/tsc: Calibrate tsc only once") Signed-off-by: Chuanhua Lei Signed-off-by: Thomas Gleixner Cc: yixin.zhu@linux.intel.com Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Len Brown Cc: Pavel Tatashin Cc: Rajvi Jingar Cc: Dou Liyang Link: https://lkml.kernel.org/r/1536228203-18701-1-git-send-email-chuanhua.lei@linux.intel.com --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 1463468ba9a0..6490f618e096 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1415,7 +1415,7 @@ static bool __init determine_cpu_tsc_frequencies(bool early) static unsigned long __init get_loops_per_jiffy(void) { - unsigned long lpj = tsc_khz * KHZ; + u64 lpj = (u64)tsc_khz * KHZ; do_div(lpj, HZ); return lpj; From 9fe6299dde587788f245e9f7a5a1b296fad4e8c7 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 31 Aug 2018 21:41:51 +0200 Subject: [PATCH 187/269] x86/process: Don't mix user/kernel regs in 64bit __show_regs() When the kernel.print-fatal-signals sysctl has been enabled, a simple userspace crash will cause the kernel to write a crash dump that contains, among other things, the kernel gsbase into dmesg. As suggested by Andy, limit output to pt_regs, FS_BASE and KERNEL_GS_BASE in this case. This also moves the bitness-specific logic from show_regs() into process_{32,64}.c. Fixes: 45807a1df9f5 ("vdso: print fatal signals") Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180831194151.123586-1-jannh@google.com --- arch/x86/include/asm/kdebug.h | 12 +++++++++++- arch/x86/kernel/dumpstack.c | 11 +++-------- arch/x86/kernel/process_32.c | 4 ++-- arch/x86/kernel/process_64.c | 12 ++++++++++-- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 395c9631e000..75f1e35e7c15 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -22,10 +22,20 @@ enum die_val { DIE_NMIUNKNOWN, }; +enum show_regs_mode { + SHOW_REGS_SHORT, + /* + * For when userspace crashed, but we don't think it's our fault, and + * therefore don't print kernel registers. + */ + SHOW_REGS_USER, + SHOW_REGS_ALL +}; + extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); -extern void __show_regs(struct pt_regs *regs, int all); +extern void __show_regs(struct pt_regs *regs, enum show_regs_mode); extern void show_iret_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index f56895106ccf..2b5886401e5f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -146,7 +146,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, * they can be printed in the right context. */ if (!partial && on_stack(info, regs, sizeof(*regs))) { - __show_regs(regs, 0); + __show_regs(regs, SHOW_REGS_SHORT); } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET, IRET_FRAME_SIZE)) { @@ -344,7 +344,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) oops_exit(); /* Executive summary in case the oops scrolled away */ - __show_regs(&exec_summary_regs, true); + __show_regs(&exec_summary_regs, SHOW_REGS_ALL); if (!signr) return; @@ -407,14 +407,9 @@ void die(const char *str, struct pt_regs *regs, long err) void show_regs(struct pt_regs *regs) { - bool all = true; - show_regs_print_info(KERN_DEFAULT); - if (IS_ENABLED(CONFIG_X86_32)) - all = !user_mode(regs); - - __show_regs(regs, all); + __show_regs(regs, user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL); /* * When in-kernel, we also print out the stack at the time of the fault.. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2924fd447e61..5046a3c9dec2 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,7 +59,7 @@ #include #include -void __show_regs(struct pt_regs *regs, int all) +void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; @@ -85,7 +85,7 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n", (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags); - if (!all) + if (mode != SHOW_REGS_ALL) return; cr0 = read_cr0(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a451bc374b9b..ea5ea850348d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -62,7 +62,7 @@ __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); /* Prints also some state that isn't saved in the pt_regs */ -void __show_regs(struct pt_regs *regs, int all) +void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; unsigned long d0, d1, d2, d3, d6, d7; @@ -87,9 +87,17 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); - if (!all) + if (mode == SHOW_REGS_SHORT) return; + if (mode == SHOW_REGS_USER) { + rdmsrl(MSR_FS_BASE, fs); + rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); + printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n", + fs, shadowgs); + return; + } + asm("movl %%ds,%0" : "=r" (ds)); asm("movl %%cs,%0" : "=r" (cs)); asm("movl %%es,%0" : "=r" (es)); From f8b7530aa0a1def79c93101216b5b17cf408a70a Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Wed, 5 Sep 2018 11:22:07 +0530 Subject: [PATCH 188/269] cpu/hotplug: Adjust misplaced smb() in cpuhp_thread_fun() The smp_mb() in cpuhp_thread_fun() is misplaced. It needs to be after the load of st->should_run to prevent reordering of the later load/stores w.r.t. the load of st->should_run. Fixes: 4dddfb5faa61 ("smp/hotplug: Rewrite AP state machine core") Signed-off-by: Neeraj Upadhyay Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: josh@joshtriplett.org Cc: peterz@infradead.org Cc: jiangshanlai@gmail.com Cc: dzickus@redhat.com Cc: brendan.jackman@arm.com Cc: malat@debian.org Cc: mojha@codeaurora.org Cc: sramana@codeaurora.org Cc: linux-arm-msm@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1536126727-11629-1-git-send-email-neeraju@codeaurora.org --- kernel/cpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index aa7fe85ad62e..eb4041f78073 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -607,15 +607,15 @@ static void cpuhp_thread_fun(unsigned int cpu) bool bringup = st->bringup; enum cpuhp_state state; + if (WARN_ON_ONCE(!st->should_run)) + return; + /* * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures * that if we see ->should_run we also see the rest of the state. */ smp_mb(); - if (WARN_ON_ONCE(!st->should_run)) - return; - cpuhp_lock_acquire(bringup); if (st->single) { From 69fa6eb7d6a64801ea261025cce9723d9442d773 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 6 Sep 2018 15:21:38 +0200 Subject: [PATCH 189/269] cpu/hotplug: Prevent state corruption on error rollback When a teardown callback fails, the CPU hotplug code brings the CPU back to the previous state. The previous state becomes the new target state. The rollback happens in undo_cpu_down() which increments the state unconditionally even if the state is already the same as the target. As a consequence the next CPU hotplug operation will start at the wrong state. This is easily to observe when __cpu_disable() fails. Prevent the unconditional undo by checking the state vs. target before incrementing state and fix up the consequently wrong conditional in the unplug code which handles the failure of the final CPU take down on the control CPU side. Fixes: 4dddfb5faa61 ("smp/hotplug: Rewrite AP state machine core") Reported-by: Neeraj Upadhyay Signed-off-by: Thomas Gleixner Tested-by: Geert Uytterhoeven Tested-by: Sudeep Holla Tested-by: Neeraj Upadhyay Cc: josh@joshtriplett.org Cc: peterz@infradead.org Cc: jiangshanlai@gmail.com Cc: dzickus@redhat.com Cc: brendan.jackman@arm.com Cc: malat@debian.org Cc: sramana@codeaurora.org Cc: linux-arm-msm@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1809051419580.1416@nanos.tec.linutronix.de ---- --- kernel/cpu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index eb4041f78073..0097acec1c71 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -916,7 +916,8 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); if (ret) { st->target = prev_state; - undo_cpu_down(cpu, st); + if (st->state < prev_state) + undo_cpu_down(cpu, st); break; } } @@ -969,7 +970,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, * to do the further cleanups. */ ret = cpuhp_down_callbacks(cpu, st, target); - if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) { + if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) { cpuhp_reset_state(st, prev_state); __cpuhp_kick_ap(st); } From 8aaff15168cfbc7c8980fdb0e8a585f1afe56ec0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 24 Aug 2018 15:32:43 +0200 Subject: [PATCH 190/269] ceph: avoid a use-after-free in ceph_destroy_options() syzbot reported a use-after-free in ceph_destroy_options(), called from ceph_mount(). The problem was that create_fs_client() consumed the opt pointer on some errors, but not on all of them. Make sure it always consumes both libceph and ceph options. Reported-by: syzbot+8ab6f1042021b4eed062@syzkaller.appspotmail.com Signed-off-by: Ilya Dryomov Reviewed-by: "Yan, Zheng" --- fs/ceph/super.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 43ca3b763875..eab1359d0553 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -602,6 +602,8 @@ static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) /* * create a new fs client + * + * Success or not, this function consumes @fsopt and @opt. */ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, struct ceph_options *opt) @@ -609,17 +611,20 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, struct ceph_fs_client *fsc; int page_count; size_t size; - int err = -ENOMEM; + int err; fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); - if (!fsc) - return ERR_PTR(-ENOMEM); + if (!fsc) { + err = -ENOMEM; + goto fail; + } fsc->client = ceph_create_client(opt, fsc); if (IS_ERR(fsc->client)) { err = PTR_ERR(fsc->client); goto fail; } + opt = NULL; /* fsc->client now owns this */ fsc->client->extra_mon_dispatch = extra_mon_dispatch; fsc->client->osdc.abort_on_full = true; @@ -677,6 +682,9 @@ fail_client: ceph_destroy_client(fsc->client); fail: kfree(fsc); + if (opt) + ceph_destroy_options(opt); + destroy_mount_options(fsopt); return ERR_PTR(err); } @@ -1042,8 +1050,6 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, fsc = create_fs_client(fsopt, opt); if (IS_ERR(fsc)) { res = ERR_CAST(fsc); - destroy_mount_options(fsopt); - ceph_destroy_options(opt); goto out_final; } From eb3b2d6be4b5e1612827b986cca241c5d104fc41 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 22 Aug 2018 17:11:27 +0200 Subject: [PATCH 191/269] rbd: factor out get_parent_info() In preparation for the new parent_get and parent_overlap_get class methods, factor out the fetching and decoding of parent data. As a side effect, we now decode all four fields in the "no parent" case. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 136 ++++++++++++++++++++++++++++---------------- 1 file changed, 87 insertions(+), 49 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 7915f3b03736..bec5a50c9890 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4584,47 +4584,95 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev) &rbd_dev->header.features); } +struct parent_image_info { + u64 pool_id; + const char *image_id; + u64 snap_id; + + u64 overlap; +}; + +/* + * The caller is responsible for @pii. + */ +static int __get_parent_info_legacy(struct rbd_device *rbd_dev, + struct page *req_page, + struct page *reply_page, + struct parent_image_info *pii) +{ + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + size_t reply_len = PAGE_SIZE; + void *p, *end; + int ret; + + ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, + "rbd", "get_parent", CEPH_OSD_FLAG_READ, + req_page, sizeof(u64), reply_page, &reply_len); + if (ret) + return ret; + + p = page_address(reply_page); + end = p + reply_len; + ceph_decode_64_safe(&p, end, pii->pool_id, e_inval); + pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); + if (IS_ERR(pii->image_id)) { + ret = PTR_ERR(pii->image_id); + pii->image_id = NULL; + return ret; + } + ceph_decode_64_safe(&p, end, pii->snap_id, e_inval); + ceph_decode_64_safe(&p, end, pii->overlap, e_inval); + + return 0; + +e_inval: + return -EINVAL; +} + +static int get_parent_info(struct rbd_device *rbd_dev, + struct parent_image_info *pii) +{ + struct page *req_page, *reply_page; + void *p; + int ret; + + req_page = alloc_page(GFP_KERNEL); + if (!req_page) + return -ENOMEM; + + reply_page = alloc_page(GFP_KERNEL); + if (!reply_page) { + __free_page(req_page); + return -ENOMEM; + } + + p = page_address(req_page); + ceph_encode_64(&p, rbd_dev->spec->snap_id); + ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page, pii); + + __free_page(req_page); + __free_page(reply_page); + return ret; +} + static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) { struct rbd_spec *parent_spec; - size_t size; - void *reply_buf = NULL; - __le64 snapid; - void *p; - void *end; - u64 pool_id; - char *image_id; - u64 snap_id; - u64 overlap; + struct parent_image_info pii = { 0 }; int ret; parent_spec = rbd_spec_alloc(); if (!parent_spec) return -ENOMEM; - size = sizeof (__le64) + /* pool_id */ - sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */ - sizeof (__le64) + /* snap_id */ - sizeof (__le64); /* overlap */ - reply_buf = kmalloc(size, GFP_KERNEL); - if (!reply_buf) { - ret = -ENOMEM; - goto out_err; - } - - snapid = cpu_to_le64(rbd_dev->spec->snap_id); - ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, - &rbd_dev->header_oloc, "get_parent", - &snapid, sizeof(snapid), reply_buf, size); - dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); - if (ret < 0) + ret = get_parent_info(rbd_dev, &pii); + if (ret) goto out_err; - p = reply_buf; - end = reply_buf + ret; - ret = -ERANGE; - ceph_decode_64_safe(&p, end, pool_id, out_err); - if (pool_id == CEPH_NOPOOL) { + dout("%s pool_id %llu image_id %s snap_id %llu overlap %llu\n", + __func__, pii.pool_id, pii.image_id, pii.snap_id, pii.overlap); + + if (pii.pool_id == CEPH_NOPOOL) { /* * Either the parent never existed, or we have * record of it but the image got flattened so it no @@ -4647,29 +4695,22 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) /* The ceph file layout needs to fit pool id in 32 bits */ ret = -EIO; - if (pool_id > (u64)U32_MAX) { + if (pii.pool_id > (u64)U32_MAX) { rbd_warn(NULL, "parent pool id too large (%llu > %u)", - (unsigned long long)pool_id, U32_MAX); + (unsigned long long)pii.pool_id, U32_MAX); goto out_err; } - image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); - if (IS_ERR(image_id)) { - ret = PTR_ERR(image_id); - goto out_err; - } - ceph_decode_64_safe(&p, end, snap_id, out_err); - ceph_decode_64_safe(&p, end, overlap, out_err); - /* * The parent won't change (except when the clone is * flattened, already handled that). So we only need to * record the parent spec we have not already done so. */ if (!rbd_dev->parent_spec) { - parent_spec->pool_id = pool_id; - parent_spec->image_id = image_id; - parent_spec->snap_id = snap_id; + parent_spec->pool_id = pii.pool_id; + parent_spec->image_id = pii.image_id; + pii.image_id = NULL; + parent_spec->snap_id = pii.snap_id; /* TODO: support cloning across namespaces */ if (rbd_dev->spec->pool_ns) { @@ -4683,15 +4724,13 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) rbd_dev->parent_spec = parent_spec; parent_spec = NULL; /* rbd_dev now owns this */ - } else { - kfree(image_id); } /* * We always update the parent overlap. If it's zero we issue * a warning, as we will proceed as if there was no parent. */ - if (!overlap) { + if (!pii.overlap) { if (parent_spec) { /* refresh, careful to warn just once */ if (rbd_dev->parent_overlap) @@ -4702,14 +4741,13 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) rbd_warn(rbd_dev, "clone is standalone (overlap 0)"); } } - rbd_dev->parent_overlap = overlap; + rbd_dev->parent_overlap = pii.overlap; out: ret = 0; out_err: - kfree(reply_buf); + kfree(pii.image_id); rbd_spec_put(parent_spec); - return ret; } From e92c0eaf754310f9f31e9229a3f7274a67478f82 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 22 Aug 2018 17:26:10 +0200 Subject: [PATCH 192/269] rbd: support cloning across namespaces If parent_get class method is not supported by the OSDs, fall back to the legacy class method and assume that the parent is in the default (i.e. "") namespace. The "use the child's image namespace" workaround is no longer needed because creating images within namespaces will require parent_get aware OSDs. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 111 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 97 insertions(+), 14 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bec5a50c9890..73ed5f3a862d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4207,11 +4207,13 @@ static ssize_t rbd_parent_show(struct device *dev, count += sprintf(&buf[count], "%s" "pool_id %llu\npool_name %s\n" + "pool_ns %s\n" "image_id %s\nimage_name %s\n" "snap_id %llu\nsnap_name %s\n" "overlap %llu\n", !count ? "" : "\n", /* first? */ spec->pool_id, spec->pool_name, + spec->pool_ns ?: "", spec->image_id, spec->image_name ?: "(unknown)", spec->snap_id, spec->snap_name, rbd_dev->parent_overlap); @@ -4586,12 +4588,89 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev) struct parent_image_info { u64 pool_id; + const char *pool_ns; const char *image_id; u64 snap_id; + bool has_overlap; u64 overlap; }; +/* + * The caller is responsible for @pii. + */ +static int decode_parent_image_spec(void **p, void *end, + struct parent_image_info *pii) +{ + u8 struct_v; + u32 struct_len; + int ret; + + ret = ceph_start_decoding(p, end, 1, "ParentImageSpec", + &struct_v, &struct_len); + if (ret) + return ret; + + ceph_decode_64_safe(p, end, pii->pool_id, e_inval); + pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL); + if (IS_ERR(pii->pool_ns)) { + ret = PTR_ERR(pii->pool_ns); + pii->pool_ns = NULL; + return ret; + } + pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL); + if (IS_ERR(pii->image_id)) { + ret = PTR_ERR(pii->image_id); + pii->image_id = NULL; + return ret; + } + ceph_decode_64_safe(p, end, pii->snap_id, e_inval); + return 0; + +e_inval: + return -EINVAL; +} + +static int __get_parent_info(struct rbd_device *rbd_dev, + struct page *req_page, + struct page *reply_page, + struct parent_image_info *pii) +{ + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + size_t reply_len = PAGE_SIZE; + void *p, *end; + int ret; + + ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, + "rbd", "parent_get", CEPH_OSD_FLAG_READ, + req_page, sizeof(u64), reply_page, &reply_len); + if (ret) + return ret == -EOPNOTSUPP ? 1 : ret; + + p = page_address(reply_page); + end = p + reply_len; + ret = decode_parent_image_spec(&p, end, pii); + if (ret) + return ret; + + ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, + "rbd", "parent_overlap_get", CEPH_OSD_FLAG_READ, + req_page, sizeof(u64), reply_page, &reply_len); + if (ret) + return ret; + + p = page_address(reply_page); + end = p + reply_len; + ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval); + if (pii->has_overlap) + ceph_decode_64_safe(&p, end, pii->overlap, e_inval); + + return 0; + +e_inval: + return -EINVAL; +} + /* * The caller is responsible for @pii. */ @@ -4621,6 +4700,7 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev, return ret; } ceph_decode_64_safe(&p, end, pii->snap_id, e_inval); + pii->has_overlap = true; ceph_decode_64_safe(&p, end, pii->overlap, e_inval); return 0; @@ -4648,7 +4728,10 @@ static int get_parent_info(struct rbd_device *rbd_dev, p = page_address(req_page); ceph_encode_64(&p, rbd_dev->spec->snap_id); - ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page, pii); + ret = __get_parent_info(rbd_dev, req_page, reply_page, pii); + if (ret > 0) + ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page, + pii); __free_page(req_page); __free_page(reply_page); @@ -4669,10 +4752,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) if (ret) goto out_err; - dout("%s pool_id %llu image_id %s snap_id %llu overlap %llu\n", - __func__, pii.pool_id, pii.image_id, pii.snap_id, pii.overlap); + dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", + __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id, + pii.has_overlap, pii.overlap); - if (pii.pool_id == CEPH_NOPOOL) { + if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) { /* * Either the parent never existed, or we have * record of it but the image got flattened so it no @@ -4681,6 +4765,10 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) * overlap to 0. The effect of this is that all new * requests will be treated as if the image had no * parent. + * + * If !pii.has_overlap, the parent image spec is not + * applicable. It's there to avoid duplication in each + * snapshot record. */ if (rbd_dev->parent_overlap) { rbd_dev->parent_overlap = 0; @@ -4708,20 +4796,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) */ if (!rbd_dev->parent_spec) { parent_spec->pool_id = pii.pool_id; + if (pii.pool_ns && *pii.pool_ns) { + parent_spec->pool_ns = pii.pool_ns; + pii.pool_ns = NULL; + } parent_spec->image_id = pii.image_id; pii.image_id = NULL; parent_spec->snap_id = pii.snap_id; - /* TODO: support cloning across namespaces */ - if (rbd_dev->spec->pool_ns) { - parent_spec->pool_ns = kstrdup(rbd_dev->spec->pool_ns, - GFP_KERNEL); - if (!parent_spec->pool_ns) { - ret = -ENOMEM; - goto out_err; - } - } - rbd_dev->parent_spec = parent_spec; parent_spec = NULL; /* rbd_dev now owns this */ } @@ -4746,6 +4828,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) out: ret = 0; out_err: + kfree(pii.pool_ns); kfree(pii.image_id); rbd_spec_put(parent_spec); return ret; From 52cf93e63ee672a92f349edc6ddad86ec8808fd8 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 6 Sep 2018 10:55:18 +0800 Subject: [PATCH 193/269] HID: i2c-hid: Don't reset device upon system resume Raydium touchscreen triggers interrupt storm after system-wide suspend: [ 179.085033] i2c_hid i2c-CUST0000:00: i2c_hid_get_input: incomplete report (58/65535) According to Raydium, Windows driver does not reset the device after system resume. The HID over I2C spec does specify a reset should be used at intialization, but it doesn't specify if reset is required for system suspend. Tested this patch on other i2c-hid touchpanels I have and those touchpanels do work after S3 without doing reset. If any regression happens to other touchpanel vendors, we can use quirk for Raydium devices. There's still one device uses I2C_HID_QUIRK_RESEND_REPORT_DESCR so keep it there. Cc: Aaron Ma Cc: AceLan Kao Signed-off-by: Kai-Heng Feng Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 4 ---- drivers/hid/i2c-hid/i2c-hid.c | 13 +++++++------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 19a66ceca217..5146ee029db4 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -530,10 +530,6 @@ #define I2C_VENDOR_ID_HANTICK 0x0911 #define I2C_PRODUCT_ID_HANTICK_5288 0x5288 -#define I2C_VENDOR_ID_RAYD 0x2386 -#define I2C_PRODUCT_ID_RAYD_3118 0x3118 -#define I2C_PRODUCT_ID_RAYD_4B33 0x4B33 - #define USB_VENDOR_ID_HANWANG 0x0b57 #define USB_DEVICE_ID_HANWANG_TABLET_FIRST 0x5000 #define USB_DEVICE_ID_HANWANG_TABLET_LAST 0x8fff diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 57126f6837bb..f3076659361a 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -170,12 +170,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV }, { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288, I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, - { I2C_VENDOR_ID_RAYD, I2C_PRODUCT_ID_RAYD_3118, - I2C_HID_QUIRK_RESEND_REPORT_DESCR }, { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS10FB_TOUCH, I2C_HID_QUIRK_RESEND_REPORT_DESCR }, - { I2C_VENDOR_ID_RAYD, I2C_PRODUCT_ID_RAYD_4B33, - I2C_HID_QUIRK_RESEND_REPORT_DESCR }, { 0, 0 } }; @@ -1237,11 +1233,16 @@ static int i2c_hid_resume(struct device *dev) pm_runtime_enable(dev); enable_irq(client->irq); - ret = i2c_hid_hwreset(client); + + /* Instead of resetting device, simply powers the device on. This + * solves "incomplete reports" on Raydium devices 2386:3118 and + * 2386:4B33 + */ + ret = i2c_hid_set_power(client, I2C_HID_PWR_ON); if (ret) return ret; - /* RAYDIUM device (2386:3118) need to re-send report descr cmd + /* Some devices need to re-send report descr cmd * after resume, after this it will be back normal. * otherwise it issues too many incomplete reports. */ From d1c392c9e2a301f38998a353f467f76414e38725 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 5 Sep 2018 16:29:49 -0400 Subject: [PATCH 194/269] printk/tracing: Do not trace printk_nmi_enter() I hit the following splat in my tests: ------------[ cut here ]------------ IRQs not enabled as expected WARNING: CPU: 3 PID: 0 at kernel/time/tick-sched.c:982 tick_nohz_idle_enter+0x44/0x8c Modules linked in: ip6t_REJECT nf_reject_ipv6 ip6table_filter ip6_tables ipv6 CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.19.0-rc2-test+ #2 Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014 EIP: tick_nohz_idle_enter+0x44/0x8c Code: ec 05 00 00 00 75 26 83 b8 c0 05 00 00 00 75 1d 80 3d d0 36 3e c1 00 75 14 68 94 63 12 c1 c6 05 d0 36 3e c1 01 e8 04 ee f8 ff <0f> 0b 58 fa bb a0 e5 66 c1 e8 25 0f 04 00 64 03 1d 28 31 52 c1 8b EAX: 0000001c EBX: f26e7f8c ECX: 00000006 EDX: 00000007 ESI: f26dd1c0 EDI: 00000000 EBP: f26e7f40 ESP: f26e7f38 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010296 CR0: 80050033 CR2: 0813c6b0 CR3: 2f342000 CR4: 001406f0 Call Trace: do_idle+0x33/0x202 cpu_startup_entry+0x61/0x63 start_secondary+0x18e/0x1ed startup_32_smp+0x164/0x168 irq event stamp: 18773830 hardirqs last enabled at (18773829): [] trace_hardirqs_on_thunk+0xc/0x10 hardirqs last disabled at (18773830): [] trace_hardirqs_off_thunk+0xc/0x10 softirqs last enabled at (18773824): [] __do_softirq+0x25f/0x2bf softirqs last disabled at (18773767): [] call_on_stack+0x45/0x4b ---[ end trace b7c64aa79e17954a ]--- After a bit of debugging, I found what was happening. This would trigger when performing "perf" with a high NMI interrupt rate, while enabling and disabling function tracer. Ftrace uses breakpoints to convert the nops at the start of functions to calls to the function trampolines. The breakpoint traps disable interrupts and this makes calls into lockdep via the trace_hardirqs_off_thunk in the entry.S code. What happens is the following: do_idle { [interrupts enabled] [interrupts disabled] TRACE_IRQS_OFF [lockdep says irqs off] [...] TRACE_IRQS_IRET test if pt_regs say return to interrupts enabled [yes] TRACE_IRQS_ON [lockdep says irqs are on] nmi_enter() { printk_nmi_enter() [traced by ftrace] [ hit ftrace breakpoint ] TRACE_IRQS_OFF [lockdep says irqs off] [...] TRACE_IRQS_IRET [return from breakpoint] test if pt_regs say interrupts enabled [no] [iret back to interrupt] [iret back to code] tick_nohz_idle_enter() { lockdep_assert_irqs_enabled() [lockdep say no!] Although interrupts are indeed enabled, lockdep thinks it is not, and since we now do asserts via lockdep, it gives a false warning. The issue here is that printk_nmi_enter() is called before lockdep_off(), which disables lockdep (for this reason) in NMIs. By simply not allowing ftrace to see printk_nmi_enter() (via notrace annotation) we keep lockdep from getting confused. Cc: stable@vger.kernel.org Fixes: 42a0bb3f71383 ("printk/nmi: generic solution for safe printk in NMI") Acked-by: Sergey Senozhatsky Acked-by: Petr Mladek Signed-off-by: Steven Rostedt (VMware) --- kernel/printk/printk_safe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index a0a74c533e4b..0913b4d385de 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -306,12 +306,12 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args) return printk_safe_log_store(s, fmt, args); } -void printk_nmi_enter(void) +void notrace printk_nmi_enter(void) { this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); } -void printk_nmi_exit(void) +void notrace printk_nmi_exit(void) { this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); } From 96d529bac562574600eda85726fcfa3eef6dde8e Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 6 Sep 2018 16:10:39 +0100 Subject: [PATCH 195/269] firmware: arm_scmi: fix divide by zero when sustained_perf_level is zero Firmware can provide zero as values for sustained performance level and corresponding sustained frequency in kHz in order to hide the actual frequencies and provide only abstract values. It may endup with divide by zero scenario resulting in kernel panic. Let's set the multiplication factor to one if either one or both of them (sustained_perf_level and sustained_freq) are set to zero. Fixes: a9e3fbfaa0ff ("firmware: arm_scmi: add initial support for performance protocol") Reported-by: Ionela Voinescu Signed-off-by: Sudeep Holla Signed-off-by: Olof Johansson --- drivers/firmware/arm_scmi/perf.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 721e6c57beae..64342944d917 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -166,7 +166,13 @@ scmi_perf_domain_attributes_get(const struct scmi_handle *handle, u32 domain, le32_to_cpu(attr->sustained_freq_khz); dom_info->sustained_perf_level = le32_to_cpu(attr->sustained_perf_level); - dom_info->mult_factor = (dom_info->sustained_freq_khz * 1000) / + if (!dom_info->sustained_freq_khz || + !dom_info->sustained_perf_level) + /* CPUFreq converts to kHz, hence default 1000 */ + dom_info->mult_factor = 1000; + else + dom_info->mult_factor = + (dom_info->sustained_freq_khz * 1000) / dom_info->sustained_perf_level; memcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE); } From fac880c7d074fdfca874114b5c47b36aa034e4ee Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Sep 2018 17:38:57 +0100 Subject: [PATCH 196/269] arm64: fix erroneous warnings in page freeing functions In pmd_free_pte_page() and pud_free_pmd_page() we try to warn if they hit a present non-table entry. In both cases we'll warn for non-present entries, as the VM_WARN_ON() only checks the entry is not a table entry. This has been observed to result in warnings when booting a v4.19-rc2 kernel under qemu. Fix this by bailing out earlier for non-present entries. Fixes: ec28bb9c9b0826d7 ("arm64: Implement page table free interfaces") Signed-off-by: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 65f86271f02b..8080c9f489c3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -985,8 +985,9 @@ int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr) pmd = READ_ONCE(*pmdp); - /* No-op for empty entry and WARN_ON for valid entry */ - if (!pmd_present(pmd) || !pmd_table(pmd)) { + if (!pmd_present(pmd)) + return 1; + if (!pmd_table(pmd)) { VM_WARN_ON(!pmd_table(pmd)); return 1; } @@ -1007,8 +1008,9 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr) pud = READ_ONCE(*pudp); - /* No-op for empty entry and WARN_ON for valid entry */ - if (!pud_present(pud) || !pud_table(pud)) { + if (!pud_present(pud)) + return 1; + if (!pud_table(pud)) { VM_WARN_ON(!pud_table(pud)); return 1; } From 6b45a2b1c0bc2aec84d1c56a1976ca9c8a621ecb Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 6 Sep 2018 14:12:19 +0200 Subject: [PATCH 197/269] memory: ti-aemif: fix a potential NULL-pointer dereference Platform data pointer may be NULL. We check it everywhere but in one place. Fix it. Fixes: 8af70cd2ca50 ("memory: aemif: add support for board files") Reported-by: Dan Carpenter Signed-off-by: Bartosz Golaszewski Cc: stable@vger.kernel.org Signed-off-by: Olof Johansson --- drivers/memory/ti-aemif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memory/ti-aemif.c b/drivers/memory/ti-aemif.c index 31112f622b88..475e5b3790ed 100644 --- a/drivers/memory/ti-aemif.c +++ b/drivers/memory/ti-aemif.c @@ -411,7 +411,7 @@ static int aemif_probe(struct platform_device *pdev) if (ret < 0) goto error; } - } else { + } else if (pdata) { for (i = 0; i < pdata->num_sub_devices; i++) { pdata->sub_devices[i].dev.parent = dev; ret = platform_device_register(&pdata->sub_devices[i]); From d5274b3cd6a814ccb2f56d81ee87cbbf51bd4cf7 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 6 Sep 2018 11:05:44 +0300 Subject: [PATCH 198/269] block: bfq: swap puts in bfqg_and_blkg_put Fix trivial use-after-free. This could be last reference to bfqg. Fixes: 8f9bebc33dd7 ("block, bfq: access and cache blkg data only when safe") Acked-by: Paolo Valente Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 58c6efa9f9a9..9fe5952d117d 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -275,9 +275,9 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg) void bfqg_and_blkg_put(struct bfq_group *bfqg) { - bfqg_put(bfqg); - blkg_put(bfqg_to_blkg(bfqg)); + + bfqg_put(bfqg); } /* @stats = 0 */ From 4cb205c0c50f613e2de91f0eb19d5247ed003e89 Mon Sep 17 00:00:00 2001 From: Jia He Date: Tue, 28 Aug 2018 12:53:26 +0800 Subject: [PATCH 199/269] irqchip/gic-v3-its: Cap lpi_id_bits to reduce memory footprint Commit fe8e93504ce8 ("irqchip/gic-v3-its: Use full range of LPIs"), removes the cap for lpi_id_bits, which causes the following warning to trigger on a QDF2400 server: WARNING: CPU: 0 PID: 0 at mm/page_alloc.c:4066 __alloc_pages_nodemask ... Call trace: __alloc_pages_nodemask+0x2d8/0x1188 alloc_pages_current+0x8c/0xd8 its_allocate_prop_table+0x5c/0xb8 its_init+0x220/0x3c0 gic_init_bases+0x250/0x380 gic_acpi_init+0x16c/0x2a4 In its_alloc_lpi_tables(), lpi_id_bits is 24 in QDF2400. The allocation in allocate_prop_table() tries therefore to allocate 16M (order 12 if pagesize=4k), which triggers the warning. As said by MarcL Capping lpi_id_bits at 16 (which is what we had before) is plenty, will save a some memory, and gives some margin before we need to push it up again. Bring the upper limit of lpi_id_bits back to prevent Fixes: fe8e93504ce8 ("irqchip/gic-v3-its: Use full range of LPIs") Suggested-by: Marc Zyngier Signed-off-by: Jia He Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Tested-by: Olof Johansson Cc: Jason Cooper Cc: linux-arm-kernel@lists.infradead.org Link: https://lkml.kernel.org/r/1535432006-2304-1-git-send-email-jia.he@hxt-semitech.com --- drivers/irqchip/irq-gic-v3-its.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 316a57530f6d..c2df341ff6fa 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1439,6 +1439,7 @@ static struct irq_chip its_irq_chip = { * The consequence of the above is that allocation is cost is low, but * freeing is expensive. We assumes that freeing rarely occurs. */ +#define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */ static DEFINE_MUTEX(lpi_range_lock); static LIST_HEAD(lpi_range_list); @@ -1625,7 +1626,8 @@ static int __init its_alloc_lpi_tables(void) { phys_addr_t paddr; - lpi_id_bits = GICD_TYPER_ID_BITS(gic_rdists->gicd_typer); + lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gic_rdists->gicd_typer), + ITS_MAX_LPI_NRBITS); gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT); if (!gic_rdists->prop_page) { pr_err("Failed to allocate PROPBASE\n"); From ae7304c3ea28a3ba47a7a8312c76c654ef24967e Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 3 Sep 2018 15:11:11 +0530 Subject: [PATCH 200/269] i2c: xiic: Make the start and the byte count write atomic Disable interrupts while configuring the transfer and enable them back. We have below as the programming sequence 1. start and slave address 2. byte count and stop In some customer platform there was a lot of interrupts between 1 and 2 and after slave address (around 7 clock cyles) if 2 is not executed then the transaction is nacked. To fix this case make the 2 writes atomic. Signed-off-by: Shubhrajyoti Datta Signed-off-by: Michal Simek [wsa: added a newline for better readability] Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-xiic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 9a71e50d21f1..0c51c0ffdda9 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -532,6 +532,7 @@ static void xiic_start_recv(struct xiic_i2c *i2c) { u8 rx_watermark; struct i2c_msg *msg = i2c->rx_msg = i2c->tx_msg; + unsigned long flags; /* Clear and enable Rx full interrupt. */ xiic_irq_clr_en(i2c, XIIC_INTR_RX_FULL_MASK | XIIC_INTR_TX_ERROR_MASK); @@ -547,6 +548,7 @@ static void xiic_start_recv(struct xiic_i2c *i2c) rx_watermark = IIC_RX_FIFO_DEPTH; xiic_setreg8(i2c, XIIC_RFD_REG_OFFSET, rx_watermark - 1); + local_irq_save(flags); if (!(msg->flags & I2C_M_NOSTART)) /* write the address */ xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET, @@ -556,6 +558,8 @@ static void xiic_start_recv(struct xiic_i2c *i2c) xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET, msg->len | ((i2c->nmsgs == 1) ? XIIC_TX_DYN_STOP_MASK : 0)); + local_irq_restore(flags); + if (i2c->nmsgs == 1) /* very last, enable bus not busy as well */ xiic_irq_clr_en(i2c, XIIC_INTR_BNB_MASK); From 954a8e3aea87e896e320cf648c1a5bbe47de443e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 30 Aug 2018 08:35:19 +0300 Subject: [PATCH 201/269] RDMA/cma: Protect cma dev list with lock When AF_IB addresses are used during rdma_resolve_addr() a lock is not held. A cma device can get removed while list traversal is in progress which may lead to crash. ie CPU0 CPU1 ==== ==== rdma_resolve_addr() cma_resolve_ib_dev() list_for_each() cma_remove_one() cur_dev->device mutex_lock(&lock) list_del(); mutex_unlock(&lock); cma_process_remove(); Therefore, hold a lock while traversing the list which avoids such situation. Cc: # 3.10 Fixes: f17df3b0dede ("RDMA/cma: Add support for AF_IB to rdma_resolve_addr()") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f72677291b69..a36c94930c31 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -724,6 +724,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) dgid = (union ib_gid *) &addr->sib_addr; pkey = ntohs(addr->sib_pkey); + mutex_lock(&lock); list_for_each_entry(cur_dev, &dev_list, list) { for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { if (!rdma_cap_af_ib(cur_dev->device, p)) @@ -750,18 +751,19 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) cma_dev = cur_dev; sgid = gid; id_priv->id.port_num = p; + goto found; } } } } - - if (!cma_dev) - return -ENODEV; + mutex_unlock(&lock); + return -ENODEV; found: cma_attach_to_dev(id_priv, cma_dev); - addr = (struct sockaddr_ib *) cma_src_addr(id_priv); - memcpy(&addr->sib_addr, &sgid, sizeof sgid); + mutex_unlock(&lock); + addr = (struct sockaddr_ib *)cma_src_addr(id_priv); + memcpy(&addr->sib_addr, &sgid, sizeof(sgid)); cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); return 0; } From 8f28b178f71cc56eccf2a6e2c0ace17c82f900d7 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 3 Sep 2018 09:11:14 +0300 Subject: [PATCH 202/269] RDMA/mlx4: Ensure that maximal send/receive SGE less than supported by HW In calculating the global maximum number of the Scatter/Gather elements supported, the following four maximum parameters must be taken into consideration: max_sg_rq, max_sg_sq, max_desc_sz_rq and max_desc_sz_sq. However instead of bringing this complexity to query_device, which still won't be sufficient anyway (the calculations are dependent on QP type), the safer approach will be to restore old code, which will give us 32 SGEs. Fixes: 33023fb85a42 ("IB/core: add max_send_sge and max_recv_sge attributes") Reported-by: Chuck Lever Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ca0f1ee26091..0bbeaaae47e0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -517,9 +517,11 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->page_size_cap = dev->dev->caps.page_size_cap; props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; - props->max_send_sge = dev->dev->caps.max_sq_sg; - props->max_recv_sge = dev->dev->caps.max_rq_sg; - props->max_sge_rd = MLX4_MAX_SGE_RD; + props->max_send_sge = + min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); + props->max_recv_sge = + min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); + props->max_sge_rd = MLX4_MAX_SGE_RD; props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; props->max_mr = dev->dev->quotas.mpt; From d77ef138ff572409ab93d492e5e6c826ee6fb21d Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:00:11 -0400 Subject: [PATCH 203/269] drm/nouveau/drm/nouveau: Fix bogus drm_kms_helper_poll_enable() placement Turns out this part is my fault for not noticing when reviewing 9a2eba337cace ("drm/nouveau: Fix drm poll_helper handling"). Currently we call drm_kms_helper_poll_enable() from nouveau_display_hpd_work(). This makes basically no sense however, because that means we're calling drm_kms_helper_poll_enable() every time we schedule the hotplug detection work. This is also against the advice mentioned in drm_kms_helper_poll_enable()'s documentation: Note that calls to enable and disable polling must be strictly ordered, which is automatically the case when they're only call from suspend/resume callbacks. Of course, hotplugs can't really be ordered. They could even happen immediately after we called drm_kms_helper_poll_disable() in nouveau_display_fini(), which can lead to all sorts of issues. Additionally; enabling polling /after/ we call drm_helper_hpd_irq_event() could also mean that we'd miss a hotplug event anyway, since drm_helper_hpd_irq_event() wouldn't bother trying to probe connectors so long as polling is disabled. So; simply move this back into nouveau_display_init() again. The race condition that both of these patches attempted to work around has already been fixed properly in d61a5c106351 ("drm/nouveau: Fix deadlock on runtime suspend") Fixes: 9a2eba337cace ("drm/nouveau: Fix drm poll_helper handling") Signed-off-by: Lyude Paul Acked-by: Karol Herbst Acked-by: Daniel Vetter Cc: Lukas Wunner Cc: Peter Ujfalusi Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 139368b31916..7db01ea7fd41 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -355,8 +355,6 @@ nouveau_display_hpd_work(struct work_struct *work) pm_runtime_get_sync(drm->dev->dev); drm_helper_hpd_irq_event(drm->dev); - /* enable polling for external displays */ - drm_kms_helper_poll_enable(drm->dev); pm_runtime_mark_last_busy(drm->dev->dev); pm_runtime_put_sync(drm->dev->dev); @@ -411,6 +409,11 @@ nouveau_display_init(struct drm_device *dev) if (ret) return ret; + /* enable connector detection and polling for connectors without HPD + * support + */ + drm_kms_helper_poll_enable(dev); + /* enable hotplug interrupts */ drm_connector_list_iter_begin(dev, &conn_iter); nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { From 611ce855420a6e8b9ff47af5f47431d52c7709f8 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:00:12 -0400 Subject: [PATCH 204/269] drm/nouveau: Remove duplicate poll_enable() in pmops_runtime_suspend() Since actual hotplug notifications don't get disabled until nouveau_display_fini() is called, all this will do is cause any hotplugs that happen between this drm_kms_helper_poll_disable() call and the actual hotplug disablement to potentially be dropped if ACPI isn't around to help us. Signed-off-by: Lyude Paul Acked-by: Karol Herbst Acked-by: Daniel Vetter Cc: stable@vger.kernel.org Cc: Lukas Wunner Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index c7ec86d6c3c9..5fdc1fbe2ee5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -835,7 +835,6 @@ nouveau_pmops_runtime_suspend(struct device *dev) return -EBUSY; } - drm_kms_helper_poll_disable(drm_dev); nouveau_switcheroo_optimus_dsm(); ret = nouveau_do_suspend(drm_dev, true); pci_save_state(pdev); From 7fec8f5379fb6eddabc0aaef6d2304c366808f97 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:00:13 -0400 Subject: [PATCH 205/269] drm/nouveau/drm/nouveau: Fix deadlock with fb_helper with async RPM requests Currently, nouveau uses the generic drm_fb_helper_output_poll_changed() function provided by DRM as it's output_poll_changed callback. Unfortunately however, this function doesn't grab runtime PM references early enough and even if it did-we can't block waiting for the device to resume in output_poll_changed() since it's very likely that we'll need to grab the fb_helper lock at some point during the runtime resume process. This currently results in deadlocking like so: [ 246.669625] INFO: task kworker/4:0:37 blocked for more than 120 seconds. [ 246.673398] Not tainted 4.18.0-rc5Lyude-Test+ #2 [ 246.675271] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 246.676527] kworker/4:0 D 0 37 2 0x80000000 [ 246.677580] Workqueue: events output_poll_execute [drm_kms_helper] [ 246.678704] Call Trace: [ 246.679753] __schedule+0x322/0xaf0 [ 246.680916] schedule+0x33/0x90 [ 246.681924] schedule_preempt_disabled+0x15/0x20 [ 246.683023] __mutex_lock+0x569/0x9a0 [ 246.684035] ? kobject_uevent_env+0x117/0x7b0 [ 246.685132] ? drm_fb_helper_hotplug_event.part.28+0x20/0xb0 [drm_kms_helper] [ 246.686179] mutex_lock_nested+0x1b/0x20 [ 246.687278] ? mutex_lock_nested+0x1b/0x20 [ 246.688307] drm_fb_helper_hotplug_event.part.28+0x20/0xb0 [drm_kms_helper] [ 246.689420] drm_fb_helper_output_poll_changed+0x23/0x30 [drm_kms_helper] [ 246.690462] drm_kms_helper_hotplug_event+0x2a/0x30 [drm_kms_helper] [ 246.691570] output_poll_execute+0x198/0x1c0 [drm_kms_helper] [ 246.692611] process_one_work+0x231/0x620 [ 246.693725] worker_thread+0x214/0x3a0 [ 246.694756] kthread+0x12b/0x150 [ 246.695856] ? wq_pool_ids_show+0x140/0x140 [ 246.696888] ? kthread_create_worker_on_cpu+0x70/0x70 [ 246.697998] ret_from_fork+0x3a/0x50 [ 246.699034] INFO: task kworker/0:1:60 blocked for more than 120 seconds. [ 246.700153] Not tainted 4.18.0-rc5Lyude-Test+ #2 [ 246.701182] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 246.702278] kworker/0:1 D 0 60 2 0x80000000 [ 246.703293] Workqueue: pm pm_runtime_work [ 246.704393] Call Trace: [ 246.705403] __schedule+0x322/0xaf0 [ 246.706439] ? wait_for_completion+0x104/0x190 [ 246.707393] schedule+0x33/0x90 [ 246.708375] schedule_timeout+0x3a5/0x590 [ 246.709289] ? mark_held_locks+0x58/0x80 [ 246.710208] ? _raw_spin_unlock_irq+0x2c/0x40 [ 246.711222] ? wait_for_completion+0x104/0x190 [ 246.712134] ? trace_hardirqs_on_caller+0xf4/0x190 [ 246.713094] ? wait_for_completion+0x104/0x190 [ 246.713964] wait_for_completion+0x12c/0x190 [ 246.714895] ? wake_up_q+0x80/0x80 [ 246.715727] ? get_work_pool+0x90/0x90 [ 246.716649] flush_work+0x1c9/0x280 [ 246.717483] ? flush_workqueue_prep_pwqs+0x1b0/0x1b0 [ 246.718442] __cancel_work_timer+0x146/0x1d0 [ 246.719247] cancel_delayed_work_sync+0x13/0x20 [ 246.720043] drm_kms_helper_poll_disable+0x1f/0x30 [drm_kms_helper] [ 246.721123] nouveau_pmops_runtime_suspend+0x3d/0xb0 [nouveau] [ 246.721897] pci_pm_runtime_suspend+0x6b/0x190 [ 246.722825] ? pci_has_legacy_pm_support+0x70/0x70 [ 246.723737] __rpm_callback+0x7a/0x1d0 [ 246.724721] ? pci_has_legacy_pm_support+0x70/0x70 [ 246.725607] rpm_callback+0x24/0x80 [ 246.726553] ? pci_has_legacy_pm_support+0x70/0x70 [ 246.727376] rpm_suspend+0x142/0x6b0 [ 246.728185] pm_runtime_work+0x97/0xc0 [ 246.728938] process_one_work+0x231/0x620 [ 246.729796] worker_thread+0x44/0x3a0 [ 246.730614] kthread+0x12b/0x150 [ 246.731395] ? wq_pool_ids_show+0x140/0x140 [ 246.732202] ? kthread_create_worker_on_cpu+0x70/0x70 [ 246.732878] ret_from_fork+0x3a/0x50 [ 246.733768] INFO: task kworker/4:2:422 blocked for more than 120 seconds. [ 246.734587] Not tainted 4.18.0-rc5Lyude-Test+ #2 [ 246.735393] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 246.736113] kworker/4:2 D 0 422 2 0x80000080 [ 246.736789] Workqueue: events_long drm_dp_mst_link_probe_work [drm_kms_helper] [ 246.737665] Call Trace: [ 246.738490] __schedule+0x322/0xaf0 [ 246.739250] schedule+0x33/0x90 [ 246.739908] rpm_resume+0x19c/0x850 [ 246.740750] ? finish_wait+0x90/0x90 [ 246.741541] __pm_runtime_resume+0x4e/0x90 [ 246.742370] nv50_disp_atomic_commit+0x31/0x210 [nouveau] [ 246.743124] drm_atomic_commit+0x4a/0x50 [drm] [ 246.743775] restore_fbdev_mode_atomic+0x1c8/0x240 [drm_kms_helper] [ 246.744603] restore_fbdev_mode+0x31/0x140 [drm_kms_helper] [ 246.745373] drm_fb_helper_restore_fbdev_mode_unlocked+0x54/0xb0 [drm_kms_helper] [ 246.746220] drm_fb_helper_set_par+0x2d/0x50 [drm_kms_helper] [ 246.746884] drm_fb_helper_hotplug_event.part.28+0x96/0xb0 [drm_kms_helper] [ 246.747675] drm_fb_helper_output_poll_changed+0x23/0x30 [drm_kms_helper] [ 246.748544] drm_kms_helper_hotplug_event+0x2a/0x30 [drm_kms_helper] [ 246.749439] nv50_mstm_hotplug+0x15/0x20 [nouveau] [ 246.750111] drm_dp_send_link_address+0x177/0x1c0 [drm_kms_helper] [ 246.750764] drm_dp_check_and_send_link_address+0xa8/0xd0 [drm_kms_helper] [ 246.751602] drm_dp_mst_link_probe_work+0x51/0x90 [drm_kms_helper] [ 246.752314] process_one_work+0x231/0x620 [ 246.752979] worker_thread+0x44/0x3a0 [ 246.753838] kthread+0x12b/0x150 [ 246.754619] ? wq_pool_ids_show+0x140/0x140 [ 246.755386] ? kthread_create_worker_on_cpu+0x70/0x70 [ 246.756162] ret_from_fork+0x3a/0x50 [ 246.756847] Showing all locks held in the system: [ 246.758261] 3 locks held by kworker/4:0/37: [ 246.759016] #0: 00000000f8df4d2d ((wq_completion)"events"){+.+.}, at: process_one_work+0x1b3/0x620 [ 246.759856] #1: 00000000e6065461 ((work_completion)(&(&dev->mode_config.output_poll_work)->work)){+.+.}, at: process_one_work+0x1b3/0x620 [ 246.760670] #2: 00000000cb66735f (&helper->lock){+.+.}, at: drm_fb_helper_hotplug_event.part.28+0x20/0xb0 [drm_kms_helper] [ 246.761516] 2 locks held by kworker/0:1/60: [ 246.762274] #0: 00000000fff6be0f ((wq_completion)"pm"){+.+.}, at: process_one_work+0x1b3/0x620 [ 246.762982] #1: 000000005ab44fb4 ((work_completion)(&dev->power.work)){+.+.}, at: process_one_work+0x1b3/0x620 [ 246.763890] 1 lock held by khungtaskd/64: [ 246.764664] #0: 000000008cb8b5c3 (rcu_read_lock){....}, at: debug_show_all_locks+0x23/0x185 [ 246.765588] 5 locks held by kworker/4:2/422: [ 246.766440] #0: 00000000232f0959 ((wq_completion)"events_long"){+.+.}, at: process_one_work+0x1b3/0x620 [ 246.767390] #1: 00000000bb59b134 ((work_completion)(&mgr->work)){+.+.}, at: process_one_work+0x1b3/0x620 [ 246.768154] #2: 00000000cb66735f (&helper->lock){+.+.}, at: drm_fb_helper_restore_fbdev_mode_unlocked+0x4c/0xb0 [drm_kms_helper] [ 246.768966] #3: 000000004c8f0b6b (crtc_ww_class_acquire){+.+.}, at: restore_fbdev_mode_atomic+0x4b/0x240 [drm_kms_helper] [ 246.769921] #4: 000000004c34a296 (crtc_ww_class_mutex){+.+.}, at: drm_modeset_backoff+0x8a/0x1b0 [drm] [ 246.770839] 1 lock held by dmesg/1038: [ 246.771739] 2 locks held by zsh/1172: [ 246.772650] #0: 00000000836d0438 (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x37/0x40 [ 246.773680] #1: 000000001f4f4d48 (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0xc1/0x870 [ 246.775522] ============================================= After trying dozens of different solutions, I found one very simple one that should also have the benefit of preventing us from having to fight locking for the rest of our lives. So, we work around these deadlocks by deferring all fbcon hotplug events that happen after the runtime suspend process starts until after the device is resumed again. Changes since v7: - Fixup commit message - Daniel Vetter Changes since v6: - Remove unused nouveau_fbcon_hotplugged_in_suspend() - Ilia Changes since v5: - Come up with the (hopefully final) solution for solving this dumb problem, one that is a lot less likely to cause issues with locking in the future. This should work around all deadlock conditions with fbcon brought up thus far. Changes since v4: - Add nouveau_fbcon_hotplugged_in_suspend() to workaround deadlock condition that Lukas described - Just move all of this out of drm_fb_helper. It seems that other DRM drivers have already figured out other workarounds for this. If other drivers do end up needing this in the future, we can just move this back into drm_fb_helper again. Changes since v3: - Actually check if fb_helper is NULL in both new helpers - Actually check drm_fbdev_emulation in both new helpers - Don't fire off a fb_helper hotplug unconditionally; only do it if the following conditions are true (as otherwise, calling this in the wrong spot will cause Bad Things to happen): - fb_helper hotplug handling was actually inhibited previously - fb_helper actually has a delayed hotplug pending - fb_helper is actually bound - fb_helper is actually initialized - Add __must_check to drm_fb_helper_suspend_hotplug(). There's no situation where a driver would actually want to use this without checking the return value, so enforce that - Rewrite and clarify the documentation for both helpers. - Make sure to return true in the drm_fb_helper_suspend_hotplug() stub that's provided in drm_fb_helper.h when CONFIG_DRM_FBDEV_EMULATION isn't enabled - Actually grab the toplevel fb_helper lock in drm_fb_helper_resume_hotplug(), since it's possible other activity (such as a hotplug) could be going on at the same time the driver calls drm_fb_helper_resume_hotplug(). We need this to check whether or not drm_fb_helper_hotplug_event() needs to be called anyway Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Acked-by: Daniel Vetter Cc: stable@vger.kernel.org Cc: Lukas Wunner Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +- drivers/gpu/drm/nouveau/nouveau_display.c | 2 +- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 57 +++++++++++++++++++++++ drivers/gpu/drm/nouveau/nouveau_fbcon.h | 5 ++ 4 files changed, 64 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 8412119bd940..aec6ee1ff4e0 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2074,7 +2074,7 @@ nv50_disp_atomic_state_alloc(struct drm_device *dev) static const struct drm_mode_config_funcs nv50_disp_func = { .fb_create = nouveau_user_framebuffer_create, - .output_poll_changed = drm_fb_helper_output_poll_changed, + .output_poll_changed = nouveau_fbcon_output_poll_changed, .atomic_check = nv50_disp_atomic_check, .atomic_commit = nv50_disp_atomic_commit, .atomic_state_alloc = nv50_disp_atomic_state_alloc, diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 7db01ea7fd41..42e7c35e3fba 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -293,7 +293,7 @@ nouveau_user_framebuffer_create(struct drm_device *dev, static const struct drm_mode_config_funcs nouveau_mode_config_funcs = { .fb_create = nouveau_user_framebuffer_create, - .output_poll_changed = drm_fb_helper_output_poll_changed, + .output_poll_changed = nouveau_fbcon_output_poll_changed, }; diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 844498c4267c..0f64c0a1d4b3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -466,6 +466,7 @@ nouveau_fbcon_set_suspend_work(struct work_struct *work) console_unlock(); if (state == FBINFO_STATE_RUNNING) { + nouveau_fbcon_hotplug_resume(drm->fbcon); pm_runtime_mark_last_busy(drm->dev->dev); pm_runtime_put_sync(drm->dev->dev); } @@ -487,6 +488,61 @@ nouveau_fbcon_set_suspend(struct drm_device *dev, int state) schedule_work(&drm->fbcon_work); } +void +nouveau_fbcon_output_poll_changed(struct drm_device *dev) +{ + struct nouveau_drm *drm = nouveau_drm(dev); + struct nouveau_fbdev *fbcon = drm->fbcon; + int ret; + + if (!fbcon) + return; + + mutex_lock(&fbcon->hotplug_lock); + + ret = pm_runtime_get(dev->dev); + if (ret == 1 || ret == -EACCES) { + drm_fb_helper_hotplug_event(&fbcon->helper); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + } else if (ret == 0) { + /* If the GPU was already in the process of suspending before + * this event happened, then we can't block here as we'll + * deadlock the runtime pmops since they wait for us to + * finish. So, just defer this event for when we runtime + * resume again. It will be handled by fbcon_work. + */ + NV_DEBUG(drm, "fbcon HPD event deferred until runtime resume\n"); + fbcon->hotplug_waiting = true; + pm_runtime_put_noidle(drm->dev->dev); + } else { + DRM_WARN("fbcon HPD event lost due to RPM failure: %d\n", + ret); + } + + mutex_unlock(&fbcon->hotplug_lock); +} + +void +nouveau_fbcon_hotplug_resume(struct nouveau_fbdev *fbcon) +{ + struct nouveau_drm *drm; + + if (!fbcon) + return; + drm = nouveau_drm(fbcon->helper.dev); + + mutex_lock(&fbcon->hotplug_lock); + if (fbcon->hotplug_waiting) { + fbcon->hotplug_waiting = false; + + NV_DEBUG(drm, "Handling deferred fbcon HPD events\n"); + drm_fb_helper_hotplug_event(&fbcon->helper); + } + mutex_unlock(&fbcon->hotplug_lock); +} + int nouveau_fbcon_init(struct drm_device *dev) { @@ -505,6 +561,7 @@ nouveau_fbcon_init(struct drm_device *dev) drm->fbcon = fbcon; INIT_WORK(&drm->fbcon_work, nouveau_fbcon_set_suspend_work); + mutex_init(&fbcon->hotplug_lock); drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs); diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h index a6f192ea3fa6..db9d52047ef8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h @@ -41,6 +41,9 @@ struct nouveau_fbdev { struct nvif_object gdi; struct nvif_object blit; struct nvif_object twod; + + struct mutex hotplug_lock; + bool hotplug_waiting; }; void nouveau_fbcon_restore(void); @@ -68,6 +71,8 @@ void nouveau_fbcon_set_suspend(struct drm_device *dev, int state); void nouveau_fbcon_accel_save_disable(struct drm_device *dev); void nouveau_fbcon_accel_restore(struct drm_device *dev); +void nouveau_fbcon_output_poll_changed(struct drm_device *dev); +void nouveau_fbcon_hotplug_resume(struct nouveau_fbdev *fbcon); extern int nouveau_nofbaccel; #endif /* __NV50_FBCON_H__ */ From 6833fb1ec120bf078e1a527c573a09d4de286224 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:00:14 -0400 Subject: [PATCH 206/269] drm/nouveau/drm/nouveau: Use pm_runtime_get_noresume() in connector_detect() It's true we can't resume the device from poll workers in nouveau_connector_detect(). We can however, prevent the autosuspend timer from elapsing immediately if it hasn't already without risking any sort of deadlock with the runtime suspend/resume operations. So do that instead of entirely avoiding grabbing a power reference. Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Acked-by: Daniel Vetter Cc: stable@vger.kernel.org Cc: Lukas Wunner Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_connector.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 51932c72334e..31b31a35c8fe 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -555,12 +555,16 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) nv_connector->edid = NULL; } - /* Outputs are only polled while runtime active, so acquiring a - * runtime PM ref here is unnecessary (and would deadlock upon - * runtime suspend because it waits for polling to finish). + /* Outputs are only polled while runtime active, so resuming the + * device here is unnecessary (and would deadlock upon runtime suspend + * because it waits for polling to finish). We do however, want to + * prevent the autosuspend timer from elapsing during this operation + * if possible. */ - if (!drm_kms_helper_is_poll_worker()) { - ret = pm_runtime_get_sync(connector->dev->dev); + if (drm_kms_helper_is_poll_worker()) { + pm_runtime_get_noresume(dev->dev); + } else { + ret = pm_runtime_get_sync(dev->dev); if (ret < 0 && ret != -EACCES) return conn_status; } @@ -638,10 +642,8 @@ detect_analog: out: - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); - } + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); return conn_status; } From 3e1a12754d4df5804bfca5dedf09d2ba291bdc2a Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:00:15 -0400 Subject: [PATCH 207/269] drm/nouveau: Fix deadlocks in nouveau_connector_detect() When we disable hotplugging on the GPU, we need to be able to synchronize with each connector's hotplug interrupt handler before the interrupt is finally disabled. This can be a problem however, since nouveau_connector_detect() currently grabs a runtime power reference when handling connector probing. This will deadlock the runtime suspend handler like so: [ 861.480896] INFO: task kworker/0:2:61 blocked for more than 120 seconds. [ 861.483290] Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.485158] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 861.486332] kworker/0:2 D 0 61 2 0x80000000 [ 861.487044] Workqueue: events nouveau_display_hpd_work [nouveau] [ 861.487737] Call Trace: [ 861.488394] __schedule+0x322/0xaf0 [ 861.489070] schedule+0x33/0x90 [ 861.489744] rpm_resume+0x19c/0x850 [ 861.490392] ? finish_wait+0x90/0x90 [ 861.491068] __pm_runtime_resume+0x4e/0x90 [ 861.491753] nouveau_display_hpd_work+0x22/0x60 [nouveau] [ 861.492416] process_one_work+0x231/0x620 [ 861.493068] worker_thread+0x44/0x3a0 [ 861.493722] kthread+0x12b/0x150 [ 861.494342] ? wq_pool_ids_show+0x140/0x140 [ 861.494991] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.495648] ret_from_fork+0x3a/0x50 [ 861.496304] INFO: task kworker/6:2:320 blocked for more than 120 seconds. [ 861.496968] Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.497654] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 861.498341] kworker/6:2 D 0 320 2 0x80000080 [ 861.499045] Workqueue: pm pm_runtime_work [ 861.499739] Call Trace: [ 861.500428] __schedule+0x322/0xaf0 [ 861.501134] ? wait_for_completion+0x104/0x190 [ 861.501851] schedule+0x33/0x90 [ 861.502564] schedule_timeout+0x3a5/0x590 [ 861.503284] ? mark_held_locks+0x58/0x80 [ 861.503988] ? _raw_spin_unlock_irq+0x2c/0x40 [ 861.504710] ? wait_for_completion+0x104/0x190 [ 861.505417] ? trace_hardirqs_on_caller+0xf4/0x190 [ 861.506136] ? wait_for_completion+0x104/0x190 [ 861.506845] wait_for_completion+0x12c/0x190 [ 861.507555] ? wake_up_q+0x80/0x80 [ 861.508268] flush_work+0x1c9/0x280 [ 861.508990] ? flush_workqueue_prep_pwqs+0x1b0/0x1b0 [ 861.509735] nvif_notify_put+0xb1/0xc0 [nouveau] [ 861.510482] nouveau_display_fini+0xbd/0x170 [nouveau] [ 861.511241] nouveau_display_suspend+0x67/0x120 [nouveau] [ 861.511969] nouveau_do_suspend+0x5e/0x2d0 [nouveau] [ 861.512715] nouveau_pmops_runtime_suspend+0x47/0xb0 [nouveau] [ 861.513435] pci_pm_runtime_suspend+0x6b/0x180 [ 861.514165] ? pci_has_legacy_pm_support+0x70/0x70 [ 861.514897] __rpm_callback+0x7a/0x1d0 [ 861.515618] ? pci_has_legacy_pm_support+0x70/0x70 [ 861.516313] rpm_callback+0x24/0x80 [ 861.517027] ? pci_has_legacy_pm_support+0x70/0x70 [ 861.517741] rpm_suspend+0x142/0x6b0 [ 861.518449] pm_runtime_work+0x97/0xc0 [ 861.519144] process_one_work+0x231/0x620 [ 861.519831] worker_thread+0x44/0x3a0 [ 861.520522] kthread+0x12b/0x150 [ 861.521220] ? wq_pool_ids_show+0x140/0x140 [ 861.521925] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.522622] ret_from_fork+0x3a/0x50 [ 861.523299] INFO: task kworker/6:0:1329 blocked for more than 120 seconds. [ 861.523977] Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.524644] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 861.525349] kworker/6:0 D 0 1329 2 0x80000000 [ 861.526073] Workqueue: events nvif_notify_work [nouveau] [ 861.526751] Call Trace: [ 861.527411] __schedule+0x322/0xaf0 [ 861.528089] schedule+0x33/0x90 [ 861.528758] rpm_resume+0x19c/0x850 [ 861.529399] ? finish_wait+0x90/0x90 [ 861.530073] __pm_runtime_resume+0x4e/0x90 [ 861.530798] nouveau_connector_detect+0x7e/0x510 [nouveau] [ 861.531459] ? ww_mutex_lock+0x47/0x80 [ 861.532097] ? ww_mutex_lock+0x47/0x80 [ 861.532819] ? drm_modeset_lock+0x88/0x130 [drm] [ 861.533481] drm_helper_probe_detect_ctx+0xa0/0x100 [drm_kms_helper] [ 861.534127] drm_helper_hpd_irq_event+0xa4/0x120 [drm_kms_helper] [ 861.534940] nouveau_connector_hotplug+0x98/0x120 [nouveau] [ 861.535556] nvif_notify_work+0x2d/0xb0 [nouveau] [ 861.536221] process_one_work+0x231/0x620 [ 861.536994] worker_thread+0x44/0x3a0 [ 861.537757] kthread+0x12b/0x150 [ 861.538463] ? wq_pool_ids_show+0x140/0x140 [ 861.539102] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.539815] ret_from_fork+0x3a/0x50 [ 861.540521] Showing all locks held in the system: [ 861.541696] 2 locks held by kworker/0:2/61: [ 861.542406] #0: 000000002dbf8af5 ((wq_completion)"events"){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.543071] #1: 0000000076868126 ((work_completion)(&drm->hpd_work)){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.543814] 1 lock held by khungtaskd/64: [ 861.544535] #0: 0000000059db4b53 (rcu_read_lock){....}, at: debug_show_all_locks+0x23/0x185 [ 861.545160] 3 locks held by kworker/6:2/320: [ 861.545896] #0: 00000000d9e1bc59 ((wq_completion)"pm"){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.546702] #1: 00000000c9f92d84 ((work_completion)(&dev->power.work)){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.547443] #2: 000000004afc5de1 (drm_connector_list_iter){.+.+}, at: nouveau_display_fini+0x96/0x170 [nouveau] [ 861.548146] 1 lock held by dmesg/983: [ 861.548889] 2 locks held by zsh/1250: [ 861.549605] #0: 00000000348e3cf6 (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x37/0x40 [ 861.550393] #1: 000000007009a7a8 (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0xc1/0x870 [ 861.551122] 6 locks held by kworker/6:0/1329: [ 861.551957] #0: 000000002dbf8af5 ((wq_completion)"events"){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.552765] #1: 00000000ddb499ad ((work_completion)(¬ify->work)#2){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.553582] #2: 000000006e013cbe (&dev->mode_config.mutex){+.+.}, at: drm_helper_hpd_irq_event+0x6c/0x120 [drm_kms_helper] [ 861.554357] #3: 000000004afc5de1 (drm_connector_list_iter){.+.+}, at: drm_helper_hpd_irq_event+0x78/0x120 [drm_kms_helper] [ 861.555227] #4: 0000000044f294d9 (crtc_ww_class_acquire){+.+.}, at: drm_helper_probe_detect_ctx+0x3d/0x100 [drm_kms_helper] [ 861.556133] #5: 00000000db193642 (crtc_ww_class_mutex){+.+.}, at: drm_modeset_lock+0x4b/0x130 [drm] [ 861.557864] ============================================= [ 861.559507] NMI backtrace for cpu 2 [ 861.560363] CPU: 2 PID: 64 Comm: khungtaskd Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.561197] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET78W (1.51 ) 05/18/2018 [ 861.561948] Call Trace: [ 861.562757] dump_stack+0x8e/0xd3 [ 861.563516] nmi_cpu_backtrace.cold.3+0x14/0x5a [ 861.564269] ? lapic_can_unplug_cpu.cold.27+0x42/0x42 [ 861.565029] nmi_trigger_cpumask_backtrace+0xa1/0xae [ 861.565789] arch_trigger_cpumask_backtrace+0x19/0x20 [ 861.566558] watchdog+0x316/0x580 [ 861.567355] kthread+0x12b/0x150 [ 861.568114] ? reset_hung_task_detector+0x20/0x20 [ 861.568863] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.569598] ret_from_fork+0x3a/0x50 [ 861.570370] Sending NMI from CPU 2 to CPUs 0-1,3-7: [ 861.571426] NMI backtrace for cpu 6 skipped: idling at intel_idle+0x7f/0x120 [ 861.571429] NMI backtrace for cpu 7 skipped: idling at intel_idle+0x7f/0x120 [ 861.571432] NMI backtrace for cpu 3 skipped: idling at intel_idle+0x7f/0x120 [ 861.571464] NMI backtrace for cpu 5 skipped: idling at intel_idle+0x7f/0x120 [ 861.571467] NMI backtrace for cpu 0 skipped: idling at intel_idle+0x7f/0x120 [ 861.571469] NMI backtrace for cpu 4 skipped: idling at intel_idle+0x7f/0x120 [ 861.571472] NMI backtrace for cpu 1 skipped: idling at intel_idle+0x7f/0x120 [ 861.572428] Kernel panic - not syncing: hung_task: blocked tasks So: fix this by making it so that normal hotplug handling /only/ happens so long as the GPU is currently awake without any pending runtime PM requests. In the event that a hotplug occurs while the device is suspending or resuming, we can simply defer our response until the GPU is fully runtime resumed again. Changes since v4: - Use a new trick I came up with using pm_runtime_get() instead of the hackish junk we had before Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Acked-by: Daniel Vetter Cc: stable@vger.kernel.org Cc: Lukas Wunner Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_connector.c | 22 +++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 31b31a35c8fe..76660bc1ccfb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1107,6 +1107,26 @@ nouveau_connector_hotplug(struct nvif_notify *notify) const struct nvif_notify_conn_rep_v0 *rep = notify->data; const char *name = connector->name; struct nouveau_encoder *nv_encoder; + int ret; + + ret = pm_runtime_get(drm->dev->dev); + if (ret == 0) { + /* We can't block here if there's a pending PM request + * running, as we'll deadlock nouveau_display_fini() when it + * calls nvif_put() on our nvif_notify struct. So, simply + * defer the hotplug event until the device finishes resuming + */ + NV_DEBUG(drm, "Deferring HPD on %s until runtime resume\n", + name); + schedule_work(&drm->hpd_work); + + pm_runtime_put_noidle(drm->dev->dev); + return NVIF_NOTIFY_KEEP; + } else if (ret != 1 && ret != -EACCES) { + NV_WARN(drm, "HPD on %s dropped due to RPM failure: %d\n", + name, ret); + return NVIF_NOTIFY_DROP; + } if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) { NV_DEBUG(drm, "service %s\n", name); @@ -1124,6 +1144,8 @@ nouveau_connector_hotplug(struct nvif_notify *notify) drm_helper_hpd_irq_event(connector->dev); } + pm_runtime_mark_last_busy(drm->dev->dev); + pm_runtime_put_autosuspend(drm->dev->dev); return NVIF_NOTIFY_KEEP; } From 0445f7537d0742e4f8bcf594a8d81fb901fd131e Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:15:11 -0400 Subject: [PATCH 208/269] drm/nouveau: Remove useless poll_enable() call in switcheroo_set_state() This doesn't do anything, drm_kms_helper_poll_enable() gets called in nouveau_pmops_resume()->nouveau_display_resume()->nouveau_display_init() already. Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Acked-by: Daniel Vetter Cc: Lukas Wunner Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_vga.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c index 3da5a4305aa4..09b1d8151881 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vga.c +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c @@ -46,7 +46,6 @@ nouveau_switcheroo_set_state(struct pci_dev *pdev, pr_err("VGA switcheroo: switched nouveau on\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; nouveau_pmops_resume(&pdev->dev); - drm_kms_helper_poll_enable(dev); dev->switch_power_state = DRM_SWITCH_POWER_ON; } else { pr_err("VGA switcheroo: switched nouveau off\n"); From 0d7b2d4def679cae3bf2728fc31be7f8a48ceab3 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:15:12 -0400 Subject: [PATCH 209/269] drm/nouveau: Remove useless poll_disable() call in switcheroo_set_state() This won't do anything but potentially make us miss hotplugs. We already call drm_kms_helper_poll_disable() in nouveau_pmops_suspend()->nouveau_display_suspend()->nouveau_display_fini() Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Acked-by: Daniel Vetter Cc: Lukas Wunner Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_vga.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c index 09b1d8151881..8f1ce4833230 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vga.c +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c @@ -50,7 +50,6 @@ nouveau_switcheroo_set_state(struct pci_dev *pdev, } else { pr_err("VGA switcheroo: switched nouveau off\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; - drm_kms_helper_poll_disable(dev); nouveau_switcheroo_optimus_dsm(); nouveau_pmops_suspend(&pdev->dev); dev->switch_power_state = DRM_SWITCH_POWER_OFF; From 7326ead9828e5eb5c6030d80310241c404e919f9 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:15:13 -0400 Subject: [PATCH 210/269] drm/nouveau: Remove useless poll_enable() call in drm_load() Again, this doesn't do anything. drm_kms_helper_poll_enable() will have already been called in nouveau_display_init() Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Acked-by: Daniel Vetter Cc: Lukas Wunner Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 5fdc1fbe2ee5..04f704b77a3c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -592,10 +592,8 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) pm_runtime_allow(dev->dev); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put(dev->dev); - } else { - /* enable polling for external displays */ - drm_kms_helper_poll_enable(dev); } + return 0; fail_dispinit: From b26b4590dd53e012526342e749c423e6c0e73437 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 9 Aug 2018 18:22:05 -0400 Subject: [PATCH 211/269] drm/nouveau: Only write DP_MSTM_CTRL when needed Currently, nouveau will re-write the DP_MSTM_CTRL register for an MST hub every time it receives a long HPD pulse on DP. This isn't actually necessary and additionally, has some unintended side effects. With the P50 I've got here, rewriting DP_MSTM_CTRL constantly seems to make it rather likely (1 out of 5 times usually) that bringing up MST with it's ThinkPad dock will fail and result in sideband messages timing out in the middle. Afterwards, successive probes don't manage to get the dock to communicate properly over MST sideband properly. Many times sideband message timeouts from MST hubs are indicative of either the source or the sink dropping an ESI event, which can cause DRM's perspective of the topology's current state to go out of sync with reality. While it's tough to really know for sure what's happening to the dock, using userspace tools to write to DP_MSTM_CTRL in the middle of the MST link probing process does appear to make things flaky. It's possible that when we write to DP_MSTM_CTRL, the function that gets triggered to respond in the dock's firmware temporarily puts it in a state where it might end up not reporting an ESI to the source, or ends up dropping a sideband message we sent it. So, to fix this we make it so that when probing an MST topology, we respect it's current state. If the dock's already enabled, we simply read DP_MSTM_CTRL and disable the topology if it's value is not what we expected. Otherwise, we perform the normal MST probing dance. We avoid taking any action except if the state of the MST topology actually changes. This fixes MST sideband message timeouts and detection failures on my P50 with its ThinkPad dock. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Cc: Karol Herbst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 47 +++++++++++++++++++------ 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index aec6ee1ff4e0..1a06c165a8df 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -1142,31 +1142,58 @@ nv50_mstm_enable(struct nv50_mstm *mstm, u8 dpcd, int state) int nv50_mstm_detect(struct nv50_mstm *mstm, u8 dpcd[8], int allow) { - int ret, state = 0; + struct drm_dp_aux *aux; + int ret; + bool old_state, new_state; + u8 mstm_ctrl; if (!mstm) return 0; - if (dpcd[0] >= 0x12) { - ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CAP, &dpcd[1]); + mutex_lock(&mstm->mgr.lock); + + old_state = mstm->mgr.mst_state; + new_state = old_state; + aux = mstm->mgr.aux; + + if (old_state) { + /* Just check that the MST hub is still as we expect it */ + ret = drm_dp_dpcd_readb(aux, DP_MSTM_CTRL, &mstm_ctrl); + if (ret < 0 || !(mstm_ctrl & DP_MST_EN)) { + DRM_DEBUG_KMS("Hub gone, disabling MST topology\n"); + new_state = false; + } + } else if (dpcd[0] >= 0x12) { + ret = drm_dp_dpcd_readb(aux, DP_MSTM_CAP, &dpcd[1]); if (ret < 0) - return ret; + goto probe_error; if (!(dpcd[1] & DP_MST_CAP)) dpcd[0] = 0x11; else - state = allow; + new_state = allow; } - ret = nv50_mstm_enable(mstm, dpcd[0], state); - if (ret) - return ret; + if (new_state == old_state) { + mutex_unlock(&mstm->mgr.lock); + return new_state; + } - ret = drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, state); + ret = nv50_mstm_enable(mstm, dpcd[0], new_state); + if (ret) + goto probe_error; + + mutex_unlock(&mstm->mgr.lock); + + ret = drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, new_state); if (ret) return nv50_mstm_enable(mstm, dpcd[0], 0); - return mstm->mgr.mst_state; + return new_state; + +probe_error: + mutex_unlock(&mstm->mgr.lock); + return ret; } static void From fa3cdf8d0b092c4561f9f017dfac409eb7644737 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 9 Aug 2018 18:22:06 -0400 Subject: [PATCH 212/269] drm/nouveau: Reset MST branching unit before enabling When probing a new MST device, it's not safe to make any assumptions about it's current state. While most well mannered MST hubs will just disable the branching unit on hotplug disconnects, this isn't enough to save us from various other scenarios that might have resulted in something writing to the MST branching unit before we got control of it. This could happen if a previous probe we tried failed, if we're booting in kexec context and the hub is still in the state the last kernel put it in, etc. Luckily; there is no reason we can't just reset the branching unit every time we enable a new topology. So, fix this by resetting it on enabling new topologies to ensure that we always start off with a clean, unmodified topology state on MST sinks. This fixes occasional hard-lockups on my P50's laptop dock (e.g. AUX times out all DPCD trasactions) observed after multiple docks, undocks, and module reloads. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Cc: Karol Herbst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 1a06c165a8df..5691dfa1db6f 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -1123,17 +1123,21 @@ nv50_mstm_enable(struct nv50_mstm *mstm, u8 dpcd, int state) int ret; if (dpcd >= 0x12) { - ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CTRL, &dpcd); + /* Even if we're enabling MST, start with disabling the + * branching unit to clear any sink-side MST topology state + * that wasn't set by us + */ + ret = drm_dp_dpcd_writeb(mstm->mgr.aux, DP_MSTM_CTRL, 0); if (ret < 0) return ret; - dpcd &= ~DP_MST_EN; - if (state) - dpcd |= DP_MST_EN; - - ret = drm_dp_dpcd_writeb(mstm->mgr.aux, DP_MSTM_CTRL, dpcd); - if (ret < 0) - return ret; + if (state) { + /* Now, start initializing */ + ret = drm_dp_dpcd_writeb(mstm->mgr.aux, DP_MSTM_CTRL, + DP_MST_EN); + if (ret < 0) + return ret; + } } return nvif_mthd(disp, 0, &args, sizeof(args)); From 79e765ad665da4b8aa7e9c878bd2fef837f6fea5 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 16 Aug 2018 16:13:13 -0400 Subject: [PATCH 213/269] drm/nouveau/drm/nouveau: Prevent handling ACPI HPD events too early On most systems with ACPI hotplugging support, it seems that we always receive a hotplug event once we re-enable EC interrupts even if the GPU hasn't even been resumed yet. This can cause problems since even though we schedule hpd_work to handle connector reprobing for us, hpd_work synchronizes on pm_runtime_get_sync() to wait until the device is ready to perform reprobing. Since runtime suspend/resume callbacks are disabled before the PM core calls ->suspend(), any calls to pm_runtime_get_sync() during this period will grab a runtime PM ref and return immediately with -EACCES. Because we schedule hpd_work from our ACPI HPD handler, and hpd_work synchronizes on pm_runtime_get_sync(), this causes us to launch a connector reprobe immediately even if the GPU isn't actually resumed just yet. This causes various warnings in dmesg and occasionally, also prevents some displays connected to the dedicated GPU from coming back up after suspend. Example: usb 1-4: USB disconnect, device number 14 usb 1-4.1: USB disconnect, device number 15 WARNING: CPU: 0 PID: 838 at drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h:170 nouveau_dp_detect+0x17e/0x370 [nouveau] CPU: 0 PID: 838 Comm: kworker/0:6 Not tainted 4.17.14-201.Lyude.bz1477182.V3.fc28.x86_64 #1 Hardware name: LENOVO 20EQS64N00/20EQS64N00, BIOS N1EET77W (1.50 ) 03/28/2018 Workqueue: events nouveau_display_hpd_work [nouveau] RIP: 0010:nouveau_dp_detect+0x17e/0x370 [nouveau] RSP: 0018:ffffa15143933cf0 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8cb4f656c400 RCX: 0000000000000000 RDX: ffffa1514500e4e4 RSI: ffffa1514500e4e4 RDI: 0000000001009002 RBP: ffff8cb4f4a8a800 R08: ffffa15143933cfd R09: ffffa15143933cfc R10: 0000000000000000 R11: 0000000000000000 R12: ffff8cb4fb57a000 R13: ffff8cb4fb57a000 R14: ffff8cb4f4a8f800 R15: ffff8cb4f656c418 FS: 0000000000000000(0000) GS:ffff8cb51f400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f78ec938000 CR3: 000000073720a003 CR4: 00000000003606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? _cond_resched+0x15/0x30 nouveau_connector_detect+0x2ce/0x520 [nouveau] ? _cond_resched+0x15/0x30 ? ww_mutex_lock+0x12/0x40 drm_helper_probe_detect_ctx+0x8b/0xe0 [drm_kms_helper] drm_helper_hpd_irq_event+0xa8/0x120 [drm_kms_helper] nouveau_display_hpd_work+0x2a/0x60 [nouveau] process_one_work+0x187/0x340 worker_thread+0x2e/0x380 ? pwq_unbound_release_workfn+0xd0/0xd0 kthread+0x112/0x130 ? kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x35/0x40 Code: 4c 8d 44 24 0d b9 00 05 00 00 48 89 ef ba 09 00 00 00 be 01 00 00 00 e8 e1 09 f8 ff 85 c0 0f 85 b2 01 00 00 80 7c 24 0c 03 74 02 <0f> 0b 48 89 ef e8 b8 07 f8 ff f6 05 51 1b c8 ff 02 0f 84 72 ff ---[ end trace 55d811b38fc8e71a ]--- So, to fix this we attempt to grab a runtime PM reference in the ACPI handler itself asynchronously. If the GPU is already awake (it will have normal hotplugging at this point) or runtime PM callbacks are currently disabled on the device, we drop our reference without updating the autosuspend delay. We only schedule connector reprobes when we successfully managed to queue up a resume request with our asynchronous PM ref. This also has the added benefit of preventing redundant connector reprobes from ACPI while the GPU is runtime resumed! Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Cc: Karol Herbst Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477182#c41 Signed-off-by: Lyude Paul Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_display.c | 26 +++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 42e7c35e3fba..e4024af5a46f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -377,15 +377,29 @@ nouveau_display_acpi_ntfy(struct notifier_block *nb, unsigned long val, { struct nouveau_drm *drm = container_of(nb, typeof(*drm), acpi_nb); struct acpi_bus_event *info = data; + int ret; if (!strcmp(info->device_class, ACPI_VIDEO_CLASS)) { if (info->type == ACPI_VIDEO_NOTIFY_PROBE) { - /* - * This may be the only indication we receive of a - * connector hotplug on a runtime suspended GPU, - * schedule hpd_work to check. - */ - schedule_work(&drm->hpd_work); + ret = pm_runtime_get(drm->dev->dev); + if (ret == 1 || ret == -EACCES) { + /* If the GPU is already awake, or in a state + * where we can't wake it up, it can handle + * it's own hotplug events. + */ + pm_runtime_put_autosuspend(drm->dev->dev); + } else if (ret == 0) { + /* This may be the only indication we receive + * of a connector hotplug on a runtime + * suspended GPU, schedule hpd_work to check. + */ + NV_DEBUG(drm, "ACPI requested connector reprobe\n"); + schedule_work(&drm->hpd_work); + pm_runtime_put_noidle(drm->dev->dev); + } else { + NV_WARN(drm, "Dropped ACPI reprobe event due to RPM error: %d\n", + ret); + } /* acpi-video should not generate keypresses for this */ return NOTIFY_BAD; From 2f7ca781fd382cf8dde73ed36dfdd93fd05b3332 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 7 Aug 2018 17:32:48 -0400 Subject: [PATCH 214/269] drm/nouveau/drm/nouveau: Don't forget to cancel hpd_work on suspend/unload Currently, there's nothing in nouveau that actually cancels this work struct. So, cancel it on suspend/unload. Otherwise, if we're unlucky enough hpd_work might try to keep running up until the system is suspended. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_display.c | 9 ++++++--- drivers/gpu/drm/nouveau/nouveau_display.h | 2 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index e4024af5a46f..540c0cbbfcee 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -442,7 +442,7 @@ nouveau_display_init(struct drm_device *dev) } void -nouveau_display_fini(struct drm_device *dev, bool suspend) +nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime) { struct nouveau_display *disp = nouveau_display(dev); struct nouveau_drm *drm = nouveau_drm(dev); @@ -467,6 +467,9 @@ nouveau_display_fini(struct drm_device *dev, bool suspend) } drm_connector_list_iter_end(&conn_iter); + if (!runtime) + cancel_work_sync(&drm->hpd_work); + drm_kms_helper_poll_disable(dev); disp->fini(dev); } @@ -635,11 +638,11 @@ nouveau_display_suspend(struct drm_device *dev, bool runtime) } } - nouveau_display_fini(dev, true); + nouveau_display_fini(dev, true, runtime); return 0; } - nouveau_display_fini(dev, true); + nouveau_display_fini(dev, true, runtime); list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct nouveau_framebuffer *nouveau_fb; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index 54aa7c3fa42d..ff92b54ce448 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -62,7 +62,7 @@ nouveau_display(struct drm_device *dev) int nouveau_display_create(struct drm_device *dev); void nouveau_display_destroy(struct drm_device *dev); int nouveau_display_init(struct drm_device *dev); -void nouveau_display_fini(struct drm_device *dev, bool suspend); +void nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime); int nouveau_display_suspend(struct drm_device *dev, bool runtime); void nouveau_display_resume(struct drm_device *dev, bool runtime); int nouveau_display_vblank_enable(struct drm_device *, unsigned int); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 04f704b77a3c..f1a119113d04 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -627,7 +627,7 @@ nouveau_drm_unload(struct drm_device *dev) nouveau_debugfs_fini(drm); if (dev->mode_config.num_crtc) - nouveau_display_fini(dev, false); + nouveau_display_fini(dev, false, false); nouveau_display_destroy(dev); nouveau_bios_takedown(dev); From d5986a1c4dcd00cb8b9eee4a56ee93868222a9a2 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 30 Aug 2018 13:16:28 -0400 Subject: [PATCH 215/269] drm/nouveau: Fix nouveau_connector_ddc_detect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks like that when we moved over to using drm_connector_for_each_possible_encoder() in nouveau, that one rather important part of this function got dropped by accident: /* Right v here */ for (i = 0; nv_encoder = NULL, i < DRM_CONNECTOR_MAX_ENCODER; i++) { int id = connector->encoder_ids[i]; if (id == 0) break; Since it's rather difficult to notice: the conditional in this loop is actually: nv_encoder = NULL, i < DRM_CONNECTOR_MAX_ENCODER Meaning that all early breaks result in nv_encoder keeping it's value, otherwise nv_encoder = NULL. Ugh. Since this got dropped, nouveau_connector_ddc_detect() now returns an encoder for every single connector, regardless of whether or not it's detected: [ 1780.056185] nouveau 0000:01:00.0: DRM: DDC responded, but no EDID for DP-2 So: fix this to ensure we only return an encoder if we actually found one, and clean up the rest of the function while we're at it since it's nearly impossible to read properly. Changes since v1: - Don't skip ddc probing for LVDS if we can't switch DDC through vga-switcheroo, just do the DDC probing without calling vga_switcheroo_lock_ddc() - skeggsb Signed-off-by: Lyude Paul Cc: Ville Syrjälä Fixes: ddba766dd07e ("drm/nouveau: Use drm_connector_for_each_possible_encoder()") Reviewed-by: Ville Syrjälä Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_connector.c | 49 ++++++++++++--------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 76660bc1ccfb..259ee5039125 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -412,9 +412,10 @@ nouveau_connector_ddc_detect(struct drm_connector *connector) struct nouveau_connector *nv_connector = nouveau_connector(connector); struct nouveau_drm *drm = nouveau_drm(dev); struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device); - struct nouveau_encoder *nv_encoder = NULL; + struct nouveau_encoder *nv_encoder = NULL, *found = NULL; struct drm_encoder *encoder; - int i, panel = -ENODEV; + int i, ret, panel = -ENODEV; + bool switcheroo_ddc = false; /* eDP panels need powering on by us (if the VBIOS doesn't default it * to on) before doing any AUX channel transactions. LVDS panel power @@ -431,37 +432,43 @@ nouveau_connector_ddc_detect(struct drm_connector *connector) drm_connector_for_each_possible_encoder(connector, encoder, i) { nv_encoder = nouveau_encoder(encoder); - if (nv_encoder->dcb->type == DCB_OUTPUT_DP) { - int ret = nouveau_dp_detect(nv_encoder); + switch (nv_encoder->dcb->type) { + case DCB_OUTPUT_DP: + ret = nouveau_dp_detect(nv_encoder); if (ret == NOUVEAU_DP_MST) return NULL; - if (ret == NOUVEAU_DP_SST) + else if (ret == NOUVEAU_DP_SST) + found = nv_encoder; + + break; + case DCB_OUTPUT_LVDS: + switcheroo_ddc = !!(vga_switcheroo_handler_flags() & + VGA_SWITCHEROO_CAN_SWITCH_DDC); + /* fall-through */ + default: + if (!nv_encoder->i2c) break; - } else - if ((vga_switcheroo_handler_flags() & - VGA_SWITCHEROO_CAN_SWITCH_DDC) && - nv_encoder->dcb->type == DCB_OUTPUT_LVDS && - nv_encoder->i2c) { - int ret; - vga_switcheroo_lock_ddc(dev->pdev); - ret = nvkm_probe_i2c(nv_encoder->i2c, 0x50); - vga_switcheroo_unlock_ddc(dev->pdev); - if (ret) - break; - } else - if (nv_encoder->i2c) { + + if (switcheroo_ddc) + vga_switcheroo_lock_ddc(dev->pdev); if (nvkm_probe_i2c(nv_encoder->i2c, 0x50)) - break; + found = nv_encoder; + if (switcheroo_ddc) + vga_switcheroo_unlock_ddc(dev->pdev); + + break; } + if (found) + break; } /* eDP panel not detected, restore panel power GPIO to previous * state to avoid confusing the SOR for other output types. */ - if (!nv_encoder && panel == 0) + if (!found && panel == 0) nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, panel); - return nv_encoder; + return found; } static struct nouveau_encoder * From a43b16dda2d7485f5c5aed075c1dc9785e339515 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 28 Aug 2018 14:10:34 +1000 Subject: [PATCH 216/269] drm/nouveau: fix oops in client init failure path The NV_ERROR macro requires drm->client to be initialised, which it may not be at this stage of the init process. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index f1a119113d04..74d2283f2c28 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -230,7 +230,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, mutex_unlock(&drm->master.lock); } if (ret) { - NV_ERROR(drm, "Client allocation failed: %d\n", ret); + NV_PRINTK(err, cli, "Client allocation failed: %d\n", ret); goto done; } @@ -240,37 +240,37 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, }, sizeof(struct nv_device_v0), &cli->device); if (ret) { - NV_ERROR(drm, "Device allocation failed: %d\n", ret); + NV_PRINTK(err, cli, "Device allocation failed: %d\n", ret); goto done; } ret = nvif_mclass(&cli->device.object, mmus); if (ret < 0) { - NV_ERROR(drm, "No supported MMU class\n"); + NV_PRINTK(err, cli, "No supported MMU class\n"); goto done; } ret = nvif_mmu_init(&cli->device.object, mmus[ret].oclass, &cli->mmu); if (ret) { - NV_ERROR(drm, "MMU allocation failed: %d\n", ret); + NV_PRINTK(err, cli, "MMU allocation failed: %d\n", ret); goto done; } ret = nvif_mclass(&cli->mmu.object, vmms); if (ret < 0) { - NV_ERROR(drm, "No supported VMM class\n"); + NV_PRINTK(err, cli, "No supported VMM class\n"); goto done; } ret = nouveau_vmm_init(cli, vmms[ret].oclass, &cli->vmm); if (ret) { - NV_ERROR(drm, "VMM allocation failed: %d\n", ret); + NV_PRINTK(err, cli, "VMM allocation failed: %d\n", ret); goto done; } ret = nvif_mclass(&cli->mmu.object, mems); if (ret < 0) { - NV_ERROR(drm, "No supported MEM class\n"); + NV_PRINTK(err, cli, "No supported MEM class\n"); goto done; } From 51ed833c881b9d96557c773f6a37018d79e29a46 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 28 Aug 2018 14:10:42 +1000 Subject: [PATCH 217/269] drm/nouveau/mmu: don't attempt to dereference vmm without valid instance pointer Fixes oopses in certain failure paths. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index de269eb482dd..7459def78d50 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -1423,7 +1423,7 @@ nvkm_vmm_get(struct nvkm_vmm *vmm, u8 page, u64 size, struct nvkm_vma **pvma) void nvkm_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) { - if (vmm->func->part && inst) { + if (inst && vmm->func->part) { mutex_lock(&vmm->mutex); vmm->func->part(vmm, inst); mutex_unlock(&vmm->mutex); From 0a6986c6595e9afd20ff7280dab36431c1e467f8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 4 Sep 2018 15:56:57 +1000 Subject: [PATCH 218/269] drm/nouveau/TBDdevinit: don't fail when PMU/PRE_OS is missing from VBIOS This Falcon application doesn't appear to be present on some newer systems, so let's not fail init if we can't find it. TBD: is there a way to determine whether it *should* be there? Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c index b80618e35491..d65959ef0564 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c @@ -158,7 +158,8 @@ gm200_devinit_post(struct nvkm_devinit *base, bool post) } /* load and execute some other ucode image (bios therm?) */ - return pmu_load(init, 0x01, post, NULL, NULL); + pmu_load(init, 0x01, post, NULL, NULL); + return 0; } static const struct nvkm_devinit_func From 606557708fa06ebf21372d8fabf6f97529ab2349 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 4 Sep 2018 15:57:04 +1000 Subject: [PATCH 219/269] drm/nouveau/disp: remove unused struct member Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c | 1 - drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index be9e7f8c3b23..4b6973f90309 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -245,7 +245,6 @@ nvkm_outp_ctor(const struct nvkm_outp_func *func, struct nvkm_disp *disp, outp->index = index; outp->info = *dcbE; outp->i2c = nvkm_i2c_bus_find(i2c, dcbE->i2c_index); - outp->or = ffs(outp->info.or) - 1; OUTP_DBG(outp, "type %02x loc %d or %d link %d con %x " "edid %x bus %d head %x", diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h index ea84d7d5741a..776e36972daa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h @@ -13,7 +13,6 @@ struct nvkm_outp { struct dcb_output info; struct nvkm_i2c_bus *i2c; - int or; struct list_head head; struct nvkm_conn *conn; From f6d52b2172b1adfde010df34730290c282ee641b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 4 Sep 2018 15:57:07 +1000 Subject: [PATCH 220/269] drm/nouveau/disp: move eDP panel power handling We need to do this earlier to prevent aux channel timeouts in resume paths on certain systems. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_connector.c | 23 +----------- drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c | 37 +++++++++++++++++-- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 259ee5039125..247f72cc4d10 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -409,26 +409,11 @@ static struct nouveau_encoder * nouveau_connector_ddc_detect(struct drm_connector *connector) { struct drm_device *dev = connector->dev; - struct nouveau_connector *nv_connector = nouveau_connector(connector); - struct nouveau_drm *drm = nouveau_drm(dev); - struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device); struct nouveau_encoder *nv_encoder = NULL, *found = NULL; struct drm_encoder *encoder; - int i, ret, panel = -ENODEV; + int i, ret; bool switcheroo_ddc = false; - /* eDP panels need powering on by us (if the VBIOS doesn't default it - * to on) before doing any AUX channel transactions. LVDS panel power - * is handled by the SOR itself, and not required for LVDS DDC. - */ - if (nv_connector->type == DCB_CONNECTOR_eDP) { - panel = nvkm_gpio_get(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff); - if (panel == 0) { - nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 1); - msleep(300); - } - } - drm_connector_for_each_possible_encoder(connector, encoder, i) { nv_encoder = nouveau_encoder(encoder); @@ -462,12 +447,6 @@ nouveau_connector_ddc_detect(struct drm_connector *connector) break; } - /* eDP panel not detected, restore panel power GPIO to previous - * state to avoid confusing the SOR for other output types. - */ - if (!found && panel == 0) - nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, panel); - return found; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 7c5bed29ffef..bb34ee77458e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -491,7 +492,7 @@ done: return ret; } -static void +static bool nvkm_dp_enable(struct nvkm_dp *dp, bool enable) { struct nvkm_i2c_aux *aux = dp->aux; @@ -505,7 +506,7 @@ nvkm_dp_enable(struct nvkm_dp *dp, bool enable) if (!nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, dp->dpcd, sizeof(dp->dpcd))) - return; + return true; } if (dp->present) { @@ -515,6 +516,7 @@ nvkm_dp_enable(struct nvkm_dp *dp, bool enable) } atomic_set(&dp->lt.done, 0); + return false; } static int @@ -555,9 +557,38 @@ nvkm_dp_fini(struct nvkm_outp *outp) static void nvkm_dp_init(struct nvkm_outp *outp) { + struct nvkm_gpio *gpio = outp->disp->engine.subdev.device->gpio; struct nvkm_dp *dp = nvkm_dp(outp); + nvkm_notify_put(&dp->outp.conn->hpd); - nvkm_dp_enable(dp, true); + + /* eDP panels need powering on by us (if the VBIOS doesn't default it + * to on) before doing any AUX channel transactions. LVDS panel power + * is handled by the SOR itself, and not required for LVDS DDC. + */ + if (dp->outp.conn->info.type == DCB_CONNECTOR_eDP) { + int power = nvkm_gpio_get(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff); + if (power == 0) + nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 1); + + /* We delay here unconditionally, even if already powered, + * because some laptop panels having a significant resume + * delay before the panel begins responding. + * + * This is likely a bit of a hack, but no better idea for + * handling this at the moment. + */ + msleep(300); + + /* If the eDP panel can't be detected, we need to restore + * the panel power GPIO to avoid breaking another output. + */ + if (!nvkm_dp_enable(dp, true) && power == 0) + nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 0); + } else { + nvkm_dp_enable(dp, true); + } + nvkm_notify_get(&dp->hpd); } From e04cfdc9b7398c60dbc70212415ea63b6c6a93ae Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 4 Sep 2018 15:57:09 +1000 Subject: [PATCH 221/269] drm/nouveau/disp: fix DP disable race If a HPD pulse signalling the need to retrain the link occurs between the KMS driver releasing the output and the supervisor interrupt that finishes the teardown, it was possible get a NULL-ptr deref. Avoid this by marking the link as inactive earlier. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c | 17 ++++++++++++----- drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c | 6 +++--- drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h | 3 ++- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index bb34ee77458e..5f301e632599 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -413,14 +413,10 @@ nvkm_dp_train(struct nvkm_dp *dp, u32 dataKBps) } static void -nvkm_dp_release(struct nvkm_outp *outp, struct nvkm_ior *ior) +nvkm_dp_disable(struct nvkm_outp *outp, struct nvkm_ior *ior) { struct nvkm_dp *dp = nvkm_dp(outp); - /* Prevent link from being retrained if sink sends an IRQ. */ - atomic_set(&dp->lt.done, 0); - ior->dp.nr = 0; - /* Execute DisableLT script from DP Info Table. */ nvbios_init(&ior->disp->engine.subdev, dp->info.script[4], init.outp = &dp->outp.info; @@ -429,6 +425,16 @@ nvkm_dp_release(struct nvkm_outp *outp, struct nvkm_ior *ior) ); } +static void +nvkm_dp_release(struct nvkm_outp *outp) +{ + struct nvkm_dp *dp = nvkm_dp(outp); + + /* Prevent link from being retrained if sink sends an IRQ. */ + atomic_set(&dp->lt.done, 0); + dp->outp.ior->dp.nr = 0; +} + static int nvkm_dp_acquire(struct nvkm_outp *outp) { @@ -607,6 +613,7 @@ nvkm_dp_func = { .fini = nvkm_dp_fini, .acquire = nvkm_dp_acquire, .release = nvkm_dp_release, + .disable = nvkm_dp_disable, }; static int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c index f89c7b977aa5..def005dd5fda 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c @@ -501,11 +501,11 @@ nv50_disp_super_2_0(struct nv50_disp *disp, struct nvkm_head *head) nv50_disp_super_ied_off(head, ior, 2); /* If we're shutting down the OR's only active head, execute - * the output path's release function. + * the output path's disable function. */ if (ior->arm.head == (1 << head->id)) { - if ((outp = ior->arm.outp) && outp->func->release) - outp->func->release(outp, ior); + if ((outp = ior->arm.outp) && outp->func->disable) + outp->func->disable(outp, ior); } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index 4b6973f90309..9fcaf3147eb8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -93,6 +93,8 @@ nvkm_outp_release(struct nvkm_outp *outp, u8 user) if (ior) { outp->acquired &= ~user; if (!outp->acquired) { + if (outp->func->release && outp->ior) + outp->func->release(outp); outp->ior->asy.outp = NULL; outp->ior = NULL; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h index 776e36972daa..96272ecccb59 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h @@ -40,7 +40,8 @@ struct nvkm_outp_func { void (*init)(struct nvkm_outp *); void (*fini)(struct nvkm_outp *); int (*acquire)(struct nvkm_outp *); - void (*release)(struct nvkm_outp *, struct nvkm_ior *); + void (*release)(struct nvkm_outp *); + void (*disable)(struct nvkm_outp *, struct nvkm_ior *); }; #define OUTP_MSG(o,l,f,a...) do { \ From 53b0cc46f27cfc2cadca609b503a7d92b5185a47 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 4 Sep 2018 15:57:11 +1000 Subject: [PATCH 222/269] drm/nouveau/disp/gm200-: enforce identity-mapped SOR assignment for LVDS/eDP panels Fixes eDP backlight issues on more recent laptops. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c | 14 ++++++++++++++ drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c | 15 ++++++++++++--- drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h | 1 + 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c index 32fa94a9773f..cbd33e87b799 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c @@ -275,6 +275,7 @@ nvkm_disp_oneinit(struct nvkm_engine *engine) struct nvkm_outp *outp, *outt, *pair; struct nvkm_conn *conn; struct nvkm_head *head; + struct nvkm_ior *ior; struct nvbios_connE connE; struct dcb_output dcbE; u8 hpd = 0, ver, hdr; @@ -399,6 +400,19 @@ nvkm_disp_oneinit(struct nvkm_engine *engine) return ret; } + /* Enforce identity-mapped SOR assignment for panels, which have + * certain bits (ie. backlight controls) wired to a specific SOR. + */ + list_for_each_entry(outp, &disp->outp, head) { + if (outp->conn->info.type == DCB_CONNECTOR_LVDS || + outp->conn->info.type == DCB_CONNECTOR_eDP) { + ior = nvkm_ior_find(disp, SOR, ffs(outp->info.or) - 1); + if (!WARN_ON(!ior)) + ior->identity = true; + outp->identity = true; + } + } + i = 0; list_for_each_entry(head, &disp->head, head) i = max(i, head->id + 1); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index e0b4e0c5704e..19911211a12a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -16,6 +16,7 @@ struct nvkm_ior { char name[8]; struct list_head head; + bool identity; struct nvkm_ior_state { struct nvkm_outp *outp; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index 9fcaf3147eb8..c62030c96fba 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -129,17 +129,26 @@ nvkm_outp_acquire(struct nvkm_outp *outp, u8 user) if (proto == UNKNOWN) return -ENOSYS; + /* Deal with panels requiring identity-mapped SOR assignment. */ + if (outp->identity) { + ior = nvkm_ior_find(outp->disp, SOR, ffs(outp->info.or) - 1); + if (WARN_ON(!ior)) + return -ENOSPC; + return nvkm_outp_acquire_ior(outp, user, ior); + } + /* First preference is to reuse the OR that is currently armed * on HW, if any, in order to prevent unnecessary switching. */ list_for_each_entry(ior, &outp->disp->ior, head) { - if (!ior->asy.outp && ior->arm.outp == outp) + if (!ior->identity && !ior->asy.outp && ior->arm.outp == outp) return nvkm_outp_acquire_ior(outp, user, ior); } /* Failing that, a completely unused OR is the next best thing. */ list_for_each_entry(ior, &outp->disp->ior, head) { - if (!ior->asy.outp && ior->type == type && !ior->arm.outp && + if (!ior->identity && + !ior->asy.outp && ior->type == type && !ior->arm.outp && (ior->func->route.set || ior->id == __ffs(outp->info.or))) return nvkm_outp_acquire_ior(outp, user, ior); } @@ -148,7 +157,7 @@ nvkm_outp_acquire(struct nvkm_outp *outp, u8 user) * but will be released during the next modeset. */ list_for_each_entry(ior, &outp->disp->ior, head) { - if (!ior->asy.outp && ior->type == type && + if (!ior->identity && !ior->asy.outp && ior->type == type && (ior->func->route.set || ior->id == __ffs(outp->info.or))) return nvkm_outp_acquire_ior(outp, user, ior); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h index 96272ecccb59..6c8aa5cfed9d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h @@ -16,6 +16,7 @@ struct nvkm_outp { struct list_head head; struct nvkm_conn *conn; + bool identity; /* Assembly state. */ #define NVKM_OUTP_PRIV 1 From e2c631ba75a7e727e8db0a9d30a06bfd434adb3a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Sep 2018 10:41:58 +0200 Subject: [PATCH 223/269] clocksource: Revert "Remove kthread" I turns out that the silly spawn kthread from worker was actually needed. clocksource_watchdog_kthread() cannot be called directly from clocksource_watchdog_work(), because clocksource_select() calls timekeeping_notify() which uses stop_machine(). One cannot use stop_machine() from a workqueue() due lock inversions wrt CPU hotplug. Revert the patch but add a comment that explain why we jump through such apparently silly hoops. Fixes: 7197e77abcb6 ("clocksource: Remove kthread") Reported-by: Siegfried Metz Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Niklas Cassel Tested-by: Kevin Shanahan Tested-by: viktor_jaegerskuepper@freenet.de Tested-by: Siegfried Metz Cc: rafael.j.wysocki@intel.com Cc: len.brown@intel.com Cc: diego.viola@gmail.com Cc: rui.zhang@intel.com Cc: bjorn.andersson@linaro.org Link: https://lkml.kernel.org/r/20180905084158.GR24124@hirez.programming.kicks-ass.net --- kernel/time/clocksource.c | 40 +++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f74fb00d8064..0e6e97a01942 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -133,19 +133,40 @@ static void inline clocksource_watchdog_unlock(unsigned long *flags) spin_unlock_irqrestore(&watchdog_lock, *flags); } +static int clocksource_watchdog_kthread(void *data); +static void __clocksource_change_rating(struct clocksource *cs, int rating); + /* * Interval: 0.5sec Threshold: 0.0625s */ #define WATCHDOG_INTERVAL (HZ >> 1) #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) +static void clocksource_watchdog_work(struct work_struct *work) +{ + /* + * We cannot directly run clocksource_watchdog_kthread() here, because + * clocksource_select() calls timekeeping_notify() which uses + * stop_machine(). One cannot use stop_machine() from a workqueue() due + * lock inversions wrt CPU hotplug. + * + * Also, we only ever run this work once or twice during the lifetime + * of the kernel, so there is no point in creating a more permanent + * kthread for this. + * + * If kthread_run fails the next watchdog scan over the + * watchdog_list will find the unstable clock again. + */ + kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); +} + static void __clocksource_unstable(struct clocksource *cs) { cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |= CLOCK_SOURCE_UNSTABLE; /* - * If the clocksource is registered clocksource_watchdog_work() will + * If the clocksource is registered clocksource_watchdog_kthread() will * re-rate and re-select. */ if (list_empty(&cs->list)) { @@ -156,7 +177,7 @@ static void __clocksource_unstable(struct clocksource *cs) if (cs->mark_unstable) cs->mark_unstable(cs); - /* kick clocksource_watchdog_work() */ + /* kick clocksource_watchdog_kthread() */ if (finished_booting) schedule_work(&watchdog_work); } @@ -166,7 +187,7 @@ static void __clocksource_unstable(struct clocksource *cs) * @cs: clocksource to be marked unstable * * This function is called by the x86 TSC code to mark clocksources as unstable; - * it defers demotion and re-selection to a work. + * it defers demotion and re-selection to a kthread. */ void clocksource_mark_unstable(struct clocksource *cs) { @@ -391,9 +412,7 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs) } } -static void __clocksource_change_rating(struct clocksource *cs, int rating); - -static int __clocksource_watchdog_work(void) +static int __clocksource_watchdog_kthread(void) { struct clocksource *cs, *tmp; unsigned long flags; @@ -418,12 +437,13 @@ static int __clocksource_watchdog_work(void) return select; } -static void clocksource_watchdog_work(struct work_struct *work) +static int clocksource_watchdog_kthread(void *data) { mutex_lock(&clocksource_mutex); - if (__clocksource_watchdog_work()) + if (__clocksource_watchdog_kthread()) clocksource_select(); mutex_unlock(&clocksource_mutex); + return 0; } static bool clocksource_is_watchdog(struct clocksource *cs) @@ -442,7 +462,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) static void clocksource_select_watchdog(bool fallback) { } static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } static inline void clocksource_resume_watchdog(void) { } -static inline int __clocksource_watchdog_work(void) { return 0; } +static inline int __clocksource_watchdog_kthread(void) { return 0; } static bool clocksource_is_watchdog(struct clocksource *cs) { return false; } void clocksource_mark_unstable(struct clocksource *cs) { } @@ -810,7 +830,7 @@ static int __init clocksource_done_booting(void) /* * Run the watchdog first to eliminate unstable clock sources */ - __clocksource_watchdog_work(); + __clocksource_watchdog_kthread(); clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; From 8f5c5fcf353302374b36232d6885c1a3b579e5ca Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 4 Sep 2018 14:54:55 -0700 Subject: [PATCH 224/269] tipc: call start and done ops directly in __tipc_nl_compat_dumpit() __tipc_nl_compat_dumpit() uses a netlink_callback on stack, so the only way to align it with other ->dumpit() call path is calling tipc_dump_start() and tipc_dump_done() directly inside it. Otherwise ->dumpit() would always get NULL from cb->args[]. But tipc_dump_start() uses sock_net(cb->skb->sk) to retrieve net pointer, the cb->skb here doesn't set skb->sk, the net pointer is saved in msg->net instead, so introduce a helper function __tipc_dump_start() to pass in msg->net. Ying pointed out cb->args[0...3] are already used by other callbacks on this call path, so we can't use cb->args[0] any more, use cb->args[4] instead. Fixes: 9a07efa9aea2 ("tipc: switch to rhashtable iterator") Reported-and-tested-by: syzbot+e93a2c41f91b8e2c7d9b@syzkaller.appspotmail.com Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Cong Wang Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 2 ++ net/tipc/socket.c | 17 +++++++++++------ net/tipc/socket.h | 1 + 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index a2f76743c73a..82f665728382 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -185,6 +185,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, return -ENOMEM; buf->sk = msg->dst_sk; + __tipc_dump_start(&cb, msg->net); do { int rem; @@ -216,6 +217,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, err = 0; err_out: + tipc_dump_done(&cb); kfree_skb(buf); if (err == -EMSGSIZE) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a0ff8bffc96b..3f03ddd0e35b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3230,7 +3230,7 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, struct netlink_callback *cb, struct tipc_sock *tsk)) { - struct rhashtable_iter *iter = (void *)cb->args[0]; + struct rhashtable_iter *iter = (void *)cb->args[4]; struct tipc_sock *tsk; int err; @@ -3266,8 +3266,14 @@ EXPORT_SYMBOL(tipc_nl_sk_walk); int tipc_dump_start(struct netlink_callback *cb) { - struct rhashtable_iter *iter = (void *)cb->args[0]; - struct net *net = sock_net(cb->skb->sk); + return __tipc_dump_start(cb, sock_net(cb->skb->sk)); +} +EXPORT_SYMBOL(tipc_dump_start); + +int __tipc_dump_start(struct netlink_callback *cb, struct net *net) +{ + /* tipc_nl_name_table_dump() uses cb->args[0...3]. */ + struct rhashtable_iter *iter = (void *)cb->args[4]; struct tipc_net *tn = tipc_net(net); if (!iter) { @@ -3275,17 +3281,16 @@ int tipc_dump_start(struct netlink_callback *cb) if (!iter) return -ENOMEM; - cb->args[0] = (long)iter; + cb->args[4] = (long)iter; } rhashtable_walk_enter(&tn->sk_rht, iter); return 0; } -EXPORT_SYMBOL(tipc_dump_start); int tipc_dump_done(struct netlink_callback *cb) { - struct rhashtable_iter *hti = (void *)cb->args[0]; + struct rhashtable_iter *hti = (void *)cb->args[4]; rhashtable_walk_exit(hti); kfree(hti); diff --git a/net/tipc/socket.h b/net/tipc/socket.h index d43032e26532..5e575f205afe 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -69,5 +69,6 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, struct netlink_callback *cb, struct tipc_sock *tsk)); int tipc_dump_start(struct netlink_callback *cb); +int __tipc_dump_start(struct netlink_callback *cb, struct net *net); int tipc_dump_done(struct netlink_callback *cb); #endif From da4dfaf8428d9f71e2ac4f736bacb81adab36504 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Fri, 7 Sep 2018 08:02:05 +0200 Subject: [PATCH 225/269] i2c: xiic: Record xilinx i2c with Zynq fragment Include xilinx soft i2c controller to Zynq fragment to make clear who is responsible for it. Signed-off-by: Michal Simek Signed-off-by: Wolfram Sang --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9ad052aeac39..d870cb57c887 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2311,6 +2311,7 @@ F: drivers/clocksource/cadence_ttc_timer.c F: drivers/i2c/busses/i2c-cadence.c F: drivers/mmc/host/sdhci-of-arasan.c F: drivers/edac/synopsys_edac.c +F: drivers/i2c/busses/i2c-xiic.c ARM64 PORT (AARCH64 ARCHITECTURE) M: Catalin Marinas From 694556d54f354d3fe43bb2e61fd6103cca2638a4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 23 Aug 2018 09:58:27 +0100 Subject: [PATCH 226/269] KVM: arm/arm64: Clean dcache to PoC when changing PTE due to CoW When triggering a CoW, we unmap the RO page via an MMU notifier (invalidate_range_start), and then populate the new PTE using another one (change_pte). In the meantime, we'll have copied the old page into the new one. The problem is that the data for the new page is sitting in the cache, and should the guest have an uncached mapping to that page (or its MMU off), following accesses will bypass the cache. In a way, this is similar to what happens on a translation fault: We need to clean the page to the PoC before mapping it. So let's just do that. This fixes a KVM unit test regression observed on a HiSilicon platform, and subsequently reproduced on Seattle. Fixes: a9c0e12ebee5 ("KVM: arm/arm64: Only clean the dcache on translation fault") Cc: stable@vger.kernel.org # v4.16+ Reported-by: Mike Galbraith Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/mmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 91aaf73b00df..111a660be3be 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1860,13 +1860,20 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) { unsigned long end = hva + PAGE_SIZE; + kvm_pfn_t pfn = pte_pfn(pte); pte_t stage2_pte; if (!kvm->arch.pgd) return; trace_kvm_set_spte_hva(hva); - stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2); + + /* + * We've moved a page around, probably through CoW, so let's treat it + * just like a translation fault and clean the cache to the PoC. + */ + clean_dcache_guest_page(pfn, PAGE_SIZE); + stage2_pte = pfn_pte(pfn, PAGE_S2); handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte); } From 7d14919c0d475a795c0127631ac8ecb2b0f31831 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 23 Aug 2018 11:51:43 +0100 Subject: [PATCH 227/269] arm64: KVM: Only force FPEXC32_EL2.EN if trapping FPSIMD If trapping FPSIMD in the context of an AArch32 guest, it is critical to set FPEXC32_EL2.EN to 1 so that the trapping is taken to EL2 and not EL1. Conversely, it is just as critical *not* to set FPEXC32_EL2.EN to 1 if we're not going to trap FPSIMD, as we then corrupt the existing VFP state. Moving the call to __activate_traps_fpsimd32 to the point where we know for sure that we are going to trap ensures that we don't set that bit spuriously. Fixes: e6b673b741ea ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing") Cc: stable@vger.kernel.org # v4.18 Cc: Dave Martin Reported-by: Alexander Graf Tested-by: Alexander Graf Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp/switch.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index d496ef579859..ca46153d7915 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -98,8 +98,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu) val = read_sysreg(cpacr_el1); val |= CPACR_EL1_TTA; val &= ~CPACR_EL1_ZEN; - if (!update_fp_enabled(vcpu)) + if (!update_fp_enabled(vcpu)) { val &= ~CPACR_EL1_FPEN; + __activate_traps_fpsimd32(vcpu); + } write_sysreg(val, cpacr_el1); @@ -114,8 +116,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) val = CPTR_EL2_DEFAULT; val |= CPTR_EL2_TTA | CPTR_EL2_TZ; - if (!update_fp_enabled(vcpu)) + if (!update_fp_enabled(vcpu)) { val |= CPTR_EL2_TFP; + __activate_traps_fpsimd32(vcpu); + } write_sysreg(val, cptr_el2); } @@ -129,7 +133,6 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); - __activate_traps_fpsimd32(vcpu); if (has_vhe()) activate_traps_vhe(vcpu); else From a35381e10dc46dd75e65e4b3832d9a0005d48d44 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 23 Aug 2018 10:18:14 +0100 Subject: [PATCH 228/269] KVM: Remove obsolete kvm_unmap_hva notifier backend kvm_unmap_hva is long gone, and we only have kvm_unmap_hva_range to deal with. Drop the now obsolete code. Fixes: fb1522e099f0 ("KVM: update to new mmu_notifier semantic v2") Cc: James Hogan Reviewed-by: Paolo Bonzini Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 1 - arch/arm64/include/asm/kvm_host.h | 1 - arch/mips/include/asm/kvm_host.h | 1 - arch/mips/kvm/mmu.c | 10 ---------- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/mmu.c | 5 ----- virt/kvm/arm/mmu.c | 12 ------------ virt/kvm/arm/trace.h | 15 --------------- 8 files changed, 46 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 79906cecb091..3ad482d2f1eb 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -223,7 +223,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f26055f2306e..8e6d46df38aa 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -357,7 +357,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index a9af1d2dcd69..2c1c53d12179 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -931,7 +931,6 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, bool write); #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index ee64db032793..d8dcdb350405 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -512,16 +512,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, return 1; } -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) -{ - unsigned long end = hva + PAGE_SIZE; - - handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL); - - kvm_mips_callbacks->flush_shadow_all(kvm); - return 0; -} - int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 00ddb0c9e612..e6a33420b871 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1450,7 +1450,6 @@ asmlinkage void kvm_spurious_fault(void); ____kvm_handle_fault_on_reboot(insn, "") #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a282321329b5..d440154e8938 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1853,11 +1853,6 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); } -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) -{ - return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); -} - int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 111a660be3be..ed162a6c57c5 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1817,18 +1817,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *dat return 0; } -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) -{ - unsigned long end = hva + PAGE_SIZE; - - if (!kvm->arch.pgd) - return 0; - - trace_kvm_unmap_hva(hva); - handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL); - return 0; -} - int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h index e53b596f483b..57b3edebbb40 100644 --- a/virt/kvm/arm/trace.h +++ b/virt/kvm/arm/trace.h @@ -134,21 +134,6 @@ TRACE_EVENT(kvm_mmio_emulate, __entry->vcpu_pc, __entry->instr, __entry->cpsr) ); -TRACE_EVENT(kvm_unmap_hva, - TP_PROTO(unsigned long hva), - TP_ARGS(hva), - - TP_STRUCT__entry( - __field( unsigned long, hva ) - ), - - TP_fast_assign( - __entry->hva = hva; - ), - - TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva) -); - TRACE_EVENT(kvm_unmap_hva_range, TP_PROTO(unsigned long start, unsigned long end), TP_ARGS(start, end), From df3190e22016abf74ef67c9691e9fa1012a66bd5 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 13 Aug 2018 17:04:53 +0100 Subject: [PATCH 229/269] arm64: KVM: Remove pgd_lock The lock has never been used and the page tables are protected by mmu_lock in struct kvm. Reviewed-by: Suzuki K Poulose Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8e6d46df38aa..3d6d7336f871 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -61,8 +61,7 @@ struct kvm_arch { u64 vmid_gen; u32 vmid; - /* 1-level 2nd stage table and lock */ - spinlock_t pgd_lock; + /* 1-level 2nd stage table, protected by kvm->mmu_lock */ pgd_t *pgd; /* VTTBR value associated with above pgd and vmid */ From b5861e5cf2fcf83031ea3e26b0a69d887adf7d21 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 3 Sep 2018 15:20:22 +0300 Subject: [PATCH 230/269] KVM: nVMX: Fix loss of pending IRQ/NMI before entering L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consider the case L1 had a IRQ/NMI event until it executed VMLAUNCH/VMRESUME which wasn't delivered because it was disallowed (e.g. interrupts disabled). When L1 executes VMLAUNCH/VMRESUME, L0 needs to evaluate if this pending event should cause an exit from L2 to L1 or delivered directly to L2 (e.g. In case L1 don't intercept EXTERNAL_INTERRUPT). Usually this would be handled by L0 requesting a IRQ/NMI window by setting VMCS accordingly. However, this setting was done on VMCS01 and now VMCS02 is active instead. Thus, when L1 executes VMLAUNCH/VMRESUME we force L0 to perform pending event evaluation by requesting a KVM_REQ_EVENT. Note that above scenario exists when L1 KVM is about to enter L2 but requests an "immediate-exit". As in this case, L1 will disable-interrupts and then send a self-IPI before entering L2. Reviewed-by: Nikita Leshchenko Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Liran Alon Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f910d33858d9..533a327372c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12537,8 +12537,11 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); bool from_vmentry = !!exit_qual; u32 dummy_exit_qual; + u32 vmcs01_cpu_exec_ctrl; int r = 0; + vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) @@ -12574,6 +12577,25 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu); } + /* + * If L1 had a pending IRQ/NMI until it executed + * VMLAUNCH/VMRESUME which wasn't delivered because it was + * disallowed (e.g. interrupts disabled), L0 needs to + * evaluate if this pending event should cause an exit from L2 + * to L1 or delivered directly to L2 (e.g. In case L1 don't + * intercept EXTERNAL_INTERRUPT). + * + * Usually this would be handled by L0 requesting a + * IRQ/NMI window by setting VMCS accordingly. However, + * this setting was done on VMCS01 and now VMCS02 is active + * instead. Thus, we force L0 to perform pending event + * evaluation by requesting a KVM_REQ_EVENT. + */ + if (vmcs01_cpu_exec_ctrl & + (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) { + kvm_make_request(KVM_REQ_EVENT, vcpu); + } + /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet From bdf7ffc89922a52a4f08a12f7421ea24bb7626a0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 30 Aug 2018 10:03:30 +0800 Subject: [PATCH 231/269] KVM: LAPIC: Fix pv ipis out-of-bounds access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dan Carpenter reported that the untrusted data returns from kvm_register_read() results in the following static checker warning: arch/x86/kvm/lapic.c:576 kvm_pv_send_ipi() error: buffer underflow 'map->phys_map' 's32min-s32max' KVM guest can easily trigger this by executing the following assembly sequence in Ring0: mov $10, %rax mov $0xFFFFFFFF, %rbx mov $0xFFFFFFFF, %rdx mov $0, %rsi vmcall As this will cause KVM to execute the following code-path: vmx_handle_exit() -> handle_vmcall() -> kvm_emulate_hypercall() -> kvm_pv_send_ipi() which will reach out-of-bounds access. This patch fixes it by adding a check to kvm_pv_send_ipi() against map->max_apic_id, ignoring destinations that are not present and delivering the rest. We also check whether or not map->phys_map[min + i] is NULL since the max_apic_id is set to the max apic id, some phys_map maybe NULL when apic id is sparse, especially kvm unconditionally set max_apic_id to 255 to reserve enough space for any xAPIC ID. Reported-by: Dan Carpenter Reviewed-by: Liran Alon Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Liran Alon Cc: Dan Carpenter Signed-off-by: Wanpeng Li [Add second "if (min > map->max_apic_id)" to complete the fix. -Radim] Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/lapic.c | 27 ++++++++++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3ad10f634d4c..8e90488c3d56 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1455,7 +1455,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, - unsigned long ipi_bitmap_high, int min, + unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); u64 kvm_get_arch_capabilities(void); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0cefba28c864..17c0472c5b34 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -548,7 +548,7 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, } int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, - unsigned long ipi_bitmap_high, int min, + unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit) { int i; @@ -571,18 +571,31 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); + if (min > map->max_apic_id) + goto out; /* Bits above cluster_size are masked in the caller. */ - for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) { - vcpu = map->phys_map[min + i]->vcpu; - count += kvm_apic_set_irq(vcpu, &irq, NULL); + for_each_set_bit(i, &ipi_bitmap_low, + min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { + if (map->phys_map[min + i]) { + vcpu = map->phys_map[min + i]->vcpu; + count += kvm_apic_set_irq(vcpu, &irq, NULL); + } } min += cluster_size; - for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) { - vcpu = map->phys_map[min + i]->vcpu; - count += kvm_apic_set_irq(vcpu, &irq, NULL); + + if (min > map->max_apic_id) + goto out; + + for_each_set_bit(i, &ipi_bitmap_high, + min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { + if (map->phys_map[min + i]) { + vcpu = map->phys_map[min + i]->vcpu; + count += kvm_apic_set_irq(vcpu, &irq, NULL); + } } +out: rcu_read_unlock(); return count; } From f74dd480cf4e31e12971c58a1d832044db945670 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Fri, 7 Sep 2018 20:15:22 +0200 Subject: [PATCH 232/269] r8169: set TxConfig register after TX / RX is enabled, just like RxConfig Commit 3559d81e76bf ("r8169: simplify rtl_hw_start_8169") changed order of two register writes: 1) Caused RxConfig to be written before TX / RX is enabled, 2) Caused TxConfig to be written before TX / RX is enabled. At least on XIDs 10000000 ("RTL8169sb/8110sb") and 18000000 ("RTL8169sc/8110sc") such writes are ignored by the chip, leaving values in these registers intact. Change 1) was reverted by commit 05212ba8132b42 ("r8169: set RxConfig after tx/rx is enabled for RTL8169sb/8110sb devices"), however change 2) wasn't. In practice, this caused TxConfig's "InterFrameGap time" and "Max DMA Burst Size per Tx DMA Burst" bits to be zero dramatically reducing TX performance (in my tests it dropped from around 500Mbps to around 50Mbps). This patch fixes the issue by moving TxConfig register write a bit later in the code so it happens after TX / RX is already enabled. Fixes: 05212ba8132b42 ("r8169: set RxConfig after tx/rx is enabled for RTL8169sb/8110sb devices") Signed-off-by: Maciej S. Szmigiero Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index b08d51bf7a20..a1f37d58e2fe 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4634,13 +4634,13 @@ static void rtl_hw_start(struct rtl8169_private *tp) rtl_set_rx_max_size(tp); rtl_set_rx_tx_desc_registers(tp); - rtl_set_tx_config_registers(tp); RTL_W8(tp, Cfg9346, Cfg9346_Lock); /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ RTL_R8(tp, IntrMask); RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); rtl_init_rxcfg(tp); + rtl_set_tx_config_registers(tp); rtl_set_rx_mode(tp->dev); /* no early-rx interrupts */ From ecfe951f0c1b169ea4b7dd6f3a404dfedd795bc2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Sep 2018 23:55:17 +0100 Subject: [PATCH 233/269] afs: Fix cell specification to permit an empty address list Fix the cell specification mechanism to allow cells to be pre-created without having to specify at least one address (the addresses will be upcalled for). This allows the cell information preload service to avoid the need to issue loads of DNS lookups during boot to get the addresses for each cell (500+ lookups for the 'standard' cell list[*]). The lookups can be done later as each cell is accessed through the filesystem. Also remove the print statement that prints a line every time a new cell is added. [*] There are 144 cells in the list. Each cell is first looked up for an SRV record, and if that fails, for an AFSDB record. These get a list of server names, each of which then has to be looked up to get the addresses for that server. E.g.: dig srv _afs3-vlserver._udp.grand.central.org Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/proc.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 0c3285c8db95..476dcbb79713 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -98,13 +98,13 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size) goto inval; args = strchr(name, ' '); - if (!args) - goto inval; - do { - *args++ = 0; - } while(*args == ' '); - if (!*args) - goto inval; + if (args) { + do { + *args++ = 0; + } while(*args == ' '); + if (!*args) + goto inval; + } /* determine command to perform */ _debug("cmd=%s name=%s args=%s", buf, name, args); @@ -120,7 +120,6 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size) if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags)) afs_put_cell(net, cell); - printk("kAFS: Added new cell '%s'\n", name); } else { goto inval; } From 8edfe2e992b75aee3da9316e9697c531194c2f53 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 7 Sep 2018 14:21:30 +0200 Subject: [PATCH 234/269] xen/netfront: fix waiting for xenbus state change Commit 822fb18a82aba ("xen-netfront: wait xenbus state change when load module manually") added a new wait queue to wait on for a state change when the module is loaded manually. Unfortunately there is no wakeup anywhere to stop that waiting. Instead of introducing a new wait queue rename the existing module_unload_q to module_wq and use it for both purposes (loading and unloading). As any state change of the backend might be intended to stop waiting do the wake_up_all() in any case when netback_changed() is called. Fixes: 822fb18a82aba ("xen-netfront: wait xenbus state change when load module manually") Cc: #4.18 Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 73f596a90c69..9407acbd19a9 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -87,8 +87,7 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) -static DECLARE_WAIT_QUEUE_HEAD(module_load_q); -static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); +static DECLARE_WAIT_QUEUE_HEAD(module_wq); struct netfront_stats { u64 packets; @@ -1332,11 +1331,11 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_carrier_off(netdev); xenbus_switch_state(dev, XenbusStateInitialising); - wait_event(module_load_q, - xenbus_read_driver_state(dev->otherend) != - XenbusStateClosed && - xenbus_read_driver_state(dev->otherend) != - XenbusStateUnknown); + wait_event(module_wq, + xenbus_read_driver_state(dev->otherend) != + XenbusStateClosed && + xenbus_read_driver_state(dev->otherend) != + XenbusStateUnknown); return netdev; exit: @@ -2010,15 +2009,14 @@ static void netback_changed(struct xenbus_device *dev, dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state)); + wake_up_all(&module_wq); + switch (backend_state) { case XenbusStateInitialising: case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: - break; - case XenbusStateUnknown: - wake_up_all(&module_unload_q); break; case XenbusStateInitWait: @@ -2034,12 +2032,10 @@ static void netback_changed(struct xenbus_device *dev, break; case XenbusStateClosed: - wake_up_all(&module_unload_q); if (dev->state == XenbusStateClosed) break; /* Missed the backend's CLOSING state -- fallthrough */ case XenbusStateClosing: - wake_up_all(&module_unload_q); xenbus_frontend_closed(dev); break; } @@ -2147,14 +2143,14 @@ static int xennet_remove(struct xenbus_device *dev) if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) { xenbus_switch_state(dev, XenbusStateClosing); - wait_event(module_unload_q, + wait_event(module_wq, xenbus_read_driver_state(dev->otherend) == XenbusStateClosing || xenbus_read_driver_state(dev->otherend) == XenbusStateUnknown); xenbus_switch_state(dev, XenbusStateClosed); - wait_event(module_unload_q, + wait_event(module_wq, xenbus_read_driver_state(dev->otherend) == XenbusStateClosed || xenbus_read_driver_state(dev->otherend) == From a162c3511410b50f09c002fea56fea2153b679d0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 6 Sep 2018 14:50:16 -0700 Subject: [PATCH 235/269] net_sched: properly cancel netlink dump on failure When nla_put*() fails after nla_nest_start(), we need to call nla_nest_cancel() to cancel the message, otherwise we end up calling nla_nest_end() like a success. Fixes: 0ed5269f9e41 ("net/sched: add tunnel option support to act_tunnel_key") Cc: Davide Caratti Cc: Simon Horman Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 28d58bbc953e..681f6f04e7da 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -412,8 +412,10 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE, opt->type) || nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA, - opt->length * 4, opt + 1)) + opt->length * 4, opt + 1)) { + nla_nest_cancel(skb, start); return -EMSGSIZE; + } len -= sizeof(struct geneve_opt) + opt->length * 4; src += sizeof(struct geneve_opt) + opt->length * 4; @@ -427,7 +429,7 @@ static int tunnel_key_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { struct nlattr *start; - int err; + int err = -EINVAL; if (!info->options_len) return 0; @@ -439,9 +441,11 @@ static int tunnel_key_opts_dump(struct sk_buff *skb, if (info->key.tun_flags & TUNNEL_GENEVE_OPT) { err = tunnel_key_geneve_opts_dump(skb, info); if (err) - return err; + goto err_out; } else { - return -EINVAL; +err_out: + nla_nest_cancel(skb, start); + return err; } nla_nest_end(skb, start); From 5cf4a8532c992bb22a9ecd5f6d93f873f4eaccc2 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Thu, 6 Sep 2018 15:54:59 +0200 Subject: [PATCH 236/269] tcp: really ignore MSG_ZEROCOPY if no SO_ZEROCOPY According to the documentation in msg_zerocopy.rst, the SO_ZEROCOPY flag was introduced because send(2) ignores unknown message flags and any legacy application which was accidentally passing the equivalent of MSG_ZEROCOPY earlier should not see any new behaviour. Before commit f214f915e7db ("tcp: enable MSG_ZEROCOPY"), a send(2) call which passed the equivalent of MSG_ZEROCOPY without setting SO_ZEROCOPY would succeed. However, after that commit, it fails with -ENOBUFS. So it appears that the SO_ZEROCOPY flag fails to fulfill its intended purpose. Fix it. Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") Signed-off-by: Vincent Whitchurch Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 --- net/ipv4/tcp.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c996c09d095f..b2c807f67aba 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -939,9 +939,6 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size) WARN_ON_ONCE(!in_task()); - if (!sock_flag(sk, SOCK_ZEROCOPY)) - return NULL; - skb = sock_omalloc(sk, 0, GFP_KERNEL); if (!skb) return NULL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b8af2fec5ad5..10c6246396cc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1185,7 +1185,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) flags = msg->msg_flags; - if (flags & MSG_ZEROCOPY && size) { + if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { if (sk->sk_state != TCP_ESTABLISHED) { err = -EINVAL; goto out_err; From 47b7360ce563e18c524ce92b55fb4da72b3b3578 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 8 Sep 2018 12:07:26 +0200 Subject: [PATCH 237/269] x86/apic/vector: Make error return value negative activate_managed() returns EINVAL instead of -EINVAL in case of error. While this is unlikely to happen, the positive return value would cause further malfunction at the call site. Fixes: 2db1f959d9dc ("x86/vector: Handle managed interrupts proper") Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- arch/x86/kernel/apic/vector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 9f148e3d45b4..7654febd5102 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -413,7 +413,7 @@ static int activate_managed(struct irq_data *irqd) if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) { /* Something in the core code broke! Survive gracefully */ pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq); - return EINVAL; + return -EINVAL; } ret = assign_managed_vector(irqd, vector_searchmask); From 9bc4f28af75a91aea0ae383f50b0a430c4509303 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 2 Sep 2018 11:14:50 -0700 Subject: [PATCH 238/269] x86/mm: Use WRITE_ONCE() when setting PTEs When page-table entries are set, the compiler might optimize their assignment by using multiple instructions to set the PTE. This might turn into a security hazard if the user somehow manages to use the interim PTE. L1TF does not make our lives easier, making even an interim non-present PTE a security hazard. Using WRITE_ONCE() to set PTEs and friends should prevent this potential security hazard. I skimmed the differences in the binary with and without this patch. The differences are (obviously) greater when CONFIG_PARAVIRT=n as more code optimizations are possible. For better and worse, the impact on the binary with this patch is pretty small. Skimming the code did not cause anything to jump out as a security hazard, but it seems that at least move_soft_dirty_pte() caused set_pte_at() to use multiple writes. Signed-off-by: Nadav Amit Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Dave Hansen Cc: Andi Kleen Cc: Josh Poimboeuf Cc: Michal Hocko Cc: Vlastimil Babka Cc: Sean Christopherson Cc: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180902181451.80520-1-namit@vmware.com --- arch/x86/include/asm/pgtable.h | 2 +- arch/x86/include/asm/pgtable_64.h | 22 +++++++++++----------- arch/x86/mm/pgtable.c | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e4ffa565a69f..690c0307afed 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1195,7 +1195,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, return xchg(pmdp, pmd); } else { pmd_t old = *pmdp; - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); return old; } } diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index f773d5e6c8cc..ce2b59047cb8 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -55,15 +55,15 @@ struct mm_struct; void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); +static inline void native_set_pte(pte_t *ptep, pte_t pte) +{ + WRITE_ONCE(*ptep, pte); +} + static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - *ptep = native_make_pte(0); -} - -static inline void native_set_pte(pte_t *ptep, pte_t pte) -{ - *ptep = pte; + native_set_pte(ptep, native_make_pte(0)); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) @@ -73,7 +73,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); } static inline void native_pmd_clear(pmd_t *pmd) @@ -109,7 +109,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) static inline void native_set_pud(pud_t *pudp, pud_t pud) { - *pudp = pud; + WRITE_ONCE(*pudp, pud); } static inline void native_pud_clear(pud_t *pud) @@ -137,13 +137,13 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) pgd_t pgd; if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { - *p4dp = p4d; + WRITE_ONCE(*p4dp, p4d); return; } pgd = native_make_pgd(native_p4d_val(p4d)); pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); - *p4dp = native_make_p4d(native_pgd_val(pgd)); + WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); } static inline void native_p4d_clear(p4d_t *p4d) @@ -153,7 +153,7 @@ static inline void native_p4d_clear(p4d_t *p4d) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { - *pgdp = pti_set_user_pgtbl(pgdp, pgd); + WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e848a4811785..ae394552fb94 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -269,7 +269,7 @@ static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp) if (pgd_val(pgd) != 0) { pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); - *pgdp = native_make_pgd(0); + pgd_clear(pgdp); paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); pmd_free(mm, pmd); @@ -494,7 +494,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, int changed = !pte_same(*ptep, entry); if (changed && dirty) - *ptep = entry; + set_pte(ptep, entry); return changed; } @@ -509,7 +509,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed && dirty) { - *pmdp = entry; + set_pmd(pmdp, entry); /* * We had a write-protection fault here and changed the pmd * to to more permissive. No need to flush the TLB for that, @@ -529,7 +529,7 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, VM_BUG_ON(address & ~HPAGE_PUD_MASK); if (changed && dirty) { - *pudp = entry; + set_pud(pudp, entry); /* * We had a write-protection fault here and changed the pud * to to more permissive. No need to flush the TLB for that, From f0b0d88a825149ef3b06656886bc211c71dcb852 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 6 Sep 2018 16:37:24 -0700 Subject: [PATCH 239/269] kbuild: modules_install: warn when missing System.map file If there is no System.map file for "make modules_install", scripts/depmod.sh will silently exit with success, having done nothing. Since this is an unexpected situation, change it to report a Warning for the missing file. The behavior is not changed except for the Warning message. The (previous) silent success and new Warning can be reproduced by: $ make mrproper; make defconfig $ make modules; make modules_install and since System.map is produced by "make vmlinux", the steps above omit producing the System.map file. Reported-by: Masahiro Yamada Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- scripts/depmod.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/depmod.sh b/scripts/depmod.sh index e5f0aad75b96..e083bcae343f 100755 --- a/scripts/depmod.sh +++ b/scripts/depmod.sh @@ -11,6 +11,7 @@ DEPMOD=$1 KERNELRELEASE=$2 if ! test -r System.map ; then + echo "Warning: modules_install: missing 'System.map' file. Skipping depmod." >&2 exit 0 fi From 772ed869f535b4ec2b134645c951ff22de4d3f79 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:20 +0000 Subject: [PATCH 240/269] net: ena: fix surprise unplug NULL dereference kernel crash Starting with driver version 1.5.0, in case of a surprise device unplug, there is a race caused by invoking ena_destroy_device() from two different places. As a result, the readless register might be accessed after it was destroyed. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index c673ac2df65b..170830b807fe 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3409,12 +3409,12 @@ static void ena_remove(struct pci_dev *pdev) netdev->rx_cpu_rmap = NULL; } #endif /* CONFIG_RFS_ACCEL */ - - unregister_netdev(netdev); del_timer_sync(&adapter->timer_service); cancel_work_sync(&adapter->reset_task); + unregister_netdev(netdev); + /* Reset the device only if the device is running. */ if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) ena_com_dev_reset(ena_dev, adapter->reset_reason); From ef5b0771d247379c90c8bf1332ff32f7f74bff7f Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:21 +0000 Subject: [PATCH 241/269] net: ena: fix driver when PAGE_SIZE == 64kB The buffer length field in the ena rx descriptor is 16 bit, and the current driver passes a full page in each ena rx descriptor. When PAGE_SIZE equals 64kB or more, the buffer length field becomes zero. To solve this issue, limit the ena Rx descriptor to use 16kB even when allocating 64kB kernel pages. This change would not impact ena device functionality, as 16kB is still larger than maximum MTU. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++----- drivers/net/ethernet/amazon/ena/ena_netdev.h | 11 +++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 170830b807fe..69e684fd2787 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -461,7 +461,7 @@ static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, return -ENOMEM; } - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, + dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { u64_stats_update_begin(&rx_ring->syncp); @@ -478,7 +478,7 @@ static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; ena_buf->paddr = dma; - ena_buf->len = PAGE_SIZE; + ena_buf->len = ENA_PAGE_SIZE; return 0; } @@ -495,7 +495,7 @@ static void ena_free_rx_page(struct ena_ring *rx_ring, return; } - dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE, + dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE, DMA_FROM_DEVICE); __free_page(page); @@ -916,10 +916,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, do { dma_unmap_page(rx_ring->dev, dma_unmap_addr(&rx_info->ena_buf, paddr), - PAGE_SIZE, DMA_FROM_DEVICE); + ENA_PAGE_SIZE, DMA_FROM_DEVICE); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, - rx_info->page_offset, len, PAGE_SIZE); + rx_info->page_offset, len, ENA_PAGE_SIZE); netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx skb updated. len %d. data_len %d\n", diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index f1972b5ab650..7c7ae56c52cf 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -355,4 +355,15 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); int ena_get_sset_count(struct net_device *netdev, int sset); +/* The ENA buffer length fields is 16 bit long. So when PAGE_SIZE == 64kB the + * driver passas 0. + * Since the max packet size the ENA handles is ~9kB limit the buffer length to + * 16kB. + */ +#if PAGE_SIZE > SZ_16K +#define ENA_PAGE_SIZE SZ_16K +#else +#define ENA_PAGE_SIZE PAGE_SIZE +#endif + #endif /* !(ENA_H) */ From cfa324a514233b28a6934de619183eee941f02d7 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:22 +0000 Subject: [PATCH 242/269] net: ena: fix device destruction to gracefully free resources When ena_destroy_device() is called from ena_suspend(), the device is still reachable from the driver. Therefore, the driver can send a command to the device to free all resources. However, in all other cases of calling ena_destroy_device(), the device is potentially in an error state and unreachable from the driver. In these cases the driver must not send commands to the device. The current implementation does not request resource freeing from the device even when possible. We add the graceful parameter to ena_destroy_device() to enable resource freeing when possible, and use it in ena_suspend(). Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 69e684fd2787..035d47d2179a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -76,7 +76,7 @@ MODULE_DEVICE_TABLE(pci, ena_pci_tbl); static int ena_rss_init_default(struct ena_adapter *adapter); static void check_for_admin_com_state(struct ena_adapter *adapter); -static void ena_destroy_device(struct ena_adapter *adapter); +static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); static int ena_restore_device(struct ena_adapter *adapter); static void ena_tx_timeout(struct net_device *dev) @@ -1900,7 +1900,7 @@ static int ena_close(struct net_device *netdev) "Destroy failure, restarting device\n"); ena_dump_stats_to_dmesg(adapter); /* rtnl lock already obtained in dev_ioctl() layer */ - ena_destroy_device(adapter); + ena_destroy_device(adapter, false); ena_restore_device(adapter); } @@ -2550,7 +2550,7 @@ err_disable_msix: return rc; } -static void ena_destroy_device(struct ena_adapter *adapter) +static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) { struct net_device *netdev = adapter->netdev; struct ena_com_dev *ena_dev = adapter->ena_dev; @@ -2563,7 +2563,8 @@ static void ena_destroy_device(struct ena_adapter *adapter) dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); adapter->dev_up_before_reset = dev_up; - ena_com_set_admin_running_state(ena_dev, false); + if (!graceful) + ena_com_set_admin_running_state(ena_dev, false); if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) ena_down(adapter); @@ -2665,7 +2666,7 @@ static void ena_fw_reset_device(struct work_struct *work) return; } rtnl_lock(); - ena_destroy_device(adapter); + ena_destroy_device(adapter, false); ena_restore_device(adapter); rtnl_unlock(); } @@ -3467,7 +3468,7 @@ static int ena_suspend(struct pci_dev *pdev, pm_message_t state) "ignoring device reset request as the device is being suspended\n"); clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); } - ena_destroy_device(adapter); + ena_destroy_device(adapter, true); rtnl_unlock(); return 0; } From fe870c77efdf8682252545cbd3d29800d8379efc Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:23 +0000 Subject: [PATCH 243/269] net: ena: fix potential double ena_destroy_device() ena_destroy_device() can potentially be called twice. To avoid this, check that the device is running and only then proceed destroying it. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 035d47d2179a..a68c2a8d4da2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2556,6 +2556,9 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) struct ena_com_dev *ena_dev = adapter->ena_dev; bool dev_up; + if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) + return; + netif_carrier_off(netdev); del_timer_sync(&adapter->timer_service); @@ -2592,6 +2595,7 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) adapter->reset_reason = ENA_REGS_RESET_NORMAL; clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); } static int ena_restore_device(struct ena_adapter *adapter) @@ -2636,6 +2640,7 @@ static int ena_restore_device(struct ena_adapter *adapter) } } + set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); dev_err(&pdev->dev, "Device reset completed successfully\n"); From 944b28aa2982b4590d4d4dfc777cf85135dca2c0 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:24 +0000 Subject: [PATCH 244/269] net: ena: fix missing lock during device destruction acquire the rtnl_lock during device destruction to avoid using partially destroyed device. ena_remove() shares almost the same logic as ena_destroy_device(), so use ena_destroy_device() and avoid duplications. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a68c2a8d4da2..b9ce2a6a87ed 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3421,24 +3421,18 @@ static void ena_remove(struct pci_dev *pdev) unregister_netdev(netdev); - /* Reset the device only if the device is running. */ + /* If the device is running then we want to make sure the device will be + * reset to make sure no more events will be issued by the device. + */ if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) - ena_com_dev_reset(ena_dev, adapter->reset_reason); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); - ena_free_mgmnt_irq(adapter); - - ena_disable_msix(adapter); + rtnl_lock(); + ena_destroy_device(adapter, true); + rtnl_unlock(); free_netdev(netdev); - ena_com_mmio_reg_read_request_destroy(ena_dev); - - ena_com_abort_admin_commands(ena_dev); - - ena_com_wait_for_abort_completion(ena_dev); - - ena_com_admin_destroy(ena_dev); - ena_com_rss_destroy(ena_dev); ena_com_delete_debug_area(ena_dev); From 28abf4e9c9201eda5c4d29ea609d07e877b464b8 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:25 +0000 Subject: [PATCH 245/269] net: ena: fix missing calls to READ_ONCE Add READ_ONCE calls where necessary (for example when iterating over a memory field that gets updated by the hardware). Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_com.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 17f12c18d225..c37deef3bcf1 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -459,7 +459,7 @@ static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_qu cqe = &admin_queue->cq.entries[head_masked]; /* Go over all the completions */ - while ((cqe->acq_common_descriptor.flags & + while ((READ_ONCE(cqe->acq_common_descriptor.flags) & ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { /* Do not read the rest of the completion entry before the * phase bit was validated @@ -637,7 +637,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) mmiowb(); for (i = 0; i < timeout; i++) { - if (read_resp->req_id == mmio_read->seq_num) + if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num) break; udelay(1); @@ -1796,8 +1796,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) aenq_common = &aenq_e->aenq_common_desc; /* Go over all the events */ - while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == - phase) { + while ((READ_ONCE(aenq_common->flags) & + ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n", aenq_common->group, aenq_common->syndrom, (u64)aenq_common->timestamp_low + From 37dff155dcf57f6c08bf1641c5ddf9abd45f2b1f Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:26 +0000 Subject: [PATCH 246/269] net: ena: fix incorrect usage of memory barriers Added memory barriers where they were missing to support multiple architectures, and removed redundant ones. As part of removing the redundant memory barriers and improving performance, we moved to more relaxed versions of memory barriers, as well as to the more relaxed version of writel - writel_relaxed, while maintaining correctness. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_com.c | 16 +++++----- drivers/net/ethernet/amazon/ena/ena_eth_com.c | 6 ++++ drivers/net/ethernet/amazon/ena/ena_eth_com.h | 8 ++--- drivers/net/ethernet/amazon/ena/ena_netdev.c | 30 +++++++------------ 4 files changed, 26 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index c37deef3bcf1..7635c38e77dd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -464,7 +464,7 @@ static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_qu /* Do not read the rest of the completion entry before the * phase bit was validated */ - rmb(); + dma_rmb(); ena_com_handle_single_admin_completion(admin_queue, cqe); head_masked++; @@ -627,15 +627,8 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) mmio_read_reg |= mmio_read->seq_num & ENA_REGS_MMIO_REG_READ_REQ_ID_MASK; - /* make sure read_resp->req_id get updated before the hw can write - * there - */ - wmb(); + writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); - writel_relaxed(mmio_read_reg, - ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); - - mmiowb(); for (i = 0; i < timeout; i++) { if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num) break; @@ -1798,6 +1791,11 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) /* Go over all the events */ while ((READ_ONCE(aenq_common->flags) & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { + /* Make sure the phase bit (ownership) is as expected before + * reading the rest of the descriptor. + */ + dma_rmb(); + pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n", aenq_common->group, aenq_common->syndrom, (u64)aenq_common->timestamp_low + diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index ea149c134e15..1c682b76190f 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -51,6 +51,11 @@ static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( if (desc_phase != expected_phase) return NULL; + /* Make sure we read the rest of the descriptor after the phase bit + * has been read + */ + dma_rmb(); + return cdesc; } @@ -493,6 +498,7 @@ int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id) if (cdesc_phase != expected_phase) return -EAGAIN; + dma_rmb(); if (unlikely(cdesc->req_id >= io_cq->q_depth)) { pr_err("Invalid req id %d\n", cdesc->req_id); return -EINVAL; diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h index 6fdc753d9483..2f7657227cfe 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -107,8 +107,7 @@ static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq) return io_sq->q_depth - 1 - cnt; } -static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq, - bool relaxed) +static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) { u16 tail; @@ -117,10 +116,7 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq, pr_debug("write submission queue doorbell for queue: %d tail: %d\n", io_sq->qid, tail); - if (relaxed) - writel_relaxed(tail, io_sq->db_addr); - else - writel(tail, io_sq->db_addr); + writel(tail, io_sq->db_addr); return 0; } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index b9ce2a6a87ed..29b5774dd32d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -551,14 +551,9 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) rx_ring->qid, i, num); } - if (likely(i)) { - /* Add memory barrier to make sure the desc were written before - * issue a doorbell - */ - wmb(); - ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true); - mmiowb(); - } + /* ena_com_write_sq_doorbell issues a wmb() */ + if (likely(i)) + ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); rx_ring->next_to_use = next_to_use; @@ -2112,12 +2107,6 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, tx_ring->ring_size); - /* This WMB is aimed to: - * 1 - perform smp barrier before reading next_to_completion - * 2 - make sure the desc were written before trigger DB - */ - wmb(); - /* stop the queue when no more space available, the packet can have up * to sgl_size + 2. one for the meta descriptor and one for header * (if the header is larger than tx_max_header_size). @@ -2136,10 +2125,11 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) * stop the queue but meanwhile clean_tx_irq updates * next_to_completion and terminates. * The queue will remain stopped forever. - * To solve this issue this function perform rmb, check - * the wakeup condition and wake up the queue if needed. + * To solve this issue add a mb() to make sure that + * netif_tx_stop_queue() write is vissible before checking if + * there is additional space in the queue. */ - smp_rmb(); + smp_mb(); if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > ENA_TX_WAKEUP_THRESH) { @@ -2151,8 +2141,10 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (netif_xmit_stopped(txq) || !skb->xmit_more) { - /* trigger the dma engine */ - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false); + /* trigger the dma engine. ena_com_write_sq_doorbell() + * has a mb + */ + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); u64_stats_update_begin(&tx_ring->syncp); tx_ring->tx_stats.doorbells++; u64_stats_update_end(&tx_ring->syncp); From 52ea992cfac357b73180d5c051dca43bc8d20c2a Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Thu, 6 Sep 2018 21:41:40 +0530 Subject: [PATCH 247/269] net/tls: Set count of SG entries if sk_alloc_sg returns -ENOSPC tls_sw_sendmsg() allocates plaintext and encrypted SG entries using function sk_alloc_sg(). In case the number of SG entries hit MAX_SKB_FRAGS, sk_alloc_sg() returns -ENOSPC and sets the variable for current SG index to '0'. This leads to calling of function tls_push_record() with 'sg_encrypted_num_elem = 0' and later causes kernel crash. To fix this, set the number of SG elements to the number of elements in plaintext/encrypted SG arrays in case sk_alloc_sg() returns -ENOSPC. Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Vakul Garg Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 52fbe727d7c1..e28a6ff25d96 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -125,6 +125,9 @@ static int alloc_encrypted_sg(struct sock *sk, int len) &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, 0); + if (rc == -ENOSPC) + ctx->sg_encrypted_num_elem = ARRAY_SIZE(ctx->sg_encrypted_data); + return rc; } @@ -138,6 +141,9 @@ static int alloc_plaintext_sg(struct sock *sk, int len) &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size, tls_ctx->pending_open_record_frags); + if (rc == -ENOSPC) + ctx->sg_plaintext_num_elem = ARRAY_SIZE(ctx->sg_plaintext_data); + return rc; } From 5d407b071dc369c26a38398326ee2be53651cfe4 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 10 Sep 2018 02:47:05 +0900 Subject: [PATCH 248/269] ip: frags: fix crash in ip_do_fragment() A kernel crash occurrs when defragmented packet is fragmented in ip_do_fragment(). In defragment routine, skb_orphan() is called and skb->ip_defrag_offset is set. but skb->sk and skb->ip_defrag_offset are same union member. so that frag->sk is not NULL. Hence crash occurrs in skb->sk check routine in ip_do_fragment() when defragmented packet is fragmented. test commands: %iptables -t nat -I POSTROUTING -j MASQUERADE %hping3 192.168.4.2 -s 1000 -p 2000 -d 60000 splat looks like: [ 261.069429] kernel BUG at net/ipv4/ip_output.c:636! [ 261.075753] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 261.083854] CPU: 1 PID: 1349 Comm: hping3 Not tainted 4.19.0-rc2+ #3 [ 261.100977] RIP: 0010:ip_do_fragment+0x1613/0x2600 [ 261.106945] Code: e8 e2 38 e3 fe 4c 8b 44 24 18 48 8b 74 24 08 e9 92 f6 ff ff 80 3c 02 00 0f 85 da 07 00 00 48 8b b5 d0 00 00 00 e9 25 f6 ff ff <0f> 0b 0f 0b 44 8b 54 24 58 4c 8b 4c 24 18 4c 8b 5c 24 60 4c 8b 6c [ 261.127015] RSP: 0018:ffff8801031cf2c0 EFLAGS: 00010202 [ 261.134156] RAX: 1ffff1002297537b RBX: ffffed0020639e6e RCX: 0000000000000004 [ 261.142156] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880114ba9bd8 [ 261.150157] RBP: ffff880114ba8a40 R08: ffffed0022975395 R09: ffffed0022975395 [ 261.158157] R10: 0000000000000001 R11: ffffed0022975394 R12: ffff880114ba9ca4 [ 261.166159] R13: 0000000000000010 R14: ffff880114ba9bc0 R15: dffffc0000000000 [ 261.174169] FS: 00007fbae2199700(0000) GS:ffff88011b400000(0000) knlGS:0000000000000000 [ 261.183012] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 261.189013] CR2: 00005579244fe000 CR3: 0000000119bf4000 CR4: 00000000001006e0 [ 261.198158] Call Trace: [ 261.199018] ? dst_output+0x180/0x180 [ 261.205011] ? save_trace+0x300/0x300 [ 261.209018] ? ip_copy_metadata+0xb00/0xb00 [ 261.213034] ? sched_clock_local+0xd4/0x140 [ 261.218158] ? kill_l4proto+0x120/0x120 [nf_conntrack] [ 261.223014] ? rt_cpu_seq_stop+0x10/0x10 [ 261.227014] ? find_held_lock+0x39/0x1c0 [ 261.233008] ip_finish_output+0x51d/0xb50 [ 261.237006] ? ip_fragment.constprop.56+0x220/0x220 [ 261.243011] ? nf_ct_l4proto_register_one+0x5b0/0x5b0 [nf_conntrack] [ 261.250152] ? rcu_is_watching+0x77/0x120 [ 261.255010] ? nf_nat_ipv4_out+0x1e/0x2b0 [nf_nat_ipv4] [ 261.261033] ? nf_hook_slow+0xb1/0x160 [ 261.265007] ip_output+0x1c7/0x710 [ 261.269005] ? ip_mc_output+0x13f0/0x13f0 [ 261.273002] ? __local_bh_enable_ip+0xe9/0x1b0 [ 261.278152] ? ip_fragment.constprop.56+0x220/0x220 [ 261.282996] ? nf_hook_slow+0xb1/0x160 [ 261.287007] raw_sendmsg+0x21f9/0x4420 [ 261.291008] ? dst_output+0x180/0x180 [ 261.297003] ? sched_clock_cpu+0x126/0x170 [ 261.301003] ? find_held_lock+0x39/0x1c0 [ 261.306155] ? stop_critical_timings+0x420/0x420 [ 261.311004] ? check_flags.part.36+0x450/0x450 [ 261.315005] ? _raw_spin_unlock_irq+0x29/0x40 [ 261.320995] ? _raw_spin_unlock_irq+0x29/0x40 [ 261.326142] ? cyc2ns_read_end+0x10/0x10 [ 261.330139] ? raw_bind+0x280/0x280 [ 261.334138] ? sched_clock_cpu+0x126/0x170 [ 261.338995] ? check_flags.part.36+0x450/0x450 [ 261.342991] ? __lock_acquire+0x4500/0x4500 [ 261.348994] ? inet_sendmsg+0x11c/0x500 [ 261.352989] ? dst_output+0x180/0x180 [ 261.357012] inet_sendmsg+0x11c/0x500 [ ... ] v2: - clear skb->sk at reassembly routine.(Eric Dumarzet) Fixes: fa0f527358bd ("ip: use rb trees for IP frag queue.") Suggested-by: Eric Dumazet Signed-off-by: Taehee Yoo Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 1 + net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 88281fbce88c..e7227128df2c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -599,6 +599,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, nextp = &fp->next; fp->prev = NULL; memset(&fp->rbnode, 0, sizeof(fp->rbnode)); + fp->sk = NULL; head->data_len += fp->len; head->len += fp->len; if (head->ip_summed != fp->ip_summed) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 2a14d8b65924..8f68a518d9db 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -445,6 +445,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; + fp->sk = NULL; } sub_frag_mem_limit(fq->q.net, head->truesize); From 11da3a7f84f19c26da6f86af878298694ede0804 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Sep 2018 17:26:43 -0700 Subject: [PATCH 249/269] Linux 4.19-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 19948e556941..4d5c883a98e5 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Merciless Moray # *DOCUMENTATION* From 3ebb17446b954b7d39264564ec3f7522d502e785 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 7 Sep 2018 02:02:45 +0000 Subject: [PATCH 250/269] ethernet: renesas: convert to SPDX identifiers This patch updates license to use SPDX-License-Identifier instead of verbose license text. Signed-off-by: Kuninori Morimoto Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/Kconfig | 1 + drivers/net/ethernet/renesas/Makefile | 1 + drivers/net/ethernet/renesas/ravb_ptp.c | 6 +----- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig index f3f7477043ce..bb0ebdfd4459 100644 --- a/drivers/net/ethernet/renesas/Kconfig +++ b/drivers/net/ethernet/renesas/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Renesas device configuration # diff --git a/drivers/net/ethernet/renesas/Makefile b/drivers/net/ethernet/renesas/Makefile index a05102a7df02..f21ab8c02af0 100644 --- a/drivers/net/ethernet/renesas/Makefile +++ b/drivers/net/ethernet/renesas/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Renesas device drivers. # diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c index eede70ec37f8..0721b5c35d91 100644 --- a/drivers/net/ethernet/renesas/ravb_ptp.c +++ b/drivers/net/ethernet/renesas/ravb_ptp.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* PTP 1588 clock using the Renesas Ethernet AVB * * Copyright (C) 2013-2015 Renesas Electronics Corporation * Copyright (C) 2015 Renesas Solutions Corp. * Copyright (C) 2015-2016 Cogent Embedded, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include "ravb.h" From 7c5cca3588545e7f255171e28e0dd6e384ebb91d Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Sat, 8 Sep 2018 13:50:48 +0200 Subject: [PATCH 251/269] qmi_wwan: Support dynamic config on Quectel EP06 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quectel EP06 (and EM06/EG06) supports dynamic configuration of USB interfaces, without the device changing VID/PID or configuration number. When the configuration is updated and interfaces are added/removed, the interface numbers change. This means that the current code for matching EP06 does not work. This patch removes the current EP06 interface number match, and replaces it with a match on class, subclass and protocol. Unfortunately, matching on those three alone is not enough, as the diag interface exports the same values as QMI. The other serial interfaces + adb export different values and do not match. The diag interface only has two endpoints, while the QMI interface has three. I have therefore added a check for number of interfaces, and we ignore the interface if the number of endpoints equals two. Signed-off-by: Kristian Evensen Acked-by: Bjørn Mork Acked-by: Dan Williams Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index cb0cc30c3d6a..e3270deecec2 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -967,6 +967,13 @@ static const struct usb_device_id products[] = { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, }, + { /* Quectel EP06/EG06/EM06 */ + USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0306, + USB_CLASS_VENDOR_SPEC, + USB_SUBCLASS_VENDOR_SPEC, + 0xff), + .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr, + }, /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ @@ -1255,7 +1262,6 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ - {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ @@ -1331,6 +1337,19 @@ static bool quectel_ec20_detected(struct usb_interface *intf) return false; } +static bool quectel_ep06_diag_detected(struct usb_interface *intf) +{ + struct usb_device *dev = interface_to_usbdev(intf); + struct usb_interface_descriptor intf_desc = intf->cur_altsetting->desc; + + if (le16_to_cpu(dev->descriptor.idVendor) == 0x2c7c && + le16_to_cpu(dev->descriptor.idProduct) == 0x0306 && + intf_desc.bNumEndpoints == 2) + return true; + + return false; +} + static int qmi_wwan_probe(struct usb_interface *intf, const struct usb_device_id *prod) { @@ -1365,6 +1384,15 @@ static int qmi_wwan_probe(struct usb_interface *intf, return -ENODEV; } + /* Quectel EP06/EM06/EG06 supports dynamic interface configuration, so + * we need to match on class/subclass/protocol. These values are + * identical for the diagnostic- and QMI-interface, but bNumEndpoints is + * different. Ignore the current interface if the number of endpoints + * the number for the diag interface (two). + */ + if (quectel_ep06_diag_detected(intf)) + return -ENODEV; + return usbnet_probe(intf, id); } From f94e63801ab2791ed64c409d0f751f6a0c953ead Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 24 Aug 2018 23:22:08 +0200 Subject: [PATCH 252/269] netfilter: conntrack: reset tcp maxwin on re-register Doug Smythies says: Sometimes it is desirable to temporarily disable, or clear, the iptables rule set on a computer being controlled via a secure shell session (SSH). While unwise on an internet facing computer, I also do it often on non-internet accessible computers while testing. Recently, this has become problematic, with the SSH session being dropped upon re-load of the rule set. The problem is that when all rules are deleted, conntrack hooks get unregistered. In case the rules are re-added later, its possible that tcp window has moved far enough so that all packets are considered invalid (out of window) until entry expires (which can take forever, default established timeout is 5 days). Fix this by clearing maxwin of existing tcp connections on register. v2: don't touch entries on hook removal. v3: remove obsolete expiry check. Reported-by: Doug Smythies Fixes: 4d3a57f23dec59 ("netfilter: conntrack: do not enable connection tracking unless needed") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 9f14b0df6960..51c5d7eec0a3 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -776,9 +776,26 @@ static const struct nf_hook_ops ipv6_conntrack_ops[] = { }; #endif +static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto) +{ + u8 nfproto = (unsigned long)_nfproto; + + if (nf_ct_l3num(ct) != nfproto) + return 0; + + if (nf_ct_protonum(ct) == IPPROTO_TCP && + ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED) { + ct->proto.tcp.seen[0].td_maxwin = 0; + ct->proto.tcp.seen[1].td_maxwin = 0; + } + + return 0; +} + static int nf_ct_netns_do_get(struct net *net, u8 nfproto) { struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + bool fixup_needed = false; int err = 0; mutex_lock(&nf_ct_proto_mutex); @@ -798,6 +815,8 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto) ARRAY_SIZE(ipv4_conntrack_ops)); if (err) cnet->users4 = 0; + else + fixup_needed = true; break; #if IS_ENABLED(CONFIG_IPV6) case NFPROTO_IPV6: @@ -814,6 +833,8 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto) ARRAY_SIZE(ipv6_conntrack_ops)); if (err) cnet->users6 = 0; + else + fixup_needed = true; break; #endif default: @@ -822,6 +843,11 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto) } out_unlock: mutex_unlock(&nf_ct_proto_mutex); + + if (fixup_needed) + nf_ct_iterate_cleanup_net(net, nf_ct_tcp_fixup, + (void *)(unsigned long)nfproto, 0, 0); + return err; } From a874752a10da113f513980e28f562d946d3f829d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 31 Aug 2018 12:36:01 +0200 Subject: [PATCH 253/269] netfilter: conntrack: timeout interface depend on CONFIG_NF_CONNTRACK_TIMEOUT Now that cttimeout support for nft_ct is in place, these should depend on CONFIG_NF_CONNTRACK_TIMEOUT otherwise we can crash when dumping the policy if this option is not enabled. [ 71.600121] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [...] [ 71.600141] CPU: 3 PID: 7612 Comm: nft Not tainted 4.18.0+ #246 [...] [ 71.600188] Call Trace: [ 71.600201] ? nft_ct_timeout_obj_dump+0xc6/0xf0 [nft_ct] Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_dccp.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_generic.c | 8 ++++---- net/netfilter/nf_conntrack_proto_gre.c | 8 ++++---- net/netfilter/nf_conntrack_proto_icmp.c | 8 ++++---- net/netfilter/nf_conntrack_proto_icmpv6.c | 8 ++++---- net/netfilter/nf_conntrack_proto_sctp.c | 14 +++++++------- net/netfilter/nf_conntrack_proto_tcp.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_udp.c | 20 ++++++++++---------- 8 files changed, 45 insertions(+), 45 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index b81f70039828..f3f91ed2c21a 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -675,7 +675,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) } #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include #include @@ -728,7 +728,7 @@ dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = { [CTA_TIMEOUT_DCCP_CLOSING] = { .type = NLA_U32 }, [CTA_TIMEOUT_DCCP_TIMEWAIT] = { .type = NLA_U32 }, }; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_SYSCTL /* template, data assigned later */ @@ -863,7 +863,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = { .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = dccp_timeout_nlattr_to_obj, .obj_to_nlattr = dccp_timeout_obj_to_nlattr, @@ -871,7 +871,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = { .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, .nla_policy = dccp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = dccp_init_net, .get_net_proto = dccp_get_net_proto, }; @@ -896,7 +896,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = { .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = dccp_timeout_nlattr_to_obj, .obj_to_nlattr = dccp_timeout_obj_to_nlattr, @@ -904,7 +904,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = { .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, .nla_policy = dccp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = dccp_init_net, .get_net_proto = dccp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index ac4a0b296dcd..1df3244ecd07 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -70,7 +70,7 @@ static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, return ret; } -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include #include @@ -113,7 +113,7 @@ static const struct nla_policy generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { [CTA_TIMEOUT_GENERIC_TIMEOUT] = { .type = NLA_U32 }, }; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_SYSCTL static struct ctl_table generic_sysctl_table[] = { @@ -164,7 +164,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = .pkt_to_tuple = generic_pkt_to_tuple, .packet = generic_packet, .new = generic_new, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = generic_timeout_nlattr_to_obj, .obj_to_nlattr = generic_timeout_obj_to_nlattr, @@ -172,7 +172,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = .obj_size = sizeof(unsigned int), .nla_policy = generic_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = generic_init_net, .get_net_proto = generic_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index d1632252bf5b..650eb4fba2c5 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -285,7 +285,7 @@ static void gre_destroy(struct nf_conn *ct) nf_ct_gre_keymap_destroy(master); } -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include #include @@ -334,7 +334,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { [CTA_TIMEOUT_GRE_UNREPLIED] = { .type = NLA_U32 }, [CTA_TIMEOUT_GRE_REPLIED] = { .type = NLA_U32 }, }; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ static int gre_init_net(struct net *net, u_int16_t proto) { @@ -367,7 +367,7 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = gre_timeout_nlattr_to_obj, .obj_to_nlattr = gre_timeout_obj_to_nlattr, @@ -375,7 +375,7 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { .obj_size = sizeof(unsigned int) * GRE_CT_MAX, .nla_policy = gre_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .net_id = &proto_gre_net_id, .init_net = gre_init_net, }; diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 036670b38282..43c7e1a217b9 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -273,7 +273,7 @@ static unsigned int icmp_nlattr_tuple_size(void) } #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include #include @@ -313,7 +313,7 @@ static const struct nla_policy icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 }, }; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_SYSCTL static struct ctl_table icmp_sysctl_table[] = { @@ -374,7 +374,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = .nlattr_to_tuple = icmp_nlattr_to_tuple, .nla_policy = icmp_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = icmp_timeout_nlattr_to_obj, .obj_to_nlattr = icmp_timeout_obj_to_nlattr, @@ -382,7 +382,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = .obj_size = sizeof(unsigned int), .nla_policy = icmp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = icmp_init_net, .get_net_proto = icmp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index bed07b998a10..97e40f77d678 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -274,7 +274,7 @@ static unsigned int icmpv6_nlattr_tuple_size(void) } #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include #include @@ -314,7 +314,7 @@ static const struct nla_policy icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 }, }; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_SYSCTL static struct ctl_table icmpv6_sysctl_table[] = { @@ -373,7 +373,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = .nlattr_to_tuple = icmpv6_nlattr_to_tuple, .nla_policy = icmpv6_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj, .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr, @@ -381,7 +381,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = .obj_size = sizeof(unsigned int), .nla_policy = icmpv6_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = icmpv6_init_net, .get_net_proto = icmpv6_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 5eddfd32b852..e4d738d34cd0 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -591,7 +591,7 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) } #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include #include @@ -646,7 +646,7 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 }, }; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_SYSCTL @@ -780,7 +780,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = sctp_timeout_nlattr_to_obj, .obj_to_nlattr = sctp_timeout_obj_to_nlattr, @@ -788,7 +788,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX, .nla_policy = sctp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = sctp_init_net, .get_net_proto = sctp_get_net_proto, }; @@ -813,7 +813,8 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = sctp_timeout_nlattr_to_obj, .obj_to_nlattr = sctp_timeout_obj_to_nlattr, @@ -821,8 +822,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX, .nla_policy = sctp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#endif +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = sctp_init_net, .get_net_proto = sctp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3e2dc56a96c3..b4bdf9eda7b7 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1279,7 +1279,7 @@ static unsigned int tcp_nlattr_tuple_size(void) } #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include #include @@ -1394,7 +1394,7 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = { [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 }, [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 }, }; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_SYSCTL static struct ctl_table tcp_sysctl_table[] = { @@ -1558,7 +1558,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = .nlattr_size = TCP_NLATTR_SIZE, .nla_policy = nf_ct_port_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = tcp_timeout_nlattr_to_obj, .obj_to_nlattr = tcp_timeout_obj_to_nlattr, @@ -1567,7 +1567,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = TCP_CONNTRACK_TIMEOUT_MAX, .nla_policy = tcp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = tcp_init_net, .get_net_proto = tcp_get_net_proto, }; @@ -1593,7 +1593,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = .nlattr_tuple_size = tcp_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = tcp_timeout_nlattr_to_obj, .obj_to_nlattr = tcp_timeout_obj_to_nlattr, @@ -1602,7 +1602,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = TCP_CONNTRACK_TIMEOUT_MAX, .nla_policy = tcp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = tcp_init_net, .get_net_proto = tcp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 9272a2c525a8..3065fb8ef91b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -171,7 +171,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, return NF_ACCEPT; } -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include #include @@ -221,7 +221,7 @@ udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NLA_U32 }, [CTA_TIMEOUT_UDP_REPLIED] = { .type = NLA_U32 }, }; -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_SYSCTL static struct ctl_table udp_sysctl_table[] = { @@ -292,7 +292,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = udp_timeout_nlattr_to_obj, .obj_to_nlattr = udp_timeout_obj_to_nlattr, @@ -300,7 +300,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, .nla_policy = udp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = udp_init_net, .get_net_proto = udp_get_net_proto, }; @@ -321,7 +321,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 = .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = udp_timeout_nlattr_to_obj, .obj_to_nlattr = udp_timeout_obj_to_nlattr, @@ -329,7 +329,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 = .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, .nla_policy = udp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = udp_init_net, .get_net_proto = udp_get_net_proto, }; @@ -350,7 +350,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = udp_timeout_nlattr_to_obj, .obj_to_nlattr = udp_timeout_obj_to_nlattr, @@ -358,7 +358,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, .nla_policy = udp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = udp_init_net, .get_net_proto = udp_get_net_proto, }; @@ -379,7 +379,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 = .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = udp_timeout_nlattr_to_obj, .obj_to_nlattr = udp_timeout_obj_to_nlattr, @@ -387,7 +387,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 = .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, .nla_policy = udp_timeout_nla_policy, }, -#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ .init_net = udp_init_net, .get_net_proto = udp_get_net_proto, }; From 99e25d071fca91eb90ffa2f51240547a69137bde Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 3 Sep 2018 13:53:22 +0200 Subject: [PATCH 254/269] netfilter: cttimeout: ctnl_timeout_find_get() returns incorrect pointer to type Compiler did not catch incorrect typing in the rcu hook assignment. % nfct add timeout test-tcp inet tcp established 100 close 10 close_wait 10 % iptables -I OUTPUT -t raw -p tcp -j CT --timeout test-tcp dmesg - xt_CT: Timeout policy `test-tcp' can only be used by L3 protocol number 25000 The CT target bails out with incorrect layer 3 protocol number. Fixes: 6c1fd7dc489d ("netfilter: cttimeout: decouple timeout policy from nfnetlink_cttimeout object") Reported-by: Harsha Sharma Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index d46a236cdf31..a30f8ba4b89a 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -489,8 +489,8 @@ err: return err; } -static struct ctnl_timeout * -ctnl_timeout_find_get(struct net *net, const char *name) +static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net, + const char *name) { struct ctnl_timeout *timeout, *matching = NULL; @@ -509,7 +509,7 @@ ctnl_timeout_find_get(struct net *net, const char *name) break; } err: - return matching; + return matching ? &matching->timeout : NULL; } static void ctnl_timeout_put(struct nf_ct_timeout *t) From ad18d7bf68a3da860ebb62a59c449804a6d237b4 Mon Sep 17 00:00:00 2001 From: Michal 'vorner' Vaner Date: Tue, 4 Sep 2018 13:25:44 +0200 Subject: [PATCH 255/269] netfilter: nfnetlink_queue: Solve the NFQUEUE/conntrack clash for NF_REPEAT NF_REPEAT places the packet at the beginning of the iptables chain instead of accepting or rejecting it right away. The packet however will reach the end of the chain and continue to the end of iptables eventually, so it needs the same handling as NF_ACCEPT and NF_DROP. Fixes: 368982cd7d1b ("netfilter: nfnetlink_queue: resolve clash for unconfirmed conntracks") Signed-off-by: Michal 'vorner' Vaner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index ea4ba551abb2..d33094f4ec41 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -233,6 +233,7 @@ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) int err; if (verdict == NF_ACCEPT || + verdict == NF_REPEAT || verdict == NF_STOP) { rcu_read_lock(); ct_hook = rcu_dereference(nf_ct_hook); From 1286df269f498165061e0cf8092ca212545dbb5a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 5 Sep 2018 11:41:31 -0700 Subject: [PATCH 256/269] netfilter: xt_hashlimit: use s->file instead of s->private After switching to the new procfs API, it is supposed to retrieve the private pointer from PDE_DATA(file_inode(s->file)), s->private is no longer referred. Fixes: 1cd671827290 ("netfilter/x_tables: switch to proc_create_seq_private") Reported-by: Sami Farin Signed-off-by: Cong Wang Acked-by: Christoph Hellwig Tested-by: Sami Farin Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9b16402f29af..3e7d259e5d8d 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -1057,7 +1057,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { static void *dl_seq_start(struct seq_file *s, loff_t *pos) __acquires(htable->lock) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); unsigned int *bucket; spin_lock_bh(&htable->lock); @@ -1074,7 +1074,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos) static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); unsigned int *bucket = v; *pos = ++(*bucket); @@ -1088,7 +1088,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) static void dl_seq_stop(struct seq_file *s, void *v) __releases(htable->lock) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); unsigned int *bucket = v; if (!IS_ERR(bucket)) @@ -1130,7 +1130,7 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { - struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private)); + struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file)); spin_lock(&ent->lock); /* recalculate to show accurate numbers */ @@ -1145,7 +1145,7 @@ static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { - struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private)); + struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file)); spin_lock(&ent->lock); /* recalculate to show accurate numbers */ @@ -1160,7 +1160,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { - struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private)); + struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file)); spin_lock(&ent->lock); /* recalculate to show accurate numbers */ @@ -1174,7 +1174,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_show_v2(struct seq_file *s, void *v) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; @@ -1188,7 +1188,7 @@ static int dl_seq_show_v2(struct seq_file *s, void *v) static int dl_seq_show_v1(struct seq_file *s, void *v) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); unsigned int *bucket = v; struct dsthash_ent *ent; @@ -1202,7 +1202,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v) static int dl_seq_show(struct seq_file *s, void *v) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private)); + struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); unsigned int *bucket = v; struct dsthash_ent *ent; From 2d946e5bcdabc1deef72d01bc92a2801c71d6d8d Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 9 Sep 2018 21:26:23 +0200 Subject: [PATCH 257/269] MIPS: lantiq: dma: add dev pointer dma_zalloc_coherent() now crashes if no dev pointer is given. Add a dev pointer to the ltq_dma_channel structure and fill it in the driver using it. This fixes a bug introduced in kernel 4.19. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- arch/mips/include/asm/mach-lantiq/xway/xway_dma.h | 1 + arch/mips/lantiq/xway/dma.c | 4 ++-- drivers/net/ethernet/lantiq_etop.c | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h index 4901833498f7..8441b2698e64 100644 --- a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h +++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h @@ -40,6 +40,7 @@ struct ltq_dma_channel { int desc; /* the current descriptor */ struct ltq_dma_desc *desc_base; /* the descriptor base */ int phys; /* physical addr */ + struct device *dev; }; enum { diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c index 4b9fbb6744ad..664f2f7f55c1 100644 --- a/arch/mips/lantiq/xway/dma.c +++ b/arch/mips/lantiq/xway/dma.c @@ -130,7 +130,7 @@ ltq_dma_alloc(struct ltq_dma_channel *ch) unsigned long flags; ch->desc = 0; - ch->desc_base = dma_zalloc_coherent(NULL, + ch->desc_base = dma_zalloc_coherent(ch->dev, LTQ_DESC_NUM * LTQ_DESC_SIZE, &ch->phys, GFP_ATOMIC); @@ -182,7 +182,7 @@ ltq_dma_free(struct ltq_dma_channel *ch) if (!ch->desc_base) return; ltq_dma_close(ch); - dma_free_coherent(NULL, LTQ_DESC_NUM * LTQ_DESC_SIZE, + dma_free_coherent(ch->dev, LTQ_DESC_NUM * LTQ_DESC_SIZE, ch->desc_base, ch->phys); } EXPORT_SYMBOL_GPL(ltq_dma_free); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 7a637b51c7d2..e08301d833e2 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -274,6 +274,7 @@ ltq_etop_hw_init(struct net_device *dev) struct ltq_etop_chan *ch = &priv->ch[i]; ch->idx = ch->dma.nr = i; + ch->dma.dev = &priv->pdev->dev; if (IS_TX(i)) { ltq_dma_alloc_tx(&ch->dma); From 0297c1c2eadb5bd996a873b87597af3b91c0d4ba Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 9 Sep 2018 19:12:12 -0400 Subject: [PATCH 258/269] tcp: rate limit synflood warnings further Convert pr_info to net_info_ratelimited to limit the total number of synflood warnings. Commit 946cedccbd73 ("tcp: Change possible SYN flooding messages") rate limits synflood warnings to one per listener. Workloads that open many listener sockets can still see a high rate of log messages. Syzkaller is one frequent example. Signed-off-by: Willem de Bruijn Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4c2dd9f863f7..4cf2f7bb2802 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6367,8 +6367,8 @@ static bool tcp_syn_flood_action(const struct sock *sk, if (!queue->synflood_warned && net->ipv4.sysctl_tcp_syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) - pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, ntohs(tcp_hdr(skb)->dest), msg); + net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", + proto, ntohs(tcp_hdr(skb)->dest), msg); return want_cookie; } From 5a64506b5c2c3cdb29d817723205330378075448 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Mon, 10 Sep 2018 22:19:47 +0800 Subject: [PATCH 259/269] erspan: return PACKET_REJECT when the appropriate tunnel is not found If erspan tunnel hasn't been established, we'd better send icmp port unreachable message after receive erspan packets. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Cc: William Tu Signed-off-by: Haishuang Yan Acked-by: William Tu Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ae714aecc31c..85a714d36b66 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -328,6 +328,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); return PACKET_RCVD; } + return PACKET_REJECT; + drop: kfree_skb(skb); return PACKET_RCVD; From 51dc63e3911fbb1f0a7a32da2fe56253e2040ea4 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Mon, 10 Sep 2018 22:19:48 +0800 Subject: [PATCH 260/269] erspan: fix error handling for erspan tunnel When processing icmp unreachable message for erspan tunnel, tunnel id should be erspan_net_id instead of ipgre_net_id. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Cc: William Tu Signed-off-by: Haishuang Yan Acked-by: William Tu Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 85a714d36b66..8cce0e9ea08c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -178,6 +178,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info, if (tpi->proto == htons(ETH_P_TEB)) itn = net_generic(net, gre_tap_net_id); + else if (tpi->proto == htons(ETH_P_ERSPAN) || + tpi->proto == htons(ETH_P_ERSPAN2)) + itn = net_generic(net, erspan_net_id); else itn = net_generic(net, ipgre_net_id); From 6ad569019999300afd8e614d296fdc356550b77f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 11 Sep 2018 01:51:43 +0800 Subject: [PATCH 261/269] r8169: Clear RTL_FLAG_TASK_*_PENDING when clearing RTL_FLAG_TASK_ENABLED After system suspend, sometimes the r8169 doesn't work when ethernet cable gets pluggued. This issue happens because rtl_reset_work() doesn't get called from rtl8169_runtime_resume(), after system suspend. In rtl_task(), RTL_FLAG_TASK_* only gets cleared if this condition is met: if (!netif_running(dev) || !test_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags)) ... If RTL_FLAG_TASK_ENABLED was cleared during system suspend while RTL_FLAG_TASK_RESET_PENDING was set, the next rtl_schedule_task() won't schedule task as the flag is still there. So in addition to clearing RTL_FLAG_TASK_ENABLED, also clears other flags. Cc: Heiner Kallweit Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index a1f37d58e2fe..1d8631303b53 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -631,7 +631,7 @@ struct rtl8169_tc_offsets { }; enum rtl_flag { - RTL_FLAG_TASK_ENABLED, + RTL_FLAG_TASK_ENABLED = 0, RTL_FLAG_TASK_SLOW_PENDING, RTL_FLAG_TASK_RESET_PENDING, RTL_FLAG_MAX @@ -6655,7 +6655,8 @@ static int rtl8169_close(struct net_device *dev) rtl8169_update_counters(tp); rtl_lock_work(tp); - clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); + /* Clear all task flags */ + bitmap_zero(tp->wk.flags, RTL_FLAG_MAX); rtl8169_down(dev); rtl_unlock_work(tp); @@ -6838,7 +6839,9 @@ static void rtl8169_net_suspend(struct net_device *dev) rtl_lock_work(tp); napi_disable(&tp->napi); - clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); + /* Clear all task flags */ + bitmap_zero(tp->wk.flags, RTL_FLAG_MAX); + rtl_unlock_work(tp); rtl_pll_power_down(tp); From cc4dfb7f70a344f24c1c71e298deea0771dadcb2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 10 Sep 2018 18:27:26 -0700 Subject: [PATCH 262/269] rds: fix two RCU related problems When a rds sock is bound, it is inserted into the bind_hash_table which is protected by RCU. But when releasing rds sock, after it is removed from this hash table, it is freed immediately without respecting RCU grace period. This could cause some use-after-free as reported by syzbot. Mark the rds sock with SOCK_RCU_FREE before inserting it into the bind_hash_table, so that it would be always freed after a RCU grace period. The other problem is in rds_find_bound(), the rds sock could be freed in between rhashtable_lookup_fast() and rds_sock_addref(), so we need to extend RCU read lock protection in rds_find_bound() to close this race condition. Reported-and-tested-by: syzbot+8967084bcac563795dc6@syzkaller.appspotmail.com Reported-by: syzbot+93a5839deb355537440f@syzkaller.appspotmail.com Cc: Sowmini Varadhan Cc: Santosh Shilimkar Cc: rds-devel@oss.oracle.com Signed-off-by: Cong Wang Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/bind.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/rds/bind.c b/net/rds/bind.c index 3ab55784b637..762d2c6788a3 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -76,11 +76,13 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, struct rds_sock *rs; __rds_create_bind_key(key, addr, port, scope_id); - rs = rhashtable_lookup_fast(&bind_hash_table, key, ht_parms); + rcu_read_lock(); + rs = rhashtable_lookup(&bind_hash_table, key, ht_parms); if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) rds_sock_addref(rs); else rs = NULL; + rcu_read_unlock(); rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr, ntohs(port)); @@ -235,6 +237,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } + sock_set_flag(sk, SOCK_RCU_FREE); ret = rds_add_bound(rs, binding_addr, &port, scope_id); if (ret) goto out; From 778b1ac737494cec156f17c80da44664c1f77cf6 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 12 Sep 2018 15:31:32 +0200 Subject: [PATCH 263/269] s390/qeth: indicate error when netdev allocation fails Bailing out on allocation error is nice, but we also need to tell the ccwgroup core that creating the qeth groupdev failed. Fixes: d3d1b205e89f ("s390/qeth: allocate netdevice early") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 49f64eb3eab0..6b24face21d5 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5768,8 +5768,10 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) qeth_update_from_chp_desc(card); card->dev = qeth_alloc_netdev(card); - if (!card->dev) + if (!card->dev) { + rc = -ENOMEM; goto err_card; + } qeth_determine_capabilities(card); enforced_disc = qeth_enforce_discipline(card); From 04db741d0df02fdb9ea4ddca32615153407dcf7f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 12 Sep 2018 15:31:33 +0200 Subject: [PATCH 264/269] s390/qeth: switch on SG by default for IQD devices Scatter-gather transmit brings a nice performance boost. Considering the rather large MTU sizes at play, it's also totally the Right Thing To Do. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6b24face21d5..b60055e9cb1a 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5706,6 +5706,8 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->hw_features |= NETIF_F_SG; dev->vlan_features |= NETIF_F_SG; + if (IS_IQD(card)) + dev->features |= NETIF_F_SG; } return dev; From aec45e857c5538664edb76a60dd452e3265f37d1 Mon Sep 17 00:00:00 2001 From: Wenjia Zhang Date: Wed, 12 Sep 2018 15:31:34 +0200 Subject: [PATCH 265/269] s390/qeth: use vzalloc for QUERY OAT buffer qeth_query_oat_command() currently allocates the kernel buffer for the SIOC_QETH_QUERY_OAT ioctl with kzalloc. So on systems with fragmented memory, large allocations may fail (eg. the qethqoat tool by default uses 132KB). Solve this issue by using vzalloc, backing the allocation with non-contiguous memory. Signed-off-by: Wenjia Zhang Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index b60055e9cb1a..de8282420f96 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -4699,7 +4700,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata) priv.buffer_len = oat_data.buffer_len; priv.response_len = 0; - priv.buffer = kzalloc(oat_data.buffer_len, GFP_KERNEL); + priv.buffer = vzalloc(oat_data.buffer_len); if (!priv.buffer) { rc = -ENOMEM; goto out; @@ -4740,7 +4741,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata) rc = -EFAULT; out_free: - kfree(priv.buffer); + vfree(priv.buffer); out: return rc; } From 0ac1487c4b2de383b91ecad1be561b8f7a2c15f4 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 12 Sep 2018 15:31:35 +0200 Subject: [PATCH 266/269] s390/qeth: don't dump past end of unknown HW header For inbound data with an unsupported HW header format, only dump the actual HW header. We have no idea how much payload follows it, and what it contains. Worst case, we dump past the end of the Inbound Buffer and access whatever is located next in memory. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 710fa74892ae..b5e38531733f 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -423,7 +423,7 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card, default: dev_kfree_skb_any(skb); QETH_CARD_TEXT(card, 3, "inbunkno"); - QETH_DBF_HEX(CTRL, 3, hdr, QETH_DBF_CTRL_LEN); + QETH_DBF_HEX(CTRL, 3, hdr, sizeof(*hdr)); continue; } work_done++; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 7175086677fb..ada258c01a08 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1390,7 +1390,7 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card, default: dev_kfree_skb_any(skb); QETH_CARD_TEXT(card, 3, "inbunkno"); - QETH_DBF_HEX(CTRL, 3, hdr, QETH_DBF_CTRL_LEN); + QETH_DBF_HEX(CTRL, 3, hdr, sizeof(*hdr)); continue; } work_done++; From 12a78b026f870c575d3a98998b25084aac5b3c61 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Sep 2018 15:12:17 -0700 Subject: [PATCH 267/269] tipc: check return value of __tipc_dump_start() When __tipc_dump_start() fails with running out of memory, we have no reason to continue, especially we should avoid calling tipc_dump_done(). Fixes: 8f5c5fcf3533 ("tipc: call start and done ops directly in __tipc_nl_compat_dumpit()") Reported-and-tested-by: syzbot+3f8324abccfbf8c74a9f@syzkaller.appspotmail.com Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Cong Wang Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 82f665728382..6376467e78f8 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -185,7 +185,10 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, return -ENOMEM; buf->sk = msg->dst_sk; - __tipc_dump_start(&cb, msg->net); + if (__tipc_dump_start(&cb, msg->net)) { + kfree_skb(buf); + return -ENOMEM; + } do { int rem; From db191db813722297be36ffce2862e0f2b0e54d82 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Tue, 11 Sep 2018 06:38:44 -0700 Subject: [PATCH 268/269] nfp: flower: fix vlan match by checking both vlan id and vlan pcp Previously we only checked if the vlan id field is present when trying to match a vlan tag. The vlan id and vlan pcp field should be treated independently. Fixes: 5571e8c9f241 ("nfp: extend flower matching capabilities") Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/main.h | 1 + drivers/net/ethernet/netronome/nfp/flower/match.c | 2 +- drivers/net/ethernet/netronome/nfp/flower/offload.c | 11 +++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 85f8209bf007..81d941ab895c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -70,6 +70,7 @@ struct nfp_app; #define NFP_FL_FEATS_GENEVE BIT(0) #define NFP_FL_NBI_MTU_SETTING BIT(1) #define NFP_FL_FEATS_GENEVE_OPT BIT(2) +#define NFP_FL_FEATS_VLAN_PCP BIT(3) #define NFP_FL_FEATS_LAG BIT(31) struct nfp_fl_mask_id { diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index a0c72f277faa..17acb8cc6044 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -56,7 +56,7 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *frame, FLOW_DISSECTOR_KEY_VLAN, target); /* Populate the tci field. */ - if (flow_vlan->vlan_id) { + if (flow_vlan->vlan_id || flow_vlan->vlan_priority) { tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, flow_vlan->vlan_priority) | FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 2edab01c3beb..bd19624f10cf 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -192,6 +192,17 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, key_size += sizeof(struct nfp_flower_mac_mpls); } + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *flow_vlan; + + flow_vlan = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_VLAN, + flow->mask); + if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_PCP) && + flow_vlan->vlan_priority) + return -EOPNOTSUPP; + } + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL; From 224de549f0beca58fb95c0b8da9cb2bfa8c6cc12 Mon Sep 17 00:00:00 2001 From: Louis Peens Date: Tue, 11 Sep 2018 06:38:45 -0700 Subject: [PATCH 269/269] nfp: flower: reject tunnel encap with ipv6 outer headers for offloading This fixes a bug where ipv6 tunnels would report that it is getting offloaded to hardware but would actually be rejected by hardware. Fixes: b27d6a95a70d ("nfp: compile flower vxlan tunnel set actions") Signed-off-by: Louis Peens Reviewed-by: John Hurley Reviewed-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 9044496803e6..46ba0cf257c6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -52,6 +52,7 @@ #define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) #define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) #define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) +#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS IP_TUNNEL_INFO_TX #define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ NFP_FL_TUNNEL_KEY | \ NFP_FL_TUNNEL_GENEVE_OPT) @@ -741,11 +742,16 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, nfp_fl_push_vlan(psh_v, a); *a_len += sizeof(struct nfp_fl_push_vlan); } else if (is_tcf_tunnel_set(a)) { + struct ip_tunnel_info *ip_tun = tcf_tunnel_info(a); struct nfp_repr *repr = netdev_priv(netdev); + *tun_type = nfp_fl_get_tun_from_act_l4_port(repr->app, a); if (*tun_type == NFP_FL_TUNNEL_NONE) return -EOPNOTSUPP; + if (ip_tun->mode & ~NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS) + return -EOPNOTSUPP; + /* Pre-tunnel action is required for tunnel encap. * This checks for next hop entries on NFP. * If none, the packet falls back before applying other actions.