* Detect insecure W+X mappings and warn about them, including a

few bug fixes and relaxing the enforcement
  * Do a long-overdue defconfig update and enabling W+X boot-time
    detection
  * Cleanup _PAGE_PSE handling (follow-up on an earlier bug)
  * Rename a change_page_attr function
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEV76QKkVc4xCGURexaDWVMHDJkrAFAmM+EPcACgkQaDWVMHDJ
 krCTJBAAyCJOcywm9bhKuxCKFzeDwKf5znW3sJVQYPlcskLlLyQdqbPN32BSMH4t
 rM+m7c3nChjx0wwwXbhqxcxmMAN8E3EC2GrPxJg6WY119F9rFsaxica54/ZGvnd3
 7YjShCqmNpQS1PbhfB5aJMRiZU6CkEZ2ER9ewypEg3na5ShhWKiT++nd9jwn8b6R
 8nQyVCBFVS+dcJ3G8XBwikvKB2o1KGysDWI1NfJGhd4XxUMFWL8KVbjgnRIMuPfJ
 A3lvoIFgwTgZuoZA0DfGPnmXqryq+15ADpjK/4R4rllPEawvBcLMGngvyAv5LNT0
 I9yOjBIm8AXMMJf9rHJQlxwouE/QapwVH9rN0QZgRAhPsD5sAaMAD8FqZ11ovtlf
 VGsndZnv2LqIdnf1TV23IPTFoU58diStoYcuwvqqiWhBlMc2UYSNNlwGTeloZVK1
 /JFL9HZWLZzUfq+/cODsvHBJsVS5/X3FdYN1WDzHXSiiN8c/IbaYPduRpZdwbQ2q
 nWGep8z9QkihrSA1sjkfDrtBwA5EKC8U//M4ve7jf3xVv2mcjnSxYKlTrq0e4+q+
 +6F+CvApPavkK4hmdqKBlJeCQo7rhiaD9iSpaO4XQZ463TowoYzj5F3hNtYi/p7I
 +n63UPUhhh6hHAtXKSfpiRpsei4dtRgbtaHQ1m3Z5t35id/gqhA=
 =KkQf
 -----END PGP SIGNATURE-----

Merge tag 'x86_mm_for_v6.1_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Dave Hansen:
 "There are some small things here, plus one big one.

  The big one detected and refused to create W+X kernel mappings. This
  caused a bit of trouble and it is entirely disabled on 32-bit due to
  known unfixable EFI issues. It also oopsed on some systemd eBPF use,
  which kept some users from booting.

  The eBPF issue is fixed, but those troubles were caught relatively
  recently which made me nervous that there are more lurking. The final
  commit in here retains the warnings, but doesn't actually refuse to
  create W+X mappings.

  Summary:

   - Detect insecure W+X mappings and warn about them, including a few
     bug fixes and relaxing the enforcement

   - Do a long-overdue defconfig update and enabling W+X boot-time
     detection

   - Cleanup _PAGE_PSE handling (follow-up on an earlier bug)

   - Rename a change_page_attr function"

* tag 'x86_mm_for_v6.1_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Ease W^X enforcement back to just a warning
  x86/mm: Disable W^X detection and enforcement on 32-bit
  x86/mm: Add prot_sethuge() helper to abstract out _PAGE_PSE handling
  x86/mm/32: Fix W^X detection when page tables do not support NX
  x86/defconfig: Enable CONFIG_DEBUG_WX=y
  x86/defconfig: Refresh the defconfigs
  x86/mm: Refuse W^X violations
  x86/mm: Rename set_memory_present() to set_memory_p()
This commit is contained in:
Linus Torvalds 2022-10-10 17:13:07 -07:00
commit 70442fc54e
4 changed files with 64 additions and 15 deletions

View File

@ -27,7 +27,6 @@ CONFIG_CGROUP_MISC=y
CONFIG_CGROUP_DEBUG=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_SMP=y
CONFIG_HYPERVISOR_GUEST=y
@ -44,6 +43,7 @@ CONFIG_EFI_STUB=y
CONFIG_HZ_1000=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
# CONFIG_RETHUNK is not set
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_PM_TRACE_RTC=y
@ -62,6 +62,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y
CONFIG_BLK_CGROUP_IOCOST=y
CONFIG_BLK_CGROUP_IOPRIO=y
CONFIG_BINFMT_MISC=y
# CONFIG_COMPAT_BRK is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@ -269,9 +270,10 @@ CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_KERNEL=y
CONFIG_FRAME_WARN=1024
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_WX=y
CONFIG_DEBUG_STACK_USAGE=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y

View File

@ -26,7 +26,6 @@ CONFIG_CGROUP_MISC=y
CONFIG_CGROUP_DEBUG=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_SMP=y
CONFIG_HYPERVISOR_GUEST=y
@ -62,6 +61,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y
CONFIG_BLK_CGROUP_IOCOST=y
CONFIG_BLK_CGROUP_IOPRIO=y
CONFIG_BINFMT_MISC=y
# CONFIG_COMPAT_BRK is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@ -267,8 +267,9 @@ CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_WX=y
CONFIG_DEBUG_STACK_USAGE=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y

View File

@ -90,6 +90,12 @@ DEFINE_ENTRY(pud, pud, init)
DEFINE_ENTRY(pmd, pmd, init)
DEFINE_ENTRY(pte, pte, init)
static inline pgprot_t prot_sethuge(pgprot_t prot)
{
WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PAT);
return __pgprot(pgprot_val(prot) | _PAGE_PSE);
}
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
@ -557,9 +563,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_2M)) {
pages++;
spin_lock(&init_mm.page_table_lock);
set_pte_init((pte_t *)pmd,
pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
__pgprot(pgprot_val(prot) | _PAGE_PSE)),
set_pmd_init(pmd,
pfn_pmd(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
@ -644,12 +649,8 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_1G)) {
pages++;
spin_lock(&init_mm.page_table_lock);
prot = __pgprot(pgprot_val(prot) | _PAGE_PSE);
set_pte_init((pte_t *)pud,
pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
prot),
set_pud_init(pud,
pfn_pud(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;

View File

@ -579,6 +579,46 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
return __pgprot(pgprot_val(prot) & ~forbidden);
}
/*
* Validate strict W^X semantics.
*/
static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
unsigned long pfn, unsigned long npg)
{
unsigned long end;
/*
* 32-bit has some unfixable W+X issues, like EFI code
* and writeable data being in the same page. Disable
* detection and enforcement there.
*/
if (IS_ENABLED(CONFIG_X86_32))
return new;
/* Only verify when NX is supported: */
if (!(__supported_pte_mask & _PAGE_NX))
return new;
if (!((pgprot_val(old) ^ pgprot_val(new)) & (_PAGE_RW | _PAGE_NX)))
return new;
if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW)
return new;
end = start + npg * PAGE_SIZE - 1;
WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n",
(unsigned long long)pgprot_val(old),
(unsigned long long)pgprot_val(new),
start, end, pfn);
/*
* For now, allow all permission change attempts by returning the
* attempted permissions. This can 'return old' to actively
* refuse the permission change at a later time.
*/
return new;
}
/*
* Lookup the page table entry for a virtual address in a specific pgd.
* Return a pointer to the entry and the level of the mapping.
@ -885,6 +925,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
psize, CPA_DETECT);
new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages);
/*
* If there is a conflict, split the large page.
*
@ -1525,6 +1567,7 @@ repeat:
if (level == PG_LEVEL_4K) {
pte_t new_pte;
pgprot_t old_prot = pte_pgprot(old_pte);
pgprot_t new_prot = pte_pgprot(old_pte);
unsigned long pfn = pte_pfn(old_pte);
@ -1536,6 +1579,8 @@ repeat:
new_prot = static_protections(new_prot, address, pfn, 1, 0,
CPA_PROTECT);
new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1);
new_prot = pgprot_clear_protnone_bits(new_prot);
/*
@ -1944,7 +1989,7 @@ int set_mce_nospec(unsigned long pfn)
return rc;
}
static int set_memory_present(unsigned long *addr, int numpages)
static int set_memory_p(unsigned long *addr, int numpages)
{
return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}
@ -1954,7 +1999,7 @@ int clear_mce_nospec(unsigned long pfn)
{
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
return set_memory_present(&addr, 1);
return set_memory_p(&addr, 1);
}
EXPORT_SYMBOL_GPL(clear_mce_nospec);
#endif /* CONFIG_X86_64 */