1de4fa14ee
The previous patch allocates bounds tables on-demand. As noted in an earlier description, these can add up to *HUGE* amounts of memory. This has caused OOMs in practice when running tests. This patch adds support for freeing bounds tables when they are no longer in use. There are two types of mappings in play when unmapping tables: 1. The mapping with the actual data, which userspace is munmap()ing or brk()ing away, etc... 2. The mapping for the bounds table *backing* the data (it is tagged with VM_MPX, see the patch "add MPX specific mmap interface"). If userspace uses the prctl() introduced earlier in this patchset to enable the management of bounds tables in kernel, when it unmaps the first type of mapping with the actual data, the kernel needs to free the mapping for the bounds table backing the data. This patch hooks in at the very end of do_unmap() to do so. We look at the addresses being unmapped and find the bounds directory entries and tables which cover those addresses. If an entire table is unused, we clear the associated directory entry and free the table. Once we unmap the bounds table, we would have a bounds directory entry pointing at empty address space. That address space might now be allocated for some other (random) use, and the MPX hardware might now try to walk it as if it were a bounds table. That would be bad. So any unmapping of an entire bounds table has to be accompanied by a corresponding write to the bounds directory entry to invalidate it. That write to the bounds directory can fault, which causes the following problem: Since we are doing the freeing from munmap() (and other paths like it), we hold mmap_sem for write. If we fault, the page fault handler will attempt to acquire mmap_sem for read and we will deadlock. To avoid the deadlock, we pagefault_disable() when touching the bounds directory entry and use get_user_pages() to resolve the fault. The unmapping of bounds tables happens under vm_munmap(). 
We also (indirectly) call vm_munmap() to _do_ the unmapping of the bounds tables. We avoid unbounded recursion by disallowing freeing of bounds tables *for* bounds tables. This would not occur normally, so should not have any practical impact. Being strict about it here helps ensure that we do not have an exploitable stack overflow. Based-on-patch-by: Qiaowei Ren <qiaowei.ren@intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Cc: linux-mm@kvack.org Cc: linux-mips@linux-mips.org Cc: Dave Hansen <dave@sr71.net> Link: http://lkml.kernel.org/r/20141114151831.E4531C4A@viggo.jf.intel.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
104 lines
2.9 KiB
C
104 lines
2.9 KiB
C
#ifndef _ASM_X86_MPX_H
|
|
#define _ASM_X86_MPX_H
|
|
|
|
#include <linux/types.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/insn.h>
|
|
|
|
/*
 * Sentinel meaning "no bounds directory".  NULL is theoretically a valid
 * place to put the bounds directory, so an invalid address (-1) is used
 * as the marker instead.
 */
#define MPX_INVALID_BOUNDS_DIR	((void __user *)-1)

/* Bit 0 flag of the bounds configuration (BNDCFG) value. */
#define MPX_BNDCFG_ENABLE_FLAG	0x1

/* Bit 0 flag of a bounds directory entry: the entry is valid. */
#define MPX_BD_ENTRY_VALID_FLAG	0x1
|
|
|
|
/*
 * Address-layout constants for the bounds directory (BD) and the bounds
 * tables (BT).
 *
 *   *_ENTRY_OFFSET - number of virtual-address bits used as the index
 *   *_ENTRY_SHIFT  - added to the index width to form the table size
 *                    (1UL << (OFFSET + SHIFT)), i.e. log2 of the size of
 *                    one entry in bytes
 *   MPX_IGN_BITS   - low address bits that take no part in indexing
 *   MPX_BD_ENTRY_TAIL - low bits of a BD entry that are not part of the
 *                    bounds table address
 */
#ifdef CONFIG_X86_64

/*
 * The upper 28 bits [47:20] of the virtual address in 64-bit are used
 * to index into the bounds directory (BD).
 */
#define MPX_BD_ENTRY_OFFSET	28
#define MPX_BD_ENTRY_SHIFT	3
/*
 * Bits [19:3] of the virtual address in 64-bit are used to index into
 * a bounds table (BT).
 */
#define MPX_BT_ENTRY_OFFSET	17
#define MPX_BT_ENTRY_SHIFT	5
#define MPX_IGN_BITS		3
#define MPX_BD_ENTRY_TAIL	3

#else

/*
 * 32-bit layout, following the same scheme: 20 index bits for the
 * bounds directory, 10 index bits for a bounds table, and the low
 * 2 address bits ignored.
 */
#define MPX_BD_ENTRY_OFFSET	20
#define MPX_BD_ENTRY_SHIFT	2
#define MPX_BT_ENTRY_OFFSET	10
#define MPX_BT_ENTRY_SHIFT	4
#define MPX_IGN_BITS		2
#define MPX_BD_ENTRY_TAIL	2

#endif
|
|
|
|
/*
 * Size in bytes of the bounds directory and of a single bounds table:
 * two to the power of (index width + per-entry shift).
 */
#define MPX_BD_SIZE_BYTES	(1UL << (MPX_BD_ENTRY_OFFSET + MPX_BD_ENTRY_SHIFT))
#define MPX_BT_SIZE_BYTES	(1UL << (MPX_BT_ENTRY_OFFSET + MPX_BT_ENTRY_SHIFT))
|
|
|
|
/*
 * Number of low, non-address bits in the BNDSTA and BNDCFG values, and
 * the masks that strip those bits to leave only the address part.
 */
#define MPX_BNDSTA_TAIL		2
#define MPX_BNDCFG_TAIL		12
#define MPX_BNDSTA_ADDR_MASK	(~((1UL << MPX_BNDSTA_TAIL) - 1))
#define MPX_BNDCFG_ADDR_MASK	(~((1UL << MPX_BNDCFG_TAIL) - 1))
/* Strips the low MPX_BD_ENTRY_TAIL bits of a bounds directory entry. */
#define MPX_BT_ADDR_MASK	(~((1UL << MPX_BD_ENTRY_TAIL) - 1))

/* Mask for the low two bits of BNDSTA (the bits MPX_BNDSTA_TAIL covers). */
#define MPX_BNDSTA_ERROR_CODE	0x3
|
|
|
|
/*
 * Index masks: as many low bits set as there are index bits for the
 * bounds directory / bounds table.  Built from 1UL, like the other
 * masks in this header, so the arithmetic is done in unsigned long.
 */
#define MPX_BD_ENTRY_MASK	((1UL << MPX_BD_ENTRY_OFFSET) - 1)
#define MPX_BT_ENTRY_MASK	((1UL << MPX_BT_ENTRY_OFFSET) - 1)

/*
 * Byte offset, within the bounds directory, of the entry covering @addr:
 * drop the bounds-table index bits and the ignored low bits, keep the
 * directory index, then scale by the directory entry size.
 */
#define MPX_GET_BD_ENTRY_OFFSET(addr)	((((addr) >> (MPX_BT_ENTRY_OFFSET + \
		MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT)

/*
 * Byte offset, within a bounds table, of the entry covering @addr:
 * drop the ignored low bits, keep the table index, then scale by the
 * table entry size.
 */
#define MPX_GET_BT_ENTRY_OFFSET(addr)	((((addr) >> MPX_IGN_BITS) & \
		MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT)
|
|
|
|
#ifdef CONFIG_X86_INTEL_MPX
|
|
/*
 * Build a siginfo from the register state and the xsave buffer.
 * NOTE(review): presumably used when reporting a bounds exception to
 * userspace; confirm return/ownership semantics against the definition.
 * The !CONFIG_X86_INTEL_MPX stub returns NULL.
 */
siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
				struct xsave_struct *xsave_buf);

/*
 * Handle a bounds directory fault using the state in @xsave_buf.
 * NOTE(review): presumably returns 0 on success and a negative errno on
 * failure; the !CONFIG_X86_INTEL_MPX stub returns -EINVAL.
 */
int mpx_handle_bd_fault(struct xsave_struct *xsave_buf);
|
|
static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
|
|
{
|
|
return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR);
|
|
}
|
|
static inline void mpx_mm_init(struct mm_struct *mm)
|
|
{
|
|
/*
|
|
* NULL is theoretically a valid place to put the bounds
|
|
* directory, so point this at an invalid address.
|
|
*/
|
|
mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
|
|
}
|
|
/*
 * Tell MPX that the address range @start..@end of @mm is being unmapped,
 * so that bounds tables which are no longer in use can be freed and their
 * bounds directory entries invalidated.
 * NOTE(review): whether @end is inclusive or exclusive, and the exact role
 * of @vma, are not visible here; confirm against the definition.
 */
void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long start, unsigned long end);
|
|
#else
|
|
/*
 * Stub used when CONFIG_X86_INTEL_MPX is not set: no siginfo is ever
 * generated, so always return NULL.
 */
static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
					      struct xsave_struct *xsave_buf)
{
	return NULL;
}
|
|
/*
 * Stub used when CONFIG_X86_INTEL_MPX is not set: bounds directory
 * faults cannot be handled, so always fail with -EINVAL.
 */
static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf)
{
	return -EINVAL;
}
|
|
/*
 * Stub used when CONFIG_X86_INTEL_MPX is not set: the kernel never
 * manages bounds tables, so always return 0.
 */
static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
{
	return 0;
}
|
|
/*
 * Stub used when CONFIG_X86_INTEL_MPX is not set: there is no MPX state
 * to initialise, so this does nothing.
 */
static inline void mpx_mm_init(struct mm_struct *mm)
{
}
|
|
/*
 * Stub used when CONFIG_X86_INTEL_MPX is not set: there are no bounds
 * tables to free on unmap, so this does nothing.
 */
static inline void mpx_notify_unmap(struct mm_struct *mm,
				    struct vm_area_struct *vma,
				    unsigned long start, unsigned long end)
{
}
|
|
#endif /* CONFIG_X86_INTEL_MPX */
|
|
|
|
#endif /* _ASM_X86_MPX_H */
|