More consolidation and correctness fixes for the debug exception:

- Ensure BTF synchronization under all circumstances
 
    - Distangle kernel and user mode #DB further
 
    - Get ordering vs. the debug notifier correct to make KGDB work more
      reliably.
 
    - Cleanup historical gunk and make the code simpler to understand.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl+ElmQTHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoThRD/4zTPn3NXQEY5VFi2CqhGjrm1Z6mKZ5
 dDfJDwQ5RByJhzehMy9guZ7QKZcUa7zikI/NHTz4FfcweIyaTY9ypJdo0hdY6DqW
 NaUB2zo+AoPDH6/GnxN5pZS4Y3wB2nQRqr0xjYHmbHcv796LRhccZiRb2zVeIpNF
 J9uc92p273ygNRq475TBIxmzxVOOq0K8SDupFN6lxZbibkSUFzYMX6mfT0e8UMG0
 fRQshModZSbQ+VZ26kysdXL2xN3CgLpK/ZuRJSFEFQKLzcmRzPRAYm3m1eZAeKMr
 dtwnScOWCOBDC+/lFuTA3ZOOV06lfGz0tsfmv0lTlg+NzH37weXUeiiBYdQqMobs
 4Iy22pzVODVFB2K+Qqh4RM1ZaukRm9hBCqFq8kWedafNCq7sJEZlaeNkR5HLCdnW
 A0I4SL+XHgsY3Hg3U41tFgrRMcOdKML6BWsmKB65G3S3baxvebZIOc/1CuFiIaQz
 ggN/fhKNclKpGYp+PXgwZQs9cEDeJcI6W2epZdWKjCvi2IE4zJ38liNsg+GeIswD
 kBQFSgx9bogB6BPfuK36AiKzQVzgottE2zhVuXz35J03Ixvu5WhmyNIIyCOzVjHD
 R/GSBarEVsL6N7/j4RenRaAIgsxajKljFUXEVvdtr6YAIje089PcuUmBVnkKeIUr
 HQe/lZcIflg48Q==
 =nIzH
 -----END PGP SIGNATURE-----

Merge tag 'x86-entry-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 entry code updates from Thomas Gleixner:
 "More consolidation and correctness fixes for the debug exception:

   - Ensure BTF synchronization under all circumstances

   - Distangle kernel and user mode #DB further

   - Get ordering vs. the debug notifier correct to make KGDB work more
     reliably.

   - Cleanup historical gunk and make the code simpler to understand"

* tag 'x86-entry-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/debug: Change thread.debugreg6 to thread.virtual_dr6
  x86/debug: Support negative polarity DR6 bits
  x86/debug: Simplify hw_breakpoint_handler()
  x86/debug: Remove aout_dump_debugregs()
  x86/debug: Remove the historical junk
  x86/debug: Move cond_local_irq_enable() block into exc_debug_user()
  x86/debug: Move historical SYSENTER junk into exc_debug_kernel()
  x86/debug: Simplify #DB signal code
  x86/debug: Remove handle_debug(.user) argument
  x86/debug: Move kprobe_debug_handler() into exc_debug_kernel()
  x86/debug: Sync BTF earlier
This commit is contained in:
Linus Torvalds 2020-10-12 12:05:24 -07:00
commit 13cb73490f
7 changed files with 94 additions and 129 deletions

View File

@ -90,8 +90,6 @@ static __always_inline bool hw_breakpoint_active(void)
return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
}
extern void aout_dump_debugregs(struct user *dump);
extern void hw_breakpoint_restore(void);
static __always_inline unsigned long local_db_save(void)

View File

@ -106,5 +106,9 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_int3_handler(struct pt_regs *regs);
extern int kprobe_debug_handler(struct pt_regs *regs);
#else
static inline int kprobe_debug_handler(struct pt_regs *regs) { return 0; }
#endif /* CONFIG_KPROBES */
#endif /* _ASM_X86_KPROBES_H */

View File

@ -517,7 +517,7 @@ struct thread_struct {
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
/* Debug status used for traps, single steps, etc... */
unsigned long debugreg6;
unsigned long virtual_dr6;
/* Keep track of the exact dr7 value set by the user */
unsigned long ptrace_dr7;
/* Fault info: */

View File

@ -441,42 +441,6 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
return 0;
}
/*
* Dump the debug register contents to the user.
* We can't dump our per cpu values because it
* may contain cpu wide breakpoint, something that
* doesn't belong to the current task.
*
* TODO: include non-ptrace user breakpoints (perf)
*/
void aout_dump_debugregs(struct user *dump)
{
int i;
int dr7 = 0;
struct perf_event *bp;
struct arch_hw_breakpoint *info;
struct thread_struct *thread = &current->thread;
for (i = 0; i < HBP_NUM; i++) {
bp = thread->ptrace_bps[i];
if (bp && !bp->attr.disabled) {
dump->u_debugreg[i] = bp->attr.bp_addr;
info = counter_arch_bp(bp);
dr7 |= encode_dr7(i, info->len, info->type);
} else {
dump->u_debugreg[i] = 0;
}
}
dump->u_debugreg[4] = 0;
dump->u_debugreg[5] = 0;
dump->u_debugreg[6] = current->thread.debugreg6;
dump->u_debugreg[7] = dr7;
}
EXPORT_SYMBOL_GPL(aout_dump_debugregs);
/*
* Release the user breakpoints used by ptrace
*/
@ -490,7 +454,7 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
t->ptrace_bps[i] = NULL;
}
t->debugreg6 = 0;
t->virtual_dr6 = 0;
t->ptrace_dr7 = 0;
}
@ -500,7 +464,7 @@ void hw_breakpoint_restore(void)
set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
set_debugreg(current->thread.debugreg6, 6);
set_debugreg(DR6_RESERVED, 6);
set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
@ -523,10 +487,10 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
*/
static int hw_breakpoint_handler(struct die_args *args)
{
int i, cpu, rc = NOTIFY_STOP;
int i, rc = NOTIFY_STOP;
struct perf_event *bp;
unsigned long dr6;
unsigned long *dr6_p;
unsigned long dr6;
/* The DR6 value is pointed by args->err */
dr6_p = (unsigned long *)ERR_PTR(args->err);
@ -540,14 +504,6 @@ static int hw_breakpoint_handler(struct die_args *args)
if ((dr6 & DR_TRAP_BITS) == 0)
return NOTIFY_DONE;
/*
* Assert that local interrupts are disabled
* Reset the DRn bits in the virtualized register value.
* The ptrace trigger routine will add in whatever is needed.
*/
current->thread.debugreg6 &= ~DR_TRAP_BITS;
cpu = get_cpu();
/* Handle all the breakpoints that were triggered */
for (i = 0; i < HBP_NUM; ++i) {
if (likely(!(dr6 & (DR_TRAP0 << i))))
@ -561,7 +517,7 @@ static int hw_breakpoint_handler(struct die_args *args)
*/
rcu_read_lock();
bp = per_cpu(bp_per_reg[i], cpu);
bp = this_cpu_read(bp_per_reg[i]);
/*
* Reset the 'i'th TRAP bit in dr6 to denote completion of
* exception handling
@ -592,12 +548,10 @@ static int hw_breakpoint_handler(struct die_args *args)
* breakpoints (to generate signals) and b) when the system has
* taken exception due to multiple causes
*/
if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
if ((current->thread.virtual_dr6 & DR_TRAP_BITS) ||
(dr6 & (~DR_TRAP_BITS)))
rc = NOTIFY_DONE;
put_cpu();
return rc;
}

View File

@ -629,9 +629,10 @@ static void kgdb_hw_overflow_handler(struct perf_event *event,
struct task_struct *tsk = current;
int i;
for (i = 0; i < 4; i++)
for (i = 0; i < 4; i++) {
if (breakinfo[i].enabled)
tsk->thread.debugreg6 |= (DR_TRAP0 << i);
tsk->thread.virtual_dr6 |= (DR_TRAP0 << i);
}
}
void kgdb_arch_late(void)

View File

@ -465,7 +465,7 @@ static void ptrace_triggered(struct perf_event *bp,
break;
}
thread->debugreg6 |= (DR_TRAP0 << i);
thread->virtual_dr6 |= (DR_TRAP0 << i);
}
/*
@ -601,7 +601,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
if (bp)
val = bp->hw.info.address;
} else if (n == 6) {
val = thread->debugreg6;
val = thread->virtual_dr6 ^ DR6_RESERVED; /* Flip back to arch polarity */
} else if (n == 7) {
val = thread->ptrace_dr7;
}
@ -657,7 +657,7 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n,
if (n < HBP_NUM) {
rc = ptrace_set_breakpoint_addr(tsk, n, val);
} else if (n == 6) {
thread->debugreg6 = val;
thread->virtual_dr6 = val ^ DR6_RESERVED; /* Flip to positive polarity */
rc = 0;
} else if (n == 7) {
rc = ptrace_write_dr7(tsk, val);

View File

@ -745,9 +745,21 @@ static __always_inline unsigned long debug_read_clear_dr6(void)
* Keep it simple: clear DR6 immediately.
*/
get_debugreg(dr6, 6);
set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
dr6 &= ~DR6_RESERVED;
set_debugreg(DR6_RESERVED, 6);
dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
/*
* Clear the virtual DR6 value, ptrace routines will set bits here for
* things we want signals for.
*/
current->thread.virtual_dr6 = 0;
/*
* The SDM says "The processor clears the BTF flag when it
* generates a debug exception." Clear TIF_BLOCKSTEP to keep
* TIF_BLOCKSTEP in sync with the hardware BTF flag.
*/
clear_thread_flag(TIF_BLOCKSTEP);
return dr6;
}
@ -776,74 +788,20 @@ static __always_inline unsigned long debug_read_clear_dr6(void)
*
* May run on IST stack.
*/
static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
static bool notify_debug(struct pt_regs *regs, unsigned long *dr6)
{
struct task_struct *tsk = current;
bool user_icebp;
int si_code;
/*
* The SDM says "The processor clears the BTF flag when it
* generates a debug exception." Clear TIF_BLOCKSTEP to keep
* TIF_BLOCKSTEP in sync with the hardware BTF flag.
* Notifiers will clear bits in @dr6 to indicate the event has been
* consumed - hw_breakpoint_handler(), single_stop_cont().
*
* Notifiers will set bits in @virtual_dr6 to indicate the desire
* for signals - ptrace_triggered(), kgdb_hw_overflow_handler().
*/
clear_thread_flag(TIF_BLOCKSTEP);
if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP)
return true;
/*
* If DR6 is zero, no point in trying to handle it. The kernel is
* not using INT1.
*/
if (!user && !dr6)
return;
/*
* If dr6 has no reason to give us about the origin of this trap,
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
*/
user_icebp = user && !dr6;
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
#ifdef CONFIG_KPROBES
if (kprobe_debug_handler(regs)) {
return;
}
#endif
if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0,
SIGTRAP) == NOTIFY_STOP) {
return;
}
/* It's safe to allow irq's after DR6 has been saved */
cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, 0,
X86_TRAP_DB);
goto out;
}
if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
/*
* Historical junk that used to handle SYSENTER single-stepping.
* This should be unreachable now. If we survive for a while
* without anyone hitting this warning, we'll turn this into
* an oops.
*/
tsk->thread.debugreg6 &= ~DR_STEP;
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
}
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(regs, 0, si_code);
out:
cond_local_irq_disable(regs);
return false;
}
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
@ -877,8 +835,32 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
dr6 &= ~DR_STEP;
handle_debug(regs, dr6, false);
if (kprobe_debug_handler(regs))
goto out;
/*
* The kernel doesn't use INT1
*/
if (!dr6)
goto out;
if (notify_debug(regs, &dr6))
goto out;
/*
* The kernel doesn't use TF single-step outside of:
*
* - Kprobes, consumed through kprobe_debug_handler()
* - KGDB, consumed through notify_debug()
*
* So if we get here with DR_STEP set, something is wonky.
*
* A known way to trigger this is through QEMU's GDB stub,
* which leaks #DB into the guest and causes IST recursion.
*/
if (WARN_ON_ONCE(dr6 & DR_STEP))
regs->flags &= ~X86_EFLAGS_TF;
out:
instrumentation_end();
idtentry_exit_nmi(regs, irq_state);
@ -888,6 +870,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
static __always_inline void exc_debug_user(struct pt_regs *regs,
unsigned long dr6)
{
bool icebp;
/*
* If something gets miswired and we end up here for a kernel mode
* #DB, we will malfunction.
@ -906,8 +890,32 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
handle_debug(regs, dr6, true);
/*
* If dr6 has no reason to give us about the origin of this trap,
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
*/
icebp = !dr6;
if (notify_debug(regs, &dr6))
goto out;
/* It's safe to allow irq's after DR6 has been saved */
local_irq_enable();
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB);
goto out_irq;
}
/* Add the virtual_dr6 bits for signals. */
dr6 |= current->thread.virtual_dr6;
if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)
send_sigtrap(regs, 0, get_si_code(dr6));
out_irq:
local_irq_disable();
out:
instrumentation_end();
irqentry_exit_to_user_mode(regs);
}