Frans Pop reported the crash below when running an s390 kernel under Hercules:

  Kernel BUG at 000738b4 [verbose debug info unavailable]
  fixpoint divide exception: 0009 [#1] SMP
  Modules linked in: nfs lockd nfs_acl sunrpc ctcm fsm tape_34xx cu3088 tape ccwgroup tape_class ext3 jbd mbcache dm_mirror dm_log dm_snapshot dm_mod dasd_eckd_mod dasd_mod
  CPU: 0 Not tainted 2.6.27.19 #13
  Process awk (pid: 2069, task: 0f9ed9b8, ksp: 0f4f7d18)
  Krnl PSW : 070c1000 800738b4 (acct_update_integrals+0x4c/0x118)
             R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:1 PM:0
  Krnl GPRS: 00000000 000007d0 7fffffff fffff830
             00000000 ffffffff 00000002 0f9ed9b8
             00000000 00008ca0 00000000 0f9ed9b8
             0f9edda4 8007386e 0f4f7ec8 0f4f7e98
  Krnl Code:  800738aa: a71807d0    lhi   %r1,2000
              800738ae: 8c200001    srdl  %r2,1
              800738b2: 1d21        dr    %r2,%r1
             >800738b4: 5810d10e    l     %r1,270(%r13)
              800738b8: 1823        lr    %r2,%r3
              800738ba: 4130f060    la    %r3,96(%r15)
              800738be: 0de1        basr  %r14,%r1
              800738c0: 5800f060    l     %r0,96(%r15)
  Call Trace:
  ([<000000000004fdea>] blocking_notifier_call_chain+0x1e/0x2c)
   [<0000000000038502>] do_exit+0x106/0x7c0
   [<0000000000038c36>] do_group_exit+0x7a/0xb4
   [<0000000000038c8e>] SyS_exit_group+0x1e/0x30
   [<0000000000021c28>] sysc_do_restart+0x12/0x16
   [<0000000077e7e924>] 0x77e7e924

The reason for this is that cpu time accounting usually only happens from interrupt context, but acct_update_integrals is also called from process context with interrupts enabled.

So in acct_update_integrals we may end up with the following scenario: between reading tsk->stime/tsk->utime and reading tsk->acct_timexpd, an interrupt happens which updates the accounting values. This causes acct_timexpd to be greater than the previously read stime + utime. The subsequent calculation

  dtime = cputime_sub(time, tsk->acct_timexpd);

then yields a negative value, and the division performed by cputime_to_jiffies(dtime) generates an exception since the result won't fit into a 32 bit register.

In order to fix this, just always disable interrupts while accessing any of the accounting values.
Reported-by: Frans Pop <elendil@planet.nl>
Tested-by: Frans Pop <elendil@planet.nl>
Cc: stable@kernel.org
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
			
				
	
	
		
			156 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			156 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * tsacct.c - System accounting over taskstats interface
 | |
|  *
 | |
|  * Copyright (C) Jay Lan,	<jlan@sgi.com>
 | |
|  *
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify
 | |
|  * it under the terms of the GNU General Public License as published by
 | |
|  * the Free Software Foundation; either version 2 of the License, or
 | |
|  * (at your option) any later version.
 | |
|  *
 | |
|  * This program is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|  * GNU General Public License for more details.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/sched.h>
 | |
| #include <linux/tsacct_kern.h>
 | |
| #include <linux/acct.h>
 | |
| #include <linux/jiffies.h>
 | |
| 
 | |
| /*
 | |
|  * fill in basic accounting fields
 | |
|  */
 | |
| void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
 | |
| {
 | |
| 	const struct cred *tcred;
 | |
| 	struct timespec uptime, ts;
 | |
| 	u64 ac_etime;
 | |
| 
 | |
| 	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
 | |
| 
 | |
| 	/* calculate task elapsed time in timespec */
 | |
| 	do_posix_clock_monotonic_gettime(&uptime);
 | |
| 	ts = timespec_sub(uptime, tsk->start_time);
 | |
| 	/* rebase elapsed time to usec (should never be negative) */
 | |
| 	ac_etime = timespec_to_ns(&ts);
 | |
| 	do_div(ac_etime, NSEC_PER_USEC);
 | |
| 	stats->ac_etime = ac_etime;
 | |
| 	stats->ac_btime = get_seconds() - ts.tv_sec;
 | |
| 	if (thread_group_leader(tsk)) {
 | |
| 		stats->ac_exitcode = tsk->exit_code;
 | |
| 		if (tsk->flags & PF_FORKNOEXEC)
 | |
| 			stats->ac_flag |= AFORK;
 | |
| 	}
 | |
| 	if (tsk->flags & PF_SUPERPRIV)
 | |
| 		stats->ac_flag |= ASU;
 | |
| 	if (tsk->flags & PF_DUMPCORE)
 | |
| 		stats->ac_flag |= ACORE;
 | |
| 	if (tsk->flags & PF_SIGNALED)
 | |
| 		stats->ac_flag |= AXSIG;
 | |
| 	stats->ac_nice	 = task_nice(tsk);
 | |
| 	stats->ac_sched	 = tsk->policy;
 | |
| 	stats->ac_pid	 = tsk->pid;
 | |
| 	rcu_read_lock();
 | |
| 	tcred = __task_cred(tsk);
 | |
| 	stats->ac_uid	 = tcred->uid;
 | |
| 	stats->ac_gid	 = tcred->gid;
 | |
| 	stats->ac_ppid	 = pid_alive(tsk) ?
 | |
| 				rcu_dereference(tsk->real_parent)->tgid : 0;
 | |
| 	rcu_read_unlock();
 | |
| 	stats->ac_utime	 = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC;
 | |
| 	stats->ac_stime	 = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
 | |
| 	stats->ac_utimescaled =
 | |
| 		cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC;
 | |
| 	stats->ac_stimescaled =
 | |
| 		cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC;
 | |
| 	stats->ac_minflt = tsk->min_flt;
 | |
| 	stats->ac_majflt = tsk->maj_flt;
 | |
| 
 | |
| 	strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm));
 | |
| }
 | |
| 
 | |
| 
 | |
| #ifdef CONFIG_TASK_XACCT
 | |
| 
 | |
| #define KB 1024
 | |
| #define MB (1024*KB)
 | |
| /*
 | |
|  * fill in extended accounting fields
 | |
|  */
 | |
| void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 | |
| {
 | |
| 	struct mm_struct *mm;
 | |
| 
 | |
| 	/* convert pages-usec to Mbyte-usec */
 | |
| 	stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
 | |
| 	stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
 | |
| 	mm = get_task_mm(p);
 | |
| 	if (mm) {
 | |
| 		/* adjust to KB unit */
 | |
| 		stats->hiwater_rss   = get_mm_hiwater_rss(mm) * PAGE_SIZE / KB;
 | |
| 		stats->hiwater_vm    = get_mm_hiwater_vm(mm)  * PAGE_SIZE / KB;
 | |
| 		mmput(mm);
 | |
| 	}
 | |
| 	stats->read_char	= p->ioac.rchar;
 | |
| 	stats->write_char	= p->ioac.wchar;
 | |
| 	stats->read_syscalls	= p->ioac.syscr;
 | |
| 	stats->write_syscalls	= p->ioac.syscw;
 | |
| #ifdef CONFIG_TASK_IO_ACCOUNTING
 | |
| 	stats->read_bytes	= p->ioac.read_bytes;
 | |
| 	stats->write_bytes	= p->ioac.write_bytes;
 | |
| 	stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes;
 | |
| #else
 | |
| 	stats->read_bytes	= 0;
 | |
| 	stats->write_bytes	= 0;
 | |
| 	stats->cancelled_write_bytes = 0;
 | |
| #endif
 | |
| }
 | |
| #undef KB
 | |
| #undef MB
 | |
| 
 | |
| /**
 | |
|  * acct_update_integrals - update mm integral fields in task_struct
 | |
|  * @tsk: task_struct for accounting
 | |
|  */
 | |
| void acct_update_integrals(struct task_struct *tsk)
 | |
| {
 | |
| 	if (likely(tsk->mm)) {
 | |
| 		cputime_t time, dtime;
 | |
| 		struct timeval value;
 | |
| 		unsigned long flags;
 | |
| 		u64 delta;
 | |
| 
 | |
| 		local_irq_save(flags);
 | |
| 		time = tsk->stime + tsk->utime;
 | |
| 		dtime = cputime_sub(time, tsk->acct_timexpd);
 | |
| 		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
 | |
| 		delta = value.tv_sec;
 | |
| 		delta = delta * USEC_PER_SEC + value.tv_usec;
 | |
| 
 | |
| 		if (delta == 0)
 | |
| 			goto out;
 | |
| 		tsk->acct_timexpd = time;
 | |
| 		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
 | |
| 		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
 | |
| 	out:
 | |
| 		local_irq_restore(flags);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * acct_clear_integrals - clear the mm integral fields in task_struct
 | |
|  * @tsk: task_struct whose accounting fields are cleared
 | |
|  */
 | |
| void acct_clear_integrals(struct task_struct *tsk)
 | |
| {
 | |
| 	tsk->acct_timexpd = 0;
 | |
| 	tsk->acct_rss_mem1 = 0;
 | |
| 	tsk->acct_vm_mem1 = 0;
 | |
| }
 | |
| #endif
 |