Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
  rcu: Move propagation of ->completed from rcu_start_gp() to rcu_report_qs_rsp()
  rcu: Remove rcu_needs_cpu_flush() to avoid false quiescent states
  rcu: Wire up RCU_BOOST_PRIO for rcutree
  rcu: Make rcu_torture_boost() exit loops at end of test
  rcu: Make rcu_torture_fqs() exit loops at end of test
  rcu: Permit rt_mutex_unlock() with irqs disabled
  rcu: Avoid having just-onlined CPU resched itself when RCU is idle
  rcu: Suppress NMI backtraces when stall ends before dump
  rcu: Prohibit grace periods during early boot
  rcu: Simplify unboosting checks
  rcu: Prevent early boot set_need_resched() from __rcu_pending()
  rcu: Dump local stack if cannot dump all CPUs' stacks
  rcu: Move __rcu_read_unlock()'s barrier() within if-statement
  rcu: Improve rcu_assign_pointer() and RCU_INIT_POINTER() documentation
  rcu: Make rcu_assign_pointer() unconditionally insert a memory barrier
  rcu: Make rcu_implicit_dynticks_qs() locals be correct size
  rcu: Eliminate in_irq() checks in rcu_enter_nohz()
  nohz: Remove nohz_cpu_mask
  rcu: Document interpretation of RCU-lockdep splats
  rcu: Allow rcutorture's stat_interval parameter to be changed at runtime
  ...
commit 19b4a8d520
				| @ -95,7 +95,7 @@ not to return until all ongoing NMI handlers exit.  It is therefore safe | ||||
| to free up the handler's data as soon as synchronize_sched() returns. | ||||
| 
 | ||||
| Important note: for this to work, the architecture in question must | ||||
| invoke irq_enter() and irq_exit() on NMI entry and exit, respectively. | ||||
| invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively. | ||||
| 
 | ||||
| 
 | ||||
| Answer to Quick Quiz | ||||
|  | ||||
							
								
								
									
Documentation/RCU/lockdep-splat.txt (new file, 110 lines)
							| @ -0,0 +1,110 @@ | ||||
| Lockdep-RCU was added to the Linux kernel in early 2010 | ||||
| (http://lwn.net/Articles/371986/).  This facility checks for some common | ||||
| misuses of the RCU API, most notably using one of the rcu_dereference() | ||||
| family to access an RCU-protected pointer without the proper protection. | ||||
| When such misuse is detected, a lockdep-RCU splat is emitted. | ||||
| 
 | ||||
| The usual cause of a lockdep-RCU splat is someone accessing an | ||||
| RCU-protected data structure without either (1) being in the right kind of | ||||
| RCU read-side critical section or (2) holding the right update-side lock. | ||||
| This problem can therefore be serious: it might result in random memory | ||||
| overwriting or worse.  There can of course be false positives, this | ||||
| being the real world and all that. | ||||
| 
 | ||||
| So let's look at an example RCU lockdep splat from 3.0-rc5, one that | ||||
| has long since been fixed: | ||||
| 
 | ||||
| =============================== | ||||
| [ INFO: suspicious RCU usage. ] | ||||
| ------------------------------- | ||||
| block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage! | ||||
| 
 | ||||
| other info that might help us debug this: | ||||
| 
 | ||||
| 
 | ||||
| rcu_scheduler_active = 1, debug_locks = 0 | ||||
| 3 locks held by scsi_scan_6/1552: | ||||
|  #0:  (&shost->scan_mutex){+.+.+.}, at: [<ffffffff8145efca>] | ||||
| scsi_scan_host_selected+0x5a/0x150 | ||||
|  #1:  (&eq->sysfs_lock){+.+...}, at: [<ffffffff812a5032>] | ||||
| elevator_exit+0x22/0x60 | ||||
|  #2:  (&(&q->__queue_lock)->rlock){-.-...}, at: [<ffffffff812b6233>] | ||||
| cfq_exit_queue+0x43/0x190 | ||||
| 
 | ||||
| stack backtrace: | ||||
| Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17 | ||||
| Call Trace: | ||||
|  [<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0 | ||||
|  [<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120 | ||||
|  [<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190 | ||||
|  [<ffffffff812a5046>] elevator_exit+0x36/0x60 | ||||
|  [<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60 | ||||
|  [<ffffffff8145cc09>] scsi_free_queue+0x9/0x10 | ||||
|  [<ffffffff81460944>] __scsi_remove_device+0x84/0xd0 | ||||
|  [<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10 | ||||
|  [<ffffffff817da069>] ? error_exit+0x29/0xb0 | ||||
|  [<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80 | ||||
|  [<ffffffff8145e722>] __scsi_scan_target+0x112/0x680 | ||||
|  [<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c | ||||
|  [<ffffffff817da069>] ? error_exit+0x29/0xb0 | ||||
|  [<ffffffff812bcc60>] ? kobject_del+0x40/0x40 | ||||
|  [<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0 | ||||
|  [<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150 | ||||
|  [<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90 | ||||
|  [<ffffffff8145f170>] do_scan_async+0x20/0x160 | ||||
|  [<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90 | ||||
|  [<ffffffff810975b6>] kthread+0xa6/0xb0 | ||||
|  [<ffffffff817db154>] kernel_thread_helper+0x4/0x10 | ||||
|  [<ffffffff81066430>] ? finish_task_switch+0x80/0x110 | ||||
|  [<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe | ||||
|  [<ffffffff81097510>] ? __init_kthread_worker+0x70/0x70 | ||||
|  [<ffffffff817db150>] ? gs_change+0xb/0xb | ||||
| 
 | ||||
| Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows: | ||||
| 
 | ||||
| 	if (rcu_dereference(ioc->ioc_data) == cic) { | ||||
| 
 | ||||
| This form says that the access must be in a plain vanilla RCU read-side critical | ||||
| section, but the "other info" list above shows that this is not the | ||||
| case.  Instead, we hold three locks, one of which might be RCU related. | ||||
| And maybe that lock really does protect this reference.  If so, the fix | ||||
| is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to | ||||
| take the struct request_queue "q" from cfq_exit_queue() as an argument, | ||||
| which would permit us to invoke rcu_dereference_protected() as follows: | ||||
| 
 | ||||
| 	if (rcu_dereference_protected(ioc->ioc_data, | ||||
| 				      lockdep_is_held(&q->queue_lock)) == cic) { | ||||
| 
 | ||||
| With this change, there would be no lockdep-RCU splat emitted if this | ||||
| code was invoked either from within an RCU read-side critical section | ||||
| or with the ->queue_lock held.  In particular, this would have suppressed | ||||
| the above lockdep-RCU splat because ->queue_lock is held (see #2 in the | ||||
| list above). | ||||
| 
 | ||||
| On the other hand, perhaps we really do need an RCU read-side critical | ||||
| section.  In this case, the critical section must span the use of the | ||||
| return value from rcu_dereference(), or at least until there is some | ||||
| reference count incremented or some such.  One way to handle this is to | ||||
| add rcu_read_lock() and rcu_read_unlock() as follows: | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	if (rcu_dereference(ioc->ioc_data) == cic) { | ||||
| 		spin_lock(&ioc->lock); | ||||
| 		rcu_assign_pointer(ioc->ioc_data, NULL); | ||||
| 		spin_unlock(&ioc->lock); | ||||
| 	} | ||||
| 	rcu_read_unlock(); | ||||
| 
 | ||||
| With this change, the rcu_dereference() is always within an RCU | ||||
| read-side critical section, which again would have suppressed the | ||||
| above lockdep-RCU splat. | ||||
| 
 | ||||
| But in this particular case, we don't actually dereference the pointer | ||||
| returned from rcu_dereference().  Instead, that pointer is just compared | ||||
| to the cic pointer, which means that the rcu_dereference() can be replaced | ||||
| by rcu_access_pointer() as follows: | ||||
| 
 | ||||
| 	if (rcu_access_pointer(ioc->ioc_data) == cic) { | ||||
| 
 | ||||
| Because it is legal to invoke rcu_access_pointer() without protection, | ||||
| this change would also suppress the above lockdep-RCU splat. | ||||
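As an aside (a minimal sketch, not part of this patch; "struct foo", "gp", and example_reader() are invented names), the distinction is that rcu_access_pointer() suffices when only the pointer value itself is examined, whereas actually dereferencing the pointed-to object still requires rcu_dereference() under proper protection:

	/* Assumes <linux/rcupdate.h> and <linux/kernel.h>. */
	struct foo {
		int field;
	};
	static struct foo __rcu *gp;	/* hypothetical RCU-protected pointer */

	static void example_reader(void)
	{
		struct foo *p;

		/* Pointer-value test only: no read-side critical section needed. */
		if (!rcu_access_pointer(gp))
			return;

		/* Dereferencing the object: must be inside rcu_read_lock(). */
		rcu_read_lock();
		p = rcu_dereference(gp);
		if (p)
			pr_info("field=%d\n", p->field);
		rcu_read_unlock();
	}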
| @ -32,9 +32,27 @@ checking of rcu_dereference() primitives: | ||||
| 	srcu_dereference(p, sp): | ||||
| 		Check for SRCU read-side critical section. | ||||
| 	rcu_dereference_check(p, c): | ||||
| 		Use explicit check expression "c".  This is useful in | ||||
| 		code that is invoked by both readers and updaters. | ||||
| 	rcu_dereference_raw(p) | ||||
| 		Use explicit check expression "c" along with | ||||
| 		rcu_read_lock_held().  This is useful in code that is | ||||
| 		invoked by both RCU readers and updaters. | ||||
| 	rcu_dereference_bh_check(p, c): | ||||
| 		Use explicit check expression "c" along with | ||||
| 		rcu_read_lock_bh_held().  This is useful in code that | ||||
| 		is invoked by both RCU-bh readers and updaters. | ||||
| 	rcu_dereference_sched_check(p, c): | ||||
| 		Use explicit check expression "c" along with | ||||
| 		rcu_read_lock_sched_held().  This is useful in code that | ||||
| 		is invoked by both RCU-sched readers and updaters. | ||||
| 	srcu_dereference_check(p, c): | ||||
| 		Use explicit check expression "c" along with | ||||
| 		srcu_read_lock_held().  This is useful in code that | ||||
| 		is invoked by both SRCU readers and updaters. | ||||
| 	rcu_dereference_index_check(p, c): | ||||
| 		Use explicit check expression "c", but the caller | ||||
| 		must supply one of the rcu_read_lock_held() functions. | ||||
| 		This is useful in code that uses RCU-protected arrays | ||||
| 		that is invoked by both RCU readers and updaters. | ||||
| 	rcu_dereference_raw(p): | ||||
| 		Don't check.  (Use sparingly, if at all.) | ||||
| 	rcu_dereference_protected(p, c): | ||||
| 		Use explicit check expression "c", and omit all barriers | ||||
| @ -48,13 +66,11 @@ checking of rcu_dereference() primitives: | ||||
| 		value of the pointer itself, for example, against NULL. | ||||
| 
 | ||||
| The rcu_dereference_check() check expression can be any boolean | ||||
| expression, but would normally include one of the rcu_read_lock_held() | ||||
| family of functions and a lockdep expression.  However, any boolean | ||||
| expression can be used.  For a moderately ornate example, consider | ||||
| the following: | ||||
| expression, but would normally include a lockdep expression.  However, | ||||
| any boolean expression can be used.  For a moderately ornate example, | ||||
| consider the following: | ||||
| 
 | ||||
| 	file = rcu_dereference_check(fdt->fd[fd], | ||||
| 				     rcu_read_lock_held() || | ||||
| 				     lockdep_is_held(&files->file_lock) || | ||||
| 				     atomic_read(&files->count) == 1); | ||||
| 
 | ||||
| @ -62,7 +78,7 @@ This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner, | ||||
| and, if CONFIG_PROVE_RCU is configured, verifies that this expression | ||||
| is used in: | ||||
| 
 | ||||
| 1.	An RCU read-side critical section, or | ||||
| 1.	An RCU read-side critical section (implicit), or | ||||
| 2.	with files->file_lock held, or | ||||
| 3.	on an unshared files_struct. | ||||
| 
 | ||||
|  | ||||
| @ -42,7 +42,7 @@ fqs_holdoff	Holdoff time (in microseconds) between consecutive calls | ||||
| fqs_stutter	Wait time (in seconds) between consecutive bursts | ||||
| 		of calls to force_quiescent_state(). | ||||
| 
 | ||||
| irqreaders	Says to invoke RCU readers from irq level.  This is currently | ||||
| irqreader	Says to invoke RCU readers from irq level.  This is currently | ||||
| 		done via timers.  Defaults to "1" for variants of RCU that | ||||
| 		permit this.  (Or, more accurately, variants of RCU that do | ||||
| 		-not- permit this know to ignore this variable.) | ||||
| @ -79,19 +79,68 @@ stutter		The length of time to run the test before pausing for this | ||||
| 		Specifying "stutter=0" causes the test to run continuously | ||||
| 		without pausing, which is the old default behavior. | ||||
| 
 | ||||
| test_boost	Whether or not to test the ability of RCU to do priority | ||||
| 		boosting.  Defaults to "test_boost=1", which performs | ||||
| 		RCU priority-inversion testing only if the selected | ||||
| 		RCU implementation supports priority boosting.  Specifying | ||||
| 		"test_boost=0" never performs RCU priority-inversion | ||||
| 		testing.  Specifying "test_boost=2" performs RCU | ||||
| 		priority-inversion testing even if the selected RCU | ||||
| 		implementation does not support RCU priority boosting, | ||||
| 		which can be used to test rcutorture's ability to | ||||
| 		carry out RCU priority-inversion testing. | ||||
| 
 | ||||
| test_boost_interval | ||||
| 		The number of seconds in an RCU priority-inversion test | ||||
| 		cycle.	Defaults to "test_boost_interval=7".  It is | ||||
| 		usually wise for this value to be relatively prime to | ||||
| 		the value selected for "stutter". | ||||
| 
 | ||||
| test_boost_duration | ||||
| 		The number of seconds to do RCU priority-inversion testing | ||||
| 		within any given "test_boost_interval".  Defaults to | ||||
| 		"test_boost_duration=4". | ||||
| 
 | ||||
| test_no_idle_hz	Whether or not to test the ability of RCU to operate in | ||||
| 		a kernel that disables the scheduling-clock interrupt to | ||||
| 		idle CPUs.  Boolean parameter, "1" to test, "0" otherwise. | ||||
| 		Defaults to omitting this test. | ||||
| 
 | ||||
| torture_type	The type of RCU to test: "rcu" for the rcu_read_lock() API, | ||||
| 		"rcu_sync" for rcu_read_lock() with synchronous reclamation, | ||||
| 		"rcu_bh" for the rcu_read_lock_bh() API, "rcu_bh_sync" for | ||||
| 		rcu_read_lock_bh() with synchronous reclamation, "srcu" for | ||||
| 		the "srcu_read_lock()" API, "sched" for the use of | ||||
| 		preempt_disable() together with synchronize_sched(), | ||||
| 		and "sched_expedited" for the use of preempt_disable() | ||||
| 		with synchronize_sched_expedited(). | ||||
| torture_type	The type of RCU to test, with string values as follows: | ||||
| 
 | ||||
| 		"rcu":  rcu_read_lock(), rcu_read_unlock() and call_rcu(). | ||||
| 
 | ||||
| 		"rcu_sync":  rcu_read_lock(), rcu_read_unlock(), and | ||||
| 			synchronize_rcu(). | ||||
| 
 | ||||
| 		"rcu_expedited": rcu_read_lock(), rcu_read_unlock(), and | ||||
| 			synchronize_rcu_expedited(). | ||||
| 
 | ||||
| 		"rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and | ||||
| 			call_rcu_bh(). | ||||
| 
 | ||||
| 		"rcu_bh_sync": rcu_read_lock_bh(), rcu_read_unlock_bh(), | ||||
| 			and synchronize_rcu_bh(). | ||||
| 
 | ||||
| 		"rcu_bh_expedited": rcu_read_lock_bh(), rcu_read_unlock_bh(), | ||||
| 			and synchronize_rcu_bh_expedited(). | ||||
| 
 | ||||
| 		"srcu": srcu_read_lock(), srcu_read_unlock() and | ||||
| 			synchronize_srcu(). | ||||
| 
 | ||||
| 		"srcu_expedited": srcu_read_lock(), srcu_read_unlock() and | ||||
| 			synchronize_srcu_expedited(). | ||||
| 
 | ||||
| 		"sched": preempt_disable(), preempt_enable(), and | ||||
| 			call_rcu_sched(). | ||||
| 
 | ||||
| 		"sched_sync": preempt_disable(), preempt_enable(), and | ||||
| 			synchronize_sched(). | ||||
| 
 | ||||
| 		"sched_expedited": preempt_disable(), preempt_enable(), and | ||||
| 			synchronize_sched_expedited(). | ||||
| 
 | ||||
| 		Defaults to "rcu". | ||||
| 
 | ||||
| verbose		Enable debug printk()s.  Default is disabled. | ||||
| 
 | ||||
| @ -100,12 +149,12 @@ OUTPUT | ||||
| 
 | ||||
| The statistics output is as follows: | ||||
| 
 | ||||
| 	rcu-torture: --- Start of test: nreaders=16 stat_interval=0 verbose=0 | ||||
| 	rcu-torture: rtc: 0000000000000000 ver: 1916 tfle: 0 rta: 1916 rtaf: 0 rtf: 1915 | ||||
| 	rcu-torture: Reader Pipe:  1466408 9747 0 0 0 0 0 0 0 0 0 | ||||
| 	rcu-torture: Reader Batch:  1464477 11678 0 0 0 0 0 0 0 0 | ||||
| 	rcu-torture: Free-Block Circulation:  1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 0 | ||||
| 	rcu-torture: --- End of test | ||||
| 	rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4 | ||||
| 	rcu-torture: rtc:           (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767 | ||||
| 	rcu-torture: Reader Pipe:  727860534 34213 0 0 0 0 0 0 0 0 0 | ||||
| 	rcu-torture: Reader Batch:  727877838 17003 0 0 0 0 0 0 0 0 0 | ||||
| 	rcu-torture: Free-Block Circulation:  155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0 | ||||
| 	rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4 | ||||
| 
 | ||||
| The command "dmesg | grep torture:" will extract this information on | ||||
| most systems.  On more esoteric configurations, it may be necessary to | ||||
| @ -113,26 +162,55 @@ use other commands to access the output of the printk()s used by | ||||
| the RCU torture test.  The printk()s use KERN_ALERT, so they should | ||||
| be evident.  ;-) | ||||
| 
 | ||||
| The first and last lines show the rcutorture module parameters, and the | ||||
| last line shows either "SUCCESS" or "FAILURE", based on rcutorture's | ||||
| automatic determination as to whether RCU operated correctly. | ||||
| 
 | ||||
| The entries are as follows: | ||||
| 
 | ||||
| o	"rtc": The hexadecimal address of the structure currently visible | ||||
| 	to readers. | ||||
| 
 | ||||
| o	"ver": The number of times since boot that the rcutw writer task | ||||
| o	"ver": The number of times since boot that the RCU writer task | ||||
| 	has changed the structure visible to readers. | ||||
| 
 | ||||
| o	"tfle": If non-zero, indicates that the "torture freelist" | ||||
| 	containing structure to be placed into the "rtc" area is empty. | ||||
| 	containing structures to be placed into the "rtc" area is empty. | ||||
| 	This condition is important, since it can fool you into thinking | ||||
| 	that RCU is working when it is not.  :-/ | ||||
| 
 | ||||
| o	"rta": Number of structures allocated from the torture freelist. | ||||
| 
 | ||||
| o	"rtaf": Number of allocations from the torture freelist that have | ||||
| 	failed due to the list being empty. | ||||
| 	failed due to the list being empty.  It is not unusual for this | ||||
| 	to be non-zero, but it is bad for it to be a large fraction of | ||||
| 	the value indicated by "rta". | ||||
| 
 | ||||
| o	"rtf": Number of frees into the torture freelist. | ||||
| 
 | ||||
| o	"rtmbe": A non-zero value indicates that rcutorture believes that | ||||
| 	rcu_assign_pointer() and rcu_dereference() are not working | ||||
| 	correctly.  This value should be zero. | ||||
| 
 | ||||
| o	"rtbke": rcutorture was unable to create the real-time kthreads | ||||
| 	used to force RCU priority inversion.  This value should be zero. | ||||
| 
 | ||||
| o	"rtbre": Although rcutorture successfully created the kthreads | ||||
| 	used to force RCU priority inversion, it was unable to set them | ||||
| 	to the real-time priority level of 1.  This value should be zero. | ||||
| 
 | ||||
| o	"rtbf": The number of times that RCU priority boosting failed | ||||
| 	to resolve RCU priority inversion. | ||||
| 
 | ||||
| o	"rtb": The number of times that rcutorture attempted to force | ||||
| 	an RCU priority inversion condition.  If you are testing RCU | ||||
| 	priority boosting via the "test_boost" module parameter, this | ||||
| 	value should be non-zero. | ||||
| 
 | ||||
| o	"nt": The number of times rcutorture ran RCU read-side code from | ||||
| 	within a timer handler.  This value should be non-zero only | ||||
| 	if you specified the "irqreader" module parameter. | ||||
| 
 | ||||
| o	"Reader Pipe": Histogram of "ages" of structures seen by readers. | ||||
| 	If any entries past the first two are non-zero, RCU is broken. | ||||
| 	And rcutorture prints the error flag string "!!!" to make sure | ||||
| @ -162,26 +240,15 @@ o	"Free-Block Circulation": Shows the number of torture structures | ||||
| 	somehow gets incremented farther than it should. | ||||
| 
 | ||||
| Different implementations of RCU can provide implementation-specific | ||||
| additional information.  For example, SRCU provides the following: | ||||
| additional information.  For example, SRCU provides the following | ||||
| additional line: | ||||
| 
 | ||||
| 	srcu-torture: rtc: f8cf46a8 ver: 355 tfle: 0 rta: 356 rtaf: 0 rtf: 346 rtmbe: 0 | ||||
| 	srcu-torture: Reader Pipe:  559738 939 0 0 0 0 0 0 0 0 0 | ||||
| 	srcu-torture: Reader Batch:  560434 243 0 0 0 0 0 0 0 0 | ||||
| 	srcu-torture: Free-Block Circulation:  355 354 353 352 351 350 349 348 347 346 0 | ||||
| 	srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1) | ||||
| 
 | ||||
| The first four lines are similar to those for RCU.  The last line shows | ||||
| the per-CPU counter state.  The numbers in parentheses are the values | ||||
| of the "old" and "current" counters for the corresponding CPU.  The | ||||
| "idx" value maps the "old" and "current" values to the underlying array, | ||||
| and is useful for debugging. | ||||
| 
 | ||||
| Similarly, sched_expedited RCU provides the following: | ||||
| 
 | ||||
| 	sched_expedited-torture: rtc: d0000000016c1880 ver: 1090796 tfle: 0 rta: 1090796 rtaf: 0 rtf: 1090787 rtmbe: 0 nt: 27713319 | ||||
| 	sched_expedited-torture: Reader Pipe:  12660320201 95875 0 0 0 0 0 0 0 0 0 | ||||
| 	sched_expedited-torture: Reader Batch:  12660424885 0 0 0 0 0 0 0 0 0 0 | ||||
| 	sched_expedited-torture: Free-Block Circulation:  1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0 | ||||
| This line shows the per-CPU counter state.  The numbers in parentheses are | ||||
| the values of the "old" and "current" counters for the corresponding CPU. | ||||
| The "idx" value maps the "old" and "current" values to the underlying | ||||
| array, and is useful for debugging. | ||||
| 
 | ||||
| 
 | ||||
| USAGE | ||||
|  | ||||
| @ -33,23 +33,23 @@ rcu/rcuboost: | ||||
| The output of "cat rcu/rcudata" looks as follows: | ||||
| 
 | ||||
| rcu_sched: | ||||
|   0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 | ||||
|   1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 | ||||
|   2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 | ||||
|   3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 | ||||
|   4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 | ||||
|   5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 | ||||
|   6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 | ||||
|   7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 | ||||
|   0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 | ||||
|   1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 | ||||
|   2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 | ||||
|   3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 | ||||
|   4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 | ||||
|   5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 | ||||
|   6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 | ||||
|   7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 | ||||
| rcu_bh: | ||||
|   0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 | ||||
|   1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 | ||||
|   2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 | ||||
|   3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 | ||||
|   4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 | ||||
|   5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 | ||||
|   6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 | ||||
|   7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 | ||||
|   0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 | ||||
|   1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 | ||||
|   2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 | ||||
|   3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 | ||||
|   4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 | ||||
|   5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 | ||||
|   6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 | ||||
|   7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 | ||||
| 
 | ||||
| The first section lists the rcu_data structures for rcu_sched, the second | ||||
| for rcu_bh.  Note that CONFIG_TREE_PREEMPT_RCU kernels will have an | ||||
| @ -84,7 +84,7 @@ o	"pq" indicates that this CPU has passed through a quiescent state | ||||
| 	CPU has not yet reported that fact, (2) some other CPU has not | ||||
| 	yet reported for this grace period, or (3) both. | ||||
| 
 | ||||
| o	"pqc" indicates which grace period the last-observed quiescent | ||||
| o	"pgp" indicates which grace period the last-observed quiescent | ||||
| 	state for this CPU corresponds to.  This is important for handling | ||||
| 	the race between CPU 0 reporting an extended dynticks-idle | ||||
| 	quiescent state for CPU 1 and CPU 1 suddenly waking up and | ||||
| @ -184,10 +184,14 @@ o	"kt" is the per-CPU kernel-thread state.  The digit preceding | ||||
| 	The number after the final slash is the CPU that the kthread | ||||
| 	is actually running on. | ||||
| 
 | ||||
| 	This field is displayed only for CONFIG_RCU_BOOST kernels. | ||||
| 
 | ||||
| o	"ktl" is the low-order 16 bits (in hexadecimal) of the count of | ||||
| 	the number of times that this CPU's per-CPU kthread has gone | ||||
| 	through its loop servicing invoke_rcu_cpu_kthread() requests. | ||||
| 
 | ||||
| 	This field is displayed only for CONFIG_RCU_BOOST kernels. | ||||
| 
 | ||||
| o	"b" is the batch limit for this CPU.  If more than this number | ||||
| 	of RCU callbacks is ready to invoke, then the remainder will | ||||
| 	be deferred. | ||||
|  | ||||
| @ -548,7 +548,7 @@ do {									\ | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_PROVE_RCU | ||||
| extern void lockdep_rcu_dereference(const char *file, const int line); | ||||
| void lockdep_rcu_suspicious(const char *file, const int line, const char *s); | ||||
| #endif | ||||
| 
 | ||||
| #endif /* __LINUX_LOCKDEP_H */ | ||||
|  | ||||
| @ -33,6 +33,7 @@ | ||||
| #ifndef __LINUX_RCUPDATE_H | ||||
| #define __LINUX_RCUPDATE_H | ||||
| 
 | ||||
| #include <linux/types.h> | ||||
| #include <linux/cache.h> | ||||
| #include <linux/spinlock.h> | ||||
| #include <linux/threads.h> | ||||
| @ -64,32 +65,74 @@ static inline void rcutorture_record_progress(unsigned long vernum) | ||||
| #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b)) | ||||
| #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b)) | ||||
| 
 | ||||
| /**
 | ||||
|  * struct rcu_head - callback structure for use with RCU | ||||
|  * @next: next update requests in a list | ||||
|  * @func: actual update function to call after the grace period. | ||||
|  */ | ||||
| struct rcu_head { | ||||
| 	struct rcu_head *next; | ||||
| 	void (*func)(struct rcu_head *head); | ||||
| }; | ||||
| 
 | ||||
| /* Exported common interfaces */ | ||||
| 
 | ||||
| #ifdef CONFIG_PREEMPT_RCU | ||||
| 
 | ||||
| /**
 | ||||
|  * call_rcu() - Queue an RCU callback for invocation after a grace period. | ||||
|  * @head: structure to be used for queueing the RCU updates. | ||||
|  * @func: actual callback function to be invoked after the grace period | ||||
|  * | ||||
|  * The callback function will be invoked some time after a full grace | ||||
|  * period elapses, in other words after all pre-existing RCU read-side | ||||
|  * critical sections have completed.  However, the callback function | ||||
|  * might well execute concurrently with RCU read-side critical sections | ||||
|  * that started after call_rcu() was invoked.  RCU read-side critical | ||||
|  * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | ||||
|  * and may be nested. | ||||
|  */ | ||||
| extern void call_rcu(struct rcu_head *head, | ||||
| 			      void (*func)(struct rcu_head *head)); | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_PREEMPT_RCU */ | ||||
| 
 | ||||
| /* In classic RCU, call_rcu() is just call_rcu_sched(). */ | ||||
| #define	call_rcu	call_rcu_sched | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||||
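A brief usage sketch, not taken from this patch (struct foo and foo_reclaim() are invented names): the usual pattern embeds a struct rcu_head in the RCU-protected structure, and the callback recovers the enclosing structure with container_of() and frees it once the grace period has elapsed:

	/* Assumes <linux/rcupdate.h> and <linux/slab.h>. */
	struct foo {
		int data;
		struct rcu_head rcu;	/* embedded callback structure */
	};

	static void foo_reclaim(struct rcu_head *head)
	{
		struct foo *fp = container_of(head, struct foo, rcu);

		kfree(fp);	/* safe: all pre-existing readers have finished */
	}

	/* Updater, after removing "fp" from all reader-visible structures: */
	static void foo_remove(struct foo *fp)
	{
		call_rcu(&fp->rcu, foo_reclaim);
	}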
| 
 | ||||
| /**
 | ||||
|  * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period. | ||||
|  * @head: structure to be used for queueing the RCU updates. | ||||
|  * @func: actual callback function to be invoked after the grace period | ||||
|  * | ||||
|  * The callback function will be invoked some time after a full grace | ||||
|  * period elapses, in other words after all currently executing RCU | ||||
|  * read-side critical sections have completed. call_rcu_bh() assumes | ||||
|  * that the read-side critical sections end on completion of a softirq | ||||
|  * handler. This means that read-side critical sections in process | ||||
|  * context must not be interrupted by softirqs. This interface is to be | ||||
|  * used when most of the read-side critical sections are in softirq context. | ||||
|  * RCU read-side critical sections are delimited by : | ||||
|  *  - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context. | ||||
|  *  OR | ||||
|  *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. | ||||
|  *  These may be nested. | ||||
|  */ | ||||
| extern void call_rcu_bh(struct rcu_head *head, | ||||
| 			void (*func)(struct rcu_head *head)); | ||||
| 
 | ||||
| /**
 | ||||
|  * call_rcu_sched() - Queue an RCU callback for invocation after an RCU-sched grace period. | ||||
|  * @head: structure to be used for queueing the RCU updates. | ||||
|  * @func: actual callback function to be invoked after the grace period | ||||
|  * | ||||
|  * The callback function will be invoked some time after a full grace | ||||
|  * period elapses, in other words after all currently executing RCU | ||||
|  * read-side critical sections have completed. call_rcu_sched() assumes | ||||
|  * that the read-side critical sections end on enabling of preemption | ||||
|  * or on voluntary preemption. | ||||
|  * RCU read-side critical sections are delimited by : | ||||
|  *  - rcu_read_lock_sched() and  rcu_read_unlock_sched(), | ||||
|  *  OR | ||||
|  *  anything that disables preemption. | ||||
|  *  These may be nested. | ||||
|  */ | ||||
| extern void call_rcu_sched(struct rcu_head *head, | ||||
| 			   void (*func)(struct rcu_head *rcu)); | ||||
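For illustration only (a sketch reusing the invented struct foo and gp names from the sketch above, not from the patch): an RCU-sched read-side critical section can be any preemption-disabled region, paired on the update side with call_rcu_sched() or synchronize_sched():

	static void example_sched_reader(void)
	{
		struct foo *p;

		preempt_disable();		/* begin RCU-sched reader */
		p = rcu_dereference_sched(gp);
		if (p)
			pr_info("data=%d\n", p->data);
		preempt_enable();		/* end RCU-sched reader */
	}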
| 
 | ||||
| extern void synchronize_sched(void); | ||||
| extern void rcu_barrier_bh(void); | ||||
| extern void rcu_barrier_sched(void); | ||||
| 
 | ||||
| static inline void __rcu_read_lock_bh(void) | ||||
| { | ||||
| 	local_bh_disable(); | ||||
| } | ||||
| 
 | ||||
| static inline void __rcu_read_unlock_bh(void) | ||||
| { | ||||
| 	local_bh_enable(); | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_PREEMPT_RCU | ||||
| 
 | ||||
| @ -152,6 +195,15 @@ static inline void rcu_exit_nohz(void) | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_NO_HZ */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Infrastructure to implement the synchronize_() primitives in | ||||
|  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. | ||||
|  */ | ||||
| 
 | ||||
| typedef void call_rcu_func_t(struct rcu_head *head, | ||||
| 			     void (*func)(struct rcu_head *head)); | ||||
| void wait_rcu_gp(call_rcu_func_t crf); | ||||
| 
 | ||||
| #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | ||||
| #include <linux/rcutree.h> | ||||
| #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) | ||||
| @ -297,19 +349,31 @@ extern int rcu_my_thread_group_empty(void); | ||||
| /**
 | ||||
|  * rcu_lockdep_assert - emit lockdep splat if specified condition not met | ||||
|  * @c: condition to check | ||||
|  * @s: informative message | ||||
|  */ | ||||
| #define rcu_lockdep_assert(c)						\ | ||||
| #define rcu_lockdep_assert(c, s)					\ | ||||
| 	do {								\ | ||||
| 		static bool __warned;					\ | ||||
| 		if (debug_lockdep_rcu_enabled() && !__warned && !(c)) {	\ | ||||
| 			__warned = true;				\ | ||||
| 			lockdep_rcu_dereference(__FILE__, __LINE__);	\ | ||||
| 			lockdep_rcu_suspicious(__FILE__, __LINE__, s);	\ | ||||
| 		}							\ | ||||
| 	} while (0) | ||||
| 
 | ||||
| #define rcu_sleep_check()						\ | ||||
| 	do {								\ | ||||
| 		rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),	\ | ||||
| 				   "Illegal context switch in RCU-bh"	\ | ||||
| 				   " read-side critical section");	\ | ||||
| 		rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),	\ | ||||
| 				   "Illegal context switch in RCU-sched"\ | ||||
| 				   " read-side critical section");	\ | ||||
| 	} while (0) | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_PROVE_RCU */ | ||||
| 
 | ||||
| #define rcu_lockdep_assert(c) do { } while (0) | ||||
| #define rcu_lockdep_assert(c, s) do { } while (0) | ||||
| #define rcu_sleep_check() do { } while (0) | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_PROVE_RCU */ | ||||
| 
 | ||||
| @ -338,14 +402,16 @@ extern int rcu_my_thread_group_empty(void); | ||||
| #define __rcu_dereference_check(p, c, space) \ | ||||
| 	({ \ | ||||
| 		typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ | ||||
| 		rcu_lockdep_assert(c); \ | ||||
| 		rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ | ||||
| 				      " usage"); \ | ||||
| 		rcu_dereference_sparse(p, space); \ | ||||
| 		smp_read_barrier_depends(); \ | ||||
| 		((typeof(*p) __force __kernel *)(_________p1)); \ | ||||
| 	}) | ||||
| #define __rcu_dereference_protected(p, c, space) \ | ||||
| 	({ \ | ||||
| 		rcu_lockdep_assert(c); \ | ||||
| 		rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ | ||||
| 				      " usage"); \ | ||||
| 		rcu_dereference_sparse(p, space); \ | ||||
| 		((typeof(*p) __force __kernel *)(p)); \ | ||||
| 	}) | ||||
| @ -359,15 +425,15 @@ extern int rcu_my_thread_group_empty(void); | ||||
| #define __rcu_dereference_index_check(p, c) \ | ||||
| 	({ \ | ||||
| 		typeof(p) _________p1 = ACCESS_ONCE(p); \ | ||||
| 		rcu_lockdep_assert(c); \ | ||||
| 		rcu_lockdep_assert(c, \ | ||||
| 				   "suspicious rcu_dereference_index_check()" \ | ||||
| 				   " usage"); \ | ||||
| 		smp_read_barrier_depends(); \ | ||||
| 		(_________p1); \ | ||||
| 	}) | ||||
| #define __rcu_assign_pointer(p, v, space) \ | ||||
| 	({ \ | ||||
| 		if (!__builtin_constant_p(v) || \ | ||||
| 		    ((v) != NULL)) \ | ||||
| 			smp_wmb(); \ | ||||
| 		smp_wmb(); \ | ||||
| 		(p) = (typeof(*v) __force space *)(v); \ | ||||
| 	}) | ||||
| 
 | ||||
| @ -500,26 +566,6 @@ extern int rcu_my_thread_group_empty(void); | ||||
| #define rcu_dereference_protected(p, c) \ | ||||
| 	__rcu_dereference_protected((p), (c), __rcu) | ||||
| 
 | ||||
| /**
 | ||||
|  * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented | ||||
|  * @p: The pointer to read, prior to dereferencing | ||||
|  * @c: The conditions under which the dereference will take place | ||||
|  * | ||||
|  * This is the RCU-bh counterpart to rcu_dereference_protected(). | ||||
|  */ | ||||
| #define rcu_dereference_bh_protected(p, c) \ | ||||
| 	__rcu_dereference_protected((p), (c), __rcu) | ||||
| 
 | ||||
| /**
 | ||||
|  * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented | ||||
|  * @p: The pointer to read, prior to dereferencing | ||||
|  * @c: The conditions under which the dereference will take place | ||||
|  * | ||||
|  * This is the RCU-sched counterpart to rcu_dereference_protected(). | ||||
|  */ | ||||
| #define rcu_dereference_sched_protected(p, c) \ | ||||
| 	__rcu_dereference_protected((p), (c), __rcu) | ||||
| 
 | ||||
| 
 | ||||
| /**
 | ||||
|  * rcu_dereference() - fetch RCU-protected pointer for dereferencing | ||||
| @ -630,7 +676,7 @@ static inline void rcu_read_unlock(void) | ||||
|  */ | ||||
| static inline void rcu_read_lock_bh(void) | ||||
| { | ||||
| 	__rcu_read_lock_bh(); | ||||
| 	local_bh_disable(); | ||||
| 	__acquire(RCU_BH); | ||||
| 	rcu_read_acquire_bh(); | ||||
| } | ||||
| @ -644,7 +690,7 @@ static inline void rcu_read_unlock_bh(void) | ||||
| { | ||||
| 	rcu_read_release_bh(); | ||||
| 	__release(RCU_BH); | ||||
| 	__rcu_read_unlock_bh(); | ||||
| 	local_bh_enable(); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
| @ -698,11 +744,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) | ||||
|  * any prior initialization.  Returns the value assigned. | ||||
|  * | ||||
|  * Inserts memory barriers on architectures that require them | ||||
|  * (pretty much all of them other than x86), and also prevents | ||||
|  * the compiler from reordering the code that initializes the | ||||
|  * structure after the pointer assignment.  More importantly, this | ||||
|  * call documents which pointers will be dereferenced by RCU read-side | ||||
|  * code. | ||||
|  * (which is most of them), and also prevents the compiler from | ||||
|  * reordering the code that initializes the structure after the pointer | ||||
|  * assignment.  More importantly, this call documents which pointers | ||||
|  * will be dereferenced by RCU read-side code. | ||||
|  * | ||||
|  * In some special cases, you may use RCU_INIT_POINTER() instead | ||||
|  * of rcu_assign_pointer().  RCU_INIT_POINTER() is a bit faster due | ||||
|  * to the fact that it does not constrain either the CPU or the compiler. | ||||
|  * That said, using RCU_INIT_POINTER() when you should have used | ||||
|  * rcu_assign_pointer() is a very bad thing that results in | ||||
|  * impossible-to-diagnose memory corruption.  So please be careful. | ||||
|  * See the RCU_INIT_POINTER() comment header for details. | ||||
|  */ | ||||
| #define rcu_assign_pointer(p, v) \ | ||||
| 	__rcu_assign_pointer((p), (v), __rcu) | ||||
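By way of a short sketch (struct foo, gp, and foo_publish() are invented names, as above): the publish pattern this macro supports is to complete all initialization of the new structure first and only then assign the pointer, so that the memory barrier orders the initialization before the structure becomes visible to readers:

	static int foo_publish(int val)
	{
		struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);  /* <linux/slab.h> */

		if (!p)
			return -ENOMEM;
		p->data = val;			/* finish all initialization first... */
		rcu_assign_pointer(gp, p);	/* ...then publish to readers */
		return 0;
	}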
| @ -710,105 +763,38 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) | ||||
| /**
 | ||||
|  * RCU_INIT_POINTER() - initialize an RCU protected pointer | ||||
|  * | ||||
|  * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep | ||||
|  * splats. | ||||
|  * Initialize an RCU-protected pointer in special cases where readers | ||||
|  * do not need ordering constraints on the CPU or the compiler.  These | ||||
|  * special cases are: | ||||
|  * | ||||
|  * 1.	This use of RCU_INIT_POINTER() is NULLing out the pointer -or- | ||||
|  * 2.	The caller has taken whatever steps are required to prevent | ||||
|  *	RCU readers from concurrently accessing this pointer -or- | ||||
|  * 3.	The referenced data structure has already been exposed to | ||||
|  *	readers either at compile time or via rcu_assign_pointer() -and- | ||||
|  *	a.	You have not made -any- reader-visible changes to | ||||
|  *		this structure since then -or- | ||||
|  *	b.	It is OK for readers accessing this structure from its | ||||
|  *		new location to see the old state of the structure.  (For | ||||
|  *		example, the changes were to statistical counters or to | ||||
|  *		other state where exact synchronization is not required.) | ||||
|  * | ||||
|  * Failure to follow these rules governing use of RCU_INIT_POINTER() will | ||||
|  * result in impossible-to-diagnose memory corruption.  As in the structures | ||||
|  * will look OK in crash dumps, but any concurrent RCU readers might | ||||
|  * see pre-initialized values of the referenced data structure.  So | ||||
|  * please be very careful how you use RCU_INIT_POINTER()!!! | ||||
|  * | ||||
|  * If you are creating an RCU-protected linked structure that is accessed | ||||
|  * by a single external-to-structure RCU-protected pointer, then you may | ||||
|  * use RCU_INIT_POINTER() to initialize the internal RCU-protected | ||||
|  * pointers, but you must use rcu_assign_pointer() to initialize the | ||||
|  * external-to-structure pointer -after- you have completely initialized | ||||
|  * the reader-accessible portions of the linked structure. | ||||
|  */ | ||||
| #define RCU_INIT_POINTER(p, v) \ | ||||
| 		p = (typeof(*v) __force __rcu *)(v) | ||||
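And a sketch of the special cases listed above (again with invented names, including a hypothetical internal RCU-protected field "peer"): NULLing a pointer, or filling in internal pointers of a structure that readers cannot yet reach, needs no ordering, while the final reader-visible publication still uses rcu_assign_pointer():

	/* Case 1: NULLing out a reader-visible RCU-protected pointer. */
	RCU_INIT_POINTER(gp, NULL);

	/*
	 * Case 2: "p" is not yet reachable by readers, so its internal
	 * RCU-protected pointer needs no ordering; only the final,
	 * reader-visible publication does.
	 */
	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	RCU_INIT_POINTER(p->peer, NULL);	/* hypothetical internal pointer */
	rcu_assign_pointer(gp, p);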
| 
 | ||||
| /* Infrastructure to implement the synchronize_() primitives. */ | ||||
| 
 | ||||
| struct rcu_synchronize { | ||||
| 	struct rcu_head head; | ||||
| 	struct completion completion; | ||||
| }; | ||||
| 
 | ||||
| extern void wakeme_after_rcu(struct rcu_head  *head); | ||||
| 
 | ||||
| #ifdef CONFIG_PREEMPT_RCU | ||||
| 
 | ||||
| /**
 | ||||
|  * call_rcu() - Queue an RCU callback for invocation after a grace period. | ||||
|  * @head: structure to be used for queueing the RCU updates. | ||||
|  * @func: actual callback function to be invoked after the grace period | ||||
|  * | ||||
|  * The callback function will be invoked some time after a full grace | ||||
|  * period elapses, in other words after all pre-existing RCU read-side | ||||
|  * critical sections have completed.  However, the callback function | ||||
|  * might well execute concurrently with RCU read-side critical sections | ||||
|  * that started after call_rcu() was invoked.  RCU read-side critical | ||||
|  * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | ||||
|  * and may be nested. | ||||
|  */ | ||||
| extern void call_rcu(struct rcu_head *head, | ||||
| 			      void (*func)(struct rcu_head *head)); | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_PREEMPT_RCU */ | ||||
| 
 | ||||
| /* In classic RCU, call_rcu() is just call_rcu_sched(). */ | ||||
| #define	call_rcu	call_rcu_sched | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||||
| 
 | ||||
| /**
 | ||||
|  * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period. | ||||
|  * @head: structure to be used for queueing the RCU updates. | ||||
|  * @func: actual callback function to be invoked after the grace period | ||||
|  * | ||||
|  * The callback function will be invoked some time after a full grace | ||||
|  * period elapses, in other words after all currently executing RCU | ||||
|  * read-side critical sections have completed. call_rcu_bh() assumes | ||||
|  * that the read-side critical sections end on completion of a softirq | ||||
|  * handler. This means that read-side critical sections in process | ||||
|  * context must not be interrupted by softirqs. This interface is to be | ||||
|  * used when most of the read-side critical sections are in softirq context. | ||||
|  * RCU read-side critical sections are delimited by : | ||||
|  *  - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context. | ||||
|  *  OR | ||||
|  *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. | ||||
|  *  These may be nested. | ||||
|  */ | ||||
| extern void call_rcu_bh(struct rcu_head *head, | ||||
| 			void (*func)(struct rcu_head *head)); | ||||
| 
 | ||||
| /*
 | ||||
|  * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally | ||||
|  * by call_rcu() and rcu callback execution, and are therefore not part of the | ||||
|  * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. | ||||
|  */ | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD | ||||
| # define STATE_RCU_HEAD_READY	0 | ||||
| # define STATE_RCU_HEAD_QUEUED	1 | ||||
| 
 | ||||
| extern struct debug_obj_descr rcuhead_debug_descr; | ||||
| 
 | ||||
| static inline void debug_rcu_head_queue(struct rcu_head *head) | ||||
| { | ||||
| 	WARN_ON_ONCE((unsigned long)head & 0x3); | ||||
| 	debug_object_activate(head, &rcuhead_debug_descr); | ||||
| 	debug_object_active_state(head, &rcuhead_debug_descr, | ||||
| 				  STATE_RCU_HEAD_READY, | ||||
| 				  STATE_RCU_HEAD_QUEUED); | ||||
| } | ||||
| 
 | ||||
| static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||||
| { | ||||
| 	debug_object_active_state(head, &rcuhead_debug_descr, | ||||
| 				  STATE_RCU_HEAD_QUEUED, | ||||
| 				  STATE_RCU_HEAD_READY); | ||||
| 	debug_object_deactivate(head, &rcuhead_debug_descr); | ||||
| } | ||||
| #else	/* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||||
| static inline void debug_rcu_head_queue(struct rcu_head *head) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||||
| { | ||||
| } | ||||
| #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||||
| 
 | ||||
| static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) | ||||
| { | ||||
| 	return offset < 4096; | ||||
| @ -827,18 +813,6 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) | ||||
| 	call_rcu(head, (rcu_callback)offset); | ||||
| } | ||||
| 
 | ||||
| extern void kfree(const void *); | ||||
| 
 | ||||
| static inline void __rcu_reclaim(struct rcu_head *head) | ||||
| { | ||||
| 	unsigned long offset = (unsigned long)head->func; | ||||
| 
 | ||||
| 	if (__is_kfree_rcu_offset(offset)) | ||||
| 		kfree((void *)head - offset); | ||||
| 	else | ||||
| 		head->func(head); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * kfree_rcu() - kfree an object after a grace period. | ||||
|  * @ptr:	pointer to kfree | ||||
|  | ||||
| @ -27,9 +27,23 @@ | ||||
| 
 | ||||
| #include <linux/cache.h> | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| static inline void rcu_init(void) | ||||
| { | ||||
| } | ||||
| #else /* #ifdef CONFIG_RCU_BOOST */ | ||||
| void rcu_init(void); | ||||
| #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||||
| 
 | ||||
| static inline void rcu_barrier_bh(void) | ||||
| { | ||||
| 	wait_rcu_gp(call_rcu_bh); | ||||
| } | ||||
| 
 | ||||
| static inline void rcu_barrier_sched(void) | ||||
| { | ||||
| 	wait_rcu_gp(call_rcu_sched); | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_TINY_RCU | ||||
| 
 | ||||
| @ -45,9 +59,13 @@ static inline void rcu_barrier(void) | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_TINY_RCU */ | ||||
| 
 | ||||
| void rcu_barrier(void); | ||||
| void synchronize_rcu_expedited(void); | ||||
| 
 | ||||
| static inline void rcu_barrier(void) | ||||
| { | ||||
| 	wait_rcu_gp(call_rcu); | ||||
| } | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_TINY_RCU */ | ||||
| 
 | ||||
| static inline void synchronize_rcu_bh(void) | ||||
|  | ||||
| @ -67,6 +67,8 @@ static inline void synchronize_rcu_bh_expedited(void) | ||||
| } | ||||
| 
 | ||||
| extern void rcu_barrier(void); | ||||
| extern void rcu_barrier_bh(void); | ||||
| extern void rcu_barrier_sched(void); | ||||
| 
 | ||||
| extern unsigned long rcutorture_testseq; | ||||
| extern unsigned long rcutorture_vernum; | ||||
|  | ||||
| @ -270,7 +270,6 @@ extern void init_idle_bootup_task(struct task_struct *idle); | ||||
| 
 | ||||
| extern int runqueue_is_locked(int cpu); | ||||
| 
 | ||||
| extern cpumask_var_t nohz_cpu_mask; | ||||
| #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) | ||||
| extern void select_nohz_load_balancer(int stop_tick); | ||||
| extern int get_nohz_timer_target(void); | ||||
| @ -1260,9 +1259,6 @@ struct task_struct { | ||||
| #ifdef CONFIG_PREEMPT_RCU | ||||
| 	int rcu_read_lock_nesting; | ||||
| 	char rcu_read_unlock_special; | ||||
| #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) | ||||
| 	int rcu_boosted; | ||||
| #endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */ | ||||
| 	struct list_head rcu_node_entry; | ||||
| #endif /* #ifdef CONFIG_PREEMPT_RCU */ | ||||
| #ifdef CONFIG_TREE_PREEMPT_RCU | ||||
|  | ||||
| @ -238,6 +238,16 @@ struct ustat { | ||||
| 	char			f_fpack[6]; | ||||
| }; | ||||
| 
 | ||||
| /**
 | ||||
|  * struct rcu_head - callback structure for use with RCU | ||||
|  * @next: next update requests in a list | ||||
|  * @func: actual update function to call after the grace period. | ||||
|  */ | ||||
| struct rcu_head { | ||||
| 	struct rcu_head *next; | ||||
| 	void (*func)(struct rcu_head *head); | ||||
| }; | ||||
| 
 | ||||
| #endif	/* __KERNEL__ */ | ||||
| #endif /*  __ASSEMBLY__ */ | ||||
| #endif /* _LINUX_TYPES_H */ | ||||
|  | ||||
							
								
								
									
include/trace/events/rcu.h (new file, 459 lines)
							| @ -0,0 +1,459 @@ | ||||
| #undef TRACE_SYSTEM | ||||
| #define TRACE_SYSTEM rcu | ||||
| 
 | ||||
| #if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ) | ||||
| #define _TRACE_RCU_H | ||||
| 
 | ||||
| #include <linux/tracepoint.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for start/end markers used for utilization calculations. | ||||
|  * By convention, the string is of the following forms: | ||||
|  * | ||||
|  * "Start <activity>" -- Mark the start of the specified activity, | ||||
|  *			 such as "context switch".  Nesting is permitted. | ||||
|  * "End <activity>" -- Mark the end of the specified activity. | ||||
|  * | ||||
|  * An "@" character within "<activity>" is a comment character: Data | ||||
|  * reduction scripts will ignore the "@" and the remainder of the line. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_utilization, | ||||
| 
 | ||||
| 	TP_PROTO(char *s), | ||||
| 
 | ||||
| 	TP_ARGS(s), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, s) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->s = s; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s", __entry->s) | ||||
| ); | ||||
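As a usage note (a sketch, not part of the header): TRACE_EVENT() generates a trace_rcu_utilization() function, so callers bracket an activity following the "Start <activity>"/"End <activity>" convention described in the comment above:

	trace_rcu_utilization("Start context switch");
	/* ... the activity being measured ... */
	trace_rcu_utilization("End context switch");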
| 
 | ||||
| #ifdef CONFIG_RCU_TRACE | ||||
| 
 | ||||
| #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for grace-period events: starting and ending a grace | ||||
|  * period ("start" and "end", respectively), a CPU noting the start | ||||
|  * of a new grace period or the end of an old grace period ("cpustart" | ||||
|  * and "cpuend", respectively), a CPU passing through a quiescent | ||||
|  * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" | ||||
|  * and "cpuofl", respectively), and a CPU being kicked for being too | ||||
|  * long in dyntick-idle mode ("kick"). | ||||
|  */ | ||||
| TRACE_EVENT(rcu_grace_period, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, gpnum, gpevent), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(unsigned long, gpnum) | ||||
| 		__field(char *, gpevent) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->gpnum = gpnum; | ||||
| 		__entry->gpevent = gpevent; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s %lu %s", | ||||
| 		  __entry->rcuname, __entry->gpnum, __entry->gpevent) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for grace-period-initialization events.  These are | ||||
|  * distinguished by the type of RCU, the new grace-period number, the | ||||
|  * rcu_node structure level, the starting and ending CPU covered by the | ||||
|  * rcu_node structure, and the mask of CPUs that will be waited for. | ||||
|  * All but the type of RCU are extracted from the rcu_node structure. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_grace_period_init, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, unsigned long gpnum, u8 level, | ||||
| 		 int grplo, int grphi, unsigned long qsmask), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(unsigned long, gpnum) | ||||
| 		__field(u8, level) | ||||
| 		__field(int, grplo) | ||||
| 		__field(int, grphi) | ||||
| 		__field(unsigned long, qsmask) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->gpnum = gpnum; | ||||
| 		__entry->level = level; | ||||
| 		__entry->grplo = grplo; | ||||
| 		__entry->grphi = grphi; | ||||
| 		__entry->qsmask = qsmask; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s %lu %u %d %d %lx", | ||||
| 		  __entry->rcuname, __entry->gpnum, __entry->level, | ||||
| 		  __entry->grplo, __entry->grphi, __entry->qsmask) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for tasks blocking within preemptible-RCU read-side | ||||
|  * critical sections.  Track the type of RCU (which one day might | ||||
|  * include SRCU), the grace-period number that the task is blocking | ||||
|  * (the current or the next), and the task's PID. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_preempt_task, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, int pid, unsigned long gpnum), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, pid, gpnum), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(unsigned long, gpnum) | ||||
| 		__field(int, pid) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->gpnum = gpnum; | ||||
| 		__entry->pid = pid; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s %lu %d", | ||||
| 		  __entry->rcuname, __entry->gpnum, __entry->pid) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for tasks that blocked within a given preemptible-RCU | ||||
|  * read-side critical section exiting that critical section.  Track the | ||||
|  * type of RCU (which one day might include SRCU) and the task's PID. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_unlock_preempted_task, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, unsigned long gpnum, int pid), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, gpnum, pid), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(unsigned long, gpnum) | ||||
| 		__field(int, pid) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->gpnum = gpnum; | ||||
| 		__entry->pid = pid; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for quiescent-state-reporting events.  These are | ||||
|  * distinguished by the type of RCU, the grace-period number, the | ||||
|  * mask of quiescent lower-level entities, the rcu_node structure level, | ||||
|  * the starting and ending CPU covered by the rcu_node structure, and | ||||
|  * whether there are any blocked tasks blocking the current grace period. | ||||
|  * All but the type of RCU are extracted from the rcu_node structure. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_quiescent_state_report, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, unsigned long gpnum, | ||||
| 		 unsigned long mask, unsigned long qsmask, | ||||
| 		 u8 level, int grplo, int grphi, int gp_tasks), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(unsigned long, gpnum) | ||||
| 		__field(unsigned long, mask) | ||||
| 		__field(unsigned long, qsmask) | ||||
| 		__field(u8, level) | ||||
| 		__field(int, grplo) | ||||
| 		__field(int, grphi) | ||||
| 		__field(u8, gp_tasks) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->gpnum = gpnum; | ||||
| 		__entry->mask = mask; | ||||
| 		__entry->qsmask = qsmask; | ||||
| 		__entry->level = level; | ||||
| 		__entry->grplo = grplo; | ||||
| 		__entry->grphi = grphi; | ||||
| 		__entry->gp_tasks = gp_tasks; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s %lu %lx>%lx %u %d %d %u", | ||||
| 		  __entry->rcuname, __entry->gpnum, | ||||
| 		  __entry->mask, __entry->qsmask, __entry->level, | ||||
| 		  __entry->grplo, __entry->grphi, __entry->gp_tasks) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for quiescent states detected by force_quiescent_state(). | ||||
|  * These trace events include the type of RCU, the grace-period number | ||||
|  * that was blocked by the CPU, the CPU itself, and the type of quiescent | ||||
|  * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, | ||||
|  * or "kick" when kicking a CPU that has been in dyntick-idle mode for | ||||
|  * too long. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_fqs, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, gpnum, cpu, qsevent), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(unsigned long, gpnum) | ||||
| 		__field(int, cpu) | ||||
| 		__field(char *, qsevent) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->gpnum = gpnum; | ||||
| 		__entry->cpu = cpu; | ||||
| 		__entry->qsevent = qsevent; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s %lu %d %s", | ||||
| 		  __entry->rcuname, __entry->gpnum, | ||||
| 		  __entry->cpu, __entry->qsevent) | ||||
| ); | ||||
| 
 | ||||
| #endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for dyntick-idle entry/exit events.  These take a string | ||||
|  * as argument: "Start" for entering dyntick-idle mode and "End" for | ||||
|  * leaving it. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_dyntick, | ||||
| 
 | ||||
| 	TP_PROTO(char *polarity), | ||||
| 
 | ||||
| 	TP_ARGS(polarity), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, polarity) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->polarity = polarity; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s", __entry->polarity) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for the registration of a single RCU callback function. | ||||
|  * The first argument is the type of RCU, the second argument is | ||||
|  * a pointer to the RCU callback itself, and the third element is the | ||||
|  * new RCU callback queue length for the current CPU. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_callback, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, rhp, qlen), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(void *, rhp) | ||||
| 		__field(void *, func) | ||||
| 		__field(long, qlen) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->rhp = rhp; | ||||
| 		__entry->func = rhp->func; | ||||
| 		__entry->qlen = qlen; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s rhp=%p func=%pf %ld", | ||||
| 		  __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for the registration of a single RCU callback of the special | ||||
|  * kfree() form.  The first argument is the RCU type, the second argument | ||||
|  * is a pointer to the RCU callback, the third argument is the offset | ||||
|  * of the callback within the enclosing RCU-protected data structure, | ||||
|  * and the fourth argument is the new RCU callback queue length for the | ||||
|  * current CPU. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_kfree_callback, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, | ||||
| 		 long qlen), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, rhp, offset, qlen), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(void *, rhp) | ||||
| 		__field(unsigned long, offset) | ||||
| 		__field(long, qlen) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->rhp = rhp; | ||||
| 		__entry->offset = offset; | ||||
| 		__entry->qlen = qlen; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s rhp=%p func=%ld %ld", | ||||
| 		  __entry->rcuname, __entry->rhp, __entry->offset, | ||||
| 		  __entry->qlen) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for marking the beginning of rcu_do_batch, performed to start | ||||
|  * RCU callback invocation.  The first argument is the RCU flavor, | ||||
|  * the second is the total number of callbacks (including those that | ||||
|  * are not yet ready to be invoked), and the third argument is the | ||||
|  * current RCU-callback batch limit. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_batch_start, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, long qlen, int blimit), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, qlen, blimit), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(long, qlen) | ||||
| 		__field(int, blimit) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->qlen = qlen; | ||||
| 		__entry->blimit = blimit; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s CBs=%ld bl=%d", | ||||
| 		  __entry->rcuname, __entry->qlen, __entry->blimit) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for the invocation of a single RCU callback function. | ||||
|  * The first argument is the type of RCU, and the second argument is | ||||
|  * a pointer to the RCU callback itself. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_invoke_callback, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, struct rcu_head *rhp), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, rhp), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(void *, rhp) | ||||
| 		__field(void *, func) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->rhp = rhp; | ||||
| 		__entry->func = rhp->func; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s rhp=%p func=%pf", | ||||
| 		  __entry->rcuname, __entry->rhp, __entry->func) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for the invocation of a single RCU callback of the special | ||||
|  * kfree() form.  The first argument is the RCU flavor, the second | ||||
|  * argument is a pointer to the RCU callback, and the third argument | ||||
|  * is the offset of the callback within the enclosing RCU-protected | ||||
|  * data structure. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_invoke_kfree_callback, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, rhp, offset), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(void *, rhp) | ||||
| 		__field(unsigned long, offset) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->rhp = rhp; | ||||
| 		__entry->offset	= offset; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s rhp=%p func=%ld", | ||||
| 		  __entry->rcuname, __entry->rhp, __entry->offset) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Tracepoint for exiting rcu_do_batch after RCU callbacks have been | ||||
|  * invoked.  The first argument is the name of the RCU flavor and | ||||
|  * the second argument is the number of callbacks actually invoked. | ||||
|  */ | ||||
| TRACE_EVENT(rcu_batch_end, | ||||
| 
 | ||||
| 	TP_PROTO(char *rcuname, int callbacks_invoked), | ||||
| 
 | ||||
| 	TP_ARGS(rcuname, callbacks_invoked), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(char *, rcuname) | ||||
| 		__field(int, callbacks_invoked) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->rcuname = rcuname; | ||||
| 		__entry->callbacks_invoked = callbacks_invoked; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("%s CBs-invoked=%d", | ||||
| 		  __entry->rcuname, __entry->callbacks_invoked) | ||||
| ); | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_RCU_TRACE */ | ||||
| 
 | ||||
| #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) | ||||
| #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) | ||||
| #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) | ||||
| #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) | ||||
| #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) | ||||
| #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) | ||||
| #define trace_rcu_dyntick(polarity) do { } while (0) | ||||
| #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) | ||||
| #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) | ||||
| #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) | ||||
| #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) | ||||
| #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) | ||||
| #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||||
| 
 | ||||
| #endif /* _TRACE_RCU_H */ | ||||
| 
 | ||||
| /* This part must be outside protection */ | ||||
| #include <trace/define_trace.h> | ||||
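The events above are emitted through the trace_<name>() wrappers that TRACE_EVENT() generates. As an illustrative sketch only (the function and flavor names below are invented; the real call sites appear in the rcutiny.c and rcutree.c hunks later in this merge), a callback-invocation loop would bracket its work roughly as follows:

	/* Sketch only, not part of this patch. */
	static void example_do_batch(struct rcu_head *list, long qlen, int bl)
	{
		struct rcu_head *next;
		int invoked = 0;

		trace_rcu_batch_start("example_flavor", qlen, bl);
		while (list) {
			next = list->next;
			trace_rcu_invoke_callback("example_flavor", list);
			list->func(list);	/* Run the callback itself. */
			list = next;
			invoked++;
		}
		trace_rcu_batch_end("example_flavor", invoked);
	}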
| @ -391,7 +391,7 @@ config TREE_RCU | ||||
| 
 | ||||
| config TREE_PREEMPT_RCU | ||||
| 	bool "Preemptible tree-based hierarchical RCU" | ||||
| 	depends on PREEMPT | ||||
| 	depends on PREEMPT && SMP | ||||
| 	help | ||||
| 	  This option selects the RCU implementation that is | ||||
| 	  designed for very large SMP systems with hundreds or | ||||
| @ -401,7 +401,7 @@ config TREE_PREEMPT_RCU | ||||
| 
 | ||||
| config TINY_RCU | ||||
| 	bool "UP-only small-memory-footprint RCU" | ||||
| 	depends on !SMP | ||||
| 	depends on !PREEMPT && !SMP | ||||
| 	help | ||||
| 	  This option selects the RCU implementation that is | ||||
| 	  designed for UP systems from which real-time response | ||||
| @ -410,7 +410,7 @@ config TINY_RCU | ||||
| 
 | ||||
| config TINY_PREEMPT_RCU | ||||
| 	bool "Preemptible UP-only small-memory-footprint RCU" | ||||
| 	depends on !SMP && PREEMPT | ||||
| 	depends on PREEMPT && !SMP | ||||
| 	help | ||||
| 	  This option selects the RCU implementation that is designed | ||||
| 	  for real-time UP systems.  This option greatly reduces the | ||||
|  | ||||
| @ -1145,10 +1145,11 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, | ||||
| 	if (debug_locks_silent) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n=======================================================\n"); | ||||
| 	printk(  "[ INFO: possible circular locking dependency detected ]\n"); | ||||
| 	printk("\n"); | ||||
| 	printk("======================================================\n"); | ||||
| 	printk("[ INFO: possible circular locking dependency detected ]\n"); | ||||
| 	print_kernel_version(); | ||||
| 	printk(  "-------------------------------------------------------\n"); | ||||
| 	printk("-------------------------------------------------------\n"); | ||||
| 	printk("%s/%d is trying to acquire lock:\n", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	print_lock(check_src); | ||||
| @ -1482,11 +1483,12 @@ print_bad_irq_dependency(struct task_struct *curr, | ||||
| 	if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n======================================================\n"); | ||||
| 	printk(  "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", | ||||
| 	printk("\n"); | ||||
| 	printk("======================================================\n"); | ||||
| 	printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", | ||||
| 		irqclass, irqclass); | ||||
| 	print_kernel_version(); | ||||
| 	printk(  "------------------------------------------------------\n"); | ||||
| 	printk("------------------------------------------------------\n"); | ||||
| 	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", | ||||
| 		curr->comm, task_pid_nr(curr), | ||||
| 		curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, | ||||
| @ -1711,10 +1713,11 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, | ||||
| 	if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n=============================================\n"); | ||||
| 	printk(  "[ INFO: possible recursive locking detected ]\n"); | ||||
| 	printk("\n"); | ||||
| 	printk("=============================================\n"); | ||||
| 	printk("[ INFO: possible recursive locking detected ]\n"); | ||||
| 	print_kernel_version(); | ||||
| 	printk(  "---------------------------------------------\n"); | ||||
| 	printk("---------------------------------------------\n"); | ||||
| 	printk("%s/%d is trying to acquire lock:\n", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	print_lock(next); | ||||
| @ -2217,10 +2220,11 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, | ||||
| 	if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n=================================\n"); | ||||
| 	printk(  "[ INFO: inconsistent lock state ]\n"); | ||||
| 	printk("\n"); | ||||
| 	printk("=================================\n"); | ||||
| 	printk("[ INFO: inconsistent lock state ]\n"); | ||||
| 	print_kernel_version(); | ||||
| 	printk(  "---------------------------------\n"); | ||||
| 	printk("---------------------------------\n"); | ||||
| 
 | ||||
| 	printk("inconsistent {%s} -> {%s} usage.\n", | ||||
| 		usage_str[prev_bit], usage_str[new_bit]); | ||||
| @ -2281,10 +2285,11 @@ print_irq_inversion_bug(struct task_struct *curr, | ||||
| 	if (!debug_locks_off_graph_unlock() || debug_locks_silent) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n=========================================================\n"); | ||||
| 	printk(  "[ INFO: possible irq lock inversion dependency detected ]\n"); | ||||
| 	printk("\n"); | ||||
| 	printk("=========================================================\n"); | ||||
| 	printk("[ INFO: possible irq lock inversion dependency detected ]\n"); | ||||
| 	print_kernel_version(); | ||||
| 	printk(  "---------------------------------------------------------\n"); | ||||
| 	printk("---------------------------------------------------------\n"); | ||||
| 	printk("%s/%d just changed the state of lock:\n", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	print_lock(this); | ||||
| @ -3161,9 +3166,10 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, | ||||
| 	if (debug_locks_silent) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n=====================================\n"); | ||||
| 	printk(  "[ BUG: bad unlock balance detected! ]\n"); | ||||
| 	printk(  "-------------------------------------\n"); | ||||
| 	printk("\n"); | ||||
| 	printk("=====================================\n"); | ||||
| 	printk("[ BUG: bad unlock balance detected! ]\n"); | ||||
| 	printk("-------------------------------------\n"); | ||||
| 	printk("%s/%d is trying to release lock (", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	print_lockdep_cache(lock); | ||||
| @ -3604,9 +3610,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, | ||||
| 	if (debug_locks_silent) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n=================================\n"); | ||||
| 	printk(  "[ BUG: bad contention detected! ]\n"); | ||||
| 	printk(  "---------------------------------\n"); | ||||
| 	printk("\n"); | ||||
| 	printk("=================================\n"); | ||||
| 	printk("[ BUG: bad contention detected! ]\n"); | ||||
| 	printk("---------------------------------\n"); | ||||
| 	printk("%s/%d is trying to contend lock (", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	print_lockdep_cache(lock); | ||||
| @ -3977,9 +3984,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | ||||
| 	if (debug_locks_silent) | ||||
| 		return; | ||||
| 
 | ||||
| 	printk("\n=========================\n"); | ||||
| 	printk(  "[ BUG: held lock freed! ]\n"); | ||||
| 	printk(  "-------------------------\n"); | ||||
| 	printk("\n"); | ||||
| 	printk("=========================\n"); | ||||
| 	printk("[ BUG: held lock freed! ]\n"); | ||||
| 	printk("-------------------------\n"); | ||||
| 	printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", | ||||
| 		curr->comm, task_pid_nr(curr), mem_from, mem_to-1); | ||||
| 	print_lock(hlock); | ||||
| @ -4033,9 +4041,10 @@ static void print_held_locks_bug(struct task_struct *curr) | ||||
| 	if (debug_locks_silent) | ||||
| 		return; | ||||
| 
 | ||||
| 	printk("\n=====================================\n"); | ||||
| 	printk(  "[ BUG: lock held at task exit time! ]\n"); | ||||
| 	printk(  "-------------------------------------\n"); | ||||
| 	printk("\n"); | ||||
| 	printk("=====================================\n"); | ||||
| 	printk("[ BUG: lock held at task exit time! ]\n"); | ||||
| 	printk("-------------------------------------\n"); | ||||
| 	printk("%s/%d is exiting with locks still held!\n", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	lockdep_print_held_locks(curr); | ||||
| @ -4129,16 +4138,17 @@ void lockdep_sys_exit(void) | ||||
| 	if (unlikely(curr->lockdep_depth)) { | ||||
| 		if (!debug_locks_off()) | ||||
| 			return; | ||||
| 		printk("\n================================================\n"); | ||||
| 		printk(  "[ BUG: lock held when returning to user space! ]\n"); | ||||
| 		printk(  "------------------------------------------------\n"); | ||||
| 		printk("\n"); | ||||
| 		printk("================================================\n"); | ||||
| 		printk("[ BUG: lock held when returning to user space! ]\n"); | ||||
| 		printk("------------------------------------------------\n"); | ||||
| 		printk("%s/%d is leaving the kernel with locks still held!\n", | ||||
| 				curr->comm, curr->pid); | ||||
| 		lockdep_print_held_locks(curr); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| void lockdep_rcu_dereference(const char *file, const int line) | ||||
| void lockdep_rcu_suspicious(const char *file, const int line, const char *s) | ||||
| { | ||||
| 	struct task_struct *curr = current; | ||||
| 
 | ||||
| @ -4147,15 +4157,15 @@ void lockdep_rcu_dereference(const char *file, const int line) | ||||
| 		return; | ||||
| #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ | ||||
| 	/* Note: the following can be executed concurrently, so be careful. */ | ||||
| 	printk("\n===================================================\n"); | ||||
| 	printk(  "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); | ||||
| 	printk(  "---------------------------------------------------\n"); | ||||
| 	printk("%s:%d invoked rcu_dereference_check() without protection!\n", | ||||
| 			file, line); | ||||
| 	printk("\n"); | ||||
| 	printk("===============================\n"); | ||||
| 	printk("[ INFO: suspicious RCU usage. ]\n"); | ||||
| 	printk("-------------------------------\n"); | ||||
| 	printk("%s:%d %s!\n", file, line, s); | ||||
| 	printk("\nother info that might help us debug this:\n\n"); | ||||
| 	printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); | ||||
| 	lockdep_print_held_locks(curr); | ||||
| 	printk("\nstack backtrace:\n"); | ||||
| 	dump_stack(); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(lockdep_rcu_dereference); | ||||
| EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); | ||||
|  | ||||
| @ -418,7 +418,9 @@ EXPORT_SYMBOL(pid_task); | ||||
|  */ | ||||
| struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | ||||
| { | ||||
| 	rcu_lockdep_assert(rcu_read_lock_held()); | ||||
| 	rcu_lockdep_assert(rcu_read_lock_held(), | ||||
| 			   "find_task_by_pid_ns() needs rcu_read_lock()" | ||||
| 			   " protection"); | ||||
| 	return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); | ||||
| } | ||||
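The two-argument rcu_lockdep_assert() used above pairs a lockdep condition with a message that the renamed lockdep_rcu_suspicious() prints when the condition fails. Below is a minimal sketch of combining it with an update-side lock check; the structure, accessor, and lock names are hypothetical:

	/* Sketch only: hypothetical accessor protected by RCU or foo_lock. */
	static struct foo *foo_deref(struct foo __rcu **fpp, spinlock_t *foo_lock)
	{
		rcu_lockdep_assert(rcu_read_lock_held() ||
				   lockdep_is_held(foo_lock),
				   "foo_deref() needs rcu_read_lock() or foo_lock");
		return rcu_dereference_check(*fpp, lockdep_is_held(foo_lock));
	}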
| 
 | ||||
|  | ||||
							
								
								
									
85	kernel/rcu.h	Normal file
							| @ -0,0 +1,85 @@ | ||||
| /*
 | ||||
|  * Read-Copy Update definitions shared among RCU implementations. | ||||
|  * | ||||
|  * This program is free software; you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation; either version 2 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * This program is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with this program; if not, write to the Free Software | ||||
|  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||||
|  * | ||||
|  * Copyright IBM Corporation, 2011 | ||||
|  * | ||||
|  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||||
|  */ | ||||
| 
 | ||||
| #ifndef __LINUX_RCU_H | ||||
| #define __LINUX_RCU_H | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_TRACE | ||||
| #define RCU_TRACE(stmt) stmt | ||||
| #else /* #ifdef CONFIG_RCU_TRACE */ | ||||
| #define RCU_TRACE(stmt) | ||||
| #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||||
| 
 | ||||
| /*
 | ||||
|  * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally | ||||
|  * by call_rcu() and rcu callback execution, and are therefore not part of the | ||||
|  * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. | ||||
|  */ | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD | ||||
| # define STATE_RCU_HEAD_READY	0 | ||||
| # define STATE_RCU_HEAD_QUEUED	1 | ||||
| 
 | ||||
| extern struct debug_obj_descr rcuhead_debug_descr; | ||||
| 
 | ||||
| static inline void debug_rcu_head_queue(struct rcu_head *head) | ||||
| { | ||||
| 	WARN_ON_ONCE((unsigned long)head & 0x3); | ||||
| 	debug_object_activate(head, &rcuhead_debug_descr); | ||||
| 	debug_object_active_state(head, &rcuhead_debug_descr, | ||||
| 				  STATE_RCU_HEAD_READY, | ||||
| 				  STATE_RCU_HEAD_QUEUED); | ||||
| } | ||||
| 
 | ||||
| static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||||
| { | ||||
| 	debug_object_active_state(head, &rcuhead_debug_descr, | ||||
| 				  STATE_RCU_HEAD_QUEUED, | ||||
| 				  STATE_RCU_HEAD_READY); | ||||
| 	debug_object_deactivate(head, &rcuhead_debug_descr); | ||||
| } | ||||
| #else	/* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||||
| static inline void debug_rcu_head_queue(struct rcu_head *head) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||||
| { | ||||
| } | ||||
| #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||||
| 
 | ||||
| extern void kfree(const void *); | ||||
| 
 | ||||
| static inline void __rcu_reclaim(char *rn, struct rcu_head *head) | ||||
| { | ||||
| 	unsigned long offset = (unsigned long)head->func; | ||||
| 
 | ||||
| 	if (__is_kfree_rcu_offset(offset)) { | ||||
| 		RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); | ||||
| 		kfree((void *)head - offset); | ||||
| 	} else { | ||||
| 		RCU_TRACE(trace_rcu_invoke_callback(rn, head)); | ||||
| 		head->func(head); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| #endif /* __LINUX_RCU_H */ | ||||
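__rcu_reclaim() above relies on the kfree_rcu() convention: instead of a real callback pointer, the offset of the rcu_head within its enclosing structure is queued, and __is_kfree_rcu_offset() recognizes such small values. A hedged sketch with a made-up structure:

	/* Hypothetical structure freed through the kfree_rcu() path. */
	struct example {
		int data;
		struct rcu_head rh;
	};

	static void example_release(struct example *p)
	{
		/*
		 * kfree_rcu(p, rh) queues &p->rh with its "function" set to
		 * offsetof(struct example, rh).  After a grace period,
		 * __rcu_reclaim() sees the small offset, computes
		 * (void *)&p->rh - offset == p, and calls kfree(p).
		 */
		kfree_rcu(p, rh);
	}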
| @ -46,6 +46,11 @@ | ||||
| #include <linux/module.h> | ||||
| #include <linux/hardirq.h> | ||||
| 
 | ||||
| #define CREATE_TRACE_POINTS | ||||
| #include <trace/events/rcu.h> | ||||
| 
 | ||||
| #include "rcu.h" | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| static struct lock_class_key rcu_lock_key; | ||||
| struct lockdep_map rcu_lock_map = | ||||
| @ -94,11 +99,16 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | ||||
| 
 | ||||
| #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| 
 | ||||
| struct rcu_synchronize { | ||||
| 	struct rcu_head head; | ||||
| 	struct completion completion; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Awaken the corresponding synchronize_rcu() instance now that a | ||||
|  * grace period has elapsed. | ||||
|  */ | ||||
| void wakeme_after_rcu(struct rcu_head  *head) | ||||
| static void wakeme_after_rcu(struct rcu_head  *head) | ||||
| { | ||||
| 	struct rcu_synchronize *rcu; | ||||
| 
 | ||||
| @ -106,6 +116,20 @@ void wakeme_after_rcu(struct rcu_head  *head) | ||||
| 	complete(&rcu->completion); | ||||
| } | ||||
| 
 | ||||
| void wait_rcu_gp(call_rcu_func_t crf) | ||||
| { | ||||
| 	struct rcu_synchronize rcu; | ||||
| 
 | ||||
| 	init_rcu_head_on_stack(&rcu.head); | ||||
| 	init_completion(&rcu.completion); | ||||
| 	/* Will wake me after RCU finished. */ | ||||
| 	crf(&rcu.head, wakeme_after_rcu); | ||||
| 	/* Wait for it. */ | ||||
| 	wait_for_completion(&rcu.completion); | ||||
| 	destroy_rcu_head_on_stack(&rcu.head); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(wait_rcu_gp); | ||||
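wait_rcu_gp() factors out the open-coded completion-based wait loops that the rcutiny and rcutorture hunks below delete. A sketch of how a flavor's synchronous primitive can now be expressed (the wrapper name is illustrative; the real wiring lives in the per-flavor files):

	/* Sketch only: synchronous rcu_bh grace-period wait via wait_rcu_gp(). */
	static void example_synchronize_bh(void)
	{
		wait_rcu_gp(call_rcu_bh);	/* Returns after a full rcu_bh GP. */
	}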
| 
 | ||||
| #ifdef CONFIG_PROVE_RCU | ||||
| /*
 | ||||
|  * wrapper function to avoid #include problems. | ||||
|  | ||||
							
								
								
									
117	kernel/rcutiny.c
							| @ -37,16 +37,17 @@ | ||||
| #include <linux/cpu.h> | ||||
| #include <linux/prefetch.h> | ||||
| 
 | ||||
| /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ | ||||
| static struct task_struct *rcu_kthread_task; | ||||
| static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); | ||||
| static unsigned long have_rcu_kthread_work; | ||||
| #ifdef CONFIG_RCU_TRACE | ||||
| #include <trace/events/rcu.h> | ||||
| #endif /* #ifdef CONFIG_RCU_TRACE */ | ||||
| 
 | ||||
| #include "rcu.h" | ||||
| 
 | ||||
| /* Forward declarations for rcutiny_plugin.h. */ | ||||
| struct rcu_ctrlblk; | ||||
| static void invoke_rcu_kthread(void); | ||||
| static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); | ||||
| static int rcu_kthread(void *arg); | ||||
| static void invoke_rcu_callbacks(void); | ||||
| static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); | ||||
| static void rcu_process_callbacks(struct softirq_action *unused); | ||||
| static void __call_rcu(struct rcu_head *head, | ||||
| 		       void (*func)(struct rcu_head *rcu), | ||||
| 		       struct rcu_ctrlblk *rcp); | ||||
| @ -95,16 +96,6 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Wake up rcu_kthread() to process callbacks now eligible for invocation | ||||
|  * or to boost readers. | ||||
|  */ | ||||
| static void invoke_rcu_kthread(void) | ||||
| { | ||||
| 	have_rcu_kthread_work = 1; | ||||
| 	wake_up(&rcu_kthread_wq); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Record an rcu quiescent state.  And an rcu_bh quiescent state while we | ||||
|  * are at it, given that any rcu quiescent state is also an rcu_bh | ||||
| @ -117,7 +108,7 @@ void rcu_sched_qs(int cpu) | ||||
| 	local_irq_save(flags); | ||||
| 	if (rcu_qsctr_help(&rcu_sched_ctrlblk) + | ||||
| 	    rcu_qsctr_help(&rcu_bh_ctrlblk)) | ||||
| 		invoke_rcu_kthread(); | ||||
| 		invoke_rcu_callbacks(); | ||||
| 	local_irq_restore(flags); | ||||
| } | ||||
| 
 | ||||
| @ -130,7 +121,7 @@ void rcu_bh_qs(int cpu) | ||||
| 
 | ||||
| 	local_irq_save(flags); | ||||
| 	if (rcu_qsctr_help(&rcu_bh_ctrlblk)) | ||||
| 		invoke_rcu_kthread(); | ||||
| 		invoke_rcu_callbacks(); | ||||
| 	local_irq_restore(flags); | ||||
| } | ||||
| 
 | ||||
| @ -154,18 +145,23 @@ void rcu_check_callbacks(int cpu, int user) | ||||
|  * Invoke the RCU callbacks on the specified rcu_ctrlblk structure | ||||
|  * whose grace period has elapsed. | ||||
|  */ | ||||
| static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) | ||||
| static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | ||||
| { | ||||
| 	char *rn = NULL; | ||||
| 	struct rcu_head *next, *list; | ||||
| 	unsigned long flags; | ||||
| 	RCU_TRACE(int cb_count = 0); | ||||
| 
 | ||||
| 	/* If no RCU callbacks ready to invoke, just return. */ | ||||
| 	if (&rcp->rcucblist == rcp->donetail) | ||||
| 	if (&rcp->rcucblist == rcp->donetail) { | ||||
| 		RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); | ||||
| 		RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Move the ready-to-invoke callbacks to a local list. */ | ||||
| 	local_irq_save(flags); | ||||
| 	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); | ||||
| 	list = rcp->rcucblist; | ||||
| 	rcp->rcucblist = *rcp->donetail; | ||||
| 	*rcp->donetail = NULL; | ||||
| @ -176,49 +172,26 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) | ||||
| 	local_irq_restore(flags); | ||||
| 
 | ||||
| 	/* Invoke the callbacks on the local list. */ | ||||
| 	RCU_TRACE(rn = rcp->name); | ||||
| 	while (list) { | ||||
| 		next = list->next; | ||||
| 		prefetch(next); | ||||
| 		debug_rcu_head_unqueue(list); | ||||
| 		local_bh_disable(); | ||||
| 		__rcu_reclaim(list); | ||||
| 		__rcu_reclaim(rn, list); | ||||
| 		local_bh_enable(); | ||||
| 		list = next; | ||||
| 		RCU_TRACE(cb_count++); | ||||
| 	} | ||||
| 	RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); | ||||
| 	RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * This kthread invokes RCU callbacks whose grace periods have | ||||
|  * elapsed.  It is awakened as needed, and takes the place of the | ||||
|  * RCU_SOFTIRQ that was used previously for this purpose. | ||||
|  * This is a kthread, but it is never stopped, at least not until | ||||
|  * the system goes down. | ||||
|  */ | ||||
| static int rcu_kthread(void *arg) | ||||
| static void rcu_process_callbacks(struct softirq_action *unused) | ||||
| { | ||||
| 	unsigned long work; | ||||
| 	unsigned long morework; | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		wait_event_interruptible(rcu_kthread_wq, | ||||
| 					 have_rcu_kthread_work != 0); | ||||
| 		morework = rcu_boost(); | ||||
| 		local_irq_save(flags); | ||||
| 		work = have_rcu_kthread_work; | ||||
| 		have_rcu_kthread_work = morework; | ||||
| 		local_irq_restore(flags); | ||||
| 		if (work) { | ||||
| 			rcu_process_callbacks(&rcu_sched_ctrlblk); | ||||
| 			rcu_process_callbacks(&rcu_bh_ctrlblk); | ||||
| 			rcu_preempt_process_callbacks(); | ||||
| 		} | ||||
| 		schedule_timeout_interruptible(1); /* Leave CPU for others. */ | ||||
| 	} | ||||
| 
 | ||||
| 	return 0;  /* Not reached, but needed to shut gcc up. */ | ||||
| 	__rcu_process_callbacks(&rcu_sched_ctrlblk); | ||||
| 	__rcu_process_callbacks(&rcu_bh_ctrlblk); | ||||
| 	rcu_preempt_process_callbacks(); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -280,45 +253,3 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||||
| 	__call_rcu(head, func, &rcu_bh_ctrlblk); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(call_rcu_bh); | ||||
| 
 | ||||
| void rcu_barrier_bh(void) | ||||
| { | ||||
| 	struct rcu_synchronize rcu; | ||||
| 
 | ||||
| 	init_rcu_head_on_stack(&rcu.head); | ||||
| 	init_completion(&rcu.completion); | ||||
| 	/* Will wake me after RCU finished. */ | ||||
| 	call_rcu_bh(&rcu.head, wakeme_after_rcu); | ||||
| 	/* Wait for it. */ | ||||
| 	wait_for_completion(&rcu.completion); | ||||
| 	destroy_rcu_head_on_stack(&rcu.head); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(rcu_barrier_bh); | ||||
| 
 | ||||
| void rcu_barrier_sched(void) | ||||
| { | ||||
| 	struct rcu_synchronize rcu; | ||||
| 
 | ||||
| 	init_rcu_head_on_stack(&rcu.head); | ||||
| 	init_completion(&rcu.completion); | ||||
| 	/* Will wake me after RCU finished. */ | ||||
| 	call_rcu_sched(&rcu.head, wakeme_after_rcu); | ||||
| 	/* Wait for it. */ | ||||
| 	wait_for_completion(&rcu.completion); | ||||
| 	destroy_rcu_head_on_stack(&rcu.head); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(rcu_barrier_sched); | ||||
| 
 | ||||
| /*
 | ||||
|  * Spawn the kthread that invokes RCU callbacks. | ||||
|  */ | ||||
| static int __init rcu_spawn_kthreads(void) | ||||
| { | ||||
| 	struct sched_param sp; | ||||
| 
 | ||||
| 	rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); | ||||
| 	sp.sched_priority = RCU_BOOST_PRIO; | ||||
| 	sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); | ||||
| 	return 0; | ||||
| } | ||||
| early_initcall(rcu_spawn_kthreads); | ||||
|  | ||||
| @ -26,29 +26,26 @@ | ||||
| #include <linux/debugfs.h> | ||||
| #include <linux/seq_file.h> | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_TRACE | ||||
| #define RCU_TRACE(stmt)	stmt | ||||
| #else /* #ifdef CONFIG_RCU_TRACE */ | ||||
| #define RCU_TRACE(stmt) | ||||
| #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||||
| 
 | ||||
| /* Global control variables for rcupdate callback mechanism. */ | ||||
| struct rcu_ctrlblk { | ||||
| 	struct rcu_head *rcucblist;	/* List of pending callbacks (CBs). */ | ||||
| 	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */ | ||||
| 	struct rcu_head **curtail;	/* ->next pointer of last CB. */ | ||||
| 	RCU_TRACE(long qlen);		/* Number of pending CBs. */ | ||||
| 	RCU_TRACE(char *name);		/* Name of RCU type. */ | ||||
| }; | ||||
| 
 | ||||
| /* Definition for rcupdate control block. */ | ||||
| static struct rcu_ctrlblk rcu_sched_ctrlblk = { | ||||
| 	.donetail	= &rcu_sched_ctrlblk.rcucblist, | ||||
| 	.curtail	= &rcu_sched_ctrlblk.rcucblist, | ||||
| 	RCU_TRACE(.name = "rcu_sched") | ||||
| }; | ||||
| 
 | ||||
| static struct rcu_ctrlblk rcu_bh_ctrlblk = { | ||||
| 	.donetail	= &rcu_bh_ctrlblk.rcucblist, | ||||
| 	.curtail	= &rcu_bh_ctrlblk.rcucblist, | ||||
| 	RCU_TRACE(.name = "rcu_bh") | ||||
| }; | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| @ -131,6 +128,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | ||||
| 	.rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||||
| 	.nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||||
| 	.blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), | ||||
| 	RCU_TRACE(.rcb.name = "rcu_preempt") | ||||
| }; | ||||
| 
 | ||||
| static int rcu_preempted_readers_exp(void); | ||||
| @ -247,6 +245,13 @@ static void show_tiny_preempt_stats(struct seq_file *m) | ||||
| 
 | ||||
| #include "rtmutex_common.h" | ||||
| 
 | ||||
| #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||||
| 
 | ||||
| /* Controls for rcu_kthread() kthread. */ | ||||
| static struct task_struct *rcu_kthread_task; | ||||
| static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); | ||||
| static unsigned long have_rcu_kthread_work; | ||||
| 
 | ||||
| /*
 | ||||
|  * Carry out RCU priority boosting on the task indicated by ->boost_tasks, | ||||
|  * and advance ->boost_tasks to the next task in the ->blkd_tasks list. | ||||
| @ -334,7 +339,7 @@ static int rcu_initiate_boost(void) | ||||
| 		if (rcu_preempt_ctrlblk.exp_tasks == NULL) | ||||
| 			rcu_preempt_ctrlblk.boost_tasks = | ||||
| 				rcu_preempt_ctrlblk.gp_tasks; | ||||
| 		invoke_rcu_kthread(); | ||||
| 		invoke_rcu_callbacks(); | ||||
| 	} else | ||||
| 		RCU_TRACE(rcu_initiate_boost_trace()); | ||||
| 	return 1; | ||||
| @ -352,14 +357,6 @@ static void rcu_preempt_boost_start_gp(void) | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_RCU_BOOST */ | ||||
| 
 | ||||
| /*
 | ||||
|  * If there is no RCU priority boosting, we don't boost. | ||||
|  */ | ||||
| static int rcu_boost(void) | ||||
| { | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * If there is no RCU priority boosting, we don't initiate boosting, | ||||
|  * but we do indicate whether there are blocked readers blocking the | ||||
| @ -427,7 +424,7 @@ static void rcu_preempt_cpu_qs(void) | ||||
| 
 | ||||
| 	/* If there are done callbacks, cause them to be invoked. */ | ||||
| 	if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | ||||
| 		invoke_rcu_kthread(); | ||||
| 		invoke_rcu_callbacks(); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -648,7 +645,7 @@ static void rcu_preempt_check_callbacks(void) | ||||
| 		rcu_preempt_cpu_qs(); | ||||
| 	if (&rcu_preempt_ctrlblk.rcb.rcucblist != | ||||
| 	    rcu_preempt_ctrlblk.rcb.donetail) | ||||
| 		invoke_rcu_kthread(); | ||||
| 		invoke_rcu_callbacks(); | ||||
| 	if (rcu_preempt_gp_in_progress() && | ||||
| 	    rcu_cpu_blocking_cur_gp() && | ||||
| 	    rcu_preempt_running_reader()) | ||||
| @ -674,7 +671,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | ||||
|  */ | ||||
| static void rcu_preempt_process_callbacks(void) | ||||
| { | ||||
| 	rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | ||||
| 	__rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -697,20 +694,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(call_rcu); | ||||
| 
 | ||||
| void rcu_barrier(void) | ||||
| { | ||||
| 	struct rcu_synchronize rcu; | ||||
| 
 | ||||
| 	init_rcu_head_on_stack(&rcu.head); | ||||
| 	init_completion(&rcu.completion); | ||||
| 	/* Will wake me after RCU finished. */ | ||||
| 	call_rcu(&rcu.head, wakeme_after_rcu); | ||||
| 	/* Wait for it. */ | ||||
| 	wait_for_completion(&rcu.completion); | ||||
| 	destroy_rcu_head_on_stack(&rcu.head); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(rcu_barrier); | ||||
| 
 | ||||
| /*
 | ||||
|  * synchronize_rcu - wait until a grace period has elapsed. | ||||
|  * | ||||
| @ -863,15 +846,6 @@ static void show_tiny_preempt_stats(struct seq_file *m) | ||||
| 
 | ||||
| #endif /* #ifdef CONFIG_RCU_TRACE */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Because preemptible RCU does not exist, it is never necessary to | ||||
|  * boost preempted RCU readers. | ||||
|  */ | ||||
| static int rcu_boost(void) | ||||
| { | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Because preemptible RCU does not exist, it never has any callbacks | ||||
|  * to check. | ||||
| @ -898,6 +872,78 @@ static void rcu_preempt_process_callbacks(void) | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| 
 | ||||
| /*
 | ||||
|  * Wake up rcu_kthread() to process callbacks now eligible for invocation | ||||
|  * or to boost readers. | ||||
|  */ | ||||
| static void invoke_rcu_callbacks(void) | ||||
| { | ||||
| 	have_rcu_kthread_work = 1; | ||||
| 	wake_up(&rcu_kthread_wq); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * This kthread invokes RCU callbacks whose grace periods have | ||||
|  * elapsed.  It is awakened as needed, and takes the place of the | ||||
|  * RCU_SOFTIRQ that is used for this purpose when boosting is disabled. | ||||
|  * This is a kthread, but it is never stopped, at least not until | ||||
|  * the system goes down. | ||||
|  */ | ||||
| static int rcu_kthread(void *arg) | ||||
| { | ||||
| 	unsigned long work; | ||||
| 	unsigned long morework; | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		wait_event_interruptible(rcu_kthread_wq, | ||||
| 					 have_rcu_kthread_work != 0); | ||||
| 		morework = rcu_boost(); | ||||
| 		local_irq_save(flags); | ||||
| 		work = have_rcu_kthread_work; | ||||
| 		have_rcu_kthread_work = morework; | ||||
| 		local_irq_restore(flags); | ||||
| 		if (work) | ||||
| 			rcu_process_callbacks(NULL); | ||||
| 		schedule_timeout_interruptible(1); /* Leave CPU for others. */ | ||||
| 	} | ||||
| 
 | ||||
| 	return 0;  /* Not reached, but needed to shut gcc up. */ | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Spawn the kthread that invokes RCU callbacks. | ||||
|  */ | ||||
| static int __init rcu_spawn_kthreads(void) | ||||
| { | ||||
| 	struct sched_param sp; | ||||
| 
 | ||||
| 	rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); | ||||
| 	sp.sched_priority = RCU_BOOST_PRIO; | ||||
| 	sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); | ||||
| 	return 0; | ||||
| } | ||||
| early_initcall(rcu_spawn_kthreads); | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_RCU_BOOST */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Start up softirq processing of callbacks. | ||||
|  */ | ||||
| void invoke_rcu_callbacks(void) | ||||
| { | ||||
| 	raise_softirq(RCU_SOFTIRQ); | ||||
| } | ||||
| 
 | ||||
| void rcu_init(void) | ||||
| { | ||||
| 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | ||||
| } | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| #include <linux/kernel_stat.h> | ||||
| 
 | ||||
| @ -913,12 +959,6 @@ void __init rcu_scheduler_starting(void) | ||||
| 
 | ||||
| #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||||
| #else /* #ifdef CONFIG_RCU_BOOST */ | ||||
| #define RCU_BOOST_PRIO 1 | ||||
| #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_TRACE | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
|  | ||||
| @ -73,7 +73,7 @@ module_param(nreaders, int, 0444); | ||||
| MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); | ||||
| module_param(nfakewriters, int, 0444); | ||||
| MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); | ||||
| module_param(stat_interval, int, 0444); | ||||
| module_param(stat_interval, int, 0644); | ||||
| MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); | ||||
| module_param(verbose, bool, 0444); | ||||
| MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); | ||||
| @ -480,30 +480,6 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p) | ||||
| 	call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); | ||||
| } | ||||
| 
 | ||||
| struct rcu_bh_torture_synchronize { | ||||
| 	struct rcu_head head; | ||||
| 	struct completion completion; | ||||
| }; | ||||
| 
 | ||||
| static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head) | ||||
| { | ||||
| 	struct rcu_bh_torture_synchronize *rcu; | ||||
| 
 | ||||
| 	rcu = container_of(head, struct rcu_bh_torture_synchronize, head); | ||||
| 	complete(&rcu->completion); | ||||
| } | ||||
| 
 | ||||
| static void rcu_bh_torture_synchronize(void) | ||||
| { | ||||
| 	struct rcu_bh_torture_synchronize rcu; | ||||
| 
 | ||||
| 	init_rcu_head_on_stack(&rcu.head); | ||||
| 	init_completion(&rcu.completion); | ||||
| 	call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb); | ||||
| 	wait_for_completion(&rcu.completion); | ||||
| 	destroy_rcu_head_on_stack(&rcu.head); | ||||
| } | ||||
| 
 | ||||
| static struct rcu_torture_ops rcu_bh_ops = { | ||||
| 	.init		= NULL, | ||||
| 	.cleanup	= NULL, | ||||
| @ -512,7 +488,7 @@ static struct rcu_torture_ops rcu_bh_ops = { | ||||
| 	.readunlock	= rcu_bh_torture_read_unlock, | ||||
| 	.completed	= rcu_bh_torture_completed, | ||||
| 	.deferred_free	= rcu_bh_torture_deferred_free, | ||||
| 	.sync		= rcu_bh_torture_synchronize, | ||||
| 	.sync		= synchronize_rcu_bh, | ||||
| 	.cb_barrier	= rcu_barrier_bh, | ||||
| 	.fqs		= rcu_bh_force_quiescent_state, | ||||
| 	.stats		= NULL, | ||||
| @ -528,7 +504,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { | ||||
| 	.readunlock	= rcu_bh_torture_read_unlock, | ||||
| 	.completed	= rcu_bh_torture_completed, | ||||
| 	.deferred_free	= rcu_sync_torture_deferred_free, | ||||
| 	.sync		= rcu_bh_torture_synchronize, | ||||
| 	.sync		= synchronize_rcu_bh, | ||||
| 	.cb_barrier	= NULL, | ||||
| 	.fqs		= rcu_bh_force_quiescent_state, | ||||
| 	.stats		= NULL, | ||||
| @ -536,6 +512,22 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { | ||||
| 	.name		= "rcu_bh_sync" | ||||
| }; | ||||
| 
 | ||||
| static struct rcu_torture_ops rcu_bh_expedited_ops = { | ||||
| 	.init		= rcu_sync_torture_init, | ||||
| 	.cleanup	= NULL, | ||||
| 	.readlock	= rcu_bh_torture_read_lock, | ||||
| 	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */ | ||||
| 	.readunlock	= rcu_bh_torture_read_unlock, | ||||
| 	.completed	= rcu_bh_torture_completed, | ||||
| 	.deferred_free	= rcu_sync_torture_deferred_free, | ||||
| 	.sync		= synchronize_rcu_bh_expedited, | ||||
| 	.cb_barrier	= NULL, | ||||
| 	.fqs		= rcu_bh_force_quiescent_state, | ||||
| 	.stats		= NULL, | ||||
| 	.irq_capable	= 1, | ||||
| 	.name		= "rcu_bh_expedited" | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Definitions for srcu torture testing. | ||||
|  */ | ||||
| @ -659,11 +651,6 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p) | ||||
| 	call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); | ||||
| } | ||||
| 
 | ||||
| static void sched_torture_synchronize(void) | ||||
| { | ||||
| 	synchronize_sched(); | ||||
| } | ||||
| 
 | ||||
| static struct rcu_torture_ops sched_ops = { | ||||
| 	.init		= rcu_sync_torture_init, | ||||
| 	.cleanup	= NULL, | ||||
| @ -672,7 +659,7 @@ static struct rcu_torture_ops sched_ops = { | ||||
| 	.readunlock	= sched_torture_read_unlock, | ||||
| 	.completed	= rcu_no_completed, | ||||
| 	.deferred_free	= rcu_sched_torture_deferred_free, | ||||
| 	.sync		= sched_torture_synchronize, | ||||
| 	.sync		= synchronize_sched, | ||||
| 	.cb_barrier	= rcu_barrier_sched, | ||||
| 	.fqs		= rcu_sched_force_quiescent_state, | ||||
| 	.stats		= NULL, | ||||
| @ -688,7 +675,7 @@ static struct rcu_torture_ops sched_sync_ops = { | ||||
| 	.readunlock	= sched_torture_read_unlock, | ||||
| 	.completed	= rcu_no_completed, | ||||
| 	.deferred_free	= rcu_sync_torture_deferred_free, | ||||
| 	.sync		= sched_torture_synchronize, | ||||
| 	.sync		= synchronize_sched, | ||||
| 	.cb_barrier	= NULL, | ||||
| 	.fqs		= rcu_sched_force_quiescent_state, | ||||
| 	.stats		= NULL, | ||||
| @ -754,7 +741,7 @@ static int rcu_torture_boost(void *arg) | ||||
| 	do { | ||||
| 		/* Wait for the next test interval. */ | ||||
| 		oldstarttime = boost_starttime; | ||||
| 		while (jiffies - oldstarttime > ULONG_MAX / 2) { | ||||
| 		while (ULONG_CMP_LT(jiffies, oldstarttime)) { | ||||
| 			schedule_timeout_uninterruptible(1); | ||||
| 			rcu_stutter_wait("rcu_torture_boost"); | ||||
| 			if (kthread_should_stop() || | ||||
| @ -765,7 +752,7 @@ static int rcu_torture_boost(void *arg) | ||||
| 		/* Do one boost-test interval. */ | ||||
| 		endtime = oldstarttime + test_boost_duration * HZ; | ||||
| 		call_rcu_time = jiffies; | ||||
| 		while (jiffies - endtime > ULONG_MAX / 2) { | ||||
| 		while (ULONG_CMP_LT(jiffies, endtime)) { | ||||
| 			/* If we don't have a callback in flight, post one. */ | ||||
| 			if (!rbi.inflight) { | ||||
| 				smp_mb(); /* RCU core before ->inflight = 1. */ | ||||
| @ -792,7 +779,8 @@ static int rcu_torture_boost(void *arg) | ||||
| 		 * interval.  Besides, we are running at RT priority, | ||||
| 		 * so delays should be relatively rare. | ||||
| 		 */ | ||||
| 		while (oldstarttime == boost_starttime) { | ||||
| 		while (oldstarttime == boost_starttime && | ||||
| 		       !kthread_should_stop()) { | ||||
| 			if (mutex_trylock(&boost_mutex)) { | ||||
| 				boost_starttime = jiffies + | ||||
| 						  test_boost_interval * HZ; | ||||
| @ -809,11 +797,11 @@ checkwait:	rcu_stutter_wait("rcu_torture_boost"); | ||||
| 
 | ||||
| 	/* Clean up and exit. */ | ||||
| 	VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); | ||||
| 	destroy_rcu_head_on_stack(&rbi.rcu); | ||||
| 	rcutorture_shutdown_absorb("rcu_torture_boost"); | ||||
| 	while (!kthread_should_stop() || rbi.inflight) | ||||
| 		schedule_timeout_uninterruptible(1); | ||||
| 	smp_mb(); /* order accesses to ->inflight before stack-frame death. */ | ||||
| 	destroy_rcu_head_on_stack(&rbi.rcu); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| @ -831,11 +819,13 @@ rcu_torture_fqs(void *arg) | ||||
| 	VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); | ||||
| 	do { | ||||
| 		fqs_resume_time = jiffies + fqs_stutter * HZ; | ||||
| 		while (jiffies - fqs_resume_time > LONG_MAX) { | ||||
| 		while (ULONG_CMP_LT(jiffies, fqs_resume_time) && | ||||
| 		       !kthread_should_stop()) { | ||||
| 			schedule_timeout_interruptible(1); | ||||
| 		} | ||||
| 		fqs_burst_remaining = fqs_duration; | ||||
| 		while (fqs_burst_remaining > 0) { | ||||
| 		while (fqs_burst_remaining > 0 && | ||||
| 		       !kthread_should_stop()) { | ||||
| 			cur_ops->fqs(); | ||||
| 			udelay(fqs_holdoff); | ||||
| 			fqs_burst_remaining -= fqs_holdoff; | ||||
| @ -1280,8 +1270,9 @@ static int rcutorture_booster_init(int cpu) | ||||
| 	/* Don't allow time recalculation while creating a new task. */ | ||||
| 	mutex_lock(&boost_mutex); | ||||
| 	VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); | ||||
| 	boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL, | ||||
| 					  "rcu_torture_boost"); | ||||
| 	boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, | ||||
| 						  cpu_to_node(cpu), | ||||
| 						  "rcu_torture_boost"); | ||||
| 	if (IS_ERR(boost_tasks[cpu])) { | ||||
| 		retval = PTR_ERR(boost_tasks[cpu]); | ||||
| 		VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); | ||||
| @ -1424,7 +1415,7 @@ rcu_torture_init(void) | ||||
| 	int firsterr = 0; | ||||
| 	static struct rcu_torture_ops *torture_ops[] = | ||||
| 		{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, | ||||
| 		  &rcu_bh_ops, &rcu_bh_sync_ops, | ||||
| 		  &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, | ||||
| 		  &srcu_ops, &srcu_expedited_ops, | ||||
| 		  &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; | ||||
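The open-coded "jiffies - x > ULONG_MAX / 2" tests above are replaced by ULONG_CMP_LT(), a wraparound-safe comparison of unsigned time values. Roughly (the names below are stand-ins; see rcupdate.h for the real macro):

	/* Sketch of a wraparound-safe "a is before b" test for jiffies values. */
	#define EXAMPLE_ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

	/* Politely spin until the jiffies deadline "when" has passed. */
	static void example_wait_for(unsigned long when)
	{
		while (EXAMPLE_ULONG_CMP_LT(jiffies, when))
			schedule_timeout_uninterruptible(1);
	}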
| 
 | ||||
|  | ||||
							
								
								
									
292	kernel/rcutree.c
							| @ -52,13 +52,16 @@ | ||||
| #include <linux/prefetch.h> | ||||
| 
 | ||||
| #include "rcutree.h" | ||||
| #include <trace/events/rcu.h> | ||||
| 
 | ||||
| #include "rcu.h" | ||||
| 
 | ||||
| /* Data structures. */ | ||||
| 
 | ||||
| static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | ||||
| 
 | ||||
| #define RCU_STATE_INITIALIZER(structname) { \ | ||||
| 	.level = { &structname.node[0] }, \ | ||||
| 	.level = { &structname##_state.node[0] }, \ | ||||
| 	.levelcnt = { \ | ||||
| 		NUM_RCU_LVL_0,  /* root of hierarchy. */ \ | ||||
| 		NUM_RCU_LVL_1, \ | ||||
| @ -69,17 +72,17 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | ||||
| 	.signaled = RCU_GP_IDLE, \ | ||||
| 	.gpnum = -300, \ | ||||
| 	.completed = -300, \ | ||||
| 	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ | ||||
| 	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ | ||||
| 	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ | ||||
| 	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ | ||||
| 	.n_force_qs = 0, \ | ||||
| 	.n_force_qs_ngp = 0, \ | ||||
| 	.name = #structname, \ | ||||
| } | ||||
| 
 | ||||
| struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); | ||||
| struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); | ||||
| DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); | ||||
| 
 | ||||
| struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); | ||||
| struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); | ||||
| DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | ||||
| 
 | ||||
| static struct rcu_state *rcu_state; | ||||
| @ -128,8 +131,6 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | ||||
| static void invoke_rcu_core(void); | ||||
| static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | ||||
| 
 | ||||
| #define RCU_KTHREAD_PRIO 1	/* RT priority for per-CPU kthreads. */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Track the rcutorture test sequence number and the update version | ||||
|  * number within a given test.  The rcutorture_testseq is incremented | ||||
| @ -156,33 +157,41 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) | ||||
|  * Note a quiescent state.  Because we do not need to know | ||||
|  * how many quiescent states passed, just if there was at least | ||||
|  * one since the start of the grace period, this just sets a flag. | ||||
|  * The caller must have disabled preemption. | ||||
|  */ | ||||
| void rcu_sched_qs(int cpu) | ||||
| { | ||||
| 	struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); | ||||
| 
 | ||||
| 	rdp->passed_quiesc_completed = rdp->gpnum - 1; | ||||
| 	rdp->passed_quiesce_gpnum = rdp->gpnum; | ||||
| 	barrier(); | ||||
| 	rdp->passed_quiesc = 1; | ||||
| 	if (rdp->passed_quiesce == 0) | ||||
| 		trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); | ||||
| 	rdp->passed_quiesce = 1; | ||||
| } | ||||
| 
 | ||||
| void rcu_bh_qs(int cpu) | ||||
| { | ||||
| 	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); | ||||
| 
 | ||||
| 	rdp->passed_quiesc_completed = rdp->gpnum - 1; | ||||
| 	rdp->passed_quiesce_gpnum = rdp->gpnum; | ||||
| 	barrier(); | ||||
| 	rdp->passed_quiesc = 1; | ||||
| 	if (rdp->passed_quiesce == 0) | ||||
| 		trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); | ||||
| 	rdp->passed_quiesce = 1; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Note a context switch.  This is a quiescent state for RCU-sched, | ||||
|  * and requires special handling for preemptible RCU. | ||||
|  * The caller must have disabled preemption. | ||||
|  */ | ||||
| void rcu_note_context_switch(int cpu) | ||||
| { | ||||
| 	trace_rcu_utilization("Start context switch"); | ||||
| 	rcu_sched_qs(cpu); | ||||
| 	rcu_preempt_note_context_switch(cpu); | ||||
| 	trace_rcu_utilization("End context switch"); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(rcu_note_context_switch); | ||||
| 
 | ||||
| @ -193,7 +202,7 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | ||||
| }; | ||||
| #endif /* #ifdef CONFIG_NO_HZ */ | ||||
| 
 | ||||
| static int blimit = 10;		/* Maximum callbacks per softirq. */ | ||||
| static int blimit = 10;		/* Maximum callbacks per rcu_do_batch. */ | ||||
| static int qhimark = 10000;	/* If this many pending, ignore blimit. */ | ||||
| static int qlowmark = 100;	/* Once only this many pending, use blimit. */ | ||||
| 
 | ||||
| @ -314,6 +323,7 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) | ||||
| 	 * trust its state not to change because interrupts are disabled. | ||||
| 	 */ | ||||
| 	if (cpu_is_offline(rdp->cpu)) { | ||||
| 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); | ||||
| 		rdp->offline_fqs++; | ||||
| 		return 1; | ||||
| 	} | ||||
| @ -354,19 +364,13 @@ void rcu_enter_nohz(void) | ||||
| 		local_irq_restore(flags); | ||||
| 		return; | ||||
| 	} | ||||
| 	trace_rcu_dyntick("Start"); | ||||
| 	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | ||||
| 	smp_mb__before_atomic_inc();  /* See above. */ | ||||
| 	atomic_inc(&rdtp->dynticks); | ||||
| 	smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */ | ||||
| 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | ||||
| 	local_irq_restore(flags); | ||||
| 
 | ||||
| 	/* If the interrupt queued a callback, get out of dyntick mode. */ | ||||
| 	if (in_irq() && | ||||
| 	    (__get_cpu_var(rcu_sched_data).nxtlist || | ||||
| 	     __get_cpu_var(rcu_bh_data).nxtlist || | ||||
| 	     rcu_preempt_needs_cpu(smp_processor_id()))) | ||||
| 		set_need_resched(); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -391,6 +395,7 @@ void rcu_exit_nohz(void) | ||||
| 	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | ||||
| 	smp_mb__after_atomic_inc();  /* See above. */ | ||||
| 	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | ||||
| 	trace_rcu_dyntick("End"); | ||||
| 	local_irq_restore(flags); | ||||
| } | ||||
| 
 | ||||
| @ -481,11 +486,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) | ||||
|  */ | ||||
| static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | ||||
| { | ||||
| 	unsigned long curr; | ||||
| 	unsigned long snap; | ||||
| 	unsigned int curr; | ||||
| 	unsigned int snap; | ||||
| 
 | ||||
| 	curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); | ||||
| 	snap = (unsigned long)rdp->dynticks_snap; | ||||
| 	curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); | ||||
| 	snap = (unsigned int)rdp->dynticks_snap; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If the CPU passed through or entered a dynticks idle phase with | ||||
| @ -495,7 +500,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | ||||
| 	 * read-side critical section that started before the beginning | ||||
| 	 * of the current RCU grace period. | ||||
| 	 */ | ||||
| 	if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { | ||||
| 	if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { | ||||
| 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); | ||||
| 		rdp->dynticks_fqs++; | ||||
| 		return 1; | ||||
| 	} | ||||
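[Editor's sketch, not part of the commit: the hunk above switches the dynticks comparison from unsigned long to unsigned int, using a wraparound-safe UINT_CMP_GE(). The real macros live in the RCU headers; the definition below is an assumption for illustration, and sketch_dynticks_qs() is a placeholder name.]

	#include <limits.h>

	/* Assumed shape of the wraparound-safe "a >= b" test. */
	#define SKETCH_UINT_CMP_GE(a, b)  (UINT_MAX / 2 >= (unsigned int)((a) - (b)))

	/*
	 * Quiescent if the dynticks counter is even (CPU idle) or has
	 * advanced by at least two (CPU passed through idle) since the
	 * snapshot was taken.
	 */
	static int sketch_dynticks_qs(unsigned int curr, unsigned int snap)
	{
		return (curr & 0x1) == 0 || SKETCH_UINT_CMP_GE(curr, snap + 2);
	}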
| @ -537,6 +543,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | ||||
| 	int cpu; | ||||
| 	long delta; | ||||
| 	unsigned long flags; | ||||
| 	int ndetected; | ||||
| 	struct rcu_node *rnp = rcu_get_root(rsp); | ||||
| 
 | ||||
| 	/* Only let one CPU complain about others per time interval. */ | ||||
| @ -553,7 +560,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | ||||
| 	 * Now rat on any tasks that got kicked up to the root rcu_node | ||||
| 	 * due to CPU offlining. | ||||
| 	 */ | ||||
| 	rcu_print_task_stall(rnp); | ||||
| 	ndetected = rcu_print_task_stall(rnp); | ||||
| 	raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -565,17 +572,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | ||||
| 	       rsp->name); | ||||
| 	rcu_for_each_leaf_node(rsp, rnp) { | ||||
| 		raw_spin_lock_irqsave(&rnp->lock, flags); | ||||
| 		rcu_print_task_stall(rnp); | ||||
| 		ndetected += rcu_print_task_stall(rnp); | ||||
| 		raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 		if (rnp->qsmask == 0) | ||||
| 			continue; | ||||
| 		for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) | ||||
| 			if (rnp->qsmask & (1UL << cpu)) | ||||
| 			if (rnp->qsmask & (1UL << cpu)) { | ||||
| 				printk(" %d", rnp->grplo + cpu); | ||||
| 				ndetected++; | ||||
| 			} | ||||
| 	} | ||||
| 	printk("} (detected by %d, t=%ld jiffies)\n", | ||||
| 	       smp_processor_id(), (long)(jiffies - rsp->gp_start)); | ||||
| 	trigger_all_cpu_backtrace(); | ||||
| 	if (ndetected == 0) | ||||
| 		printk(KERN_ERR "INFO: Stall ended before state dump start\n"); | ||||
| 	else if (!trigger_all_cpu_backtrace()) | ||||
| 		dump_stack(); | ||||
| 
 | ||||
| 	/* If so configured, complain about tasks blocking the grace period. */ | ||||
| 
 | ||||
| @ -596,7 +608,8 @@ static void print_cpu_stall(struct rcu_state *rsp) | ||||
| 	 */ | ||||
| 	printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", | ||||
| 	       rsp->name, smp_processor_id(), jiffies - rsp->gp_start); | ||||
| 	trigger_all_cpu_backtrace(); | ||||
| 	if (!trigger_all_cpu_backtrace()) | ||||
| 		dump_stack(); | ||||
| 
 | ||||
| 	raw_spin_lock_irqsave(&rnp->lock, flags); | ||||
| 	if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) | ||||
| @ -678,9 +691,10 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct | ||||
| 		 * go looking for one. | ||||
| 		 */ | ||||
| 		rdp->gpnum = rnp->gpnum; | ||||
| 		trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); | ||||
| 		if (rnp->qsmask & rdp->grpmask) { | ||||
| 			rdp->qs_pending = 1; | ||||
| 			rdp->passed_quiesc = 0; | ||||
| 			rdp->passed_quiesce = 0; | ||||
| 		} else | ||||
| 			rdp->qs_pending = 0; | ||||
| 	} | ||||
| @ -741,6 +755,7 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat | ||||
| 
 | ||||
| 		/* Remember that we saw this grace-period completion. */ | ||||
| 		rdp->completed = rnp->completed; | ||||
| 		trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * If we were in an extended quiescent state, we may have | ||||
| @ -826,31 +841,31 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | ||||
| 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | ||||
| 	struct rcu_node *rnp = rcu_get_root(rsp); | ||||
| 
 | ||||
| 	if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { | ||||
| 		if (cpu_needs_another_gp(rsp, rdp)) | ||||
| 			rsp->fqs_need_gp = 1; | ||||
| 		if (rnp->completed == rsp->completed) { | ||||
| 			raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 			return; | ||||
| 		} | ||||
| 		raw_spin_unlock(&rnp->lock);	 /* irqs remain disabled. */ | ||||
| 
 | ||||
| 	if (!rcu_scheduler_fully_active || | ||||
| 	    !cpu_needs_another_gp(rsp, rdp)) { | ||||
| 		/*
 | ||||
| 		 * Propagate new ->completed value to rcu_node structures | ||||
| 		 * so that other CPUs don't have to wait until the start | ||||
| 		 * of the next grace period to process their callbacks. | ||||
| 		 * Either the scheduler hasn't yet spawned the first | ||||
| 		 * non-idle task or this CPU does not need another | ||||
| 		 * grace period.  Either way, don't start a new grace | ||||
| 		 * period. | ||||
| 		 */ | ||||
| 		rcu_for_each_node_breadth_first(rsp, rnp) { | ||||
| 			raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||||
| 			rnp->completed = rsp->completed; | ||||
| 			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||||
| 		} | ||||
| 		local_irq_restore(flags); | ||||
| 		raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	if (rsp->fqs_active) { | ||||
| 		/*
 | ||||
| 		 * This CPU needs a grace period, but force_quiescent_state() | ||||
| 		 * is running.  Tell it to start one on this CPU's behalf. | ||||
| 		 */ | ||||
| 		rsp->fqs_need_gp = 1; | ||||
| 		raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Advance to a new grace period and initialize state. */ | ||||
| 	rsp->gpnum++; | ||||
| 	trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); | ||||
| 	WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); | ||||
| 	rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | ||||
| 	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | ||||
| @ -865,6 +880,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | ||||
| 		rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ | ||||
| 		rcu_start_gp_per_cpu(rsp, rnp, rdp); | ||||
| 		rcu_preempt_boost_start_gp(rnp); | ||||
| 		trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | ||||
| 					    rnp->level, rnp->grplo, | ||||
| 					    rnp->grphi, rnp->qsmask); | ||||
| 		raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 		return; | ||||
| 	} | ||||
| @ -901,6 +919,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | ||||
| 		if (rnp == rdp->mynode) | ||||
| 			rcu_start_gp_per_cpu(rsp, rnp, rdp); | ||||
| 		rcu_preempt_boost_start_gp(rnp); | ||||
| 		trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | ||||
| 					    rnp->level, rnp->grplo, | ||||
| 					    rnp->grphi, rnp->qsmask); | ||||
| 		raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */ | ||||
| 	} | ||||
| 
 | ||||
| @ -922,6 +943,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | ||||
| 	__releases(rcu_get_root(rsp)->lock) | ||||
| { | ||||
| 	unsigned long gp_duration; | ||||
| 	struct rcu_node *rnp = rcu_get_root(rsp); | ||||
| 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | ||||
| 
 | ||||
| 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | ||||
| 
 | ||||
| @ -933,7 +956,41 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | ||||
| 	gp_duration = jiffies - rsp->gp_start; | ||||
| 	if (gp_duration > rsp->gp_max) | ||||
| 		rsp->gp_max = gp_duration; | ||||
| 	rsp->completed = rsp->gpnum; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We know the grace period is complete, but to everyone else | ||||
| 	 * it appears to still be ongoing.  But it is also the case | ||||
| 	 * that to everyone else it looks like there is nothing that | ||||
| 	 * they can do to advance the grace period.  It is therefore | ||||
| 	 * safe for us to drop the lock in order to mark the grace | ||||
| 	 * period as completed in all of the rcu_node structures. | ||||
| 	 * | ||||
| 	 * But if this CPU needs another grace period, it will take | ||||
| 	 * care of this while initializing the next grace period. | ||||
| 	 * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL | ||||
| 	 * because the callbacks have not yet been advanced: Those | ||||
| 	 * callbacks are waiting on the grace period that just now | ||||
| 	 * completed. | ||||
| 	 */ | ||||
| 	if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { | ||||
| 		raw_spin_unlock(&rnp->lock);	 /* irqs remain disabled. */ | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Propagate new ->completed value to rcu_node structures | ||||
| 		 * so that other CPUs don't have to wait until the start | ||||
| 		 * of the next grace period to process their callbacks. | ||||
| 		 */ | ||||
| 		rcu_for_each_node_breadth_first(rsp, rnp) { | ||||
| 			raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||||
| 			rnp->completed = rsp->gpnum; | ||||
| 			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||||
| 		} | ||||
| 		rnp = rcu_get_root(rsp); | ||||
| 		raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||||
| 	} | ||||
| 
 | ||||
| 	rsp->completed = rsp->gpnum;  /* Declare the grace period complete. */ | ||||
| 	trace_rcu_grace_period(rsp->name, rsp->completed, "end"); | ||||
| 	rsp->signaled = RCU_GP_IDLE; | ||||
| 	rcu_start_gp(rsp, flags);  /* releases root node's rnp->lock. */ | ||||
| } | ||||
| @ -962,6 +1019,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | ||||
| 			return; | ||||
| 		} | ||||
| 		rnp->qsmask &= ~mask; | ||||
| 		trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, | ||||
| 						 mask, rnp->qsmask, rnp->level, | ||||
| 						 rnp->grplo, rnp->grphi, | ||||
| 						 !!rnp->gp_tasks); | ||||
| 		if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { | ||||
| 
 | ||||
| 			/* Other bits still set at this level, so done. */ | ||||
| @ -1000,7 +1061,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | ||||
|  * based on quiescent states detected in an earlier grace period! | ||||
|  */ | ||||
| static void | ||||
| rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) | ||||
| rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 	unsigned long mask; | ||||
| @ -1008,17 +1069,15 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las | ||||
| 
 | ||||
| 	rnp = rdp->mynode; | ||||
| 	raw_spin_lock_irqsave(&rnp->lock, flags); | ||||
| 	if (lastcomp != rnp->completed) { | ||||
| 	if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Someone beat us to it for this grace period, so leave. | ||||
| 		 * The race with GP start is resolved by the fact that we | ||||
| 		 * hold the leaf rcu_node lock, so that the per-CPU bits | ||||
| 		 * cannot yet be initialized -- so we would simply find our | ||||
| 		 * CPU's bit already cleared in rcu_report_qs_rnp() if this | ||||
| 		 * race occurred. | ||||
| 		 * The grace period in which this quiescent state was | ||||
| 		 * recorded has ended, so don't report it upwards. | ||||
| 		 * We will instead need a new quiescent state that lies | ||||
| 		 * within the current grace period. | ||||
| 		 */ | ||||
| 		rdp->passed_quiesc = 0;	/* try again later! */ | ||||
| 		rdp->passed_quiesce = 0;	/* need qs for new gp. */ | ||||
| 		raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 		return; | ||||
| 	} | ||||
| @ -1062,14 +1121,14 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
| 	 * Was there a quiescent state since the beginning of the grace | ||||
| 	 * period? If no, then exit and wait for the next call. | ||||
| 	 */ | ||||
| 	if (!rdp->passed_quiesc) | ||||
| 	if (!rdp->passed_quiesce) | ||||
| 		return; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Tell RCU we are done (but rcu_report_qs_rdp() will be the | ||||
| 	 * judge of that). | ||||
| 	 */ | ||||
| 	rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); | ||||
| 	rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_HOTPLUG_CPU | ||||
| @ -1130,11 +1189,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | ||||
| 		if (rnp->qsmaskinit != 0) { | ||||
| 			if (rnp != rdp->mynode) | ||||
| 				raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||||
| 			else | ||||
| 				trace_rcu_grace_period(rsp->name, | ||||
| 						       rnp->gpnum + 1 - | ||||
| 						       !!(rnp->qsmask & mask), | ||||
| 						       "cpuofl"); | ||||
| 			break; | ||||
| 		} | ||||
| 		if (rnp == rdp->mynode) | ||||
| 		if (rnp == rdp->mynode) { | ||||
| 			trace_rcu_grace_period(rsp->name, | ||||
| 					       rnp->gpnum + 1 - | ||||
| 					       !!(rnp->qsmask & mask), | ||||
| 					       "cpuofl"); | ||||
| 			need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); | ||||
| 		else | ||||
| 		} else | ||||
| 			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||||
| 		mask = rnp->grpmask; | ||||
| 		rnp = rnp->parent; | ||||
| @ -1190,17 +1258,22 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 	struct rcu_head *next, *list, **tail; | ||||
| 	int count; | ||||
| 	int bl, count; | ||||
| 
 | ||||
| 	/* If no callbacks are ready, just return.*/ | ||||
| 	if (!cpu_has_callbacks_ready_to_invoke(rdp)) | ||||
| 	if (!cpu_has_callbacks_ready_to_invoke(rdp)) { | ||||
| 		trace_rcu_batch_start(rsp->name, 0, 0); | ||||
| 		trace_rcu_batch_end(rsp->name, 0); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Extract the list of ready callbacks, disabling to prevent | ||||
| 	 * races with call_rcu() from interrupt handlers. | ||||
| 	 */ | ||||
| 	local_irq_save(flags); | ||||
| 	bl = rdp->blimit; | ||||
| 	trace_rcu_batch_start(rsp->name, rdp->qlen, bl); | ||||
| 	list = rdp->nxtlist; | ||||
| 	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | ||||
| 	*rdp->nxttail[RCU_DONE_TAIL] = NULL; | ||||
| @ -1216,13 +1289,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
| 		next = list->next; | ||||
| 		prefetch(next); | ||||
| 		debug_rcu_head_unqueue(list); | ||||
| 		__rcu_reclaim(list); | ||||
| 		__rcu_reclaim(rsp->name, list); | ||||
| 		list = next; | ||||
| 		if (++count >= rdp->blimit) | ||||
| 		if (++count >= bl) | ||||
| 			break; | ||||
| 	} | ||||
| 
 | ||||
| 	local_irq_save(flags); | ||||
| 	trace_rcu_batch_end(rsp->name, count); | ||||
| 
 | ||||
| 	/* Update count, and requeue any remaining callbacks. */ | ||||
| 	rdp->qlen -= count; | ||||
| @ -1250,7 +1324,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
| 
 | ||||
| 	local_irq_restore(flags); | ||||
| 
 | ||||
| 	/* Re-raise the RCU softirq if there are callbacks remaining. */ | ||||
| 	/* Re-invoke RCU core processing if there are callbacks remaining. */ | ||||
| 	if (cpu_has_callbacks_ready_to_invoke(rdp)) | ||||
| 		invoke_rcu_core(); | ||||
| } | ||||
| @ -1258,7 +1332,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
| /*
 | ||||
|  * Check to see if this CPU is in a non-context-switch quiescent state | ||||
|  * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). | ||||
|  * Also schedule the RCU softirq handler. | ||||
|  * Also schedule RCU core processing. | ||||
|  * | ||||
|  * This function must be called with hardirqs disabled.  It is normally | ||||
|  * invoked from the scheduling-clock interrupt.  If rcu_pending returns | ||||
| @ -1266,6 +1340,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
|  */ | ||||
| void rcu_check_callbacks(int cpu, int user) | ||||
| { | ||||
| 	trace_rcu_utilization("Start scheduler-tick"); | ||||
| 	if (user || | ||||
| 	    (idle_cpu(cpu) && rcu_scheduler_active && | ||||
| 	     !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { | ||||
| @ -1299,6 +1374,7 @@ void rcu_check_callbacks(int cpu, int user) | ||||
| 	rcu_preempt_check_callbacks(cpu); | ||||
| 	if (rcu_pending(cpu)) | ||||
| 		invoke_rcu_core(); | ||||
| 	trace_rcu_utilization("End scheduler-tick"); | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_SMP | ||||
| @ -1360,10 +1436,14 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | ||||
| 	unsigned long flags; | ||||
| 	struct rcu_node *rnp = rcu_get_root(rsp); | ||||
| 
 | ||||
| 	if (!rcu_gp_in_progress(rsp)) | ||||
| 	trace_rcu_utilization("Start fqs"); | ||||
| 	if (!rcu_gp_in_progress(rsp)) { | ||||
| 		trace_rcu_utilization("End fqs"); | ||||
| 		return;  /* No grace period in progress, nothing to force. */ | ||||
| 	} | ||||
| 	if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { | ||||
| 		rsp->n_force_qs_lh++; /* Inexact, can lose counts.  Tough! */ | ||||
| 		trace_rcu_utilization("End fqs"); | ||||
| 		return;	/* Someone else is already on the job. */ | ||||
| 	} | ||||
| 	if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) | ||||
| @ -1412,11 +1492,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | ||||
| 		raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ | ||||
| 		rsp->fqs_need_gp = 0; | ||||
| 		rcu_start_gp(rsp, flags); /* releases rnp->lock */ | ||||
| 		trace_rcu_utilization("End fqs"); | ||||
| 		return; | ||||
| 	} | ||||
| 	raw_spin_unlock(&rnp->lock);  /* irqs remain disabled */ | ||||
| unlock_fqs_ret: | ||||
| 	raw_spin_unlock_irqrestore(&rsp->fqslock, flags); | ||||
| 	trace_rcu_utilization("End fqs"); | ||||
| } | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_SMP */ | ||||
| @ -1429,9 +1511,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | ||||
| #endif /* #else #ifdef CONFIG_SMP */ | ||||
| 
 | ||||
| /*
 | ||||
|  * This does the RCU processing work from softirq context for the | ||||
|  * specified rcu_state and rcu_data structures.  This may be called | ||||
|  * only from the CPU to whom the rdp belongs. | ||||
|  * This does the RCU core processing work for the specified rcu_state | ||||
|  * and rcu_data structures.  This may be called only from the CPU to | ||||
|  * whom the rdp belongs. | ||||
|  */ | ||||
| static void | ||||
| __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
| @ -1468,24 +1550,24 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Do softirq processing for the current CPU. | ||||
|  * Do RCU core processing for the current CPU. | ||||
|  */ | ||||
| static void rcu_process_callbacks(struct softirq_action *unused) | ||||
| { | ||||
| 	trace_rcu_utilization("Start RCU core"); | ||||
| 	__rcu_process_callbacks(&rcu_sched_state, | ||||
| 				&__get_cpu_var(rcu_sched_data)); | ||||
| 	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | ||||
| 	rcu_preempt_process_callbacks(); | ||||
| 
 | ||||
| 	/* If we are last CPU on way to dyntick-idle mode, accelerate it. */ | ||||
| 	rcu_needs_cpu_flush(); | ||||
| 	trace_rcu_utilization("End RCU core"); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Wake up the current CPU's kthread.  This replaces raise_softirq() | ||||
|  * in earlier versions of RCU.  Note that because we are running on | ||||
|  * the current CPU with interrupts disabled, the rcu_cpu_kthread_task | ||||
|  * cannot disappear out from under us. | ||||
|  * Schedule RCU callback invocation.  If the specified type of RCU | ||||
|  * does not support RCU priority boosting, just do a direct call, | ||||
|  * otherwise wake up the per-CPU kernel kthread.  Note that because we | ||||
|  * are running on the current CPU with interrupts disabled, the | ||||
|  * rcu_cpu_kthread_task cannot disappear out from under us. | ||||
|  */ | ||||
| static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
| { | ||||
| @ -1530,6 +1612,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | ||||
| 	rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | ||||
| 	rdp->qlen++; | ||||
| 
 | ||||
| 	if (__is_kfree_rcu_offset((unsigned long)func)) | ||||
| 		trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, | ||||
| 					 rdp->qlen); | ||||
| 	else | ||||
| 		trace_rcu_callback(rsp->name, head, rdp->qlen); | ||||
| 
 | ||||
| 	/* If interrupts were disabled, don't dive into RCU core. */ | ||||
| 	if (irqs_disabled_flags(flags)) { | ||||
| 		local_irq_restore(flags); | ||||
| @ -1613,18 +1701,9 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); | ||||
|  */ | ||||
| void synchronize_sched(void) | ||||
| { | ||||
| 	struct rcu_synchronize rcu; | ||||
| 
 | ||||
| 	if (rcu_blocking_is_gp()) | ||||
| 		return; | ||||
| 
 | ||||
| 	init_rcu_head_on_stack(&rcu.head); | ||||
| 	init_completion(&rcu.completion); | ||||
| 	/* Will wake me after RCU finished. */ | ||||
| 	call_rcu_sched(&rcu.head, wakeme_after_rcu); | ||||
| 	/* Wait for it. */ | ||||
| 	wait_for_completion(&rcu.completion); | ||||
| 	destroy_rcu_head_on_stack(&rcu.head); | ||||
| 	wait_rcu_gp(call_rcu_sched); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(synchronize_sched); | ||||
| 
 | ||||
| @ -1639,18 +1718,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched); | ||||
|  */ | ||||
| void synchronize_rcu_bh(void) | ||||
| { | ||||
| 	struct rcu_synchronize rcu; | ||||
| 
 | ||||
| 	if (rcu_blocking_is_gp()) | ||||
| 		return; | ||||
| 
 | ||||
| 	init_rcu_head_on_stack(&rcu.head); | ||||
| 	init_completion(&rcu.completion); | ||||
| 	/* Will wake me after RCU finished. */ | ||||
| 	call_rcu_bh(&rcu.head, wakeme_after_rcu); | ||||
| 	/* Wait for it. */ | ||||
| 	wait_for_completion(&rcu.completion); | ||||
| 	destroy_rcu_head_on_stack(&rcu.head); | ||||
| 	wait_rcu_gp(call_rcu_bh); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | ||||
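[Editor's sketch, not part of the commit: synchronize_sched(), synchronize_rcu_bh(), and synchronize_rcu() further below now delegate to a new wait_rcu_gp() helper. The sketch reconstructs what that helper factors out, based on the open-coded completion wait being replaced in these hunks; the real helper may differ in detail, and sketch_wait_rcu_gp() is a placeholder name.]

	static void sketch_wait_rcu_gp(void (*crf)(struct rcu_head *head,
						   void (*func)(struct rcu_head *head)))
	{
		struct rcu_synchronize rcu;

		init_rcu_head_on_stack(&rcu.head);
		init_completion(&rcu.completion);
		crf(&rcu.head, wakeme_after_rcu);	/* will wake us after RCU finishes */
		wait_for_completion(&rcu.completion);	/* wait for it */
		destroy_rcu_head_on_stack(&rcu.head);
	}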
| 
 | ||||
| @ -1671,7 +1741,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
| 	check_cpu_stall(rsp, rdp); | ||||
| 
 | ||||
| 	/* Is the RCU core waiting for a quiescent state from this CPU? */ | ||||
| 	if (rdp->qs_pending && !rdp->passed_quiesc) { | ||||
| 	if (rcu_scheduler_fully_active && | ||||
| 	    rdp->qs_pending && !rdp->passed_quiesce) { | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * If force_quiescent_state() coming soon and this CPU | ||||
| @ -1683,7 +1754,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | ||||
| 		    ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, | ||||
| 				 jiffies)) | ||||
| 			set_need_resched(); | ||||
| 	} else if (rdp->qs_pending && rdp->passed_quiesc) { | ||||
| 	} else if (rdp->qs_pending && rdp->passed_quiesce) { | ||||
| 		rdp->n_rp_report_qs++; | ||||
| 		return 1; | ||||
| 	} | ||||
| @ -1846,6 +1917,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | ||||
| 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | ||||
| #endif /* #ifdef CONFIG_NO_HZ */ | ||||
| 	rdp->cpu = cpu; | ||||
| 	rdp->rsp = rsp; | ||||
| 	raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| } | ||||
| 
 | ||||
| @ -1865,8 +1937,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | ||||
| 
 | ||||
| 	/* Set up local state, ensuring consistent view of global state. */ | ||||
| 	raw_spin_lock_irqsave(&rnp->lock, flags); | ||||
| 	rdp->passed_quiesc = 0;  /* We could be racing with new GP, */ | ||||
| 	rdp->qs_pending = 1;	 /*  so set up to respond to current GP. */ | ||||
| 	rdp->beenonline = 1;	 /* We have now been online. */ | ||||
| 	rdp->preemptible = preemptible; | ||||
| 	rdp->qlen_last_fqs_check = 0; | ||||
| @ -1891,9 +1961,17 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | ||||
| 		rnp->qsmaskinit |= mask; | ||||
| 		mask = rnp->grpmask; | ||||
| 		if (rnp == rdp->mynode) { | ||||
| 			rdp->gpnum = rnp->completed; /* if GP in progress... */ | ||||
| 			/*
 | ||||
| 			 * If there is a grace period in progress, we will | ||||
| 			 * set up to wait for it next time we run the | ||||
| 			 * RCU core code. | ||||
| 			 */ | ||||
| 			rdp->gpnum = rnp->completed; | ||||
| 			rdp->completed = rnp->completed; | ||||
| 			rdp->passed_quiesc_completed = rnp->completed - 1; | ||||
| 			rdp->passed_quiesce = 0; | ||||
| 			rdp->qs_pending = 0; | ||||
| 			rdp->passed_quiesce_gpnum = rnp->gpnum - 1; | ||||
| 			trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); | ||||
| 		} | ||||
| 		raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ | ||||
| 		rnp = rnp->parent; | ||||
| @ -1919,6 +1997,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | ||||
| 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | ||||
| 	struct rcu_node *rnp = rdp->mynode; | ||||
| 
 | ||||
| 	trace_rcu_utilization("Start CPU hotplug"); | ||||
| 	switch (action) { | ||||
| 	case CPU_UP_PREPARE: | ||||
| 	case CPU_UP_PREPARE_FROZEN: | ||||
| @ -1954,6 +2033,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | ||||
| 	default: | ||||
| 		break; | ||||
| 	} | ||||
| 	trace_rcu_utilization("End CPU hotplug"); | ||||
| 	return NOTIFY_OK; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -230,9 +230,9 @@ struct rcu_data { | ||||
| 					/*  in order to detect GP end. */ | ||||
| 	unsigned long	gpnum;		/* Highest gp number that this CPU */ | ||||
| 					/*  is aware of having started. */ | ||||
| 	unsigned long	passed_quiesc_completed; | ||||
| 					/* Value of completed at time of qs. */ | ||||
| 	bool		passed_quiesc;	/* User-mode/idle loop etc. */ | ||||
| 	unsigned long	passed_quiesce_gpnum; | ||||
| 					/* gpnum at time of quiescent state. */ | ||||
| 	bool		passed_quiesce;	/* User-mode/idle loop etc. */ | ||||
| 	bool		qs_pending;	/* Core waits for quiesc state. */ | ||||
| 	bool		beenonline;	/* CPU online at least once. */ | ||||
| 	bool		preemptible;	/* Preemptible RCU? */ | ||||
| @ -299,6 +299,7 @@ struct rcu_data { | ||||
| 	unsigned long n_rp_need_nothing; | ||||
| 
 | ||||
| 	int cpu; | ||||
| 	struct rcu_state *rsp; | ||||
| }; | ||||
| 
 | ||||
| /* Values for signaled field in struct rcu_state. */ | ||||
| @ -417,6 +418,13 @@ extern struct rcu_state rcu_preempt_state; | ||||
| DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); | ||||
| #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||||
| DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | ||||
| DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||||
| DECLARE_PER_CPU(char, rcu_cpu_has_work); | ||||
| #endif /* #ifdef CONFIG_RCU_BOOST */ | ||||
| 
 | ||||
| #ifndef RCU_TREE_NONCORE | ||||
| 
 | ||||
| /* Forward declarations for rcutree_plugin.h */ | ||||
| @ -430,7 +438,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | ||||
| static void rcu_stop_cpu_kthread(int cpu); | ||||
| #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||||
| static void rcu_print_detail_task_stall(struct rcu_state *rsp); | ||||
| static void rcu_print_task_stall(struct rcu_node *rnp); | ||||
| static int rcu_print_task_stall(struct rcu_node *rnp); | ||||
| static void rcu_preempt_stall_reset(void); | ||||
| static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | ||||
| #ifdef CONFIG_HOTPLUG_CPU | ||||
| @ -450,7 +458,6 @@ static int rcu_preempt_needs_cpu(int cpu); | ||||
| static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | ||||
| static void rcu_preempt_send_cbs_to_online(void); | ||||
| static void __init __rcu_init_preempt(void); | ||||
| static void rcu_needs_cpu_flush(void); | ||||
| static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | ||||
| static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | ||||
| static void invoke_rcu_callbacks_kthread(void); | ||||
|  | ||||
| @ -27,6 +27,14 @@ | ||||
| #include <linux/delay.h> | ||||
| #include <linux/stop_machine.h> | ||||
| 
 | ||||
| #define RCU_KTHREAD_PRIO 1 | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||||
| #else | ||||
| #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
|  * Check the RCU kernel configuration parameters and print informative | ||||
|  * messages about anything out of the ordinary.  If you like #ifdef, you | ||||
| @ -64,7 +72,7 @@ static void __init rcu_bootup_announce_oddness(void) | ||||
| 
 | ||||
| #ifdef CONFIG_TREE_PREEMPT_RCU | ||||
| 
 | ||||
| struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); | ||||
| struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); | ||||
| DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | ||||
| static struct rcu_state *rcu_state = &rcu_preempt_state; | ||||
| 
 | ||||
| @ -122,9 +130,11 @@ static void rcu_preempt_qs(int cpu) | ||||
| { | ||||
| 	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | ||||
| 
 | ||||
| 	rdp->passed_quiesc_completed = rdp->gpnum - 1; | ||||
| 	rdp->passed_quiesce_gpnum = rdp->gpnum; | ||||
| 	barrier(); | ||||
| 	rdp->passed_quiesc = 1; | ||||
| 	if (rdp->passed_quiesce == 0) | ||||
| 		trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); | ||||
| 	rdp->passed_quiesce = 1; | ||||
| 	current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | ||||
| } | ||||
| 
 | ||||
| @ -190,6 +200,11 @@ static void rcu_preempt_note_context_switch(int cpu) | ||||
| 			if (rnp->qsmask & rdp->grpmask) | ||||
| 				rnp->gp_tasks = &t->rcu_node_entry; | ||||
| 		} | ||||
| 		trace_rcu_preempt_task(rdp->rsp->name, | ||||
| 				       t->pid, | ||||
| 				       (rnp->qsmask & rdp->grpmask) | ||||
| 				       ? rnp->gpnum | ||||
| 				       : rnp->gpnum + 1); | ||||
| 		raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 	} else if (t->rcu_read_lock_nesting < 0 && | ||||
| 		   t->rcu_read_unlock_special) { | ||||
| @ -299,6 +314,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | ||||
| 	int empty_exp; | ||||
| 	unsigned long flags; | ||||
| 	struct list_head *np; | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| 	struct rt_mutex *rbmp = NULL; | ||||
| #endif /* #ifdef CONFIG_RCU_BOOST */ | ||||
| 	struct rcu_node *rnp; | ||||
| 	int special; | ||||
| 
 | ||||
| @ -344,6 +362,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | ||||
| 		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | ||||
| 		np = rcu_next_node_entry(t, rnp); | ||||
| 		list_del_init(&t->rcu_node_entry); | ||||
| 		t->rcu_blocked_node = NULL; | ||||
| 		trace_rcu_unlock_preempted_task("rcu_preempt", | ||||
| 						rnp->gpnum, t->pid); | ||||
| 		if (&t->rcu_node_entry == rnp->gp_tasks) | ||||
| 			rnp->gp_tasks = np; | ||||
| 		if (&t->rcu_node_entry == rnp->exp_tasks) | ||||
| @ -351,30 +372,34 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| 		if (&t->rcu_node_entry == rnp->boost_tasks) | ||||
| 			rnp->boost_tasks = np; | ||||
| 		/* Snapshot and clear ->rcu_boosted with rcu_node lock held. */ | ||||
| 		if (t->rcu_boosted) { | ||||
| 			special |= RCU_READ_UNLOCK_BOOSTED; | ||||
| 			t->rcu_boosted = 0; | ||||
| 		/* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ | ||||
| 		if (t->rcu_boost_mutex) { | ||||
| 			rbmp = t->rcu_boost_mutex; | ||||
| 			t->rcu_boost_mutex = NULL; | ||||
| 		} | ||||
| #endif /* #ifdef CONFIG_RCU_BOOST */ | ||||
| 		t->rcu_blocked_node = NULL; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * If this was the last task on the current list, and if | ||||
| 		 * we aren't waiting on any CPUs, report the quiescent state. | ||||
| 		 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. | ||||
| 		 */ | ||||
| 		if (empty) | ||||
| 			raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 		else | ||||
| 		if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { | ||||
| 			trace_rcu_quiescent_state_report("preempt_rcu", | ||||
| 							 rnp->gpnum, | ||||
| 							 0, rnp->qsmask, | ||||
| 							 rnp->level, | ||||
| 							 rnp->grplo, | ||||
| 							 rnp->grphi, | ||||
| 							 !!rnp->gp_tasks); | ||||
| 			rcu_report_unblock_qs_rnp(rnp, flags); | ||||
| 		} else | ||||
| 			raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| 		/* Unboost if we were boosted. */ | ||||
| 		if (special & RCU_READ_UNLOCK_BOOSTED) { | ||||
| 			rt_mutex_unlock(t->rcu_boost_mutex); | ||||
| 			t->rcu_boost_mutex = NULL; | ||||
| 		} | ||||
| 		if (rbmp) | ||||
| 			rt_mutex_unlock(rbmp); | ||||
| #endif /* #ifdef CONFIG_RCU_BOOST */ | ||||
| 
 | ||||
| 		/*
 | ||||
| @ -399,10 +424,10 @@ void __rcu_read_unlock(void) | ||||
| { | ||||
| 	struct task_struct *t = current; | ||||
| 
 | ||||
| 	barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */ | ||||
| 	if (t->rcu_read_lock_nesting != 1) | ||||
| 		--t->rcu_read_lock_nesting; | ||||
| 	else { | ||||
| 		barrier();  /* critical section before exit code. */ | ||||
| 		t->rcu_read_lock_nesting = INT_MIN; | ||||
| 		barrier();  /* assign before ->rcu_read_unlock_special load */ | ||||
| 		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | ||||
| @ -466,16 +491,20 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | ||||
|  * Scan the current list of tasks blocked within RCU read-side critical | ||||
|  * sections, printing out the tid of each. | ||||
|  */ | ||||
| static void rcu_print_task_stall(struct rcu_node *rnp) | ||||
| static int rcu_print_task_stall(struct rcu_node *rnp) | ||||
| { | ||||
| 	struct task_struct *t; | ||||
| 	int ndetected = 0; | ||||
| 
 | ||||
| 	if (!rcu_preempt_blocked_readers_cgp(rnp)) | ||||
| 		return; | ||||
| 		return 0; | ||||
| 	t = list_entry(rnp->gp_tasks, | ||||
| 		       struct task_struct, rcu_node_entry); | ||||
| 	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) | ||||
| 	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { | ||||
| 		printk(" P%d", t->pid); | ||||
| 		ndetected++; | ||||
| 	} | ||||
| 	return ndetected; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -656,18 +685,9 @@ EXPORT_SYMBOL_GPL(call_rcu); | ||||
|  */ | ||||
| void synchronize_rcu(void) | ||||
| { | ||||
| 	struct rcu_synchronize rcu; | ||||
| 
 | ||||
| 	if (!rcu_scheduler_active) | ||||
| 		return; | ||||
| 
 | ||||
| 	init_rcu_head_on_stack(&rcu.head); | ||||
| 	init_completion(&rcu.completion); | ||||
| 	/* Will wake me after RCU finished. */ | ||||
| 	call_rcu(&rcu.head, wakeme_after_rcu); | ||||
| 	/* Wait for it. */ | ||||
| 	wait_for_completion(&rcu.completion); | ||||
| 	destroy_rcu_head_on_stack(&rcu.head); | ||||
| 	wait_rcu_gp(call_rcu); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(synchronize_rcu); | ||||
| 
 | ||||
| @ -968,8 +988,9 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | ||||
|  * Because preemptible RCU does not exist, we never have to check for | ||||
|  * tasks blocked within RCU read-side critical sections. | ||||
|  */ | ||||
| static void rcu_print_task_stall(struct rcu_node *rnp) | ||||
| static int rcu_print_task_stall(struct rcu_node *rnp) | ||||
| { | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -1136,6 +1157,8 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||||
| 
 | ||||
| static struct lock_class_key rcu_boost_class; | ||||
| 
 | ||||
| /*
 | ||||
|  * Carry out RCU priority boosting on the task indicated by ->exp_tasks | ||||
|  * or ->boost_tasks, advancing the pointer to the next task in the | ||||
| @ -1198,8 +1221,10 @@ static int rcu_boost(struct rcu_node *rnp) | ||||
| 	 */ | ||||
| 	t = container_of(tb, struct task_struct, rcu_node_entry); | ||||
| 	rt_mutex_init_proxy_locked(&mtx, t); | ||||
| 	/* Avoid lockdep false positives.  This rt_mutex is its own thing. */ | ||||
| 	lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class, | ||||
| 				   "rcu_boost_mutex"); | ||||
| 	t->rcu_boost_mutex = &mtx; | ||||
| 	t->rcu_boosted = 1; | ||||
| 	raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 	rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */ | ||||
| 	rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */ | ||||
| @ -1228,9 +1253,12 @@ static int rcu_boost_kthread(void *arg) | ||||
| 	int spincnt = 0; | ||||
| 	int more2boost; | ||||
| 
 | ||||
| 	trace_rcu_utilization("Start boost kthread@init"); | ||||
| 	for (;;) { | ||||
| 		rnp->boost_kthread_status = RCU_KTHREAD_WAITING; | ||||
| 		trace_rcu_utilization("End boost kthread@rcu_wait"); | ||||
| 		rcu_wait(rnp->boost_tasks || rnp->exp_tasks); | ||||
| 		trace_rcu_utilization("Start boost kthread@rcu_wait"); | ||||
| 		rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; | ||||
| 		more2boost = rcu_boost(rnp); | ||||
| 		if (more2boost) | ||||
| @ -1238,11 +1266,14 @@ static int rcu_boost_kthread(void *arg) | ||||
| 		else | ||||
| 			spincnt = 0; | ||||
| 		if (spincnt > 10) { | ||||
| 			trace_rcu_utilization("End boost kthread@rcu_yield"); | ||||
| 			rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); | ||||
| 			trace_rcu_utilization("Start boost kthread@rcu_yield"); | ||||
| 			spincnt = 0; | ||||
| 		} | ||||
| 	} | ||||
| 	/* NOTREACHED */ | ||||
| 	trace_rcu_utilization("End boost kthread@notreached"); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| @ -1291,11 +1322,9 @@ static void invoke_rcu_callbacks_kthread(void) | ||||
| 
 | ||||
| 	local_irq_save(flags); | ||||
| 	__this_cpu_write(rcu_cpu_has_work, 1); | ||||
| 	if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { | ||||
| 		local_irq_restore(flags); | ||||
| 		return; | ||||
| 	} | ||||
| 	wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); | ||||
| 	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && | ||||
| 	    current != __this_cpu_read(rcu_cpu_kthread_task)) | ||||
| 		wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); | ||||
| 	local_irq_restore(flags); | ||||
| } | ||||
| 
 | ||||
| @ -1343,13 +1372,13 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | ||||
| 	if (rnp->boost_kthread_task != NULL) | ||||
| 		return 0; | ||||
| 	t = kthread_create(rcu_boost_kthread, (void *)rnp, | ||||
| 			   "rcub%d", rnp_index); | ||||
| 			   "rcub/%d", rnp_index); | ||||
| 	if (IS_ERR(t)) | ||||
| 		return PTR_ERR(t); | ||||
| 	raw_spin_lock_irqsave(&rnp->lock, flags); | ||||
| 	rnp->boost_kthread_task = t; | ||||
| 	raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||||
| 	sp.sched_priority = RCU_KTHREAD_PRIO; | ||||
| 	sp.sched_priority = RCU_BOOST_PRIO; | ||||
| 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||||
| 	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | ||||
| 	return 0; | ||||
| @ -1444,6 +1473,7 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | ||||
| { | ||||
| 	struct sched_param sp; | ||||
| 	struct timer_list yield_timer; | ||||
| 	int prio = current->rt_priority; | ||||
| 
 | ||||
| 	setup_timer_on_stack(&yield_timer, f, arg); | ||||
| 	mod_timer(&yield_timer, jiffies + 2); | ||||
| @ -1451,7 +1481,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | ||||
| 	sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); | ||||
| 	set_user_nice(current, 19); | ||||
| 	schedule(); | ||||
| 	sp.sched_priority = RCU_KTHREAD_PRIO; | ||||
| 	set_user_nice(current, 0); | ||||
| 	sp.sched_priority = prio; | ||||
| 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | ||||
| 	del_timer(&yield_timer); | ||||
| } | ||||
| @ -1489,7 +1520,8 @@ static int rcu_cpu_kthread_should_stop(int cpu) | ||||
| 
 | ||||
| /*
 | ||||
|  * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the | ||||
|  * earlier RCU softirq. | ||||
|  * RCU softirq used in flavors and configurations of RCU that do not | ||||
|  * support RCU priority boosting. | ||||
|  */ | ||||
| static int rcu_cpu_kthread(void *arg) | ||||
| { | ||||
| @ -1500,9 +1532,12 @@ static int rcu_cpu_kthread(void *arg) | ||||
| 	char work; | ||||
| 	char *workp = &per_cpu(rcu_cpu_has_work, cpu); | ||||
| 
 | ||||
| 	trace_rcu_utilization("Start CPU kthread@init"); | ||||
| 	for (;;) { | ||||
| 		*statusp = RCU_KTHREAD_WAITING; | ||||
| 		trace_rcu_utilization("End CPU kthread@rcu_wait"); | ||||
| 		rcu_wait(*workp != 0 || kthread_should_stop()); | ||||
| 		trace_rcu_utilization("Start CPU kthread@rcu_wait"); | ||||
| 		local_bh_disable(); | ||||
| 		if (rcu_cpu_kthread_should_stop(cpu)) { | ||||
| 			local_bh_enable(); | ||||
| @ -1523,11 +1558,14 @@ static int rcu_cpu_kthread(void *arg) | ||||
| 			spincnt = 0; | ||||
| 		if (spincnt > 10) { | ||||
| 			*statusp = RCU_KTHREAD_YIELDING; | ||||
| 			trace_rcu_utilization("End CPU kthread@rcu_yield"); | ||||
| 			rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); | ||||
| 			trace_rcu_utilization("Start CPU kthread@rcu_yield"); | ||||
| 			spincnt = 0; | ||||
| 		} | ||||
| 	} | ||||
| 	*statusp = RCU_KTHREAD_STOPPED; | ||||
| 	trace_rcu_utilization("End CPU kthread@term"); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| @ -1560,7 +1598,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | ||||
| 	if (!rcu_scheduler_fully_active || | ||||
| 	    per_cpu(rcu_cpu_kthread_task, cpu) != NULL) | ||||
| 		return 0; | ||||
| 	t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); | ||||
| 	t = kthread_create_on_node(rcu_cpu_kthread, | ||||
| 				   (void *)(long)cpu, | ||||
| 				   cpu_to_node(cpu), | ||||
| 				   "rcuc/%d", cpu); | ||||
| 	if (IS_ERR(t)) | ||||
| 		return PTR_ERR(t); | ||||
| 	if (cpu_online(cpu)) | ||||
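[Editor's sketch, not part of the commit: the hunk above replaces kthread_create() with NUMA-aware kthread_create_on_node() and renames the thread to the per-CPU "rcuc/%d" convention. The fragment below shows the same creation pattern in isolation; my_thread_fn, "mykthread/%d", and sketch_spawn_percpu_kthread() are placeholders.]

	static int sketch_spawn_percpu_kthread(int cpu)
	{
		struct task_struct *t;

		/* Allocate the thread's data on the memory node of its CPU. */
		t = kthread_create_on_node(my_thread_fn, (void *)(long)cpu,
					   cpu_to_node(cpu), "mykthread/%d", cpu);
		if (IS_ERR(t))
			return PTR_ERR(t);
		wake_up_process(t);	/* let it reach its wait loop */
		return 0;
	}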
| @ -1669,7 +1710,7 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | ||||
| 		return 0; | ||||
| 	if (rnp->node_kthread_task == NULL) { | ||||
| 		t = kthread_create(rcu_node_kthread, (void *)rnp, | ||||
| 				   "rcun%d", rnp_index); | ||||
| 				   "rcun/%d", rnp_index); | ||||
| 		if (IS_ERR(t)) | ||||
| 			return PTR_ERR(t); | ||||
| 		raw_spin_lock_irqsave(&rnp->lock, flags); | ||||
| @ -1907,15 +1948,6 @@ int rcu_needs_cpu(int cpu) | ||||
| 	return rcu_needs_cpu_quick_check(cpu); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Check to see if we need to continue a callback-flush operations to | ||||
|  * allow the last CPU to enter dyntick-idle mode.  But fast dyntick-idle | ||||
|  * entry is not configured, so we never do need to. | ||||
|  */ | ||||
| static void rcu_needs_cpu_flush(void) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | ||||
| 
 | ||||
| #define RCU_NEEDS_CPU_FLUSHES 5 | ||||
| @ -1991,20 +2023,4 @@ int rcu_needs_cpu(int cpu) | ||||
| 	return c; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Check to see if we need to continue a callback-flush operations to | ||||
|  * allow the last CPU to enter dyntick-idle mode. | ||||
|  */ | ||||
| static void rcu_needs_cpu_flush(void) | ||||
| { | ||||
| 	int cpu = smp_processor_id(); | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	if (per_cpu(rcu_dyntick_drain, cpu) <= 0) | ||||
| 		return; | ||||
| 	local_irq_save(flags); | ||||
| 	(void)rcu_needs_cpu(cpu); | ||||
| 	local_irq_restore(flags); | ||||
| } | ||||
| 
 | ||||
| #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | ||||
|  | ||||
| @ -48,11 +48,6 @@ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| 
 | ||||
| DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||||
| DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu); | ||||
| DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||||
| DECLARE_PER_CPU(char, rcu_cpu_has_work); | ||||
| 
 | ||||
| static char convert_kthread_status(unsigned int kthread_status) | ||||
| { | ||||
| 	if (kthread_status > RCU_KTHREAD_MAX) | ||||
| @ -66,11 +61,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | ||||
| { | ||||
| 	if (!rdp->beenonline) | ||||
| 		return; | ||||
| 	seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d", | ||||
| 	seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d", | ||||
| 		   rdp->cpu, | ||||
| 		   cpu_is_offline(rdp->cpu) ? '!' : ' ', | ||||
| 		   rdp->completed, rdp->gpnum, | ||||
| 		   rdp->passed_quiesc, rdp->passed_quiesc_completed, | ||||
| 		   rdp->passed_quiesce, rdp->passed_quiesce_gpnum, | ||||
| 		   rdp->qs_pending); | ||||
| #ifdef CONFIG_NO_HZ | ||||
| 	seq_printf(m, " dt=%d/%d/%d df=%lu", | ||||
| @ -144,7 +139,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | ||||
| 		   rdp->cpu, | ||||
| 		   cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", | ||||
| 		   rdp->completed, rdp->gpnum, | ||||
| 		   rdp->passed_quiesc, rdp->passed_quiesc_completed, | ||||
| 		   rdp->passed_quiesce, rdp->passed_quiesce_gpnum, | ||||
| 		   rdp->qs_pending); | ||||
| #ifdef CONFIG_NO_HZ | ||||
| 	seq_printf(m, ",%d,%d,%d,%lu", | ||||
| @ -175,7 +170,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | ||||
| 
 | ||||
| static int show_rcudata_csv(struct seq_file *m, void *unused) | ||||
| { | ||||
| 	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); | ||||
| 	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); | ||||
| #ifdef CONFIG_NO_HZ | ||||
| 	seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); | ||||
| #endif /* #ifdef CONFIG_NO_HZ */ | ||||
|  | ||||
| @ -579,6 +579,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | ||||
| 		    struct rt_mutex_waiter *waiter) | ||||
| { | ||||
| 	int ret = 0; | ||||
| 	int was_disabled; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		/* Try to acquire the lock: */ | ||||
| @ -601,10 +602,17 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | ||||
| 
 | ||||
| 		raw_spin_unlock(&lock->wait_lock); | ||||
| 
 | ||||
| 		was_disabled = irqs_disabled(); | ||||
| 		if (was_disabled) | ||||
| 			local_irq_enable(); | ||||
| 
 | ||||
| 		debug_rt_mutex_print_deadlock(waiter); | ||||
| 
 | ||||
| 		schedule_rt_mutex(lock); | ||||
| 
 | ||||
| 		if (was_disabled) | ||||
| 			local_irq_disable(); | ||||
| 
 | ||||
| 		raw_spin_lock(&lock->wait_lock); | ||||
| 		set_current_state(state); | ||||
| 	} | ||||
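[Editor's sketch, not part of the commit: the __rt_mutex_slowlock() hunk above notes whether interrupts were disabled, enables them across the blocking schedule, and restores the prior state afterwards, which is what permits rt_mutex_unlock() with irqs disabled. The fragment below shows the same pattern with blocking_op() standing in for schedule_rt_mutex(); both names in the sketch are placeholders.]

	static void sketch_block_with_irqs_enabled(void)
	{
		int was_disabled = irqs_disabled();

		if (was_disabled)
			local_irq_enable();

		blocking_op();		/* e.g. schedule_rt_mutex(lock) */

		if (was_disabled)
			local_irq_disable();
	}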
|  | ||||
| @ -4213,6 +4213,7 @@ static inline void schedule_debug(struct task_struct *prev) | ||||
| 	 */ | ||||
| 	if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) | ||||
| 		__schedule_bug(prev); | ||||
| 	rcu_sleep_check(); | ||||
| 
 | ||||
| 	profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | ||||
| 
 | ||||
| @ -5954,15 +5955,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | ||||
| 	ftrace_graph_init_idle_task(idle, cpu); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * In a system that switches off the HZ timer nohz_cpu_mask | ||||
|  * indicates which cpus entered this state. This is used | ||||
|  * in the rcu update to wait only for active cpus. For system | ||||
|  * which do not switch off the HZ timer nohz_cpu_mask should | ||||
|  * always be CPU_BITS_NONE. | ||||
|  */ | ||||
| cpumask_var_t nohz_cpu_mask; | ||||
| 
 | ||||
| /*
 | ||||
|  * Increase the granularity value when there are more CPUs, | ||||
|  * because with more CPUs the 'effective latency' as visible | ||||
| @ -8175,8 +8167,6 @@ void __init sched_init(void) | ||||
| 	 */ | ||||
| 	current->sched_class = &fair_sched_class; | ||||
| 
 | ||||
| 	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | ||||
| 	zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | ||||
| #ifdef CONFIG_SMP | ||||
| 	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); | ||||
| #ifdef CONFIG_NO_HZ | ||||
| @ -8206,6 +8196,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) | ||||
| { | ||||
| 	static unsigned long prev_jiffy;	/* ratelimiting */ | ||||
| 
 | ||||
| 	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ | ||||
| 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || | ||||
| 	    system_state != SYSTEM_RUNNING || oops_in_progress) | ||||
| 		return; | ||||
|  | ||||
| @ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now) | ||||
| 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||||
| 	ts->idle_waketime = now; | ||||
| 
 | ||||
| 	local_irq_save(flags); | ||||
| @ -389,9 +388,6 @@ void tick_nohz_stop_sched_tick(int inidle) | ||||
| 		else | ||||
| 			expires.tv64 = KTIME_MAX; | ||||
| 
 | ||||
| 		if (delta_jiffies > 1) | ||||
| 			cpumask_set_cpu(cpu, nohz_cpu_mask); | ||||
| 
 | ||||
| 		/* Skip reprogram of event if its not changed */ | ||||
| 		if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) | ||||
| 			goto out; | ||||
| @ -441,7 +437,6 @@ void tick_nohz_stop_sched_tick(int inidle) | ||||
| 		 * softirq. | ||||
| 		 */ | ||||
| 		tick_do_update_jiffies64(ktime_get()); | ||||
| 		cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||||
| 	} | ||||
| 	raise_softirq_irqoff(TIMER_SOFTIRQ); | ||||
| out: | ||||
| @ -524,7 +519,6 @@ void tick_nohz_restart_sched_tick(void) | ||||
| 	/* Update jiffies first */ | ||||
| 	select_nohz_load_balancer(0); | ||||
| 	tick_do_update_jiffies64(now); | ||||
| 	cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||||
| 
 | ||||
| #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||||
| 	/*
 | ||||
|  | ||||