Merge tag 'sched-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 "Debuggability:

   - Change most occurrences of BUG_ON() to WARN_ON_ONCE()

   - Reorganize & fix TASK_ state comparisons, turn it into a bitmap

   - Update/fix misc scheduler debugging facilities

  Load-balancing & regular scheduling:

   - Improve the behavior of the scheduler in the presence of lots of
     SCHED_IDLE tasks - in particular they should not impact other
     scheduling classes.

   - Optimize task load tracking, cleanups & fixes

   - Clean up & simplify misc load-balancing code

  Freezer:

   - Rewrite the core freezer to behave better wrt thawing and be
     simpler in general, by replacing PF_FROZEN with TASK_FROZEN &
     fixing/adjusting all the fallout.

  Deadline scheduler:

   - Fix the DL capacity-aware code

   - Factor out dl_task_is_earliest_deadline() & replenish_dl_new_period()

   - Relax/optimize locking in task_non_contending()

  Cleanups:

   - Factor out the update_current_exec_runtime() helper

   - Various cleanups, simplifications"

* tag 'sched-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits)
  sched: Fix more TASK_state comparisons
  sched: Fix TASK_state comparisons
  sched/fair: Move call to list_last_entry() in detach_tasks
  sched/fair: Cleanup loop_max and loop_break
  sched/fair: Make sure to try to detach at least one movable task
  sched: Show PF_flag holes
  freezer,sched: Rewrite core freezer logic
  sched: Widen TAKS_state literals
  sched/wait: Add wait_event_state()
  sched/completion: Add wait_for_completion_state()
  sched: Add TASK_ANY for wait_task_inactive()
  sched: Change wait_task_inactive()s match_state
  freezer,umh: Clean up freezer/initrd interaction
  freezer: Have {,un}lock_system_sleep() save/restore flags
  sched: Rename task_running() to task_on_cpu()
  sched/fair: Cleanup for SIS_PROP
  sched/fair: Default to false in test_idle_cores()
  sched/fair: Remove useless check in select_idle_core()
  sched/fair: Avoid double search on same cpu
  sched/fair: Remove redundant check in select_idle_smt()
  ...
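The freezer rewrite replaces the old freezer_do_not_count()/freezable_schedule*() helpers with a dedicated TASK_FREEZABLE state bit, so a freezable sleeper is simply moved into TASK_FROZEN instead of being skipped by the freezer. The sketch below contrasts the two wait patterns; it is an illustrative example in kernel style (the wait-queue and flag names are hypothetical), not code taken from this merge.

	/*
	 * Illustrative sketch only (not from this merge): how a freezable
	 * wait changes with TASK_FREEZABLE. example_wait_old/new, wq and
	 * done are made-up names.
	 */

	/* Old pattern, removed by this merge: hide from the freezer while asleep. */
	static void example_wait_old(wait_queue_head_t *wq, bool *done)
	{
		DEFINE_WAIT(wait);

		freezer_do_not_count();
		prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE);
		if (!*done)
			schedule();
		finish_wait(wq, &wait);
		freezer_count();
	}

	/* New pattern: mark the sleep itself freezable via the task state. */
	static void example_wait_new(wait_queue_head_t *wq, bool *done)
	{
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE);
		if (!*done)
			schedule();
		finish_wait(wq, &wait);
	}

The same transformation recurs throughout the diff below: freezable_schedule() becomes plain schedule() with TASK_FREEZABLE OR'd into the sleep state, and one-off waiters can use the new wait_event_state()/wait_for_completion_state() helpers.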
commit 30c999937f
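Many of the hunks below adapt callers to the new lock_system_sleep() calling convention, which now returns the caller's saved flags (it sets PF_NOFREEZE instead of the removed PF_FREEZER_SKIP) and unlock_system_sleep() takes them back. A hypothetical caller is sketched here for orientation; the function name is made up for illustration.

	/* Hypothetical caller, for illustration only. */
	static int example_pm_operation(void)
	{
		unsigned int sleep_flags;
		int error = 0;

		/* Saves current->flags and sets PF_NOFREEZE before taking the lock. */
		sleep_flags = lock_system_sleep();

		/* ... work that must not race with system suspend/hibernation ... */

		/* Clears PF_NOFREEZE only if the caller did not already have it set. */
		unlock_system_sleep(sleep_flags);

		return error;
	}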
@@ -654,12 +654,14 @@ void __init acpi_s2idle_setup(void)
 
 int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg)
 {
+	unsigned int sleep_flags;
+
 	if (!lps0_device_handle || sleep_no_lps0)
 		return -ENODEV;
 
-	lock_system_sleep();
+	sleep_flags = lock_system_sleep();
 	list_add(&arg->list_node, &lps0_s2idle_devops_head);
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
 
 	return 0;
 }
@@ -667,12 +669,14 @@ EXPORT_SYMBOL_GPL(acpi_register_lps0_dev);
 
 void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg)
 {
+	unsigned int sleep_flags;
+
 	if (!lps0_device_handle || sleep_no_lps0)
 		return;
 
-	lock_system_sleep();
+	sleep_flags = lock_system_sleep();
 	list_del(&arg->list_node);
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
 }
 EXPORT_SYMBOL_GPL(acpi_unregister_lps0_dev);
@@ -4259,10 +4259,9 @@ static int binder_wait_for_work(struct binder_thread *thread,
 	struct binder_proc *proc = thread->proc;
 	int ret = 0;
 
-	freezer_do_not_count();
 	binder_inner_proc_lock(proc);
 	for (;;) {
-		prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE);
 		if (binder_has_work_ilocked(thread, do_proc_work))
 			break;
 		if (do_proc_work)
@@ -4279,7 +4278,6 @@ static int binder_wait_for_work(struct binder_thread *thread,
 	}
 	finish_wait(&thread->wait, &wait);
 	binder_inner_proc_unlock(proc);
-	freezer_count();
 
 	return ret;
 }
@@ -445,8 +445,8 @@ static int pt3_fetch_thread(void *data)
 		pt3_proc_dma(adap);
 
 		delay = ktime_set(0, PT3_FETCH_DELAY * NSEC_PER_MSEC);
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		freezable_schedule_hrtimeout_range(&delay,
+		set_current_state(TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
+		schedule_hrtimeout_range(&delay,
					   PT3_FETCH_DELAY_DELTA * NSEC_PER_MSEC,
					   HRTIMER_MODE_REL);
 	}
@@ -254,7 +254,7 @@ void idle_inject_stop(struct idle_inject_device *ii_dev)
 		iit = per_cpu_ptr(&idle_inject_thread, cpu);
 		iit->should_run = 0;
 
-		wait_task_inactive(iit->tsk, 0);
+		wait_task_inactive(iit->tsk, TASK_ANY);
 	}
 
 	cpu_hotplug_enable();
@@ -998,8 +998,9 @@ void
 spi_dv_device(struct scsi_device *sdev)
 {
 	struct scsi_target *starget = sdev->sdev_target;
-	u8 *buffer;
 	const int len = SPI_MAX_ECHO_BUFFER_SIZE*2;
+	unsigned int sleep_flags;
+	u8 *buffer;
 
 	/*
 	 * Because this function and the power management code both call
@@ -1007,7 +1008,7 @@ spi_dv_device(struct scsi_device *sdev)
 	 * while suspend or resume is in progress. Hence the
 	 * lock/unlock_system_sleep() calls.
 	 */
-	lock_system_sleep();
+	sleep_flags = lock_system_sleep();
 
 	if (scsi_autopm_get_device(sdev))
 		goto unlock_system_sleep;
@@ -1058,7 +1059,7 @@ put_autopm:
 	scsi_autopm_put_device(sdev);
 
 unlock_system_sleep:
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
 }
 EXPORT_SYMBOL(spi_dv_device);
@@ -2327,7 +2327,7 @@ cifs_invalidate_mapping(struct inode *inode)
 static int
 cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
 {
-	freezable_schedule_unsafe();
+	schedule();
 	if (signal_pending_state(mode, current))
 		return -ERESTARTSYS;
 	return 0;
@@ -2345,7 +2345,7 @@ cifs_revalidate_mapping(struct inode *inode)
 		return 0;
 
 	rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
-				     TASK_KILLABLE);
+				     TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
 	if (rc)
 		return rc;
 
@@ -753,8 +753,9 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
 {
 	int error;
 
-	error = wait_event_freezekillable_unsafe(server->response_q,
-				    midQ->mid_state != MID_REQUEST_SUBMITTED);
+	error = wait_event_state(server->response_q,
+				 midQ->mid_state != MID_REQUEST_SUBMITTED,
+				 (TASK_KILLABLE|TASK_FREEZABLE_UNSAFE));
 	if (error < 0)
 		return -ERESTARTSYS;
 
@@ -402,9 +402,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 	if (core_waiters > 0) {
 		struct core_thread *ptr;
 
-		freezer_do_not_count();
-		wait_for_completion(&core_state->startup);
-		freezer_count();
+		wait_for_completion_state(&core_state->startup,
+					  TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
 		/*
 		 * Wait for all the threads to become inactive, so that
 		 * all the thread context (extended register state, like
@@ -412,7 +411,7 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 		 */
 		ptr = core_state->dumper.next;
 		while (ptr != NULL) {
-			wait_task_inactive(ptr->task, 0);
+			wait_task_inactive(ptr->task, TASK_ANY);
 			ptr = ptr->next;
 		}
 	}
@@ -567,7 +567,8 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
 	}
 
 	wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
-			   nfs_wait_bit_killable, TASK_KILLABLE);
+			   nfs_wait_bit_killable,
+			   TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
 
 	lock_page(page);
 	mapping = page_file_mapping(page);
@@ -72,18 +72,13 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
 	return nfs_fileid_to_ino_t(fattr->fileid);
 }
 
-static int nfs_wait_killable(int mode)
+int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
 {
-	freezable_schedule_unsafe();
+	schedule();
 	if (signal_pending_state(mode, current))
 		return -ERESTARTSYS;
 	return 0;
 }
-
-int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
-{
-	return nfs_wait_killable(mode);
-}
 EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
 
 /**
@@ -1332,7 +1327,8 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
 	 */
 	for (;;) {
 		ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
-					 nfs_wait_bit_killable, TASK_KILLABLE);
+					 nfs_wait_bit_killable,
+					 TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
 		if (ret)
 			goto out;
 		spin_lock(&inode->i_lock);
@@ -36,7 +36,8 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 		res = rpc_call_sync(clnt, msg, flags);
 		if (res != -EJUKEBOX)
 			break;
-		freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME);
+		__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
+		schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
 		res = -ERESTARTSYS;
 	} while (!fatal_signal_pending(current));
 	return res;
@@ -416,8 +416,8 @@ static int nfs4_delay_killable(long *timeout)
 {
 	might_sleep();
 
-	freezable_schedule_timeout_killable_unsafe(
-		nfs4_update_delay(timeout));
+	__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
+	schedule_timeout(nfs4_update_delay(timeout));
 	if (!__fatal_signal_pending(current))
 		return 0;
 	return -EINTR;
@@ -427,7 +427,8 @@ static int nfs4_delay_interruptible(long *timeout)
 {
 	might_sleep();
 
-	freezable_schedule_timeout_interruptible_unsafe(nfs4_update_delay(timeout));
+	__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE);
+	schedule_timeout(nfs4_update_delay(timeout));
 	if (!signal_pending(current))
 		return 0;
 	return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS;
@@ -7406,7 +7407,8 @@ nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd,
 		status = nfs4_proc_setlk(state, cmd, request);
 		if ((status != -EAGAIN) || IS_SETLK(cmd))
 			break;
-		freezable_schedule_timeout_interruptible(timeout);
+		__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
+		schedule_timeout(timeout);
 		timeout *= 2;
 		timeout = min_t(unsigned long, NFS4_LOCK_MAXTIMEOUT, timeout);
 		status = -ERESTARTSYS;
@@ -7474,10 +7476,8 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 			break;
 
 		status = -ERESTARTSYS;
-		freezer_do_not_count();
-		wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
+		wait_woken(&waiter.wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE,
 			   NFS4_LOCK_MAXTIMEOUT);
-		freezer_count();
 	} while (!signalled());
 
 	remove_wait_queue(q, &waiter.wait);
@@ -1314,7 +1314,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
 
 	refcount_inc(&clp->cl_count);
 	res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
-				 nfs_wait_bit_killable, TASK_KILLABLE);
+				 nfs_wait_bit_killable,
+				 TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
 	if (res)
 		goto out;
 	if (clp->cl_cons_state < 0)
@@ -1908,7 +1908,7 @@ static int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
 	pnfs_layoutcommit_inode(lo->plh_inode, false);
 	return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
 				   nfs_wait_bit_killable,
-				   TASK_KILLABLE);
+				   TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
 }
 
 static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
@@ -3192,7 +3192,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 		status = wait_on_bit_lock_action(&nfsi->flags,
 				NFS_INO_LAYOUTCOMMITTING,
 				nfs_wait_bit_killable,
-				TASK_KILLABLE);
+				TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
 		if (status)
 			goto out;
 	}
@@ -602,9 +602,9 @@ xfsaild(
 
 	while (1) {
 		if (tout && tout <= 20)
-			set_current_state(TASK_KILLABLE);
+			set_current_state(TASK_KILLABLE|TASK_FREEZABLE);
 		else
-			set_current_state(TASK_INTERRUPTIBLE);
+			set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
 
 		/*
 		 * Check kthread_should_stop() after we set the task state to
@@ -653,14 +653,14 @@ xfsaild(
 		    ailp->ail_target == ailp->ail_target_prev &&
 		    list_empty(&ailp->ail_buf_list)) {
 			spin_unlock(&ailp->ail_lock);
-			freezable_schedule();
+			schedule();
 			tout = 0;
 			continue;
 		}
 		spin_unlock(&ailp->ail_lock);
 
 		if (tout)
-			freezable_schedule_timeout(msecs_to_jiffies(tout));
+			schedule_timeout(msecs_to_jiffies(tout));
 
 		__set_current_state(TASK_RUNNING);
 
@@ -103,6 +103,7 @@ extern void wait_for_completion(struct completion *);
 extern void wait_for_completion_io(struct completion *);
 extern int wait_for_completion_interruptible(struct completion *x);
 extern int wait_for_completion_killable(struct completion *x);
+extern int wait_for_completion_state(struct completion *x, unsigned int state);
 extern unsigned long wait_for_completion_timeout(struct completion *x,
						   unsigned long timeout);
 extern unsigned long wait_for_completion_io_timeout(struct completion *x,
@@ -8,9 +8,11 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/atomic.h>
+#include <linux/jump_label.h>
 
 #ifdef CONFIG_FREEZER
-extern atomic_t system_freezing_cnt;	/* nr of freezing conds in effect */
+DECLARE_STATIC_KEY_FALSE(freezer_active);
+
 extern bool pm_freezing;		/* PM freezing in effect */
 extern bool pm_nosig_freezing;		/* PM nosig freezing in effect */
 
@@ -22,10 +24,7 @@ extern unsigned int freeze_timeout_msecs;
 /*
  * Check if a process has been frozen
  */
-static inline bool frozen(struct task_struct *p)
-{
-	return p->flags & PF_FROZEN;
-}
+extern bool frozen(struct task_struct *p);
 
 extern bool freezing_slow_path(struct task_struct *p);
 
@@ -34,9 +33,10 @@ extern bool freezing_slow_path(struct task_struct *p);
  */
 static inline bool freezing(struct task_struct *p)
 {
-	if (likely(!atomic_read(&system_freezing_cnt)))
-		return false;
+	if (static_branch_unlikely(&freezer_active))
+		return freezing_slow_path(p);
 
-	return freezing_slow_path(p);
+	return false;
 }
 
 /* Takes and releases task alloc lock using task_lock() */
@@ -48,23 +48,14 @@ extern int freeze_kernel_threads(void);
 extern void thaw_processes(void);
 extern void thaw_kernel_threads(void);
 
-/*
- * DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION
- * If try_to_freeze causes a lockdep warning it means the caller may deadlock
- */
-static inline bool try_to_freeze_unsafe(void)
+static inline bool try_to_freeze(void)
 {
 	might_sleep();
 	if (likely(!freezing(current)))
 		return false;
-	return __refrigerator(false);
-}
-
-static inline bool try_to_freeze(void)
-{
 	if (!(current->flags & PF_NOFREEZE))
 		debug_check_no_locks_held();
-	return try_to_freeze_unsafe();
+	return __refrigerator(false);
 }
 
 extern bool freeze_task(struct task_struct *p);
@@ -79,195 +70,6 @@ static inline bool cgroup_freezing(struct task_struct *task)
 }
 #endif /* !CONFIG_CGROUP_FREEZER */
 
-/*
- * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it
- * calls wait_for_completion(&vfork) and reset right after it returns from this
- * function. Next, the parent should call try_to_freeze() to freeze itself
- * appropriately in case the child has exited before the freezing of tasks is
- * complete. However, we don't want kernel threads to be frozen in unexpected
- * places, so we allow them to block freeze_processes() instead or to set
- * PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the
- * parent won't really block freeze_processes(), since ____call_usermodehelper()
- * (the child) does a little before exec/exit and it can't be frozen before
- * waking up the parent.
- */
-
-
-/**
- * freezer_do_not_count - tell freezer to ignore %current
- *
- * Tell freezers to ignore the current task when determining whether the
- * target frozen state is reached. IOW, the current task will be
- * considered frozen enough by freezers.
- *
- * The caller shouldn't do anything which isn't allowed for a frozen task
- * until freezer_cont() is called. Usually, freezer[_do_not]_count() pair
- * wrap a scheduling operation and nothing much else.
- */
-static inline void freezer_do_not_count(void)
-{
-	current->flags |= PF_FREEZER_SKIP;
-}
-
-/**
- * freezer_count - tell freezer to stop ignoring %current
- *
- * Undo freezer_do_not_count(). It tells freezers that %current should be
- * considered again and tries to freeze if freezing condition is already in
- * effect.
- */
-static inline void freezer_count(void)
-{
-	current->flags &= ~PF_FREEZER_SKIP;
-	/*
-	 * If freezing is in progress, the following paired with smp_mb()
-	 * in freezer_should_skip() ensures that either we see %true
-	 * freezing() or freezer_should_skip() sees !PF_FREEZER_SKIP.
-	 */
-	smp_mb();
-	try_to_freeze();
-}
-
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-static inline void freezer_count_unsafe(void)
-{
-	current->flags &= ~PF_FREEZER_SKIP;
-	smp_mb();
-	try_to_freeze_unsafe();
-}
-
-/**
- * freezer_should_skip - whether to skip a task when determining frozen
- *			 state is reached
- * @p: task in quesion
- *
- * This function is used by freezers after establishing %true freezing() to
- * test whether a task should be skipped when determining the target frozen
- * state is reached. IOW, if this function returns %true, @p is considered
- * frozen enough.
- */
-static inline bool freezer_should_skip(struct task_struct *p)
-{
-	/*
-	 * The following smp_mb() paired with the one in freezer_count()
-	 * ensures that either freezer_count() sees %true freezing() or we
-	 * see cleared %PF_FREEZER_SKIP and return %false. This makes it
-	 * impossible for a task to slip frozen state testing after
-	 * clearing %PF_FREEZER_SKIP.
-	 */
-	smp_mb();
-	return p->flags & PF_FREEZER_SKIP;
-}
-
-/*
- * These functions are intended to be used whenever you want allow a sleeping
- * task to be frozen. Note that neither return any clear indication of
- * whether a freeze event happened while in this function.
- */
-
-/* Like schedule(), but should not block the freezer. */
-static inline void freezable_schedule(void)
-{
-	freezer_do_not_count();
-	schedule();
-	freezer_count();
-}
-
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-static inline void freezable_schedule_unsafe(void)
-{
-	freezer_do_not_count();
-	schedule();
-	freezer_count_unsafe();
-}
-
-/*
- * Like schedule_timeout(), but should not block the freezer. Do not
- * call this with locks held.
- */
-static inline long freezable_schedule_timeout(long timeout)
-{
-	long __retval;
-	freezer_do_not_count();
-	__retval = schedule_timeout(timeout);
-	freezer_count();
-	return __retval;
-}
-
-/*
- * Like schedule_timeout_interruptible(), but should not block the freezer. Do not
- * call this with locks held.
- */
-static inline long freezable_schedule_timeout_interruptible(long timeout)
-{
-	long __retval;
-	freezer_do_not_count();
-	__retval = schedule_timeout_interruptible(timeout);
-	freezer_count();
-	return __retval;
-}
-
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-static inline long freezable_schedule_timeout_interruptible_unsafe(long timeout)
-{
-	long __retval;
-
-	freezer_do_not_count();
-	__retval = schedule_timeout_interruptible(timeout);
-	freezer_count_unsafe();
-	return __retval;
-}
-
-/* Like schedule_timeout_killable(), but should not block the freezer. */
-static inline long freezable_schedule_timeout_killable(long timeout)
-{
-	long __retval;
-	freezer_do_not_count();
-	__retval = schedule_timeout_killable(timeout);
-	freezer_count();
-	return __retval;
-}
-
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-static inline long freezable_schedule_timeout_killable_unsafe(long timeout)
-{
-	long __retval;
-	freezer_do_not_count();
-	__retval = schedule_timeout_killable(timeout);
-	freezer_count_unsafe();
-	return __retval;
-}
-
-/*
- * Like schedule_hrtimeout_range(), but should not block the freezer. Do not
- * call this with locks held.
- */
-static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
-		u64 delta, const enum hrtimer_mode mode)
-{
-	int __retval;
-	freezer_do_not_count();
-	__retval = schedule_hrtimeout_range(expires, delta, mode);
-	freezer_count();
-	return __retval;
-}
-
-/*
- * Freezer-friendly wrappers around wait_event_interruptible(),
- * wait_event_killable() and wait_event_interruptible_timeout(), originally
- * defined in <linux/wait.h>
- */
-
-/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
-#define wait_event_freezekillable_unsafe(wq, condition)		\
-({									\
-	int __retval;							\
-	freezer_do_not_count();						\
-	__retval = wait_event_killable(wq, (condition));		\
-	freezer_count_unsafe();						\
-	__retval;							\
-})
-
 #else /* !CONFIG_FREEZER */
 static inline bool frozen(struct task_struct *p) { return false; }
 static inline bool freezing(struct task_struct *p) { return false; }
@@ -281,35 +83,8 @@ static inline void thaw_kernel_threads(void) {}
 
 static inline bool try_to_freeze(void) { return false; }
 
-static inline void freezer_do_not_count(void) {}
-static inline void freezer_count(void) {}
-static inline int freezer_should_skip(struct task_struct *p) { return 0; }
 static inline void set_freezable(void) {}
-
-#define freezable_schedule()  schedule()
-
-#define freezable_schedule_unsafe()  schedule()
-
-#define freezable_schedule_timeout(timeout)  schedule_timeout(timeout)
-
-#define freezable_schedule_timeout_interruptible(timeout)		\
-	schedule_timeout_interruptible(timeout)
-
-#define freezable_schedule_timeout_interruptible_unsafe(timeout)	\
-	schedule_timeout_interruptible(timeout)
-
-#define freezable_schedule_timeout_killable(timeout)			\
-	schedule_timeout_killable(timeout)
-
-#define freezable_schedule_timeout_killable_unsafe(timeout)		\
-	schedule_timeout_killable(timeout)
-
-#define freezable_schedule_hrtimeout_range(expires, delta, mode)	\
-	schedule_hrtimeout_range(expires, delta, mode)
-
-#define wait_event_freezekillable_unsafe(wq, condition)		\
-	wait_event_killable(wq, condition)
-
 #endif /* !CONFIG_FREEZER */
 
 #endif	/* FREEZER_H_INCLUDED */
@@ -81,25 +81,34 @@ struct task_group;
  */
 
 /* Used in tsk->state: */
-#define TASK_RUNNING			0x0000
-#define TASK_INTERRUPTIBLE		0x0001
-#define TASK_UNINTERRUPTIBLE		0x0002
-#define __TASK_STOPPED			0x0004
-#define __TASK_TRACED			0x0008
+#define TASK_RUNNING			0x00000000
+#define TASK_INTERRUPTIBLE		0x00000001
+#define TASK_UNINTERRUPTIBLE		0x00000002
+#define __TASK_STOPPED			0x00000004
+#define __TASK_TRACED			0x00000008
 /* Used in tsk->exit_state: */
-#define EXIT_DEAD			0x0010
-#define EXIT_ZOMBIE			0x0020
+#define EXIT_DEAD			0x00000010
+#define EXIT_ZOMBIE			0x00000020
 #define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
 /* Used in tsk->state again: */
-#define TASK_PARKED			0x0040
-#define TASK_DEAD			0x0080
-#define TASK_WAKEKILL			0x0100
-#define TASK_WAKING			0x0200
-#define TASK_NOLOAD			0x0400
-#define TASK_NEW			0x0800
-/* RT specific auxilliary flag to mark RT lock waiters */
-#define TASK_RTLOCK_WAIT		0x1000
-#define TASK_STATE_MAX			0x2000
+#define TASK_PARKED			0x00000040
+#define TASK_DEAD			0x00000080
+#define TASK_WAKEKILL			0x00000100
+#define TASK_WAKING			0x00000200
+#define TASK_NOLOAD			0x00000400
+#define TASK_NEW			0x00000800
+#define TASK_RTLOCK_WAIT		0x00001000
+#define TASK_FREEZABLE			0x00002000
+#define __TASK_FREEZABLE_UNSAFE		(0x00004000 * IS_ENABLED(CONFIG_LOCKDEP))
+#define TASK_FROZEN			0x00008000
+#define TASK_STATE_MAX			0x00010000
+
+#define TASK_ANY			(TASK_STATE_MAX-1)
+
+/*
+ * DO NOT ADD ANY NEW USERS !
+ */
+#define TASK_FREEZABLE_UNSAFE		(TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE)
 
 /* Convenience macros for the sake of set_current_state: */
 #define TASK_KILLABLE			(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -1713,8 +1722,9 @@ extern struct pid *cad_pid;
 #define PF_MEMALLOC		0x00000800	/* Allocating memory */
 #define PF_NPROC_EXCEEDED	0x00001000	/* set_user() noticed that RLIMIT_NPROC was exceeded */
 #define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
+#define PF__HOLE__00004000	0x00004000
 #define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
-#define PF_FROZEN		0x00010000	/* Frozen for system suspend */
+#define PF__HOLE__00010000	0x00010000
 #define PF_KSWAPD		0x00020000	/* I am kswapd */
 #define PF_MEMALLOC_NOFS	0x00040000	/* All allocation requests will inherit GFP_NOFS */
 #define PF_MEMALLOC_NOIO	0x00080000	/* All allocation requests will inherit GFP_NOIO */
@@ -1722,10 +1732,14 @@ extern struct pid *cad_pid;
						 * I am cleaning dirty pages from some other bdi. */
 #define PF_KTHREAD		0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
+#define PF__HOLE__00800000	0x00800000
+#define PF__HOLE__01000000	0x01000000
+#define PF__HOLE__02000000	0x02000000
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_mask */
 #define PF_MCE_EARLY		0x08000000	/* Early kill for mce process policy */
 #define PF_MEMALLOC_PIN		0x10000000	/* Allocation context constrained to zones which allow long term pinning. */
-#define PF_FREEZER_SKIP		0x40000000	/* Freezer should not count it as freezable */
+#define PF__HOLE__20000000	0x20000000
+#define PF__HOLE__40000000	0x40000000
 #define PF_SUSPEND_TASK		0x80000000	/* This thread called freeze_processes() and should not be frozen */
 
 /*
@@ -252,7 +252,7 @@ int rpc_malloc(struct rpc_task *);
 void		rpc_free(struct rpc_task *);
 int		rpciod_up(void);
 void		rpciod_down(void);
-int		__rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *);
+int		rpc_wait_for_completion_task(struct rpc_task *task);
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 struct net;
 void		rpc_show_tasks(struct net *);
@@ -264,11 +264,6 @@ extern struct workqueue_struct *xprtiod_workqueue;
 void		rpc_prepare_task(struct rpc_task *task);
 gfp_t		rpc_task_gfp_mask(void);
 
-static inline int rpc_wait_for_completion_task(struct rpc_task *task)
-{
-	return __rpc_wait_for_completion_task(task, NULL);
-}
-
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
 static inline const char * rpc_qname(const struct rpc_wait_queue *q)
 {
@@ -511,8 +511,8 @@ extern bool pm_save_wakeup_count(unsigned int count);
 extern void pm_wakep_autosleep_enabled(bool set);
 extern void pm_print_active_wakeup_sources(void);
 
-extern void lock_system_sleep(void);
-extern void unlock_system_sleep(void);
+extern unsigned int lock_system_sleep(void);
+extern void unlock_system_sleep(unsigned int);
 
 #else /* !CONFIG_PM_SLEEP */
 
@@ -535,8 +535,8 @@ static inline void pm_system_wakeup(void) {}
 static inline void pm_wakeup_clear(bool reset) {}
 static inline void pm_system_irq_wakeup(unsigned int irq_number) {}
 
-static inline void lock_system_sleep(void) {}
-static inline void unlock_system_sleep(void) {}
+static inline unsigned int lock_system_sleep(void) { return 0; }
+static inline void unlock_system_sleep(unsigned int flags) {}
 
 #endif /* !CONFIG_PM_SLEEP */
 
@@ -11,10 +11,11 @@
 struct cred;
 struct file;
 
-#define UMH_NO_WAIT	0	/* don't wait at all */
-#define UMH_WAIT_EXEC	1	/* wait for the exec, but not the process */
-#define UMH_WAIT_PROC	2	/* wait for the process to complete */
-#define UMH_KILLABLE	4	/* wait for EXEC/PROC killable */
+#define UMH_NO_WAIT	0x00	/* don't wait at all */
+#define UMH_WAIT_EXEC	0x01	/* wait for the exec, but not the process */
+#define UMH_WAIT_PROC	0x02	/* wait for the process to complete */
+#define UMH_KILLABLE	0x04	/* wait for EXEC/PROC killable */
+#define UMH_FREEZABLE	0x08	/* wait for EXEC/PROC freezable */
 
 struct subprocess_info {
	struct work_struct work;
@@ -281,7 +281,7 @@ static inline void wake_up_pollfree(struct wait_queue_head *wq_head)
 
 #define ___wait_is_interruptible(state)					\
	(!__builtin_constant_p(state) ||				\
-	 state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)		\
+	 (state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
 
 extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
 
@@ -361,8 +361,8 @@ do {									\
 } while (0)
 
 #define __wait_event_freezable(wq_head, condition)			\
-	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,	\
-		      freezable_schedule())
+	___wait_event(wq_head, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE),	\
+		      0, 0, schedule())
 
 /**
  * wait_event_freezable - sleep (or freeze) until a condition gets true
@@ -420,8 +420,8 @@ do {									\
 
 #define __wait_event_freezable_timeout(wq_head, condition, timeout)	\
	___wait_event(wq_head, ___wait_cond_timeout(condition),	\
-		      TASK_INTERRUPTIBLE, 0, timeout,			\
-		      __ret = freezable_schedule_timeout(__ret))
+		      (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 0, timeout,	\
+		      __ret = schedule_timeout(__ret))
 
 /*
  * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
@@ -642,8 +642,8 @@ do {									\
 
 
 #define __wait_event_freezable_exclusive(wq, condition)		\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,		\
-			freezable_schedule())
+	___wait_event(wq, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 1, 0,\
+			schedule())
 
 #define wait_event_freezable_exclusive(wq, condition)			\
 ({									\
@@ -932,6 +932,34 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
	__ret;								\
 })
 
+#define __wait_event_state(wq, condition, state)			\
+	___wait_event(wq, condition, state, 0, 0, schedule())
+
+/**
+ * wait_event_state - sleep until a condition gets true
+ * @wq_head: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @state: state to sleep in
+ *
+ * The process is put to sleep (@state) until the @condition evaluates to true
+ * or a signal is received (when allowed by @state). The @condition is checked
+ * each time the waitqueue @wq_head is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a signal
+ * (when allowed by @state) and 0 if @condition evaluated to true.
+ */
+#define wait_event_state(wq_head, condition, state)			\
+({									\
+	int __ret = 0;							\
+	might_sleep();							\
+	if (!(condition))						\
+		__ret = __wait_event_state(wq_head, condition, state);	\
+	__ret;								\
+})
+
 #define __wait_event_killable_timeout(wq_head, condition, timeout)	\
	___wait_event(wq_head, ___wait_cond_timeout(condition),	\
		      TASK_KILLABLE, 0, timeout,			\
@@ -99,19 +99,11 @@ static void __init handle_initrd(void)
	init_mkdir("/old", 0700);
	init_chdir("/old");
 
-	/*
-	 * In case that a resume from disk is carried out by linuxrc or one of
-	 * its children, we need to tell the freezer not to wait for us.
-	 */
-	current->flags |= PF_FREEZER_SKIP;
-
	info = call_usermodehelper_setup("/linuxrc", argv, envp_init,
					 GFP_KERNEL, init_linuxrc, NULL, NULL);
	if (!info)
		return;
-	call_usermodehelper_exec(info, UMH_WAIT_PROC);
-
-	current->flags &= ~PF_FREEZER_SKIP;
+	call_usermodehelper_exec(info, UMH_WAIT_PROC|UMH_FREEZABLE);
 
	/* move initrd to rootfs' /old */
	init_mount("..", ".", NULL, MS_MOVE, NULL);
@@ -113,7 +113,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
 
	if (parent && (parent->state & CGROUP_FREEZING)) {
		freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
-		atomic_inc(&system_freezing_cnt);
+		static_branch_inc(&freezer_active);
	}
 
	mutex_unlock(&freezer_mutex);
@@ -134,7 +134,7 @@ static void freezer_css_offline(struct cgroup_subsys_state *css)
	mutex_lock(&freezer_mutex);
 
	if (freezer->state & CGROUP_FREEZING)
-		atomic_dec(&system_freezing_cnt);
+		static_branch_dec(&freezer_active);
 
	freezer->state = 0;
 
@@ -179,6 +179,7 @@ static void freezer_attach(struct cgroup_taskset *tset)
			__thaw_task(task);
		} else {
			freeze_task(task);
+
			/* clear FROZEN and propagate upwards */
			while (freezer && (freezer->state & CGROUP_FROZEN)) {
				freezer->state &= ~CGROUP_FROZEN;
@@ -271,16 +272,8 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
	css_task_iter_start(css, 0, &it);
 
	while ((task = css_task_iter_next(&it))) {
-		if (freezing(task)) {
-			/*
-			 * freezer_should_skip() indicates that the task
-			 * should be skipped when determining freezing
-			 * completion. Consider it frozen in addition to
-			 * the usual frozen condition.
-			 */
-			if (!frozen(task) && !freezer_should_skip(task))
-				goto out_iter_end;
-		}
+		if (freezing(task) && !frozen(task))
+			goto out_iter_end;
	}
 
	freezer->state |= CGROUP_FROZEN;
@@ -357,7 +350,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
 
	if (freeze) {
		if (!(freezer->state & CGROUP_FREEZING))
-			atomic_inc(&system_freezing_cnt);
+			static_branch_inc(&freezer_active);
		freezer->state |= state;
		freeze_cgroup(freezer);
	} else {
@@ -366,9 +359,9 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
		freezer->state &= ~state;
 
		if (!(freezer->state & CGROUP_FREEZING)) {
-			if (was_freezing)
-				atomic_dec(&system_freezing_cnt);
			freezer->state &= ~CGROUP_FROZEN;
+			if (was_freezing)
+				static_branch_dec(&freezer_active);
			unfreeze_cgroup(freezer);
		}
	}
@@ -374,10 +374,10 @@ static void coredump_task_exit(struct task_struct *tsk)
			complete(&core_state->startup);
 
		for (;;) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
+			set_current_state(TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
			if (!self.task) /* see coredump_finish() */
				break;
-			freezable_schedule();
+			schedule();
		}
		__set_current_state(TASK_RUNNING);
	}
@@ -1421,13 +1421,12 @@ static void complete_vfork_done(struct task_struct *tsk)
 static int wait_for_vfork_done(struct task_struct *child,
				struct completion *vfork)
 {
+	unsigned int state = TASK_UNINTERRUPTIBLE|TASK_KILLABLE|TASK_FREEZABLE;
	int killed;
 
-	freezer_do_not_count();
	cgroup_enter_frozen();
-	killed = wait_for_completion_killable(vfork);
+	killed = wait_for_completion_state(vfork, state);
	cgroup_leave_frozen(false);
-	freezer_count();
 
	if (killed) {
		task_lock(child);

kernel/freezer.c (133 lines changed)

@@ -13,10 +13,11 @@
 #include <linux/kthread.h>
 
 /* total number of freezing conditions in effect */
-atomic_t system_freezing_cnt = ATOMIC_INIT(0);
-EXPORT_SYMBOL(system_freezing_cnt);
+DEFINE_STATIC_KEY_FALSE(freezer_active);
+EXPORT_SYMBOL(freezer_active);
 
-/* indicate whether PM freezing is in effect, protected by
+/*
+ * indicate whether PM freezing is in effect, protected by
  * system_transition_mutex
  */
 bool pm_freezing;
@@ -29,7 +30,7 @@ static DEFINE_SPINLOCK(freezer_lock);
  * freezing_slow_path - slow path for testing whether a task needs to be frozen
  * @p: task to be tested
  *
- * This function is called by freezing() if system_freezing_cnt isn't zero
+ * This function is called by freezing() if freezer_active isn't zero
  * and tests whether @p needs to enter and stay in frozen state. Can be
  * called under any context. The freezers are responsible for ensuring the
  * target tasks see the updated state.
@@ -52,41 +53,40 @@ bool freezing_slow_path(struct task_struct *p)
 }
 EXPORT_SYMBOL(freezing_slow_path);
 
+bool frozen(struct task_struct *p)
+{
+	return READ_ONCE(p->__state) & TASK_FROZEN;
+}
+
 /* Refrigerator is place where frozen processes are stored :-). */
 bool __refrigerator(bool check_kthr_stop)
 {
-	/* Hmm, should we be allowed to suspend when there are realtime
-	   processes around? */
+	unsigned int state = get_current_state();
	bool was_frozen = false;
-	unsigned int save = get_current_state();
 
	pr_debug("%s entered refrigerator\n", current->comm);
 
+	WARN_ON_ONCE(state && !(state & TASK_NORMAL));
+
	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		bool freeze;
+
+		set_current_state(TASK_FROZEN);
 
		spin_lock_irq(&freezer_lock);
-		current->flags |= PF_FROZEN;
-		if (!freezing(current) ||
-		    (check_kthr_stop && kthread_should_stop()))
-			current->flags &= ~PF_FROZEN;
+		freeze = freezing(current) && !(check_kthr_stop && kthread_should_stop());
		spin_unlock_irq(&freezer_lock);
 
-		if (!(current->flags & PF_FROZEN))
+		if (!freeze)
			break;
+
		was_frozen = true;
		schedule();
	}
+	__set_current_state(TASK_RUNNING);
 
	pr_debug("%s left refrigerator\n", current->comm);
 
-	/*
-	 * Restore saved task state before returning. The mb'd version
-	 * needs to be used; otherwise, it might silently break
-	 * synchronization which depends on ordered task state change.
-	 */
-	set_current_state(save);
-
	return was_frozen;
 }
 EXPORT_SYMBOL(__refrigerator);
@@ -101,6 +101,44 @@ static void fake_signal_wake_up(struct task_struct *p)
	}
 }
 
+static int __set_task_frozen(struct task_struct *p, void *arg)
+{
+	unsigned int state = READ_ONCE(p->__state);
+
+	if (p->on_rq)
+		return 0;
+
+	if (p != current && task_curr(p))
+		return 0;
+
+	if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
+		return 0;
+
+	/*
+	 * Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they
+	 * can suffer spurious wakeups.
+	 */
+	if (state & TASK_FREEZABLE)
+		WARN_ON_ONCE(!(state & TASK_NORMAL));
+
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * It's dangerous to freeze with locks held; there be dragons there.
+	 */
+	if (!(state & __TASK_FREEZABLE_UNSAFE))
+		WARN_ON_ONCE(debug_locks && p->lockdep_depth);
+#endif
+
+	WRITE_ONCE(p->__state, TASK_FROZEN);
+	return TASK_FROZEN;
+}
+
+static bool __freeze_task(struct task_struct *p)
+{
+	/* TASK_FREEZABLE|TASK_STOPPED|TASK_TRACED -> TASK_FROZEN */
+	return task_call_func(p, __set_task_frozen, NULL);
+}
+
 /**
  * freeze_task - send a freeze request to given task
  * @p: task to send the request to
@@ -116,20 +154,8 @@ bool freeze_task(struct task_struct *p)
 {
	unsigned long flags;
 
-	/*
-	 * This check can race with freezer_do_not_count, but worst case that
-	 * will result in an extra wakeup being sent to the task. It does not
-	 * race with freezer_count(), the barriers in freezer_count() and
-	 * freezer_should_skip() ensure that either freezer_count() sees
-	 * freezing == true in try_to_freeze() and freezes, or
-	 * freezer_should_skip() sees !PF_FREEZE_SKIP and freezes the task
-	 * normally.
-	 */
-	if (freezer_should_skip(p))
-		return false;
-
	spin_lock_irqsave(&freezer_lock, flags);
-	if (!freezing(p) || frozen(p)) {
+	if (!freezing(p) || frozen(p) || __freeze_task(p)) {
		spin_unlock_irqrestore(&freezer_lock, flags);
		return false;
	}
@@ -137,19 +163,52 @@ bool freeze_task(struct task_struct *p)
	if (!(p->flags & PF_KTHREAD))
		fake_signal_wake_up(p);
	else
-		wake_up_state(p, TASK_INTERRUPTIBLE);
+		wake_up_state(p, TASK_NORMAL);
 
	spin_unlock_irqrestore(&freezer_lock, flags);
	return true;
 }
 
+/*
+ * The special task states (TASK_STOPPED, TASK_TRACED) keep their canonical
+ * state in p->jobctl. If either of them got a wakeup that was missed because
+ * TASK_FROZEN, then their canonical state reflects that and the below will
+ * refuse to restore the special state and instead issue the wakeup.
+ */
+static int __set_task_special(struct task_struct *p, void *arg)
+{
+	unsigned int state = 0;
+
+	if (p->jobctl & JOBCTL_TRACED)
+		state = TASK_TRACED;
+
+	else if (p->jobctl & JOBCTL_STOPPED)
+		state = TASK_STOPPED;
+
+	if (state)
+		WRITE_ONCE(p->__state, state);
+
+	return state;
+}
+
 void __thaw_task(struct task_struct *p)
 {
-	unsigned long flags;
+	unsigned long flags, flags2;
 
	spin_lock_irqsave(&freezer_lock, flags);
-	if (frozen(p))
-		wake_up_process(p);
+	if (WARN_ON_ONCE(freezing(p)))
+		goto unlock;
+
+	if (lock_task_sighand(p, &flags2)) {
+		/* TASK_FROZEN -> TASK_{STOPPED,TRACED} */
+		bool ret = task_call_func(p, __set_task_special, NULL);
+		unlock_task_sighand(p, &flags2);
+		if (ret)
+			goto unlock;
+	}
+
+	wake_up_state(p, TASK_FROZEN);
+unlock:
	spin_unlock_irqrestore(&freezer_lock, flags);
 }
 
@@ -334,7 +334,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
	 * futex_queue() calls spin_unlock() upon completion, both serializing
	 * access to the hash list and forcing another memory barrier.
	 */
-	set_current_state(TASK_INTERRUPTIBLE);
+	set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
	futex_queue(q, hb);
 
	/* Arm the timer */
@@ -352,7 +352,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
		 * is no timeout, or if it has yet to expire.
		 */
		if (!timeout || timeout->task)
-			freezable_schedule();
+			schedule();
	}
	__set_current_state(TASK_RUNNING);
 }
@@ -430,7 +430,7 @@ retry:
		return ret;
	}
 
-	set_current_state(TASK_INTERRUPTIBLE);
+	set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
 
	for (i = 0; i < count; i++) {
		u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
@@ -504,7 +504,7 @@ static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
			return;
		}
 
-		freezable_schedule();
+		schedule();
	}
 
 /**
@@ -95,8 +95,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
	 * Ensure the task is not frozen.
	 * Also, skip vfork and any other user process that freezer should skip.
	 */
-	if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP)))
-		return;
+	if (unlikely(READ_ONCE(t->__state) & TASK_FROZEN))
+		return;
 
	/*
	 * When a freshly created task is scheduled once, changes its state to
@@ -191,6 +191,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
	hung_task_show_lock = false;
	rcu_read_lock();
	for_each_process_thread(g, t) {
+		unsigned int state;
+
		if (!max_count--)
			goto unlock;
		if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
@@ -198,8 +200,14 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
				goto unlock;
			last_break = jiffies;
		}
-		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
-		if (READ_ONCE(t->__state) == TASK_UNINTERRUPTIBLE)
+		/*
+		 * skip the TASK_KILLABLE tasks -- these can be killed
+		 * skip the TASK_IDLE tasks -- those are genuinely idle
+		 */
+		state = READ_ONCE(t->__state);
+		if ((state & TASK_UNINTERRUPTIBLE) &&
+		    !(state & TASK_WAKEKILL) &&
+		    !(state & TASK_NOLOAD))
			check_hung_task(t, timeout);
	}
 unlock:
@@ -92,20 +92,24 @@ bool hibernation_available(void)
  */
 void hibernation_set_ops(const struct platform_hibernation_ops *ops)
 {
+	unsigned int sleep_flags;
+
	if (ops && !(ops->begin && ops->end && ops->pre_snapshot
	    && ops->prepare && ops->finish && ops->enter && ops->pre_restore
	    && ops->restore_cleanup && ops->leave)) {
		WARN_ON(1);
		return;
	}
-	lock_system_sleep();
+
+	sleep_flags = lock_system_sleep();
+
	hibernation_ops = ops;
	if (ops)
		hibernation_mode = HIBERNATION_PLATFORM;
	else if (hibernation_mode == HIBERNATION_PLATFORM)
		hibernation_mode = HIBERNATION_SHUTDOWN;
 
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
 }
 EXPORT_SYMBOL_GPL(hibernation_set_ops);
 
@@ -713,6 +717,7 @@ static int load_image_and_restore(void)
 int hibernate(void)
 {
	bool snapshot_test = false;
+	unsigned int sleep_flags;
	int error;
 
	if (!hibernation_available()) {
@@ -720,7 +725,7 @@ int hibernate(void)
		return -EPERM;
	}
 
-	lock_system_sleep();
+	sleep_flags = lock_system_sleep();
	/* The snapshot device should not be opened while we're running */
	if (!hibernate_acquire()) {
		error = -EBUSY;
@@ -794,7 +799,7 @@ int hibernate(void)
	pm_restore_console();
	hibernate_release();
  Unlock:
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
	pr_info("hibernation exit\n");
 
	return error;
@@ -809,9 +814,10 @@ int hibernate(void)
  */
 int hibernate_quiet_exec(int (*func)(void *data), void *data)
 {
+	unsigned int sleep_flags;
	int error;
 
-	lock_system_sleep();
+	sleep_flags = lock_system_sleep();
 
	if (!hibernate_acquire()) {
		error = -EBUSY;
@@ -891,7 +897,7 @@ restore:
	hibernate_release();
 
 unlock:
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
 
	return error;
 }
@@ -1100,11 +1106,12 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
 static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
			   const char *buf, size_t n)
 {
+	int mode = HIBERNATION_INVALID;
+	unsigned int sleep_flags;
	int error = 0;
-	int i;
	int len;
	char *p;
-	int mode = HIBERNATION_INVALID;
+	int i;
 
	if (!hibernation_available())
		return -EPERM;
@@ -1112,7 +1119,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
	p = memchr(buf, '\n', n);
	len = p ? p - buf : n;
 
-	lock_system_sleep();
+	sleep_flags = lock_system_sleep();
	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
		if (len == strlen(hibernation_modes[i])
		    && !strncmp(buf, hibernation_modes[i], len)) {
@@ -1142,7 +1149,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
	if (!error)
		pm_pr_dbg("Hibernation mode set to '%s'\n",
			       hibernation_modes[mode]);
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
	return error ? error : n;
 }
 
@@ -1158,9 +1165,10 @@ static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr,
 static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
			     const char *buf, size_t n)
 {
-	dev_t res;
+	unsigned int sleep_flags;
	int len = n;
	char *name;
+	dev_t res;
 
	if (len && buf[len-1] == '\n')
		len--;
@@ -1173,9 +1181,10 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
	if (!res)
		return -EINVAL;
 
-	lock_system_sleep();
+	sleep_flags = lock_system_sleep();
	swsusp_resume_device = res;
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
+
	pm_pr_dbg("Configured hibernation resume from disk to %u\n",
		  swsusp_resume_device);
	noresume = 0;
@@ -21,14 +21,16 @@
 
 #ifdef CONFIG_PM_SLEEP
 
-void lock_system_sleep(void)
+unsigned int lock_system_sleep(void)
 {
-	current->flags |= PF_FREEZER_SKIP;
+	unsigned int flags = current->flags;
+	current->flags |= PF_NOFREEZE;
	mutex_lock(&system_transition_mutex);
+	return flags;
 }
 EXPORT_SYMBOL_GPL(lock_system_sleep);
 
-void unlock_system_sleep(void)
+void unlock_system_sleep(unsigned int flags)
 {
	/*
	 * Don't use freezer_count() because we don't want the call to
@@ -46,7 +48,8 @@ void unlock_system_sleep(void)
	 * Which means, if we use try_to_freeze() here, it would make them
	 * enter the refrigerator, thus causing hibernation to lockup.
	 */
-	current->flags &= ~PF_FREEZER_SKIP;
+	if (!(flags & PF_NOFREEZE))
+		current->flags &= ~PF_NOFREEZE;
	mutex_unlock(&system_transition_mutex);
 }
 EXPORT_SYMBOL_GPL(unlock_system_sleep);
@@ -263,16 +266,17 @@ static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr,
 static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
				const char *buf, size_t n)
 {
+	unsigned int sleep_flags;
	const char * const *s;
+	int error = -EINVAL;
	int level;
	char *p;
	int len;
-	int error = -EINVAL;
 
	p = memchr(buf, '\n', n);
	len = p ? p - buf : n;
 
-	lock_system_sleep();
+	sleep_flags = lock_system_sleep();
 
	level = TEST_FIRST;
	for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++)
@@ -282,7 +286,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
			break;
		}
 
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
 
	return error ? error : n;
 }
@@ -50,8 +50,7 @@ static int try_to_freeze_tasks(bool user_only)
			if (p == current || !freeze_task(p))
				continue;
 
-			if (!freezer_should_skip(p))
-				todo++;
+			todo++;
		}
		read_unlock(&tasklist_lock);
 
@@ -96,8 +95,7 @@ static int try_to_freeze_tasks(bool user_only)
	if (!wakeup || pm_debug_messages_on) {
		read_lock(&tasklist_lock);
		for_each_process_thread(g, p) {
-			if (p != current && !freezer_should_skip(p)
-			    && freezing(p) && !frozen(p))
+			if (p != current && freezing(p) && !frozen(p))
				sched_show_task(p);
		}
		read_unlock(&tasklist_lock);
@@ -129,7 +127,7 @@ int freeze_processes(void)
	current->flags |= PF_SUSPEND_TASK;
 
	if (!pm_freezing)
-		atomic_inc(&system_freezing_cnt);
+		static_branch_inc(&freezer_active);
 
	pm_wakeup_clear(0);
	pr_info("Freezing user space processes ... ");
@@ -190,7 +188,7 @@ void thaw_processes(void)
 
	trace_suspend_resume(TPS("thaw_processes"), 0, true);
	if (pm_freezing)
-		atomic_dec(&system_freezing_cnt);
+		static_branch_dec(&freezer_active);
	pm_freezing = false;
	pm_nosig_freezing = false;
 
@@ -75,9 +75,11 @@ EXPORT_SYMBOL_GPL(pm_suspend_default_s2idle);
 
 void s2idle_set_ops(const struct platform_s2idle_ops *ops)
 {
-	lock_system_sleep();
+	unsigned int sleep_flags;
+
+	sleep_flags = lock_system_sleep();
	s2idle_ops = ops;
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
 }
 
 static void s2idle_begin(void)
@@ -203,7 +205,9 @@ __setup("mem_sleep_default=", mem_sleep_default_setup);
  */
 void suspend_set_ops(const struct platform_suspend_ops *ops)
 {
-	lock_system_sleep();
+	unsigned int sleep_flags;
+
+	sleep_flags = lock_system_sleep();
 
	suspend_ops = ops;
 
@@ -219,7 +223,7 @@ void suspend_set_ops(const struct platform_suspend_ops *ops)
		mem_sleep_current = PM_SUSPEND_MEM;
	}
 
-	unlock_system_sleep();
+	unlock_system_sleep(sleep_flags);
 }
 EXPORT_SYMBOL_GPL(suspend_set_ops);
 
@ -47,12 +47,13 @@ int is_hibernate_resume_dev(dev_t dev)
|
||||
static int snapshot_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct snapshot_data *data;
|
||||
unsigned int sleep_flags;
|
||||
int error;
|
||||
|
||||
if (!hibernation_available())
|
||||
return -EPERM;
|
||||
|
||||
lock_system_sleep();
|
||||
sleep_flags = lock_system_sleep();
|
||||
|
||||
if (!hibernate_acquire()) {
|
||||
error = -EBUSY;
|
||||
@ -98,7 +99,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
|
||||
data->dev = 0;
|
||||
|
||||
Unlock:
|
||||
unlock_system_sleep();
|
||||
unlock_system_sleep(sleep_flags);
|
||||
|
||||
return error;
|
||||
}
|
||||
@ -106,8 +107,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
|
||||
static int snapshot_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct snapshot_data *data;
|
||||
unsigned int sleep_flags;
|
||||
|
||||
lock_system_sleep();
|
||||
sleep_flags = lock_system_sleep();
|
||||
|
||||
swsusp_free();
|
||||
data = filp->private_data;
|
||||
@ -124,7 +126,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
|
||||
PM_POST_HIBERNATION : PM_POST_RESTORE);
|
||||
hibernate_release();
|
||||
|
||||
unlock_system_sleep();
|
||||
unlock_system_sleep(sleep_flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -132,11 +134,12 @@ static int snapshot_release(struct inode *inode, struct file *filp)
|
||||
static ssize_t snapshot_read(struct file *filp, char __user *buf,
|
||||
size_t count, loff_t *offp)
|
||||
{
|
||||
struct snapshot_data *data;
|
||||
ssize_t res;
|
||||
loff_t pg_offp = *offp & ~PAGE_MASK;
|
||||
struct snapshot_data *data;
|
||||
unsigned int sleep_flags;
|
||||
ssize_t res;
|
||||
|
||||
lock_system_sleep();
|
||||
sleep_flags = lock_system_sleep();
|
||||
|
||||
data = filp->private_data;
|
||||
if (!data->ready) {
|
||||
@ -157,7 +160,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
|
||||
*offp += res;
|
||||
|
||||
Unlock:
|
||||
unlock_system_sleep();
|
||||
unlock_system_sleep(sleep_flags);
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -165,16 +168,17 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
|
||||
static ssize_t snapshot_write(struct file *filp, const char __user *buf,
|
||||
size_t count, loff_t *offp)
|
||||
{
|
||||
struct snapshot_data *data;
|
||||
ssize_t res;
|
||||
loff_t pg_offp = *offp & ~PAGE_MASK;
|
||||
struct snapshot_data *data;
|
||||
unsigned long sleep_flags;
|
||||
ssize_t res;
|
||||
|
||||
if (need_wait) {
|
||||
wait_for_device_probe();
|
||||
need_wait = false;
|
||||
}
|
||||
|
||||
lock_system_sleep();
|
||||
sleep_flags = lock_system_sleep();
|
||||
|
||||
data = filp->private_data;
|
||||
|
||||
@ -196,7 +200,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
|
||||
if (res > 0)
|
||||
*offp += res;
|
||||
unlock:
|
||||
unlock_system_sleep();
|
||||
unlock_system_sleep(sleep_flags);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -269,7 +269,7 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
|
||||
read_unlock(&tasklist_lock);
|
||||
|
||||
if (!ret && !ignore_state &&
|
||||
WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED)))
|
||||
WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED|TASK_FROZEN)))
|
||||
ret = -ESRCH;
|
||||
|
||||
return ret;
|
||||
|
@ -161,7 +161,8 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
|
||||
struct task_struct *t;
|
||||
unsigned long flags;
|
||||
|
||||
BUG_ON(!lock_task_sighand(p, &flags));
|
||||
if (WARN_ON_ONCE(!lock_task_sighand(p, &flags)))
|
||||
return;
|
||||
|
||||
prev = p->signal->autogroup;
|
||||
if (prev == ag) {
|
||||
|
@ -204,6 +204,7 @@ EXPORT_SYMBOL(wait_for_completion_io_timeout);
|
||||
int __sched wait_for_completion_interruptible(struct completion *x)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
|
||||
|
||||
if (t == -ERESTARTSYS)
|
||||
return t;
|
||||
return 0;
|
||||
@ -241,12 +242,23 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
|
||||
int __sched wait_for_completion_killable(struct completion *x)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
|
||||
|
||||
if (t == -ERESTARTSYS)
|
||||
return t;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_killable);
|
||||
|
||||
int __sched wait_for_completion_state(struct completion *x, unsigned int state)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, state);
|
||||
|
||||
if (t == -ERESTARTSYS)
|
||||
return t;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_state);
|
||||
|
||||
/**
|
||||
* wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
|
||||
* @x: holds the state of this particular completion
|
||||
|
@ -143,11 +143,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1;
|
||||
* Number of tasks to iterate in a single balance run.
|
||||
* Limited because this is done with IRQs disabled.
|
||||
*/
|
||||
#ifdef CONFIG_PREEMPT_RT
|
||||
const_debug unsigned int sysctl_sched_nr_migrate = 8;
|
||||
#else
|
||||
const_debug unsigned int sysctl_sched_nr_migrate = 32;
|
||||
#endif
|
||||
const_debug unsigned int sysctl_sched_nr_migrate = SCHED_NR_MIGRATE_BREAK;
|
||||
|
||||
__read_mostly int scheduler_running;
|
||||
|
||||
@ -482,8 +478,7 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
|
||||
* p->se.load, p->rt_priority,
|
||||
* p->dl.dl_{runtime, deadline, period, flags, bw, density}
|
||||
* - sched_setnuma(): p->numa_preferred_nid
|
||||
* - sched_move_task()/
|
||||
* cpu_cgroup_fork(): p->sched_task_group
|
||||
* - sched_move_task(): p->sched_task_group
|
||||
* - uclamp_update_active() p->uclamp*
|
||||
*
|
||||
* p->state <- TASK_*:
|
||||
@ -2329,7 +2324,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
|
||||
rq = cpu_rq(new_cpu);
|
||||
|
||||
rq_lock(rq, rf);
|
||||
BUG_ON(task_cpu(p) != new_cpu);
|
||||
WARN_ON_ONCE(task_cpu(p) != new_cpu);
|
||||
activate_task(rq, p, 0);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
|
||||
@ -2779,7 +2774,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (task_running(rq, p) || READ_ONCE(p->__state) == TASK_WAKING) {
|
||||
if (task_on_cpu(rq, p) || READ_ONCE(p->__state) == TASK_WAKING) {
|
||||
/*
|
||||
* MIGRATE_ENABLE gets here because 'p == current', but for
|
||||
* anything else we cannot do is_migration_disabled(), punt
|
||||
@ -3255,12 +3250,12 @@ out:
|
||||
/*
|
||||
* wait_task_inactive - wait for a thread to unschedule.
|
||||
*
|
||||
* If @match_state is nonzero, it's the @p->state value just checked and
|
||||
* not expected to change. If it changes, i.e. @p might have woken up,
|
||||
* then return zero. When we succeed in waiting for @p to be off its CPU,
|
||||
* we return a positive number (its total switch count). If a second call
|
||||
* a short while later returns the same number, the caller can be sure that
|
||||
* @p has remained unscheduled the whole time.
|
||||
* Wait for the thread to block in any of the states set in @match_state.
|
||||
* If it changes, i.e. @p might have woken up, then return zero. When we
|
||||
* succeed in waiting for @p to be off its CPU, we return a positive number
|
||||
* (its total switch count). If a second call a short while later returns the
|
||||
* same number, the caller can be sure that @p has remained unscheduled the
|
||||
* whole time.
|
||||
*
|
||||
* The caller must ensure that the task *will* unschedule sometime soon,
|
||||
* else this function might spin for a *long* time. This function can't
|
||||
@ -3291,12 +3286,12 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
|
||||
*
|
||||
* NOTE! Since we don't hold any locks, it's not
|
||||
* even sure that "rq" stays as the right runqueue!
|
||||
* But we don't care, since "task_running()" will
|
||||
* But we don't care, since "task_on_cpu()" will
|
||||
* return false if the runqueue has changed and p
|
||||
* is actually now running somewhere else!
|
||||
*/
|
||||
while (task_running(rq, p)) {
|
||||
if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
|
||||
while (task_on_cpu(rq, p)) {
|
||||
if (!(READ_ONCE(p->__state) & match_state))
|
||||
return 0;
|
||||
cpu_relax();
|
||||
}
|
||||
@ -3308,10 +3303,10 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
|
||||
*/
|
||||
rq = task_rq_lock(p, &rf);
|
||||
trace_sched_wait_task(p);
|
||||
running = task_running(rq, p);
|
||||
running = task_on_cpu(rq, p);
|
||||
queued = task_on_rq_queued(p);
|
||||
ncsw = 0;
|
||||
if (!match_state || READ_ONCE(p->__state) == match_state)
|
||||
if (READ_ONCE(p->__state) & match_state)
|
||||
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
|
||||
@ -6430,7 +6425,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
|
||||
prev->sched_contributes_to_load =
|
||||
(prev_state & TASK_UNINTERRUPTIBLE) &&
|
||||
!(prev_state & TASK_NOLOAD) &&
|
||||
!(prev->flags & PF_FROZEN);
|
||||
!(prev_state & TASK_FROZEN);
|
||||
|
||||
if (prev->sched_contributes_to_load)
|
||||
rq->nr_uninterruptible++;
|
||||
@ -8650,7 +8645,7 @@ again:
|
||||
if (curr->sched_class != p->sched_class)
|
||||
goto out_unlock;
|
||||
|
||||
if (task_running(p_rq, p) || !task_is_running(p))
|
||||
if (task_on_cpu(p_rq, p) || !task_is_running(p))
|
||||
goto out_unlock;
|
||||
|
||||
yielded = curr->sched_class->yield_to_task(rq, p);
|
||||
@ -8862,7 +8857,7 @@ void sched_show_task(struct task_struct *p)
|
||||
if (pid_alive(p))
|
||||
ppid = task_pid_nr(rcu_dereference(p->real_parent));
|
||||
rcu_read_unlock();
|
||||
pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n",
|
||||
pr_cont(" stack:%-5lu pid:%-5d ppid:%-6d flags:0x%08lx\n",
|
||||
free, task_pid_nr(p), ppid,
|
||||
read_task_thread_flags(p));
|
||||
|
||||
@ -8890,7 +8885,7 @@ state_filter_match(unsigned long state_filter, struct task_struct *p)
|
||||
* When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
|
||||
* TASK_KILLABLE).
|
||||
*/
|
||||
if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE)
|
||||
if (state_filter == TASK_UNINTERRUPTIBLE && (state & TASK_NOLOAD))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -9602,9 +9597,6 @@ LIST_HEAD(task_groups);
|
||||
static struct kmem_cache *task_group_cache __read_mostly;
|
||||
#endif
|
||||
|
||||
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
|
||||
DECLARE_PER_CPU(cpumask_var_t, select_rq_mask);
|
||||
|
||||
void __init sched_init(void)
|
||||
{
|
||||
unsigned long ptr = 0;
|
||||
@ -9648,14 +9640,6 @@ void __init sched_init(void)
|
||||
|
||||
#endif /* CONFIG_RT_GROUP_SCHED */
|
||||
}
|
||||
#ifdef CONFIG_CPUMASK_OFFSTACK
|
||||
for_each_possible_cpu(i) {
|
||||
per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
|
||||
cpumask_size(), GFP_KERNEL, cpu_to_node(i));
|
||||
per_cpu(select_rq_mask, i) = (cpumask_var_t)kzalloc_node(
|
||||
cpumask_size(), GFP_KERNEL, cpu_to_node(i));
|
||||
}
|
||||
#endif /* CONFIG_CPUMASK_OFFSTACK */
|
||||
|
||||
init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
|
||||
|
||||
@ -10164,7 +10148,7 @@ void sched_release_group(struct task_group *tg)
|
||||
spin_unlock_irqrestore(&task_group_lock, flags);
|
||||
}
|
||||
|
||||
static void sched_change_group(struct task_struct *tsk, int type)
|
||||
static void sched_change_group(struct task_struct *tsk)
|
||||
{
|
||||
struct task_group *tg;
|
||||
|
||||
@ -10180,7 +10164,7 @@ static void sched_change_group(struct task_struct *tsk, int type)
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
if (tsk->sched_class->task_change_group)
|
||||
tsk->sched_class->task_change_group(tsk, type);
|
||||
tsk->sched_class->task_change_group(tsk);
|
||||
else
|
||||
#endif
|
||||
set_task_rq(tsk, task_cpu(tsk));
|
||||
@ -10211,7 +10195,7 @@ void sched_move_task(struct task_struct *tsk)
|
||||
if (running)
|
||||
put_prev_task(rq, tsk);
|
||||
|
||||
sched_change_group(tsk, TASK_MOVE_GROUP);
|
||||
sched_change_group(tsk);
|
||||
|
||||
if (queued)
|
||||
enqueue_task(rq, tsk, queue_flags);
|
||||
@ -10289,53 +10273,19 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
|
||||
sched_unregister_group(tg);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called before wake_up_new_task(), therefore we really only
|
||||
* have to set its group bits, all the other stuff does not apply.
|
||||
*/
|
||||
static void cpu_cgroup_fork(struct task_struct *task)
|
||||
{
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
rq = task_rq_lock(task, &rf);
|
||||
|
||||
update_rq_clock(rq);
|
||||
sched_change_group(task, TASK_SET_GROUP);
|
||||
|
||||
task_rq_unlock(rq, task, &rf);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct cgroup_subsys_state *css;
|
||||
int ret = 0;
|
||||
|
||||
cgroup_taskset_for_each(task, css, tset) {
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
if (!sched_rt_can_attach(css_tg(css), task))
|
||||
return -EINVAL;
|
||||
#endif
|
||||
/*
|
||||
* Serialize against wake_up_new_task() such that if it's
|
||||
* running, we're sure to observe its full state.
|
||||
*/
|
||||
raw_spin_lock_irq(&task->pi_lock);
|
||||
/*
|
||||
* Avoid calling sched_move_task() before wake_up_new_task()
|
||||
* has happened. This would lead to problems with PELT, due to
|
||||
* move wanting to detach+attach while we're not attached yet.
|
||||
*/
|
||||
if (READ_ONCE(task->__state) == TASK_NEW)
|
||||
ret = -EINVAL;
|
||||
raw_spin_unlock_irq(&task->pi_lock);
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void cpu_cgroup_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
@ -11171,8 +11121,9 @@ struct cgroup_subsys cpu_cgrp_subsys = {
|
||||
.css_released = cpu_cgroup_css_released,
|
||||
.css_free = cpu_cgroup_css_free,
|
||||
.css_extra_stat_show = cpu_extra_stat_show,
|
||||
.fork = cpu_cgroup_fork,
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
.can_attach = cpu_cgroup_can_attach,
|
||||
#endif
|
||||
.attach = cpu_cgroup_attach,
|
||||
.legacy_cftypes = cpu_legacy_files,
|
||||
.dfl_cftypes = cpu_files,
|
||||
|
@ -88,7 +88,7 @@ static unsigned long sched_core_update_cookie(struct task_struct *p,
|
||||
* core has now entered/left forced idle state. Defer accounting to the
|
||||
* next scheduling edge, rather than always forcing a reschedule here.
|
||||
*/
|
||||
if (task_running(rq, p))
|
||||
if (task_on_cpu(rq, p))
|
||||
resched_curr(rq);
|
||||
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
@ -205,7 +205,7 @@ int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
|
||||
default:
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
};
|
||||
}
|
||||
|
||||
if (type == PIDTYPE_PID) {
|
||||
__sched_core_set(task, cookie);
|
||||
|
@ -123,7 +123,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
|
||||
unsigned long cap, max_cap = 0;
|
||||
int cpu, max_cpu = -1;
|
||||
|
||||
if (!static_branch_unlikely(&sched_asym_cpucapacity))
|
||||
if (!sched_asym_cpucap_active())
|
||||
return 1;
|
||||
|
||||
/* Ensure the capacity of the CPUs fits the task. */
|
||||
|
@ -147,7 +147,7 @@ int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p,
|
||||
int task_pri = convert_prio(p->prio);
|
||||
int idx, cpu;
|
||||
|
||||
BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);
|
||||
WARN_ON_ONCE(task_pri >= CPUPRI_NR_PRIORITIES);
|
||||
|
||||
for (idx = 0; idx < task_pri; idx++) {
|
||||
|
||||
|
@ -124,15 +124,12 @@ static inline int dl_bw_cpus(int i)
|
||||
return cpus;
|
||||
}
|
||||
|
||||
static inline unsigned long __dl_bw_capacity(int i)
|
||||
static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
|
||||
{
|
||||
struct root_domain *rd = cpu_rq(i)->rd;
|
||||
unsigned long cap = 0;
|
||||
int i;
|
||||
|
||||
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
|
||||
"sched RCU must be held");
|
||||
|
||||
for_each_cpu_and(i, rd->span, cpu_active_mask)
|
||||
for_each_cpu_and(i, mask, cpu_active_mask)
|
||||
cap += capacity_orig_of(i);
|
||||
|
||||
return cap;
|
||||
@ -144,11 +141,14 @@ static inline unsigned long __dl_bw_capacity(int i)
|
||||
*/
|
||||
static inline unsigned long dl_bw_capacity(int i)
|
||||
{
|
||||
if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
|
||||
if (!sched_asym_cpucap_active() &&
|
||||
capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
|
||||
return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
|
||||
} else {
|
||||
return __dl_bw_capacity(i);
|
||||
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
|
||||
"sched RCU must be held");
|
||||
|
||||
return __dl_bw_capacity(cpu_rq(i)->rd->span);
|
||||
}
|
||||
}
|
||||
|
||||
@ -310,7 +310,7 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
|
||||
{
|
||||
struct rq *rq;
|
||||
|
||||
BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
|
||||
WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
|
||||
|
||||
if (task_on_rq_queued(p))
|
||||
return;
|
||||
@ -431,8 +431,8 @@ static void task_non_contending(struct task_struct *p)
|
||||
sub_rq_bw(&p->dl, &rq->dl);
|
||||
raw_spin_lock(&dl_b->lock);
|
||||
__dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
|
||||
__dl_clear_params(p);
|
||||
raw_spin_unlock(&dl_b->lock);
|
||||
__dl_clear_params(p);
|
||||
}
|
||||
|
||||
return;
|
||||
@ -607,7 +607,7 @@ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
struct rb_node *leftmost;
|
||||
|
||||
BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
|
||||
WARN_ON_ONCE(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
|
||||
|
||||
leftmost = rb_add_cached(&p->pushable_dl_tasks,
|
||||
&rq->dl.pushable_dl_tasks_root,
|
||||
@ -684,7 +684,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
|
||||
* Failed to find any suitable CPU.
|
||||
* The task will never come back!
|
||||
*/
|
||||
BUG_ON(dl_bandwidth_enabled());
|
||||
WARN_ON_ONCE(dl_bandwidth_enabled());
|
||||
|
||||
/*
|
||||
* If admission control is disabled we
|
||||
@ -770,6 +770,14 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
|
||||
static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
|
||||
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
|
||||
|
||||
static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
|
||||
struct rq *rq)
|
||||
{
|
||||
/* for non-boosted task, pi_of(dl_se) == dl_se */
|
||||
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
|
||||
dl_se->runtime = pi_of(dl_se)->dl_runtime;
|
||||
}
|
||||
|
||||
/*
|
||||
* We are being explicitly informed that a new instance is starting,
|
||||
* and this means that:
|
||||
@ -803,8 +811,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
|
||||
* future; in fact, we must consider execution overheads (time
|
||||
* spent on hardirq context, etc.).
|
||||
*/
|
||||
dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
|
||||
dl_se->runtime = dl_se->dl_runtime;
|
||||
replenish_dl_new_period(dl_se, rq);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -830,16 +837,14 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
|
||||
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
|
||||
struct rq *rq = rq_of_dl_rq(dl_rq);
|
||||
|
||||
BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
|
||||
WARN_ON_ONCE(pi_of(dl_se)->dl_runtime <= 0);
|
||||
|
||||
/*
|
||||
* This could be the case for a !-dl task that is boosted.
|
||||
* Just go with full inherited parameters.
|
||||
*/
|
||||
if (dl_se->dl_deadline == 0) {
|
||||
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
|
||||
dl_se->runtime = pi_of(dl_se)->dl_runtime;
|
||||
}
|
||||
if (dl_se->dl_deadline == 0)
|
||||
replenish_dl_new_period(dl_se, rq);
|
||||
|
||||
if (dl_se->dl_yielded && dl_se->runtime > 0)
|
||||
dl_se->runtime = 0;
|
||||
@ -866,8 +871,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
|
||||
*/
|
||||
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
|
||||
printk_deferred_once("sched: DL replenish lagged too much\n");
|
||||
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
|
||||
dl_se->runtime = pi_of(dl_se)->dl_runtime;
|
||||
replenish_dl_new_period(dl_se, rq);
|
||||
}
|
||||
|
||||
if (dl_se->dl_yielded)
|
||||
@ -1024,8 +1028,7 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
|
||||
return;
|
||||
}
|
||||
|
||||
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
|
||||
dl_se->runtime = pi_of(dl_se)->dl_runtime;
|
||||
replenish_dl_new_period(dl_se, rq);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1333,11 +1336,7 @@ static void update_curr_dl(struct rq *rq)
|
||||
|
||||
trace_sched_stat_runtime(curr, delta_exec, 0);
|
||||
|
||||
curr->se.sum_exec_runtime += delta_exec;
|
||||
account_group_exec_runtime(curr, delta_exec);
|
||||
|
||||
curr->se.exec_start = now;
|
||||
cgroup_account_cputime(curr, delta_exec);
|
||||
update_current_exec_runtime(curr, now, delta_exec);
|
||||
|
||||
if (dl_entity_is_special(dl_se))
|
||||
return;
|
||||
@ -1616,7 +1615,7 @@ static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
|
||||
|
||||
BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
|
||||
WARN_ON_ONCE(!RB_EMPTY_NODE(&dl_se->rb_node));
|
||||
|
||||
rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less);
|
||||
|
||||
@ -1640,7 +1639,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
|
||||
static void
|
||||
enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
|
||||
{
|
||||
BUG_ON(on_dl_rq(dl_se));
|
||||
WARN_ON_ONCE(on_dl_rq(dl_se));
|
||||
|
||||
update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags);
|
||||
|
||||
@ -1814,6 +1813,14 @@ static void yield_task_dl(struct rq *rq)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static inline bool dl_task_is_earliest_deadline(struct task_struct *p,
|
||||
struct rq *rq)
|
||||
{
|
||||
return (!rq->dl.dl_nr_running ||
|
||||
dl_time_before(p->dl.deadline,
|
||||
rq->dl.earliest_dl.curr));
|
||||
}
|
||||
|
||||
static int find_later_rq(struct task_struct *task);
|
||||
|
||||
static int
|
||||
@ -1849,16 +1856,14 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
|
||||
* Take the capacity of the CPU into account to
|
||||
* ensure it fits the requirement of the task.
|
||||
*/
|
||||
if (static_branch_unlikely(&sched_asym_cpucapacity))
|
||||
if (sched_asym_cpucap_active())
|
||||
select_rq |= !dl_task_fits_capacity(p, cpu);
|
||||
|
||||
if (select_rq) {
|
||||
int target = find_later_rq(p);
|
||||
|
||||
if (target != -1 &&
|
||||
(dl_time_before(p->dl.deadline,
|
||||
cpu_rq(target)->dl.earliest_dl.curr) ||
|
||||
(cpu_rq(target)->dl.dl_nr_running == 0)))
|
||||
dl_task_is_earliest_deadline(p, cpu_rq(target)))
|
||||
cpu = target;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
@ -2017,7 +2022,7 @@ static struct task_struct *pick_task_dl(struct rq *rq)
|
||||
return NULL;
|
||||
|
||||
dl_se = pick_next_dl_entity(dl_rq);
|
||||
BUG_ON(!dl_se);
|
||||
WARN_ON_ONCE(!dl_se);
|
||||
p = dl_task_of(dl_se);
|
||||
|
||||
return p;
|
||||
@ -2087,7 +2092,7 @@ static void task_fork_dl(struct task_struct *p)
|
||||
|
||||
static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||
{
|
||||
if (!task_running(rq, p) &&
|
||||
if (!task_on_cpu(rq, p) &&
|
||||
cpumask_test_cpu(cpu, &p->cpus_mask))
|
||||
return 1;
|
||||
return 0;
|
||||
@ -2225,9 +2230,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
|
||||
|
||||
later_rq = cpu_rq(cpu);
|
||||
|
||||
if (later_rq->dl.dl_nr_running &&
|
||||
!dl_time_before(task->dl.deadline,
|
||||
later_rq->dl.earliest_dl.curr)) {
|
||||
if (!dl_task_is_earliest_deadline(task, later_rq)) {
|
||||
/*
|
||||
* Target rq has tasks of equal or earlier deadline,
|
||||
* retrying does not release any lock and is unlikely
|
||||
@ -2241,7 +2244,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
|
||||
if (double_lock_balance(rq, later_rq)) {
|
||||
if (unlikely(task_rq(task) != rq ||
|
||||
!cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
|
||||
task_running(rq, task) ||
|
||||
task_on_cpu(rq, task) ||
|
||||
!dl_task(task) ||
|
||||
!task_on_rq_queued(task))) {
|
||||
double_unlock_balance(rq, later_rq);
|
||||
@ -2255,9 +2258,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
|
||||
* its earliest one has a later deadline than our
|
||||
* task, the rq is a good one.
|
||||
*/
|
||||
if (!later_rq->dl.dl_nr_running ||
|
||||
dl_time_before(task->dl.deadline,
|
||||
later_rq->dl.earliest_dl.curr))
|
||||
if (dl_task_is_earliest_deadline(task, later_rq))
|
||||
break;
|
||||
|
||||
/* Otherwise we try again. */
|
||||
@ -2277,12 +2278,12 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
|
||||
|
||||
p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
|
||||
|
||||
BUG_ON(rq->cpu != task_cpu(p));
|
||||
BUG_ON(task_current(rq, p));
|
||||
BUG_ON(p->nr_cpus_allowed <= 1);
|
||||
WARN_ON_ONCE(rq->cpu != task_cpu(p));
|
||||
WARN_ON_ONCE(task_current(rq, p));
|
||||
WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
|
||||
|
||||
BUG_ON(!task_on_rq_queued(p));
|
||||
BUG_ON(!dl_task(p));
|
||||
WARN_ON_ONCE(!task_on_rq_queued(p));
|
||||
WARN_ON_ONCE(!dl_task(p));
|
||||
|
||||
return p;
|
||||
}
|
||||
@ -2428,9 +2429,7 @@ static void pull_dl_task(struct rq *this_rq)
|
||||
* - it will preempt the last one we pulled (if any).
|
||||
*/
|
||||
if (p && dl_time_before(p->dl.deadline, dmin) &&
|
||||
(!this_rq->dl.dl_nr_running ||
|
||||
dl_time_before(p->dl.deadline,
|
||||
this_rq->dl.earliest_dl.curr))) {
|
||||
dl_task_is_earliest_deadline(p, this_rq)) {
|
||||
WARN_ON(p == src_rq->curr);
|
||||
WARN_ON(!task_on_rq_queued(p));
|
||||
|
||||
@ -2475,7 +2474,7 @@ skip:
|
||||
*/
|
||||
static void task_woken_dl(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
if (!task_running(rq, p) &&
|
||||
if (!task_on_cpu(rq, p) &&
|
||||
!test_tsk_need_resched(rq->curr) &&
|
||||
p->nr_cpus_allowed > 1 &&
|
||||
dl_task(rq->curr) &&
|
||||
@ -2492,7 +2491,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
|
||||
struct root_domain *src_rd;
|
||||
struct rq *rq;
|
||||
|
||||
BUG_ON(!dl_task(p));
|
||||
WARN_ON_ONCE(!dl_task(p));
|
||||
|
||||
rq = task_rq(p);
|
||||
src_rd = rq->rd;
|
||||
@ -3007,17 +3006,15 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
|
||||
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
|
||||
const struct cpumask *trial)
|
||||
{
|
||||
int ret = 1, trial_cpus;
|
||||
unsigned long flags, cap;
|
||||
struct dl_bw *cur_dl_b;
|
||||
unsigned long flags;
|
||||
int ret = 1;
|
||||
|
||||
rcu_read_lock_sched();
|
||||
cur_dl_b = dl_bw_of(cpumask_any(cur));
|
||||
trial_cpus = cpumask_weight(trial);
|
||||
|
||||
cap = __dl_bw_capacity(trial);
|
||||
raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
|
||||
if (cur_dl_b->bw != -1 &&
|
||||
cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
|
||||
if (__dl_overflow(cur_dl_b, cap, 0, 0))
|
||||
ret = 0;
|
||||
raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
|
||||
rcu_read_unlock_sched();
|
||||
|
@ -799,8 +799,6 @@ void init_entity_runnable_average(struct sched_entity *se)
|
||||
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
|
||||
}
|
||||
|
||||
static void attach_entity_cfs_rq(struct sched_entity *se);
|
||||
|
||||
/*
|
||||
* With new tasks being created, their initial util_avgs are extrapolated
|
||||
* based on the cfs_rq's current util_avg:
|
||||
@ -835,20 +833,6 @@ void post_init_entity_util_avg(struct task_struct *p)
|
||||
long cpu_scale = arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq)));
|
||||
long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2;
|
||||
|
||||
if (cap > 0) {
|
||||
if (cfs_rq->avg.util_avg != 0) {
|
||||
sa->util_avg = cfs_rq->avg.util_avg * se->load.weight;
|
||||
sa->util_avg /= (cfs_rq->avg.load_avg + 1);
|
||||
|
||||
if (sa->util_avg > cap)
|
||||
sa->util_avg = cap;
|
||||
} else {
|
||||
sa->util_avg = cap;
|
||||
}
|
||||
}
|
||||
|
||||
sa->runnable_avg = sa->util_avg;
|
||||
|
||||
if (p->sched_class != &fair_sched_class) {
|
||||
/*
|
||||
* For !fair tasks do:
|
||||
@ -864,7 +848,19 @@ void post_init_entity_util_avg(struct task_struct *p)
|
||||
return;
|
||||
}
|
||||
|
||||
attach_entity_cfs_rq(se);
|
||||
if (cap > 0) {
|
||||
if (cfs_rq->avg.util_avg != 0) {
|
||||
sa->util_avg = cfs_rq->avg.util_avg * se->load.weight;
|
||||
sa->util_avg /= (cfs_rq->avg.load_avg + 1);
|
||||
|
||||
if (sa->util_avg > cap)
|
||||
sa->util_avg = cap;
|
||||
} else {
|
||||
sa->util_avg = cap;
|
||||
}
|
||||
}
|
||||
|
||||
sa->runnable_avg = sa->util_avg;
|
||||
}
|
||||
|
||||
#else /* !CONFIG_SMP */
|
||||
@ -1592,11 +1588,11 @@ numa_type numa_classify(unsigned int imbalance_pct,
|
||||
|
||||
#ifdef CONFIG_SCHED_SMT
|
||||
/* Forward declarations of select_idle_sibling helpers */
|
||||
static inline bool test_idle_cores(int cpu, bool def);
|
||||
static inline bool test_idle_cores(int cpu);
|
||||
static inline int numa_idle_core(int idle_core, int cpu)
|
||||
{
|
||||
if (!static_branch_likely(&sched_smt_present) ||
|
||||
idle_core >= 0 || !test_idle_cores(cpu, false))
|
||||
idle_core >= 0 || !test_idle_cores(cpu))
|
||||
return idle_core;
|
||||
|
||||
/*
|
||||
@ -2600,7 +2596,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
|
||||
if (!join)
|
||||
return;
|
||||
|
||||
BUG_ON(irqs_disabled());
|
||||
WARN_ON_ONCE(irqs_disabled());
|
||||
double_lock_irq(&my_grp->lock, &grp->lock);
|
||||
|
||||
for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) {
|
||||
@ -3838,8 +3834,7 @@ static void migrate_se_pelt_lag(struct sched_entity *se) {}
|
||||
* @cfs_rq: cfs_rq to update
|
||||
*
|
||||
* The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
|
||||
* avg. The immediate corollary is that all (fair) tasks must be attached, see
|
||||
* post_init_entity_util_avg().
|
||||
* avg. The immediate corollary is that all (fair) tasks must be attached.
|
||||
*
|
||||
* cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
|
||||
*
|
||||
@ -4003,6 +3998,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
|
||||
#define UPDATE_TG 0x1
|
||||
#define SKIP_AGE_LOAD 0x2
|
||||
#define DO_ATTACH 0x4
|
||||
#define DO_DETACH 0x8
|
||||
|
||||
/* Update task and its cfs_rq load average */
|
||||
static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
@ -4032,6 +4028,13 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
|
||||
attach_entity_load_avg(cfs_rq, se);
|
||||
update_tg_load_avg(cfs_rq);
|
||||
|
||||
} else if (flags & DO_DETACH) {
|
||||
/*
|
||||
* DO_DETACH means we're here from dequeue_entity()
|
||||
* and we are migrating task out of the CPU.
|
||||
*/
|
||||
detach_entity_load_avg(cfs_rq, se);
|
||||
update_tg_load_avg(cfs_rq);
|
||||
} else if (decayed) {
|
||||
cfs_rq_util_change(cfs_rq, 0);
|
||||
|
||||
@ -4064,8 +4067,8 @@ static void remove_entity_load_avg(struct sched_entity *se)
|
||||
|
||||
/*
|
||||
* tasks cannot exit without having gone through wake_up_new_task() ->
|
||||
* post_init_entity_util_avg() which will have added things to the
|
||||
* cfs_rq, so we can remove unconditionally.
|
||||
* enqueue_task_fair() which will have added things to the cfs_rq,
|
||||
* so we can remove unconditionally.
|
||||
*/
|
||||
|
||||
sync_entity_load_avg(se);
|
||||
@ -4262,7 +4265,7 @@ static inline int task_fits_capacity(struct task_struct *p,
|
||||
|
||||
static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
|
||||
{
|
||||
if (!static_branch_unlikely(&sched_asym_cpucapacity))
|
||||
if (!sched_asym_cpucap_active())
|
||||
return;
|
||||
|
||||
if (!p || p->nr_cpus_allowed == 1) {
|
||||
@ -4292,6 +4295,7 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
|
||||
#define UPDATE_TG 0x0
|
||||
#define SKIP_AGE_LOAD 0x0
|
||||
#define DO_ATTACH 0x0
|
||||
#define DO_DETACH 0x0
|
||||
|
||||
static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int not_used1)
|
||||
{
|
||||
@ -4434,7 +4438,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
/*
|
||||
* When enqueuing a sched_entity, we must:
|
||||
* - Update loads to have both entity and cfs_rq synced with now.
|
||||
* - Add its load to cfs_rq->runnable_avg
|
||||
* - For group_entity, update its runnable_weight to reflect the new
|
||||
* h_nr_running of its group cfs_rq.
|
||||
* - For group_entity, update its weight to reflect the new share of
|
||||
* its group cfs_rq
|
||||
* - Add its new weight to cfs_rq->load.weight
|
||||
@ -4511,6 +4516,11 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
|
||||
static void
|
||||
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
int action = UPDATE_TG;
|
||||
|
||||
if (entity_is_task(se) && task_on_rq_migrating(task_of(se)))
|
||||
action |= DO_DETACH;
|
||||
|
||||
/*
|
||||
* Update run-time statistics of the 'current'.
|
||||
*/
|
||||
@ -4519,12 +4529,13 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
/*
|
||||
* When dequeuing a sched_entity, we must:
|
||||
* - Update loads to have both entity and cfs_rq synced with now.
|
||||
* - Subtract its load from the cfs_rq->runnable_avg.
|
||||
* - For group_entity, update its runnable_weight to reflect the new
|
||||
* h_nr_running of its group cfs_rq.
|
||||
* - Subtract its previous weight from cfs_rq->load.weight.
|
||||
* - For group entity, update its weight to reflect the new share
|
||||
* of its group cfs_rq.
|
||||
*/
|
||||
update_load_avg(cfs_rq, se, UPDATE_TG);
|
||||
update_load_avg(cfs_rq, se, action);
|
||||
se_update_runnable(se);
|
||||
|
||||
update_stats_dequeue_fair(cfs_rq, se, flags);
|
||||
@ -5893,8 +5904,8 @@ dequeue_throttle:
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
/* Working cpumask for: load_balance, load_balance_newidle. */
|
||||
DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
|
||||
DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
|
||||
static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
|
||||
static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
|
||||
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
|
||||
@ -6260,7 +6271,7 @@ static inline void set_idle_cores(int cpu, int val)
|
||||
WRITE_ONCE(sds->has_idle_cores, val);
|
||||
}
|
||||
|
||||
static inline bool test_idle_cores(int cpu, bool def)
|
||||
static inline bool test_idle_cores(int cpu)
|
||||
{
|
||||
struct sched_domain_shared *sds;
|
||||
|
||||
@ -6268,7 +6279,7 @@ static inline bool test_idle_cores(int cpu, bool def)
|
||||
if (sds)
|
||||
return READ_ONCE(sds->has_idle_cores);
|
||||
|
||||
return def;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -6284,7 +6295,7 @@ void __update_idle_core(struct rq *rq)
|
||||
int cpu;
|
||||
|
||||
rcu_read_lock();
|
||||
if (test_idle_cores(core, true))
|
||||
if (test_idle_cores(core))
|
||||
goto unlock;
|
||||
|
||||
for_each_cpu(cpu, cpu_smt_mask(core)) {
|
||||
@ -6310,9 +6321,6 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
|
||||
bool idle = true;
|
||||
int cpu;
|
||||
|
||||
if (!static_branch_likely(&sched_smt_present))
|
||||
return __select_idle_cpu(core, p);
|
||||
|
||||
for_each_cpu(cpu, cpu_smt_mask(core)) {
|
||||
if (!available_idle_cpu(cpu)) {
|
||||
idle = false;
|
||||
@ -6339,13 +6347,12 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
|
||||
/*
|
||||
* Scan the local SMT mask for idle CPUs.
|
||||
*/
|
||||
static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
|
||||
static int select_idle_smt(struct task_struct *p, int target)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_cpu(cpu, cpu_smt_mask(target)) {
|
||||
if (!cpumask_test_cpu(cpu, p->cpus_ptr) ||
|
||||
!cpumask_test_cpu(cpu, sched_domain_span(sd)))
|
||||
for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) {
|
||||
if (cpu == target)
|
||||
continue;
|
||||
if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
|
||||
return cpu;
|
||||
@ -6360,9 +6367,9 @@ static inline void set_idle_cores(int cpu, int val)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool test_idle_cores(int cpu, bool def)
|
||||
static inline bool test_idle_cores(int cpu)
|
||||
{
|
||||
return def;
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
|
||||
@ -6370,7 +6377,7 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma
|
||||
return __select_idle_cpu(core, p);
|
||||
}
|
||||
|
||||
static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
|
||||
static inline int select_idle_smt(struct task_struct *p, int target)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
@ -6389,19 +6396,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
|
||||
struct sched_domain_shared *sd_share;
|
||||
struct rq *this_rq = this_rq();
|
||||
int this = smp_processor_id();
|
||||
struct sched_domain *this_sd;
|
||||
struct sched_domain *this_sd = NULL;
|
||||
u64 time = 0;
|
||||
|
||||
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
|
||||
if (!this_sd)
|
||||
return -1;
|
||||
|
||||
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
|
||||
|
||||
if (sched_feat(SIS_PROP) && !has_idle_core) {
|
||||
u64 avg_cost, avg_idle, span_avg;
|
||||
unsigned long now = jiffies;
|
||||
|
||||
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
|
||||
if (!this_sd)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* If we're busy, the assumption that the last idle period
|
||||
* predicts the future is flawed; age away the remaining
|
||||
@ -6455,7 +6462,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
|
||||
if (has_idle_core)
|
||||
set_idle_cores(target, false);
|
||||
|
||||
if (sched_feat(SIS_PROP) && !has_idle_core) {
|
||||
if (sched_feat(SIS_PROP) && this_sd && !has_idle_core) {
|
||||
time = cpu_clock(this) - time;
|
||||
|
||||
/*
|
||||
@ -6506,7 +6513,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
|
||||
|
||||
static inline bool asym_fits_capacity(unsigned long task_util, int cpu)
|
||||
{
|
||||
if (static_branch_unlikely(&sched_asym_cpucapacity))
|
||||
if (sched_asym_cpucap_active())
|
||||
return fits_capacity(task_util, capacity_of(cpu));
|
||||
|
||||
return true;
|
||||
@ -6526,7 +6533,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
* On asymmetric system, update task utilization because we will check
|
||||
* that the task fits with cpu's capacity.
|
||||
*/
|
||||
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
|
||||
if (sched_asym_cpucap_active()) {
|
||||
sync_entity_load_avg(&p->se);
|
||||
task_util = uclamp_task_util(p);
|
||||
}
|
||||
@ -6580,7 +6587,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
* For asymmetric CPU capacity systems, our domain of interest is
|
||||
* sd_asym_cpucapacity rather than sd_llc.
|
||||
*/
|
||||
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
|
||||
if (sched_asym_cpucap_active()) {
|
||||
sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
|
||||
/*
|
||||
* On an asymmetric CPU capacity system where an exclusive
|
||||
@ -6601,10 +6608,10 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
return target;
|
||||
|
||||
if (sched_smt_active()) {
|
||||
has_idle_core = test_idle_cores(target, false);
|
||||
has_idle_core = test_idle_cores(target);
|
||||
|
||||
if (!has_idle_core && cpus_share_cache(prev, target)) {
|
||||
i = select_idle_smt(p, sd, prev);
|
||||
i = select_idle_smt(p, prev);
|
||||
if ((unsigned int)i < nr_cpumask_bits)
|
||||
return i;
|
||||
}
|
||||
@ -7076,8 +7083,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
|
||||
return new_cpu;
|
||||
}
|
||||
|
||||
static void detach_entity_cfs_rq(struct sched_entity *se);
|
||||
|
||||
/*
|
||||
* Called immediately before a task is migrated to a new CPU; task_cpu(p) and
|
||||
* cfs_rq_of(p) references at time of call are still valid and identify the
|
||||
@ -7099,15 +7104,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
|
||||
se->vruntime -= u64_u32_load(cfs_rq->min_vruntime);
|
||||
}
|
||||
|
||||
if (p->on_rq == TASK_ON_RQ_MIGRATING) {
|
||||
/*
|
||||
* In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
|
||||
* rq->lock and can modify state directly.
|
||||
*/
|
||||
lockdep_assert_rq_held(task_rq(p));
|
||||
detach_entity_cfs_rq(se);
|
||||
|
||||
} else {
|
||||
if (!task_on_rq_migrating(p)) {
|
||||
remove_entity_load_avg(se);
|
||||
|
||||
/*
|
||||
@ -7279,7 +7276,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
return;
|
||||
|
||||
find_matching_se(&se, &pse);
|
||||
BUG_ON(!pse);
|
||||
WARN_ON_ONCE(!pse);
|
||||
|
||||
cse_is_idle = se_is_idle(se);
|
||||
pse_is_idle = se_is_idle(pse);
|
||||
@ -7938,7 +7935,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
|
||||
/* Record that we found at least one task that could run on dst_cpu */
|
||||
env->flags &= ~LBF_ALL_PINNED;
|
||||
|
||||
if (task_running(env->src_rq, p)) {
|
||||
if (task_on_cpu(env->src_rq, p)) {
|
||||
schedstat_inc(p->stats.nr_failed_migrations_running);
|
||||
return 0;
|
||||
}
|
||||
@ -8012,8 +8009,6 @@ static struct task_struct *detach_one_task(struct lb_env *env)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static const unsigned int sched_nr_migrate_break = 32;
|
||||
|
||||
/*
|
||||
* detach_tasks() -- tries to detach up to imbalance load/util/tasks from
|
||||
* busiest_rq, as part of a balancing operation within domain "sd".
|
||||
@ -8049,20 +8044,24 @@ static int detach_tasks(struct lb_env *env)
|
||||
if (env->idle != CPU_NOT_IDLE && env->src_rq->nr_running <= 1)
|
||||
break;
|
||||
|
||||
p = list_last_entry(tasks, struct task_struct, se.group_node);
|
||||
|
||||
env->loop++;
|
||||
/* We've more or less seen every task there is, call it quits */
|
||||
if (env->loop > env->loop_max)
|
||||
/*
|
||||
* We've more or less seen every task there is, call it quits
|
||||
* unless we haven't found any movable task yet.
|
||||
*/
|
||||
if (env->loop > env->loop_max &&
|
||||
!(env->flags & LBF_ALL_PINNED))
|
||||
break;
|
||||
|
||||
/* take a breather every nr_migrate tasks */
|
||||
if (env->loop > env->loop_break) {
|
||||
env->loop_break += sched_nr_migrate_break;
|
||||
env->loop_break += SCHED_NR_MIGRATE_BREAK;
|
||||
env->flags |= LBF_NEED_BREAK;
|
||||
break;
|
||||
}
|
||||
|
||||
p = list_last_entry(tasks, struct task_struct, se.group_node);
|
||||
|
||||
if (!can_migrate_task(p, env))
|
||||
goto next;
|
||||
|
||||
@ -8159,7 +8158,7 @@ static void attach_task(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
lockdep_assert_rq_held(rq);
|
||||
|
||||
BUG_ON(task_rq(p) != rq);
|
||||
WARN_ON_ONCE(task_rq(p) != rq);
|
||||
activate_task(rq, p, ENQUEUE_NOCLOCK);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
@ -10099,14 +10098,13 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
||||
struct rq *busiest;
|
||||
struct rq_flags rf;
|
||||
struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask);
|
||||
|
||||
struct lb_env env = {
|
||||
.sd = sd,
|
||||
.dst_cpu = this_cpu,
|
||||
.dst_rq = this_rq,
|
||||
.dst_grpmask = sched_group_span(sd->groups),
|
||||
.idle = idle,
|
||||
.loop_break = sched_nr_migrate_break,
|
||||
.loop_break = SCHED_NR_MIGRATE_BREAK,
|
||||
.cpus = cpus,
|
||||
.fbq_type = all,
|
||||
.tasks = LIST_HEAD_INIT(env.tasks),
|
||||
@ -10134,7 +10132,7 @@ redo:
|
||||
goto out_balanced;
|
||||
}
|
||||
|
||||
BUG_ON(busiest == env.dst_rq);
|
||||
WARN_ON_ONCE(busiest == env.dst_rq);
|
||||
|
||||
schedstat_add(sd->lb_imbalance[idle], env.imbalance);
|
||||
|
||||
@ -10182,7 +10180,9 @@ more_balance:
|
||||
|
||||
if (env.flags & LBF_NEED_BREAK) {
|
||||
env.flags &= ~LBF_NEED_BREAK;
|
||||
goto more_balance;
|
||||
/* Stop if we tried all running tasks */
|
||||
if (env.loop < busiest->nr_running)
|
||||
goto more_balance;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -10213,7 +10213,7 @@ more_balance:
|
||||
env.dst_cpu = env.new_dst_cpu;
|
||||
env.flags &= ~LBF_DST_PINNED;
|
||||
env.loop = 0;
|
||||
env.loop_break = sched_nr_migrate_break;
|
||||
env.loop_break = SCHED_NR_MIGRATE_BREAK;
|
||||
|
||||
/*
|
||||
* Go back to "more_balance" rather than "redo" since we
|
||||
@ -10245,7 +10245,7 @@ more_balance:
|
||||
*/
|
||||
if (!cpumask_subset(cpus, env.dst_grpmask)) {
|
||||
env.loop = 0;
|
||||
env.loop_break = sched_nr_migrate_break;
|
||||
env.loop_break = SCHED_NR_MIGRATE_BREAK;
|
||||
goto redo;
|
||||
}
|
||||
goto out_all_pinned;
|
||||
@ -10430,7 +10430,7 @@ static int active_load_balance_cpu_stop(void *data)
|
||||
* we need to fix it. Originally reported by
|
||||
* Bjorn Helgaas on a 128-CPU setup.
|
||||
*/
|
||||
BUG_ON(busiest_rq == target_rq);
|
||||
WARN_ON_ONCE(busiest_rq == target_rq);
|
||||
|
||||
/* Search for an sd spanning us and the target CPU. */
|
||||
rcu_read_lock();
|
||||
@ -10916,8 +10916,7 @@ static bool update_nohz_stats(struct rq *rq)
|
||||
* can be a simple update of blocked load or a complete load balance with
|
||||
* tasks movement depending of flags.
|
||||
*/
|
||||
static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
|
||||
enum cpu_idle_type idle)
|
||||
static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags)
|
||||
{
|
||||
/* Earliest time when we have to do rebalance again */
|
||||
unsigned long now = jiffies;
|
||||
@ -11032,7 +11031,7 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
|
||||
if (idle != CPU_IDLE)
|
||||
return false;
|
||||
|
||||
_nohz_idle_balance(this_rq, flags, idle);
|
||||
_nohz_idle_balance(this_rq, flags);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -11052,7 +11051,7 @@ void nohz_run_idle_balance(int cpu)
|
||||
* (ie NOHZ_STATS_KICK set) and will do the same.
|
||||
*/
|
||||
if ((flags == NOHZ_NEWILB_KICK) && !need_resched())
|
||||
_nohz_idle_balance(cpu_rq(cpu), NOHZ_STATS_KICK, CPU_IDLE);
|
||||
_nohz_idle_balance(cpu_rq(cpu), NOHZ_STATS_KICK);
|
||||
}
|
||||
|
||||
static void nohz_newidle_balance(struct rq *this_rq)
|
||||
@ -11552,6 +11551,17 @@ static void detach_entity_cfs_rq(struct sched_entity *se)
|
||||
{
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* In case the task sched_avg hasn't been attached:
|
||||
* - A forked task which hasn't been woken up by wake_up_new_task().
|
||||
* - A task which has been woken up by try_to_wake_up() but is
|
||||
* waiting for actually being woken up by sched_ttwu_pending().
|
||||
*/
|
||||
if (!se->avg.last_update_time)
|
||||
return;
|
||||
#endif
|
||||
|
||||
/* Catch up with the cfs_rq and remove our load when we leave */
|
||||
update_load_avg(cfs_rq, se, 0);
|
||||
detach_entity_load_avg(cfs_rq, se);
|
||||
@ -11563,14 +11573,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
|
||||
{
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
/*
|
||||
* Since the real-depth could have been changed (only FAIR
|
||||
* class maintain depth value), reset depth properly.
|
||||
*/
|
||||
se->depth = se->parent ? se->parent->depth + 1 : 0;
|
||||
#endif
|
||||
|
||||
/* Synchronize entity with its cfs_rq */
|
||||
update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
|
||||
attach_entity_load_avg(cfs_rq, se);
|
||||
@ -11666,39 +11668,25 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
static void task_set_group_fair(struct task_struct *p)
|
||||
static void task_change_group_fair(struct task_struct *p)
|
||||
{
|
||||
struct sched_entity *se = &p->se;
|
||||
/*
|
||||
* We couldn't detach or attach a forked task which
|
||||
* hasn't been woken up by wake_up_new_task().
|
||||
*/
|
||||
if (READ_ONCE(p->__state) == TASK_NEW)
|
||||
return;
|
||||
|
||||
set_task_rq(p, task_cpu(p));
|
||||
se->depth = se->parent ? se->parent->depth + 1 : 0;
|
||||
}
|
||||
|
||||
static void task_move_group_fair(struct task_struct *p)
|
||||
{
|
||||
detach_task_cfs_rq(p);
|
||||
set_task_rq(p, task_cpu(p));
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* Tell se's cfs_rq has been changed -- migrated */
|
||||
p->se.avg.last_update_time = 0;
|
||||
#endif
|
||||
set_task_rq(p, task_cpu(p));
|
||||
attach_task_cfs_rq(p);
|
||||
}
|
||||
|
||||
static void task_change_group_fair(struct task_struct *p, int type)
|
||||
{
|
||||
switch (type) {
|
||||
case TASK_SET_GROUP:
|
||||
task_set_group_fair(p);
|
||||
break;
|
||||
|
||||
case TASK_MOVE_GROUP:
|
||||
task_move_group_fair(p);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void free_fair_sched_group(struct task_group *tg)
|
||||
{
|
||||
int i;
|
||||
@ -12075,6 +12063,13 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
|
||||
__init void init_sched_fair_class(void)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
zalloc_cpumask_var_node(&per_cpu(load_balance_mask, i), GFP_KERNEL, cpu_to_node(i));
|
||||
zalloc_cpumask_var_node(&per_cpu(select_rq_mask, i), GFP_KERNEL, cpu_to_node(i));
|
||||
}
|
||||
|
||||
open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
|
||||
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
|
@ -509,7 +509,7 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
|
||||
unsigned int cpu_cap;
|
||||
|
||||
/* Only heterogeneous systems can benefit from this check */
|
||||
if (!static_branch_unlikely(&sched_asym_cpucapacity))
|
||||
if (!sched_asym_cpucap_active())
|
||||
return true;
|
||||
|
||||
min_cap = uclamp_eff_value(p, UCLAMP_MIN);
|
||||
@ -843,7 +843,7 @@ static void __disable_runtime(struct rq *rq)
|
||||
* We cannot be left wanting - that would mean some runtime
|
||||
* leaked out of the system.
|
||||
*/
|
||||
BUG_ON(want);
|
||||
WARN_ON_ONCE(want);
|
||||
balanced:
|
||||
/*
|
||||
* Disable all the borrow logic by pretending we have inf
|
||||
@ -1062,11 +1062,7 @@ static void update_curr_rt(struct rq *rq)
|
||||
|
||||
trace_sched_stat_runtime(curr, delta_exec, 0);
|
||||
|
||||
curr->se.sum_exec_runtime += delta_exec;
|
||||
account_group_exec_runtime(curr, delta_exec);
|
||||
|
||||
curr->se.exec_start = now;
|
||||
cgroup_account_cputime(curr, delta_exec);
|
||||
update_current_exec_runtime(curr, now, delta_exec);
|
||||
|
||||
if (!rt_bandwidth_enabled())
|
||||
return;
|
||||
@ -1849,7 +1845,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
|
||||
|
||||
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||
{
|
||||
if (!task_running(rq, p) &&
|
||||
if (!task_on_cpu(rq, p) &&
|
||||
cpumask_test_cpu(cpu, &p->cpus_mask))
|
||||
return 1;
|
||||
|
||||
@ -1897,7 +1893,7 @@ static int find_lowest_rq(struct task_struct *task)
|
||||
* If we're on asym system ensure we consider the different capacities
|
||||
* of the CPUs when searching for the lowest_mask.
|
||||
*/
|
||||
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
|
||||
if (sched_asym_cpucap_active()) {
|
||||
|
||||
ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
|
||||
task, lowest_mask,
|
||||
@ -2004,7 +2000,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
|
||||
*/
|
||||
if (unlikely(task_rq(task) != rq ||
|
||||
!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
|
||||
task_running(rq, task) ||
|
||||
task_on_cpu(rq, task) ||
|
||||
!rt_task(task) ||
|
||||
!task_on_rq_queued(task))) {
|
||||
|
||||
@ -2462,7 +2458,7 @@ skip:
|
||||
*/
|
||||
static void task_woken_rt(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
bool need_to_push = !task_running(rq, p) &&
|
||||
bool need_to_push = !task_on_cpu(rq, p) &&
|
||||
!test_tsk_need_resched(rq->curr) &&
|
||||
p->nr_cpus_allowed > 1 &&
|
||||
(dl_task(rq->curr) || rt_task(rq->curr)) &&
|
||||
|
@ -321,21 +321,6 @@ struct dl_bw {
|
||||
u64 total_bw;
|
||||
};
|
||||
|
||||
/*
|
||||
* Verify the fitness of task @p to run on @cpu taking into account the
|
||||
* CPU original capacity and the runtime/deadline ratio of the task.
|
||||
*
|
||||
* The function will return true if the CPU original capacity of the
|
||||
* @cpu scaled by SCHED_CAPACITY_SCALE >= runtime/deadline ratio of the
|
||||
* task and false otherwise.
|
||||
*/
|
||||
static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
|
||||
{
|
||||
unsigned long cap = arch_scale_cpu_capacity(cpu);
|
||||
|
||||
return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
|
||||
}
|
||||
|
||||
extern void init_dl_bw(struct dl_bw *dl_b);
|
||||
extern int sched_dl_global_validate(void);
|
||||
extern void sched_dl_do_global(void);
|
||||
@ -1815,6 +1800,11 @@ DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
|
||||
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
|
||||
extern struct static_key_false sched_asym_cpucapacity;
|
||||
|
||||
static __always_inline bool sched_asym_cpucap_active(void)
|
||||
{
|
||||
return static_branch_unlikely(&sched_asym_cpucapacity);
|
||||
}
|
||||
|
||||
struct sched_group_capacity {
|
||||
atomic_t ref;
|
||||
/*
|
||||
@ -1942,6 +1932,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
|
||||
set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
|
||||
p->se.cfs_rq = tg->cfs_rq[cpu];
|
||||
p->se.parent = tg->se[cpu];
|
||||
p->se.depth = tg->se[cpu] ? tg->se[cpu]->depth + 1 : 0;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
@ -2060,7 +2051,7 @@ static inline int task_current(struct rq *rq, struct task_struct *p)
|
||||
return rq->curr == p;
|
||||
}
|
||||
|
||||
static inline int task_running(struct rq *rq, struct task_struct *p)
|
||||
static inline int task_on_cpu(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
return p->on_cpu;
|
||||
@ -2204,11 +2195,8 @@ struct sched_class {
|
||||
|
||||
void (*update_curr)(struct rq *rq);
|
||||
|
||||
#define TASK_SET_GROUP 0
|
||||
#define TASK_MOVE_GROUP 1
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
void (*task_change_group)(struct task_struct *p, int type);
|
||||
void (*task_change_group)(struct task_struct *p);
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -2435,6 +2423,12 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
|
||||
|
||||
extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RT
|
||||
#define SCHED_NR_MIGRATE_BREAK 8
|
||||
#else
|
||||
#define SCHED_NR_MIGRATE_BREAK 32
|
||||
#endif
|
||||
|
||||
extern const_debug unsigned int sysctl_sched_nr_migrate;
|
||||
extern const_debug unsigned int sysctl_sched_migration_cost;
|
||||
|
||||
@ -2709,8 +2703,8 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
|
||||
__acquires(rq1->lock)
|
||||
__acquires(rq2->lock)
|
||||
{
|
||||
BUG_ON(!irqs_disabled());
|
||||
BUG_ON(rq1 != rq2);
|
||||
WARN_ON_ONCE(!irqs_disabled());
|
||||
WARN_ON_ONCE(rq1 != rq2);
|
||||
raw_spin_rq_lock(rq1);
|
||||
__acquire(rq2->lock); /* Fake it out ;) */
|
||||
double_rq_clock_clear_update(rq1, rq2);
|
||||
@@ -2726,7 +2720,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
         __releases(rq1->lock)
         __releases(rq2->lock)
 {
-        BUG_ON(rq1 != rq2);
+        WARN_ON_ONCE(rq1 != rq2);
         raw_spin_rq_unlock(rq1);
         __release(rq2->lock);
 }
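These two hunks are part of the series-wide move away from BUG_ON(): the checks now log a single backtrace and let the system continue instead of panicking. Because WARN_ON_ONCE() evaluates to its condition, callers that have a sane fallback can still bail out; an illustrative pattern, kernel context assumed and not taken from this diff:

        /* Warn once, then degrade gracefully instead of crashing. */
        static void example_check(struct rq *rq1, struct rq *rq2)
        {
                if (WARN_ON_ONCE(rq1 != rq2))
                        return;         /* unexpected, but recoverable */

                /* ... normal path ... */
        }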
@@ -2896,6 +2890,21 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
                                  enum cpu_util_type type,
                                  struct task_struct *p);
 
+/*
+ * Verify the fitness of task @p to run on @cpu taking into account the
+ * CPU original capacity and the runtime/deadline ratio of the task.
+ *
+ * The function will return true if the original capacity of @cpu is
+ * greater than or equal to task's deadline density right shifted by
+ * (BW_SHIFT - SCHED_CAPACITY_SHIFT) and false otherwise.
+ */
+static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
+{
+        unsigned long cap = arch_scale_cpu_capacity(cpu);
+
+        return cap >= p->dl.dl_density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT);
+}
+
 static inline unsigned long cpu_bw_dl(struct rq *rq)
 {
         return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
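The relocated dl_task_fits_capacity() (its old copy is removed in the sched.h hunk further up) now compares capacities directly instead of multiplying the deadline by the CPU capacity on every call. The two forms agree up to integer rounding, assuming cap_scale(v, cap) is (v * cap) >> SCHED_CAPACITY_SHIFT and dl_density is to_ratio(dl_deadline, dl_runtime), i.e. runtime/deadline scaled by 2^BW_SHIFT. A standalone sketch of the arithmetic:

        /* Demonstration only; constants mirror the kernel's values
         * (BW_SHIFT = 20, SCHED_CAPACITY_SHIFT = 10). Build with: cc -o dl_fits dl_fits.c */
        #include <stdio.h>
        #include <stdint.h>

        #define BW_SHIFT                20
        #define SCHED_CAPACITY_SHIFT    10

        static int fits_old(uint64_t runtime, uint64_t deadline, uint64_t cap)
        {
                /* old form: cap_scale(deadline, cap) >= runtime */
                return ((deadline * cap) >> SCHED_CAPACITY_SHIFT) >= runtime;
        }

        static int fits_new(uint64_t runtime, uint64_t deadline, uint64_t cap)
        {
                uint64_t density = (runtime << BW_SHIFT) / deadline;    /* to_ratio() */

                /* new form: compare capacities directly, no per-call multiply */
                return cap >= (density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT));
        }

        int main(void)
        {
                /* 2.5ms runtime per 10ms deadline on a CPU at ~50% capacity */
                uint64_t runtime = 2500000, deadline = 10000000, cap = 512;

                printf("old=%d new=%d\n",
                       fits_old(runtime, deadline, cap),
                       fits_new(runtime, deadline, cap));
                return 0;
        }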
@@ -3157,4 +3166,14 @@ extern int sched_dynamic_mode(const char *str);
 extern void sched_dynamic_update(int mode);
 #endif
 
+static inline void update_current_exec_runtime(struct task_struct *curr,
+                                                u64 now, u64 delta_exec)
+{
+        curr->se.sum_exec_runtime += delta_exec;
+        account_group_exec_runtime(curr, delta_exec);
+
+        curr->se.exec_start = now;
+        cgroup_account_cputime(curr, delta_exec);
+}
+
 #endif /* _KERNEL_SCHED_SCHED_H */
kernel/sched/stop_task.c
@@ -71,20 +71,17 @@ static void yield_task_stop(struct rq *rq)
 static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 {
         struct task_struct *curr = rq->curr;
-        u64 delta_exec;
+        u64 now, delta_exec;
 
-        delta_exec = rq_clock_task(rq) - curr->se.exec_start;
+        now = rq_clock_task(rq);
+        delta_exec = now - curr->se.exec_start;
         if (unlikely((s64)delta_exec < 0))
                 delta_exec = 0;
 
         schedstat_set(curr->stats.exec_max,
                       max(curr->stats.exec_max, delta_exec));
 
-        curr->se.sum_exec_runtime += delta_exec;
-        account_group_exec_runtime(curr, delta_exec);
-
-        curr->se.exec_start = rq_clock_task(rq);
-        cgroup_account_cputime(curr, delta_exec);
+        update_current_exec_runtime(curr, now, delta_exec);
 }
 
 /*
kernel/signal.c
@@ -2305,7 +2305,7 @@ static int ptrace_stop(int exit_code, int why, unsigned long message,
                 read_unlock(&tasklist_lock);
                 cgroup_enter_frozen();
                 preempt_enable_no_resched();
-                freezable_schedule();
+                schedule();
                 cgroup_leave_frozen(true);
 
                 /*
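This and the following hunks all apply the same conversion: the freezable_schedule() wrappers, which bracketed schedule() with freezer_do_not_count()/freezer_count(), go away, and the sleep state instead carries TASK_FREEZABLE (as the do_freezer_trap() and do_sigtimedwait() hunks below show), so the rewritten freezer can freeze the task right where it sleeps. A minimal sketch of the new idiom, kernel context assumed (freezer_example_wait() and its condition callback are hypothetical):

        /* Sleep freezably until @done() returns true. */
        static void freezer_example_wait(bool (*done)(void))
        {
                while (!done()) {
                        set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
                        if (!done())
                                schedule();
                        __set_current_state(TASK_RUNNING);
                }
        }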
@@ -2474,7 +2474,7 @@ static bool do_signal_stop(int signr)
 
                 /* Now we don't run again until woken by SIGCONT or SIGKILL */
                 cgroup_enter_frozen();
-                freezable_schedule();
+                schedule();
                 return true;
         } else {
                 /*
@@ -2549,11 +2549,11 @@ static void do_freezer_trap(void)
          * immediately (if there is a non-fatal signal pending), and
          * put the task into sleep.
          */
-        __set_current_state(TASK_INTERRUPTIBLE);
+        __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
         clear_thread_flag(TIF_SIGPENDING);
         spin_unlock_irq(&current->sighand->siglock);
         cgroup_enter_frozen();
-        freezable_schedule();
+        schedule();
 }
 
 static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
@@ -3601,9 +3601,9 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
                 recalc_sigpending();
                 spin_unlock_irq(&tsk->sighand->siglock);
 
-                __set_current_state(TASK_INTERRUPTIBLE);
-                ret = freezable_schedule_hrtimeout_range(to, tsk->timer_slack_ns,
-                                HRTIMER_MODE_REL);
+                __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
+                ret = schedule_hrtimeout_range(to, tsk->timer_slack_ns,
+                                HRTIMER_MODE_REL);
                 spin_lock_irq(&tsk->sighand->siglock);
                 __set_task_blocked(tsk, &tsk->real_blocked);
                 sigemptyset(&tsk->real_blocked);
kernel/time/hrtimer.c
@@ -2037,11 +2037,11 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
         struct restart_block *restart;
 
         do {
-                set_current_state(TASK_INTERRUPTIBLE);
+                set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
                 hrtimer_sleeper_start_expires(t, mode);
 
                 if (likely(t->task))
-                        freezable_schedule();
+                        schedule();
 
                 hrtimer_cancel(&t->timer);
                 mode = HRTIMER_MODE_ABS;
kernel/umh.c
@@ -28,6 +28,7 @@
 #include <linux/async.h>
 #include <linux/uaccess.h>
 #include <linux/initrd.h>
+#include <linux/freezer.h>
 
 #include <trace/events/module.h>
 
@@ -403,6 +404,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
  */
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
+        unsigned int state = TASK_UNINTERRUPTIBLE;
         DECLARE_COMPLETION_ONSTACK(done);
         int retval = 0;
 
@@ -436,18 +438,22 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
         if (wait == UMH_NO_WAIT)        /* task has freed sub_info */
                 goto unlock;
 
-        if (wait & UMH_KILLABLE) {
-                retval = wait_for_completion_killable(&done);
-                if (!retval)
-                        goto wait_done;
+        if (wait & UMH_KILLABLE)
+                state |= TASK_KILLABLE;
+
+        if (wait & UMH_FREEZABLE)
+                state |= TASK_FREEZABLE;
+
+        retval = wait_for_completion_state(&done, state);
+        if (!retval)
+                goto wait_done;
 
+        if (wait & UMH_KILLABLE) {
                 /* umh_complete() will see NULL and free sub_info */
                 if (xchg(&sub_info->complete, NULL))
                         goto unlock;
                 /* fallthrough, umh_complete() was already called */
         }
 
-        wait_for_completion(&done);
 wait_done:
         retval = sub_info->retval;
 out:
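wait_for_completion_state() is one of the new primitives added by this series: it sleeps on a completion in a caller-supplied task state, which is what lets UMH compose TASK_KILLABLE and TASK_FREEZABLE instead of choosing between wait_for_completion() and wait_for_completion_killable(). A hypothetical caller sketch, kernel context assumed:

        /* Wait for @done, killable and freezable at once; returns 0 on
         * completion, otherwise a negative error (matching the !retval
         * check in the hunk above). */
        static int wait_killable_freezable(struct completion *done)
        {
                return wait_for_completion_state(done,
                                TASK_KILLABLE | TASK_FREEZABLE);
        }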
mm/khugepaged.c
@@ -730,8 +730,8 @@ static void khugepaged_alloc_sleep(void)
         DEFINE_WAIT(wait);
 
         add_wait_queue(&khugepaged_wait, &wait);
-        freezable_schedule_timeout_interruptible(
-                msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
+        __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
+        schedule_timeout(msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
         remove_wait_queue(&khugepaged_wait, &wait);
 }
 
net/sunrpc/sched.c
@@ -269,7 +269,7 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
 
 static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
 {
-        freezable_schedule_unsafe();
+        schedule();
         if (signal_pending_state(mode, current))
                 return -ERESTARTSYS;
         return 0;
@@ -333,14 +333,12 @@ static int rpc_complete_task(struct rpc_task *task)
  * to enforce taking of the wq->lock and hence avoid races with
  * rpc_complete_task().
  */
-int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action)
+int rpc_wait_for_completion_task(struct rpc_task *task)
 {
-        if (action == NULL)
-                action = rpc_wait_bit_killable;
         return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
-                        action, TASK_KILLABLE);
+                        rpc_wait_bit_killable, TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
 }
-EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
+EXPORT_SYMBOL_GPL(rpc_wait_for_completion_task);
 
 /*
  * Make an RPC task runnable.
@@ -964,7 +962,7 @@ static void __rpc_execute(struct rpc_task *task)
                 trace_rpc_task_sync_sleep(task, task->tk_action);
                 status = out_of_line_wait_on_bit(&task->tk_runstate,
                                 RPC_TASK_QUEUED, rpc_wait_bit_killable,
-                                TASK_KILLABLE);
+                                TASK_KILLABLE|TASK_FREEZABLE);
                 if (status < 0) {
                         /*
                          * When a sync task receives a signal, it exits with
net/unix/af_unix.c
@@ -2560,13 +2560,14 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
                                   struct sk_buff *last, unsigned int last_len,
                                   bool freezable)
 {
+        unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
         struct sk_buff *tail;
         DEFINE_WAIT(wait);
 
         unix_state_lock(sk);
 
         for (;;) {
-                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+                prepare_to_wait(sk_sleep(sk), &wait, state);
 
                 tail = skb_peek_tail(&sk->sk_receive_queue);
                 if (tail != last ||
@@ -2579,10 +2580,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
 
                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                 unix_state_unlock(sk);
-                if (freezable)
-                        timeo = freezable_schedule_timeout(timeo);
-                else
-                        timeo = schedule_timeout(timeo);
+                timeo = schedule_timeout(timeo);
                 unix_state_lock(sk);
 
                 if (sock_flag(sk, SOCK_DEAD))
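The `freezable * TASK_FREEZABLE` expression above is a branchless way of turning the bool into either 0 or the flag, so the same prepare_to_wait() call serves both callers. A tiny standalone check of the idiom (the flag value here is a stand-in, not necessarily the kernel's):

        #include <assert.h>
        #include <stdbool.h>

        #define EXAMPLE_FLAG 0x00002000         /* stand-in for TASK_FREEZABLE */

        int main(void)
        {
                bool freezable = true;

                assert((freezable * EXAMPLE_FLAG) == EXAMPLE_FLAG);     /* 1 * flag */
                freezable = false;
                assert((freezable * EXAMPLE_FLAG) == 0);                /* 0 * flag */
                return 0;
        }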