Merge branch 'linus' into locking/kcsan, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
@@ -1101,13 +1101,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
|
||||
audit_log_end(ab);
|
||||
}
|
||||
|
||||
static int audit_set_feature(struct sk_buff *skb)
|
||||
static int audit_set_feature(struct audit_features *uaf)
|
||||
{
|
||||
struct audit_features *uaf;
|
||||
int i;
|
||||
|
||||
BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names));
|
||||
uaf = nlmsg_data(nlmsg_hdr(skb));
|
||||
|
||||
/* if there is ever a version 2 we should handle that here */
|
||||
|
||||
@@ -1175,6 +1173,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||
{
|
||||
u32 seq;
|
||||
void *data;
|
||||
int data_len;
|
||||
int err;
|
||||
struct audit_buffer *ab;
|
||||
u16 msg_type = nlh->nlmsg_type;
|
||||
@@ -1188,6 +1187,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||
|
||||
seq = nlh->nlmsg_seq;
|
||||
data = nlmsg_data(nlh);
|
||||
data_len = nlmsg_len(nlh);
|
||||
|
||||
switch (msg_type) {
|
||||
case AUDIT_GET: {
|
||||
@@ -1211,7 +1211,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||
struct audit_status s;
|
||||
memset(&s, 0, sizeof(s));
|
||||
/* guard against past and future API changes */
|
||||
memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
|
||||
memcpy(&s, data, min_t(size_t, sizeof(s), data_len));
|
||||
if (s.mask & AUDIT_STATUS_ENABLED) {
|
||||
err = audit_set_enabled(s.enabled);
|
||||
if (err < 0)
|
||||
@@ -1315,7 +1315,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||
return err;
|
||||
break;
|
||||
case AUDIT_SET_FEATURE:
|
||||
err = audit_set_feature(skb);
|
||||
if (data_len < sizeof(struct audit_features))
|
||||
return -EINVAL;
|
||||
err = audit_set_feature(data);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
@@ -1327,6 +1329,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||
|
||||
err = audit_filter(msg_type, AUDIT_FILTER_USER);
|
||||
if (err == 1) { /* match or error */
|
||||
char *str = data;
|
||||
|
||||
err = 0;
|
||||
if (msg_type == AUDIT_USER_TTY) {
|
||||
err = tty_audit_push();
|
||||
@@ -1334,26 +1338,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||
break;
|
||||
}
|
||||
audit_log_user_recv_msg(&ab, msg_type);
|
||||
if (msg_type != AUDIT_USER_TTY)
|
||||
if (msg_type != AUDIT_USER_TTY) {
|
||||
/* ensure NULL termination */
|
||||
str[data_len - 1] = '\0';
|
||||
audit_log_format(ab, " msg='%.*s'",
|
||||
AUDIT_MESSAGE_TEXT_MAX,
|
||||
(char *)data);
|
||||
else {
|
||||
int size;
|
||||
|
||||
str);
|
||||
} else {
|
||||
audit_log_format(ab, " data=");
|
||||
size = nlmsg_len(nlh);
|
||||
if (size > 0 &&
|
||||
((unsigned char *)data)[size - 1] == '\0')
|
||||
size--;
|
||||
audit_log_n_untrustedstring(ab, data, size);
|
||||
if (data_len > 0 && str[data_len - 1] == '\0')
|
||||
data_len--;
|
||||
audit_log_n_untrustedstring(ab, str, data_len);
|
||||
}
|
||||
audit_log_end(ab);
|
||||
}
|
||||
break;
|
||||
case AUDIT_ADD_RULE:
|
||||
case AUDIT_DEL_RULE:
|
||||
if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
|
||||
if (data_len < sizeof(struct audit_rule_data))
|
||||
return -EINVAL;
|
||||
if (audit_enabled == AUDIT_LOCKED) {
|
||||
audit_log_common_recv_msg(audit_context(), &ab,
|
||||
@@ -1365,7 +1367,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||
audit_log_end(ab);
|
||||
return -EPERM;
|
||||
}
|
||||
err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh));
|
||||
err = audit_rule_change(msg_type, seq, data, data_len);
|
||||
break;
|
||||
case AUDIT_LIST_RULES:
|
||||
err = audit_list_rules_send(skb, seq);
|
||||
@@ -1380,7 +1382,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||
case AUDIT_MAKE_EQUIV: {
|
||||
void *bufp = data;
|
||||
u32 sizes[2];
|
||||
size_t msglen = nlmsg_len(nlh);
|
||||
size_t msglen = data_len;
|
||||
char *old, *new;
|
||||
|
||||
err = -EINVAL;
|
||||
@@ -1456,7 +1458,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
/* guard against past and future API changes */
|
||||
memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
|
||||
memcpy(&s, data, min_t(size_t, sizeof(s), data_len));
|
||||
/* check if new data is valid */
|
||||
if ((s.enabled != 0 && s.enabled != 1) ||
|
||||
(s.log_passwd != 0 && s.log_passwd != 1))
|
||||
|
||||
@@ -456,6 +456,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
|
||||
bufp = data->buf;
|
||||
for (i = 0; i < data->field_count; i++) {
|
||||
struct audit_field *f = &entry->rule.fields[i];
|
||||
u32 f_val;
|
||||
|
||||
err = -EINVAL;
|
||||
|
||||
@@ -464,12 +465,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
|
||||
goto exit_free;
|
||||
|
||||
f->type = data->fields[i];
|
||||
f->val = data->values[i];
|
||||
f_val = data->values[i];
|
||||
|
||||
/* Support legacy tests for a valid loginuid */
|
||||
if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
|
||||
if ((f->type == AUDIT_LOGINUID) && (f_val == AUDIT_UID_UNSET)) {
|
||||
f->type = AUDIT_LOGINUID_SET;
|
||||
f->val = 0;
|
||||
f_val = 0;
|
||||
entry->rule.pflags |= AUDIT_LOGINUID_LEGACY;
|
||||
}
|
||||
|
||||
@@ -485,7 +486,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
|
||||
case AUDIT_SUID:
|
||||
case AUDIT_FSUID:
|
||||
case AUDIT_OBJ_UID:
|
||||
f->uid = make_kuid(current_user_ns(), f->val);
|
||||
f->uid = make_kuid(current_user_ns(), f_val);
|
||||
if (!uid_valid(f->uid))
|
||||
goto exit_free;
|
||||
break;
|
||||
@@ -494,11 +495,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
|
||||
case AUDIT_SGID:
|
||||
case AUDIT_FSGID:
|
||||
case AUDIT_OBJ_GID:
|
||||
f->gid = make_kgid(current_user_ns(), f->val);
|
||||
f->gid = make_kgid(current_user_ns(), f_val);
|
||||
if (!gid_valid(f->gid))
|
||||
goto exit_free;
|
||||
break;
|
||||
case AUDIT_ARCH:
|
||||
f->val = f_val;
|
||||
entry->rule.arch_f = f;
|
||||
break;
|
||||
case AUDIT_SUBJ_USER:
|
||||
@@ -511,11 +513,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
|
||||
case AUDIT_OBJ_TYPE:
|
||||
case AUDIT_OBJ_LEV_LOW:
|
||||
case AUDIT_OBJ_LEV_HIGH:
|
||||
str = audit_unpack_string(&bufp, &remain, f->val);
|
||||
if (IS_ERR(str))
|
||||
str = audit_unpack_string(&bufp, &remain, f_val);
|
||||
if (IS_ERR(str)) {
|
||||
err = PTR_ERR(str);
|
||||
goto exit_free;
|
||||
entry->rule.buflen += f->val;
|
||||
|
||||
}
|
||||
entry->rule.buflen += f_val;
|
||||
f->lsm_str = str;
|
||||
err = security_audit_rule_init(f->type, f->op, str,
|
||||
(void **)&f->lsm_rule);
|
||||
/* Keep currently invalid fields around in case they
|
||||
@@ -524,68 +528,71 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
|
||||
pr_warn("audit rule for LSM \'%s\' is invalid\n",
|
||||
str);
|
||||
err = 0;
|
||||
}
|
||||
if (err) {
|
||||
kfree(str);
|
||||
} else if (err)
|
||||
goto exit_free;
|
||||
} else
|
||||
f->lsm_str = str;
|
||||
break;
|
||||
case AUDIT_WATCH:
|
||||
str = audit_unpack_string(&bufp, &remain, f->val);
|
||||
if (IS_ERR(str))
|
||||
str = audit_unpack_string(&bufp, &remain, f_val);
|
||||
if (IS_ERR(str)) {
|
||||
err = PTR_ERR(str);
|
||||
goto exit_free;
|
||||
entry->rule.buflen += f->val;
|
||||
|
||||
err = audit_to_watch(&entry->rule, str, f->val, f->op);
|
||||
}
|
||||
err = audit_to_watch(&entry->rule, str, f_val, f->op);
|
||||
if (err) {
|
||||
kfree(str);
|
||||
goto exit_free;
|
||||
}
|
||||
entry->rule.buflen += f_val;
|
||||
break;
|
||||
case AUDIT_DIR:
|
||||
str = audit_unpack_string(&bufp, &remain, f->val);
|
||||
if (IS_ERR(str))
|
||||
str = audit_unpack_string(&bufp, &remain, f_val);
|
||||
if (IS_ERR(str)) {
|
||||
err = PTR_ERR(str);
|
||||
goto exit_free;
|
||||
entry->rule.buflen += f->val;
|
||||
|
||||
}
|
||||
err = audit_make_tree(&entry->rule, str, f->op);
|
||||
kfree(str);
|
||||
if (err)
|
||||
goto exit_free;
|
||||
entry->rule.buflen += f_val;
|
||||
break;
|
||||
case AUDIT_INODE:
|
||||
f->val = f_val;
|
||||
err = audit_to_inode(&entry->rule, f);
|
||||
if (err)
|
||||
goto exit_free;
|
||||
break;
|
||||
case AUDIT_FILTERKEY:
|
||||
if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN)
|
||||
if (entry->rule.filterkey || f_val > AUDIT_MAX_KEY_LEN)
|
||||
goto exit_free;
|
||||
str = audit_unpack_string(&bufp, &remain, f->val);
|
||||
if (IS_ERR(str))
|
||||
goto exit_free;
|
||||
entry->rule.buflen += f->val;
|
||||
entry->rule.filterkey = str;
|
||||
break;
|
||||
case AUDIT_EXE:
|
||||
if (entry->rule.exe || f->val > PATH_MAX)
|
||||
goto exit_free;
|
||||
str = audit_unpack_string(&bufp, &remain, f->val);
|
||||
str = audit_unpack_string(&bufp, &remain, f_val);
|
||||
if (IS_ERR(str)) {
|
||||
err = PTR_ERR(str);
|
||||
goto exit_free;
|
||||
}
|
||||
entry->rule.buflen += f->val;
|
||||
|
||||
audit_mark = audit_alloc_mark(&entry->rule, str, f->val);
|
||||
entry->rule.buflen += f_val;
|
||||
entry->rule.filterkey = str;
|
||||
break;
|
||||
case AUDIT_EXE:
|
||||
if (entry->rule.exe || f_val > PATH_MAX)
|
||||
goto exit_free;
|
||||
str = audit_unpack_string(&bufp, &remain, f_val);
|
||||
if (IS_ERR(str)) {
|
||||
err = PTR_ERR(str);
|
||||
goto exit_free;
|
||||
}
|
||||
audit_mark = audit_alloc_mark(&entry->rule, str, f_val);
|
||||
if (IS_ERR(audit_mark)) {
|
||||
kfree(str);
|
||||
err = PTR_ERR(audit_mark);
|
||||
goto exit_free;
|
||||
}
|
||||
entry->rule.buflen += f_val;
|
||||
entry->rule.exe = audit_mark;
|
||||
break;
|
||||
default:
|
||||
f->val = f_val;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -471,6 +471,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
|
||||
*/
|
||||
p++;
|
||||
if (p >= end) {
|
||||
(*pos)++;
|
||||
return NULL;
|
||||
} else {
|
||||
*pos = *p;
|
||||
@@ -782,7 +783,7 @@ void cgroup1_release_agent(struct work_struct *work)
|
||||
|
||||
pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
|
||||
agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
|
||||
if (!pathbuf || !agentbuf)
|
||||
if (!pathbuf || !agentbuf || !strlen(agentbuf))
|
||||
goto out;
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
|
||||
@@ -3542,21 +3542,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
|
||||
static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct cgroup *cgrp = seq_css(seq)->cgroup;
|
||||
struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
|
||||
struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
|
||||
|
||||
return psi_show(seq, psi, PSI_IO);
|
||||
}
|
||||
static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct cgroup *cgrp = seq_css(seq)->cgroup;
|
||||
struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
|
||||
struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
|
||||
|
||||
return psi_show(seq, psi, PSI_MEM);
|
||||
}
|
||||
static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct cgroup *cgrp = seq_css(seq)->cgroup;
|
||||
struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
|
||||
struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
|
||||
|
||||
return psi_show(seq, psi, PSI_CPU);
|
||||
}
|
||||
@@ -4400,12 +4400,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
|
||||
}
|
||||
} while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
|
||||
|
||||
if (!list_empty(&cset->tasks))
|
||||
if (!list_empty(&cset->tasks)) {
|
||||
it->task_pos = cset->tasks.next;
|
||||
else if (!list_empty(&cset->mg_tasks))
|
||||
it->cur_tasks_head = &cset->tasks;
|
||||
} else if (!list_empty(&cset->mg_tasks)) {
|
||||
it->task_pos = cset->mg_tasks.next;
|
||||
else
|
||||
it->cur_tasks_head = &cset->mg_tasks;
|
||||
} else {
|
||||
it->task_pos = cset->dying_tasks.next;
|
||||
it->cur_tasks_head = &cset->dying_tasks;
|
||||
}
|
||||
|
||||
it->tasks_head = &cset->tasks;
|
||||
it->mg_tasks_head = &cset->mg_tasks;
|
||||
@@ -4463,10 +4467,14 @@ repeat:
|
||||
else
|
||||
it->task_pos = it->task_pos->next;
|
||||
|
||||
if (it->task_pos == it->tasks_head)
|
||||
if (it->task_pos == it->tasks_head) {
|
||||
it->task_pos = it->mg_tasks_head->next;
|
||||
if (it->task_pos == it->mg_tasks_head)
|
||||
it->cur_tasks_head = it->mg_tasks_head;
|
||||
}
|
||||
if (it->task_pos == it->mg_tasks_head) {
|
||||
it->task_pos = it->dying_tasks_head->next;
|
||||
it->cur_tasks_head = it->dying_tasks_head;
|
||||
}
|
||||
if (it->task_pos == it->dying_tasks_head)
|
||||
css_task_iter_advance_css_set(it);
|
||||
} else {
|
||||
@@ -4485,11 +4493,12 @@ repeat:
|
||||
goto repeat;
|
||||
|
||||
/* and dying leaders w/o live member threads */
|
||||
if (!atomic_read(&task->signal->live))
|
||||
if (it->cur_tasks_head == it->dying_tasks_head &&
|
||||
!atomic_read(&task->signal->live))
|
||||
goto repeat;
|
||||
} else {
|
||||
/* skip all dying ones */
|
||||
if (task->flags & PF_EXITING)
|
||||
if (it->cur_tasks_head == it->dying_tasks_head)
|
||||
goto repeat;
|
||||
}
|
||||
}
|
||||
@@ -4595,6 +4604,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
|
||||
struct kernfs_open_file *of = s->private;
|
||||
struct css_task_iter *it = of->priv;
|
||||
|
||||
if (pos)
|
||||
(*pos)++;
|
||||
|
||||
return css_task_iter_next(it);
|
||||
}
|
||||
|
||||
@@ -4610,7 +4622,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
|
||||
* from position 0, so we can simply keep iterating on !0 *pos.
|
||||
*/
|
||||
if (!it) {
|
||||
if (WARN_ON_ONCE((*pos)++))
|
||||
if (WARN_ON_ONCE((*pos)))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
it = kzalloc(sizeof(*it), GFP_KERNEL);
|
||||
@@ -4618,10 +4630,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
|
||||
return ERR_PTR(-ENOMEM);
|
||||
of->priv = it;
|
||||
css_task_iter_start(&cgrp->self, iter_flags, it);
|
||||
} else if (!(*pos)++) {
|
||||
} else if (!(*pos)) {
|
||||
css_task_iter_end(it);
|
||||
css_task_iter_start(&cgrp->self, iter_flags, it);
|
||||
}
|
||||
} else
|
||||
return it->cur_task;
|
||||
|
||||
return cgroup_procs_next(s, NULL, NULL);
|
||||
}
|
||||
@@ -6258,6 +6271,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Don't associate the sock with unrelated interrupted task's cgroup. */
|
||||
if (in_interrupt())
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
while (true) {
|
||||
|
||||
@@ -619,8 +619,8 @@ static void forget_original_parent(struct task_struct *father,
|
||||
reaper = find_new_reaper(father, reaper);
|
||||
list_for_each_entry(p, &father->children, sibling) {
|
||||
for_each_thread(p, t) {
|
||||
t->real_parent = reaper;
|
||||
BUG_ON((!t->ptrace) != (t->parent == father));
|
||||
RCU_INIT_POINTER(t->real_parent, reaper);
|
||||
BUG_ON((!t->ptrace) != (rcu_access_pointer(t->parent) == father));
|
||||
if (likely(!t->ptrace))
|
||||
t->parent = t->real_parent;
|
||||
if (t->pdeath_signal)
|
||||
|
||||
@@ -1508,7 +1508,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
|
||||
return 0;
|
||||
}
|
||||
sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
|
||||
rcu_assign_pointer(tsk->sighand, sig);
|
||||
RCU_INIT_POINTER(tsk->sighand, sig);
|
||||
if (!sig)
|
||||
return -ENOMEM;
|
||||
|
||||
|
||||
@@ -385,9 +385,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
|
||||
*/
|
||||
static struct futex_hash_bucket *hash_futex(union futex_key *key)
|
||||
{
|
||||
u32 hash = jhash2((u32*)&key->both.word,
|
||||
(sizeof(key->both.word)+sizeof(key->both.ptr))/4,
|
||||
u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
|
||||
key->both.offset);
|
||||
|
||||
return &futex_queues[hash & (futex_hashsize - 1)];
|
||||
}
|
||||
|
||||
@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)
|
||||
|
||||
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
|
||||
case FUT_OFF_INODE:
|
||||
ihold(key->shared.inode); /* implies smp_mb(); (B) */
|
||||
smp_mb(); /* explicit smp_mb(); (B) */
|
||||
break;
|
||||
case FUT_OFF_MMSHARED:
|
||||
futex_get_mm(key); /* implies smp_mb(); (B) */
|
||||
@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)
|
||||
|
||||
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
|
||||
case FUT_OFF_INODE:
|
||||
iput(key->shared.inode);
|
||||
break;
|
||||
case FUT_OFF_MMSHARED:
|
||||
mmdrop(key->private.mm);
|
||||
@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
|
||||
return timeout;
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate a machine wide unique identifier for this inode.
|
||||
*
|
||||
* This relies on u64 not wrapping in the life-time of the machine; which with
|
||||
* 1ns resolution means almost 585 years.
|
||||
*
|
||||
* This further relies on the fact that a well formed program will not unmap
|
||||
* the file while it has a (shared) futex waiting on it. This mapping will have
|
||||
* a file reference which pins the mount and inode.
|
||||
*
|
||||
* If for some reason an inode gets evicted and read back in again, it will get
|
||||
* a new sequence number and will _NOT_ match, even though it is the exact same
|
||||
* file.
|
||||
*
|
||||
* It is important that match_futex() will never have a false-positive, esp.
|
||||
* for PI futexes that can mess up the state. The above argues that false-negatives
|
||||
* are only possible for malformed programs.
|
||||
*/
|
||||
static u64 get_inode_sequence_number(struct inode *inode)
|
||||
{
|
||||
static atomic64_t i_seq;
|
||||
u64 old;
|
||||
|
||||
/* Does the inode already have a sequence number? */
|
||||
old = atomic64_read(&inode->i_sequence);
|
||||
if (likely(old))
|
||||
return old;
|
||||
|
||||
for (;;) {
|
||||
u64 new = atomic64_add_return(1, &i_seq);
|
||||
if (WARN_ON_ONCE(!new))
|
||||
continue;
|
||||
|
||||
old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
|
||||
if (old)
|
||||
return old;
|
||||
return new;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* get_futex_key() - Get parameters which are the keys for a futex
|
||||
* @uaddr: virtual address of the futex
|
||||
@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
|
||||
*
|
||||
* The key words are stored in @key on success.
|
||||
*
|
||||
* For shared mappings, it's (page->index, file_inode(vma->vm_file),
|
||||
* offset_within_page). For private mappings, it's (uaddr, current->mm).
|
||||
* We can usually work out the index without swapping in the page.
|
||||
* For shared mappings (when @fshared), the key is:
|
||||
* ( inode->i_sequence, page->index, offset_within_page )
|
||||
* [ also see get_inode_sequence_number() ]
|
||||
*
|
||||
* For private mappings (or when !@fshared), the key is:
|
||||
* ( current->mm, address, 0 )
|
||||
*
|
||||
* This allows (cross process, where applicable) identification of the futex
|
||||
* without keeping the page pinned for the duration of the FUTEX_WAIT.
|
||||
*
|
||||
* lock_page() might sleep, the caller should not hold a spinlock.
|
||||
*/
|
||||
@@ -659,8 +704,6 @@ again:
|
||||
key->private.mm = mm;
|
||||
key->private.address = address;
|
||||
|
||||
get_futex_key_refs(key); /* implies smp_mb(); (B) */
|
||||
|
||||
} else {
|
||||
struct inode *inode;
|
||||
|
||||
@@ -692,40 +735,14 @@ again:
|
||||
goto again;
|
||||
}
|
||||
|
||||
/*
|
||||
* Take a reference unless it is about to be freed. Previously
|
||||
* this reference was taken by ihold under the page lock
|
||||
* pinning the inode in place so i_lock was unnecessary. The
|
||||
* only way for this check to fail is if the inode was
|
||||
* truncated in parallel which is almost certainly an
|
||||
* application bug. In such a case, just retry.
|
||||
*
|
||||
* We are not calling into get_futex_key_refs() in file-backed
|
||||
* cases, therefore a successful atomic_inc return below will
|
||||
* guarantee that get_futex_key() will still imply smp_mb(); (B).
|
||||
*/
|
||||
if (!atomic_inc_not_zero(&inode->i_count)) {
|
||||
rcu_read_unlock();
|
||||
put_page(page);
|
||||
|
||||
goto again;
|
||||
}
|
||||
|
||||
/* Should be impossible but lets be paranoid for now */
|
||||
if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
|
||||
err = -EFAULT;
|
||||
rcu_read_unlock();
|
||||
iput(inode);
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
|
||||
key->shared.inode = inode;
|
||||
key->shared.i_seq = get_inode_sequence_number(inode);
|
||||
key->shared.pgoff = basepage_index(tail);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
get_futex_key_refs(key); /* implies smp_mb(); (B) */
|
||||
|
||||
out:
|
||||
put_page(page);
|
||||
return err;
|
||||
|
||||
10
kernel/pid.c
10
kernel/pid.c
@@ -247,6 +247,16 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
|
||||
tmp = tmp->parent;
|
||||
}
|
||||
|
||||
/*
|
||||
* ENOMEM is not the most obvious choice especially for the case
|
||||
* where the child subreaper has already exited and the pid
|
||||
* namespace denies the creation of any new processes. But ENOMEM
|
||||
* is what we have exposed to userspace for a long time and it is
|
||||
* documented behavior for pid namespaces. So we can't easily
|
||||
* change it even if there were an error code better suited.
|
||||
*/
|
||||
retval = -ENOMEM;
|
||||
|
||||
if (unlikely(is_child_reaper(pid))) {
|
||||
if (pid_ns_prepare_proc(ns))
|
||||
goto out_free;
|
||||
|
||||
@@ -1681,7 +1681,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
|
||||
* hibernation for allocations made while saving the image and for device
|
||||
* drivers, in case they need to allocate memory from their hibernation
|
||||
* callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
|
||||
* estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through
|
||||
* estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
|
||||
* /sys/power/reserved_size, respectively). To make this happen, we compute the
|
||||
* total number of available page frames and allocate at least
|
||||
*
|
||||
|
||||
@@ -8337,6 +8337,8 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
|
||||
|
||||
sgs->group_capacity = group->sgc->capacity;
|
||||
|
||||
sgs->group_weight = group->group_weight;
|
||||
|
||||
sgs->group_type = group_classify(sd->imbalance_pct, group, sgs);
|
||||
|
||||
/*
|
||||
|
||||
@@ -413,27 +413,32 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
|
||||
{
|
||||
struct sigqueue *q = NULL;
|
||||
struct user_struct *user;
|
||||
int sigpending;
|
||||
|
||||
/*
|
||||
* Protect access to @t credentials. This can go away when all
|
||||
* callers hold rcu read lock.
|
||||
*
|
||||
* NOTE! A pending signal will hold on to the user refcount,
|
||||
* and we get/put the refcount only when the sigpending count
|
||||
* changes from/to zero.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
user = get_uid(__task_cred(t)->user);
|
||||
atomic_inc(&user->sigpending);
|
||||
user = __task_cred(t)->user;
|
||||
sigpending = atomic_inc_return(&user->sigpending);
|
||||
if (sigpending == 1)
|
||||
get_uid(user);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (override_rlimit ||
|
||||
atomic_read(&user->sigpending) <=
|
||||
task_rlimit(t, RLIMIT_SIGPENDING)) {
|
||||
if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
|
||||
q = kmem_cache_alloc(sigqueue_cachep, flags);
|
||||
} else {
|
||||
print_dropped_signal(sig);
|
||||
}
|
||||
|
||||
if (unlikely(q == NULL)) {
|
||||
atomic_dec(&user->sigpending);
|
||||
free_uid(user);
|
||||
if (atomic_dec_and_test(&user->sigpending))
|
||||
free_uid(user);
|
||||
} else {
|
||||
INIT_LIST_HEAD(&q->list);
|
||||
q->flags = 0;
|
||||
@@ -447,8 +452,8 @@ static void __sigqueue_free(struct sigqueue *q)
|
||||
{
|
||||
if (q->flags & SIGQUEUE_PREALLOC)
|
||||
return;
|
||||
atomic_dec(&q->user->sigpending);
|
||||
free_uid(q->user);
|
||||
if (atomic_dec_and_test(&q->user->sigpending))
|
||||
free_uid(q->user);
|
||||
kmem_cache_free(sigqueue_cachep, q);
|
||||
}
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/user_namespace.h>
|
||||
#include <linux/time_namespace.h>
|
||||
#include <linux/binfmts.h>
|
||||
|
||||
#include <linux/sched.h>
|
||||
@@ -2546,6 +2547,7 @@ static int do_sysinfo(struct sysinfo *info)
|
||||
memset(info, 0, sizeof(struct sysinfo));
|
||||
|
||||
ktime_get_boottime_ts64(&tp);
|
||||
timens_add_boottime(&tp);
|
||||
info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
|
||||
|
||||
get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
|
||||
|
||||
@@ -143,8 +143,8 @@ if FTRACE
|
||||
|
||||
config BOOTTIME_TRACING
|
||||
bool "Boot-time Tracing support"
|
||||
depends on BOOT_CONFIG && TRACING
|
||||
default y
|
||||
depends on TRACING
|
||||
select BOOT_CONFIG
|
||||
help
|
||||
Enable developer to setup ftrace subsystem via supplemental
|
||||
kernel cmdline at boot time for debugging (tracing) driver
|
||||
|
||||
@@ -335,6 +335,7 @@ static void put_probe_ref(void)
|
||||
|
||||
static void blk_trace_cleanup(struct blk_trace *bt)
|
||||
{
|
||||
synchronize_rcu();
|
||||
blk_trace_free(bt);
|
||||
put_probe_ref();
|
||||
}
|
||||
@@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
|
||||
static int __blk_trace_startstop(struct request_queue *q, int start)
|
||||
{
|
||||
int ret;
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
bt = rcu_dereference_protected(q->blk_trace,
|
||||
lockdep_is_held(&q->blk_trace_mutex));
|
||||
if (bt == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
@@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
|
||||
void blk_trace_shutdown(struct request_queue *q)
|
||||
{
|
||||
mutex_lock(&q->blk_trace_mutex);
|
||||
|
||||
if (q->blk_trace) {
|
||||
if (rcu_dereference_protected(q->blk_trace,
|
||||
lockdep_is_held(&q->blk_trace_mutex))) {
|
||||
__blk_trace_startstop(q, 0);
|
||||
__blk_trace_remove(q);
|
||||
}
|
||||
@@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q)
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
/* We don't use the 'bt' value here except as an optimization... */
|
||||
bt = rcu_dereference_protected(q->blk_trace, 1);
|
||||
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
|
||||
return 0;
|
||||
|
||||
@@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
|
||||
static void blk_add_trace_rq(struct request *rq, int error,
|
||||
unsigned int nr_bytes, u32 what, u64 cgid)
|
||||
{
|
||||
struct blk_trace *bt = rq->q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
if (likely(!bt))
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(rq->q->blk_trace);
|
||||
if (likely(!bt)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
if (blk_rq_is_passthrough(rq))
|
||||
what |= BLK_TC_ACT(BLK_TC_PC);
|
||||
@@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
|
||||
|
||||
__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
|
||||
rq->cmd_flags, what, error, 0, NULL, cgid);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blk_add_trace_rq_insert(void *ignore,
|
||||
@@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
|
||||
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
|
||||
u32 what, int error)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
if (likely(!bt))
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (likely(!bt)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
|
||||
bio_op(bio), bio->bi_opf, what, error, 0, NULL,
|
||||
blk_trace_bio_get_cgid(q, bio));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blk_add_trace_bio_bounce(void *ignore,
|
||||
@@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore,
|
||||
if (bio)
|
||||
blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
|
||||
else {
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (bt)
|
||||
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
|
||||
NULL, 0);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore,
|
||||
if (bio)
|
||||
blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
|
||||
else {
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (bt)
|
||||
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
|
||||
0, 0, NULL, 0);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
static void blk_add_trace_plug(void *ignore, struct request_queue *q)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (bt)
|
||||
__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
|
||||
unsigned int depth, bool explicit)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (bt) {
|
||||
__be64 rpdu = cpu_to_be64(depth);
|
||||
u32 what;
|
||||
@@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
|
||||
|
||||
__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blk_add_trace_split(void *ignore,
|
||||
struct request_queue *q, struct bio *bio,
|
||||
unsigned int pdu)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (bt) {
|
||||
__be64 rpdu = cpu_to_be64(pdu);
|
||||
|
||||
@@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore,
|
||||
BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
|
||||
&rpdu, blk_trace_bio_get_cgid(q, bio));
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore,
|
||||
struct request_queue *q, struct bio *bio,
|
||||
dev_t dev, sector_t from)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
struct blk_io_trace_remap r;
|
||||
|
||||
if (likely(!bt))
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (likely(!bt)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
r.device_from = cpu_to_be32(dev);
|
||||
r.device_to = cpu_to_be32(bio_dev(bio));
|
||||
@@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore,
|
||||
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
|
||||
bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
|
||||
sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore,
|
||||
struct request *rq, dev_t dev,
|
||||
sector_t from)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
struct blk_io_trace_remap r;
|
||||
|
||||
if (likely(!bt))
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (likely(!bt)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
r.device_from = cpu_to_be32(dev);
|
||||
r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
|
||||
@@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore,
|
||||
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
|
||||
rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
|
||||
sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q,
|
||||
struct request *rq,
|
||||
void *data, size_t len)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
if (likely(!bt))
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (likely(!bt)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
|
||||
BLK_TA_DRV_DATA, 0, len, data,
|
||||
blk_trace_request_get_cgid(q, rq));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_add_driver_data);
|
||||
|
||||
@@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
|
||||
return -EINVAL;
|
||||
|
||||
put_probe_ref();
|
||||
synchronize_rcu();
|
||||
blk_trace_free(bt);
|
||||
return 0;
|
||||
}
|
||||
@@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
|
||||
struct hd_struct *p = dev_to_part(dev);
|
||||
struct request_queue *q;
|
||||
struct block_device *bdev;
|
||||
struct blk_trace *bt;
|
||||
ssize_t ret = -ENXIO;
|
||||
|
||||
bdev = bdget(part_devt(p));
|
||||
@@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
|
||||
|
||||
mutex_lock(&q->blk_trace_mutex);
|
||||
|
||||
bt = rcu_dereference_protected(q->blk_trace,
|
||||
lockdep_is_held(&q->blk_trace_mutex));
|
||||
if (attr == &dev_attr_enable) {
|
||||
ret = sprintf(buf, "%u\n", !!q->blk_trace);
|
||||
ret = sprintf(buf, "%u\n", !!bt);
|
||||
goto out_unlock_bdev;
|
||||
}
|
||||
|
||||
if (q->blk_trace == NULL)
|
||||
if (bt == NULL)
|
||||
ret = sprintf(buf, "disabled\n");
|
||||
else if (attr == &dev_attr_act_mask)
|
||||
ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
|
||||
ret = blk_trace_mask2str(buf, bt->act_mask);
|
||||
else if (attr == &dev_attr_pid)
|
||||
ret = sprintf(buf, "%u\n", q->blk_trace->pid);
|
||||
ret = sprintf(buf, "%u\n", bt->pid);
|
||||
else if (attr == &dev_attr_start_lba)
|
||||
ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
|
||||
ret = sprintf(buf, "%llu\n", bt->start_lba);
|
||||
else if (attr == &dev_attr_end_lba)
|
||||
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
|
||||
ret = sprintf(buf, "%llu\n", bt->end_lba);
|
||||
|
||||
out_unlock_bdev:
|
||||
mutex_unlock(&q->blk_trace_mutex);
|
||||
@@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
|
||||
struct block_device *bdev;
|
||||
struct request_queue *q;
|
||||
struct hd_struct *p;
|
||||
struct blk_trace *bt;
|
||||
u64 value;
|
||||
ssize_t ret = -EINVAL;
|
||||
|
||||
@@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
|
||||
|
||||
mutex_lock(&q->blk_trace_mutex);
|
||||
|
||||
bt = rcu_dereference_protected(q->blk_trace,
|
||||
lockdep_is_held(&q->blk_trace_mutex));
|
||||
if (attr == &dev_attr_enable) {
|
||||
if (!!value == !!q->blk_trace) {
|
||||
if (!!value == !!bt) {
|
||||
ret = 0;
|
||||
goto out_unlock_bdev;
|
||||
}
|
||||
@@ -1844,18 +1896,21 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
if (q->blk_trace == NULL)
|
||||
if (bt == NULL) {
|
||||
ret = blk_trace_setup_queue(q, bdev);
|
||||
bt = rcu_dereference_protected(q->blk_trace,
|
||||
lockdep_is_held(&q->blk_trace_mutex));
|
||||
}
|
||||
|
||||
if (ret == 0) {
|
||||
if (attr == &dev_attr_act_mask)
|
||||
q->blk_trace->act_mask = value;
|
||||
bt->act_mask = value;
|
||||
else if (attr == &dev_attr_pid)
|
||||
q->blk_trace->pid = value;
|
||||
bt->pid = value;
|
||||
else if (attr == &dev_attr_start_lba)
|
||||
q->blk_trace->start_lba = value;
|
||||
bt->start_lba = value;
|
||||
else if (attr == &dev_attr_end_lba)
|
||||
q->blk_trace->end_lba = value;
|
||||
bt->end_lba = value;
|
||||
}
|
||||
|
||||
out_unlock_bdev:
|
||||
|
||||
@@ -1547,6 +1547,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
|
||||
rec = bsearch(&key, pg->records, pg->index,
|
||||
sizeof(struct dyn_ftrace),
|
||||
ftrace_cmp_recs);
|
||||
if (rec)
|
||||
break;
|
||||
}
|
||||
return rec;
|
||||
}
|
||||
|
||||
@@ -111,11 +111,11 @@ static int __init test_gen_synth_cmd(void)
|
||||
/* Create some bogus values just for testing */
|
||||
|
||||
vals[0] = 777; /* next_pid_field */
|
||||
vals[1] = (u64)"hula hoops"; /* next_comm_field */
|
||||
vals[1] = (u64)(long)"hula hoops"; /* next_comm_field */
|
||||
vals[2] = 1000000; /* ts_ns */
|
||||
vals[3] = 1000; /* ts_ms */
|
||||
vals[4] = smp_processor_id(); /* cpu */
|
||||
vals[5] = (u64)"thneed"; /* my_string_field */
|
||||
vals[4] = raw_smp_processor_id(); /* cpu */
|
||||
vals[5] = (u64)(long)"thneed"; /* my_string_field */
|
||||
vals[6] = 598; /* my_int_field */
|
||||
|
||||
/* Now generate a gen_synth_test event */
|
||||
@@ -218,11 +218,11 @@ static int __init test_empty_synth_event(void)
|
||||
/* Create some bogus values just for testing */
|
||||
|
||||
vals[0] = 777; /* next_pid_field */
|
||||
vals[1] = (u64)"tiddlywinks"; /* next_comm_field */
|
||||
vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */
|
||||
vals[2] = 1000000; /* ts_ns */
|
||||
vals[3] = 1000; /* ts_ms */
|
||||
vals[4] = smp_processor_id(); /* cpu */
|
||||
vals[5] = (u64)"thneed_2.0"; /* my_string_field */
|
||||
vals[4] = raw_smp_processor_id(); /* cpu */
|
||||
vals[5] = (u64)(long)"thneed_2.0"; /* my_string_field */
|
||||
vals[6] = 399; /* my_int_field */
|
||||
|
||||
/* Now trace an empty_synth_test event */
|
||||
@@ -290,11 +290,11 @@ static int __init test_create_synth_event(void)
|
||||
/* Create some bogus values just for testing */
|
||||
|
||||
vals[0] = 777; /* next_pid_field */
|
||||
vals[1] = (u64)"tiddlywinks"; /* next_comm_field */
|
||||
vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */
|
||||
vals[2] = 1000000; /* ts_ns */
|
||||
vals[3] = 1000; /* ts_ms */
|
||||
vals[4] = smp_processor_id(); /* cpu */
|
||||
vals[5] = (u64)"thneed"; /* my_string_field */
|
||||
vals[4] = raw_smp_processor_id(); /* cpu */
|
||||
vals[5] = (u64)(long)"thneed"; /* my_string_field */
|
||||
vals[6] = 398; /* my_int_field */
|
||||
|
||||
/* Now generate a create_synth_test event */
|
||||
@@ -330,7 +330,7 @@ static int __init test_add_next_synth_val(void)
|
||||
goto out;
|
||||
|
||||
/* next_comm_field */
|
||||
ret = synth_event_add_next_val((u64)"slinky", &trace_state);
|
||||
ret = synth_event_add_next_val((u64)(long)"slinky", &trace_state);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -345,12 +345,12 @@ static int __init test_add_next_synth_val(void)
|
||||
goto out;
|
||||
|
||||
/* cpu */
|
||||
ret = synth_event_add_next_val(smp_processor_id(), &trace_state);
|
||||
ret = synth_event_add_next_val(raw_smp_processor_id(), &trace_state);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* my_string_field */
|
||||
ret = synth_event_add_next_val((u64)"thneed_2.01", &trace_state);
|
||||
ret = synth_event_add_next_val((u64)(long)"thneed_2.01", &trace_state);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -388,7 +388,7 @@ static int __init test_add_synth_val(void)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = synth_event_add_val("cpu", smp_processor_id(), &trace_state);
|
||||
ret = synth_event_add_val("cpu", raw_smp_processor_id(), &trace_state);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -396,12 +396,12 @@ static int __init test_add_synth_val(void)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = synth_event_add_val("next_comm_field", (u64)"silly putty",
|
||||
ret = synth_event_add_val("next_comm_field", (u64)(long)"silly putty",
|
||||
&trace_state);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = synth_event_add_val("my_string_field", (u64)"thneed_9",
|
||||
ret = synth_event_add_val("my_string_field", (u64)(long)"thneed_9",
|
||||
&trace_state);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -423,13 +423,13 @@ static int __init test_trace_synth_event(void)
|
||||
|
||||
/* Trace some bogus values just for testing */
|
||||
ret = synth_event_trace(create_synth_test, 7, /* number of values */
|
||||
444, /* next_pid_field */
|
||||
(u64)"clackers", /* next_comm_field */
|
||||
1000000, /* ts_ns */
|
||||
1000, /* ts_ms */
|
||||
smp_processor_id(), /* cpu */
|
||||
(u64)"Thneed", /* my_string_field */
|
||||
999); /* my_int_field */
|
||||
(u64)444, /* next_pid_field */
|
||||
(u64)(long)"clackers", /* next_comm_field */
|
||||
(u64)1000000, /* ts_ns */
|
||||
(u64)1000, /* ts_ms */
|
||||
(u64)raw_smp_processor_id(), /* cpu */
|
||||
(u64)(long)"Thneed", /* my_string_field */
|
||||
(u64)999); /* my_int_field */
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -1837,6 +1837,7 @@ static __init int init_trace_selftests(void)
|
||||
|
||||
pr_info("Running postponed tracer tests:\n");
|
||||
|
||||
tracing_selftest_running = true;
|
||||
list_for_each_entry_safe(p, n, &postponed_selftests, list) {
|
||||
/* This loop can take minutes when sanitizers are enabled, so
|
||||
* lets make sure we allow RCU processing.
|
||||
@@ -1859,6 +1860,7 @@ static __init int init_trace_selftests(void)
|
||||
list_del(&p->list);
|
||||
kfree(p);
|
||||
}
|
||||
tracing_selftest_running = false;
|
||||
|
||||
out:
|
||||
mutex_unlock(&trace_types_lock);
|
||||
|
||||
@@ -821,6 +821,29 @@ static const char *synth_field_fmt(char *type)
|
||||
return fmt;
|
||||
}
|
||||
|
||||
static void print_synth_event_num_val(struct trace_seq *s,
|
||||
char *print_fmt, char *name,
|
||||
int size, u64 val, char *space)
|
||||
{
|
||||
switch (size) {
|
||||
case 1:
|
||||
trace_seq_printf(s, print_fmt, name, (u8)val, space);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
trace_seq_printf(s, print_fmt, name, (u16)val, space);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
trace_seq_printf(s, print_fmt, name, (u32)val, space);
|
||||
break;
|
||||
|
||||
default:
|
||||
trace_seq_printf(s, print_fmt, name, val, space);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static enum print_line_t print_synth_event(struct trace_iterator *iter,
|
||||
int flags,
|
||||
struct trace_event *event)
|
||||
@@ -859,10 +882,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
|
||||
} else {
|
||||
struct trace_print_flags __flags[] = {
|
||||
__def_gfpflag_names, {-1, NULL} };
|
||||
char *space = (i == se->n_fields - 1 ? "" : " ");
|
||||
|
||||
trace_seq_printf(s, print_fmt, se->fields[i]->name,
|
||||
entry->fields[n_u64],
|
||||
i == se->n_fields - 1 ? "" : " ");
|
||||
print_synth_event_num_val(s, print_fmt,
|
||||
se->fields[i]->name,
|
||||
se->fields[i]->size,
|
||||
entry->fields[n_u64],
|
||||
space);
|
||||
|
||||
if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
|
||||
trace_seq_puts(s, " (");
|
||||
@@ -1805,6 +1831,8 @@ __synth_event_trace_start(struct trace_event_file *file,
|
||||
int entry_size, fields_size = 0;
|
||||
int ret = 0;
|
||||
|
||||
memset(trace_state, '\0', sizeof(*trace_state));
|
||||
|
||||
/*
|
||||
* Normal event tracing doesn't get called at all unless the
|
||||
* ENABLED bit is set (which attaches the probe thus allowing
|
||||
@@ -1885,6 +1913,11 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (n_vals != state.event->n_fields) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
va_start(args, n_vals);
|
||||
for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
|
||||
u64 val;
|
||||
@@ -1898,12 +1931,30 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
|
||||
strscpy(str_field, str_val, STR_VAR_LEN_MAX);
|
||||
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
|
||||
} else {
|
||||
state.entry->fields[n_u64] = val;
|
||||
struct synth_field *field = state.event->fields[i];
|
||||
|
||||
switch (field->size) {
|
||||
case 1:
|
||||
*(u8 *)&state.entry->fields[n_u64] = (u8)val;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
*(u16 *)&state.entry->fields[n_u64] = (u16)val;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
*(u32 *)&state.entry->fields[n_u64] = (u32)val;
|
||||
break;
|
||||
|
||||
default:
|
||||
state.entry->fields[n_u64] = val;
|
||||
break;
|
||||
}
|
||||
n_u64++;
|
||||
}
|
||||
}
|
||||
va_end(args);
|
||||
|
||||
out:
|
||||
__synth_event_trace_end(&state);
|
||||
|
||||
return ret;
|
||||
@@ -1942,6 +1993,11 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (n_vals != state.event->n_fields) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
|
||||
if (state.event->fields[i]->is_string) {
|
||||
char *str_val = (char *)(long)vals[i];
|
||||
@@ -1950,11 +2006,30 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
|
||||
strscpy(str_field, str_val, STR_VAR_LEN_MAX);
|
||||
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
|
||||
} else {
|
||||
state.entry->fields[n_u64] = vals[i];
|
||||
struct synth_field *field = state.event->fields[i];
|
||||
u64 val = vals[i];
|
||||
|
||||
switch (field->size) {
|
||||
case 1:
|
||||
*(u8 *)&state.entry->fields[n_u64] = (u8)val;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
*(u16 *)&state.entry->fields[n_u64] = (u16)val;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
*(u32 *)&state.entry->fields[n_u64] = (u32)val;
|
||||
break;
|
||||
|
||||
default:
|
||||
state.entry->fields[n_u64] = val;
|
||||
break;
|
||||
}
|
||||
n_u64++;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
__synth_event_trace_end(&state);
|
||||
|
||||
return ret;
|
||||
@@ -1997,8 +2072,6 @@ int synth_event_trace_start(struct trace_event_file *file,
|
||||
if (!trace_state)
|
||||
return -EINVAL;
|
||||
|
||||
memset(trace_state, '\0', sizeof(*trace_state));
|
||||
|
||||
ret = __synth_event_trace_start(file, trace_state);
|
||||
if (ret == -ENOENT)
|
||||
ret = 0; /* just disabled, not really an error */
|
||||
@@ -2069,8 +2142,25 @@ static int __synth_event_add_val(const char *field_name, u64 val,
|
||||
|
||||
str_field = (char *)&entry->fields[field->offset];
|
||||
strscpy(str_field, str_val, STR_VAR_LEN_MAX);
|
||||
} else
|
||||
entry->fields[field->offset] = val;
|
||||
} else {
|
||||
switch (field->size) {
|
||||
case 1:
|
||||
*(u8 *)&trace_state->entry->fields[field->offset] = (u8)val;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
*(u16 *)&trace_state->entry->fields[field->offset] = (u16)val;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
*(u32 *)&trace_state->entry->fields[field->offset] = (u32)val;
|
||||
break;
|
||||
|
||||
default:
|
||||
trace_state->entry->fields[field->offset] = val;
|
||||
break;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1411,14 +1411,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
|
||||
return;
|
||||
rcu_read_lock();
|
||||
retry:
|
||||
if (req_cpu == WORK_CPU_UNBOUND)
|
||||
cpu = wq_select_unbound_cpu(raw_smp_processor_id());
|
||||
|
||||
/* pwq which will be used unless @work is executing elsewhere */
|
||||
if (!(wq->flags & WQ_UNBOUND))
|
||||
pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
|
||||
else
|
||||
if (wq->flags & WQ_UNBOUND) {
|
||||
if (req_cpu == WORK_CPU_UNBOUND)
|
||||
cpu = wq_select_unbound_cpu(raw_smp_processor_id());
|
||||
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
|
||||
} else {
|
||||
if (req_cpu == WORK_CPU_UNBOUND)
|
||||
cpu = raw_smp_processor_id();
|
||||
pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* If @work was previously on a different pool, it might still be
|
||||
|
||||
Reference in New Issue
Block a user