mirror of
https://github.com/torvalds/linux.git
synced 2024-11-25 13:41:51 +00:00
for-linus-20181102
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAlvchGgQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpj/1D/4kEQx4ncnFoZk8QshHV1L++rH3BbcLjQDd Wbh9ZSIQdI/gHTzS6bE7x3YfcbpMWPMO3+jFawdfRiFTEjlF8vQ+mnJ+Btb3z4D6 mGEeFGVhHExlp2a0x/Ma8YWVNlMB7BE8Tq73bZEVMY+9lbpmDW/vp7Sfa87LBDKQ ZmY+My+VdHN7qLtQ7t3W/HtpbU+kcXMMd3ICjK4i+ofXy6mynk4+oQ2jwyXc5L86 UCJCsTsSRr3CgbnkW/uprHo0XHk8i7O/4C3oR+x4pAIxCCa9g+vmw0EO9fvi/2iQ qe8jKdm7Y09xu/TiPBa7iz45tdh0cNMJKo3OezmSF9Np+r69KL5C/U4GRPKN3Iwm keoqn14ScABkYMSe4ys1AdEgKD6bNUaW3r/lJxTH2oUR23mjnCLp7c4WD/G+MlbB CzoakQyCHTZmDFLr2Kc8bkjmpil2T2UFfmLIDAu30LWIYeSGpiIO/V+g1foJMF2f 06ERltNvgX1BJjoh4NSWySLEf1ZtkUU60NeATRol6gwhnIyLrHsgfm6OEhqlW/7x Xc1BWyzX7K6c3Dskk/u5aSRyXOyRC9KkMt3/2XexeDNHkte9yMH0IgSvopPBuER8 +iPvPjNp7ychTKZB3zpSnlqGgePTjbufIEBtO3OyUmDZKjUqxahtxkQfmPhoclu+ XdR4ArcqNg== =0zM4 -----END PGP SIGNATURE----- Merge tag 'for-linus-20181102' of git://git.kernel.dk/linux-block Pull block layer fixes from Jens Axboe: "The biggest part of this pull request is the revert of the blkcg cleanup series. It had one fix earlier for a stacked device issue, but another one was reported. Rather than play whack-a-mole with this, revert the entire series and try again for the next kernel release. Apart from that, only small fixes/changes. Summary: - Indentation fixup for mtip32xx (Colin Ian King) - The blkcg cleanup series revert (Dennis Zhou) - Two NVMe fixes. One fixing a regression in the nvme request initialization in this merge window, causing nvme-fc to not work. The other is a suspend/resume p2p resource issue (James, Keith) - Fix sg discard merge, allowing us to merge in cases where we didn't before (Jianchao Wang) - Call rq_qos_exit() after the queue is frozen, preventing a hang (Ming) - Fix brd queue setup, fixing an oops if we fail setting up all devices (Ming)" * tag 'for-linus-20181102' of git://git.kernel.dk/linux-block: nvme-pci: fix conflicting p2p resource adds nvme-fc: fix request private initialization blkcg: revert blkcg cleanups series block: brd: associate with queue until adding disk block: call rq_qos_exit() after queue is frozen mtip32xx: clean an indentation issue, remove extraneous tabs block: fix the DISCARD request merge
This commit is contained in:
commit
5f21585384
@ -1879,10 +1879,8 @@ following two functions.
|
||||
|
||||
wbc_init_bio(@wbc, @bio)
|
||||
Should be called for each bio carrying writeback data and
|
||||
associates the bio with the inode's owner cgroup and the
|
||||
corresponding request queue. This must be called after
|
||||
a queue (device) has been associated with the bio and
|
||||
before submission.
|
||||
associates the bio with the inode's owner cgroup. Can be
|
||||
called anytime between bio allocation and submission.
|
||||
|
||||
wbc_account_io(@wbc, @page, @bytes)
|
||||
Should be called for each data segment being written out.
|
||||
@ -1901,7 +1899,7 @@ the configuration, the bio may be executed at a lower priority and if
|
||||
the writeback session is holding shared resources, e.g. a journal
|
||||
entry, may lead to priority inversion. There is no one easy solution
|
||||
for the problem. Filesystems can try to work around specific problem
|
||||
cases by skipping wbc_init_bio() or using bio_associate_create_blkg()
|
||||
cases by skipping wbc_init_bio() or using bio_associate_blkcg()
|
||||
directly.
|
||||
|
||||
|
||||
|
@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
|
||||
uint64_t serial_nr;
|
||||
|
||||
rcu_read_lock();
|
||||
serial_nr = __bio_blkcg(bio)->css.serial_nr;
|
||||
serial_nr = bio_blkcg(bio)->css.serial_nr;
|
||||
|
||||
/*
|
||||
* Check whether blkcg has changed. The condition may trigger
|
||||
@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
|
||||
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
|
||||
goto out;
|
||||
|
||||
bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
|
||||
bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
|
||||
/*
|
||||
* Update blkg_path for bfq_log_* functions. We cache this
|
||||
* path, and update it here, for the following
|
||||
|
@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
|
||||
bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
|
||||
if (!bfqg) {
|
||||
bfqq = &bfqd->oom_bfqq;
|
||||
goto out;
|
||||
|
206
block/bio.c
206
block/bio.c
@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
|
||||
bio->bi_iter = bio_src->bi_iter;
|
||||
bio->bi_io_vec = bio_src->bi_io_vec;
|
||||
|
||||
bio_clone_blkg_association(bio, bio_src);
|
||||
|
||||
blkcg_bio_issue_init(bio);
|
||||
bio_clone_blkcg_association(bio, bio_src);
|
||||
}
|
||||
EXPORT_SYMBOL(__bio_clone_fast);
|
||||
|
||||
@ -1956,153 +1954,71 @@ EXPORT_SYMBOL(bioset_init_from_src);
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
/**
|
||||
* bio_associate_blkg - associate a bio with the a blkg
|
||||
* bio_associate_blkcg_from_page - associate a bio with the page's blkcg
|
||||
* @bio: target bio
|
||||
* @page: the page to lookup the blkcg from
|
||||
*
|
||||
* Associate @bio with the blkcg from @page's owning memcg. This works like
|
||||
* every other associate function wrt references.
|
||||
*/
|
||||
int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
|
||||
{
|
||||
struct cgroup_subsys_state *blkcg_css;
|
||||
|
||||
if (unlikely(bio->bi_css))
|
||||
return -EBUSY;
|
||||
if (!page->mem_cgroup)
|
||||
return 0;
|
||||
blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
|
||||
&io_cgrp_subsys);
|
||||
bio->bi_css = blkcg_css;
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_MEMCG */
|
||||
|
||||
/**
|
||||
* bio_associate_blkcg - associate a bio with the specified blkcg
|
||||
* @bio: target bio
|
||||
* @blkcg_css: css of the blkcg to associate
|
||||
*
|
||||
* Associate @bio with the blkcg specified by @blkcg_css. Block layer will
|
||||
* treat @bio as if it were issued by a task which belongs to the blkcg.
|
||||
*
|
||||
* This function takes an extra reference of @blkcg_css which will be put
|
||||
* when @bio is released. The caller must own @bio and is responsible for
|
||||
* synchronizing calls to this function.
|
||||
*/
|
||||
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
|
||||
{
|
||||
if (unlikely(bio->bi_css))
|
||||
return -EBUSY;
|
||||
css_get(blkcg_css);
|
||||
bio->bi_css = blkcg_css;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_associate_blkcg);
|
||||
|
||||
/**
|
||||
* bio_associate_blkg - associate a bio with the specified blkg
|
||||
* @bio: target bio
|
||||
* @blkg: the blkg to associate
|
||||
*
|
||||
* This tries to associate @bio with the specified blkg. Association failure
|
||||
* is handled by walking up the blkg tree. Therefore, the blkg associated can
|
||||
* be anything between @blkg and the root_blkg. This situation only happens
|
||||
* when a cgroup is dying and then the remaining bios will spill to the closest
|
||||
* alive blkg.
|
||||
*
|
||||
* A reference will be taken on the @blkg and will be released when @bio is
|
||||
* freed.
|
||||
* Associate @bio with the blkg specified by @blkg. This is the queue specific
|
||||
* blkcg information associated with the @bio, a reference will be taken on the
|
||||
* @blkg and will be freed when the bio is freed.
|
||||
*/
|
||||
int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
|
||||
{
|
||||
if (unlikely(bio->bi_blkg))
|
||||
return -EBUSY;
|
||||
bio->bi_blkg = blkg_tryget_closest(blkg);
|
||||
if (!blkg_try_get(blkg))
|
||||
return -ENODEV;
|
||||
bio->bi_blkg = blkg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* __bio_associate_blkg_from_css - internal blkg association function
|
||||
*
|
||||
* This in the core association function that all association paths rely on.
|
||||
* A blkg reference is taken which is released upon freeing of the bio.
|
||||
*/
|
||||
static int __bio_associate_blkg_from_css(struct bio *bio,
|
||||
struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct blkcg_gq *blkg;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
if (!css || !css->parent)
|
||||
blkg = q->root_blkg;
|
||||
else
|
||||
blkg = blkg_lookup_create(css_to_blkcg(css), q);
|
||||
|
||||
ret = bio_associate_blkg(bio, blkg);
|
||||
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_associate_blkg_from_css - associate a bio with a specified css
|
||||
* @bio: target bio
|
||||
* @css: target css
|
||||
*
|
||||
* Associate @bio with the blkg found by combining the css's blkg and the
|
||||
* request_queue of the @bio. This falls back to the queue's root_blkg if
|
||||
* the association fails with the css.
|
||||
*/
|
||||
int bio_associate_blkg_from_css(struct bio *bio,
|
||||
struct cgroup_subsys_state *css)
|
||||
{
|
||||
if (unlikely(bio->bi_blkg))
|
||||
return -EBUSY;
|
||||
return __bio_associate_blkg_from_css(bio, css);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
/**
|
||||
* bio_associate_blkg_from_page - associate a bio with the page's blkg
|
||||
* @bio: target bio
|
||||
* @page: the page to lookup the blkcg from
|
||||
*
|
||||
* Associate @bio with the blkg from @page's owning memcg and the respective
|
||||
* request_queue. If cgroup_e_css returns NULL, fall back to the queue's
|
||||
* root_blkg.
|
||||
*
|
||||
* Note: this must be called after bio has an associated device.
|
||||
*/
|
||||
int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
int ret;
|
||||
|
||||
if (unlikely(bio->bi_blkg))
|
||||
return -EBUSY;
|
||||
if (!page->mem_cgroup)
|
||||
return 0;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
|
||||
|
||||
ret = __bio_associate_blkg_from_css(bio, css);
|
||||
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_MEMCG */
|
||||
|
||||
/**
|
||||
* bio_associate_create_blkg - associate a bio with a blkg from q
|
||||
* @q: request_queue where bio is going
|
||||
* @bio: target bio
|
||||
*
|
||||
* Associate @bio with the blkg found from the bio's css and the request_queue.
|
||||
* If one is not found, bio_lookup_blkg creates the blkg. This falls back to
|
||||
* the queue's root_blkg if association fails.
|
||||
*/
|
||||
int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
int ret = 0;
|
||||
|
||||
/* someone has already associated this bio with a blkg */
|
||||
if (bio->bi_blkg)
|
||||
return ret;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
css = blkcg_css();
|
||||
|
||||
ret = __bio_associate_blkg_from_css(bio, css);
|
||||
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_reassociate_blkg - reassociate a bio with a blkg from q
|
||||
* @q: request_queue where bio is going
|
||||
* @bio: target bio
|
||||
*
|
||||
* When submitting a bio, multiple recursive calls to make_request() may occur.
|
||||
* This causes the initial associate done in blkcg_bio_issue_check() to be
|
||||
* incorrect and reference the prior request_queue. This performs reassociation
|
||||
* when this situation happens.
|
||||
*/
|
||||
int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
if (bio->bi_blkg) {
|
||||
blkg_put(bio->bi_blkg);
|
||||
bio->bi_blkg = NULL;
|
||||
}
|
||||
|
||||
return bio_associate_create_blkg(q, bio);
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_disassociate_task - undo bio_associate_current()
|
||||
* @bio: target bio
|
||||
@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio)
|
||||
put_io_context(bio->bi_ioc);
|
||||
bio->bi_ioc = NULL;
|
||||
}
|
||||
if (bio->bi_css) {
|
||||
css_put(bio->bi_css);
|
||||
bio->bi_css = NULL;
|
||||
}
|
||||
if (bio->bi_blkg) {
|
||||
blkg_put(bio->bi_blkg);
|
||||
bio->bi_blkg = NULL;
|
||||
@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio)
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_clone_blkg_association - clone blkg association from src to dst bio
|
||||
* bio_clone_blkcg_association - clone blkcg association from src to dst bio
|
||||
* @dst: destination bio
|
||||
* @src: source bio
|
||||
*/
|
||||
void bio_clone_blkg_association(struct bio *dst, struct bio *src)
|
||||
void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
|
||||
{
|
||||
if (src->bi_blkg)
|
||||
bio_associate_blkg(dst, src->bi_blkg);
|
||||
if (src->bi_css)
|
||||
WARN_ON(bio_associate_blkcg(dst, src->bi_css));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
|
||||
EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
|
||||
#endif /* CONFIG_BLK_CGROUP */
|
||||
|
||||
static void __init biovec_init_slabs(void)
|
||||
|
@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg)
|
||||
kfree(blkg);
|
||||
}
|
||||
|
||||
static void __blkg_release(struct rcu_head *rcu)
|
||||
{
|
||||
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
|
||||
|
||||
percpu_ref_exit(&blkg->refcnt);
|
||||
|
||||
/* release the blkcg and parent blkg refs this blkg has been holding */
|
||||
css_put(&blkg->blkcg->css);
|
||||
if (blkg->parent)
|
||||
blkg_put(blkg->parent);
|
||||
|
||||
wb_congested_put(blkg->wb_congested);
|
||||
|
||||
blkg_free(blkg);
|
||||
}
|
||||
|
||||
/*
|
||||
* A group is RCU protected, but having an rcu lock does not mean that one
|
||||
* can access all the fields of blkg and assume these are valid. For
|
||||
* example, don't try to follow throtl_data and request queue links.
|
||||
*
|
||||
* Having a reference to blkg under an rcu allows accesses to only values
|
||||
* local to groups like group stats and group rate limits.
|
||||
*/
|
||||
static void blkg_release(struct percpu_ref *ref)
|
||||
{
|
||||
struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
|
||||
|
||||
call_rcu(&blkg->rcu_head, __blkg_release);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_alloc - allocate a blkg
|
||||
* @blkcg: block cgroup the new blkg is associated with
|
||||
@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
|
||||
blkg->q = q;
|
||||
INIT_LIST_HEAD(&blkg->q_node);
|
||||
blkg->blkcg = blkcg;
|
||||
atomic_set(&blkg->refcnt, 1);
|
||||
|
||||
/* root blkg uses @q->root_rl, init rl only for !root blkgs */
|
||||
if (blkcg != &blkcg_root) {
|
||||
@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
|
||||
blkg_get(blkg->parent);
|
||||
}
|
||||
|
||||
ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
|
||||
GFP_NOWAIT | __GFP_NOWARN);
|
||||
if (ret)
|
||||
goto err_cancel_ref;
|
||||
|
||||
/* invoke per-policy init */
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
|
||||
blkg_put(blkg);
|
||||
return ERR_PTR(ret);
|
||||
|
||||
err_cancel_ref:
|
||||
percpu_ref_exit(&blkg->refcnt);
|
||||
err_put_congested:
|
||||
wb_congested_put(wb_congested);
|
||||
err_put_css:
|
||||
@ -296,7 +259,7 @@ err_free_blkg:
|
||||
}
|
||||
|
||||
/**
|
||||
* __blkg_lookup_create - lookup blkg, try to create one if not there
|
||||
* blkg_lookup_create - lookup blkg, try to create one if not there
|
||||
* @blkcg: blkcg of interest
|
||||
* @q: request_queue of interest
|
||||
*
|
||||
@ -305,11 +268,12 @@ err_free_blkg:
|
||||
* that all non-root blkg's have access to the parent blkg. This function
|
||||
* should be called under RCU read lock and @q->queue_lock.
|
||||
*
|
||||
* Returns the blkg or the closest blkg if blkg_create fails as it walks
|
||||
* down from root.
|
||||
* Returns pointer to the looked up or created blkg on success, ERR_PTR()
|
||||
* value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not
|
||||
* dead and bypassing, returns ERR_PTR(-EBUSY).
|
||||
*/
|
||||
struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
|
||||
struct request_queue *q)
|
||||
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
|
||||
struct request_queue *q)
|
||||
{
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
|
||||
* we shouldn't allow anything to go through for a bypassing queue.
|
||||
*/
|
||||
if (unlikely(blk_queue_bypass(q)))
|
||||
return q->root_blkg;
|
||||
return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
|
||||
|
||||
blkg = __blkg_lookup(blkcg, q, true);
|
||||
if (blkg)
|
||||
@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
|
||||
|
||||
/*
|
||||
* Create blkgs walking down from blkcg_root to @blkcg, so that all
|
||||
* non-root blkgs have access to their parents. Returns the closest
|
||||
* blkg to the intended blkg should blkg_create() fail.
|
||||
* non-root blkgs have access to their parents.
|
||||
*/
|
||||
while (true) {
|
||||
struct blkcg *pos = blkcg;
|
||||
struct blkcg *parent = blkcg_parent(blkcg);
|
||||
struct blkcg_gq *ret_blkg = q->root_blkg;
|
||||
|
||||
while (parent) {
|
||||
blkg = __blkg_lookup(parent, q, false);
|
||||
if (blkg) {
|
||||
/* remember closest blkg */
|
||||
ret_blkg = blkg;
|
||||
break;
|
||||
}
|
||||
while (parent && !__blkg_lookup(parent, q, false)) {
|
||||
pos = parent;
|
||||
parent = blkcg_parent(parent);
|
||||
}
|
||||
|
||||
blkg = blkg_create(pos, q, NULL);
|
||||
if (IS_ERR(blkg))
|
||||
return ret_blkg;
|
||||
if (pos == blkcg)
|
||||
if (pos == blkcg || IS_ERR(blkg))
|
||||
return blkg;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_lookup_create - find or create a blkg
|
||||
* @blkcg: target block cgroup
|
||||
* @q: target request_queue
|
||||
*
|
||||
* This looks up or creates the blkg representing the unique pair
|
||||
* of the blkcg and the request_queue.
|
||||
*/
|
||||
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
|
||||
struct request_queue *q)
|
||||
{
|
||||
struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(!blkg)) {
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
|
||||
blkg = __blkg_lookup_create(blkcg, q);
|
||||
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
return blkg;
|
||||
}
|
||||
|
||||
static void blkg_destroy(struct blkcg_gq *blkg)
|
||||
{
|
||||
struct blkcg *blkcg = blkg->blkcg;
|
||||
@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
|
||||
* Put the reference taken at the time of creation so that when all
|
||||
* queues are gone, group can be destroyed.
|
||||
*/
|
||||
percpu_ref_kill(&blkg->refcnt);
|
||||
blkg_put(blkg);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -451,6 +380,29 @@ static void blkg_destroy_all(struct request_queue *q)
|
||||
q->root_rl.blkg = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* A group is RCU protected, but having an rcu lock does not mean that one
|
||||
* can access all the fields of blkg and assume these are valid. For
|
||||
* example, don't try to follow throtl_data and request queue links.
|
||||
*
|
||||
* Having a reference to blkg under an rcu allows accesses to only values
|
||||
* local to groups like group stats and group rate limits.
|
||||
*/
|
||||
void __blkg_release_rcu(struct rcu_head *rcu_head)
|
||||
{
|
||||
struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
|
||||
|
||||
/* release the blkcg and parent blkg refs this blkg has been holding */
|
||||
css_put(&blkg->blkcg->css);
|
||||
if (blkg->parent)
|
||||
blkg_put(blkg->parent);
|
||||
|
||||
wb_congested_put(blkg->wb_congested);
|
||||
|
||||
blkg_free(blkg);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__blkg_release_rcu);
|
||||
|
||||
/*
|
||||
* The next function used by blk_queue_for_each_rl(). It's a bit tricky
|
||||
* because the root blkg uses @q->root_rl instead of its own rl.
|
||||
@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void)
|
||||
blkg = blkg_lookup(blkcg, q);
|
||||
if (!blkg)
|
||||
goto out;
|
||||
if (!blkg_tryget(blkg))
|
||||
blkg = blkg_try_get(blkg);
|
||||
if (!blkg)
|
||||
goto out;
|
||||
rcu_read_unlock();
|
||||
|
||||
|
@ -785,6 +785,9 @@ void blk_cleanup_queue(struct request_queue *q)
|
||||
* prevent that q->request_fn() gets invoked after draining finished.
|
||||
*/
|
||||
blk_freeze_queue(q);
|
||||
|
||||
rq_qos_exit(q);
|
||||
|
||||
spin_lock_irq(lock);
|
||||
queue_flag_set(QUEUE_FLAG_DEAD, q);
|
||||
spin_unlock_irq(lock);
|
||||
@ -2432,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio)
|
||||
if (q)
|
||||
blk_queue_exit(q);
|
||||
q = bio->bi_disk->queue;
|
||||
bio_reassociate_blkg(q, bio);
|
||||
flags = 0;
|
||||
if (bio->bi_opf & REQ_NOWAIT)
|
||||
flags = BLK_MQ_REQ_NOWAIT;
|
||||
|
@ -482,12 +482,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
|
||||
spinlock_t *lock)
|
||||
{
|
||||
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
|
||||
struct blkcg_gq *blkg = bio->bi_blkg;
|
||||
struct blkcg *blkcg;
|
||||
struct blkcg_gq *blkg;
|
||||
struct request_queue *q = rqos->q;
|
||||
bool issue_as_root = bio_issue_as_root_blkg(bio);
|
||||
|
||||
if (!blk_iolatency_enabled(blkiolat))
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
blkcg = bio_blkcg(bio);
|
||||
bio_associate_blkcg(bio, &blkcg->css);
|
||||
blkg = blkg_lookup(blkcg, q);
|
||||
if (unlikely(!blkg)) {
|
||||
if (!lock)
|
||||
spin_lock_irq(q->queue_lock);
|
||||
blkg = blkg_lookup_create(blkcg, q);
|
||||
if (IS_ERR(blkg))
|
||||
blkg = NULL;
|
||||
if (!lock)
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
if (!blkg)
|
||||
goto out;
|
||||
|
||||
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
|
||||
bio_associate_blkg(bio, blkg);
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
while (blkg && blkg->parent) {
|
||||
struct iolatency_grp *iolat = blkg_to_lat(blkg);
|
||||
if (!iolat) {
|
||||
@ -708,7 +730,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
|
||||
* We could be exiting, don't access the pd unless we have a
|
||||
* ref on the blkg.
|
||||
*/
|
||||
if (!blkg_tryget(blkg))
|
||||
if (!blkg_try_get(blkg))
|
||||
continue;
|
||||
|
||||
iolat = blkg_to_lat(blkg);
|
||||
|
@ -714,6 +714,31 @@ static void blk_account_io_merge(struct request *req)
|
||||
part_stat_unlock();
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Two cases of handling DISCARD merge:
|
||||
* If max_discard_segments > 1, the driver takes every bio
|
||||
* as a range and send them to controller together. The ranges
|
||||
* needn't to be contiguous.
|
||||
* Otherwise, the bios/requests will be handled as same as
|
||||
* others which should be contiguous.
|
||||
*/
|
||||
static inline bool blk_discard_mergable(struct request *req)
|
||||
{
|
||||
if (req_op(req) == REQ_OP_DISCARD &&
|
||||
queue_max_discard_segments(req->q) > 1)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
enum elv_merge blk_try_req_merge(struct request *req, struct request *next)
|
||||
{
|
||||
if (blk_discard_mergable(req))
|
||||
return ELEVATOR_DISCARD_MERGE;
|
||||
else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
|
||||
return ELEVATOR_BACK_MERGE;
|
||||
|
||||
return ELEVATOR_NO_MERGE;
|
||||
}
|
||||
|
||||
/*
|
||||
* For non-mq, this has to be called with the request spinlock acquired.
|
||||
@ -731,12 +756,6 @@ static struct request *attempt_merge(struct request_queue *q,
|
||||
if (req_op(req) != req_op(next))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* not contiguous
|
||||
*/
|
||||
if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
|
||||
return NULL;
|
||||
|
||||
if (rq_data_dir(req) != rq_data_dir(next)
|
||||
|| req->rq_disk != next->rq_disk
|
||||
|| req_no_special_merge(next))
|
||||
@ -760,11 +779,19 @@ static struct request *attempt_merge(struct request_queue *q,
|
||||
* counts here. Handle DISCARDs separately, as they
|
||||
* have separate settings.
|
||||
*/
|
||||
if (req_op(req) == REQ_OP_DISCARD) {
|
||||
|
||||
switch (blk_try_req_merge(req, next)) {
|
||||
case ELEVATOR_DISCARD_MERGE:
|
||||
if (!req_attempt_discard_merge(q, req, next))
|
||||
return NULL;
|
||||
} else if (!ll_merge_requests_fn(q, req, next))
|
||||
break;
|
||||
case ELEVATOR_BACK_MERGE:
|
||||
if (!ll_merge_requests_fn(q, req, next))
|
||||
return NULL;
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If failfast settings disagree or any of the two is already
|
||||
@ -888,8 +915,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
|
||||
|
||||
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
|
||||
{
|
||||
if (req_op(rq) == REQ_OP_DISCARD &&
|
||||
queue_max_discard_segments(rq->q) > 1)
|
||||
if (blk_discard_mergable(rq))
|
||||
return ELEVATOR_DISCARD_MERGE;
|
||||
else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
|
||||
return ELEVATOR_BACK_MERGE;
|
||||
|
@ -1007,8 +1007,6 @@ void blk_unregister_queue(struct gendisk *disk)
|
||||
kobject_del(&q->kobj);
|
||||
blk_trace_remove_sysfs(disk_to_dev(disk));
|
||||
|
||||
rq_qos_exit(q);
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
if (q->request_fn || (q->mq_ops && q->elevator))
|
||||
elv_unregister_queue(q);
|
||||
|
@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
|
||||
}
|
||||
#endif
|
||||
|
||||
static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
|
||||
{
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
/* fallback to root_blkg if we fail to get a blkg ref */
|
||||
if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
|
||||
bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
|
||||
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
|
||||
#endif
|
||||
}
|
||||
|
||||
bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct throtl_qnode *qn = NULL;
|
||||
struct throtl_grp *tg = blkg_to_tg(blkg);
|
||||
struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
|
||||
struct throtl_service_queue *sq;
|
||||
bool rw = bio_data_dir(bio);
|
||||
bool throttled = false;
|
||||
@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
|
||||
if (unlikely(blk_queue_bypass(q)))
|
||||
goto out_unlock;
|
||||
|
||||
blk_throtl_assoc_bio(tg, bio);
|
||||
blk_throtl_update_idletime(tg);
|
||||
|
||||
sq = &tg->service_queue;
|
||||
|
@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
|
||||
}
|
||||
}
|
||||
|
||||
bio_clone_blkg_association(bio, bio_src);
|
||||
|
||||
blkcg_bio_issue_init(bio);
|
||||
bio_clone_blkcg_association(bio, bio_src);
|
||||
|
||||
return bio;
|
||||
}
|
||||
|
@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
|
||||
uint64_t serial_nr;
|
||||
|
||||
rcu_read_lock();
|
||||
serial_nr = __bio_blkcg(bio)->css.serial_nr;
|
||||
serial_nr = bio_blkcg(bio)->css.serial_nr;
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
|
||||
struct cfq_group *cfqg;
|
||||
|
||||
rcu_read_lock();
|
||||
cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio));
|
||||
cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
|
||||
if (!cfqg) {
|
||||
cfqq = &cfqd->oom_cfqq;
|
||||
goto out;
|
||||
|
@ -396,15 +396,14 @@ static struct brd_device *brd_alloc(int i)
|
||||
disk->first_minor = i * max_part;
|
||||
disk->fops = &brd_fops;
|
||||
disk->private_data = brd;
|
||||
disk->queue = brd->brd_queue;
|
||||
disk->flags = GENHD_FL_EXT_DEVT;
|
||||
sprintf(disk->disk_name, "ram%d", i);
|
||||
set_capacity(disk, rd_size * 2);
|
||||
disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
|
||||
brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
|
||||
|
||||
/* Tell the block layer that this is not a rotational device */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue);
|
||||
|
||||
return brd;
|
||||
|
||||
@ -436,6 +435,7 @@ static struct brd_device *brd_init_one(int i, bool *new)
|
||||
|
||||
brd = brd_alloc(i);
|
||||
if (brd) {
|
||||
brd->brd_disk->queue = brd->brd_queue;
|
||||
add_disk(brd->brd_disk);
|
||||
list_add_tail(&brd->brd_list, &brd_devices);
|
||||
}
|
||||
@ -503,8 +503,14 @@ static int __init brd_init(void)
|
||||
|
||||
/* point of no return */
|
||||
|
||||
list_for_each_entry(brd, &brd_devices, brd_list)
|
||||
list_for_each_entry(brd, &brd_devices, brd_list) {
|
||||
/*
|
||||
* associate with queue just before adding disk for
|
||||
* avoiding to mess up failure path
|
||||
*/
|
||||
brd->brd_disk->queue = brd->brd_queue;
|
||||
add_disk(brd->brd_disk);
|
||||
}
|
||||
|
||||
blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
|
||||
THIS_MODULE, brd_probe, NULL, NULL);
|
||||
|
@ -77,7 +77,6 @@
|
||||
#include <linux/falloc.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/ioprio.h>
|
||||
#include <linux/blk-cgroup.h>
|
||||
|
||||
#include "loop.h"
|
||||
|
||||
@ -1760,8 +1759,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
|
||||
/* always use the first bio's css */
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
|
||||
cmd->css = &bio_blkcg(rq->bio)->css;
|
||||
if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
|
||||
cmd->css = rq->bio->bi_css;
|
||||
css_get(cmd->css);
|
||||
} else
|
||||
#endif
|
||||
|
@ -1942,8 +1942,8 @@ static int exec_drive_taskfile(struct driver_data *dd,
|
||||
dev_warn(&dd->pdev->dev,
|
||||
"data movement but "
|
||||
"sect_count is 0\n");
|
||||
err = -EINVAL;
|
||||
goto abort;
|
||||
err = -EINVAL;
|
||||
goto abort;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
||||
!discard_bio)
|
||||
continue;
|
||||
bio_chain(discard_bio, bio);
|
||||
bio_clone_blkg_association(discard_bio, bio);
|
||||
bio_clone_blkcg_association(discard_bio, bio);
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(bdev_get_queue(rdev->bdev),
|
||||
discard_bio, disk_devt(mddev->gendisk),
|
||||
|
@ -1704,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
|
||||
op->fcp_req.rspaddr = &op->rsp_iu;
|
||||
op->fcp_req.rsplen = sizeof(op->rsp_iu);
|
||||
op->fcp_req.done = nvme_fc_fcpio_done;
|
||||
op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
|
||||
op->ctrl = ctrl;
|
||||
op->queue = queue;
|
||||
op->rq = rq;
|
||||
@ -1752,6 +1751,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
|
||||
if (res)
|
||||
return res;
|
||||
op->op.fcp_req.first_sgl = &op->sgl[0];
|
||||
op->op.fcp_req.private = &op->priv[0];
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -1663,6 +1663,9 @@ static void nvme_map_cmb(struct nvme_dev *dev)
|
||||
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
||||
int bar;
|
||||
|
||||
if (dev->cmb_size)
|
||||
return;
|
||||
|
||||
dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
|
||||
if (!dev->cmbsz)
|
||||
return;
|
||||
@ -2147,7 +2150,6 @@ static void nvme_pci_disable(struct nvme_dev *dev)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
||||
|
||||
nvme_release_cmb(dev);
|
||||
pci_free_irq_vectors(pdev);
|
||||
|
||||
if (pci_is_enabled(pdev)) {
|
||||
@ -2595,6 +2597,7 @@ static void nvme_remove(struct pci_dev *pdev)
|
||||
nvme_stop_ctrl(&dev->ctrl);
|
||||
nvme_remove_namespaces(&dev->ctrl);
|
||||
nvme_dev_disable(dev, true);
|
||||
nvme_release_cmb(dev);
|
||||
nvme_free_host_mem(dev);
|
||||
nvme_dev_remove_admin(dev);
|
||||
nvme_free_queues(dev, 0);
|
||||
|
10
fs/buffer.c
10
fs/buffer.c
@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
|
||||
*/
|
||||
bio = bio_alloc(GFP_NOIO, 1);
|
||||
|
||||
if (wbc) {
|
||||
wbc_init_bio(wbc, bio);
|
||||
wbc_account_io(wbc, bh->b_page, bh->b_size);
|
||||
}
|
||||
|
||||
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
|
||||
bio_set_dev(bio, bh->b_bdev);
|
||||
bio->bi_write_hint = write_hint;
|
||||
@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
|
||||
op_flags |= REQ_PRIO;
|
||||
bio_set_op_attrs(bio, op, op_flags);
|
||||
|
||||
if (wbc) {
|
||||
wbc_init_bio(wbc, bio);
|
||||
wbc_account_io(wbc, bh->b_page, bh->b_size);
|
||||
}
|
||||
|
||||
submit_bio(bio);
|
||||
return 0;
|
||||
}
|
||||
|
@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
|
||||
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
|
||||
if (!bio)
|
||||
return -ENOMEM;
|
||||
wbc_init_bio(io->io_wbc, bio);
|
||||
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
|
||||
bio_set_dev(bio, bh->b_bdev);
|
||||
bio->bi_end_io = ext4_end_bio;
|
||||
bio->bi_private = ext4_get_io_end(io->io_end);
|
||||
io->io_bio = bio;
|
||||
io->io_next_block = bh->b_blocknr;
|
||||
wbc_init_bio(io->io_wbc, bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -503,31 +503,23 @@ do { \
|
||||
disk_devt((bio)->bi_disk)
|
||||
|
||||
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
|
||||
int bio_associate_blkg_from_page(struct bio *bio, struct page *page);
|
||||
int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
|
||||
#else
|
||||
static inline int bio_associate_blkg_from_page(struct bio *bio,
|
||||
struct page *page) { return 0; }
|
||||
static inline int bio_associate_blkcg_from_page(struct bio *bio,
|
||||
struct page *page) { return 0; }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
|
||||
int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
|
||||
int bio_associate_blkg_from_css(struct bio *bio,
|
||||
struct cgroup_subsys_state *css);
|
||||
int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
|
||||
int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
|
||||
void bio_disassociate_task(struct bio *bio);
|
||||
void bio_clone_blkg_association(struct bio *dst, struct bio *src);
|
||||
void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
|
||||
#else /* CONFIG_BLK_CGROUP */
|
||||
static inline int bio_associate_blkg_from_css(struct bio *bio,
|
||||
struct cgroup_subsys_state *css)
|
||||
{ return 0; }
|
||||
static inline int bio_associate_create_blkg(struct request_queue *q,
|
||||
struct bio *bio) { return 0; }
|
||||
static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
|
||||
{ return 0; }
|
||||
static inline int bio_associate_blkcg(struct bio *bio,
|
||||
struct cgroup_subsys_state *blkcg_css) { return 0; }
|
||||
static inline void bio_disassociate_task(struct bio *bio) { }
|
||||
static inline void bio_clone_blkg_association(struct bio *dst,
|
||||
struct bio *src) { }
|
||||
static inline void bio_clone_blkcg_association(struct bio *dst,
|
||||
struct bio *src) { }
|
||||
#endif /* CONFIG_BLK_CGROUP */
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
|
@ -126,7 +126,7 @@ struct blkcg_gq {
|
||||
struct request_list rl;
|
||||
|
||||
/* reference count */
|
||||
struct percpu_ref refcnt;
|
||||
atomic_t refcnt;
|
||||
|
||||
/* is this blkg online? protected by both blkcg and q locks */
|
||||
bool online;
|
||||
@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
|
||||
|
||||
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
|
||||
struct request_queue *q, bool update_hint);
|
||||
struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
|
||||
struct request_queue *q);
|
||||
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
|
||||
struct request_queue *q);
|
||||
int blkcg_init_queue(struct request_queue *q);
|
||||
@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
char *input, struct blkg_conf_ctx *ctx);
|
||||
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
|
||||
|
||||
/**
|
||||
* blkcg_css - find the current css
|
||||
*
|
||||
* Find the css associated with either the kthread or the current task.
|
||||
* This may return a dying css, so it is up to the caller to use tryget logic
|
||||
* to confirm it is alive and well.
|
||||
*/
|
||||
static inline struct cgroup_subsys_state *blkcg_css(void)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
css = kthread_blkcg();
|
||||
if (css)
|
||||
return css;
|
||||
return task_css(current, io_cgrp_id);
|
||||
}
|
||||
|
||||
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
|
||||
{
|
||||
return css ? container_of(css, struct blkcg, css) : NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* __bio_blkcg - internal version of bio_blkcg for bfq and cfq
|
||||
*
|
||||
* DO NOT USE.
|
||||
* There is a flaw using this version of the function. In particular, this was
|
||||
* used in a broken paradigm where association was called on the given css. It
|
||||
* is possible though that the returned css from task_css() is in the process
|
||||
* of dying due to migration of the current task. So it is improper to assume
|
||||
* *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will
|
||||
* take additional work to handle more gracefully.
|
||||
*/
|
||||
static inline struct blkcg *__bio_blkcg(struct bio *bio)
|
||||
{
|
||||
if (bio && bio->bi_blkg)
|
||||
return bio->bi_blkg->blkcg;
|
||||
return css_to_blkcg(blkcg_css());
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_blkcg - grab the blkcg associated with a bio
|
||||
* @bio: target bio
|
||||
*
|
||||
* This returns the blkcg associated with a bio, NULL if not associated.
|
||||
* Callers are expected to either handle NULL or know association has been
|
||||
* done prior to calling this.
|
||||
*/
|
||||
static inline struct blkcg *bio_blkcg(struct bio *bio)
|
||||
{
|
||||
if (bio && bio->bi_blkg)
|
||||
return bio->bi_blkg->blkcg;
|
||||
return NULL;
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
if (bio && bio->bi_css)
|
||||
return css_to_blkcg(bio->bi_css);
|
||||
css = kthread_blkcg();
|
||||
if (css)
|
||||
return css_to_blkcg(css);
|
||||
return css_to_blkcg(task_css(current, io_cgrp_id));
|
||||
}
|
||||
|
||||
static inline bool blk_cgroup_congested(void)
|
||||
@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
|
||||
*/
|
||||
static inline void blkg_get(struct blkcg_gq *blkg)
|
||||
{
|
||||
percpu_ref_get(&blkg->refcnt);
|
||||
WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
|
||||
atomic_inc(&blkg->refcnt);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_tryget - try and get a blkg reference
|
||||
* blkg_try_get - try and get a blkg reference
|
||||
* @blkg: blkg to get
|
||||
*
|
||||
* This is for use when doing an RCU lookup of the blkg. We may be in the midst
|
||||
* of freeing this blkg, so we can only use it if the refcnt is not zero.
|
||||
*/
|
||||
static inline bool blkg_tryget(struct blkcg_gq *blkg)
|
||||
static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
|
||||
{
|
||||
return percpu_ref_tryget(&blkg->refcnt);
|
||||
if (atomic_inc_not_zero(&blkg->refcnt))
|
||||
return blkg;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_tryget_closest - try and get a blkg ref on the closet blkg
|
||||
* @blkg: blkg to get
|
||||
*
|
||||
* This walks up the blkg tree to find the closest non-dying blkg and returns
|
||||
* the blkg that it did association with as it may not be the passed in blkg.
|
||||
*/
|
||||
static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
|
||||
{
|
||||
while (!percpu_ref_tryget(&blkg->refcnt))
|
||||
blkg = blkg->parent;
|
||||
|
||||
return blkg;
|
||||
}
|
||||
void __blkg_release_rcu(struct rcu_head *rcu);
|
||||
|
||||
/**
|
||||
* blkg_put - put a blkg reference
|
||||
@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
|
||||
*/
|
||||
static inline void blkg_put(struct blkcg_gq *blkg)
|
||||
{
|
||||
percpu_ref_put(&blkg->refcnt);
|
||||
WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
|
||||
if (atomic_dec_and_test(&blkg->refcnt))
|
||||
call_rcu(&blkg->rcu_head, __blkg_release_rcu);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
if (bio && bio->bi_blkg) {
|
||||
blkcg = bio->bi_blkg->blkcg;
|
||||
if (blkcg == &blkcg_root)
|
||||
goto rl_use_root;
|
||||
blkcg = bio_blkcg(bio);
|
||||
|
||||
blkg_get(bio->bi_blkg);
|
||||
rcu_read_unlock();
|
||||
return &bio->bi_blkg->rl;
|
||||
}
|
||||
|
||||
blkcg = css_to_blkcg(blkcg_css());
|
||||
/* bypass blkg lookup and use @q->root_rl directly for root */
|
||||
if (blkcg == &blkcg_root)
|
||||
goto rl_use_root;
|
||||
|
||||
blkg = blkg_lookup(blkcg, q);
|
||||
if (unlikely(!blkg))
|
||||
blkg = __blkg_lookup_create(blkcg, q);
|
||||
|
||||
if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
|
||||
goto rl_use_root;
|
||||
|
||||
rcu_read_unlock();
|
||||
return &blkg->rl;
|
||||
goto root_rl;
|
||||
|
||||
/*
|
||||
* Each blkg has its own request_list, however, the root blkcg
|
||||
* uses the request_queue's root_rl. This is to avoid most
|
||||
* overhead for the root blkcg.
|
||||
* Try to use blkg->rl. blkg lookup may fail under memory pressure
|
||||
* or if either the blkcg or queue is going away. Fall back to
|
||||
* root_rl in such cases.
|
||||
*/
|
||||
rl_use_root:
|
||||
blkg = blkg_lookup(blkcg, q);
|
||||
if (unlikely(!blkg))
|
||||
goto root_rl;
|
||||
|
||||
blkg_get(blkg);
|
||||
rcu_read_unlock();
|
||||
return &blkg->rl;
|
||||
root_rl:
|
||||
rcu_read_unlock();
|
||||
return &q->root_rl;
|
||||
}
|
||||
@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
|
||||
struct bio *bio) { return false; }
|
||||
#endif
|
||||
|
||||
|
||||
static inline void blkcg_bio_issue_init(struct bio *bio)
|
||||
{
|
||||
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
|
||||
}
|
||||
|
||||
static inline bool blkcg_bio_issue_check(struct request_queue *q,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct blkcg *blkcg;
|
||||
struct blkcg_gq *blkg;
|
||||
bool throtl = false;
|
||||
|
||||
rcu_read_lock();
|
||||
blkcg = bio_blkcg(bio);
|
||||
|
||||
bio_associate_create_blkg(q, bio);
|
||||
blkg = bio->bi_blkg;
|
||||
/* associate blkcg if bio hasn't attached one */
|
||||
bio_associate_blkcg(bio, &blkcg->css);
|
||||
|
||||
blkg = blkg_lookup(blkcg, q);
|
||||
if (unlikely(!blkg)) {
|
||||
spin_lock_irq(q->queue_lock);
|
||||
blkg = blkg_lookup_create(blkcg, q);
|
||||
if (IS_ERR(blkg))
|
||||
blkg = NULL;
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
|
||||
throtl = blk_throtl_bio(q, blkg, bio);
|
||||
|
||||
if (!throtl) {
|
||||
blkg = blkg ?: q->root_blkg;
|
||||
/*
|
||||
* If the bio is flagged with BIO_QUEUE_ENTERED it means this
|
||||
* is a split bio and we would have already accounted for the
|
||||
@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
|
||||
blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
|
||||
}
|
||||
|
||||
blkcg_bio_issue_init(bio);
|
||||
|
||||
rcu_read_unlock();
|
||||
return !throtl;
|
||||
}
|
||||
@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
|
||||
static inline void blkcg_deactivate_policy(struct request_queue *q,
|
||||
const struct blkcg_policy *pol) { }
|
||||
|
||||
static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
|
||||
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
|
||||
|
||||
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
|
||||
@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { }
|
||||
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
|
||||
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
|
||||
|
||||
static inline void blkcg_bio_issue_init(struct bio *bio) { }
|
||||
static inline bool blkcg_bio_issue_check(struct request_queue *q,
|
||||
struct bio *bio) { return true; }
|
||||
|
||||
|
@ -178,6 +178,7 @@ struct bio {
|
||||
* release. Read comment on top of bio_associate_current().
|
||||
*/
|
||||
struct io_context *bi_ioc;
|
||||
struct cgroup_subsys_state *bi_css;
|
||||
struct blkcg_gq *bi_blkg;
|
||||
struct bio_issue bi_issue;
|
||||
#endif
|
||||
|
@ -93,8 +93,6 @@ extern struct css_set init_css_set;
|
||||
|
||||
bool css_has_online_children(struct cgroup_subsys_state *css);
|
||||
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
|
||||
struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
|
||||
struct cgroup_subsys *ss);
|
||||
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
|
||||
struct cgroup_subsys *ss);
|
||||
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
|
||||
|
@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
|
||||
*
|
||||
* @bio is a part of the writeback in progress controlled by @wbc. Perform
|
||||
* writeback specific initialization. This is used to apply the cgroup
|
||||
* writeback context. Must be called after the bio has been associated with
|
||||
* a device.
|
||||
* writeback context.
|
||||
*/
|
||||
static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
|
||||
{
|
||||
@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
|
||||
* regular writeback instead of writing things out itself.
|
||||
*/
|
||||
if (wbc->wb)
|
||||
bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
|
||||
bio_associate_blkcg(bio, wbc->wb->blkcg_css);
|
||||
}
|
||||
|
||||
#else /* CONFIG_CGROUP_WRITEBACK */
|
||||
|
@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
|
||||
}
|
||||
|
||||
/**
|
||||
* cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
|
||||
* cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
|
||||
* @cgrp: the cgroup of interest
|
||||
* @ss: the subsystem of interest (%NULL returns @cgrp->self)
|
||||
*
|
||||
@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
|
||||
* enabled. If @ss is associated with the hierarchy @cgrp is on, this
|
||||
* function is guaranteed to return non-NULL css.
|
||||
*/
|
||||
static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
|
||||
struct cgroup_subsys *ss)
|
||||
static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
|
||||
struct cgroup_subsys *ss)
|
||||
{
|
||||
lockdep_assert_held(&cgroup_mutex);
|
||||
|
||||
@ -523,35 +523,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
|
||||
return cgroup_css(cgrp, ss);
|
||||
}
|
||||
|
||||
/**
|
||||
* cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
|
||||
* @cgrp: the cgroup of interest
|
||||
* @ss: the subsystem of interest
|
||||
*
|
||||
* Find and get the effective css of @cgrp for @ss. The effective css is
|
||||
* defined as the matching css of the nearest ancestor including self which
|
||||
* has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
|
||||
* the root css is returned, so this function always returns a valid css.
|
||||
*
|
||||
* The returned css is not guaranteed to be online, and therefore it is the
|
||||
* callers responsiblity to tryget a reference for it.
|
||||
*/
|
||||
struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
|
||||
struct cgroup_subsys *ss)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
do {
|
||||
css = cgroup_css(cgrp, ss);
|
||||
|
||||
if (css)
|
||||
return css;
|
||||
cgrp = cgroup_parent(cgrp);
|
||||
} while (cgrp);
|
||||
|
||||
return init_css_set.subsys[ss->id];
|
||||
}
|
||||
|
||||
/**
|
||||
* cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
|
||||
* @cgrp: the cgroup of interest
|
||||
@ -634,11 +605,10 @@ EXPORT_SYMBOL_GPL(of_css);
|
||||
*
|
||||
* Should be called under cgroup_[tree_]mutex.
|
||||
*/
|
||||
#define for_each_e_css(css, ssid, cgrp) \
|
||||
for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
|
||||
if (!((css) = cgroup_e_css_by_mask(cgrp, \
|
||||
cgroup_subsys[(ssid)]))) \
|
||||
; \
|
||||
#define for_each_e_css(css, ssid, cgrp) \
|
||||
for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
|
||||
if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
|
||||
; \
|
||||
else
|
||||
|
||||
/**
|
||||
@ -1037,7 +1007,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
|
||||
* @ss is in this hierarchy, so we want the
|
||||
* effective css from @cgrp.
|
||||
*/
|
||||
template[i] = cgroup_e_css_by_mask(cgrp, ss);
|
||||
template[i] = cgroup_e_css(cgrp, ss);
|
||||
} else {
|
||||
/*
|
||||
* @ss is not in this hierarchy, so we don't want
|
||||
@ -3054,7 +3024,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* At this point, cgroup_e_css_by_mask() results reflect the new csses
|
||||
* At this point, cgroup_e_css() results reflect the new csses
|
||||
* making the following cgroup_update_dfl_csses() properly update
|
||||
* css associations of all tasks in the subtree.
|
||||
*/
|
||||
|
@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
|
||||
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
|
||||
return NULL;
|
||||
|
||||
if (!bio->bi_blkg)
|
||||
if (!bio->bi_css)
|
||||
return NULL;
|
||||
return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
|
||||
return cgroup_get_kernfs_id(bio->bi_css->cgroup);
|
||||
}
|
||||
#else
|
||||
static union kernfs_node_id *
|
||||
|
@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
|
||||
goto out;
|
||||
}
|
||||
bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
|
||||
bio_associate_blkg_from_page(bio, page);
|
||||
bio_associate_blkcg_from_page(bio, page);
|
||||
count_swpout_vm_event(page);
|
||||
set_page_writeback(page);
|
||||
unlock_page(page);
|
||||
|
Loading…
Reference in New Issue
Block a user