2018-04-03 17:23:33 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2008-06-25 20:01:30 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2008 Oracle. All rights reserved.
|
|
|
|
*/
|
2018-04-03 17:23:33 +00:00
|
|
|
|
2008-06-25 20:01:30 +00:00
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/pagemap.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/page-flags.h>
|
2008-07-24 13:51:08 +00:00
|
|
|
#include <asm/bug.h>
|
2008-06-25 20:01:30 +00:00
|
|
|
#include "ctree.h"
|
|
|
|
#include "extent_io.h"
|
|
|
|
#include "locking.h"
|
|
|
|
|
2013-04-25 20:41:01 +00:00
|
|
|
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
|
2009-01-06 02:25:51 +00:00
|
|
|
|
2018-04-03 23:43:05 +00:00
|
|
|
void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* No lock is required. The lock owner may change if we have a read
|
|
|
|
* lock, but it won't change to or away from us. If we have the write
|
|
|
|
* lock, we are the owner and it'll never change.
|
|
|
|
*/
|
|
|
|
if (eb->lock_nested && current->pid == eb->lock_owner)
|
|
|
|
return;
|
|
|
|
btrfs_assert_tree_read_locked(eb);
|
|
|
|
atomic_inc(&eb->blocking_readers);
|
|
|
|
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
|
|
|
|
atomic_dec(&eb->spinning_readers);
|
|
|
|
read_unlock(&eb->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
|
2008-06-25 20:01:30 +00:00
|
|
|
{
|
2014-06-19 21:16:52 +00:00
|
|
|
/*
|
2018-04-03 23:43:05 +00:00
|
|
|
* No lock is required. The lock owner may change if we have a read
|
|
|
|
* lock, but it won't change to or away from us. If we have the write
|
|
|
|
* lock, we are the owner and it'll never change.
|
2014-06-19 21:16:52 +00:00
|
|
|
*/
|
|
|
|
if (eb->lock_nested && current->pid == eb->lock_owner)
|
|
|
|
return;
|
2018-04-03 23:43:05 +00:00
|
|
|
if (atomic_read(&eb->blocking_writers) == 0) {
|
|
|
|
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
|
|
|
|
atomic_dec(&eb->spinning_writers);
|
|
|
|
btrfs_assert_tree_locked(eb);
|
|
|
|
atomic_inc(&eb->blocking_writers);
|
|
|
|
write_unlock(&eb->lock);
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
}
|
|
|
|
}
|
2008-06-25 20:14:04 +00:00
|
|
|
|
2018-04-03 23:52:31 +00:00
|
|
|
void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* No lock is required. The lock owner may change if we have a read
|
|
|
|
* lock, but it won't change to or away from us. If we have the write
|
|
|
|
* lock, we are the owner and it'll never change.
|
|
|
|
*/
|
|
|
|
if (eb->lock_nested && current->pid == eb->lock_owner)
|
|
|
|
return;
|
|
|
|
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
|
|
|
|
read_lock(&eb->lock);
|
|
|
|
atomic_inc(&eb->spinning_readers);
|
|
|
|
/* atomic_dec_and_test implies a barrier */
|
|
|
|
if (atomic_dec_and_test(&eb->blocking_readers))
|
|
|
|
cond_wake_up_nomb(&eb->read_lock_wq);
|
|
|
|
}
|
|
|
|
|
|
|
|
void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
{
|
2014-06-19 21:16:52 +00:00
|
|
|
/*
|
|
|
|
* no lock is required. The lock owner may change if
|
|
|
|
* we have a read lock, but it won't change to or away
|
|
|
|
* from us. If we have the write lock, we are the owner
|
|
|
|
* and it'll never change.
|
|
|
|
*/
|
|
|
|
if (eb->lock_nested && current->pid == eb->lock_owner)
|
|
|
|
return;
|
2018-04-03 23:52:31 +00:00
|
|
|
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
|
|
|
|
write_lock(&eb->lock);
|
|
|
|
WARN_ON(atomic_read(&eb->spinning_writers));
|
|
|
|
atomic_inc(&eb->spinning_writers);
|
|
|
|
/* atomic_dec_and_test implies a barrier */
|
|
|
|
if (atomic_dec_and_test(&eb->blocking_writers))
|
|
|
|
cond_wake_up_nomb(&eb->write_lock_wq);
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2011-07-16 19:23:14 +00:00
|
|
|
* take a spinning read lock. This will wait for any blocking
|
|
|
|
* writers
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
*/
|
2011-07-16 19:23:14 +00:00
|
|
|
void btrfs_tree_read_lock(struct extent_buffer *eb)
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
{
|
2011-07-16 19:23:14 +00:00
|
|
|
again:
|
2014-06-19 21:16:52 +00:00
|
|
|
BUG_ON(!atomic_read(&eb->blocking_writers) &&
|
|
|
|
current->pid == eb->lock_owner);
|
|
|
|
|
2011-09-13 08:55:48 +00:00
|
|
|
read_lock(&eb->lock);
|
|
|
|
if (atomic_read(&eb->blocking_writers) &&
|
|
|
|
current->pid == eb->lock_owner) {
|
|
|
|
/*
|
|
|
|
* This extent is already write-locked by our thread. We allow
|
|
|
|
* an additional read lock to be added because it's for the same
|
|
|
|
* thread. btrfs_find_all_roots() depends on this as it may be
|
|
|
|
* called on a partly (write-)locked tree.
|
|
|
|
*/
|
|
|
|
BUG_ON(eb->lock_nested);
|
|
|
|
eb->lock_nested = 1;
|
|
|
|
read_unlock(&eb->lock);
|
|
|
|
return;
|
|
|
|
}
|
2011-07-16 19:23:14 +00:00
|
|
|
if (atomic_read(&eb->blocking_writers)) {
|
|
|
|
read_unlock(&eb->lock);
|
2012-12-27 09:01:22 +00:00
|
|
|
wait_event(eb->write_lock_wq,
|
|
|
|
atomic_read(&eb->blocking_writers) == 0);
|
2011-07-16 19:23:14 +00:00
|
|
|
goto again;
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
}
|
2011-07-16 19:23:14 +00:00
|
|
|
atomic_inc(&eb->read_locks);
|
|
|
|
atomic_inc(&eb->spinning_readers);
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
}
|
|
|
|
|
2014-11-19 18:25:09 +00:00
|
|
|
/*
|
|
|
|
* take a spinning read lock.
|
|
|
|
* returns 1 if we get the read lock and 0 if we don't
|
|
|
|
* this won't wait for blocking writers
|
|
|
|
*/
|
|
|
|
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
|
|
|
|
{
|
|
|
|
if (atomic_read(&eb->blocking_writers))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
read_lock(&eb->lock);
|
|
|
|
if (atomic_read(&eb->blocking_writers)) {
|
|
|
|
read_unlock(&eb->lock);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
atomic_inc(&eb->read_locks);
|
|
|
|
atomic_inc(&eb->spinning_readers);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
/*
|
2011-07-16 19:23:14 +00:00
|
|
|
* returns 1 if we get the read lock and 0 if we don't
|
|
|
|
* this won't wait for blocking writers
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
*/
|
2011-07-16 19:23:14 +00:00
|
|
|
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
{
|
2011-07-16 19:23:14 +00:00
|
|
|
if (atomic_read(&eb->blocking_writers))
|
|
|
|
return 0;
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
|
2014-06-19 21:16:52 +00:00
|
|
|
if (!read_trylock(&eb->lock))
|
|
|
|
return 0;
|
|
|
|
|
2011-07-16 19:23:14 +00:00
|
|
|
if (atomic_read(&eb->blocking_writers)) {
|
|
|
|
read_unlock(&eb->lock);
|
|
|
|
return 0;
|
2009-03-13 15:00:37 +00:00
|
|
|
}
|
2011-07-16 19:23:14 +00:00
|
|
|
atomic_inc(&eb->read_locks);
|
|
|
|
atomic_inc(&eb->spinning_readers);
|
|
|
|
return 1;
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2011-07-16 19:23:14 +00:00
|
|
|
* returns 1 if we get the read lock and 0 if we don't
|
|
|
|
* this won't wait for blocking writers or readers
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
*/
|
2011-07-16 19:23:14 +00:00
|
|
|
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
{
|
2011-07-16 19:23:14 +00:00
|
|
|
if (atomic_read(&eb->blocking_writers) ||
|
|
|
|
atomic_read(&eb->blocking_readers))
|
|
|
|
return 0;
|
2014-06-19 21:16:52 +00:00
|
|
|
|
2014-11-19 18:25:09 +00:00
|
|
|
write_lock(&eb->lock);
|
2011-07-16 19:23:14 +00:00
|
|
|
if (atomic_read(&eb->blocking_writers) ||
|
|
|
|
atomic_read(&eb->blocking_readers)) {
|
|
|
|
write_unlock(&eb->lock);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
atomic_inc(&eb->write_locks);
|
|
|
|
atomic_inc(&eb->spinning_writers);
|
2011-09-13 08:55:48 +00:00
|
|
|
eb->lock_owner = current->pid;
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2011-07-16 19:23:14 +00:00
|
|
|
* drop a spinning read lock
|
|
|
|
*/
|
|
|
|
void btrfs_tree_read_unlock(struct extent_buffer *eb)
|
|
|
|
{
|
2014-06-19 21:16:52 +00:00
|
|
|
/*
|
|
|
|
* if we're nested, we have the write lock. No new locking
|
|
|
|
* is needed as long as we are the lock owner.
|
|
|
|
* The write unlock will do a barrier for us, and the lock_nested
|
|
|
|
* field only matters to the lock owner.
|
|
|
|
*/
|
|
|
|
if (eb->lock_nested && current->pid == eb->lock_owner) {
|
|
|
|
eb->lock_nested = 0;
|
|
|
|
return;
|
2011-09-13 08:55:48 +00:00
|
|
|
}
|
2011-07-16 19:23:14 +00:00
|
|
|
btrfs_assert_tree_read_locked(eb);
|
|
|
|
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
|
|
|
|
atomic_dec(&eb->spinning_readers);
|
|
|
|
atomic_dec(&eb->read_locks);
|
|
|
|
read_unlock(&eb->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* drop a blocking read lock
|
|
|
|
*/
|
|
|
|
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
|
|
|
|
{
|
2014-06-19 21:16:52 +00:00
|
|
|
/*
|
|
|
|
* if we're nested, we have the write lock. No new locking
|
|
|
|
* is needed as long as we are the lock owner.
|
|
|
|
* The write unlock will do a barrier for us, and the lock_nested
|
|
|
|
* field only matters to the lock owner.
|
|
|
|
*/
|
|
|
|
if (eb->lock_nested && current->pid == eb->lock_owner) {
|
|
|
|
eb->lock_nested = 0;
|
|
|
|
return;
|
2011-09-13 08:55:48 +00:00
|
|
|
}
|
2011-07-16 19:23:14 +00:00
|
|
|
btrfs_assert_tree_read_locked(eb);
|
|
|
|
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
|
2018-02-26 15:15:17 +00:00
|
|
|
/* atomic_dec_and_test implies a barrier */
|
|
|
|
if (atomic_dec_and_test(&eb->blocking_readers))
|
|
|
|
cond_wake_up_nomb(&eb->read_lock_wq);
|
2011-07-16 19:23:14 +00:00
|
|
|
atomic_dec(&eb->read_locks);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* take a spinning write lock. This will wait for both
|
|
|
|
* blocking readers or writers
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
*/
|
2012-03-01 13:56:26 +00:00
|
|
|
void btrfs_tree_lock(struct extent_buffer *eb)
|
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock,
but some operations still need to schedule.
So far, btrfs has been using a mutex along with a trylock loop,
most of the time it is able to avoid going for the full mutex, so
the trylock loop is a big performance gain.
This commit is step one for getting rid of the blocking locks entirely.
btrfs_tree_lock takes a spinlock, and the code explicitly switches
to a blocking lock when it starts an operation that can schedule.
We'll be able get rid of the blocking locks in smaller pieces over time.
Tracing allows us to find the most common cause of blocking, so we
can start with the hot spots first.
The basic idea is:
btrfs_tree_lock() returns with the spin lock held
btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in
the extent buffer flags, and then drops the spin lock. The buffer is
still considered locked by all of the btrfs code.
If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops
the spin lock and waits on a wait queue for the blocking bit to go away.
Much of the code that needs to set the blocking bit finishes without actually
blocking a good percentage of the time. So, an adaptive spin is still
used against the blocking bit to avoid very high context switch rates.
btrfs_clear_lock_blocking() clears the blocking bit and returns
with the spinlock held again.
btrfs_tree_unlock() can be called on either blocking or spinning locks,
it does the right thing based on the blocking bit.
ctree.c has a helper function to set/clear all the locked buffers in a
path as blocking.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2009-02-04 14:25:08 +00:00
|
|
|
{
|
2015-08-06 14:39:36 +00:00
|
|
|
WARN_ON(eb->lock_owner == current->pid);
|
2011-07-16 19:23:14 +00:00
|
|
|
again:
|
|
|
|
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
|
|
|
|
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
|
|
|
|
write_lock(&eb->lock);
|
btrfs: simplify waiting loop in btrfs_tree_lock
Currently, the number of readers and writers is checked and in case
there are any, wait and redo the locks. There's some duplication
before the branches go back to again label, eg. calling wait_event on
blocking_readers twice.
The sequence is transformed
loop:
* wait for readers
* wait for writers
* write_lock
* check readers, unlock and wait for readers, loop
* check writers, unlock and wait for writers, loop
The new sequence is not exactly the same due to the simplification, for
readers it's slightly faster. For the writers, original code does
* wait for writers
* (loop) wait for readers
* wait for writers -- again
while the new goes directly to the reader check. This should behave the
same on a contended lock with multiple writers and readers, but can
reduce number of times we're waiting on something.
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: David Sterba <dsterba@suse.com>
2018-04-04 00:11:50 +00:00
|
|
|
if (atomic_read(&eb->blocking_readers) ||
|
|
|
|
atomic_read(&eb->blocking_writers)) {
|
2011-07-16 19:23:14 +00:00
|
|
|
write_unlock(&eb->lock);
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
WARN_ON(atomic_read(&eb->spinning_writers));
|
|
|
|
atomic_inc(&eb->spinning_writers);
|
|
|
|
atomic_inc(&eb->write_locks);
|
2011-09-13 08:55:48 +00:00
|
|
|
eb->lock_owner = current->pid;
|
2008-06-25 20:01:30 +00:00
|
|
|
}
|
|
|
|
|
2011-07-16 19:23:14 +00:00
|
|
|
/*
|
|
|
|
* drop a spinning or a blocking write lock.
|
|
|
|
*/
|
2012-03-01 13:56:26 +00:00
|
|
|
void btrfs_tree_unlock(struct extent_buffer *eb)
|
2008-06-25 20:01:30 +00:00
|
|
|
{
|
2011-07-16 19:23:14 +00:00
|
|
|
int blockers = atomic_read(&eb->blocking_writers);
|
|
|
|
|
|
|
|
BUG_ON(blockers > 1);
|
|
|
|
|
|
|
|
btrfs_assert_tree_locked(eb);
|
2014-06-19 21:16:52 +00:00
|
|
|
eb->lock_owner = 0;
|
2011-07-16 19:23:14 +00:00
|
|
|
atomic_dec(&eb->write_locks);
|
|
|
|
|
|
|
|
if (blockers) {
|
|
|
|
WARN_ON(atomic_read(&eb->spinning_writers));
|
|
|
|
atomic_dec(&eb->blocking_writers);
|
2018-02-26 15:15:17 +00:00
|
|
|
/* Use the lighter barrier after atomic */
|
2018-02-14 12:37:26 +00:00
|
|
|
smp_mb__after_atomic();
|
2018-02-26 15:15:17 +00:00
|
|
|
cond_wake_up_nomb(&eb->write_lock_wq);
|
2011-07-16 19:23:14 +00:00
|
|
|
} else {
|
|
|
|
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
|
|
|
|
atomic_dec(&eb->spinning_writers);
|
|
|
|
write_unlock(&eb->lock);
|
|
|
|
}
|
2008-06-25 20:01:30 +00:00
|
|
|
}
|
|
|
|
|
2009-03-09 15:45:38 +00:00
|
|
|
void btrfs_assert_tree_locked(struct extent_buffer *eb)
|
2008-06-25 20:01:30 +00:00
|
|
|
{
|
2011-07-16 19:23:14 +00:00
|
|
|
BUG_ON(!atomic_read(&eb->write_locks));
|
|
|
|
}
|
|
|
|
|
2013-04-25 20:41:01 +00:00
|
|
|
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
|
2011-07-16 19:23:14 +00:00
|
|
|
{
|
|
|
|
BUG_ON(!atomic_read(&eb->read_locks));
|
2008-06-25 20:01:30 +00:00
|
|
|
}
|