linux/drivers/target/target_core_tpg.c

681 lines
18 KiB
C
Raw Normal View History

/*******************************************************************************
* Filename: target_core_tpg.c
*
* This file contains generic Target Portal Group related functions.
*
* (c) Copyright 2002-2013 Datera, Inc.
*
* Nicholas A. Bellinger <nab@kernel.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
******************************************************************************/
#include <linux/net.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/in.h>
#include <linux/export.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <scsi/scsi_proto.h>
#include <target/target_core_base.h>
#include <target/target_core_backend.h>
#include <target/target_core_fabric.h>
#include "target_core_internal.h"
#include "target_core_alua.h"
#include "target_core_pr.h"
#include "target_core_ua.h"
/* Device used to back the per-TPG virtual LUN 0; defined outside this file. */
extern struct se_device *g_lun0_dev;

/* tpg_lock protects tpg_list, the list of all registered portal groups. */
static DEFINE_SPINLOCK(tpg_lock);
static LIST_HEAD(tpg_list);
/*
 * __core_tpg_get_initiator_node_acl - find a node ACL by initiator name
 * @tpg:           portal group whose acl_node_list is searched
 * @initiatorname: NUL-terminated name, compared exactly with strcmp()
 *
 * mutex_lock(&tpg->acl_node_mutex); must be held when calling.
 * No reference is taken on the returned entry.
 *
 * Returns the first matching se_node_acl, or NULL when nothing matches.
 */
struct se_node_acl *__core_tpg_get_initiator_node_acl(
	struct se_portal_group *tpg,
	const char *initiatorname)
{
	struct se_node_acl *nacl;

	list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) {
		if (strcmp(nacl->initiatorname, initiatorname) != 0)
			continue;
		return nacl;
	}

	return NULL;
}
/*
 * core_tpg_get_initiator_node_acl - look up a node ACL and take a reference
 * @tpg:           portal group to search
 * @initiatorname: fabric driver provided initiator name
 *
 * Used during node acl endpoint lookup driven by a new se_session login.
 * The lookup and the kref acquisition both happen under
 * tpg->acl_node_mutex.
 *
 * The reference on se_node_acl->acl_kref is held until se_session
 * shutdown -> release occurs via fabric driver invoked
 * transport_deregister_session() or transport_free_session() code.
 *
 * Returns the referenced se_node_acl, or NULL when no entry matches or
 * the matching entry's kref has already dropped to zero.
 */
struct se_node_acl *core_tpg_get_initiator_node_acl(
	struct se_portal_group *tpg,
	unsigned char *initiatorname)
{
	struct se_node_acl *nacl;

	mutex_lock(&tpg->acl_node_mutex);
	nacl = __core_tpg_get_initiator_node_acl(tpg, initiatorname);
	/* An entry whose refcount already hit zero is reported as absent. */
	if (nacl && !kref_get_unless_zero(&nacl->acl_kref))
		nacl = NULL;
	mutex_unlock(&tpg->acl_node_mutex);

	return nacl;
}
EXPORT_SYMBOL(core_tpg_get_initiator_node_acl);
void core_allocate_nexus_loss_ua(
struct se_node_acl *nacl)
{
struct se_dev_entry *deve;
if (!nacl)
return;
rcu_read_lock();
hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link)
core_scsi3_ua_allocate(deve, 0x29,
ASCQ_29H_NEXUS_LOSS_OCCURRED);
rcu_read_unlock();
}
EXPORT_SYMBOL(core_allocate_nexus_loss_ua);
/*
 * core_tpg_add_node_to_devs - create demo-mode LUN mappings for a node ACL
 * @acl:      node ACL receiving the mappings
 * @tpg:      portal group whose tpg_lun_hlist is walked
 * @lun_orig: when non-NULL, only this LUN is mapped; when NULL, every LUN
 *            on the portal group is mapped
 *
 * For each selected LUN, enables a device list entry for @acl using the
 * LUN's own unpacked_lun as the mapped LUN, then checks for persistent
 * reservation APTPL pre-registrations.  The whole walk runs with
 * tpg->tpg_lun_mutex held.
 */
void core_tpg_add_node_to_devs(
	struct se_node_acl *acl,
	struct se_portal_group *tpg,
	struct se_lun *lun_orig)
{
	bool lun_access_ro;
	struct se_lun *lun;
	struct se_device *dev;

	mutex_lock(&tpg->tpg_lun_mutex);
	hlist_for_each_entry_rcu(lun, &tpg->tpg_lun_hlist, link) {
		if (lun_orig && lun != lun_orig)
			continue;

		dev = rcu_dereference_check(lun->lun_se_dev,
				lockdep_is_held(&tpg->tpg_lun_mutex));
		/*
		 * By default in LIO-Target $FABRIC_MOD,
		 * demo_mode_write_protect is ON, or READ_ONLY;
		 *
		 * With write protect on, only TYPE_DISK devices are mapped
		 * READ-ONLY; every other device type keeps READ-WRITE
		 * access.  With write protect off, everything is READ-WRITE
		 * and the device type is not queried.
		 */
		lun_access_ro =
			tpg->se_tpg_tfo->tpg_check_demo_mode_write_protect(tpg) &&
			dev->transport->get_device_type(dev) == TYPE_DISK;

		pr_debug("TARGET_CORE[%s]->TPG[%u]_LUN[%llu] - Adding %s"
			" access for LUN in Demo Mode\n",
			tpg->se_tpg_tfo->get_fabric_name(),
			tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun,
			lun_access_ro ? "READ-ONLY" : "READ-WRITE");

		core_enable_device_list_for_node(lun, NULL, lun->unpacked_lun,
						 lun_access_ro, acl, tpg);
		/*
		 * Check to see if there are any existing persistent reservation
		 * APTPL pre-registrations that need to be enabled for this dynamic
		 * LUN ACL now..
		 */
		core_scsi3_check_aptpl_registration(dev, tpg, lun, acl,
						    lun->unpacked_lun);
	}
	mutex_unlock(&tpg->tpg_lun_mutex);
}
/*
 * target_set_nacl_queue_depth - store a queue depth on a node ACL
 * @tpg:         portal group, used only for the fabric name in the warning
 * @acl:         node ACL to update
 * @queue_depth: requested depth
 *
 * A requested depth of 0 is not kept: a warning is logged and
 * acl->queue_depth is set to 1 instead.
 */
static void
target_set_nacl_queue_depth(struct se_portal_group *tpg,
			    struct se_node_acl *acl, u32 queue_depth)
{
	acl->queue_depth = queue_depth;

	if (!acl->queue_depth) {
		pr_warn("Queue depth for %s Initiator Node: %s is 0, defaulting to 1.\n",
			tpg->se_tpg_tfo->get_fabric_name(),
			acl->initiatorname);
		acl->queue_depth = 1;
	}
}
static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg,
const unsigned char *initiatorname)
{
struct se_node_acl *acl;
u32 queue_depth;
acl = kzalloc(max(sizeof(*acl), tpg->se_tpg_tfo->node_acl_size),
GFP_KERNEL);
if (!acl)
return NULL;
INIT_LIST_HEAD(&acl->acl_list);
INIT_LIST_HEAD(&acl->acl_sess_list);
target: Convert se_node_acl->device_list[] to RCU hlist This patch converts se_node_acl->device_list[] table for mappedluns to modern RCU hlist_head usage in order to support an arbitrary number of node_acl lun mappings. It converts transport_lookup_*_lun() fast-path code to use RCU read path primitives when looking up se_dev_entry. It adds a new hlist_head at se_node_acl->lun_entry_hlist for this purpose. For transport_lookup_cmd_lun() code, it works with existing per-cpu se_lun->lun_ref when associating se_cmd with se_lun + se_device. Also, go ahead and update core_create_device_list_for_node() + core_free_device_list_for_node() to use ->lun_entry_hlist. It also converts se_dev_entry->pr_ref_count access to use modern struct kref counting, and updates core_disable_device_list_for_node() to kref_put() and block on se_deve->pr_comp waiting for outstanding PR special-case PR references to drop, then invoke kfree_rcu() to wait for the RCU grace period to complete before releasing memory. So now that se_node_acl->lun_entry_hlist fast path access uses RCU protected pointers, go ahead and convert remaining non-fast path RCU updater code using ->lun_entry_lock to struct mutex to allow callers to block while walking se_node_acl->lun_entry_hlist. Finally drop the left-over core_clear_initiator_node_from_tpg() that originally cleared lun_access during se_node_acl shutdown, as post RCU conversion it now becomes duplicated logic. Reviewed-by: Hannes Reinecke <hare@suse.de> Cc: Christoph Hellwig <hch@lst.de> Cc: Sagi Grimberg <sagig@mellanox.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
2015-03-23 03:42:19 +00:00
INIT_HLIST_HEAD(&acl->lun_entry_hlist);
kref_init(&acl->acl_kref);
init_completion(&acl->acl_free_comp);
spin_lock_init(&acl->nacl_sess_lock);
target: Convert se_node_acl->device_list[] to RCU hlist This patch converts se_node_acl->device_list[] table for mappedluns to modern RCU hlist_head usage in order to support an arbitrary number of node_acl lun mappings. It converts transport_lookup_*_lun() fast-path code to use RCU read path primitives when looking up se_dev_entry. It adds a new hlist_head at se_node_acl->lun_entry_hlist for this purpose. For transport_lookup_cmd_lun() code, it works with existing per-cpu se_lun->lun_ref when associating se_cmd with se_lun + se_device. Also, go ahead and update core_create_device_list_for_node() + core_free_device_list_for_node() to use ->lun_entry_hlist. It also converts se_dev_entry->pr_ref_count access to use modern struct kref counting, and updates core_disable_device_list_for_node() to kref_put() and block on se_deve->pr_comp waiting for outstanding PR special-case PR references to drop, then invoke kfree_rcu() to wait for the RCU grace period to complete before releasing memory. So now that se_node_acl->lun_entry_hlist fast path access uses RCU protected pointers, go ahead and convert remaining non-fast path RCU updater code using ->lun_entry_lock to struct mutex to allow callers to block while walking se_node_acl->lun_entry_hlist. Finally drop the left-over core_clear_initiator_node_from_tpg() that originally cleared lun_access during se_node_acl shutdown, as post RCU conversion it now becomes duplicated logic. Reviewed-by: Hannes Reinecke <hare@suse.de> Cc: Christoph Hellwig <hch@lst.de> Cc: Sagi Grimberg <sagig@mellanox.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
2015-03-23 03:42:19 +00:00
mutex_init(&acl->lun_entry_mutex);
atomic_set(&acl->acl_pr_ref_count, 0);
if (tpg->se_tpg_tfo->tpg_get_default_depth)
queue_depth = tpg->se_tpg_tfo->tpg_get_default_depth(tpg);
else
queue_depth = 1;
target_set_nacl_queue_depth(tpg, acl, queue_depth);
snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname);
acl->se_tpg = tpg;
acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX);
2011-07-19 08:55:10 +00:00
tpg->se_tpg_tfo->set_default_node_attributes(acl);
return acl;
}
/*
 * target_add_node_acl - link a node ACL onto its portal group
 * @acl: ACL to add; acl->se_tpg selects the portal group
 *
 * Appends @acl to the tail of se_tpg->acl_node_list under acl_node_mutex
 * and emits a debug message once the mutex has been released.
 */
static void target_add_node_acl(struct se_node_acl *acl)
{
	struct se_portal_group *se_tpg = acl->se_tpg;
	const char *kind;

	mutex_lock(&se_tpg->acl_node_mutex);
	list_add_tail(&acl->acl_list, &se_tpg->acl_node_list);
	mutex_unlock(&se_tpg->acl_node_mutex);

	if (acl->dynamic_node_acl)
		kind = "DYNAMIC";
	else
		kind = "";

	pr_debug("%s_TPG[%hu] - Added %s ACL with TCQ Depth: %d for %s"
		" Initiator Node: %s\n",
		se_tpg->se_tpg_tfo->get_fabric_name(),
		se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg),
		kind,
		acl->queue_depth,
		se_tpg->se_tpg_tfo->get_fabric_name(),
		acl->initiatorname);
}
/*
 * target_tpg_has_node_acl - test whether a node ACL exists for a name
 * @tpg:           portal group to search
 * @initiatorname: NUL-terminated name, compared exactly with strcmp()
 *
 * Walks tpg->acl_node_list under acl_node_mutex.  No reference is taken.
 *
 * Returns true when an entry with a matching initiatorname is present.
 */
bool target_tpg_has_node_acl(struct se_portal_group *tpg,
			     const char *initiatorname)
{
	struct se_node_acl *nacl;
	bool found = false;

	mutex_lock(&tpg->acl_node_mutex);
	list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) {
		found = (strcmp(nacl->initiatorname, initiatorname) == 0);
		if (found)
			break;
	}
	mutex_unlock(&tpg->acl_node_mutex);

	return found;
}
EXPORT_SYMBOL(target_tpg_has_node_acl);
struct se_node_acl *core_tpg_check_initiator_node_acl(
struct se_portal_group *tpg,
unsigned char *initiatorname)
{
struct se_node_acl *acl;
acl = core_tpg_get_initiator_node_acl(tpg, initiatorname);
if (acl)
return acl;
if (!tpg->se_tpg_tfo->tpg_check_demo_mode(tpg))
return NULL;
acl = target_alloc_node_acl(tpg, initiatorname);
if (!acl)
return NULL;
/*
* When allocating a dynamically generated node_acl, go ahead
* and take the extra kref now before returning to the fabric
* driver caller.
*
* Note this reference will be released at session shutdown
* time within transport_free_session() code.
*/
kref_get(&acl->acl_kref);
acl->dynamic_node_acl = 1;
/*
* Here we only create demo-mode MappedLUNs from the active
* TPG LUNs if the fabric is not explicitly asking for
* tpg_check_demo_mode_login_only() == 1.
*/
if ((tpg->se_tpg_tfo->tpg_check_demo_mode_login_only == NULL) ||
(tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg) != 1))
core_tpg_add_node_to_devs(acl, tpg, NULL);
target_add_node_acl(acl);
return acl;
}
EXPORT_SYMBOL(core_tpg_check_initiator_node_acl);
/*
 * core_tpg_wait_for_nacl_pr_ref - busy-wait until PR references are gone
 * @nacl: node ACL whose acl_pr_ref_count is polled
 *
 * Spins with cpu_relax() until nacl->acl_pr_ref_count reads zero.
 */
void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *nacl)
{
	for (;;) {
		if (atomic_read(&nacl->acl_pr_ref_count) == 0)
			break;
		cpu_relax();
	}
}
/*
 * core_tpg_add_initiator_node_acl - add an explicit node ACL
 * @tpg:           portal group to add the ACL to
 * @initiatorname: initiator name for the ACL
 *
 * When an ACL with this name already exists and is dynamic, it is
 * converted in place by clearing dynamic_node_acl and returned.  When it
 * exists and is not dynamic, the request is rejected.  Otherwise a new
 * ACL is allocated and linked onto the portal group.
 *
 * The lookup runs under tpg->acl_node_mutex; the mutex is released
 * before the allocation and re-taken inside target_add_node_acl().
 *
 * Returns the ACL on success, ERR_PTR(-EEXIST) for an existing
 * non-dynamic entry, or ERR_PTR(-ENOMEM) when the allocation fails.
 */
struct se_node_acl *core_tpg_add_initiator_node_acl(
	struct se_portal_group *tpg,
	const char *initiatorname)
{
	struct se_node_acl *acl;

	mutex_lock(&tpg->acl_node_mutex);
	acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname);
	if (acl) {
		if (acl->dynamic_node_acl) {
			acl->dynamic_node_acl = 0;
			pr_debug("%s_TPG[%u] - Replacing dynamic ACL"
				" for %s\n", tpg->se_tpg_tfo->get_fabric_name(),
				tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname);
			mutex_unlock(&tpg->acl_node_mutex);
			return acl;
		}

		pr_err("ACL entry for %s Initiator"
			" Node %s already exists for TPG %u, ignoring"
			" request.\n", tpg->se_tpg_tfo->get_fabric_name(),
			initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg));
		mutex_unlock(&tpg->acl_node_mutex);
		return ERR_PTR(-EEXIST);
	}
	mutex_unlock(&tpg->acl_node_mutex);

	acl = target_alloc_node_acl(tpg, initiatorname);
	if (!acl)
		return ERR_PTR(-ENOMEM);

	target_add_node_acl(acl);
	return acl;
}
static void target_shutdown_sessions(struct se_node_acl *acl)
{
struct se_session *sess;
target: Convert session_lock to irqsave This patch converts the remaining struct se_portal_group->session_lock usage to use irqsave+irqrestore to address the following warnings for hardware target mode interrupt context usage. This change generate other warnings for current iscsi-target mode still using ->session_lock with spin_lock_bh, which will need to be converted in a seperate patch. [ 492.480728] [ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ] [ 492.488194] 3.0.0+ #23 [ 492.490820] ------------------------------------------------------ [ 492.497704] sh/7162 [HC0[0]:SC0[2]:HE0:SE0] is trying to acquire: [ 492.504493] (&(&se_tpg->session_lock)->rlock){+.....}, at: [<ffffffffa022364d>] transport_deregister_session+0x2d/0x163 [target_core_mod] 492.518390] [ 492.518390] and this task is already holding: [ 492.524897] (&(&ha->hardware_lock)->rlock){-.-...}, at: [<ffffffffa00b9146>] qla_tgt_stop_phase1+0x5e/0x27e [qla2xxx] [ 492.536856] which would create a new lock dependency: [ 492.542481] (&(&ha->hardware_lock)->rlock){-.-...} -> (&(&se_tpg->session_lock)->rlock){+.....} [ 492.552321] [ 492.552321] but this new dependency connects a HARDIRQ-irq-safe lock: [ 492.561149] (&(&ha->hardware_lock)->rlock){-.-...} [ 492.566400] ... 
which became HARDIRQ-irq-safe at: [ 492.571841] [<ffffffff81064720>] __lock_acquire+0x68f/0x921 [ 492.578247] [<ffffffff81064eff>] lock_acquire+0xe0/0x10d [ 492.584367] [<ffffffff813a74c6>] _raw_spin_lock_irqsave+0x44/0x56 [ 492.591358] [<ffffffffa009b1be>] qla24xx_msix_default+0x5c/0x2aa [qla2xxx] [ 492.599227] [<ffffffff81088582>] handle_irq_event_percpu+0x5a/0x197 [ 492.606413] [<ffffffff810886fb>] handle_irq_event+0x3c/0x5c [ 492.612822] [<ffffffff8108a6dc>] handle_edge_irq+0xcc/0xf1 [ 492.619138] [<ffffffff810039b9>] handle_irq+0x83/0x8e [ 492.624971] [<ffffffff8100333e>] do_IRQ+0x48/0xaf [ 492.630413] [<ffffffff813a7cd3>] ret_from_intr+0x0/0x1a [ 492.636437] [<ffffffff81001dc1>] cpu_idle+0x5b/0x8d [ 492.642073] [<ffffffff81392709>] rest_init+0xad/0xb4 [ 492.647809] [<ffffffff81a1cbbc>] start_kernel+0x366/0x371 [ 492.654030] [<ffffffff81a1c2b1>] x86_64_start_reservations+0xb8/0xbc [ 492.661311] [<ffffffff81a1c3b6>] x86_64_start_kernel+0x101/0x110 [ 492.668204] [ 492.668205] to a HARDIRQ-irq-unsafe lock: [ 492.674324] (&(&se_tpg->session_lock)->rlock){+.....} [ 492.679862] ... which became HARDIRQ-irq-unsafe at: [ 492.685497] ... 
[<ffffffff8106479a>] __lock_acquire+0x709/0x921 [ 492.692209] [<ffffffff81064eff>] lock_acquire+0xe0/0x10d [ 492.698330] [<ffffffff813a75ed>] _raw_spin_lock_bh+0x31/0x40 [ 492.704836] [<ffffffffa021c208>] core_tpg_del_initiator_node_acl+0x89/0x336 [target_core_mod] [ 492.714546] [<ffffffffa02fb075>] tcm_qla2xxx_drop_nodeacl+0x20/0x2d [tcm_qla2xxx] [ 492.723087] [<ffffffffa02108d9>] target_fabric_nacl_base_release+0x22/0x24 [target_core_mod] [ 492.732698] [<ffffffffa01661c8>] config_item_release+0x7d/0xa3 [configfs] [ 492.740465] [<ffffffff811d48fe>] kref_put+0x43/0x4d [ 492.746101] [<ffffffffa0166149>] config_item_put+0x19/0x1b [configfs] [ 492.753481] [<ffffffffa0164987>] configfs_rmdir+0x1eb/0x258 [configfs] [ 492.760957] [<ffffffff810ecc54>] vfs_rmdir+0x79/0xd0 [ 492.766690] [<ffffffff810eec4a>] do_rmdir+0xc2/0x111 [ 492.772423] [<ffffffff810eecd0>] sys_rmdir+0x11/0x13 [ 492.778156] [<ffffffff813ae4d2>] system_call_fastpath+0x16/0x1b [ 492.784953] Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
2011-08-31 19:34:39 +00:00
unsigned long flags;
restart:
spin_lock_irqsave(&acl->nacl_sess_lock, flags);
list_for_each_entry(sess, &acl->acl_sess_list, sess_acl_list) {
if (sess->sess_tearing_down)
continue;
list_del_init(&sess->sess_acl_list);
spin_unlock_irqrestore(&acl->nacl_sess_lock, flags);
if (acl->se_tpg->se_tpg_tfo->close_session)
acl->se_tpg->se_tpg_tfo->close_session(sess);
goto restart;
}
spin_unlock_irqrestore(&acl->nacl_sess_lock, flags);
}
/*
 * core_tpg_del_initiator_node_acl - remove and free a node ACL
 * @acl: ACL to delete; acl->se_tpg selects the portal group
 *
 * Unlinks @acl from the portal group's acl_node_list, shuts down its
 * sessions, drops a reference with target_put_nacl(), and then blocks
 * until acl_free_comp completes and acl_pr_ref_count reaches zero before
 * releasing the mapped LUN entries and freeing the ACL.
 */
void core_tpg_del_initiator_node_acl(struct se_node_acl *acl)
{
	struct se_portal_group *tpg = acl->se_tpg;

	mutex_lock(&tpg->acl_node_mutex);
	if (acl->dynamic_node_acl)
		acl->dynamic_node_acl = 0;
	list_del(&acl->acl_list);
	mutex_unlock(&tpg->acl_node_mutex);

	target_shutdown_sessions(acl);

	target_put_nacl(acl);
	/*
	 * Wait for last target_put_nacl() to complete in target_complete_nacl()
	 * for active fabric session transport_deregister_session() callbacks.
	 */
	wait_for_completion(&acl->acl_free_comp);

	core_tpg_wait_for_nacl_pr_ref(acl);
	core_free_device_list_for_node(acl, tpg);

	pr_debug("%s_TPG[%hu] - Deleted ACL with TCQ Depth: %d for %s"
		" Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(),
		tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->queue_depth,
		tpg->se_tpg_tfo->get_fabric_name(), acl->initiatorname);

	kfree(acl);
}
/*
 * core_tpg_set_initiator_node_queue_depth - change a node ACL's queue depth
 * @acl:         node ACL to update
 * @queue_depth: new depth; 0 is replaced by 1 in
 *               target_set_nacl_queue_depth()
 *
 * When the depth actually changes, all sessions of @acl are shut down to
 * force session reinstatement.
 *
 * Always returns 0.
 */
int core_tpg_set_initiator_node_queue_depth(
	struct se_node_acl *acl,
	u32 queue_depth)
{
	struct se_portal_group *tpg = acl->se_tpg;

	/*
	 * Allow the setting of se_node_acl queue_depth to be idempotent,
	 * and not force a session shutdown event if the value is not
	 * changing.
	 */
	if (acl->queue_depth == queue_depth)
		return 0;
	/*
	 * User has requested to change the queue depth for a Initiator Node.
	 * Change the value in the Node's struct se_node_acl, and call
	 * target_set_nacl_queue_depth() to set the new queue depth.
	 */
	target_set_nacl_queue_depth(tpg, acl, queue_depth);

	/*
	 * Shutdown all pending sessions to force session reinstatement.
	 */
	target_shutdown_sessions(acl);

	pr_debug("Successfully changed queue depth to: %d for Initiator"
		" Node: %s on %s Target Portal Group: %u\n", acl->queue_depth,
		acl->initiatorname, tpg->se_tpg_tfo->get_fabric_name(),
		tpg->se_tpg_tfo->tpg_get_tag(tpg));

	return 0;
}
EXPORT_SYMBOL(core_tpg_set_initiator_node_queue_depth);
/*
 * core_tpg_set_initiator_node_tag - set or clear a node ACL's tag
 * @tpg:     portal group (not used by this function)
 * @acl:     node ACL whose acl_tag is written
 * @new_tag: NUL-terminated tag; a value whose first four characters are
 *           "NULL" clears the tag instead of storing it
 *
 * Initiator nodeacl tags are not used internally, but may be used by
 * userspace to emulate aliases or groups.
 *
 * Returns the length of the newly-set tag, 0 when the tag was cleared,
 * or -EINVAL when @new_tag does not fit in MAX_ACL_TAG_SIZE.
 */
int core_tpg_set_initiator_node_tag(
	struct se_portal_group *tpg,
	struct se_node_acl *acl,
	const char *new_tag)
{
	if (strlen(new_tag) >= MAX_ACL_TAG_SIZE)
		return -EINVAL;

	if (strncmp("NULL", new_tag, 4) != 0)
		return snprintf(acl->acl_tag, MAX_ACL_TAG_SIZE, "%s", new_tag);

	acl->acl_tag[0] = '\0';
	return 0;
}
EXPORT_SYMBOL(core_tpg_set_initiator_node_tag);
static void core_tpg_lun_ref_release(struct percpu_ref *ref)
{
struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
target: Fix NULL dereference during LUN lookup + active I/O shutdown When transport_clear_lun_ref() is shutting down a se_lun via configfs with new I/O in-flight, it's possible to trigger a NULL pointer dereference in transport_lookup_cmd_lun() due to the fact percpu_ref_get() doesn't do any __PERCPU_REF_DEAD checking before incrementing lun->lun_ref.count after lun->lun_ref has switched to atomic_t mode. This results in a NULL pointer dereference as LUN shutdown code in core_tpg_remove_lun() continues running after the existing ->release() -> core_tpg_lun_ref_release() callback completes, and clears the RCU protected se_lun->lun_se_dev pointer. During the OOPs, the state of lun->lun_ref in the process which triggered the NULL pointer dereference looks like the following on v4.1.y stable code: struct se_lun { lun_link_magic = 4294932337, lun_status = TRANSPORT_LUN_STATUS_FREE, ..... lun_se_dev = 0x0, lun_sep = 0x0, ..... lun_ref = { count = { counter = 1 }, percpu_count_ptr = 3, release = 0xffffffffa02fa1e0 <core_tpg_lun_ref_release>, confirm_switch = 0x0, force_atomic = false, rcu = { next = 0xffff88154fa1a5d0, func = 0xffffffff8137c4c0 <percpu_ref_switch_to_atomic_rcu> } } } To address this bug, use percpu_ref_tryget_live() to ensure once __PERCPU_REF_DEAD is visable on all CPUs and ->lun_ref has switched to atomic_t, all new I/Os will fail to obtain a new lun->lun_ref reference. Also use an explicit percpu_ref_kill_and_confirm() callback to block on ->lun_ref_comp to allow the first stage and associated RCU grace period to complete, and then block on ->lun_ref_shutdown waiting for the final percpu_ref_put() to drop the last reference via transport_lun_remove_cmd() before continuing with core_tpg_remove_lun() shutdown. Reported-by: Rob Millner <rlm@daterainc.com> Tested-by: Rob Millner <rlm@daterainc.com> Cc: Rob Millner <rlm@daterainc.com> Tested-by: Vaibhav Tandon <vst@datera.io> Cc: Vaibhav Tandon <vst@datera.io> Tested-by: Bryant G. 
Ly <bryantly@linux.vnet.ibm.com> Cc: <stable@vger.kernel.org> # v3.14+ Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
2017-02-23 06:06:32 +00:00
complete(&lun->lun_shutdown_comp);
}
/*
 * core_tpg_register - initialise a portal group and add it to tpg_list
 * @se_wwn:   owning WWN, or NULL when the caller has already set
 *            se_tpg->se_tpg_tfo itself
 * @se_tpg:   portal group to initialise
 * @proto_id: protocol identifier stored in se_tpg->proto_id; a value
 *            >= 0 also creates virtual LUN 0 backed by g_lun0_dev
 *
 * Does not change se_wwn->priv.
 *
 * Returns 0 on success, -EINVAL when @se_tpg is NULL or no fabric ops
 * pointer can be found, or the error from core_tpg_alloc_lun() /
 * core_tpg_add_lun() when setting up virtual LUN 0 fails.
 */
int core_tpg_register(
	struct se_wwn *se_wwn,
	struct se_portal_group *se_tpg,
	int proto_id)
{
	int ret;

	if (!se_tpg)
		return -EINVAL;
	/*
	 * For the typical case where core_tpg_register() is called by a
	 * fabric driver from target_core_fabric_ops->fabric_make_tpg()
	 * configfs context, use the original tf_ops pointer already saved
	 * by target-core in target_fabric_make_wwn().
	 *
	 * Otherwise, for special cases like iscsi-target discovery TPGs
	 * the caller is responsible for setting ->se_tpg_tfo ahead of
	 * calling core_tpg_register().
	 */
	if (se_wwn)
		se_tpg->se_tpg_tfo = se_wwn->wwn_tf->tf_ops;

	if (!se_tpg->se_tpg_tfo) {
		pr_err("Unable to locate se_tpg->se_tpg_tfo pointer\n");
		return -EINVAL;
	}

	INIT_HLIST_HEAD(&se_tpg->tpg_lun_hlist);
	se_tpg->proto_id = proto_id;
	se_tpg->se_tpg_wwn = se_wwn;
	atomic_set(&se_tpg->tpg_pr_ref_count, 0);
	INIT_LIST_HEAD(&se_tpg->acl_node_list);
	INIT_LIST_HEAD(&se_tpg->se_tpg_node);
	INIT_LIST_HEAD(&se_tpg->tpg_sess_list);
	spin_lock_init(&se_tpg->session_lock);
	mutex_init(&se_tpg->tpg_lun_mutex);
	mutex_init(&se_tpg->acl_node_mutex);

	if (se_tpg->proto_id >= 0) {
		se_tpg->tpg_virt_lun0 = core_tpg_alloc_lun(se_tpg, 0);
		if (IS_ERR(se_tpg->tpg_virt_lun0))
			return PTR_ERR(se_tpg->tpg_virt_lun0);

		/* Virtual LUN 0 is always added read-only. */
		ret = core_tpg_add_lun(se_tpg, se_tpg->tpg_virt_lun0,
				       true, g_lun0_dev);
		if (ret < 0) {
			kfree(se_tpg->tpg_virt_lun0);
			return ret;
		}
	}

	spin_lock_bh(&tpg_lock);
	list_add_tail(&se_tpg->se_tpg_node, &tpg_list);
	spin_unlock_bh(&tpg_lock);

	pr_debug("TARGET_CORE[%s]: Allocated portal_group for endpoint: %s, "
		 "Proto: %d, Portal Tag: %u\n", se_tpg->se_tpg_tfo->get_fabric_name(),
		se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg) ?
		se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg) : NULL,
		se_tpg->proto_id, se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg));

	return 0;
}
EXPORT_SYMBOL(core_tpg_register);
/*
 * core_tpg_deregister - remove a portal group from tpg_list and free its ACLs
 * @se_tpg: portal group to tear down
 *
 * Unlinks @se_tpg from tpg_list, busy-waits until tpg_pr_ref_count drops
 * to zero, frees every node ACL still on acl_node_list, and, when
 * proto_id >= 0, removes and frees virtual LUN 0.
 *
 * Always returns 0.
 */
int core_tpg_deregister(struct se_portal_group *se_tpg)
{
	const struct target_core_fabric_ops *tfo = se_tpg->se_tpg_tfo;
	struct se_node_acl *nacl, *nacl_tmp;
	LIST_HEAD(node_list);

	pr_debug("TARGET_CORE[%s]: Deallocating portal_group for endpoint: %s, "
		 "Proto: %d, Portal Tag: %u\n", tfo->get_fabric_name(),
		tfo->tpg_get_wwn(se_tpg) ? tfo->tpg_get_wwn(se_tpg) : NULL,
		se_tpg->proto_id, tfo->tpg_get_tag(se_tpg));

	spin_lock_bh(&tpg_lock);
	list_del(&se_tpg->se_tpg_node);
	spin_unlock_bh(&tpg_lock);

	while (atomic_read(&se_tpg->tpg_pr_ref_count) != 0)
		cpu_relax();

	/* Move the ACLs to a private list so they can be freed unlocked. */
	mutex_lock(&se_tpg->acl_node_mutex);
	list_splice_init(&se_tpg->acl_node_list, &node_list);
	mutex_unlock(&se_tpg->acl_node_mutex);
	/*
	 * Release any remaining demo-mode generated se_node_acl that have
	 * not been released because of TFO->tpg_check_demo_mode_cache() == 1
	 * in transport_deregister_session().
	 */
	list_for_each_entry_safe(nacl, nacl_tmp, &node_list, acl_list) {
		list_del(&nacl->acl_list);

		core_tpg_wait_for_nacl_pr_ref(nacl);
		core_free_device_list_for_node(nacl, se_tpg);
		kfree(nacl);
	}

	if (se_tpg->proto_id >= 0) {
		core_tpg_remove_lun(se_tpg, se_tpg->tpg_virt_lun0);
		kfree_rcu(se_tpg->tpg_virt_lun0, rcu_head);
	}

	return 0;
}
EXPORT_SYMBOL(core_tpg_deregister);
/*
 * core_tpg_alloc_lun - allocate and initialise a struct se_lun
 * @tpg:          portal group stored in lun->lun_tpg
 * @unpacked_lun: LUN number stored in lun->unpacked_lun
 *
 * Only initialises the structure; the LUN is not attached to a device
 * and lun_ref is not initialised here (core_tpg_add_lun() does both).
 *
 * Returns the new se_lun, or ERR_PTR(-ENOMEM) when the allocation fails.
 */
struct se_lun *core_tpg_alloc_lun(
	struct se_portal_group *tpg,
	u64 unpacked_lun)
{
	struct se_lun *lun;

	lun = kzalloc(sizeof(*lun), GFP_KERNEL);
	if (!lun) {
		pr_err("Unable to allocate se_lun memory\n");
		return ERR_PTR(-ENOMEM);
	}
	lun->unpacked_lun = unpacked_lun;
	lun->lun_link_magic = SE_LUN_LINK_MAGIC;
	atomic_set(&lun->lun_acl_count, 0);
	init_completion(&lun->lun_ref_comp);
	/* Completed by core_tpg_lun_ref_release() when lun_ref is released. */
	init_completion(&lun->lun_shutdown_comp);
	INIT_LIST_HEAD(&lun->lun_deve_list);
	INIT_LIST_HEAD(&lun->lun_dev_link);
	atomic_set(&lun->lun_tg_pt_secondary_offline, 0);
	spin_lock_init(&lun->lun_deve_lock);
	mutex_init(&lun->lun_tg_pt_md_mutex);
	INIT_LIST_HEAD(&lun->lun_tg_pt_gp_link);
	spin_lock_init(&lun->lun_tg_pt_gp_lock);
	lun->lun_tpg = tpg;

	return lun;
}
/*
 * core_tpg_add_lun - attach a LUN to a device and publish it on a TPG
 * @tpg:           portal group the LUN belongs to
 * @lun:           LUN previously set up by core_tpg_alloc_lun()
 * @lun_access_ro: requested read-only setting; overridden to true when
 *                 the device has DF_READ_ONLY set
 * @dev:           backing device
 *
 * Initialises lun->lun_ref, allocates an RTPI, links the LUN onto
 * dev->dev_sep_list and bumps dev->export_count.  The LUN is added to
 * tpg->tpg_lun_hlist unless the device's HBA has HBA_FLAGS_INTERNAL_USE
 * set.
 *
 * Returns 0 on success, or the error from percpu_ref_init() or
 * core_alloc_rtpi().
 */
int core_tpg_add_lun(
	struct se_portal_group *tpg,
	struct se_lun *lun,
	bool lun_access_ro,
	struct se_device *dev)
{
	int ret;

	ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, 0,
			      GFP_KERNEL);
	if (ret < 0)
		goto out;

	ret = core_alloc_rtpi(lun, dev);
	if (ret)
		goto out_kill_ref;

	/*
	 * The default target port group is attached unless the backend
	 * sets TRANSPORT_FLAG_PASSTHROUGH_ALUA or the HBA is for internal
	 * use.
	 */
	if (!(dev->transport->transport_flags &
	     TRANSPORT_FLAG_PASSTHROUGH_ALUA) &&
	    !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
		target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp);

	mutex_lock(&tpg->tpg_lun_mutex);

	spin_lock(&dev->se_port_lock);
	lun->lun_index = dev->dev_index;
	rcu_assign_pointer(lun->lun_se_dev, dev);
	dev->export_count++;
	list_add_tail(&lun->lun_dev_link, &dev->dev_sep_list);
	spin_unlock(&dev->se_port_lock);

	if (dev->dev_flags & DF_READ_ONLY)
		lun->lun_access_ro = true;
	else
		lun->lun_access_ro = lun_access_ro;
	if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
		hlist_add_head_rcu(&lun->link, &tpg->tpg_lun_hlist);
	mutex_unlock(&tpg->tpg_lun_mutex);

	return 0;

out_kill_ref:
	percpu_ref_exit(&lun->lun_ref);
out:
	return ret;
}
/*
 * core_tpg_remove_lun - detach a LUN from its device and portal group
 * @tpg: portal group the LUN belongs to
 * @lun: LUN to remove
 *
 * Clears the LUN from the portal group, waits for lun_ref users via
 * transport_clear_lun_ref(), then, under tpg->tpg_lun_mutex, unlinks the
 * LUN from the device and from tpg->tpg_lun_hlist.  lun->lun_shutdown is
 * true for the duration of the teardown.  The se_lun itself is not freed
 * here.
 */
void core_tpg_remove_lun(
	struct se_portal_group *tpg,
	struct se_lun *lun)
{
	/*
	 * rcu_dereference_raw protected by se_lun->lun_group symlink
	 * reference to se_device->dev_group.
	 */
	struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev);

	lun->lun_shutdown = true;

	core_clear_lun_from_tpg(lun, tpg);
	/*
	 * Wait for any active I/O references to percpu se_lun->lun_ref to
	 * be released. Also, se_lun->lun_ref is now used by PR and ALUA
	 * logic when referencing a remote target port during ALL_TGT_PT=1
	 * and generating UNIT_ATTENTIONs for ALUA access state transition.
	 */
	transport_clear_lun_ref(lun);

	mutex_lock(&tpg->tpg_lun_mutex);
	if (lun->lun_se_dev) {
		target_detach_tg_pt_gp(lun);

		/* Undo the device-side linkage made in core_tpg_add_lun(). */
		spin_lock(&dev->se_port_lock);
		list_del(&lun->lun_dev_link);
		dev->export_count--;
		rcu_assign_pointer(lun->lun_se_dev, NULL);
		spin_unlock(&dev->se_port_lock);
	}
	/*
	 * NOTE(review): dev is dereferenced here unconditionally although
	 * the block above allows for lun->lun_se_dev being NULL - confirm
	 * that callers never pass a LUN without a device.
	 */
	if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
		hlist_del_rcu(&lun->link);

	lun->lun_shutdown = false;
	mutex_unlock(&tpg->tpg_lun_mutex);

	percpu_ref_exit(&lun->lun_ref);
}