linux/drivers/scsi/lpfc/lpfc_mem.c

741 lines
21 KiB
C
Raw Normal View History

/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
* Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
* Broadcom refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2004-2014 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
* www.broadcom.com *
* Portions Copyright (C) 2004-2005 Christoph Hellwig *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of version 2 of the GNU General *
* Public License as published by the Free Software Foundation. *
* This program is distributed in the hope that it will be useful. *
* ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
* WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
* DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
* TO BE LEGALLY INVALID. See the GNU General Public License for *
* more details, a copy of which can be found in the file COPYING *
* included with this package. *
*******************************************************************/
#include <linux/mempool.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> ---- Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-12 21:52:30 +00:00
#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_transport_fc.h>
scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> ---- Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-12 21:52:30 +00:00
#include <scsi/fc/fc_fs.h>
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
#include "lpfc_sli4.h"
#include "lpfc_nl.h"
#include "lpfc_disc.h"
#include "lpfc.h"
scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> ---- Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-12 21:52:30 +00:00
#include "lpfc_scsi.h"
#include "lpfc_crtn.h"
#include "lpfc_logmsg.h"
#define LPFC_MBUF_POOL_SIZE 64 /* max elements in MBUF safety pool */
#define LPFC_MEM_POOL_SIZE 64 /* max elem in non-DMA safety pool */
#define LPFC_DEVICE_DATA_POOL_SIZE 64 /* max elements in device data pool */
#define LPFC_RRQ_POOL_SIZE 256 /* max elements in non-DMA pool */
scsi: lpfc: Fix scheduling call while in softirq context in lpfc_unreg_rpi The following call trace was seen during HBA reset testing: BUG: scheduling while atomic: swapper/2/0/0x10000100 ... Call Trace: dump_stack+0x19/0x1b __schedule_bug+0x64/0x72 __schedule+0x782/0x840 __cond_resched+0x26/0x30 _cond_resched+0x3a/0x50 mempool_alloc+0xa0/0x170 lpfc_unreg_rpi+0x151/0x630 [lpfc] lpfc_sli_abts_recover_port+0x171/0x190 [lpfc] lpfc_sli4_abts_err_handler+0xb2/0x1f0 [lpfc] lpfc_sli4_io_xri_aborted+0x256/0x300 [lpfc] lpfc_sli4_sp_handle_abort_xri_wcqe.isra.51+0xa3/0x190 [lpfc] lpfc_sli4_fp_handle_cqe+0x89/0x4d0 [lpfc] __lpfc_sli4_process_cq+0xdb/0x2e0 [lpfc] __lpfc_sli4_hba_process_cq+0x41/0x100 [lpfc] lpfc_cq_poll_hdler+0x1a/0x30 [lpfc] irq_poll_softirq+0xc7/0x100 __do_softirq+0xf5/0x280 call_softirq+0x1c/0x30 do_softirq+0x65/0xa0 irq_exit+0x105/0x110 do_IRQ+0x56/0xf0 common_interrupt+0x16a/0x16a With the conversion to blk_io_poll for better interrupt latency in normal cases, it introduced this code path, executed when I/O aborts or logouts are seen, which attempts to allocate memory for a mailbox command to be issued. The allocation is GFP_KERNEL, thus it could attempt to sleep. Fix by creating a work element that performs the event handling for the remote port. This will have the mailbox commands and other items performed in the work element, not the irq. A much better method as the "irq" routine does not stall while performing all this deep handling code. Ensure that allocation failures are handled and send LOGO on failure. Additionally, enlarge the mailbox memory pool to reduce the possibility of additional allocation in this path. Link: https://lore.kernel.org/r/20201020202719.54726-3-james.smart@broadcom.com Fixes: 317aeb83c92b ("scsi: lpfc: Add blk_io_poll support for latency improvment") Cc: <stable@vger.kernel.org> # v5.9+ Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2020-10-20 20:27:12 +00:00
#define LPFC_MBX_POOL_SIZE 256 /* max elements in MBX non-DMA pool */
int
lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *phba) {
size_t bytes;
int max_xri = phba->sli4_hba.max_cfg_param.max_xri;
if (max_xri <= 0)
return -ENOMEM;
bytes = ((BITS_PER_LONG - 1 + max_xri) / BITS_PER_LONG) *
sizeof(unsigned long);
phba->cfg_rrq_xri_bitmap_sz = bytes;
phba->active_rrq_pool = mempool_create_kmalloc_pool(LPFC_MEM_POOL_SIZE,
bytes);
if (!phba->active_rrq_pool)
return -ENOMEM;
else
return 0;
}
/**
* lpfc_mem_alloc - create and allocate all PCI and memory pools
* @phba: HBA to allocate pools for
* @align: alignment requirement for blocks; must be a power of two
*
scsi: lpfc: Support dynamic unbounded SGL lists on G7 hardware. Typical SLI-4 hardware supports up to 2 4KB pages to be registered per XRI to contain the exchanges Scatter/Gather List. This caps the number of SGL elements that can be in the SGL. There are not extensions to extend the list out of the 2 pages. The G7 hardware adds a SGE type that allows the SGL to be vectored to a different scatter/gather list segment. And that segment can contain a SGE to go to another segment and so on. The initial segment must still be pre-registered for the XRI, but it can be a much smaller amount (256Bytes) as it can now be dynamically grown. This much smaller allocation can handle the SG list for most normal I/O, and the dynamic aspect allows it to support many MB's if needed. The implementation creates a pool which contains "segments" and which is initially sized to hold the initial small segment per xri. If an I/O requires additional segments, they are allocated from the pool. If the pool has no more segments, the pool is grown based on what is now needed. After the I/O completes, the additional segments are returned to the pool for use by other I/Os. Once allocated, the additional segments are not released under the assumption of "if needed once, it will be needed again". Pools are kept on a per-hardware queue basis, which is typically 1:1 per cpu, but may be shared by multiple cpus. The switch to the smaller initial allocation significantly reduces the memory footprint of the driver (which only grows if large ios are issued). Based on the several K of XRIs for the adapter, the 8KB->256B reduction can conserve 32MBs or more. It has been observed with per-cpu resource pools that allocating a resource on CPU A, may be put back on CPU B. While the get routines are distributed evenly, only a limited subset of CPUs may be handling the put routines. This can put a strain on the lpfc_put_cmd_rsp_buf_per_cpu routine because all the resources are being put on a limited subset of CPUs. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <jsmart2021@gmail.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-08-14 23:57:09 +00:00
* Description: Creates and allocates PCI pools lpfc_mbuf_pool,
* lpfc_hrb_pool. Creates and allocates kmalloc-backed mempools
* for LPFC_MBOXQ_t and lpfc_nodelist. Also allocates the VPI bitmask.
*
* Notes: Not interrupt-safe. Must be called with no locks held. If any
* allocation fails, frees all successfully allocated memory before returning.
*
* Returns:
* 0 on success
* -ENOMEM on failure (if any memory allocations fail)
**/
int
lpfc_mem_alloc(struct lpfc_hba *phba, int align)
{
struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
int i;
phba->lpfc_mbuf_pool = dma_pool_create("lpfc_mbuf_pool", &phba->pcidev->dev,
LPFC_BPL_SIZE,
align, 0);
if (!phba->lpfc_mbuf_pool)
scsi: lpfc: Support dynamic unbounded SGL lists on G7 hardware. Typical SLI-4 hardware supports up to 2 4KB pages to be registered per XRI to contain the exchanges Scatter/Gather List. This caps the number of SGL elements that can be in the SGL. There are not extensions to extend the list out of the 2 pages. The G7 hardware adds a SGE type that allows the SGL to be vectored to a different scatter/gather list segment. And that segment can contain a SGE to go to another segment and so on. The initial segment must still be pre-registered for the XRI, but it can be a much smaller amount (256Bytes) as it can now be dynamically grown. This much smaller allocation can handle the SG list for most normal I/O, and the dynamic aspect allows it to support many MB's if needed. The implementation creates a pool which contains "segments" and which is initially sized to hold the initial small segment per xri. If an I/O requires additional segments, they are allocated from the pool. If the pool has no more segments, the pool is grown based on what is now needed. After the I/O completes, the additional segments are returned to the pool for use by other I/Os. Once allocated, the additional segments are not released under the assumption of "if needed once, it will be needed again". Pools are kept on a per-hardware queue basis, which is typically 1:1 per cpu, but may be shared by multiple cpus. The switch to the smaller initial allocation significantly reduces the memory footprint of the driver (which only grows if large ios are issued). Based on the several K of XRIs for the adapter, the 8KB->256B reduction can conserve 32MBs or more. It has been observed with per-cpu resource pools that allocating a resource on CPU A, may be put back on CPU B. While the get routines are distributed evenly, only a limited subset of CPUs may be handling the put routines. This can put a strain on the lpfc_put_cmd_rsp_buf_per_cpu routine because all the resources are being put on a limited subset of CPUs. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <jsmart2021@gmail.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-08-14 23:57:09 +00:00
goto fail;
treewide: kmalloc() -> kmalloc_array() The kmalloc() function has a 2-factor argument form, kmalloc_array(). This patch replaces cases of: kmalloc(a * b, gfp) with: kmalloc_array(a * b, gfp) as well as handling cases of: kmalloc(a * b * c, gfp) with: kmalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kmalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kmalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The tools/ directory was manually excluded, since it has its own implementation of kmalloc(). The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kmalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kmalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kmalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(u8) * COUNT + COUNT , ...) | kmalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kmalloc( - sizeof(char) * COUNT + COUNT , ...) | kmalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kmalloc + kmalloc_array ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kmalloc + kmalloc_array ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kmalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kmalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kmalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kmalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kmalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kmalloc(C1 * C2 * C3, ...) | kmalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kmalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kmalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kmalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kmalloc(sizeof(THING) * C2, ...) | kmalloc(sizeof(TYPE) * C2, ...) | kmalloc(C1 * C2 * C3, ...) | kmalloc(C1 * C2, ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - (E1) * E2 + E1, E2 , ...) | - kmalloc + kmalloc_array ( - (E1) * (E2) + E1, E2 , ...) | - kmalloc + kmalloc_array ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 20:55:00 +00:00
pool->elements = kmalloc_array(LPFC_MBUF_POOL_SIZE,
sizeof(struct lpfc_dmabuf),
GFP_KERNEL);
if (!pool->elements)
goto fail_free_lpfc_mbuf_pool;
pool->max_count = 0;
pool->current_count = 0;
for ( i = 0; i < LPFC_MBUF_POOL_SIZE; i++) {
pool->elements[i].virt = dma_pool_alloc(phba->lpfc_mbuf_pool,
GFP_KERNEL, &pool->elements[i].phys);
if (!pool->elements[i].virt)
goto fail_free_mbuf_pool;
pool->max_count++;
pool->current_count++;
}
scsi: lpfc: Fix scheduling call while in softirq context in lpfc_unreg_rpi The following call trace was seen during HBA reset testing: BUG: scheduling while atomic: swapper/2/0/0x10000100 ... Call Trace: dump_stack+0x19/0x1b __schedule_bug+0x64/0x72 __schedule+0x782/0x840 __cond_resched+0x26/0x30 _cond_resched+0x3a/0x50 mempool_alloc+0xa0/0x170 lpfc_unreg_rpi+0x151/0x630 [lpfc] lpfc_sli_abts_recover_port+0x171/0x190 [lpfc] lpfc_sli4_abts_err_handler+0xb2/0x1f0 [lpfc] lpfc_sli4_io_xri_aborted+0x256/0x300 [lpfc] lpfc_sli4_sp_handle_abort_xri_wcqe.isra.51+0xa3/0x190 [lpfc] lpfc_sli4_fp_handle_cqe+0x89/0x4d0 [lpfc] __lpfc_sli4_process_cq+0xdb/0x2e0 [lpfc] __lpfc_sli4_hba_process_cq+0x41/0x100 [lpfc] lpfc_cq_poll_hdler+0x1a/0x30 [lpfc] irq_poll_softirq+0xc7/0x100 __do_softirq+0xf5/0x280 call_softirq+0x1c/0x30 do_softirq+0x65/0xa0 irq_exit+0x105/0x110 do_IRQ+0x56/0xf0 common_interrupt+0x16a/0x16a With the conversion to blk_io_poll for better interrupt latency in normal cases, it introduced this code path, executed when I/O aborts or logouts are seen, which attempts to allocate memory for a mailbox command to be issued. The allocation is GFP_KERNEL, thus it could attempt to sleep. Fix by creating a work element that performs the event handling for the remote port. This will have the mailbox commands and other items performed in the work element, not the irq. A much better method as the "irq" routine does not stall while performing all this deep handling code. Ensure that allocation failures are handled and send LOGO on failure. Additionally, enlarge the mailbox memory pool to reduce the possibility of additional allocation in this path. Link: https://lore.kernel.org/r/20201020202719.54726-3-james.smart@broadcom.com Fixes: 317aeb83c92b ("scsi: lpfc: Add blk_io_poll support for latency improvment") Cc: <stable@vger.kernel.org> # v5.9+ Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2020-10-20 20:27:12 +00:00
phba->mbox_mem_pool = mempool_create_kmalloc_pool(LPFC_MBX_POOL_SIZE,
sizeof(LPFC_MBOXQ_t));
if (!phba->mbox_mem_pool)
goto fail_free_mbuf_pool;
phba->nlp_mem_pool = mempool_create_kmalloc_pool(LPFC_MEM_POOL_SIZE,
sizeof(struct lpfc_nodelist));
if (!phba->nlp_mem_pool)
goto fail_free_mbox_pool;
if (phba->sli_rev == LPFC_SLI_REV4) {
phba->rrq_pool =
mempool_create_kmalloc_pool(LPFC_RRQ_POOL_SIZE,
sizeof(struct lpfc_node_rrq));
if (!phba->rrq_pool)
goto fail_free_nlp_mem_pool;
phba->lpfc_hrb_pool = dma_pool_create("lpfc_hrb_pool",
&phba->pcidev->dev,
LPFC_HDR_BUF_SIZE, align, 0);
if (!phba->lpfc_hrb_pool)
goto fail_free_rrq_mem_pool;
phba->lpfc_drb_pool = dma_pool_create("lpfc_drb_pool",
&phba->pcidev->dev,
LPFC_DATA_BUF_SIZE, align, 0);
if (!phba->lpfc_drb_pool)
goto fail_free_hrb_pool;
phba->lpfc_hbq_pool = NULL;
} else {
phba->lpfc_hbq_pool = dma_pool_create("lpfc_hbq_pool",
&phba->pcidev->dev, LPFC_BPL_SIZE, align, 0);
if (!phba->lpfc_hbq_pool)
goto fail_free_nlp_mem_pool;
phba->lpfc_hrb_pool = NULL;
phba->lpfc_drb_pool = NULL;
}
if (phba->cfg_EnableXLane) {
phba->device_data_mem_pool = mempool_create_kmalloc_pool(
LPFC_DEVICE_DATA_POOL_SIZE,
sizeof(struct lpfc_device_data));
if (!phba->device_data_mem_pool)
scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> ---- Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-12 21:52:30 +00:00
goto fail_free_drb_pool;
} else {
phba->device_data_mem_pool = NULL;
}
return 0;
scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> ---- Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-12 21:52:30 +00:00
fail_free_drb_pool:
dma_pool_destroy(phba->lpfc_drb_pool);
scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> ---- Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-12 21:52:30 +00:00
phba->lpfc_drb_pool = NULL;
fail_free_hrb_pool:
dma_pool_destroy(phba->lpfc_hrb_pool);
phba->lpfc_hrb_pool = NULL;
fail_free_rrq_mem_pool:
mempool_destroy(phba->rrq_pool);
phba->rrq_pool = NULL;
fail_free_nlp_mem_pool:
mempool_destroy(phba->nlp_mem_pool);
phba->nlp_mem_pool = NULL;
fail_free_mbox_pool:
mempool_destroy(phba->mbox_mem_pool);
phba->mbox_mem_pool = NULL;
fail_free_mbuf_pool:
while (i--)
dma_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
pool->elements[i].phys);
kfree(pool->elements);
fail_free_lpfc_mbuf_pool:
dma_pool_destroy(phba->lpfc_mbuf_pool);
phba->lpfc_mbuf_pool = NULL;
fail:
return -ENOMEM;
}
int
lpfc_nvmet_mem_alloc(struct lpfc_hba *phba)
{
phba->lpfc_nvmet_drb_pool =
dma_pool_create("lpfc_nvmet_drb_pool",
&phba->pcidev->dev, LPFC_NVMET_DATA_BUF_SIZE,
SGL_ALIGN_SZ, 0);
if (!phba->lpfc_nvmet_drb_pool) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"6024 Can't enable NVME Target - no memory\n");
return -ENOMEM;
}
return 0;
}
/**
* lpfc_mem_free - Frees memory allocated by lpfc_mem_alloc
* @phba: HBA to free memory for
*
* Description: Free the memory allocated by lpfc_mem_alloc routine. This
* routine is a the counterpart of lpfc_mem_alloc.
*
* Returns: None
**/
void
lpfc_mem_free(struct lpfc_hba *phba)
{
int i;
struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
struct lpfc_device_data *device_data;
/* Free HBQ pools */
lpfc_sli_hbqbuf_free_all(phba);
dma_pool_destroy(phba->lpfc_nvmet_drb_pool);
phba->lpfc_nvmet_drb_pool = NULL;
dma_pool_destroy(phba->lpfc_drb_pool);
phba->lpfc_drb_pool = NULL;
dma_pool_destroy(phba->lpfc_hrb_pool);
phba->lpfc_hrb_pool = NULL;
dma_pool_destroy(phba->lpfc_hbq_pool);
phba->lpfc_hbq_pool = NULL;
mempool_destroy(phba->rrq_pool);
phba->rrq_pool = NULL;
/* Free NLP memory pool */
mempool_destroy(phba->nlp_mem_pool);
phba->nlp_mem_pool = NULL;
if (phba->sli_rev == LPFC_SLI_REV4 && phba->active_rrq_pool) {
mempool_destroy(phba->active_rrq_pool);
phba->active_rrq_pool = NULL;
}
/* Free mbox memory pool */
mempool_destroy(phba->mbox_mem_pool);
phba->mbox_mem_pool = NULL;
/* Free MBUF memory pool */
for (i = 0; i < pool->current_count; i++)
dma_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
pool->elements[i].phys);
kfree(pool->elements);
dma_pool_destroy(phba->lpfc_mbuf_pool);
phba->lpfc_mbuf_pool = NULL;
/* Free Device Data memory pool */
if (phba->device_data_mem_pool) {
/* Ensure all objects have been returned to the pool */
while (!list_empty(&phba->luns)) {
device_data = list_first_entry(&phba->luns,
struct lpfc_device_data,
listentry);
list_del(&device_data->listentry);
mempool_free(device_data, phba->device_data_mem_pool);
}
mempool_destroy(phba->device_data_mem_pool);
}
phba->device_data_mem_pool = NULL;
return;
}
/**
* lpfc_mem_free_all - Frees all PCI and driver memory
* @phba: HBA to free memory for
*
* Description: Free memory from PCI and driver memory pools and also those
scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> ---- Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-12 21:52:30 +00:00
* used : lpfc_sg_dma_buf_pool, lpfc_mbuf_pool, lpfc_hrb_pool. Frees
* kmalloc-backed mempools for LPFC_MBOXQ_t and lpfc_nodelist. Also frees
* the VPI bitmask.
*
* Returns: None
**/
void
lpfc_mem_free_all(struct lpfc_hba *phba)
{
struct lpfc_sli *psli = &phba->sli;
LPFC_MBOXQ_t *mbox, *next_mbox;
struct lpfc_dmabuf *mp;
/* Free memory used in mailbox queue back to mailbox memory pool */
list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq, list) {
mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
if (mp) {
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
}
list_del(&mbox->list);
mempool_free(mbox, phba->mbox_mem_pool);
}
/* Free memory used in mailbox cmpl list back to mailbox memory pool */
list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq_cmpl, list) {
mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
if (mp) {
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
}
list_del(&mbox->list);
mempool_free(mbox, phba->mbox_mem_pool);
}
/* Free the active mailbox command back to the mailbox memory pool */
spin_lock_irq(&phba->hbalock);
psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
spin_unlock_irq(&phba->hbalock);
if (psli->mbox_active) {
mbox = psli->mbox_active;
mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
if (mp) {
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
}
mempool_free(mbox, phba->mbox_mem_pool);
psli->mbox_active = NULL;
}
/* Free and destroy all the allocated memory pools */
lpfc_mem_free(phba);
scsi: lpfc: Support dynamic unbounded SGL lists on G7 hardware. Typical SLI-4 hardware supports up to 2 4KB pages to be registered per XRI to contain the exchanges Scatter/Gather List. This caps the number of SGL elements that can be in the SGL. There are not extensions to extend the list out of the 2 pages. The G7 hardware adds a SGE type that allows the SGL to be vectored to a different scatter/gather list segment. And that segment can contain a SGE to go to another segment and so on. The initial segment must still be pre-registered for the XRI, but it can be a much smaller amount (256Bytes) as it can now be dynamically grown. This much smaller allocation can handle the SG list for most normal I/O, and the dynamic aspect allows it to support many MB's if needed. The implementation creates a pool which contains "segments" and which is initially sized to hold the initial small segment per xri. If an I/O requires additional segments, they are allocated from the pool. If the pool has no more segments, the pool is grown based on what is now needed. After the I/O completes, the additional segments are returned to the pool for use by other I/Os. Once allocated, the additional segments are not released under the assumption of "if needed once, it will be needed again". Pools are kept on a per-hardware queue basis, which is typically 1:1 per cpu, but may be shared by multiple cpus. The switch to the smaller initial allocation significantly reduces the memory footprint of the driver (which only grows if large ios are issued). Based on the several K of XRIs for the adapter, the 8KB->256B reduction can conserve 32MBs or more. It has been observed with per-cpu resource pools that allocating a resource on CPU A, may be put back on CPU B. While the get routines are distributed evenly, only a limited subset of CPUs may be handling the put routines. This can put a strain on the lpfc_put_cmd_rsp_buf_per_cpu routine because all the resources are being put on a limited subset of CPUs. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <jsmart2021@gmail.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-08-14 23:57:09 +00:00
/* Free DMA buffer memory pool */
dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
phba->lpfc_sg_dma_buf_pool = NULL;
dma_pool_destroy(phba->lpfc_cmd_rsp_buf_pool);
phba->lpfc_cmd_rsp_buf_pool = NULL;
/* Free the iocb lookup array */
kfree(psli->iocbq_lookup);
psli->iocbq_lookup = NULL;
return;
}
/**
* lpfc_mbuf_alloc - Allocate an mbuf from the lpfc_mbuf_pool PCI pool
* @phba: HBA which owns the pool to allocate from
* @mem_flags: indicates if this is a priority (MEM_PRI) allocation
* @handle: used to return the DMA-mapped address of the mbuf
*
* Description: Allocates a DMA-mapped buffer from the lpfc_mbuf_pool PCI pool.
* Allocates from generic dma_pool_alloc function first and if that fails and
* mem_flags has MEM_PRI set (the only defined flag), returns an mbuf from the
* HBA's pool.
*
* Notes: Not interrupt-safe. Must be called with no locks held. Takes
* phba->hbalock.
*
* Returns:
* pointer to the allocated mbuf on success
* NULL on failure
**/
void *
lpfc_mbuf_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle)
{
struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
unsigned long iflags;
void *ret;
ret = dma_pool_alloc(phba->lpfc_mbuf_pool, GFP_KERNEL, handle);
spin_lock_irqsave(&phba->hbalock, iflags);
if (!ret && (mem_flags & MEM_PRI) && pool->current_count) {
pool->current_count--;
ret = pool->elements[pool->current_count].virt;
*handle = pool->elements[pool->current_count].phys;
}
spin_unlock_irqrestore(&phba->hbalock, iflags);
return ret;
}
/**
* __lpfc_mbuf_free - Free an mbuf from the lpfc_mbuf_pool PCI pool (locked)
* @phba: HBA which owns the pool to return to
* @virt: mbuf to free
* @dma: the DMA-mapped address of the lpfc_mbuf_pool to be freed
*
* Description: Returns an mbuf lpfc_mbuf_pool to the lpfc_mbuf_safety_pool if
* it is below its max_count, frees the mbuf otherwise.
*
* Notes: Must be called with phba->hbalock held to synchronize access to
* lpfc_mbuf_safety_pool.
*
* Returns: None
**/
void
__lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma)
{
struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
if (pool->current_count < pool->max_count) {
pool->elements[pool->current_count].virt = virt;
pool->elements[pool->current_count].phys = dma;
pool->current_count++;
} else {
dma_pool_free(phba->lpfc_mbuf_pool, virt, dma);
}
return;
}
/**
* lpfc_mbuf_free - Free an mbuf from the lpfc_mbuf_pool PCI pool (unlocked)
* @phba: HBA which owns the pool to return to
* @virt: mbuf to free
* @dma: the DMA-mapped address of the lpfc_mbuf_pool to be freed
*
* Description: Returns an mbuf lpfc_mbuf_pool to the lpfc_mbuf_safety_pool if
* it is below its max_count, frees the mbuf otherwise.
*
* Notes: Takes phba->hbalock. Can be called with or without other locks held.
*
* Returns: None
**/
void
lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma)
{
unsigned long iflags;
spin_lock_irqsave(&phba->hbalock, iflags);
__lpfc_mbuf_free(phba, virt, dma);
spin_unlock_irqrestore(&phba->hbalock, iflags);
return;
}
/**
* lpfc_nvmet_buf_alloc - Allocate an nvmet_buf from the
* lpfc_sg_dma_buf_pool PCI pool
* @phba: HBA which owns the pool to allocate from
* @mem_flags: indicates if this is a priority (MEM_PRI) allocation
* @handle: used to return the DMA-mapped address of the nvmet_buf
*
* Description: Allocates a DMA-mapped buffer from the lpfc_sg_dma_buf_pool
* PCI pool. Allocates from generic dma_pool_alloc function.
*
* Returns:
* pointer to the allocated nvmet_buf on success
* NULL on failure
**/
void *
lpfc_nvmet_buf_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle)
{
void *ret;
ret = dma_pool_alloc(phba->lpfc_sg_dma_buf_pool, GFP_KERNEL, handle);
return ret;
}
/**
* lpfc_nvmet_buf_free - Free an nvmet_buf from the lpfc_sg_dma_buf_pool
* PCI pool
* @phba: HBA which owns the pool to return to
* @virt: nvmet_buf to free
* @dma: the DMA-mapped address of the lpfc_sg_dma_buf_pool to be freed
*
* Returns: None
**/
void
lpfc_nvmet_buf_free(struct lpfc_hba *phba, void *virt, dma_addr_t dma)
{
dma_pool_free(phba->lpfc_sg_dma_buf_pool, virt, dma);
}
/**
* lpfc_els_hbq_alloc - Allocate an HBQ buffer
* @phba: HBA to allocate HBQ buffer for
*
* Description: Allocates a DMA-mapped HBQ buffer from the lpfc_hrb_pool PCI
* pool along a non-DMA-mapped container for it.
*
* Notes: Not interrupt-safe. Must be called with no locks held.
*
* Returns:
* pointer to HBQ on success
* NULL on failure
**/
struct hbq_dmabuf *
lpfc_els_hbq_alloc(struct lpfc_hba *phba)
{
struct hbq_dmabuf *hbqbp;
hbqbp = kzalloc(sizeof(struct hbq_dmabuf), GFP_KERNEL);
if (!hbqbp)
return NULL;
hbqbp->dbuf.virt = dma_pool_alloc(phba->lpfc_hbq_pool, GFP_KERNEL,
&hbqbp->dbuf.phys);
if (!hbqbp->dbuf.virt) {
kfree(hbqbp);
return NULL;
}
scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> ---- Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-12 21:52:30 +00:00
hbqbp->total_size = LPFC_BPL_SIZE;
return hbqbp;
}
/**
* lpfc_els_hbq_free - Frees an HBQ buffer allocated with lpfc_els_hbq_alloc
* @phba: HBA buffer was allocated for
* @hbqbp: HBQ container returned by lpfc_els_hbq_alloc
*
* Description: Frees both the container and the DMA-mapped buffer returned by
* lpfc_els_hbq_alloc.
*
* Notes: Can be called with or without locks held.
*
* Returns: None
**/
void
lpfc_els_hbq_free(struct lpfc_hba *phba, struct hbq_dmabuf *hbqbp)
{
dma_pool_free(phba->lpfc_hbq_pool, hbqbp->dbuf.virt, hbqbp->dbuf.phys);
kfree(hbqbp);
return;
}
/**
* lpfc_sli4_rb_alloc - Allocate an SLI4 Receive buffer
* @phba: HBA to allocate a receive buffer for
*
* Description: Allocates a DMA-mapped receive buffer from the lpfc_hrb_pool PCI
* pool along a non-DMA-mapped container for it.
*
* Notes: Not interrupt-safe. Must be called with no locks held.
*
* Returns:
* pointer to HBQ on success
* NULL on failure
**/
struct hbq_dmabuf *
lpfc_sli4_rb_alloc(struct lpfc_hba *phba)
{
struct hbq_dmabuf *dma_buf;
dma_buf = kzalloc(sizeof(struct hbq_dmabuf), GFP_KERNEL);
if (!dma_buf)
return NULL;
dma_buf->hbuf.virt = dma_pool_alloc(phba->lpfc_hrb_pool, GFP_KERNEL,
&dma_buf->hbuf.phys);
if (!dma_buf->hbuf.virt) {
kfree(dma_buf);
return NULL;
}
dma_buf->dbuf.virt = dma_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL,
&dma_buf->dbuf.phys);
if (!dma_buf->dbuf.virt) {
dma_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
dma_buf->hbuf.phys);
kfree(dma_buf);
return NULL;
}
scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> ---- Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-12 21:52:30 +00:00
dma_buf->total_size = LPFC_DATA_BUF_SIZE;
return dma_buf;
}
/**
* lpfc_sli4_rb_free - Frees a receive buffer
* @phba: HBA buffer was allocated for
* @dmab: DMA Buffer container returned by lpfc_sli4_hbq_alloc
*
* Description: Frees both the container and the DMA-mapped buffers returned by
* lpfc_sli4_rb_alloc.
*
* Notes: Can be called with or without locks held.
*
* Returns: None
**/
void
lpfc_sli4_rb_free(struct lpfc_hba *phba, struct hbq_dmabuf *dmab)
{
dma_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys);
dma_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys);
kfree(dmab);
}
/**
* lpfc_sli4_nvmet_alloc - Allocate an SLI4 Receive buffer
* @phba: HBA to allocate a receive buffer for
*
* Description: Allocates a DMA-mapped receive buffer from the lpfc_hrb_pool PCI
* pool along a non-DMA-mapped container for it.
*
* Returns:
* pointer to HBQ on success
* NULL on failure
**/
struct rqb_dmabuf *
lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
{
struct rqb_dmabuf *dma_buf;
dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL);
if (!dma_buf)
return NULL;
dma_buf->hbuf.virt = dma_pool_alloc(phba->lpfc_hrb_pool, GFP_KERNEL,
&dma_buf->hbuf.phys);
if (!dma_buf->hbuf.virt) {
kfree(dma_buf);
return NULL;
}
dma_buf->dbuf.virt = dma_pool_alloc(phba->lpfc_nvmet_drb_pool,
GFP_KERNEL, &dma_buf->dbuf.phys);
if (!dma_buf->dbuf.virt) {
dma_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
dma_buf->hbuf.phys);
kfree(dma_buf);
return NULL;
}
dma_buf->total_size = LPFC_NVMET_DATA_BUF_SIZE;
return dma_buf;
}
/**
* lpfc_sli4_nvmet_free - Frees a receive buffer
* @phba: HBA buffer was allocated for
* @dmab: DMA Buffer container returned by lpfc_sli4_rbq_alloc
*
* Description: Frees both the container and the DMA-mapped buffers returned by
* lpfc_sli4_nvmet_alloc.
*
* Notes: Can be called with or without locks held.
*
* Returns: None
**/
void
lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab)
{
dma_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys);
dma_pool_free(phba->lpfc_nvmet_drb_pool,
dmab->dbuf.virt, dmab->dbuf.phys);
kfree(dmab);
}
/**
* lpfc_in_buf_free - Free a DMA buffer
* @phba: HBA buffer is associated with
* @mp: Buffer to free
*
* Description: Frees the given DMA buffer in the appropriate way given if the
* HBA is running in SLI3 mode with HBQs enabled.
*
* Notes: Takes phba->hbalock. Can be called with or without other locks held.
*
* Returns: None
**/
void
lpfc_in_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
{
struct hbq_dmabuf *hbq_entry;
unsigned long flags;
if (!mp)
return;
if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> ---- Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-12 21:52:30 +00:00
hbq_entry = container_of(mp, struct hbq_dmabuf, dbuf);
/* Check whether HBQ is still in use */
spin_lock_irqsave(&phba->hbalock, flags);
if (!phba->hbq_in_use) {
spin_unlock_irqrestore(&phba->hbalock, flags);
return;
}
list_del(&hbq_entry->dbuf.list);
if (hbq_entry->tag == -1) {
(phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer)
(phba, hbq_entry);
} else {
lpfc_sli_free_hbq(phba, hbq_entry);
}
spin_unlock_irqrestore(&phba->hbalock, flags);
} else {
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
}
return;
}
/**
* lpfc_rq_buf_free - Free a RQ DMA buffer
* @phba: HBA buffer is associated with
* @mp: Buffer to free
*
* Description: Frees the given DMA buffer in the appropriate way given by
* reposting it to its associated RQ so it can be reused.
*
* Notes: Takes phba->hbalock. Can be called with or without other locks held.
*
* Returns: None
**/
void
lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
{
struct lpfc_rqb *rqbp;
struct lpfc_rqe hrqe;
struct lpfc_rqe drqe;
struct rqb_dmabuf *rqb_entry;
unsigned long flags;
int rc;
if (!mp)
return;
rqb_entry = container_of(mp, struct rqb_dmabuf, hbuf);
rqbp = rqb_entry->hrq->rqbp;
spin_lock_irqsave(&phba->hbalock, flags);
list_del(&rqb_entry->hbuf.list);
hrqe.address_lo = putPaddrLow(rqb_entry->hbuf.phys);
hrqe.address_hi = putPaddrHigh(rqb_entry->hbuf.phys);
drqe.address_lo = putPaddrLow(rqb_entry->dbuf.phys);
drqe.address_hi = putPaddrHigh(rqb_entry->dbuf.phys);
rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe);
if (rc < 0) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"6409 Cannot post to HRQ %d: %x %x %x "
"DRQ %x %x\n",
rqb_entry->hrq->queue_id,
rqb_entry->hrq->host_index,
rqb_entry->hrq->hba_index,
rqb_entry->hrq->entry_count,
rqb_entry->drq->host_index,
rqb_entry->drq->hba_index);
(rqbp->rqb_free_buffer)(phba, rqb_entry);
} else {
list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list);
rqbp->buffer_count++;
}
spin_unlock_irqrestore(&phba->hbalock, flags);
}