IB/hfi1: Fix bar0 mapping to use write combining

When the debugpat kernel boot flag is turned on the following
traces are printed:

[ 1884.793168] x86/PAT: Overlap at 0x90000000-0x92000000
[ 1884.803510] x86/PAT: reserve_memtype added [mem 0x91200000-0x9127ffff],
track uncached-minus, req write-combining, ret uncached-minus
[ 1884.818167] hfi1 0000:05:00.0: hfi1_0: WC Remapped RcvArray:
ffffc9000a980000

The ioremap_wc() call is clearly not returning a write combining mapping
because of an overlap: the RcvArray is already covered by an uncached
mapping before the proposed write combining mapping is created.

The patch replaces the single base register for uncached CSRs that
used to overlap the RcvArray with two mappings.  One, kregbase1, runs from
the start of bar0 up to the RcvArray and the other, kregbase2, runs from the
end of the RcvArray to the pio send buffer space.  A new dd field,
base2_start, is used to convert the zero-based offset in the CSR routines
to the correct kregbase1/kregbase2 mapping.  A single direct write of the
RcvArray CSRs is replaced with hfi1_put_tid() to ensure correct access
using the new disjoint mapping.

Additionally, the kregend field is deleted since it is only ever written.

debugpat now shows the RcvArray as write combining:
[   35.688990] x86/PAT: reserve_memtype added [mem 0x91200000-0x9127ffff],
track write-combining, req write-combining, ret write-combining

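To insulate from any potential issues with write combining, the writeq
calls in hfi1_put_tid() and rcv_array_wc_fill() are now followed by
flush_wc(): immediately for eager and PT_INVALID_FLUSH entries, and after
every fourth entry otherwise.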

Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Mike Marciniszyn 2017-07-24 07:45:31 -07:00 committed by Doug Ledford
parent c53df62c7a
commit cb51c5d2cd
8 changed files with 126 additions and 49 deletions

View File

@ -1297,25 +1297,71 @@ CNTR_ELEM(#name, \
CNTR_SYNTH, \ CNTR_SYNTH, \
access_ibp_##cntr) access_ibp_##cntr)
/**
 * hfi1_addr_from_offset - return addr for readq/writeq
 * @dd - the dd device
 * @offset - the offset of the CSR within bar0
 *
 * Offsets below dd->base2_start are resolved against dd->kregbase1.
 * Offsets at or above dd->base2_start are rebased by subtracting
 * dd->base2_start and resolved against dd->kregbase2.
 *
 * No range checking is done on @offset here.
 *
 * Return: the iomem address corresponding to @offset
 */
static inline void __iomem *hfi1_addr_from_offset(
	const struct hfi1_devdata *dd,
	u32 offset)
{
	u32 split = dd->base2_start;

	/* below the split: first mapping, offset used as-is */
	if (offset < split)
		return dd->kregbase1 + offset;

	/* at or above the split: second mapping, offset made relative */
	return dd->kregbase2 + (offset - split);
}
/**
* read_csr - read CSR at the indicated offset
* @dd - the dd device
* @offset - the offset of the CSR within bar0
*
* Return: the value read or all FF's if there
* is no mapping
*/
u64 read_csr(const struct hfi1_devdata *dd, u32 offset) u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
{ {
if (dd->flags & HFI1_PRESENT) { if (dd->flags & HFI1_PRESENT)
return readq((void __iomem *)dd->kregbase + offset); return readq(hfi1_addr_from_offset(dd, offset));
}
return -1; return -1;
} }
/**
* write_csr - write CSR at the indicated offset
* @dd - the dd device
* @offset - the offset of the CSR within bar0
* @value - value to write
*/
void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
{ {
if (dd->flags & HFI1_PRESENT) if (dd->flags & HFI1_PRESENT) {
writeq(value, (void __iomem *)dd->kregbase + offset); void __iomem *base = hfi1_addr_from_offset(dd, offset);
/* avoid write to RcvArray */
if (WARN_ON(offset >= RCV_ARRAY && offset < dd->base2_start))
return;
writeq(value, base);
}
} }
/**
* get_csr_addr - return the iomem address for offset
* @dd - the dd device
* @offset - the offset of the CSR within bar0
*
* Return: The iomem address to use in subsequent
* writeq/readq operations.
*/
void __iomem *get_csr_addr( void __iomem *get_csr_addr(
struct hfi1_devdata *dd, const struct hfi1_devdata *dd,
u32 offset) u32 offset)
{ {
return (void __iomem *)dd->kregbase + offset; if (dd->flags & HFI1_PRESENT)
return hfi1_addr_from_offset(dd, offset);
return NULL;
} }
static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr, static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
@ -9752,14 +9798,13 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order) u32 type, unsigned long pa, u16 order)
{ {
u64 reg; u64 reg;
void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
(dd->kregbase + RCV_ARRAY));
if (!(dd->flags & HFI1_PRESENT)) if (!(dd->flags & HFI1_PRESENT))
goto done; goto done;
if (type == PT_INVALID) { if (type == PT_INVALID || type == PT_INVALID_FLUSH) {
pa = 0; pa = 0;
order = 0;
} else if (type > PT_INVALID) { } else if (type > PT_INVALID) {
dd_dev_err(dd, dd_dev_err(dd,
"unexpected receive array type %u for index %u, not handled\n", "unexpected receive array type %u for index %u, not handled\n",
@ -9773,13 +9818,14 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK) | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
<< RCV_ARRAY_RT_ADDR_SHIFT; << RCV_ARRAY_RT_ADDR_SHIFT;
trace_hfi1_write_rcvarray(base + (index * 8), reg); trace_hfi1_write_rcvarray(dd->rcvarray_wc + (index * 8), reg);
writeq(reg, base + (index * 8)); writeq(reg, dd->rcvarray_wc + (index * 8));
if (type == PT_EAGER) if (type == PT_EAGER || type == PT_INVALID_FLUSH || (index & 3) == 3)
/* /*
* Eager entries are written one-by-one so we have to push them * Eager entries are written and flushed
* after we write the entry. *
* Expected entries are flushed every 4 writes
*/ */
flush_wc(); flush_wc();
done: done:
@ -13411,8 +13457,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
/* RcvArray */ /* RcvArray */
for (i = 0; i < dd->chip_rcv_array_count; i++) for (i = 0; i < dd->chip_rcv_array_count; i++)
write_csr(dd, RCV_ARRAY + (8 * i), hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0);
RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
/* RcvQPMapTable */ /* RcvQPMapTable */
for (i = 0; i < 32; i++) for (i = 0; i < 32; i++)

View File

@ -605,11 +605,11 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 *data);
int write_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 data); int write_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 data);
void __iomem *get_csr_addr( void __iomem *get_csr_addr(
struct hfi1_devdata *dd, const struct hfi1_devdata *dd,
u32 offset); u32 offset);
static inline void __iomem *get_kctxt_csr_addr( static inline void __iomem *get_kctxt_csr_addr(
struct hfi1_devdata *dd, const struct hfi1_devdata *dd,
int ctxt, int ctxt,
u32 offset0) u32 offset0)
{ {

View File

@ -195,7 +195,7 @@ int hfi1_count_active_units(void)
spin_lock_irqsave(&hfi1_devs_lock, flags); spin_lock_irqsave(&hfi1_devs_lock, flags);
list_for_each_entry(dd, &hfi1_dev_list, list) { list_for_each_entry(dd, &hfi1_dev_list, list) {
if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase) if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1)
continue; continue;
for (pidx = 0; pidx < dd->num_pports; ++pidx) { for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx; ppd = dd->pport + pidx;
@ -1282,7 +1282,7 @@ int hfi1_reset_device(int unit)
dd_dev_info(dd, "Reset on unit %u requested\n", unit); dd_dev_info(dd, "Reset on unit %u requested\n", unit);
if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) { if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) {
dd_dev_info(dd, dd_dev_info(dd,
"Invalid unit number %u or not initialized or not present\n", "Invalid unit number %u or not initialized or not present\n",
unit); unit);

View File

@ -137,8 +137,11 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
* Doing the WC fill writes only makes sense if the device is * Doing the WC fill writes only makes sense if the device is
* present and the RcvArray has been mapped as WC memory. * present and the RcvArray has been mapped as WC memory.
*/ */
if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) {
writeq(0, dd->rcvarray_wc + (index * 8)); writeq(0, dd->rcvarray_wc + (index * 8));
if ((index & 3) == 3)
flush_wc();
}
} }
static inline void tid_group_add_tail(struct tid_group *grp, static inline void tid_group_add_tail(struct tid_group *grp,

View File

@ -181,7 +181,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
struct hfi1_devdata, struct hfi1_devdata,
user_cdev); user_cdev);
if (!((dd->flags & HFI1_PRESENT) && dd->kregbase)) if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
return -EINVAL; return -EINVAL;
if (!atomic_inc_not_zero(&dd->user_refcount)) if (!atomic_inc_not_zero(&dd->user_refcount))

View File

@ -867,12 +867,15 @@ struct hfi1_devdata {
struct device *diag_device; struct device *diag_device;
struct device *ui_device; struct device *ui_device;
/* mem-mapped pointer to base of chip regs */ /* first mapping up to RcvArray */
u8 __iomem *kregbase; u8 __iomem *kregbase1;
/* end of mem-mapped chip space excluding sendbuf and user regs */
u8 __iomem *kregend;
/* physical address of chip for io_remap, etc. */
resource_size_t physaddr; resource_size_t physaddr;
/* second uncached mapping from RcvArray to pio send buffers */
u8 __iomem *kregbase2;
/* for detecting offset above kregbase2 address */
u32 base2_start;
/* Per VL data. Enough for all VLs but not all elements are set/used. */ /* Per VL data. Enough for all VLs but not all elements are set/used. */
struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* send context data */ /* send context data */
@ -1236,9 +1239,10 @@ static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
#define dc8051_ver_patch(a) ((a) & 0x0000ff) #define dc8051_ver_patch(a) ((a) & 0x0000ff)
/* f_put_tid types */ /* f_put_tid types */
#define PT_EXPECTED 0 #define PT_EXPECTED 0
#define PT_EAGER 1 #define PT_EAGER 1
#define PT_INVALID 2 #define PT_INVALID_FLUSH 2
#define PT_INVALID 3
struct tid_rb_node; struct tid_rb_node;
struct mmu_rb_node; struct mmu_rb_node;

View File

@ -180,31 +180,47 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
return -EINVAL; return -EINVAL;
} }
dd->kregbase = ioremap_nocache(addr, TXE_PIO_SEND); dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY);
if (!dd->kregbase) if (!dd->kregbase1) {
dd_dev_err(dd, "UC mapping of kregbase1 failed\n");
return -ENOMEM; return -ENOMEM;
}
dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY);
dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count);
dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8;
dd->kregbase2 = ioremap_nocache(
addr + dd->base2_start,
TXE_PIO_SEND - dd->base2_start);
if (!dd->kregbase2) {
dd_dev_err(dd, "UC mapping of kregbase2 failed\n");
goto nomem;
}
dd_dev_info(dd, "UC base2: %p for %x\n", dd->kregbase2,
TXE_PIO_SEND - dd->base2_start);
dd->piobase = ioremap_wc(addr + TXE_PIO_SEND, TXE_PIO_SIZE); dd->piobase = ioremap_wc(addr + TXE_PIO_SEND, TXE_PIO_SIZE);
if (!dd->piobase) { if (!dd->piobase) {
iounmap(dd->kregbase); dd_dev_err(dd, "WC mapping of send buffers failed\n");
return -ENOMEM; goto nomem;
} }
dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE);
dd->flags |= HFI1_PRESENT; /* now register routines work */
dd->kregend = dd->kregbase + TXE_PIO_SEND;
dd->physaddr = addr; /* used for io_remap, etc. */ dd->physaddr = addr; /* used for io_remap, etc. */
/* /*
* Re-map the chip's RcvArray as write-combining to allow us * Map the chip's RcvArray as write-combining to allow us
* to write an entire cacheline worth of entries in one shot. * to write an entire cacheline worth of entries in one shot.
* If this re-map fails, just continue - the RcvArray programming
* function will handle both cases.
*/ */
dd->chip_rcv_array_count = read_csr(dd, RCV_ARRAY_CNT);
dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY, dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY,
dd->chip_rcv_array_count * 8); dd->chip_rcv_array_count * 8);
dd_dev_info(dd, "WC Remapped RcvArray: %p\n", dd->rcvarray_wc); if (!dd->rcvarray_wc) {
dd_dev_err(dd, "WC mapping of receive array failed\n");
goto nomem;
}
dd_dev_info(dd, "WC RcvArray: %p for %x\n",
dd->rcvarray_wc, dd->chip_rcv_array_count * 8);
/* /*
* Save BARs and command to rewrite after device reset. * Save BARs and command to rewrite after device reset.
*/ */
@ -253,10 +269,16 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
if (ret) if (ret)
goto read_error; goto read_error;
dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */
return 0; return 0;
read_error: read_error:
dd_dev_err(dd, "Unable to read from PCI config\n"); dd_dev_err(dd, "Unable to read from PCI config\n");
goto bail_error;
nomem:
ret = -ENOMEM;
bail_error:
hfi1_pcie_ddcleanup(dd);
return ret; return ret;
} }
@ -267,15 +289,19 @@ read_error:
*/ */
void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
{ {
u64 __iomem *base = (void __iomem *)dd->kregbase;
dd->flags &= ~HFI1_PRESENT; dd->flags &= ~HFI1_PRESENT;
dd->kregbase = NULL; if (dd->kregbase1)
iounmap(base); iounmap(dd->kregbase1);
dd->kregbase1 = NULL;
if (dd->kregbase2)
iounmap(dd->kregbase2);
dd->kregbase2 = NULL;
if (dd->rcvarray_wc) if (dd->rcvarray_wc)
iounmap(dd->rcvarray_wc); iounmap(dd->rcvarray_wc);
dd->rcvarray_wc = NULL;
if (dd->piobase) if (dd->piobase)
iounmap(dd->piobase); iounmap(dd->piobase);
dd->piobase = NULL;
} }
/* return the PCIe link speed from the given link status */ /* return the PCIe link speed from the given link status */

View File

@ -814,12 +814,11 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
node->npages, node->mmu.addr, node->phys, node->npages, node->mmu.addr, node->phys,
node->dma_addr); node->dma_addr);
hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
/* /*
* Make sure device has seen the write before we unpin the * Make sure device has seen the write before we unpin the
* pages. * pages.
*/ */
flush_wc(); hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
PCI_DMA_FROMDEVICE); PCI_DMA_FROMDEVICE);