f48ad614c1
The TODO list for the hfi1 driver was completed during 4.6. In addition other objections raised (which are far beyond what was in the TODO list) have been addressed as well. It is now time to remove the driver from staging and into the drivers/infiniband sub-tree. Reviewed-by: Jubin John <jubin.john@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
868 lines
22 KiB
C
868 lines
22 KiB
C
/*
|
|
* Copyright(c) 2015, 2016 Intel Corporation.
|
|
*
|
|
* This file is provided under a dual BSD/GPLv2 license. When using or
|
|
* redistributing this file, you may do so under either license.
|
|
*
|
|
* GPL LICENSE SUMMARY
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of version 2 of the GNU General Public License as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* BSD LICENSE
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* - Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* - Neither the name of Intel Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
*/
|
|
|
|
#include "hfi.h"
|
|
|
|
/*
 * Additive distance between non-SOP and SOP space: adding this to an
 * address inside a pio_buf yields the matching SOP=1 address, subtracting
 * it goes back.  Defined as half of TXE_PIO_SIZE.
 */
|
|
#define SOP_DISTANCE (TXE_PIO_SIZE / 2)
|
|
/*
 * Low-order address bits within one PIO block; an address with none of
 * these bits set sits on a block boundary.
 * NOTE(review): relies on PIO_BLOCK_SIZE being a power of two - it is
 * defined outside this file, confirm.
 */
#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
|
|
/* number of QUADWORDs (u64) in a block */
|
|
#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
|
|
|
|
/**
|
|
* pio_copy - copy data block to MMIO space
|
|
* @pbuf: a number of blocks allocated within a PIO send context
|
|
* @pbc: PBC to send
|
|
* @from: source, must be 8 byte aligned
|
|
* @count: number of DWORD (32-bit) quantities to copy from source
|
|
*
|
|
* Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
|
|
* Must always write full BLOCK_SIZE bytes blocks. The first block must
|
|
* be written to the corresponding SOP=1 address.
|
|
*
|
|
* Known:
|
|
* o pbuf->start always starts on a block boundary
|
|
* o pbuf can wrap only at a block boundary
|
|
*/
|
|
void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
|
|
const void *from, size_t count)
|
|
{
|
|
void __iomem *dest = pbuf->start + SOP_DISTANCE;
|
|
void __iomem *send = dest + PIO_BLOCK_SIZE;
|
|
void __iomem *dend; /* 8-byte data end */
|
|
|
|
/* write the PBC */
|
|
writeq(pbc, dest);
|
|
dest += sizeof(u64);
|
|
|
|
/* calculate where the QWORD data ends - in SOP=1 space */
|
|
dend = dest + ((count >> 1) * sizeof(u64));
|
|
|
|
if (dend < send) {
|
|
/*
|
|
* all QWORD data is within the SOP block, does *not*
|
|
* reach the end of the SOP block
|
|
*/
|
|
|
|
while (dest < dend) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
/*
|
|
* No boundary checks are needed here:
|
|
* 0. We're not on the SOP block boundary
|
|
* 1. The possible DWORD dangle will still be within
|
|
* the SOP block
|
|
* 2. We cannot wrap except on a block boundary.
|
|
*/
|
|
} else {
|
|
/* QWORD data extends _to_ or beyond the SOP block */
|
|
|
|
/* write 8-byte SOP chunk data */
|
|
while (dest < send) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
/* drop out of the SOP range */
|
|
dest -= SOP_DISTANCE;
|
|
dend -= SOP_DISTANCE;
|
|
|
|
/*
|
|
* If the wrap comes before or matches the data end,
|
|
* copy until until the wrap, then wrap.
|
|
*
|
|
* If the data ends at the end of the SOP above and
|
|
* the buffer wraps, then pbuf->end == dend == dest
|
|
* and nothing will get written, but we will wrap in
|
|
* case there is a dangling DWORD.
|
|
*/
|
|
if (pbuf->end <= dend) {
|
|
while (dest < pbuf->end) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
|
|
dest -= pbuf->size;
|
|
dend -= pbuf->size;
|
|
}
|
|
|
|
/* write 8-byte non-SOP, non-wrap chunk data */
|
|
while (dest < dend) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
}
|
|
/* at this point we have wrapped if we are going to wrap */
|
|
|
|
/* write dangling u32, if any */
|
|
if (count & 1) {
|
|
union mix val;
|
|
|
|
val.val64 = 0;
|
|
val.val32[0] = *(u32 *)from;
|
|
writeq(val.val64, dest);
|
|
dest += sizeof(u64);
|
|
}
|
|
/*
|
|
* fill in rest of block, no need to check pbuf->end
|
|
* as we only wrap on a block boundary
|
|
*/
|
|
while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
|
|
writeq(0, dest);
|
|
dest += sizeof(u64);
|
|
}
|
|
|
|
/* finished with this buffer */
|
|
this_cpu_dec(*pbuf->sc->buffers_allocated);
|
|
preempt_enable();
|
|
}
|
|
|
|
/*
 * USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz.
 *
 * With the macro defined, the #ifdef below selects the shift-and-mask
 * carry helpers; the byte-copy (jcopy based) helpers in the #else branch
 * are compiled out.
 */
|
|
#define USE_SHIFTS 1
|
|
#ifdef USE_SHIFTS
|
|
/*
|
|
* Handle carry bytes using shifts and masks.
|
|
*
|
|
* NOTE: the value of the unused portion of carry is expected to always be zero.
|
|
*/
|
|
|
|
/*
 * "zero" shift - the bit count to shift a 64-bit value by in order to
 * discard its upper bytes.  The argument is the number of least
 * significant bytes that must survive, so zshift(8) is 0 and zshift(0)
 * is 64 (a full-width shift; callers must not shift a u64 by that).
 */
#define zshift(x) (64 - 8 * (x))

/*
 * "merge" shift - the bit count needed to move a value past the carry
 * bytes.  The argument is the number of least significant bytes to step
 * over.
 */
#define mshift(x) ((x) * 8)
|
|
|
|
/*
|
|
* Read nbytes bytes from "from" and return them in the LSB bytes
|
|
* of pbuf->carry. Other bytes are zeroed. Any previous value
|
|
* pbuf->carry is lost.
|
|
*
|
|
* NOTES:
|
|
* o do not read from from if nbytes is zero
|
|
* o from may _not_ be u64 aligned
|
|
* o nbytes must not span a QW boundary
|
|
*/
|
|
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
|
|
unsigned int nbytes)
|
|
{
|
|
unsigned long off;
|
|
|
|
if (nbytes == 0) {
|
|
pbuf->carry.val64 = 0;
|
|
} else {
|
|
/* align our pointer */
|
|
off = (unsigned long)from & 0x7;
|
|
from = (void *)((unsigned long)from & ~0x7l);
|
|
pbuf->carry.val64 = ((*(u64 *)from)
|
|
<< zshift(nbytes + off))/* zero upper bytes */
|
|
>> zshift(nbytes); /* place at bottom */
|
|
}
|
|
pbuf->carry_bytes = nbytes;
|
|
}
|
|
|
|
/*
|
|
* Read nbytes bytes from "from" and put them at the next significant bytes
|
|
* of pbuf->carry. Unused bytes are zeroed. It is expected that the extra
|
|
* read does not overfill carry.
|
|
*
|
|
* NOTES:
|
|
* o from may _not_ be u64 aligned
|
|
* o nbytes may span a QW boundary
|
|
*/
|
|
static inline void read_extra_bytes(struct pio_buf *pbuf,
|
|
const void *from, unsigned int nbytes)
|
|
{
|
|
unsigned long off = (unsigned long)from & 0x7;
|
|
unsigned int room, xbytes;
|
|
|
|
/* align our pointer */
|
|
from = (void *)((unsigned long)from & ~0x7l);
|
|
|
|
/* check count first - don't read anything if count is zero */
|
|
while (nbytes) {
|
|
/* find the number of bytes in this u64 */
|
|
room = 8 - off; /* this u64 has room for this many bytes */
|
|
xbytes = min(room, nbytes);
|
|
|
|
/*
|
|
* shift down to zero lower bytes, shift up to zero upper
|
|
* bytes, shift back down to move into place
|
|
*/
|
|
pbuf->carry.val64 |= (((*(u64 *)from)
|
|
>> mshift(off))
|
|
<< zshift(xbytes))
|
|
>> zshift(xbytes + pbuf->carry_bytes);
|
|
off = 0;
|
|
pbuf->carry_bytes += xbytes;
|
|
nbytes -= xbytes;
|
|
from += sizeof(u64);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Zero extra bytes from the end of pbuf->carry.
|
|
*
|
|
* NOTES:
|
|
* o zbytes <= old_bytes
|
|
*/
|
|
static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
|
|
{
|
|
unsigned int remaining;
|
|
|
|
if (zbytes == 0) /* nothing to do */
|
|
return;
|
|
|
|
remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */
|
|
|
|
/* NOTE: zshift only guaranteed to work if remaining != 0 */
|
|
if (remaining)
|
|
pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
|
|
>> zshift(remaining);
|
|
else
|
|
pbuf->carry.val64 = 0;
|
|
pbuf->carry_bytes = remaining;
|
|
}
|
|
|
|
/*
|
|
* Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
|
|
* Put the unused part of the next 8 bytes of src into the LSB bytes of
|
|
* pbuf->carry with the upper bytes zeroed..
|
|
*
|
|
* NOTES:
|
|
* o result must keep unused bytes zeroed
|
|
* o src must be u64 aligned
|
|
*/
|
|
static inline void merge_write8(
|
|
struct pio_buf *pbuf,
|
|
void __iomem *dest,
|
|
const void *src)
|
|
{
|
|
u64 new, temp;
|
|
|
|
new = *(u64 *)src;
|
|
temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
|
|
writeq(temp, dest);
|
|
pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
|
|
}
|
|
|
|
/*
|
|
* Write a quad word using all bytes of carry.
|
|
*/
|
|
static inline void carry8_write8(union mix carry, void __iomem *dest)
|
|
{
|
|
writeq(carry.val64, dest);
|
|
}
|
|
|
|
/*
|
|
* Write a quad word using all the valid bytes of carry. If carry
|
|
* has zero valid bytes, nothing is written.
|
|
* Returns 0 on nothing written, non-zero on quad word written.
|
|
*/
|
|
static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
|
|
{
|
|
if (pbuf->carry_bytes) {
|
|
/* unused bytes are always kept zeroed, so just write */
|
|
writeq(pbuf->carry.val64, dest);
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#else /* USE_SHIFTS */
|
|
/*
|
|
* Handle carry bytes using byte copies.
|
|
*
|
|
* NOTE: the value of the unused portion of carry is left uninitialized.
|
|
*/
|
|
|
|
/*
|
|
* Jump copy - no-loop copy for < 8 bytes.
|
|
*/
|
|
static inline void jcopy(u8 *dest, const u8 *src, u32 n)
|
|
{
|
|
switch (n) {
|
|
case 7:
|
|
*dest++ = *src++;
|
|
case 6:
|
|
*dest++ = *src++;
|
|
case 5:
|
|
*dest++ = *src++;
|
|
case 4:
|
|
*dest++ = *src++;
|
|
case 3:
|
|
*dest++ = *src++;
|
|
case 2:
|
|
*dest++ = *src++;
|
|
case 1:
|
|
*dest++ = *src++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Read nbytes from "from" and and place them in the low bytes
|
|
* of pbuf->carry. Other bytes are left as-is. Any previous
|
|
* value in pbuf->carry is lost.
|
|
*
|
|
* NOTES:
|
|
* o do not read from from if nbytes is zero
|
|
* o from may _not_ be u64 aligned.
|
|
*/
|
|
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
|
|
unsigned int nbytes)
|
|
{
|
|
jcopy(&pbuf->carry.val8[0], from, nbytes);
|
|
pbuf->carry_bytes = nbytes;
|
|
}
|
|
|
|
/*
|
|
* Read nbytes bytes from "from" and put them at the end of pbuf->carry.
|
|
* It is expected that the extra read does not overfill carry.
|
|
*
|
|
* NOTES:
|
|
* o from may _not_ be u64 aligned
|
|
* o nbytes may span a QW boundary
|
|
*/
|
|
static inline void read_extra_bytes(struct pio_buf *pbuf,
|
|
const void *from, unsigned int nbytes)
|
|
{
|
|
jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
|
|
pbuf->carry_bytes += nbytes;
|
|
}
|
|
|
|
/*
|
|
* Zero extra bytes from the end of pbuf->carry.
|
|
*
|
|
* We do not care about the value of unused bytes in carry, so just
|
|
* reduce the byte count.
|
|
*
|
|
* NOTES:
|
|
* o zbytes <= old_bytes
|
|
*/
|
|
static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
|
|
{
|
|
pbuf->carry_bytes -= zbytes;
|
|
}
|
|
|
|
/*
|
|
* Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
|
|
* Put the unused part of the next 8 bytes of src into the low bytes of
|
|
* pbuf->carry.
|
|
*/
|
|
static inline void merge_write8(
|
|
struct pio_buf *pbuf,
|
|
void *dest,
|
|
const void *src)
|
|
{
|
|
u32 remainder = 8 - pbuf->carry_bytes;
|
|
|
|
jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
|
|
writeq(pbuf->carry.val64, dest);
|
|
jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
|
|
}
|
|
|
|
/*
|
|
* Write a quad word using all bytes of carry.
|
|
*/
|
|
static inline void carry8_write8(union mix carry, void *dest)
|
|
{
|
|
writeq(carry.val64, dest);
|
|
}
|
|
|
|
/*
|
|
* Write a quad word using all the valid bytes of carry. If carry
|
|
* has zero valid bytes, nothing is written.
|
|
* Returns 0 on nothing written, non-zero on quad word written.
|
|
*/
|
|
static inline int carry_write8(struct pio_buf *pbuf, void *dest)
|
|
{
|
|
if (pbuf->carry_bytes) {
|
|
u64 zero = 0;
|
|
|
|
jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
|
|
8 - pbuf->carry_bytes);
|
|
writeq(pbuf->carry.val64, dest);
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif /* USE_SHIFTS */
|
|
|
|
/*
|
|
* Segmented PIO Copy - start
|
|
*
|
|
* Start a PIO copy.
|
|
*
|
|
* @pbuf: destination buffer
|
|
* @pbc: the PBC for the PIO buffer
|
|
* @from: data source, QWORD aligned
|
|
* @nbytes: bytes to copy
|
|
*/
|
|
void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
|
|
const void *from, size_t nbytes)
|
|
{
|
|
void __iomem *dest = pbuf->start + SOP_DISTANCE;
|
|
void __iomem *send = dest + PIO_BLOCK_SIZE;
|
|
void __iomem *dend; /* 8-byte data end */
|
|
|
|
writeq(pbc, dest);
|
|
dest += sizeof(u64);
|
|
|
|
/* calculate where the QWORD data ends - in SOP=1 space */
|
|
dend = dest + ((nbytes >> 3) * sizeof(u64));
|
|
|
|
if (dend < send) {
|
|
/*
|
|
* all QWORD data is within the SOP block, does *not*
|
|
* reach the end of the SOP block
|
|
*/
|
|
|
|
while (dest < dend) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
/*
|
|
* No boundary checks are needed here:
|
|
* 0. We're not on the SOP block boundary
|
|
* 1. The possible DWORD dangle will still be within
|
|
* the SOP block
|
|
* 2. We cannot wrap except on a block boundary.
|
|
*/
|
|
} else {
|
|
/* QWORD data extends _to_ or beyond the SOP block */
|
|
|
|
/* write 8-byte SOP chunk data */
|
|
while (dest < send) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
/* drop out of the SOP range */
|
|
dest -= SOP_DISTANCE;
|
|
dend -= SOP_DISTANCE;
|
|
|
|
/*
|
|
* If the wrap comes before or matches the data end,
|
|
* copy until until the wrap, then wrap.
|
|
*
|
|
* If the data ends at the end of the SOP above and
|
|
* the buffer wraps, then pbuf->end == dend == dest
|
|
* and nothing will get written, but we will wrap in
|
|
* case there is a dangling DWORD.
|
|
*/
|
|
if (pbuf->end <= dend) {
|
|
while (dest < pbuf->end) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
|
|
dest -= pbuf->size;
|
|
dend -= pbuf->size;
|
|
}
|
|
|
|
/* write 8-byte non-SOP, non-wrap chunk data */
|
|
while (dest < dend) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
}
|
|
/* at this point we have wrapped if we are going to wrap */
|
|
|
|
/* ...but it doesn't matter as we're done writing */
|
|
|
|
/* save dangling bytes, if any */
|
|
read_low_bytes(pbuf, from, nbytes & 0x7);
|
|
|
|
pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
|
|
}
|
|
|
|
/*
|
|
* Mid copy helper, "mixed case" - source is 64-bit aligned but carry
|
|
* bytes are non-zero.
|
|
*
|
|
* Whole u64s must be written to the chip, so bytes must be manually merged.
|
|
*
|
|
* @pbuf: destination buffer
|
|
* @from: data source, is QWORD aligned.
|
|
* @nbytes: bytes to copy
|
|
*
|
|
* Must handle nbytes < 8.
|
|
*/
|
|
static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
|
|
{
|
|
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
|
|
void __iomem *dend; /* 8-byte data end */
|
|
unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
|
|
unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
|
|
|
|
/* calculate 8-byte data end */
|
|
dend = dest + (qw_to_write * sizeof(u64));
|
|
|
|
if (pbuf->qw_written < PIO_BLOCK_QWS) {
|
|
/*
|
|
* Still within SOP block. We don't need to check for
|
|
* wrap because we are still in the first block and
|
|
* can only wrap on block boundaries.
|
|
*/
|
|
void __iomem *send; /* SOP end */
|
|
void __iomem *xend;
|
|
|
|
/*
|
|
* calculate the end of data or end of block, whichever
|
|
* comes first
|
|
*/
|
|
send = pbuf->start + PIO_BLOCK_SIZE;
|
|
xend = min(send, dend);
|
|
|
|
/* shift up to SOP=1 space */
|
|
dest += SOP_DISTANCE;
|
|
xend += SOP_DISTANCE;
|
|
|
|
/* write 8-byte chunk data */
|
|
while (dest < xend) {
|
|
merge_write8(pbuf, dest, from);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
|
|
/* shift down to SOP=0 space */
|
|
dest -= SOP_DISTANCE;
|
|
}
|
|
/*
|
|
* At this point dest could be (either, both, or neither):
|
|
* - at dend
|
|
* - at the wrap
|
|
*/
|
|
|
|
/*
|
|
* If the wrap comes before or matches the data end,
|
|
* copy until until the wrap, then wrap.
|
|
*
|
|
* If dest is at the wrap, we will fall into the if,
|
|
* not do the loop, when wrap.
|
|
*
|
|
* If the data ends at the end of the SOP above and
|
|
* the buffer wraps, then pbuf->end == dend == dest
|
|
* and nothing will get written.
|
|
*/
|
|
if (pbuf->end <= dend) {
|
|
while (dest < pbuf->end) {
|
|
merge_write8(pbuf, dest, from);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
|
|
dest -= pbuf->size;
|
|
dend -= pbuf->size;
|
|
}
|
|
|
|
/* write 8-byte non-SOP, non-wrap chunk data */
|
|
while (dest < dend) {
|
|
merge_write8(pbuf, dest, from);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
|
|
/* adjust carry */
|
|
if (pbuf->carry_bytes < bytes_left) {
|
|
/* need to read more */
|
|
read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
|
|
} else {
|
|
/* remove invalid bytes */
|
|
zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
|
|
}
|
|
|
|
pbuf->qw_written += qw_to_write;
|
|
}
|
|
|
|
/*
|
|
* Mid copy helper, "straight case" - source pointer is 64-bit aligned
|
|
* with no carry bytes.
|
|
*
|
|
* @pbuf: destination buffer
|
|
* @from: data source, is QWORD aligned
|
|
* @nbytes: bytes to copy
|
|
*
|
|
* Must handle nbytes < 8.
|
|
*/
|
|
static void mid_copy_straight(struct pio_buf *pbuf,
|
|
const void *from, size_t nbytes)
|
|
{
|
|
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
|
|
void __iomem *dend; /* 8-byte data end */
|
|
|
|
/* calculate 8-byte data end */
|
|
dend = dest + ((nbytes >> 3) * sizeof(u64));
|
|
|
|
if (pbuf->qw_written < PIO_BLOCK_QWS) {
|
|
/*
|
|
* Still within SOP block. We don't need to check for
|
|
* wrap because we are still in the first block and
|
|
* can only wrap on block boundaries.
|
|
*/
|
|
void __iomem *send; /* SOP end */
|
|
void __iomem *xend;
|
|
|
|
/*
|
|
* calculate the end of data or end of block, whichever
|
|
* comes first
|
|
*/
|
|
send = pbuf->start + PIO_BLOCK_SIZE;
|
|
xend = min(send, dend);
|
|
|
|
/* shift up to SOP=1 space */
|
|
dest += SOP_DISTANCE;
|
|
xend += SOP_DISTANCE;
|
|
|
|
/* write 8-byte chunk data */
|
|
while (dest < xend) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
|
|
/* shift down to SOP=0 space */
|
|
dest -= SOP_DISTANCE;
|
|
}
|
|
/*
|
|
* At this point dest could be (either, both, or neither):
|
|
* - at dend
|
|
* - at the wrap
|
|
*/
|
|
|
|
/*
|
|
* If the wrap comes before or matches the data end,
|
|
* copy until until the wrap, then wrap.
|
|
*
|
|
* If dest is at the wrap, we will fall into the if,
|
|
* not do the loop, when wrap.
|
|
*
|
|
* If the data ends at the end of the SOP above and
|
|
* the buffer wraps, then pbuf->end == dend == dest
|
|
* and nothing will get written.
|
|
*/
|
|
if (pbuf->end <= dend) {
|
|
while (dest < pbuf->end) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
|
|
dest -= pbuf->size;
|
|
dend -= pbuf->size;
|
|
}
|
|
|
|
/* write 8-byte non-SOP, non-wrap chunk data */
|
|
while (dest < dend) {
|
|
writeq(*(u64 *)from, dest);
|
|
from += sizeof(u64);
|
|
dest += sizeof(u64);
|
|
}
|
|
|
|
/* we know carry_bytes was zero on entry to this routine */
|
|
read_low_bytes(pbuf, from, nbytes & 0x7);
|
|
|
|
pbuf->qw_written += nbytes >> 3;
|
|
}
|
|
|
|
/*
|
|
* Segmented PIO Copy - middle
|
|
*
|
|
* Must handle any aligned tail and any aligned source with any byte count.
|
|
*
|
|
* @pbuf: a number of blocks allocated within a PIO send context
|
|
* @from: data source
|
|
* @nbytes: number of bytes to copy
|
|
*/
|
|
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
|
|
{
|
|
unsigned long from_align = (unsigned long)from & 0x7;
|
|
|
|
if (pbuf->carry_bytes + nbytes < 8) {
|
|
/* not enough bytes to fill a QW */
|
|
read_extra_bytes(pbuf, from, nbytes);
|
|
return;
|
|
}
|
|
|
|
if (from_align) {
|
|
/* misaligned source pointer - align it */
|
|
unsigned long to_align;
|
|
|
|
/* bytes to read to align "from" */
|
|
to_align = 8 - from_align;
|
|
|
|
/*
|
|
* In the advance-to-alignment logic below, we do not need
|
|
* to check if we are using more than nbytes. This is because
|
|
* if we are here, we already know that carry+nbytes will
|
|
* fill at least one QW.
|
|
*/
|
|
if (pbuf->carry_bytes + to_align < 8) {
|
|
/* not enough align bytes to fill a QW */
|
|
read_extra_bytes(pbuf, from, to_align);
|
|
from += to_align;
|
|
nbytes -= to_align;
|
|
} else {
|
|
/* bytes to fill carry */
|
|
unsigned long to_fill = 8 - pbuf->carry_bytes;
|
|
/* bytes left over to be read */
|
|
unsigned long extra = to_align - to_fill;
|
|
void __iomem *dest;
|
|
|
|
/* fill carry... */
|
|
read_extra_bytes(pbuf, from, to_fill);
|
|
from += to_fill;
|
|
nbytes -= to_fill;
|
|
|
|
/* ...now write carry */
|
|
dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
|
|
|
|
/*
|
|
* The two checks immediately below cannot both be
|
|
* true, hence the else. If we have wrapped, we
|
|
* cannot still be within the first block.
|
|
* Conversely, if we are still in the first block, we
|
|
* cannot have wrapped. We do the wrap check first
|
|
* as that is more likely.
|
|
*/
|
|
/* adjust if we've wrapped */
|
|
if (dest >= pbuf->end)
|
|
dest -= pbuf->size;
|
|
/* jump to SOP range if within the first block */
|
|
else if (pbuf->qw_written < PIO_BLOCK_QWS)
|
|
dest += SOP_DISTANCE;
|
|
|
|
carry8_write8(pbuf->carry, dest);
|
|
pbuf->qw_written++;
|
|
|
|
/* read any extra bytes to do final alignment */
|
|
/* this will overwrite anything in pbuf->carry */
|
|
read_low_bytes(pbuf, from, extra);
|
|
from += extra;
|
|
nbytes -= extra;
|
|
}
|
|
|
|
/* at this point, from is QW aligned */
|
|
}
|
|
|
|
if (pbuf->carry_bytes)
|
|
mid_copy_mix(pbuf, from, nbytes);
|
|
else
|
|
mid_copy_straight(pbuf, from, nbytes);
|
|
}
|
|
|
|
/*
|
|
* Segmented PIO Copy - end
|
|
*
|
|
* Write any remainder (in pbuf->carry) and finish writing the whole block.
|
|
*
|
|
* @pbuf: a number of blocks allocated within a PIO send context
|
|
*/
|
|
void seg_pio_copy_end(struct pio_buf *pbuf)
|
|
{
|
|
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
|
|
|
|
/*
|
|
* The two checks immediately below cannot both be true, hence the
|
|
* else. If we have wrapped, we cannot still be within the first
|
|
* block. Conversely, if we are still in the first block, we
|
|
* cannot have wrapped. We do the wrap check first as that is
|
|
* more likely.
|
|
*/
|
|
/* adjust if we have wrapped */
|
|
if (dest >= pbuf->end)
|
|
dest -= pbuf->size;
|
|
/* jump to the SOP range if within the first block */
|
|
else if (pbuf->qw_written < PIO_BLOCK_QWS)
|
|
dest += SOP_DISTANCE;
|
|
|
|
/* write final bytes, if any */
|
|
if (carry_write8(pbuf, dest)) {
|
|
dest += sizeof(u64);
|
|
/*
|
|
* NOTE: We do not need to recalculate whether dest needs
|
|
* SOP_DISTANCE or not.
|
|
*
|
|
* If we are in the first block and the dangle write
|
|
* keeps us in the same block, dest will need
|
|
* to retain SOP_DISTANCE in the loop below.
|
|
*
|
|
* If we are in the first block and the dangle write pushes
|
|
* us to the next block, then loop below will not run
|
|
* and dest is not used. Hence we do not need to update
|
|
* it.
|
|
*
|
|
* If we are past the first block, then SOP_DISTANCE
|
|
* was never added, so there is nothing to do.
|
|
*/
|
|
}
|
|
|
|
/* fill in rest of block */
|
|
while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
|
|
writeq(0, dest);
|
|
dest += sizeof(u64);
|
|
}
|
|
|
|
/* finished with this buffer */
|
|
this_cpu_dec(*pbuf->sc->buffers_allocated);
|
|
preempt_enable();
|
|
}
|