linux/lib/raid6/altivec.uc

/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6altivec$#.c
 *
 * $#-way unrolled portable integer math RAID-6 instruction set
 *
 * This file is postprocessed using unroll.awk
 *
 * <benh> hpa: in process,
 * you can just "steal" the vec unit with enable_kernel_altivec() (but
 * bracked this with preempt_disable/enable or in a lock)
 */

#include <linux/raid/pq.h>

#include <altivec.h>
#ifdef __KERNEL__
# include <asm/cputable.h>
# include <asm/switch_to.h>

/*
 * This is the C data type to use.  We use a vector of
 * signed char so vec_cmpgt() will generate the right
 * instruction.
 */

typedef vector signed char unative_t;

#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
#define NSIZE	sizeof(unative_t)

/*
 * The SHLBYTE() operation shifts each byte left by 1, *not*
 * rolling over into the next byte
 */
static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
{
	return vec_add(v,v);
}

/*
 * The MASK() operation returns 0xFF in any byte for which the high
 * bit is 1, 0x00 for any byte for which the high bit is 0.
 */
static inline __attribute_const__ unative_t MASK(unative_t v)
{
	unative_t zv = NBYTES(0);

	/* vec_cmpgt returns a vector bool char; thus the need for the cast */
	return (unative_t)vec_cmpgt(zv, v);
}


/* This is noinline to make damned sure that gcc doesn't move any of the
   Altivec code around the enable/disable code */
static void noinline
raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
	unative_t x1d = NBYTES(0x1d);

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			wp$$ = vec_xor(wp$$, wd$$);
			w2$$ = MASK(wq$$);
			w1$$ = SHLBYTE(wq$$);
			w2$$ = vec_and(w2$$, x1d);
			w1$$ = vec_xor(w1$$, w2$$);
			wq$$ = vec_xor(w1$$, wd$$);
		}
		*(unative_t *)&p[d+NSIZE*$$] = wp$$;
		*(unative_t *)&q[d+NSIZE*$$] = wq$$;
	}
}

static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	preempt_disable();
	enable_kernel_altivec();

	raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);

	preempt_enable();
}

int raid6_have_altivec(void);
#if $# == 1
int raid6_have_altivec(void)
{
	/* This assumes either all CPUs have Altivec or none does */
# ifdef __KERNEL__
	return cpu_has_feature(CPU_FTR_ALTIVEC);
# else
	return 1;
# endif
}
#endif

const struct raid6_calls raid6_altivec$# = {
	raid6_altivec$#_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_altivec,
	"altivecx$#",
	0
};

#endif /* CONFIG_ALTIVEC */
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`/* -- linux-c -- ------------------------------------------------------- *`
			`*`
			`* Copyright 2002-2004 H. Peter Anvin - All Rights Reserved`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation, Inc., 53 Temple Place Ste 330,`
md: fix typo in FSF address Hello, I found a typo Bosto"m" in FSF address. And I am checking around linux source code. Here is the only place which uses Bosto"m" (not Boston). Signed-off-by: Atsushi SAKAI <sakaia@jp.fujitsu.com> Signed-off-by: NeilBrown <neilb@suse.de> 2009-03-31 03:57:37 +00:00			`* Boston MA 02111-1307, USA; either version 2 of the License, or`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`* (at your option) any later version; incorporated herein by reference.`
			`*`
			`* ----------------------------------------------------------------------- */`

			`/*`
			`* raid6altivec$#.c`
			`*`
			`* $#-way unrolled portable integer math RAID-6 instruction set`
			`*`
md: drivers/md/unroll.pl replaced with awk analog drivers/md/unroll.pl replaced by awk script to drop build-time dependency on perl Signed-off-by: Vladimir Dronnikov <dronnikov@gmail.com> Signed-off-by: NeilBrown <neilb@suse.de> 2009-10-16 05:25:19 +00:00			`* This file is postprocessed using unroll.awk`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`*`
			`* <benh> hpa: in process,`
			`* you can just "steal" the vec unit with enable_kernel_altivec() (but`
			`* bracked this with preempt_disable/enable or in a lock)`
			`*/`

md/raid6: move raid6 data processing to raid6_pq.ko Move the raid6 data processing routines into a standalone module (raid6_pq) to prepare them to be called from async_tx wrappers and other non-md drivers/modules. This precludes a circular dependency of raid456 needing the async modules for data processing while those modules in turn depend on raid456 for the base level synchronous raid6 routines. To support this move: 1/ The exportable definitions in raid6.h move to include/linux/raid/pq.h 2/ The raid6_call, recovery calls, and table symbols are exported 3/ Extra #ifdef __KERNEL__ statements to enable the userspace raid6test to compile Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: NeilBrown <neilb@suse.de> 2009-03-31 04:09:39 +00:00			`#include <linux/raid/pq.h>`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00
			`#include <altivec.h>`
[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-17 02:27:29 +00:00			`#ifdef __KERNEL__`
			`# include <asm/cputable.h>`
Disintegrate asm/system.h for PowerPC Disintegrate asm/system.h for PowerPC. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> cc: linuxppc-dev@lists.ozlabs.org 2012-03-28 17:30:02 +00:00			`# include <asm/switch_to.h>`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00
			`/*`
[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-17 02:27:29 +00:00			`* This is the C data type to use. We use a vector of`
			`* signed char so vec_cmpgt() will generate the right`
			`* instruction.`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`*/`

[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-17 02:27:29 +00:00			`typedef vector signed char unative_t;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00
[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-17 02:27:29 +00:00			`#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`#define NSIZE sizeof(unative_t)`

			`/*`
			`* The SHLBYTE() operation shifts each byte left by 1, not`
			`* rolling over into the next byte`
			`*/`
			`static inline __attribute_const__ unative_t SHLBYTE(unative_t v)`
			`{`
			`return vec_add(v,v);`
			`}`

			`/*`
			`* The MASK() operation returns 0xFF in any byte for which the high`
			`* bit is 1, 0x00 for any byte for which the high bit is 0.`
			`*/`
			`static inline __attribute_const__ unative_t MASK(unative_t v)`
			`{`
			`unative_t zv = NBYTES(0);`

			`/* vec_cmpgt returns a vector bool char; thus the need for the cast */`
			`return (unative_t)vec_cmpgt(zv, v);`
			`}`


			`/* This is noinline to make damned sure that gcc doesn't move any of the`
			`Altivec code around the enable/disable code */`
			`static void noinline`
			`raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)`
			`{`
			`u8 dptr = (u8 )ptrs;`
			`u8 p, q;`
			`int d, z, z0;`

			`unative_t wd$$, wq$$, wp$$, w1$$, w2$$;`
			`unative_t x1d = NBYTES(0x1d);`

			`z0 = disks - 3; /* Highest data disk */`
			`p = dptr[z0+1]; /* XOR parity */`
			`q = dptr[z0+2]; /* RS syndrome */`

			`for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {`
			`wq$$ = wp$$ = (unative_t )&dptr[z0][d+$$*NSIZE];`
			`for ( z = z0-1 ; z >= 0 ; z-- ) {`
			`wd$$ = (unative_t )&dptr[z][d+$$*NSIZE];`
			`wp$$ = vec_xor(wp$$, wd$$);`
			`w2$$ = MASK(wq$$);`
			`w1$$ = SHLBYTE(wq$$);`
			`w2$$ = vec_and(w2$$, x1d);`
			`w1$$ = vec_xor(w1$$, w2$$);`
			`wq$$ = vec_xor(w1$$, wd$$);`
			`}`
			`(unative_t )&p[d+NSIZE*$$] = wp$$;`
			`(unative_t )&q[d+NSIZE*$$] = wq$$;`
			`}`
			`}`

			`static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)`
			`{`
			`preempt_disable();`
			`enable_kernel_altivec();`

			`raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);`

			`preempt_enable();`
			`}`

			`int raid6_have_altivec(void);`
			`#if $# == 1`
			`int raid6_have_altivec(void)`
			`{`
			`/* This assumes either all CPUs have Altivec or none does */`
[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-17 02:27:29 +00:00			`# ifdef __KERNEL__`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`return cpu_has_feature(CPU_FTR_ALTIVEC);`
[PATCH] RAID6 Altivec fix This patch fixes a signedness bug with RAID6 for Altivec, and makes the Altivec code testable in userspace. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-17 02:27:29 +00:00			`# else`
			`return 1;`
			`# endif`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`}`
			`#endif`

			`const struct raid6_calls raid6_altivec$# = {`
			`raid6_altivec$#_gen_syndrome,`
md/raid6 algorithms: delta syndrome functions v3: s-o-b comment, explanation of performance and descision for the start/stop implementation Implementing rmw functionality for RAID6 requires optimized syndrome calculation. Up to now we can only generate a complete syndrome. The target P/Q pages are always overwritten. With this patch we provide a framework for inplace P/Q modification. In the first place simply fill those functions with NULL values. xor_syndrome() has two additional parameters: start & stop. These will indicate the first and last page that are changing during a rmw run. That makes it possible to avoid several unneccessary loops and speed up calculation. The caller needs to implement the following logic to make the functions work. 1) xor_syndrome(disks, start, stop, ...): "Remove" all data of source blocks inside P/Q between (and including) start and end. 2) modify any block with start <= block <= stop 3) xor_syndrome(disks, start, stop, ...): "Reinsert" all data of source blocks into P/Q between (and including) start and end. Pages between start and stop that won't be changed should be filled with a pointer to the kernel zero page. The reasons for not taking NULL pages are: 1) Algorithms cross the whole source data line by line. Thus avoid additional branches. 2) Having a NULL page avoids calculating the XOR P parity but still need calulation steps for the Q parity. Depending on the algorithm unrolling that might be only a difference of 2 instructions per loop. The benchmark numbers of the gen_syndrome() functions are displayed in the kernel log. Do the same for the xor_syndrome() functions. This will help to analyze performance problems and give an rough estimate how well the algorithm works. The choice of the fastest algorithm will still depend on the gen_syndrome() performance. With the start/stop page implementation the speed can vary a lot in real life. E.g. a change of page 0 & page 15 on a stripe will be harder to compute than the case where page 0 & page 1 are XOR candidates. To be not to enthusiatic about the expected speeds we will run a worse case test that simulates a change on the upper half of the stripe. So we do: 1) calculation of P/Q for the upper pages 2) continuation of Q for the lower (empty) pages Signed-off-by: Markus Stockhausen <stockhausen@collogia.de> Signed-off-by: NeilBrown <neilb@suse.de> 2014-12-15 01:57:04 +00:00			`NULL, /* XOR not yet implemented */`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`raid6_have_altivec,`
			`"altivecx$#",`
			`0`
			`};`

			`#endif /* CONFIG_ALTIVEC */`