forked from Minki/linux
fff7fb0b2d
The binary GCD algorithm is based on the following facts: 1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2) 2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b) 3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b) Even on x86 machines with reasonable division hardware, the binary algorithm runs about 25% faster (80% the execution time) than the division-based Euclidian algorithm. On platforms like Alpha and ARMv6 where division is a function call to emulation code, it's even more significant. There are two variants of the code here, depending on whether a fast __ffs (find least significant set bit) instruction is available. This allows the unpredictable branches in the bit-at-a-time shifting loop to be eliminated. If fast __ffs is not available, the "even/odd" GCD variant is used. I use the following code to benchmark: #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <string.h> #include <time.h> #include <unistd.h> #define swap(a, b) \ do { \ a ^= b; \ b ^= a; \ a ^= b; \ } while (0) unsigned long gcd0(unsigned long a, unsigned long b) { unsigned long r; if (a < b) { swap(a, b); } if (b == 0) return a; while ((r = a % b) != 0) { a = b; b = r; } return b; } unsigned long gcd1(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __builtin_ctzl(b); for (;;) { a >>= __builtin_ctzl(a); if (a == b) return a << __builtin_ctzl(r); if (a < b) swap(a, b); a -= b; } } unsigned long gcd2(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; r &= -r; while (!(b & r)) b >>= 1; for (;;) { while (!(a & r)) a >>= 1; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } unsigned long gcd3(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __builtin_ctzl(b); if (b == 1) return r & -r; for (;;) { a >>= __builtin_ctzl(a); if (a == 1) return r & -r; if (a == b) return a << __builtin_ctzl(r); if (a < b) swap(a, b); a -= b; } } unsigned long gcd4(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; r &= -r; while (!(b & r)) b >>= 1; if (b == r) return r; for (;;) { while (!(a & r)) a >>= 1; if (a == r) return r; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = { gcd0, gcd1, gcd2, gcd3, gcd4, }; #define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0])) #if defined(__x86_64__) #define rdtscll(val) do { \ unsigned long __a,__d; \ __asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \ (val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \ } while(0) static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long), unsigned long a, unsigned long b, unsigned long *res) { unsigned long long start, end; unsigned long long ret; unsigned long gcd_res; rdtscll(start); gcd_res = gcd(a, b); rdtscll(end); if (end >= start) ret = end - start; else ret = ~0ULL - start + 1 + end; *res = gcd_res; return ret; } #else static inline struct timespec read_time(void) { struct timespec time; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time); return time; } static inline unsigned long long diff_time(struct timespec start, struct timespec end) { struct timespec temp; if ((end.tv_nsec - start.tv_nsec) < 0) { temp.tv_sec = end.tv_sec - start.tv_sec - 1; temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec; } else { temp.tv_sec = end.tv_sec - start.tv_sec; temp.tv_nsec = end.tv_nsec - start.tv_nsec; } return temp.tv_sec * 1000000000ULL + temp.tv_nsec; } static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long), unsigned long a, unsigned long b, unsigned long *res) { struct timespec start, end; unsigned long gcd_res; start = read_time(); gcd_res = gcd(a, b); end = read_time(); *res = gcd_res; return diff_time(start, end); } #endif static inline unsigned long get_rand() { if (sizeof(long) == 8) return (unsigned long)rand() << 32 | rand(); else return rand(); } int main(int argc, char **argv) { unsigned int seed = time(0); int loops = 100; int repeats = 1000; unsigned long (*res)[TEST_ENTRIES]; unsigned long long elapsed[TEST_ENTRIES]; int i, j, k; for (;;) { int opt = getopt(argc, argv, "n:r:s:"); /* End condition always first */ if (opt == -1) break; switch (opt) { case 'n': loops = atoi(optarg); break; case 'r': repeats = atoi(optarg); break; case 's': seed = strtoul(optarg, NULL, 10); break; default: /* You won't actually get here. */ break; } } res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops); memset(elapsed, 0, sizeof(elapsed)); srand(seed); for (j = 0; j < loops; j++) { unsigned long a = get_rand(); /* Do we have args? */ unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand(); unsigned long long min_elapsed[TEST_ENTRIES]; for (k = 0; k < repeats; k++) { for (i = 0; i < TEST_ENTRIES; i++) { unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]); if (k == 0 || min_elapsed[i] > tmp) min_elapsed[i] = tmp; } } for (i = 0; i < TEST_ENTRIES; i++) elapsed[i] += min_elapsed[i]; } for (i = 0; i < TEST_ENTRIES; i++) printf("gcd%d: elapsed %llu\n", i, elapsed[i]); k = 0; srand(seed); for (j = 0; j < loops; j++) { unsigned long a = get_rand(); unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand(); for (i = 1; i < TEST_ENTRIES; i++) { if (res[j][i] != res[j][0]) break; } if (i < TEST_ENTRIES) { if (k == 0) { k = 1; fprintf(stderr, "Error:\n"); } fprintf(stderr, "gcd(%lu, %lu): ", a, b); for (i = 0; i < TEST_ENTRIES; i++) fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n"); } } if (k == 0) fprintf(stderr, "PASS\n"); free(res); return 0; } Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got: zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 10174 gcd1: elapsed 2120 gcd2: elapsed 2902 gcd3: elapsed 2039 gcd4: elapsed 2812 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9309 gcd1: elapsed 2280 gcd2: elapsed 2822 gcd3: elapsed 2217 gcd4: elapsed 2710 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9589 gcd1: elapsed 2098 gcd2: elapsed 2815 gcd3: elapsed 2030 gcd4: elapsed 2718 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9914 gcd1: elapsed 2309 gcd2: elapsed 2779 gcd3: elapsed 2228 gcd4: elapsed 2709 PASS [akpm@linux-foundation.org: avoid #defining a CONFIG_ variable] Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com> Signed-off-by: George Spelvin <linux@horizon.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
350 lines
8.0 KiB
Plaintext
350 lines
8.0 KiB
Plaintext
config PARISC
|
|
def_bool y
|
|
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
|
|
select ARCH_MIGHT_HAVE_PC_PARPORT
|
|
select HAVE_IDE
|
|
select HAVE_OPROFILE
|
|
select HAVE_FUNCTION_TRACER
|
|
select HAVE_FUNCTION_GRAPH_TRACER
|
|
select ARCH_WANT_FRAME_POINTERS
|
|
select RTC_CLASS
|
|
select RTC_DRV_GENERIC
|
|
select INIT_ALL_POSSIBLE
|
|
select BUG
|
|
select BUILDTIME_EXTABLE_SORT
|
|
select HAVE_PERF_EVENTS
|
|
select GENERIC_ATOMIC64 if !64BIT
|
|
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
|
|
select BROKEN_RODATA
|
|
select GENERIC_IRQ_PROBE
|
|
select GENERIC_PCI_IOMAP
|
|
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
|
select GENERIC_SMP_IDLE_THREAD
|
|
select GENERIC_STRNCPY_FROM_USER
|
|
select SYSCTL_ARCH_UNALIGN_ALLOW
|
|
select SYSCTL_EXCEPTION_TRACE
|
|
select HAVE_MOD_ARCH_SPECIFIC
|
|
select VIRT_TO_BUS
|
|
select MODULES_USE_ELF_RELA
|
|
select CLONE_BACKWARDS
|
|
select TTY # Needed for pdc_cons.c
|
|
select HAVE_DEBUG_STACKOVERFLOW
|
|
select HAVE_ARCH_AUDITSYSCALL
|
|
select HAVE_ARCH_SECCOMP_FILTER
|
|
select ARCH_NO_COHERENT_DMA_MMAP
|
|
select CPU_NO_EFFICIENT_FFS
|
|
|
|
help
|
|
The PA-RISC microprocessor is designed by Hewlett-Packard and used
|
|
in many of their workstations & servers (HP9000 700 and 800 series,
|
|
and later HP3000 series). The PA-RISC Linux project home page is
|
|
at <http://www.parisc-linux.org/>.
|
|
|
|
config MMU
|
|
def_bool y
|
|
|
|
config STACK_GROWSUP
|
|
def_bool y
|
|
|
|
config GENERIC_LOCKBREAK
|
|
bool
|
|
default y
|
|
depends on SMP && PREEMPT
|
|
|
|
config RWSEM_GENERIC_SPINLOCK
|
|
def_bool y
|
|
|
|
config RWSEM_XCHGADD_ALGORITHM
|
|
bool
|
|
|
|
config ARCH_HAS_ILOG2_U32
|
|
bool
|
|
default n
|
|
|
|
config ARCH_HAS_ILOG2_U64
|
|
bool
|
|
default n
|
|
|
|
config GENERIC_BUG
|
|
bool
|
|
default y
|
|
depends on BUG
|
|
|
|
config GENERIC_HWEIGHT
|
|
bool
|
|
default y
|
|
|
|
config GENERIC_CALIBRATE_DELAY
|
|
bool
|
|
default y
|
|
|
|
config TIME_LOW_RES
|
|
bool
|
|
depends on SMP
|
|
default y
|
|
|
|
# unless you want to implement ACPI on PA-RISC ... ;-)
|
|
config PM
|
|
bool
|
|
|
|
config STACKTRACE_SUPPORT
|
|
def_bool y
|
|
|
|
config NEED_DMA_MAP_STATE
|
|
def_bool y
|
|
|
|
config NEED_SG_DMA_LENGTH
|
|
def_bool y
|
|
|
|
config ISA_DMA_API
|
|
bool
|
|
|
|
config ARCH_MAY_HAVE_PC_FDC
|
|
bool
|
|
depends on BROKEN
|
|
default y
|
|
|
|
config PGTABLE_LEVELS
|
|
int
|
|
default 3 if 64BIT && PARISC_PAGE_SIZE_4KB
|
|
default 2
|
|
|
|
config SYS_SUPPORTS_HUGETLBFS
|
|
def_bool y if PA20
|
|
|
|
source "init/Kconfig"
|
|
|
|
source "kernel/Kconfig.freezer"
|
|
|
|
|
|
menu "Processor type and features"
|
|
|
|
choice
|
|
prompt "Processor type"
|
|
default PA7000
|
|
|
|
config PA7000
|
|
bool "PA7000/PA7100"
|
|
---help---
|
|
This is the processor type of your CPU. This information is
|
|
used for optimizing purposes. In order to compile a kernel
|
|
that can run on all 32-bit PA CPUs (albeit not optimally fast),
|
|
you can specify "PA7000" here.
|
|
|
|
Specifying "PA8000" here will allow you to select a 64-bit kernel
|
|
which is required on some machines.
|
|
|
|
config PA7100LC
|
|
bool "PA7100LC"
|
|
help
|
|
Select this option for the PCX-L processor, as used in the
|
|
712, 715/64, 715/80, 715/100, 715/100XC, 725/100, 743, 748,
|
|
D200, D210, D300, D310 and E-class
|
|
|
|
config PA7200
|
|
bool "PA7200"
|
|
help
|
|
Select this option for the PCX-T' processor, as used in the
|
|
C100, C110, J100, J110, J210XC, D250, D260, D350, D360,
|
|
K100, K200, K210, K220, K400, K410 and K420
|
|
|
|
config PA7300LC
|
|
bool "PA7300LC"
|
|
help
|
|
Select this option for the PCX-L2 processor, as used in the
|
|
744, A180, B132L, B160L, B180L, C132L, C160L, C180L,
|
|
D220, D230, D320 and D330.
|
|
|
|
config PA8X00
|
|
bool "PA8000 and up"
|
|
help
|
|
Select this option for PCX-U to PCX-W2 processors.
|
|
|
|
endchoice
|
|
|
|
# Define implied options from the CPU selection here
|
|
|
|
config PA20
|
|
def_bool y
|
|
depends on PA8X00
|
|
|
|
config PA11
|
|
def_bool y
|
|
depends on PA7000 || PA7100LC || PA7200 || PA7300LC
|
|
|
|
config PREFETCH
|
|
def_bool y
|
|
depends on PA8X00 || PA7200
|
|
|
|
config MLONGCALLS
|
|
bool "Enable the -mlong-calls compiler option for big kernels"
|
|
def_bool y if (!MODULES)
|
|
depends on PA8X00
|
|
help
|
|
If you configure the kernel to include many drivers built-in instead
|
|
as modules, the kernel executable may become too big, so that the
|
|
linker will not be able to resolve some long branches and fails to link
|
|
your vmlinux kernel. In that case enabling this option will help you
|
|
to overcome this limit by using the -mlong-calls compiler option.
|
|
|
|
Usually you want to say N here, unless you e.g. want to build
|
|
a kernel which includes all necessary drivers built-in and which can
|
|
be used for TFTP booting without the need to have an initrd ramdisk.
|
|
|
|
Enabling this option will probably slow down your kernel.
|
|
|
|
config 64BIT
|
|
bool "64-bit kernel"
|
|
depends on PA8X00
|
|
help
|
|
Enable this if you want to support 64bit kernel on PA-RISC platform.
|
|
|
|
At the moment, only people willing to use more than 2GB of RAM,
|
|
or having a 64bit-only capable PA-RISC machine should say Y here.
|
|
|
|
Since there is no 64bit userland on PA-RISC, there is no point to
|
|
enable this option otherwise. The 64bit kernel is significantly bigger
|
|
and slower than the 32bit one.
|
|
|
|
choice
|
|
prompt "Kernel page size"
|
|
default PARISC_PAGE_SIZE_4KB
|
|
|
|
config PARISC_PAGE_SIZE_4KB
|
|
bool "4KB"
|
|
help
|
|
This lets you select the page size of the kernel. For best
|
|
performance, a page size of 16KB is recommended. For best
|
|
compatibility with 32bit applications, a page size of 4KB should be
|
|
selected (the vast majority of 32bit binaries work perfectly fine
|
|
with a larger page size).
|
|
|
|
4KB For best 32bit compatibility
|
|
16KB For best performance
|
|
64KB For best performance, might give more overhead.
|
|
|
|
If you don't know what to do, choose 4KB.
|
|
|
|
config PARISC_PAGE_SIZE_16KB
|
|
bool "16KB"
|
|
depends on PA8X00
|
|
|
|
config PARISC_PAGE_SIZE_64KB
|
|
bool "64KB"
|
|
depends on PA8X00
|
|
|
|
endchoice
|
|
|
|
config SMP
|
|
bool "Symmetric multi-processing support"
|
|
---help---
|
|
This enables support for systems with more than one CPU. If you have
|
|
a system with only one CPU, say N. If you have a system with more
|
|
than one CPU, say Y.
|
|
|
|
If you say N here, the kernel will run on uni- and multiprocessor
|
|
machines, but will use only one CPU of a multiprocessor machine. If
|
|
you say Y here, the kernel will run on many, but not all,
|
|
uniprocessor machines. On a uniprocessor machine, the kernel
|
|
will run faster if you say N here.
|
|
|
|
See also <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO
|
|
available at <http://www.tldp.org/docs.html#howto>.
|
|
|
|
If you don't know what to do here, say N.
|
|
|
|
config IRQSTACKS
|
|
bool "Use separate kernel stacks when processing interrupts"
|
|
default y
|
|
help
|
|
If you say Y here the kernel will use separate kernel stacks
|
|
for handling hard and soft interrupts. This can help avoid
|
|
overflowing the process kernel stacks.
|
|
|
|
config HOTPLUG_CPU
|
|
bool
|
|
default y if SMP
|
|
|
|
config ARCH_SELECT_MEMORY_MODEL
|
|
def_bool y
|
|
depends on 64BIT
|
|
|
|
config ARCH_DISCONTIGMEM_ENABLE
|
|
def_bool y
|
|
depends on 64BIT
|
|
|
|
config ARCH_FLATMEM_ENABLE
|
|
def_bool y
|
|
|
|
config ARCH_DISCONTIGMEM_DEFAULT
|
|
def_bool y
|
|
depends on ARCH_DISCONTIGMEM_ENABLE
|
|
|
|
config NODES_SHIFT
|
|
int
|
|
default "3"
|
|
depends on NEED_MULTIPLE_NODES
|
|
|
|
source "kernel/Kconfig.preempt"
|
|
source "kernel/Kconfig.hz"
|
|
source "mm/Kconfig"
|
|
|
|
config COMPAT
|
|
def_bool y
|
|
depends on 64BIT
|
|
|
|
config SYSVIPC_COMPAT
|
|
def_bool y
|
|
depends on COMPAT && SYSVIPC
|
|
|
|
config AUDIT_ARCH
|
|
def_bool y
|
|
|
|
config NR_CPUS
|
|
int "Maximum number of CPUs (2-32)"
|
|
range 2 32
|
|
depends on SMP
|
|
default "32"
|
|
|
|
endmenu
|
|
|
|
|
|
source "drivers/parisc/Kconfig"
|
|
|
|
|
|
menu "Executable file formats"
|
|
|
|
source "fs/Kconfig.binfmt"
|
|
|
|
endmenu
|
|
|
|
source "net/Kconfig"
|
|
|
|
source "drivers/Kconfig"
|
|
|
|
source "fs/Kconfig"
|
|
|
|
source "arch/parisc/Kconfig.debug"
|
|
|
|
config SECCOMP
|
|
def_bool y
|
|
prompt "Enable seccomp to safely compute untrusted bytecode"
|
|
---help---
|
|
This kernel feature is useful for number crunching applications
|
|
that may need to compute untrusted bytecode during their
|
|
execution. By using pipes or other transports made available to
|
|
the process as file descriptors supporting the read/write
|
|
syscalls, it's possible to isolate those applications in
|
|
their own address space using seccomp. Once seccomp is
|
|
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
|
and the task is only allowed to execute a few safe syscalls
|
|
defined by each seccomp mode.
|
|
|
|
If unsure, say Y. Only embedded should say N here.
|
|
|
|
source "security/Kconfig"
|
|
|
|
source "crypto/Kconfig"
|
|
|
|
source "lib/Kconfig"
|