linux/lib
Daniel Wagner ca96ab859a lib/sort: Add 64 bit swap function
In case the call side is not providing a swap function, we either use a
32 bit or a generic swap function.  When swapping around pointers on 64
bit architectures falling back to use the generic swap function seems
like an unnecessary waste.

There at least 9 users ('sort' is of difficult to grep for) of sort()
and all of them use the sort function without a customized swap
function.  Furthermore, they are all using pointers to swap around:

arch/x86/kernel/e820.c:sanitize_e820_map()
arch/x86/mm/extable.c:sort_extable()
drivers/acpi/fan.c:acpi_fan_get_fps()
fs/btrfs/super.c:btrfs_descending_sort_devices()
fs/xfs/libxfs/xfs_dir2_block.c:xfs_dir2_sf_to_block()
kernel/range.c:clean_sort_range()
mm/memcontrol.c:__mem_cgroup_usage_register_event()
sound/pci/hda/hda_auto_parser.c:snd_hda_parse_pin_defcfg()
sound/pci/hda/hda_auto_parser.c:sort_pins_by_sequence()

Obviously, we could improve the swap for other sizes as well
but this is overkill at this point.

A simple test shows sorting a 400 element array (try to stay in one
page) with either with u32_swap() or u64_swap() show that the theory
actually works. This test was done on a x86_64 (Intel Xeon E5-4610)
machine.

- swap_32:

NumSamples = 100; Min = 48.00; Max = 49.00
Mean = 48.320000; Variance = 0.217600; SD = 0.466476; Median 48.000000
each * represents a count of 1
   48.0000 -    48.1000 [    68]: ********************************************************************
   48.1000 -    48.2000 [     0]:
   48.2000 -    48.3000 [     0]:
   48.3000 -    48.4000 [     0]:
   48.4000 -    48.5000 [     0]:
   48.5000 -    48.6000 [     0]:
   48.6000 -    48.7000 [     0]:
   48.7000 -    48.8000 [     0]:
   48.8000 -    48.9000 [     0]:
   48.9000 -    49.0000 [    32]: ********************************

- swap_64:

NumSamples = 100; Min = 44.00; Max = 63.00
Mean = 48.250000; Variance = 18.687500; SD = 4.322904; Median 47.000000
each * represents a count of 1
   44.0000 -    45.9000 [    15]: ***************
   45.9000 -    47.8000 [    37]: *************************************
   47.8000 -    49.7000 [    39]: ***************************************
   49.7000 -    51.6000 [     0]:
   51.6000 -    53.5000 [     0]:
   53.5000 -    55.4000 [     0]:
   55.4000 -    57.3000 [     0]:
   57.3000 -    59.2000 [     1]: *
   59.2000 -    61.1000 [     3]: ***
   61.1000 -    63.0000 [     5]: *****

- swap_72:

NumSamples = 100; Min = 53.00; Max = 71.00
Mean = 55.070000; Variance = 21.565100; SD = 4.643824; Median 53.000000
each * represents a count of 1
   53.0000 -    54.8000 [    73]: *************************************************************************
   54.8000 -    56.6000 [     9]: *********
   56.6000 -    58.4000 [     9]: *********
   58.4000 -    60.2000 [     0]:
   60.2000 -    62.0000 [     0]:
   62.0000 -    63.8000 [     0]:
   63.8000 -    65.6000 [     0]:
   65.6000 -    67.4000 [     1]: *
   67.4000 -    69.2000 [     4]: ****
   69.2000 -    71.0000 [     4]: ****

- test program:

static int cmp_32(const void *a, const void *b)
{
	u32 l = *(u32 *)a;
	u32 r = *(u32 *)b;

	if (l < r)
		return -1;
	if (l > r)
		return 1;
	return 0;
}

static int cmp_64(const void *a, const void *b)
{
	u64 l = *(u64 *)a;
	u64 r = *(u64 *)b;

	if (l < r)
		return -1;
	if (l > r)
		return 1;
	return 0;
}

static int cmp_72(const void *a, const void *b)
{
	u32 l = get_unaligned((u32 *) a);
	u32 r = get_unaligned((u32 *) b);

	if (l < r)
		return -1;
	if (l > r)
		return 1;
	return 0;
}

static void init_array32(void *array)
{
	u32 *a = array;
	int i;

	a[0] = 3821;
	for (i = 1; i < ARRAY_ELEMENTS; i++)
		a[i] = next_pseudo_random32(a[i-1]);
}

static void init_array64(void *array)
{
	u64 *a = array;
	int i;

	a[0] = 3821;
	for (i = 1; i < ARRAY_ELEMENTS; i++)
		a[i] = next_pseudo_random32(a[i-1]);
}

static void init_array72(void *array)
{
	char *p;
	u32 v;
	int i;

	v = 3821;
	for (i = 0; i < ARRAY_ELEMENTS; i++) {
		p = (char *)array + (i * 9);
		put_unaligned(v, (u32*) p);
		v = next_pseudo_random32(v);
	}
}

static void sort_test(void (*init)(void *array),
		      int (*cmp) (const void *, const void *),
		      void *array, size_t size)
{
	ktime_t start, stop;
	int i;

	for (i = 0; i < 10000; i++) {
		init(array);

		local_irq_disable();
		start = ktime_get();

		sort(array, ARRAY_ELEMENTS, size, cmp, NULL);

		stop = ktime_get();
		local_irq_enable();

		if (i > 10000 - 101)
		  pr_info("%lld\n",  ktime_to_us(ktime_sub(stop, start)));
	}
}

static void *create_array(size_t size)
{
	void *array;

	array = kmalloc(ARRAY_ELEMENTS * size, GFP_KERNEL);
	if (!array)
		return NULL;

	return array;
}

static int perform_test(size_t size)
{
	void *array;

	array = create_array(size);
	if (!array)
		return -ENOMEM;

	pr_info("test element size %d bytes\n", (int)size);
	switch (size) {
	case 4:
		sort_test(init_array32, cmp_32, array, size);
		break;
	case 8:
		sort_test(init_array64, cmp_64, array, size);
		break;
	case 9:
		sort_test(init_array72, cmp_72, array, size);
		break;
	}
	kfree(array);

	return 0;
}

static int __init sort_tests_init(void)
{
	int err;

	err = perform_test(sizeof(u32));
	if (err)
		return err;

	err = perform_test(sizeof(u64));
	if (err)
		return err;

	err = perform_test(sizeof(u64)+1);
	if (err)
		return err;

	return 0;
}

static void __exit sort_tests_exit(void)
{
}

module_init(sort_tests_init);
module_exit(sort_tests_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Daniel Wagner");
MODULE_DESCRIPTION("sort perfomance tests");

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-06-25 17:00:40 -07:00
..
842 lib: correct 842 decompress for 32 bit 2015-05-13 10:31:59 +08:00
fonts fonts: Add 6x10 font 2014-10-09 11:35:48 +03:00
lz4 lib/lz4: Pull out constant tables 2015-03-25 15:04:57 +01:00
lzo lzo: check for length overrun in variable length encoding. 2014-09-28 11:08:01 +02:00
mpi Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6 2015-06-22 21:04:48 -07:00
raid6 powerpc updates for 4.2 2015-06-24 08:46:32 -07:00
reed_solomon
xz lib/xz: enable all filters by default in Kconfig 2014-06-04 16:54:18 -07:00
zlib_deflate zlib: clean up some dead code 2014-08-06 18:01:24 -07:00
zlib_inflate zlib: clean up some dead code 2014-08-06 18:01:24 -07:00
.gitignore
argv_split.c
asn1_decoder.c lib/asn1_decoder.c: kernel-doc warning fix 2014-06-04 16:54:19 -07:00
assoc_array.c assoc_array: Include rcupdate.h for call_rcu() definition 2015-01-07 16:08:41 +00:00
atomic64_test.c lib/atomic64_test.c: convert printk(KERN_INFO to pr_info 2014-06-04 16:54:19 -07:00
atomic64.c locking,arch: Rewrite generic atomic support 2014-08-14 12:48:14 +02:00
audit.c syscalls: implement execveat() system call 2014-12-13 12:42:51 -08:00
average.c lib: Ensure EWMA does not store wrong intermediate values 2014-01-16 23:46:06 -08:00
bcd.c
bch.c
bitmap.c __bitmap_parselist: fix bug in empty string handling 2015-06-25 17:00:40 -07:00
bitrev.c ARM: 8187/1: add CONFIG_HAVE_ARCH_BITREVERSE to support rbit instruction 2014-12-22 16:43:06 +00:00
bsearch.c
btree.c lib/btree.c: fix leak of whole btree nodes 2014-06-04 16:54:18 -07:00
bug.c lib/bug: Use RCU list ops for module_bug_list 2014-11-11 17:07:46 +10:30
build_OID_registry X.509: do not emit any informational output 2013-06-19 17:54:06 +02:00
bust_spinlocks.c
check_signature.c
checksum.c lib/checksum.c: fix build for generic csum_tcpudp_nofold 2015-01-29 11:57:38 -08:00
clz_ctz.c lib/clz_ctz.c: add prototype declarations in lib/clz_ctz.c 2014-04-03 16:21:12 -07:00
clz_tab.c
cmdline.c lib: Add a generic cmdline parse function parse_option_str 2014-10-03 18:40:58 +01:00
compat_audit.c audit: Add generic compat syscall support 2014-03-20 10:11:35 -04:00
cordic.c
cpu_rmap.c sched/topology: Rename topology_thread_cpumask() to topology_sibling_cpumask() 2015-05-27 15:22:15 +02:00
cpu-notifier-error-inject.c
cpumask.c revert "cpumask: don't perform while loop in cpumask_next_and()" 2015-06-18 17:00:23 -10:00
crc7.c lib/crc7: Shift crc7() output left 1 bit 2014-05-16 14:26:52 -04:00
crc8.c
crc16.c
crc32.c lib: crc32: Add some additional __pure annotations 2014-06-25 16:04:00 -07:00
crc32defs.h
crc-ccitt.c
crc-itu-t.c lib: crc-itu-t.[ch] fix 0x0x prefix in integer constants 2015-05-26 15:26:43 +02:00
crc-t10dif.c crypto: crct10dif - Add fallback for broken initrds 2013-09-12 15:31:34 +10:00
ctype.c
debug_locks.c mutex: Add support for wound/wait style locks 2013-06-26 12:10:56 +02:00
debugobjects.c lib/debugobjects.c: convert printk(KERN_DEBUG to pr_debug 2014-06-04 16:53:53 -07:00
dec_and_lock.c
decompress_bunzip2.c decompress_bunzip2: off by one in get_next_block() 2014-12-13 12:42:52 -08:00
decompress_inflate.c initramfs: support initramfs that is bigger than 2GiB 2014-08-08 15:57:26 -07:00
decompress_unlz4.c initramfs: support initramfs that is bigger than 2GiB 2014-08-08 15:57:26 -07:00
decompress_unlzma.c initramfs: support initramfs that is bigger than 2GiB 2014-08-08 15:57:26 -07:00
decompress_unlzo.c initramfs: support initramfs that is bigger than 2GiB 2014-08-08 15:57:26 -07:00
decompress_unxz.c initramfs: support initramfs that is bigger than 2GiB 2014-08-08 15:57:26 -07:00
decompress.c lib/decompress.c: consistency of compress formats for kernel image 2014-12-13 12:42:52 -08:00
devres.c lib: devres: add a helper function for ioremap_wc 2015-03-16 21:11:32 +01:00
digsig.c lib/digsig.c: kernel-doc warning fixes 2014-06-04 16:54:19 -07:00
div64.c lib: correct link to the original source for div64_u64 2015-03-06 23:19:27 +01:00
dma-debug.c lib/dma-debug: fix bucket_find_contain() 2015-04-17 09:03:54 -04:00
dump_stack.c asmlinkage: Add explicit __visible to drivers/*, lib/*, kernel/* 2014-05-05 16:07:46 -07:00
dynamic_debug.c Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial 2015-02-10 18:57:15 -08:00
dynamic_queue_limits.c lib/dynamic_queue_limits.c: simplify includes 2015-02-12 18:54:15 -08:00
earlycpio.c earlycpio.c: Fix the confusing comment of find_cpio_data(). 2013-08-14 23:24:01 +02:00
extable.c
fault-inject.c fault-inject: add ratelimit option 2014-12-13 12:42:52 -08:00
fdt_empty_tree.c lib: add fdt_empty_tree.c 2014-04-30 19:49:37 +01:00
fdt_ro.c
fdt_rw.c
fdt_strerror.c
fdt_sw.c
fdt_wip.c
fdt.c
find_bit.c lib: rename lib/find_next_bit.c to lib/find_bit.c 2015-04-17 09:03:54 -04:00
flex_array.c reciprocal_divide: update/correction of the algorithm 2014-01-21 23:17:20 -08:00
flex_proportions.c proportions: add @gfp to init functions 2014-09-08 09:51:30 +09:00
gcd.c
gen_crc32table.c lib: crc32: constify crc32 lookup table 2015-02-13 21:21:35 -08:00
genalloc.c lib/genalloc.c: check result of devres_alloc() 2015-02-13 21:21:36 -08:00
glob.c lib/glob.c: add CONFIG_GLOB_SELFTEST 2014-08-06 18:01:25 -07:00
halfmd4.c lib/halfmd4.c: simplify includes 2015-02-12 18:54:15 -08:00
hexdump.c hexdump: make it return number of bytes placed in buffer 2015-02-12 18:54:15 -08:00
hweight.c Make ARCH_HAS_FAST_MULTIPLIER a real config variable 2014-09-13 11:14:53 -07:00
idr.c lib/idr.c: remove redundant include 2015-02-12 18:54:15 -08:00
inflate.c
int_sqrt.c
interval_tree_test.c lib: Export interval_tree 2014-05-05 09:09:14 +02:00
interval_tree.c lib/interval_tree.c: simplify includes 2015-02-12 18:54:15 -08:00
iomap_copy.c
iomap.c Kconfig: rename HAS_IOPORT to HAS_IOPORT_MAP 2014-04-07 16:36:11 -07:00
iommu-common.c iommu-common: rename iommu_pool_hash to iommu_hash_common 2015-04-20 14:09:55 -04:00
iommu-helper.c
ioremap.c x86, mm: support huge KVA mappings on x86 2015-04-14 16:49:04 -07:00
iov_iter.c Merge branch 'iov_iter' into for-next 2015-04-11 22:26:51 -04:00
irq_regs.c
is_single_threaded.c
jedec_ddr_data.c
kasprintf.c
Kconfig Merge branch 'mvebu/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc 2015-06-19 22:07:07 +08:00
Kconfig.debug rcu: Conditionally compile RCU's eqs warnings 2015-05-27 12:59:07 -07:00
Kconfig.kasan kasan: show gcc version requirements in Kconfig and Documentation 2015-05-05 17:10:10 -07:00
Kconfig.kgdb kdb: Allow access to sensitive commands to be restricted by default 2014-11-11 09:31:52 -06:00
Kconfig.kmemcheck
kfifo.c kfifo: use BUG_ON 2014-08-08 15:57:25 -07:00
klist.c klist: use same naming scheme as hlist for klist_add_after() 2014-08-06 18:01:24 -07:00
kobject_uevent.c lib/kobject_uevent.c: remove redundant include 2015-02-12 18:54:15 -08:00
kobject.c kobject: WARN as tip when call kobject_get() to a kobject not initialized 2015-03-25 15:26:49 +01:00
kstrtox.c lib/kstrtox.c: remove redundant cleanup 2014-01-23 16:36:57 -08:00
kstrtox.h
lcm.c block: fix blk_stack_limits() regression due to lcm() change 2015-03-31 09:45:50 -06:00
libcrc32c.c crypto: LLVMLinux: Remove VLAIS usage from libcrc32c.c 2014-10-14 10:51:23 +02:00
list_debug.c
list_sort.c lib/list_sort.c: rearrange includes 2015-02-12 18:54:15 -08:00
llist.c lib/llist.c: remove redundant include 2015-02-12 18:54:15 -08:00
locking-selftest-hardirq.h
locking-selftest-mutex.h
locking-selftest-rlock-hardirq.h
locking-selftest-rlock-softirq.h
locking-selftest-rlock.h
locking-selftest-rsem.h
locking-selftest-softirq.h
locking-selftest-spin-hardirq.h
locking-selftest-spin-softirq.h
locking-selftest-spin.h
locking-selftest-wlock-hardirq.h
locking-selftest-wlock-softirq.h
locking-selftest-wlock.h
locking-selftest-wsem.h
locking-selftest.c locking/lockdep: Revert qrwlock recusive stuff 2014-10-03 06:09:30 +02:00
lockref.c locking: Remove ACCESS_ONCE() usage 2015-02-24 08:44:16 +01:00
lru_cache.c lru_cache: remove use of seq_printf return value 2015-04-15 16:35:25 -07:00
Makefile Merge branch 'mvebu/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc 2015-06-19 22:07:07 +08:00
md5.c lib/md5.c: simplify include 2015-02-12 18:54:15 -08:00
memory-notifier-error-inject.c
memweight.c
net_utils.c mac_pton: Use bool not int return 2014-06-25 17:45:43 -07:00
nlattr.c netlink: pad nla_memcpy dest buffer with zeroes 2015-03-31 14:07:24 -04:00
notifier-error-inject.c
notifier-error-inject.h
of-reconfig-notifier-error-inject.c
oid_registry.c Give the OID registry file module info to avoid kernel tainting 2013-05-05 14:38:00 -07:00
parser.c lib/parser.c: put EXPORT_SYMBOLs in the conventional place 2014-01-23 16:36:55 -08:00
pci_iomap.c pci: add pci_iomap_range 2015-01-21 16:28:49 +10:30
percpu_counter.c percpu_counter: batch size aware __percpu_counter_compare() 2015-05-29 07:39:34 +10:00
percpu_ida.c lib/percpu_ida.c: remove redundant includes 2015-02-12 18:54:16 -08:00
percpu_test.c percpu: add test module for various percpu operations 2013-11-13 12:09:11 +09:00
percpu-refcount.c percpu_ref: make INIT_ATOMIC and switch_to_atomic() sticky 2014-09-24 13:31:50 -04:00
plist.c lib/plist.c: remove redundant include 2015-02-12 18:54:16 -08:00
pm-notifier-error-inject.c
proportions.c proportions: add @gfp to init functions 2014-09-08 09:51:30 +09:00
radix-tree.c sched/preempt: Merge preempt_mask.h into preempt.h 2015-05-19 08:39:11 +02:00
random32.c random32: improvements to prandom_bytes 2014-08-24 18:36:01 -07:00
ratelimit.c
rational.c
rbtree_test.c rbtree/test: test rbtree_postorder_for_each_entry_safe() 2014-01-23 16:37:03 -08:00
rbtree.c lib/rbtree.c: fix typo in comment of __rb_insert() 2014-08-08 15:57:24 -07:00
reciprocal_div.c reciprocal_divide: update/correction of the algorithm 2014-01-21 23:17:20 -08:00
rhashtable.c Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net 2015-06-08 20:06:56 -07:00
scatterlist.c scatterlist: introduce sg_nents_for_len 2015-06-03 10:51:27 +08:00
seq_buf.c seq_buf: Fix seq_buf_bprintf() truncation 2015-03-04 23:40:19 -05:00
sha1.c lib: EXPORT_SYMBOL sha_init 2015-03-23 22:12:08 -04:00
show_mem.c lib/show_mem.c: remove redundant include 2015-02-12 18:54:16 -08:00
smp_processor_id.c percpu: add preemption checks to __this_cpu ops 2014-04-07 16:36:14 -07:00
sort.c lib/sort: Add 64 bit swap function 2015-06-25 17:00:40 -07:00
stmp_device.c lib/stmp_device.c: replace module.h include 2015-02-12 18:54:16 -08:00
string_helpers.c SCSI misc on 20150416 2015-04-16 19:02:04 -04:00
string.c lib: make memzero_explicit more robust against dead store elimination 2015-05-04 17:49:51 +08:00
strncpy_from_user.c lib/strncpy_from_user.c: replace module.h include 2015-02-12 18:54:16 -08:00
strnlen_user.c Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2015-06-22 15:52:04 -07:00
swiotlb.c swiotlb: Warn on allocation failure in swiotlb_alloc_coherent() 2015-06-11 08:28:38 +02:00
syscall.c lib/syscall.c: unexport task_current_syscall() 2014-04-03 16:21:06 -07:00
test_bpf.c test_bpf: add similarly conflicting jump test case only for classic 2015-05-27 14:05:59 -04:00
test_firmware.c test: add firmware_class loader test 2014-07-17 18:44:19 -07:00
test_kasan.c lib: add kasan test module 2015-02-13 21:21:41 -08:00
test_module.c test: add minimal module for verification testing 2014-01-23 16:36:57 -08:00
test_rhashtable.c rhashtable-test: Fix 64bit division 2015-05-05 19:30:47 -04:00
test_user_copy.c test: check copy_to/from_user boundary validation 2014-01-23 16:36:57 -08:00
test-hexdump.c hexdump: Make test data really const 2015-06-25 17:00:40 -07:00
test-kstrtox.c lib/test-kstrtox.c: use ARRAY_SIZE instead of sizeof/sizeof[0] 2014-08-06 18:01:25 -07:00
test-string_helpers.c lib/string_helpers.c: change semantics of string_escape_mem 2015-04-15 16:35:24 -07:00
textsearch.c lib/textsearch.c: remove textsearch_put reference from comments 2014-10-14 02:18:14 +02:00
timerqueue.c timerqueue: Let timerqueue_add/del return information 2015-04-22 17:06:49 +02:00
ts_bm.c
ts_fsm.c
ts_kmp.c
ucs2_string.c
usercopy.c
uuid.c
vsprintf.c lib/vsprintf.c: improve put_dec_trunc8 slightly 2015-04-17 09:03:55 -04:00