From 2178817c4a105ead044e054bf902f256e6d589c2 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 24 Jan 2018 23:22:33 +0300 Subject: [PATCH 01/26] arc: Get rid of handwritten string routines U-Boot is a bit special piese of software because it is being only executed once on power-on as compared to operating system for example. That's why we don't care much about performance optimizations instead we're more concerned about size. And up-to-date compilers might produce much smaller code compared to performance-optimized routines copy-pasted from the Linux kernel. Here's an example: ------------------------------->8-------------------------- --- size_asm_strings.txt +++ size_c_strings.txt @@ -1,2 +1,2 @@ text data bss dec hex filename - 121260 3784 3308 128352 1f560 u-boot + 120448 3784 3308 127540 1f234 u-boot ------------------------------->8-------------------------- See we were able to shave off ~800 bytes of .text section. Also usage of string routines implemented in C gives us an ability to support more HW flavors for free: generated instructions will match our target as long as correct compiler option is used. Signed-off-by: Alexey Brodkin --- arch/arc/include/asm/string.h | 26 ------- arch/arc/lib/Makefile | 7 -- arch/arc/lib/memcmp.S | 123 ----------------------------- arch/arc/lib/memcpy-700.S | 63 --------------- arch/arc/lib/memset.S | 62 --------------- arch/arc/lib/strchr-700.S | 141 ---------------------------------- arch/arc/lib/strcmp.S | 97 ----------------------- arch/arc/lib/strcpy-700.S | 67 ---------------- arch/arc/lib/strlen.S | 80 ------------------- 9 files changed, 666 deletions(-) delete mode 100644 arch/arc/lib/memcmp.S delete mode 100644 arch/arc/lib/memcpy-700.S delete mode 100644 arch/arc/lib/memset.S delete mode 100644 arch/arc/lib/strchr-700.S delete mode 100644 arch/arc/lib/strcmp.S delete mode 100644 arch/arc/lib/strcpy-700.S delete mode 100644 arch/arc/lib/strlen.S diff --git a/arch/arc/include/asm/string.h b/arch/arc/include/asm/string.h index 909129c333..8b13789179 100644 --- a/arch/arc/include/asm/string.h +++ b/arch/arc/include/asm/string.h @@ -1,27 +1 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ -#ifndef __ASM_ARC_STRING_H -#define __ASM_ARC_STRING_H - -#define __HAVE_ARCH_MEMSET -#define __HAVE_ARCH_MEMCPY -#define __HAVE_ARCH_MEMCMP -#define __HAVE_ARCH_STRCHR -#define __HAVE_ARCH_STRCPY -#define __HAVE_ARCH_STRCMP -#define __HAVE_ARCH_STRLEN - -extern void *memset(void *ptr, int, __kernel_size_t); -extern void *memcpy(void *, const void *, __kernel_size_t); -extern void memzero(void *ptr, __kernel_size_t n); -extern int memcmp(const void *, const void *, __kernel_size_t); -extern char *strchr(const char *s, int c); -extern char *strcpy(char *dest, const char *src); -extern int strcmp(const char *cs, const char *ct); -extern __kernel_size_t strlen(const char *); - -#endif /* __ASM_ARC_STRING_H */ diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile index 12097bf3be..6b7fb0fdff 100644 --- a/arch/arc/lib/Makefile +++ b/arch/arc/lib/Makefile @@ -10,13 +10,6 @@ obj-y += cache.o obj-y += cpu.o obj-y += interrupts.o obj-y += relocate.o -obj-y += strchr-700.o -obj-y += strcmp.o -obj-y += strcpy-700.o -obj-y += strlen.o -obj-y += memcmp.o -obj-y += memcpy-700.o -obj-y += memset.o obj-y += reset.o obj-y += ints_low.o obj-y += init_helpers.o diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S deleted file mode 100644 index 87bccab51d..0000000000 --- a/arch/arc/lib/memcmp.S +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -#ifdef __LITTLE_ENDIAN__ -#define WORD2 r2 -#define SHIFT r3 -#else /* __BIG_ENDIAN__ */ -#define WORD2 r3 -#define SHIFT r2 -#endif /* _ENDIAN__ */ - -.global memcmp -.align 4 -memcmp: - or %r12, %r0, %r1 - asl_s %r12, %r12, 30 - sub %r3, %r2, 1 - brls %r2, %r12, .Lbytewise - ld %r4, [%r0, 0] - ld %r5, [%r1, 0] - lsr.f %lp_count, %r3, 3 - lpne .Loop_end - ld_s WORD2, [%r0, 4] - ld_s %r12, [%r1, 4] - brne %r4, %r5, .Leven - ld.a %r4, [%r0, 8] - ld.a %r5, [%r1, 8] - brne WORD2, %r12, .Lodd - nop -.Loop_end: - asl_s SHIFT, SHIFT, 3 - bhs_s .Last_cmp - brne %r4, %r5, .Leven - ld %r4, [%r0, 4] - ld %r5, [%r1, 4] -#ifdef __LITTLE_ENDIAN__ - nop_s - /* one more load latency cycle */ -.Last_cmp: - xor %r0, %r4, %r5 - bset %r0, %r0, SHIFT - sub_s %r1, %r0, 1 - bic_s %r1, %r1, %r0 - norm %r1, %r1 - b.d .Leven_cmp - and %r1, %r1, 24 -.Leven: - xor %r0, %r4, %r5 - sub_s %r1, %r0, 1 - bic_s %r1, %r1, %r0 - norm %r1, %r1 - /* slow track insn */ - and %r1, %r1, 24 -.Leven_cmp: - asl %r2, %r4, %r1 - asl %r12, %r5, %r1 - lsr_s %r2, %r2, 1 - lsr_s %r12, %r12, 1 - j_s.d [%blink] - sub %r0, %r2, %r12 - .balign 4 -.Lodd: - xor %r0, WORD2, %r12 - sub_s %r1, %r0, 1 - bic_s %r1, %r1, %r0 - norm %r1, %r1 - /* slow track insn */ - and %r1, %r1, 24 - asl_s %r2, %r2, %r1 - asl_s %r12, %r12, %r1 - lsr_s %r2, %r2, 1 - lsr_s %r12, %r12, 1 - j_s.d [%blink] - sub %r0, %r2, %r12 -#else /* __BIG_ENDIAN__ */ -.Last_cmp: - neg_s SHIFT, SHIFT - lsr %r4, %r4, SHIFT - lsr %r5, %r5, SHIFT - /* slow track insn */ -.Leven: - sub.f %r0, %r4, %r5 - mov.ne %r0, 1 - j_s.d [%blink] - bset.cs %r0, %r0, 31 -.Lodd: - cmp_s WORD2, %r12 - - mov_s %r0, 1 - j_s.d [%blink] - bset.cs %r0, %r0, 31 -#endif /* _ENDIAN__ */ - .balign 4 -.Lbytewise: - breq %r2, 0, .Lnil - ldb %r4, [%r0, 0] - ldb %r5, [%r1, 0] - lsr.f %lp_count, %r3 - lpne .Lbyte_end - ldb_s %r3, [%r0, 1] - ldb %r12, [%r1, 1] - brne %r4, %r5, .Lbyte_even - ldb.a %r4, [%r0, 2] - ldb.a %r5, [%r1, 2] - brne %r3, %r12, .Lbyte_odd - nop -.Lbyte_end: - bcc .Lbyte_even - brne %r4, %r5, .Lbyte_even - ldb_s %r3, [%r0, 1] - ldb_s %r12, [%r1, 1] -.Lbyte_odd: - j_s.d [%blink] - sub %r0, %r3, %r12 -.Lbyte_even: - j_s.d [%blink] - sub %r0, %r4, %r5 -.Lnil: - j_s.d [%blink] - mov %r0, 0 diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S deleted file mode 100644 index 51dd73ab8f..0000000000 --- a/arch/arc/lib/memcpy-700.S +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -.global memcpy -.align 4 -memcpy: - or %r3, %r0, %r1 - asl_s %r3, %r3, 30 - mov_s %r5, %r0 - brls.d %r2, %r3, .Lcopy_bytewise - sub.f %r3, %r2, 1 - ld_s %r12, [%r1, 0] - asr.f %lp_count, %r3, 3 - bbit0.d %r3, 2, .Lnox4 - bmsk_s %r2, %r2, 1 - st.ab %r12, [%r5, 4] - ld.a %r12, [%r1, 4] -.Lnox4: - lppnz .Lendloop - ld_s %r3, [%r1, 4] - st.ab %r12, [%r5, 4] - ld.a %r12, [%r1, 8] - st.ab %r3, [%r5, 4] -.Lendloop: - breq %r2, 0, .Last_store - ld %r3, [%r5, 0] -#ifdef __LITTLE_ENDIAN__ - add3 %r2, -1, %r2 - /* uses long immediate */ - xor_s %r12, %r12, %r3 - bmsk %r12, %r12, %r2 - xor_s %r12, %r12, %r3 -#else /* __BIG_ENDIAN__ */ - sub3 %r2, 31, %r2 - /* uses long immediate */ - xor_s %r3, %r3, %r12 - bmsk %r3, %r3, %r2 - xor_s %r12, %r12, %r3 -#endif /* _ENDIAN__ */ -.Last_store: - j_s.d [%blink] - st %r12, [%r5, 0] - - .balign 4 -.Lcopy_bytewise: - jcs [%blink] - ldb_s %r12, [%r1, 0] - lsr.f %lp_count, %r3 - bhs_s .Lnox1 - stb.ab %r12, [%r5, 1] - ldb.a %r12, [%r1, 1] -.Lnox1: - lppnz .Lendbloop - ldb_s %r3, [%r1, 1] - stb.ab %r12, [%r5, 1] - ldb.a %r12, [%r1, 2] - stb.ab %r3, [%r5, 1] -.Lendbloop: - j_s.d [%blink] - stb %r12, [%r5, 0] diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S deleted file mode 100644 index 017e8af0e8..0000000000 --- a/arch/arc/lib/memset.S +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ - -.global memset -.align 4 -memset: - mov_s %r4, %r0 - or %r12, %r0, %r2 - bmsk.f %r12, %r12, 1 - extb_s %r1, %r1 - asl %r3, %r1, 8 - beq.d .Laligned - or_s %r1, %r1, %r3 - brls %r2, SMALL, .Ltiny - add %r3, %r2, %r0 - stb %r1, [%r3, -1] - bclr_s %r3, %r3, 0 - stw %r1, [%r3, -2] - bmsk.f %r12, %r0, 1 - add_s %r2, %r2, %r12 - sub.ne %r2, %r2, 4 - stb.ab %r1, [%r4, 1] - and %r4, %r4, -2 - stw.ab %r1, [%r4, 2] - and %r4, %r4, -4 - - .balign 4 -.Laligned: - asl %r3, %r1, 16 - lsr.f %lp_count, %r2, 2 - or_s %r1, %r1, %r3 - lpne .Loop_end - st.ab %r1, [%r4, 4] -.Loop_end: - j_s [%blink] - - .balign 4 -.Ltiny: - mov.f %lp_count, %r2 - lpne .Ltiny_end - stb.ab %r1, [%r4, 1] -.Ltiny_end: - j_s [%blink] - -/* - * memzero: @r0 = mem, @r1 = size_t - * memset: @r0 = mem, @r1 = char, @r2 = size_t - */ - -.global memzero -.align 4 -memzero: - /* adjust bzero args to memset args */ - mov %r2, %r1 - mov %r1, 0 - /* tail call so need to tinker with blink */ - b memset diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S deleted file mode 100644 index 55fcc9fb00..0000000000 --- a/arch/arc/lib/strchr-700.S +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -/* - * ARC700 has a relatively long pipeline and branch prediction, so we want - * to avoid branches that are hard to predict. On the other hand, the - * presence of the norm instruction makes it easier to operate on whole - * words branch-free. - */ - -.global strchr -.align 4 -strchr: - extb_s %r1, %r1 - asl %r5, %r1, 8 - bmsk %r2, %r0, 1 - or %r5, %r5, %r1 - mov_s %r3, 0x01010101 - breq.d %r2, %r0, .Laligned - asl %r4, %r5, 16 - sub_s %r0, %r0, %r2 - asl %r7, %r2, 3 - ld_s %r2, [%r0] -#ifdef __LITTLE_ENDIAN__ - asl %r7, %r3, %r7 -#else /* __BIG_ENDIAN__ */ - lsr %r7, %r3, %r7 -#endif /* _ENDIAN__ */ - or %r5, %r5, %r4 - ror %r4, %r3 - sub %r12, %r2, %r7 - bic_s %r12, %r12, %r2 - and %r12, %r12, %r4 - brne.d %r12, 0, .Lfound0_ua - xor %r6, %r2, %r5 - ld.a %r2, [%r0, 4] - sub %r12, %r6, %r7 - bic %r12, %r12, %r6 -#ifdef __LITTLE_ENDIAN__ - and %r7, %r12, %r4 - /* For speed, we want this branch to be unaligned. */ - breq %r7, 0, .Loop - /* Likewise this one */ - b .Lfound_char -#else /* __BIG_ENDIAN__ */ - and %r12, %r12, %r4 - /* For speed, we want this branch to be unaligned. */ - breq %r12, 0, .Loop - lsr_s %r12, %r12, 7 - bic %r2, %r7, %r6 - b.d .Lfound_char_b - and_s %r2, %r2, %r12 -#endif /* _ENDIAN__ */ - /* We require this code address to be unaligned for speed... */ -.Laligned: - ld_s %r2, [%r0] - or %r5, %r5, %r4 - ror %r4, %r3 - /* ... so that this code address is aligned, for itself and ... */ -.Loop: - sub %r12, %r2, %r3 - bic_s %r12, %r12, %r2 - and %r12, %r12, %r4 - brne.d %r12, 0, .Lfound0 - xor %r6, %r2, %r5 - ld.a %r2, [%r0, 4] - sub %r12, %r6, %r3 - bic %r12, %r12, %r6 - and %r7, %r12, %r4 - breq %r7, 0, .Loop - /* - *... so that this branch is unaligned. - * Found searched-for character. - * r0 has already advanced to next word. - */ -#ifdef __LITTLE_ENDIAN__ - /* - * We only need the information about the first matching byte - * (i.e. the least significant matching byte) to be exact, - * hence there is no problem with carry effects. - */ -.Lfound_char: - sub %r3, %r7, 1 - bic %r3, %r3, %r7 - norm %r2, %r3 - sub_s %r0, %r0, 1 - asr_s %r2, %r2, 3 - j.d [%blink] - sub_s %r0, %r0, %r2 - - .balign 4 -.Lfound0_ua: - mov %r3, %r7 -.Lfound0: - sub %r3, %r6, %r3 - bic %r3, %r3, %r6 - and %r2, %r3, %r4 - or_s %r12, %r12, %r2 - sub_s %r3, %r12, 1 - bic_s %r3, %r3, %r12 - norm %r3, %r3 - add_s %r0, %r0, 3 - asr_s %r12, %r3, 3 - asl.f 0, %r2, %r3 - sub_s %r0, %r0, %r12 - j_s.d [%blink] - mov.pl %r0, 0 -#else /* __BIG_ENDIAN__ */ -.Lfound_char: - lsr %r7, %r7, 7 - - bic %r2, %r7, %r6 -.Lfound_char_b: - norm %r2, %r2 - sub_s %r0, %r0, 4 - asr_s %r2, %r2, 3 - j.d [%blink] - add_s %r0, %r0, %r2 - -.Lfound0_ua: - mov_s %r3, %r7 -.Lfound0: - asl_s %r2, %r2, 7 - or %r7, %r6, %r4 - bic_s %r12, %r12, %r2 - sub %r2, %r7, %r3 - or %r2, %r2, %r6 - bic %r12, %r2, %r12 - bic.f %r3, %r4, %r12 - norm %r3, %r3 - - add.pl %r3, %r3, 1 - asr_s %r12, %r3, 3 - asl.f 0, %r2, %r3 - add_s %r0, %r0, %r12 - j_s.d [%blink] - mov.mi %r0, 0 -#endif /* _ENDIAN__ */ diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S deleted file mode 100644 index 8cb7d2f18c..0000000000 --- a/arch/arc/lib/strcmp.S +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -/* - * This is optimized primarily for the ARC700. - * It would be possible to speed up the loops by one cycle / word - * respective one cycle / byte by forcing double source 1 alignment, unrolling - * by a factor of two, and speculatively loading the second word / byte of - * source 1; however, that would increase the overhead for loop setup / finish, - * and strcmp might often terminate early. - */ - -.global strcmp -.align 4 -strcmp: - or %r2, %r0, %r1 - bmsk_s %r2, %r2, 1 - brne %r2, 0, .Lcharloop - mov_s %r12, 0x01010101 - ror %r5, %r12 -.Lwordloop: - ld.ab %r2, [%r0, 4] - ld.ab %r3, [%r1, 4] - nop_s - sub %r4, %r2, %r12 - bic %r4, %r4, %r2 - and %r4, %r4, %r5 - brne %r4, 0, .Lfound0 - breq %r2 ,%r3, .Lwordloop -#ifdef __LITTLE_ENDIAN__ - xor %r0, %r2, %r3 /* mask for difference */ - sub_s %r1, %r0, 1 - bic_s %r0, %r0, %r1 /* mask for least significant difference bit */ - sub %r1, %r5, %r0 - xor %r0, %r5, %r1 /* mask for least significant difference byte */ - and_s %r2, %r2, %r0 - and_s %r3, %r3, %r0 -#endif /* _ENDIAN__ */ - cmp_s %r2, %r3 - mov_s %r0, 1 - j_s.d [%blink] - bset.lo %r0, %r0, 31 - - .balign 4 -#ifdef __LITTLE_ENDIAN__ -.Lfound0: - xor %r0, %r2, %r3 /* mask for difference */ - or %r0, %r0, %r4 /* or in zero indicator */ - sub_s %r1, %r0, 1 - bic_s %r0, %r0, %r1 /* mask for least significant difference bit */ - sub %r1, %r5, %r0 - xor %r0, %r5, %r1 /* mask for least significant difference byte */ - and_s %r2, %r2, %r0 - and_s %r3, %r3, %r0 - sub.f %r0, %r2, %r3 - mov.hi %r0, 1 - j_s.d [%blink] - bset.lo %r0, %r0, 31 -#else /* __BIG_ENDIAN__ */ - /* - * The zero-detection above can mis-detect 0x01 bytes as zeroes - * because of carry-propagateion from a lower significant zero byte. - * We can compensate for this by checking that bit0 is zero. - * This compensation is not necessary in the step where we - * get a low estimate for r2, because in any affected bytes - * we already have 0x00 or 0x01, which will remain unchanged - * when bit 7 is cleared. - */ - .balign 4 -.Lfound0: - lsr %r0, %r4, 8 - lsr_s %r1, %r2 - bic_s %r2, %r2, %r0 /* get low estimate for r2 and get ... */ - bic_s %r0, %r0, %r1 /* */ - or_s %r3, %r3, %r0 /* ... high estimate r3 so that r2 > r3 will */ - cmp_s %r3, %r2 /* ... be independent of trailing garbage */ - or_s %r2, %r2, %r0 /* likewise for r3 > r2 */ - bic_s %r3, %r3, %r0 - rlc %r0, 0 /* r0 := r2 > r3 ? 1 : 0 */ - cmp_s %r2, %r3 - j_s.d [%blink] - bset.lo %r0, %r0, 31 -#endif /* _ENDIAN__ */ - - .balign 4 -.Lcharloop: - ldb.ab %r2,[%r0,1] - ldb.ab %r3,[%r1,1] - nop_s - breq %r2, 0, .Lcmpend - breq %r2, %r3, .Lcharloop -.Lcmpend: - j_s.d [%blink] - sub %r0, %r2, %r3 diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S deleted file mode 100644 index 41bb53e501..0000000000 --- a/arch/arc/lib/strcpy-700.S +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -/* - * If dst and src are 4 byte aligned, copy 8 bytes at a time. - * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get - * it 8 byte aligned. Thus, we can do a little read-ahead, without - * dereferencing a cache line that we should not touch. - * Note that short and long instructions have been scheduled to avoid - * branch stalls. - * The beq_s to r3z could be made unaligned & long to avoid a stall - * there, but it is not likely to be taken often, and it would also be likely - * to cost an unaligned mispredict at the next call. - */ - -.global strcpy -.align 4 -strcpy: - or %r2, %r0, %r1 - bmsk_s %r2, %r2, 1 - brne.d %r2, 0, charloop - mov_s %r10, %r0 - ld_s %r3, [%r1, 0] - mov %r8, 0x01010101 - bbit0.d %r1, 2, loop_start - ror %r12, %r8 - sub %r2, %r3, %r8 - bic_s %r2, %r2, %r3 - tst_s %r2,%r12 - bne r3z - mov_s %r4,%r3 - .balign 4 -loop: - ld.a %r3, [%r1, 4] - st.ab %r4, [%r10, 4] -loop_start: - ld.a %r4, [%r1, 4] - sub %r2, %r3, %r8 - bic_s %r2, %r2, %r3 - tst_s %r2, %r12 - bne_s r3z - st.ab %r3, [%r10, 4] - sub %r2, %r4, %r8 - bic %r2, %r2, %r4 - tst %r2, %r12 - beq loop - mov_s %r3, %r4 -#ifdef __LITTLE_ENDIAN__ -r3z: bmsk.f %r1, %r3, 7 - lsr_s %r3, %r3, 8 -#else /* __BIG_ENDIAN__ */ -r3z: lsr.f %r1, %r3, 24 - asl_s %r3, %r3, 8 -#endif /* _ENDIAN__ */ - bne.d r3z - stb.ab %r1, [%r10, 1] - j_s [%blink] - - .balign 4 -charloop: - ldb.ab %r3, [%r1, 1] - brne.d %r3, 0, charloop - stb.ab %r3, [%r10, 1] - j [%blink] diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S deleted file mode 100644 index 666e22c0d5..0000000000 --- a/arch/arc/lib/strlen.S +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -.global strlen -.align 4 -strlen: - or %r3, %r0, 7 - ld %r2, [%r3, -7] - ld.a %r6, [%r3, -3] - mov %r4, 0x01010101 - /* uses long immediate */ -#ifdef __LITTLE_ENDIAN__ - asl_s %r1, %r0, 3 - btst_s %r0, 2 - asl %r7, %r4, %r1 - ror %r5, %r4 - sub %r1, %r2, %r7 - bic_s %r1, %r1, %r2 - mov.eq %r7, %r4 - sub %r12, %r6, %r7 - bic %r12, %r12, %r6 - or.eq %r12, %r12, %r1 - and %r12, %r12, %r5 - brne %r12, 0, .Learly_end -#else /* __BIG_ENDIAN__ */ - ror %r5, %r4 - btst_s %r0, 2 - mov_s %r1, 31 - sub3 %r7, %r1, %r0 - sub %r1, %r2, %r4 - bic_s %r1, %r1, %r2 - bmsk %r1, %r1, %r7 - sub %r12, %r6, %r4 - bic %r12, %r12, %r6 - bmsk.ne %r12, %r12, %r7 - or.eq %r12, %r12, %r1 - and %r12, %r12, %r5 - brne %r12, 0, .Learly_end -#endif /* _ENDIAN__ */ - -.Loop: - ld_s %r2, [%r3, 4] - ld.a %r6, [%r3, 8] - /* stall for load result */ - sub %r1, %r2, %r4 - bic_s %r1, %r1, %r2 - sub %r12, %r6, %r4 - bic %r12, %r12, %r6 - or %r12, %r12, %r1 - and %r12, %r12, %r5 - breq %r12, 0, .Loop -.Lend: - and.f %r1, %r1, %r5 - sub.ne %r3, %r3, 4 - mov.eq %r1, %r12 -#ifdef __LITTLE_ENDIAN__ - sub_s %r2, %r1, 1 - bic_s %r2, %r2, %r1 - norm %r1, %r2 - sub_s %r0, %r0, 3 - lsr_s %r1, %r1, 3 - sub %r0, %r3, %r0 - j_s.d [%blink] - sub %r0, %r0, %r1 -#else /* __BIG_ENDIAN__ */ - lsr_s %r1, %r1, 7 - mov.eq %r2, %r6 - bic_s %r1, %r1, %r2 - norm %r1, %r1 - sub %r0, %r3, %r0 - lsr_s %r1, %r1, 3 - j_s.d [%blink] - add %r0, %r0, %r1 -#endif /* _ENDIAN */ -.Learly_end: - b.d .Lend - sub_s.ne %r1, %r1, %r1 From 0a097ba54bee9ee9fb2f6c5529b55d4b46b872b2 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 24 Jan 2018 15:27:04 +0300 Subject: [PATCH 02/26] arc: Don't halt slaves This commit basically reverts two commits: 1. cf628f772ef2 ("arc: arcv1: Disable master/slave check") 2. 6cba327bd96f ("arcv2: Halt non-master cores") With mentioned commits in-place we experience more trouble than benefits. In case of SMP Linux kernel this is really required as we have all the cores running from the very beginning and then we need to allow master core to do some preparatory work while slaves are not getting in the way. In case of U-Boot we: a) Don't really run more than 1 core in parallel b) We may use whatever core for that Signed-off-by: Alexey Brodkin --- arch/arc/lib/start.S | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/arc/lib/start.S b/arch/arc/lib/start.S index 0d72fe71d4..ac9944c4cf 100644 --- a/arch/arc/lib/start.S +++ b/arch/arc/lib/start.S @@ -10,26 +10,6 @@ #include ENTRY(_start) -; ARCompact devices are not supposed to be SMP so master/slave check -; makes no sense. -#ifdef CONFIG_ISA_ARCV2 - ; Non-masters will be halted immediately, they might be kicked later - ; by platform code right before passing control to the Linux kernel - ; in bootm.c:boot_jump_linux(). - lr r5, [identity] - lsr r5, r5, 8 - bmsk r5, r5, 7 - cmp r5, 0 - mov.nz r0, r5 - bz .Lmaster_proceed - flag 1 - nop - nop - nop - -.Lmaster_proceed: -#endif - /* Setup interrupt vector base that matches "__text_start" */ sr __ivt_start, [ARC_AUX_INTR_VEC_BASE] From fac4790491f69b29755d92db2cad508849573ff7 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 25 Jan 2018 00:25:12 +0300 Subject: [PATCH 03/26] arc: Eliminate unused code and data with GCC's garbage collector Finally GCC's garbage collector works on ARC so let's use it. That's what I may see for HSDK: Before: text data bss dec hex filename 290153 10068 222616 522837 7fa55 u-boot After: text data bss dec hex filename 261999 9460 222360 493819 788fb u-boot Overall ~5% of memory footprint saved. Signed-off-by: Alexey Brodkin --- arch/arc/config.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/config.mk b/arch/arc/config.mk index 3ed0c282ba..d040454d1a 100644 --- a/arch/arc/config.mk +++ b/arch/arc/config.mk @@ -51,9 +51,10 @@ PLATFORM_CPPFLAGS += -mcpu=archs endif PLATFORM_CPPFLAGS += -ffixed-r25 -D__ARC__ -gdwarf-2 -mno-sdata +PLATFORM_RELFLAGS += -ffunction-sections -fdata-sections # Needed for relocation -LDFLAGS_FINAL += -pie +LDFLAGS_FINAL += -pie --gc-sections # Load address for standalone apps CONFIG_STANDALONE_LOAD_ADDR ?= 0x82000000 From 264d298fda39c5caa9505702b257a1f60c3b7352 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 16 Dec 2015 19:24:10 +0300 Subject: [PATCH 04/26] arc: Introduce a possibility to not relocate U-boot Disabling relocation might be useful on ARC for 2 reasons: a) For advanced debugging with Synopsys proprietary MetaWare debugger which is capable of accessing much more specific hardware resources compared to gdb. For example it may show contents of L1 and L2 caches, internal states of some hardware blocks etc. But on the downside MetaWare debugger still cannot work with PIE. Even though that limitation could be work-arounded with change of ELF's header and stripping down all debug info but with it we won't have debug info for source-level debugging which is quite inconvenient. b) Some platforms which might benefit from usage of U-Boot basically don't have enough RAM to accommodate relocation of U-Boot so we keep code in flash and use as much of RAM as possible for more interesting things. Signed-off-by: Alexey Brodkin Cc: Simon Glass Cc: Bin Meng Cc: Heiko Schocher Cc: York Sun Cc: Stefan Roese --- arch/arc/lib/relocate.c | 6 ++++++ arch/arc/lib/start.S | 8 +++++++- common/board_f.c | 3 ++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/arc/lib/relocate.c b/arch/arc/lib/relocate.c index 7802f40545..96b4bd3d8f 100644 --- a/arch/arc/lib/relocate.c +++ b/arch/arc/lib/relocate.c @@ -17,6 +17,9 @@ int copy_uboot_to_ram(void) { size_t len = (size_t)&__image_copy_end - (size_t)&__image_copy_start; + if (gd->flags & GD_FLG_SKIP_RELOC) + return 0; + memcpy((void *)gd->relocaddr, (void *)&__image_copy_start, len); return 0; @@ -40,6 +43,9 @@ int do_elf_reloc_fixups(void) Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start); Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end); + if (gd->flags & GD_FLG_SKIP_RELOC) + return 0; + debug("Section .rela.dyn is located at %08x-%08x\n", (unsigned int)re_src, (unsigned int)re_end); diff --git a/arch/arc/lib/start.S b/arch/arc/lib/start.S index ac9944c4cf..c78dd001d8 100644 --- a/arch/arc/lib/start.S +++ b/arch/arc/lib/start.S @@ -78,7 +78,13 @@ ENTRY(_start) /* Zero the one and only argument of "board_init_f" */ mov_s %r0, 0 - j board_init_f + bl board_init_f + + /* We only get here if relocation is disabled by GD_FLG_SKIP_RELOC */ + /* Make sure we don't lose GD overwritten by zero new GD */ + mov %r0, %r25 + mov %r1, 0 + bl board_init_r ENDPROC(_start) /* diff --git a/common/board_f.c b/common/board_f.c index 62588c5bad..ae8bdb7c5c 100644 --- a/common/board_f.c +++ b/common/board_f.c @@ -900,7 +900,8 @@ void board_init_f(ulong boot_flags) hang(); #if !defined(CONFIG_ARM) && !defined(CONFIG_SANDBOX) && \ - !defined(CONFIG_EFI_APP) && !CONFIG_IS_ENABLED(X86_64) + !defined(CONFIG_EFI_APP) && !CONFIG_IS_ENABLED(X86_64) && \ + !defined(CONFIG_ARC) /* NOTREACHED - jump_to_copy() does not return */ hang(); #endif From 71621525c3bf9ed2a73446ba505ad5b980b6b8bb Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 21 Feb 2018 12:58:00 +0300 Subject: [PATCH 05/26] arc: Fine-tune implementation of memory barriers We improve on 2 things: 1. Only ARC HS family has "dmb" instructions so do compile-time check for automatically defined macro __ARCHS__. Previous check for ARCv2 ISA was not good enough because ARC EM family is v2 ISA as well but still "dmb" instaruction is not supported in EM family. 2. Still if there's no dedicated instruction for memory barrier let's at least insert compile-time barrier to make sure compiler deosn't reorder critical memory operations. Signed-off-by: Alexey Brodkin --- arch/arc/include/asm/io.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index a12303bc73..060cdf637b 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -10,7 +10,7 @@ #include #include -#ifdef CONFIG_ISA_ARCV2 +#ifdef __ARCHS__ /* * ARCv2 based HS38 cores are in-order issue, but still weakly ordered @@ -42,12 +42,12 @@ #define mb() asm volatile("sync\n" : : : "memory") #endif -#ifdef CONFIG_ISA_ARCV2 +#ifdef __ARCHS__ #define __iormb() rmb() #define __iowmb() wmb() #else -#define __iormb() do { } while (0) -#define __iowmb() do { } while (0) +#define __iormb() asm volatile("" : : : "memory") +#define __iowmb() asm volatile("" : : : "memory") #endif static inline void sync(void) From 16aeee81d9baa403f80cb0ac9bea1141243ab97d Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:46 +0300 Subject: [PATCH 06/26] ARC: Cache: Move I$ entire operation to a separate function Move instruction cache entire operation to a separate function because we are planing to use it in other places like sync_icache_dcache_all(). Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 04f1d9d59b..26f0a1ff9b 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -315,20 +315,27 @@ void icache_disable(void) IC_CTRL_CACHE_DISABLE); } +/* IC supports only invalidation */ +static inline void __ic_entire_invalidate(void) +{ + if (!icache_status()) + return; + + /* Any write to IC_IVIC register triggers invalidation of entire I$ */ + write_aux_reg(ARC_AUX_IC_IVIC, 1); + /* + * As per ARC HS databook (see chapter 5.3.3.2) + * it is required to add 3 NOPs after each write to IC_IVIC. + */ + __builtin_arc_nop(); + __builtin_arc_nop(); + __builtin_arc_nop(); + read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */ +} + void invalidate_icache_all(void) { - /* Any write to IC_IVIC register triggers invalidation of entire I$ */ - if (icache_status()) { - write_aux_reg(ARC_AUX_IC_IVIC, 1); - /* - * As per ARC HS databook (see chapter 5.3.3.2) - * it is required to add 3 NOPs after each write to IC_IVIC. - */ - __builtin_arc_nop(); - __builtin_arc_nop(); - __builtin_arc_nop(); - read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */ - } + __ic_entire_invalidate(); #ifdef CONFIG_ISA_ARCV2 if (slc_exists) From c4ef14d2cc06ca7cca7e1484b5d9a8b1decb02ac Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:47 +0300 Subject: [PATCH 07/26] ARC: Cache: Remove per-line I$ operations as unused __cache_line_loop() function was copied from Linux kernel where per-line instruction cache operations are really used. In U-Boot we use only entire I$ ops, so we can drop support of per-line I$ ops from __cache_line_loop() because __cache_line_loop() is never called with OP_INV_IC parameter. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 26f0a1ff9b..2252542f16 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -23,7 +23,6 @@ #define OP_INV 0x1 #define OP_FLUSH 0x2 -#define OP_INV_IC 0x3 /* Bit val in SLC_CONTROL */ #define SLC_CTRL_DIS 0x001 @@ -373,30 +372,15 @@ void dcache_disable(void) } #ifndef CONFIG_SYS_DCACHE_OFF -/* - * Common Helper for Line Operations on {I,D}-Cache - */ -static inline void __cache_line_loop(unsigned long paddr, unsigned long sz, - const int cacheop) +/* Common Helper for Line Operations on D-cache */ +static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz, + const int cacheop) { unsigned int aux_cmd; -#if (CONFIG_ARC_MMU_VER == 3) - unsigned int aux_tag; -#endif int num_lines; - if (cacheop == OP_INV_IC) { - aux_cmd = ARC_AUX_IC_IVIL; -#if (CONFIG_ARC_MMU_VER == 3) - aux_tag = ARC_AUX_IC_PTAG; -#endif - } else { - /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ - aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL; -#if (CONFIG_ARC_MMU_VER == 3) - aux_tag = ARC_AUX_DC_PTAG; -#endif - } + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL; sz += paddr & ~CACHE_LINE_MASK; paddr &= CACHE_LINE_MASK; @@ -405,7 +389,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long sz, while (num_lines-- > 0) { #if (CONFIG_ARC_MMU_VER == 3) - write_aux_reg(aux_tag, paddr); + write_aux_reg(ARC_AUX_DC_PTAG, paddr); #endif write_aux_reg(aux_cmd, paddr); paddr += l1_line_sz; @@ -458,7 +442,7 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long sz, { unsigned int ctrl_reg = __before_dc_op(cacheop); - __cache_line_loop(paddr, sz, cacheop); + __dcache_line_loop(paddr, sz, cacheop); __after_dc_op(cacheop, ctrl_reg); } #else From 5d7a24d6462389ab5a1ac3baa800659155e81329 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:48 +0300 Subject: [PATCH 08/26] ARC: Cache: Add support for FLUSH_N_INV D$ operations As of today __dc_line_op() and __dc_entire_op() support only separate flush (OP_FLUSH) and invalidate (OP_INV) operations. Add support of combined flush and invalidate (OP_FLUSH_N_INV) operation which we planing to use in subsequent patches. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 2252542f16..83b77b9716 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -21,8 +21,9 @@ #define DC_CTRL_FLUSH_STATUS BIT(8) #define CACHE_VER_NUM_MASK 0xF -#define OP_INV 0x1 -#define OP_FLUSH 0x2 +#define OP_INV BIT(0) +#define OP_FLUSH BIT(1) +#define OP_FLUSH_N_INV (OP_FLUSH | OP_INV) /* Bit val in SLC_CONTROL */ #define SLC_CTRL_DIS 0x001 @@ -396,36 +397,32 @@ static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz, } } -static unsigned int __before_dc_op(const int op) +static void __before_dc_op(const int op) { - unsigned int reg; + unsigned int ctrl; - if (op == OP_INV) { - /* - * IM is set by default and implies Flush-n-inv - * Clear it here for vanilla inv - */ - reg = read_aux_reg(ARC_AUX_DC_CTRL); - write_aux_reg(ARC_AUX_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH); - } + ctrl = read_aux_reg(ARC_AUX_DC_CTRL); - return reg; + /* IM bit implies flush-n-inv, instead of vanilla inv */ + if (op == OP_INV) + ctrl &= ~DC_CTRL_INV_MODE_FLUSH; + else + ctrl |= DC_CTRL_INV_MODE_FLUSH; + + write_aux_reg(ARC_AUX_DC_CTRL, ctrl); } -static void __after_dc_op(const int op, unsigned int reg) +static void __after_dc_op(const int op) { if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS); - - /* Switch back to default Invalidate mode */ - if (op == OP_INV) - write_aux_reg(ARC_AUX_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH); } static inline void __dc_entire_op(const int cacheop) { int aux; - unsigned int ctrl_reg = __before_dc_op(cacheop); + + __before_dc_op(cacheop); if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ aux = ARC_AUX_DC_IVDC; @@ -434,16 +431,15 @@ static inline void __dc_entire_op(const int cacheop) write_aux_reg(aux, 0x1); - __after_dc_op(cacheop, ctrl_reg); + __after_dc_op(cacheop); } static inline void __dc_line_op(unsigned long paddr, unsigned long sz, const int cacheop) { - unsigned int ctrl_reg = __before_dc_op(cacheop); - + __before_dc_op(cacheop); __dcache_line_loop(paddr, sz, cacheop); - __after_dc_op(cacheop, ctrl_reg); + __after_dc_op(cacheop); } #else #define __dc_entire_op(cacheop) From 5e0c68edad7d47438721e862fd772710dffca9b4 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:49 +0300 Subject: [PATCH 09/26] ARC: Introduce is_isa_X() functions Introduce is_isa_arcv2() and is_isa_arcompact() functions. These functions only check configuration options and return compile-time constant so they can be used instead of #ifdef's to to write cleaner code. Now we can write: -------------->8--------------- if (is_isa_arcv2()) ioc_configure(); -------------->8--------------- instead of: -------------->8--------------- ifdef CONFIG_ISA_ARCV2 ioc_configure(); endif -------------->8--------------- Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/include/asm/arcregs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 67f416305d..3a513149f5 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -8,6 +8,7 @@ #define _ASM_ARC_ARCREGS_H #include +#include /* * ARC architecture has additional address space - auxiliary registers. @@ -88,6 +89,16 @@ /* ARCNUM [15:8] - field to identify each core in a multi-core system */ #define CPU_ID_GET() ((read_aux_reg(ARC_AUX_IDENTITY) & 0xFF00) >> 8) + +static const inline int is_isa_arcv2(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCV2); +} + +static const inline int is_isa_arcompact(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCOMPACT); +} #endif /* __ASSEMBLY__ */ #endif /* _ASM_ARC_ARCREGS_H */ From c27814be336ee612418ff010f4002deb1cc9c387 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:50 +0300 Subject: [PATCH 10/26] ARC: Flush & invalidate D$ with a single command We don't implement separate flush_dcache_all() intentionally as entire data cache invalidation is dangerous operation even if we flush data cache right before invalidation. There is the real example: We may get stuck in the following code if we store any context (like BLINK register) on stack in invalidate_dcache_all() function. BLINK register is the register where return address is automatically saved when we do function call with instructions like 'bl'. void flush_dcache_all() { __dc_entire_op(OP_FLUSH); // Other code // } void invalidate_dcache_all() { __dc_entire_op(OP_INV); // Other code // } void foo(void) { flush_dcache_all(); invalidate_dcache_all(); } Now let's see what really happens during that code execution: foo() |->> call flush_dcache_all [return address is saved to BLINK register] [push BLINK] (save to stack) ![point 1] |->> call __dc_entire_op(OP_FLUSH) [return address is saved to BLINK register] [flush L1 D$] return [jump to BLINK] <<------ [other flush_dcache_all code] [pop BLINK] (get from stack) return [jump to BLINK] <<------ |->> call invalidate_dcache_all [return address is saved to BLINK register] [push BLINK] (save to stack) ![point 2] |->> call __dc_entire_op(OP_FLUSH) [return address is saved to BLINK register] [invalidate L1 D$] ![point 3] // Oops!!! // We lose return address from invalidate_dcache_all function: // we save it to stack and invalidate L1 D$ after that! return [jump to BLINK] <<------ [other invalidate_dcache_all code] [pop BLINK] (get from stack) // we don't have this data in L1 dcache as we invalidated it in [point 3] // so we get it from next memory level (for example DDR memory) // but in the memory we have value which we save in [point 1], which // is return address from flush_dcache_all function (instead of // address from current invalidate_dcache_all function which we // saved in [point 2] !) return [jump to BLINK] <<------ // As BLINK points to invalidate_dcache_all, we call it again and // loop forever. Fortunately we may do flush and invalidation of D$ with a single one instruction which automatically mitigates a situation described above. And because invalidate_dcache_all() isn't used in common U-Boot code we implement "flush and invalidate dcache all" instead. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/include/asm/cache.h | 1 + arch/arc/lib/cache.c | 89 ++++++++++++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index d26d9fb18d..382c4126c3 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -30,6 +30,7 @@ #ifndef __ASSEMBLY__ void cache_init(void); +void flush_n_invalidate_dcache_all(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 83b77b9716..42207b201c 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -12,6 +12,80 @@ #include #include +/* + * [ NOTE 1 ]: + * Data cache (L1 D$ or SL$) entire invalidate operation or data cache disable + * operation may result in unexpected behavior and data loss even if we flush + * data cache right before invalidation. That may happens if we store any context + * on stack (like we store BLINK register on stack before function call). + * BLINK register is the register where return address is automatically saved + * when we do function call with instructions like 'bl'. + * + * There is the real example: + * We may hang in the next code as we store any BLINK register on stack in + * invalidate_dcache_all() function. + * + * void flush_dcache_all() { + * __dc_entire_op(OP_FLUSH); + * // Other code // + * } + * + * void invalidate_dcache_all() { + * __dc_entire_op(OP_INV); + * // Other code // + * } + * + * void foo(void) { + * flush_dcache_all(); + * invalidate_dcache_all(); + * } + * + * Now let's see what really happens during that code execution: + * + * foo() + * |->> call flush_dcache_all + * [return address is saved to BLINK register] + * [push BLINK] (save to stack) ![point 1] + * |->> call __dc_entire_op(OP_FLUSH) + * [return address is saved to BLINK register] + * [flush L1 D$] + * return [jump to BLINK] + * <<------ + * [other flush_dcache_all code] + * [pop BLINK] (get from stack) + * return [jump to BLINK] + * <<------ + * |->> call invalidate_dcache_all + * [return address is saved to BLINK register] + * [push BLINK] (save to stack) ![point 2] + * |->> call __dc_entire_op(OP_FLUSH) + * [return address is saved to BLINK register] + * [invalidate L1 D$] ![point 3] + * // Oops!!! + * // We lose return address from invalidate_dcache_all function: + * // we save it to stack and invalidate L1 D$ after that! + * return [jump to BLINK] + * <<------ + * [other invalidate_dcache_all code] + * [pop BLINK] (get from stack) + * // we don't have this data in L1 dcache as we invalidated it in [point 3] + * // so we get it from next memory level (for example DDR memory) + * // but in the memory we have value which we save in [point 1], which + * // is return address from flush_dcache_all function (instead of + * // address from current invalidate_dcache_all function which we + * // saved in [point 2] !) + * return [jump to BLINK] + * <<------ + * // As BLINK points to invalidate_dcache_all, we call it again and + * // loop forever. + * + * Fortunately we may fix that by using flush & invalidation of D$ with a single + * one instruction (instead of flush and invalidation instructions pair) and + * enabling force function inline with '__attribute__((always_inline))' gcc + * attribute to avoid any function call (and BLINK store) between cache flush + * and disable. + */ + /* Bit values in IC_CTRL */ #define IC_CTRL_CACHE_DISABLE BIT(0) @@ -256,8 +330,7 @@ void cache_init(void) /* IOC Aperture size is equal to DDR size */ long ap_size = CONFIG_SYS_SDRAM_SIZE; - flush_dcache_all(); - invalidate_dcache_all(); + flush_n_invalidate_dcache_all(); if (!is_power_of_2(ap_size) || ap_size < 4096) panic("IOC Aperture size must be power of 2 and bigger 4Kib"); @@ -483,13 +556,19 @@ void flush_cache(unsigned long start, unsigned long size) flush_dcache_range(start, start + size); } -void invalidate_dcache_all(void) +/* + * As invalidate_dcache_all() is not used in generic U-Boot code and as we + * don't need it in arch/arc code alone (invalidate without flush) we implement + * flush_n_invalidate_dcache_all (flush and invalidate in 1 operation) because + * it's much safer. See [ NOTE 1 ] for more details. + */ +void flush_n_invalidate_dcache_all(void) { - __dc_entire_op(OP_INV); + __dc_entire_op(OP_FLUSH_N_INV); #ifdef CONFIG_ISA_ARCV2 if (slc_exists) - __slc_entire_op(OP_INV); + __slc_entire_op(OP_FLUSH_N_INV); #endif } From a6f557c4e04a70df2815a44e22a1c2f6c25a0a1c Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:51 +0300 Subject: [PATCH 11/26] ARC: Cache: Move IOC initialization to a separate function Move IOC initialization from cache_init() to a separate function. This is the preparation for the next patch where we'll switch to is_isa_arcv2() function usage instead of "CONFIG_ISA_ARCV2" ifdef. Also it makes cache_init function a bit cleaner. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 56 ++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 42207b201c..af07a11724 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -231,6 +231,34 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY); } + +static void arc_ioc_setup(void) +{ + /* IOC Aperture start is equal to DDR start */ + unsigned int ap_base = CONFIG_SYS_SDRAM_BASE; + /* IOC Aperture size is equal to DDR size */ + long ap_size = CONFIG_SYS_SDRAM_SIZE; + + flush_n_invalidate_dcache_all(); + + if (!is_power_of_2(ap_size) || ap_size < 4096) + panic("IOC Aperture size must be power of 2 and bigger 4Kib"); + + /* + * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, + * so setting 0x11 implies 512M, 0x12 implies 1G... + */ + write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE, + order_base_2(ap_size / 1024) - 2); + + /* IOC Aperture start must be aligned to the size of the aperture */ + if (ap_base % ap_size != 0) + panic("IOC Aperture start must be aligned to the size of the aperture"); + + write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12); + write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1); + write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1); +} #endif /* CONFIG_ISA_ARCV2 */ #ifdef CONFIG_ISA_ARCV2 @@ -324,32 +352,8 @@ void cache_init(void) #ifdef CONFIG_ISA_ARCV2 read_decode_cache_bcr_arcv2(); - if (ioc_exists) { - /* IOC Aperture start is equal to DDR start */ - unsigned int ap_base = CONFIG_SYS_SDRAM_BASE; - /* IOC Aperture size is equal to DDR size */ - long ap_size = CONFIG_SYS_SDRAM_SIZE; - - flush_n_invalidate_dcache_all(); - - if (!is_power_of_2(ap_size) || ap_size < 4096) - panic("IOC Aperture size must be power of 2 and bigger 4Kib"); - - /* - * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, - * so setting 0x11 implies 512M, 0x12 implies 1G... - */ - write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE, - order_base_2(ap_size / 1024) - 2); - - /* IOC Aperture start must be aligned to the size of the aperture */ - if (ap_base % ap_size != 0) - panic("IOC Aperture start must be aligned to the size of the aperture"); - - write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12); - write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1); - write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1); - } + if (ioc_exists) + arc_ioc_setup(); read_decode_mmu_bcr(); From 88ae27eda3d47ab1c7885745f28fe3cf6f0b9a5b Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:52 +0300 Subject: [PATCH 12/26] ARC: Move BCR encodings to separate header file We're starting to use more and more BCRs and having their definitions in-lined in sources becomes a bit annoying so we move it all to a separate header. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/include/asm/arc-bcr.h | 77 ++++++++++++++++++++++++++++++++++ arch/arc/lib/cache.c | 67 ++++------------------------- 2 files changed, 85 insertions(+), 59 deletions(-) create mode 100644 arch/arc/include/asm/arc-bcr.h diff --git a/arch/arc/include/asm/arc-bcr.h b/arch/arc/include/asm/arc-bcr.h new file mode 100644 index 0000000000..823906d946 --- /dev/null +++ b/arch/arc/include/asm/arc-bcr.h @@ -0,0 +1,77 @@ +/* + * ARC Build Configuration Registers, with encoded hardware config + * + * Copyright (C) 2018 Synopsys + * Author: Eugeniy Paltsev + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __ARC_BCR_H +#define __ARC_BCR_H +#ifndef __ASSEMBLY__ + +#include + +union bcr_di_cache { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; +#else + unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; +#endif + } fields; + unsigned int word; +}; + +union bcr_slc_cfg { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, way:2, lsz:2, sz:4; +#else + unsigned int sz:4, lsz:2, way:2, pad:24; +#endif + } fields; + unsigned int word; +}; + +union bcr_generic { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, ver:8; +#else + unsigned int ver:8, pad:24; +#endif + } fields; + unsigned int word; +}; + +union bcr_clust_cfg { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8; +#else + unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7; +#endif + } fields; + unsigned int word; +}; + +union bcr_mmu_4 { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif + } fields; + unsigned int word; +}; + +#endif /* __ASSEMBLY__ */ +#endif /* __ARC_BCR_H */ diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index af07a11724..e6e439aab8 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -10,6 +10,7 @@ #include #include #include +#include #include /* @@ -129,24 +130,11 @@ void read_decode_mmu_bcr(void) { /* TODO: should we compare mmu version from BCR and from CONFIG? */ #if (CONFIG_ARC_MMU_VER >= 4) - u32 tmp; + union bcr_mmu_4 mmu4; - tmp = read_aux_reg(ARC_AUX_MMU_BCR); + mmu4.word = read_aux_reg(ARC_AUX_MMU_BCR); - struct bcr_mmu_4 { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, - n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; -#else - /* DTLB ITLB JES JE JA */ - unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, - pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; -#endif /* CONFIG_CPU_BIG_ENDIAN */ - } *mmu4; - - mmu4 = (struct bcr_mmu_4 *)&tmp; - - pae_exists = !!mmu4->pae; + pae_exists = !!mmu4.fields.pae; #endif /* (CONFIG_ARC_MMU_VER >= 4) */ } @@ -264,27 +252,9 @@ static void arc_ioc_setup(void) #ifdef CONFIG_ISA_ARCV2 static void read_decode_cache_bcr_arcv2(void) { - union { - struct { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:24, way:2, lsz:2, sz:4; -#else - unsigned int sz:4, lsz:2, way:2, pad:24; -#endif - } fields; - unsigned int word; - } slc_cfg; - - union { - struct { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:24, ver:8; -#else - unsigned int ver:8, pad:24; -#endif - } fields; - unsigned int word; - } sbcr; + union bcr_slc_cfg slc_cfg; + union bcr_clust_cfg cbcr; + union bcr_generic sbcr; sbcr.word = read_aux_reg(ARC_BCR_SLC); if (sbcr.fields.ver) { @@ -293,17 +263,6 @@ static void read_decode_cache_bcr_arcv2(void) slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64; } - union { - struct bcr_clust_cfg { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8; -#else - unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7; -#endif - } fields; - unsigned int word; - } cbcr; - cbcr.word = read_aux_reg(ARC_BCR_CLUSTER); if (cbcr.fields.c && ioc_enable) ioc_exists = true; @@ -313,17 +272,7 @@ static void read_decode_cache_bcr_arcv2(void) void read_decode_cache_bcr(void) { int dc_line_sz = 0, ic_line_sz = 0; - - union { - struct { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; -#else - unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; -#endif - } fields; - unsigned int word; - } ibcr, dbcr; + union bcr_di_cache ibcr, dbcr; ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD); if (ibcr.fields.ver) { From c877a8911d1262d3f78e1f24a24ef2f5e1333676 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:53 +0300 Subject: [PATCH 13/26] ARC: Cache: Allways check D$ status before entire/line ops As we are planning to get rid of dozens of ifdef's in cache.c we would better check D$ status before each entire/line operation then check CONFIG_SYS_DCACHE_OFF config option. This makes the code cleaner as well as D$ entire/line functions remain functional even if we enable or disable D$ in run-time. As we need to check status before *each* function execution and we call D$ entire/line functions from different places we add this check directly into D$ entire/line functions instead of their callers to avoid code duplication. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index e6e439aab8..efba789062 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -398,7 +398,6 @@ void dcache_disable(void) DC_CTRL_CACHE_DISABLE); } -#ifndef CONFIG_SYS_DCACHE_OFF /* Common Helper for Line Operations on D-cache */ static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz, const int cacheop) @@ -448,6 +447,9 @@ static inline void __dc_entire_op(const int cacheop) { int aux; + if (!dcache_status()) + return; + __before_dc_op(cacheop); if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ @@ -463,14 +465,13 @@ static inline void __dc_entire_op(const int cacheop) static inline void __dc_line_op(unsigned long paddr, unsigned long sz, const int cacheop) { + if (!dcache_status()) + return; + __before_dc_op(cacheop); __dcache_line_loop(paddr, sz, cacheop); __after_dc_op(cacheop); } -#else -#define __dc_entire_op(cacheop) -#define __dc_line_op(paddr, sz, cacheop) -#endif /* !CONFIG_SYS_DCACHE_OFF */ void invalidate_dcache_range(unsigned long start, unsigned long end) { From 05c6a26a04c4bddc62d2c307b754ccc81878b303 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:54 +0300 Subject: [PATCH 14/26] ARC: Cache: Use is_isa_arcv2() instead of CONFIG_ISA_ARCV2 ifdef Use is_isa_arcv2() function where it is possible instead of CONFIG_ISA_ARCV2 define check to make code cleaner at the same time keeping pretty much the same functionality - code in branches under "if (is_isa_arcv2())" won't be compiled if CONFIG_ISA_ARCV2 is not defined, still we need a couple of CONFIG_ISA_ARCV2 ifdefs to make compiler happy. That's because code in !is_isa_x() branch gets compiled and only then gets optimized away. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 59 ++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index efba789062..354dbc600e 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -117,7 +117,6 @@ bool icache_exists __section(".data") = false; #define CACHE_LINE_MASK (~(l1_line_sz - 1)) -#ifdef CONFIG_ISA_ARCV2 int slc_line_sz __section(".data"); bool slc_exists __section(".data") = false; bool ioc_exists __section(".data") = false; @@ -175,6 +174,8 @@ static void slc_upper_region_init(void) static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) { +#ifdef CONFIG_ISA_ARCV2 + unsigned int ctrl; unsigned long end; @@ -218,6 +219,8 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) read_aux_reg(ARC_AUX_SLC_CTRL); while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY); + +#endif /* CONFIG_ISA_ARCV2 */ } static void arc_ioc_setup(void) @@ -247,11 +250,11 @@ static void arc_ioc_setup(void) write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1); write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1); } -#endif /* CONFIG_ISA_ARCV2 */ -#ifdef CONFIG_ISA_ARCV2 static void read_decode_cache_bcr_arcv2(void) { +#ifdef CONFIG_ISA_ARCV2 + union bcr_slc_cfg slc_cfg; union bcr_clust_cfg cbcr; union bcr_generic sbcr; @@ -266,8 +269,9 @@ static void read_decode_cache_bcr_arcv2(void) cbcr.word = read_aux_reg(ARC_BCR_CLUSTER); if (cbcr.fields.c && ioc_enable) ioc_exists = true; + +#endif /* CONFIG_ISA_ARCV2 */ } -#endif void read_decode_cache_bcr(void) { @@ -298,10 +302,10 @@ void cache_init(void) { read_decode_cache_bcr(); -#ifdef CONFIG_ISA_ARCV2 - read_decode_cache_bcr_arcv2(); + if (is_isa_arcv2()) + read_decode_cache_bcr_arcv2(); - if (ioc_exists) + if (is_isa_arcv2() && ioc_exists) arc_ioc_setup(); read_decode_mmu_bcr(); @@ -311,9 +315,8 @@ void cache_init(void) * only if PAE exists in current HW. So we had to check pae_exist * before using them. */ - if (slc_exists && pae_exists) + if (is_isa_arcv2() && slc_exists && pae_exists) slc_upper_region_init(); -#endif /* CONFIG_ISA_ARCV2 */ } int icache_status(void) @@ -363,10 +366,8 @@ void invalidate_icache_all(void) { __ic_entire_invalidate(); -#ifdef CONFIG_ISA_ARCV2 - if (slc_exists) + if (is_isa_arcv2() && slc_exists) __slc_entire_op(OP_INV); -#endif } int dcache_status(void) @@ -478,15 +479,16 @@ void invalidate_dcache_range(unsigned long start, unsigned long end) if (start >= end) return; -#ifdef CONFIG_ISA_ARCV2 - if (!ioc_exists) -#endif + /* + * ARCv1 -> call __dc_line_op + * ARCv2 && no IOC -> call __dc_line_op; call __slc_rgn_op + * ARCv2 && IOC enabled -> nothing + */ + if (!is_isa_arcv2() || !ioc_exists) __dc_line_op(start, end - start, OP_INV); -#ifdef CONFIG_ISA_ARCV2 - if (slc_exists && !ioc_exists) + if (is_isa_arcv2() && slc_exists && !ioc_exists) __slc_rgn_op(start, end - start, OP_INV); -#endif } void flush_dcache_range(unsigned long start, unsigned long end) @@ -494,15 +496,16 @@ void flush_dcache_range(unsigned long start, unsigned long end) if (start >= end) return; -#ifdef CONFIG_ISA_ARCV2 - if (!ioc_exists) -#endif + /* + * ARCv1 -> call __dc_line_op + * ARCv2 && no IOC -> call __dc_line_op; call __slc_rgn_op + * ARCv2 && IOC enabled -> nothing + */ + if (!is_isa_arcv2() || !ioc_exists) __dc_line_op(start, end - start, OP_FLUSH); -#ifdef CONFIG_ISA_ARCV2 - if (slc_exists && !ioc_exists) + if (is_isa_arcv2() && slc_exists && !ioc_exists) __slc_rgn_op(start, end - start, OP_FLUSH); -#endif } void flush_cache(unsigned long start, unsigned long size) @@ -520,18 +523,14 @@ void flush_n_invalidate_dcache_all(void) { __dc_entire_op(OP_FLUSH_N_INV); -#ifdef CONFIG_ISA_ARCV2 - if (slc_exists) + if (is_isa_arcv2() && slc_exists) __slc_entire_op(OP_FLUSH_N_INV); -#endif } void flush_dcache_all(void) { __dc_entire_op(OP_FLUSH); -#ifdef CONFIG_ISA_ARCV2 - if (slc_exists) + if (is_isa_arcv2() && slc_exists) __slc_entire_op(OP_FLUSH); -#endif } From ea9f6f1e873cc1b6fe220e1017357532045ce988 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:55 +0300 Subject: [PATCH 15/26] ARC: Cache: Move SLC status check into slc_entire_op() and slc_rgn_op() As of today we check SLC status before each call of __slc_rgn_op() or __slc_entire_op(). So move status check into __slc_rgn_op() and __slc_entire_op(). As we need to check status before *each* function execution and we call slc_entire_op() and slc_rgn_op() from different places we add this check directly into SLC entire/line functions instead of their callers to avoid code duplication. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 354dbc600e..f147674ae8 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -141,6 +141,9 @@ static void __slc_entire_op(const int op) { unsigned int ctrl; + if (!slc_exists) + return; + ctrl = read_aux_reg(ARC_AUX_SLC_CTRL); if (!(op & OP_FLUSH)) /* i.e. OP_INV */ @@ -179,6 +182,9 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) unsigned int ctrl; unsigned long end; + if (!slc_exists) + return; + /* * The Region Flush operation is specified by CTRL.RGN_OP[11..9] * - b'000 (default) is Flush, @@ -366,7 +372,7 @@ void invalidate_icache_all(void) { __ic_entire_invalidate(); - if (is_isa_arcv2() && slc_exists) + if (is_isa_arcv2()) __slc_entire_op(OP_INV); } @@ -487,7 +493,7 @@ void invalidate_dcache_range(unsigned long start, unsigned long end) if (!is_isa_arcv2() || !ioc_exists) __dc_line_op(start, end - start, OP_INV); - if (is_isa_arcv2() && slc_exists && !ioc_exists) + if (is_isa_arcv2() && !ioc_exists) __slc_rgn_op(start, end - start, OP_INV); } @@ -504,7 +510,7 @@ void flush_dcache_range(unsigned long start, unsigned long end) if (!is_isa_arcv2() || !ioc_exists) __dc_line_op(start, end - start, OP_FLUSH); - if (is_isa_arcv2() && slc_exists && !ioc_exists) + if (is_isa_arcv2() && !ioc_exists) __slc_rgn_op(start, end - start, OP_FLUSH); } @@ -523,7 +529,7 @@ void flush_n_invalidate_dcache_all(void) { __dc_entire_op(OP_FLUSH_N_INV); - if (is_isa_arcv2() && slc_exists) + if (is_isa_arcv2()) __slc_entire_op(OP_FLUSH_N_INV); } @@ -531,6 +537,6 @@ void flush_dcache_all(void) { __dc_entire_op(OP_FLUSH); - if (is_isa_arcv2() && slc_exists) + if (is_isa_arcv2()) __slc_entire_op(OP_FLUSH); } From 75790873202f005e93085cc5ff5076bce0f5e84a Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:56 +0300 Subject: [PATCH 16/26] ARC: Cache: Get rid of [slc,pae,icache,dcache]_exists global variables There is a problem with current implementation if we start U-Boot from ROM, as we use global variables before ther initialization, so these variables get overwritten when we copy .data section from ROM. Instead we'll use icache_exists(), dcache_exists(), slc_exists(), pae_exists() functions which directly check BCRs every time. In U-Boot case ops are used only during self-relocation and DMA so we shouldn't be hit by noticeable performance degradation. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 67 ++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index f147674ae8..1b74f55ab7 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -94,7 +94,6 @@ #define DC_CTRL_CACHE_DISABLE BIT(0) #define DC_CTRL_INV_MODE_FLUSH BIT(6) #define DC_CTRL_FLUSH_STATUS BIT(8) -#define CACHE_VER_NUM_MASK 0xF #define OP_INV BIT(0) #define OP_FLUSH BIT(1) @@ -112,20 +111,16 @@ * relocation but will be used after being zeroed. */ int l1_line_sz __section(".data"); -bool dcache_exists __section(".data") = false; -bool icache_exists __section(".data") = false; #define CACHE_LINE_MASK (~(l1_line_sz - 1)) int slc_line_sz __section(".data"); -bool slc_exists __section(".data") = false; bool ioc_exists __section(".data") = false; -bool pae_exists __section(".data") = false; /* To force enable IOC set ioc_enable to 'true' */ bool ioc_enable __section(".data") = false; -void read_decode_mmu_bcr(void) +static inline bool pae_exists(void) { /* TODO: should we compare mmu version from BCR and from CONFIG? */ #if (CONFIG_ARC_MMU_VER >= 4) @@ -133,15 +128,46 @@ void read_decode_mmu_bcr(void) mmu4.word = read_aux_reg(ARC_AUX_MMU_BCR); - pae_exists = !!mmu4.fields.pae; + if (mmu4.fields.pae) + return true; #endif /* (CONFIG_ARC_MMU_VER >= 4) */ + + return false; +} + +static inline bool icache_exists(void) +{ + union bcr_di_cache ibcr; + + ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD); + return !!ibcr.fields.ver; +} + +static inline bool dcache_exists(void) +{ + union bcr_di_cache dbcr; + + dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD); + return !!dbcr.fields.ver; +} + +static inline bool slc_exists(void) +{ + if (is_isa_arcv2()) { + union bcr_generic sbcr; + + sbcr.word = read_aux_reg(ARC_BCR_SLC); + return !!sbcr.fields.ver; + } + + return false; } static void __slc_entire_op(const int op) { unsigned int ctrl; - if (!slc_exists) + if (!slc_exists()) return; ctrl = read_aux_reg(ARC_AUX_SLC_CTRL); @@ -182,7 +208,7 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) unsigned int ctrl; unsigned long end; - if (!slc_exists) + if (!slc_exists()) return; /* @@ -263,12 +289,9 @@ static void read_decode_cache_bcr_arcv2(void) union bcr_slc_cfg slc_cfg; union bcr_clust_cfg cbcr; - union bcr_generic sbcr; - sbcr.word = read_aux_reg(ARC_BCR_SLC); - if (sbcr.fields.ver) { + if (slc_exists()) { slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG); - slc_exists = true; slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64; } @@ -286,7 +309,6 @@ void read_decode_cache_bcr(void) ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD); if (ibcr.fields.ver) { - icache_exists = true; l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len; if (!ic_line_sz) panic("Instruction exists but line length is 0\n"); @@ -294,7 +316,6 @@ void read_decode_cache_bcr(void) dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD); if (dbcr.fields.ver) { - dcache_exists = true; l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len; if (!dc_line_sz) panic("Data cache exists but line length is 0\n"); @@ -314,20 +335,18 @@ void cache_init(void) if (is_isa_arcv2() && ioc_exists) arc_ioc_setup(); - read_decode_mmu_bcr(); - /* * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist * only if PAE exists in current HW. So we had to check pae_exist * before using them. */ - if (is_isa_arcv2() && slc_exists && pae_exists) + if (is_isa_arcv2() && slc_exists() && pae_exists()) slc_upper_region_init(); } int icache_status(void) { - if (!icache_exists) + if (!icache_exists()) return 0; if (read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE) @@ -338,14 +357,14 @@ int icache_status(void) void icache_enable(void) { - if (icache_exists) + if (icache_exists()) write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) & ~IC_CTRL_CACHE_DISABLE); } void icache_disable(void) { - if (icache_exists) + if (icache_exists()) write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) | IC_CTRL_CACHE_DISABLE); } @@ -378,7 +397,7 @@ void invalidate_icache_all(void) int dcache_status(void) { - if (!dcache_exists) + if (!dcache_exists()) return 0; if (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE) @@ -389,7 +408,7 @@ int dcache_status(void) void dcache_enable(void) { - if (!dcache_exists) + if (!dcache_exists()) return; write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) & @@ -398,7 +417,7 @@ void dcache_enable(void) void dcache_disable(void) { - if (!dcache_exists) + if (!dcache_exists()) return; write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) | From bf8974eda40e7954a3f279fb54a95d938c0d3bba Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:57 +0300 Subject: [PATCH 17/26] ARC: Move cache global variables to arch_global_data There is a problem with current implementation if we start U-Boot from ROM, as we use global variables before ther initialization, so these variables get overwritten when we copy .data section from ROM. Instead we move these global variables into our "global data" structure so that we may really start from ROM. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/include/asm/global_data.h | 6 ++++++ arch/arc/lib/cache.c | 19 +++++++++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/arc/include/asm/global_data.h b/arch/arc/include/asm/global_data.h index f0242f1ad6..43e1343095 100644 --- a/arch/arc/include/asm/global_data.h +++ b/arch/arc/include/asm/global_data.h @@ -7,9 +7,15 @@ #ifndef __ASM_ARC_GLOBAL_DATA_H #define __ASM_ARC_GLOBAL_DATA_H +#include + #ifndef __ASSEMBLY__ /* Architecture-specific global data */ struct arch_global_data { + int l1_line_sz; +#if defined(CONFIG_ISA_ARCV2) + int slc_line_sz; +#endif }; #endif /* __ASSEMBLY__ */ diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 1b74f55ab7..b08c2111c8 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -87,6 +87,8 @@ * and disable. */ +DECLARE_GLOBAL_DATA_PTR; + /* Bit values in IC_CTRL */ #define IC_CTRL_CACHE_DISABLE BIT(0) @@ -110,11 +112,8 @@ * But .bss section is not relocated and so it will be initilized before * relocation but will be used after being zeroed. */ -int l1_line_sz __section(".data"); +#define CACHE_LINE_MASK (~(gd->arch.l1_line_sz - 1)) -#define CACHE_LINE_MASK (~(l1_line_sz - 1)) - -int slc_line_sz __section(".data"); bool ioc_exists __section(".data") = false; /* To force enable IOC set ioc_enable to 'true' */ @@ -237,7 +236,7 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) * END needs to be setup before START (latter triggers the operation) * END can't be same as START, so add (l2_line_sz - 1) to sz */ - end = paddr + sz + slc_line_sz - 1; + end = paddr + sz + gd->arch.slc_line_sz - 1; /* * Upper addresses (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1) @@ -292,7 +291,7 @@ static void read_decode_cache_bcr_arcv2(void) if (slc_exists()) { slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG); - slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64; + gd->arch.slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64; } cbcr.word = read_aux_reg(ARC_BCR_CLUSTER); @@ -309,14 +308,14 @@ void read_decode_cache_bcr(void) ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD); if (ibcr.fields.ver) { - l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len; + gd->arch.l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len; if (!ic_line_sz) panic("Instruction exists but line length is 0\n"); } dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD); if (dbcr.fields.ver) { - l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len; + gd->arch.l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len; if (!dc_line_sz) panic("Data cache exists but line length is 0\n"); } @@ -437,14 +436,14 @@ static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz, sz += paddr & ~CACHE_LINE_MASK; paddr &= CACHE_LINE_MASK; - num_lines = DIV_ROUND_UP(sz, l1_line_sz); + num_lines = DIV_ROUND_UP(sz, gd->arch.l1_line_sz); while (num_lines-- > 0) { #if (CONFIG_ARC_MMU_VER == 3) write_aux_reg(ARC_AUX_DC_PTAG, paddr); #endif write_aux_reg(aux_cmd, paddr); - paddr += l1_line_sz; + paddr += gd->arch.l1_line_sz; } } From 246ba284ecd4c6da7a7e15c6455bd3d43dc6f33e Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:58 +0300 Subject: [PATCH 18/26] ARC: Cache: Move PAE exists check into slc_upper_region_init() Move check for PAE existence into slc_upper_region_init() instead of its caller as more appropriate place. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index b08c2111c8..031ebd7765 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -192,6 +192,14 @@ static void __slc_entire_op(const int op) static void slc_upper_region_init(void) { + /* + * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist + * only if PAE exists in current HW. So we had to check pae_exist + * before using them. + */ + if (!pae_exists()) + return; + /* * ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0 * as we don't use PAE40. @@ -334,12 +342,7 @@ void cache_init(void) if (is_isa_arcv2() && ioc_exists) arc_ioc_setup(); - /* - * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist - * only if PAE exists in current HW. So we had to check pae_exist - * before using them. - */ - if (is_isa_arcv2() && slc_exists() && pae_exists()) + if (is_isa_arcv2() && slc_exists()) slc_upper_region_init(); } From 48b04832d8a2045c8e72c5a0c8e7e54043fe670d Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:58:59 +0300 Subject: [PATCH 19/26] ARC: Move IOC enabling to compile-time options Use CONFIG_ARC_DBG_IOC_ENABLE Kconfig option instead of ioc_enable global variable. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/Kconfig | 18 +++++++++++++ arch/arc/include/asm/cache.h | 5 ++++ arch/arc/lib/cache.c | 49 +++++++++++++++++++++--------------- 3 files changed, 52 insertions(+), 20 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index e3f9db7b29..aee15d5353 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -116,6 +116,24 @@ config SYS_DCACHE_OFF bool "Do not use Data Cache" default n +menuconfig ARC_DBG + bool "ARC debugging" + default n + +if ARC_DBG + +config ARC_DBG_IOC_ENABLE + bool "Enable IO coherency unit" + depends on CPU_ARCHS38 + default n + help + Enable IO coherency unit to debug problems with caches and + DMA peripherals. + NOTE: as of today linux will not work properly if this option + is enabled in u-boot! + +endif + choice prompt "Target select" default TARGET_AXS103 diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 382c4126c3..fe75409b5c 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -32,6 +32,11 @@ void cache_init(void); void flush_n_invalidate_dcache_all(void); +static const inline int is_ioc_enabled(void) +{ + return IS_ENABLED(CONFIG_ARC_DBG_IOC_ENABLE); +} + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ARC_CACHE_H */ diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 031ebd7765..ad14900135 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -107,18 +107,8 @@ DECLARE_GLOBAL_DATA_PTR; #define SLC_CTRL_BUSY 0x100 #define SLC_CTRL_RGN_OP_INV 0x200 -/* - * By default that variable will fall into .bss section. - * But .bss section is not relocated and so it will be initilized before - * relocation but will be used after being zeroed. - */ #define CACHE_LINE_MASK (~(gd->arch.l1_line_sz - 1)) -bool ioc_exists __section(".data") = false; - -/* To force enable IOC set ioc_enable to 'true' */ -bool ioc_enable __section(".data") = false; - static inline bool pae_exists(void) { /* TODO: should we compare mmu version from BCR and from CONFIG? */ @@ -162,6 +152,30 @@ static inline bool slc_exists(void) return false; } +static inline bool ioc_exists(void) +{ + if (is_isa_arcv2()) { + union bcr_clust_cfg cbcr; + + cbcr.word = read_aux_reg(ARC_BCR_CLUSTER); + return cbcr.fields.c; + } + + return false; +} + +static inline bool ioc_enabled(void) +{ + /* + * We check only CONFIG option instead of IOC HW state check as IOC + * must be disabled by default. + */ + if (is_ioc_enabled()) + return ioc_exists(); + + return false; +} + static void __slc_entire_op(const int op) { unsigned int ctrl; @@ -295,17 +309,12 @@ static void read_decode_cache_bcr_arcv2(void) #ifdef CONFIG_ISA_ARCV2 union bcr_slc_cfg slc_cfg; - union bcr_clust_cfg cbcr; if (slc_exists()) { slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG); gd->arch.slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64; } - cbcr.word = read_aux_reg(ARC_BCR_CLUSTER); - if (cbcr.fields.c && ioc_enable) - ioc_exists = true; - #endif /* CONFIG_ISA_ARCV2 */ } @@ -339,7 +348,7 @@ void cache_init(void) if (is_isa_arcv2()) read_decode_cache_bcr_arcv2(); - if (is_isa_arcv2() && ioc_exists) + if (is_isa_arcv2() && ioc_enabled()) arc_ioc_setup(); if (is_isa_arcv2() && slc_exists()) @@ -511,10 +520,10 @@ void invalidate_dcache_range(unsigned long start, unsigned long end) * ARCv2 && no IOC -> call __dc_line_op; call __slc_rgn_op * ARCv2 && IOC enabled -> nothing */ - if (!is_isa_arcv2() || !ioc_exists) + if (!is_isa_arcv2() || !ioc_enabled()) __dc_line_op(start, end - start, OP_INV); - if (is_isa_arcv2() && !ioc_exists) + if (is_isa_arcv2() && !ioc_enabled()) __slc_rgn_op(start, end - start, OP_INV); } @@ -528,10 +537,10 @@ void flush_dcache_range(unsigned long start, unsigned long end) * ARCv2 && no IOC -> call __dc_line_op; call __slc_rgn_op * ARCv2 && IOC enabled -> nothing */ - if (!is_isa_arcv2() || !ioc_exists) + if (!is_isa_arcv2() || !ioc_enabled()) __dc_line_op(start, end - start, OP_FLUSH); - if (is_isa_arcv2() && !ioc_exists) + if (is_isa_arcv2() && !ioc_enabled()) __slc_rgn_op(start, end - start, OP_FLUSH); } From c75eeb0bcbd5596f2b4e2b81eaa572675f019906 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:59:00 +0300 Subject: [PATCH 20/26] ARC: Cache: Implement [i,d]cache_enabled() as separate functions Implement icache_enabled() and dcache_enabled() as separate functions which can be used with "inline" attribute. This is a preparation to make them always_inline. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index ad14900135..7052895bb7 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -132,6 +132,14 @@ static inline bool icache_exists(void) return !!ibcr.fields.ver; } +static inline bool icache_enabled(void) +{ + if (!icache_exists()) + return false; + + return !(read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE); +} + static inline bool dcache_exists(void) { union bcr_di_cache dbcr; @@ -140,6 +148,14 @@ static inline bool dcache_exists(void) return !!dbcr.fields.ver; } +static inline bool dcache_enabled(void) +{ + if (!dcache_exists()) + return false; + + return !(read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE); +} + static inline bool slc_exists(void) { if (is_isa_arcv2()) { @@ -357,13 +373,7 @@ void cache_init(void) int icache_status(void) { - if (!icache_exists()) - return 0; - - if (read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE) - return 0; - else - return 1; + return icache_enabled(); } void icache_enable(void) @@ -383,7 +393,7 @@ void icache_disable(void) /* IC supports only invalidation */ static inline void __ic_entire_invalidate(void) { - if (!icache_status()) + if (!icache_enabled()) return; /* Any write to IC_IVIC register triggers invalidation of entire I$ */ @@ -408,13 +418,7 @@ void invalidate_icache_all(void) int dcache_status(void) { - if (!dcache_exists()) - return 0; - - if (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE) - return 0; - else - return 1; + return dcache_enabled(); } void dcache_enable(void) @@ -484,7 +488,7 @@ static inline void __dc_entire_op(const int cacheop) { int aux; - if (!dcache_status()) + if (!dcache_enabled()) return; __before_dc_op(cacheop); @@ -502,7 +506,7 @@ static inline void __dc_entire_op(const int cacheop) static inline void __dc_line_op(unsigned long paddr, unsigned long sz, const int cacheop) { - if (!dcache_status()) + if (!dcache_enabled()) return; __before_dc_op(cacheop); From 95336738f1cf2d4fabfc9bfc4981fa14714efc30 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:59:01 +0300 Subject: [PATCH 21/26] ARC: Cache: Fix SLC operations when SLC is bypassed for data If L1 D$ is disabled SLC is bypassed for data and all load/store requests are sent directly to main memory. If L1 I$ is disabled SLC is NOT bypassed for instructions and all instruction requests are fetched through SLC. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 7052895bb7..a5aae3d232 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -168,6 +168,15 @@ static inline bool slc_exists(void) return false; } +static inline bool slc_data_bypass(void) +{ + /* + * If L1 data cache is disabled SL$ is bypassed and all load/store + * requests are sent directly to main memory. + */ + return !dcache_enabled(); +} + static inline bool ioc_exists(void) { if (is_isa_arcv2()) { @@ -412,7 +421,13 @@ void invalidate_icache_all(void) { __ic_entire_invalidate(); - if (is_isa_arcv2()) + /* + * If SL$ is bypassed for data it is used only for instructions, + * so we need to invalidate it too. + * TODO: HS 3.0 supports SLC disable so we need to check slc + * enable/disable status here. + */ + if (is_isa_arcv2() && slc_data_bypass()) __slc_entire_op(OP_INV); } @@ -520,14 +535,15 @@ void invalidate_dcache_range(unsigned long start, unsigned long end) return; /* - * ARCv1 -> call __dc_line_op - * ARCv2 && no IOC -> call __dc_line_op; call __slc_rgn_op - * ARCv2 && IOC enabled -> nothing + * ARCv1 -> call __dc_line_op + * ARCv2 && L1 D$ disabled -> nothing + * ARCv2 && L1 D$ enabled && IOC enabled -> nothing + * ARCv2 && L1 D$ enabled && no IOC -> call __dc_line_op; call __slc_rgn_op */ if (!is_isa_arcv2() || !ioc_enabled()) __dc_line_op(start, end - start, OP_INV); - if (is_isa_arcv2() && !ioc_enabled()) + if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass()) __slc_rgn_op(start, end - start, OP_INV); } @@ -537,14 +553,15 @@ void flush_dcache_range(unsigned long start, unsigned long end) return; /* - * ARCv1 -> call __dc_line_op - * ARCv2 && no IOC -> call __dc_line_op; call __slc_rgn_op - * ARCv2 && IOC enabled -> nothing + * ARCv1 -> call __dc_line_op + * ARCv2 && L1 D$ disabled -> nothing + * ARCv2 && L1 D$ enabled && IOC enabled -> nothing + * ARCv2 && L1 D$ enabled && no IOC -> call __dc_line_op; call __slc_rgn_op */ if (!is_isa_arcv2() || !ioc_enabled()) __dc_line_op(start, end - start, OP_FLUSH); - if (is_isa_arcv2() && !ioc_enabled()) + if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass()) __slc_rgn_op(start, end - start, OP_FLUSH); } @@ -563,7 +580,7 @@ void flush_n_invalidate_dcache_all(void) { __dc_entire_op(OP_FLUSH_N_INV); - if (is_isa_arcv2()) + if (is_isa_arcv2() && !slc_data_bypass()) __slc_entire_op(OP_FLUSH_N_INV); } @@ -571,6 +588,6 @@ void flush_dcache_all(void) { __dc_entire_op(OP_FLUSH); - if (is_isa_arcv2()) + if (is_isa_arcv2() && !slc_data_bypass()) __slc_entire_op(OP_FLUSH); } From 375945bac2b8cbb547940ad5cf1e16a0eb0ddfeb Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:59:02 +0300 Subject: [PATCH 22/26] ARC: Implement a function to sync and cleanup caches Implement specialized function to clenup caches (and therefore sync instruction and data caches) which can be used for cleanup before linux launch or to sync caches during U-Boot self-relocation. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/include/asm/cache.h | 1 + arch/arc/lib/bootm.c | 4 ++-- arch/arc/lib/cache.c | 23 +++++++++++++++++++++++ arch/arc/lib/init_helpers.c | 6 +++--- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index fe75409b5c..2269183615 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -31,6 +31,7 @@ void cache_init(void); void flush_n_invalidate_dcache_all(void); +void sync_n_cleanup_cache_all(void); static const inline int is_ioc_enabled(void) { diff --git a/arch/arc/lib/bootm.c b/arch/arc/lib/bootm.c index 4d4acff239..f1ab344773 100644 --- a/arch/arc/lib/bootm.c +++ b/arch/arc/lib/bootm.c @@ -4,6 +4,7 @@ * SPDX-License-Identifier: GPL-2.0+ */ +#include #include DECLARE_GLOBAL_DATA_PTR; @@ -40,8 +41,7 @@ void arch_lmb_reserve(struct lmb *lmb) static int cleanup_before_linux(void) { disable_interrupts(); - flush_dcache_all(); - invalidate_icache_all(); + sync_n_cleanup_cache_all(); return 0; } diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index a5aae3d232..5d7583d868 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -591,3 +591,26 @@ void flush_dcache_all(void) if (is_isa_arcv2() && !slc_data_bypass()) __slc_entire_op(OP_FLUSH); } + +/* + * This is function to cleanup all caches (and therefore sync I/D caches) which + * can be used for cleanup before linux launch or to sync caches during + * relocation. + */ +void sync_n_cleanup_cache_all(void) +{ + __dc_entire_op(OP_FLUSH_N_INV); + + /* + * If SL$ is bypassed for data it is used only for instructions, + * and we shouldn't flush it. So invalidate it instead of flush_n_inv. + */ + if (is_isa_arcv2()) { + if (slc_data_bypass()) + __slc_entire_op(OP_INV); + else + __slc_entire_op(OP_FLUSH_N_INV); + } + + __ic_entire_invalidate(); +} diff --git a/arch/arc/lib/init_helpers.c b/arch/arc/lib/init_helpers.c index dbc8d68ffb..435fe96ef4 100644 --- a/arch/arc/lib/init_helpers.c +++ b/arch/arc/lib/init_helpers.c @@ -4,14 +4,14 @@ * SPDX-License-Identifier: GPL-2.0+ */ +#include #include DECLARE_GLOBAL_DATA_PTR; int init_cache_f_r(void) { -#ifndef CONFIG_SYS_DCACHE_OFF - flush_dcache_all(); -#endif + sync_n_cleanup_cache_all(); + return 0; } From 7241944a5b3069205f615f8a0a5d9f3833c80156 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:59:03 +0300 Subject: [PATCH 23/26] ARC: Cache: Add more HW configuration checks Add additional cache configuration checks and note about supported configurations. It is unlikely to face some configuration in real life but it's better to be prepared and refuse to work on those. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 75 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 5d7583d868..fd70ce8efe 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -85,6 +85,66 @@ * enabling force function inline with '__attribute__((always_inline))' gcc * attribute to avoid any function call (and BLINK store) between cache flush * and disable. + * + * + * [ NOTE 2 ]: + * As of today we only support the following cache configurations on ARC. + * Other configurations may exist in HW (for example, since version 3.0 HS + * supports SL$ (L2 system level cache) disable) but we don't support it in SW. + * Configuration 1: + * ______________________ + * | | + * | ARC CPU | + * |______________________| + * ___|___ ___|___ + * | | | | + * | L1 I$ | | L1 D$ | + * |_______| |_______| + * on/off on/off + * ___|______________|____ + * | | + * | main memory | + * |______________________| + * + * Configuration 2: + * ______________________ + * | | + * | ARC CPU | + * |______________________| + * ___|___ ___|___ + * | | | | + * | L1 I$ | | L1 D$ | + * |_______| |_______| + * on/off on/off + * ___|______________|____ + * | | + * | L2 (SL$) | + * |______________________| + * always must be on + * ___|______________|____ + * | | + * | main memory | + * |______________________| + * + * Configuration 3: + * ______________________ + * | | + * | ARC CPU | + * |______________________| + * ___|___ ___|___ + * | | | | + * | L1 I$ | | L1 D$ | + * |_______| |_______| + * on/off must be on + * ___|______________|____ _______ + * | | | | + * | L2 (SL$) |-----| IOC | + * |______________________| |_______| + * always must be on on/off + * ___|______________|____ + * | | + * | main memory | + * |______________________| */ DECLARE_GLOBAL_DATA_PTR; @@ -308,6 +368,14 @@ static void arc_ioc_setup(void) /* IOC Aperture size is equal to DDR size */ long ap_size = CONFIG_SYS_SDRAM_SIZE; + /* Unsupported configuration. See [ NOTE 2 ] for more details. */ + if (!slc_exists()) + panic("Try to enable IOC but SLC is not present"); + + /* Unsupported configuration. See [ NOTE 2 ] for more details. */ + if (!dcache_enabled()) + panic("Try to enable IOC but L1 D$ is disabled"); + flush_n_invalidate_dcache_all(); if (!is_power_of_2(ap_size) || ap_size < 4096) @@ -338,6 +406,13 @@ static void read_decode_cache_bcr_arcv2(void) if (slc_exists()) { slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG); gd->arch.slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64; + + /* + * We don't support configuration where L1 I$ or L1 D$ is + * absent but SL$ exists. See [ NOTE 2 ] for more details. + */ + if (!icache_exists() || !dcache_exists()) + panic("Unsupported cache configuration: SLC exists but one of L1 caches is absent"); } #endif /* CONFIG_ISA_ARCV2 */ From 9f0253c61acaa5c86adb3f6d41a5f95102609aa0 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:59:04 +0300 Subject: [PATCH 24/26] ARC: Cache: Add missing cache cleanup before cache disable Add missing cache cleanup before cache disable: * Flush and invalidate L1 D$ before disabling. Flush and invalidate SLC before L1 D$ disabling (as it will be bypassed for data) Otherwise we can lose some data when we disable L1 D$ if this data isn't flushed to next level cache. Or we can get wrong data if L1 D$ has some entries after enable which we modified when the L1 D$ was disabled. * Invalidate L1 I$ before disabling. Otherwise we can execute wrong instructions after L1 I$ enable if we modified any code when L1 I$ was disabled. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 53 ++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index fd70ce8efe..99776066d3 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -169,6 +169,16 @@ DECLARE_GLOBAL_DATA_PTR; #define CACHE_LINE_MASK (~(gd->arch.l1_line_sz - 1)) +/* + * We don't want to use '__always_inline' macro here as it can be redefined + * to simple 'inline' in some cases which breaks stuff. See [ NOTE 1 ] for more + * details about the reasons we need to use always_inline functions. + */ +#define inlined_cachefunc inline __attribute__((always_inline)) + +static inlined_cachefunc void __ic_entire_invalidate(void); +static inlined_cachefunc void __dc_entire_op(const int cacheop); + static inline bool pae_exists(void) { /* TODO: should we compare mmu version from BCR and from CONFIG? */ @@ -184,7 +194,7 @@ static inline bool pae_exists(void) return false; } -static inline bool icache_exists(void) +static inlined_cachefunc bool icache_exists(void) { union bcr_di_cache ibcr; @@ -192,7 +202,7 @@ static inline bool icache_exists(void) return !!ibcr.fields.ver; } -static inline bool icache_enabled(void) +static inlined_cachefunc bool icache_enabled(void) { if (!icache_exists()) return false; @@ -200,7 +210,7 @@ static inline bool icache_enabled(void) return !(read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE); } -static inline bool dcache_exists(void) +static inlined_cachefunc bool dcache_exists(void) { union bcr_di_cache dbcr; @@ -208,7 +218,7 @@ static inline bool dcache_exists(void) return !!dbcr.fields.ver; } -static inline bool dcache_enabled(void) +static inlined_cachefunc bool dcache_enabled(void) { if (!dcache_exists()) return false; @@ -216,7 +226,7 @@ static inline bool dcache_enabled(void) return !(read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE); } -static inline bool slc_exists(void) +static inlined_cachefunc bool slc_exists(void) { if (is_isa_arcv2()) { union bcr_generic sbcr; @@ -228,7 +238,7 @@ static inline bool slc_exists(void) return false; } -static inline bool slc_data_bypass(void) +static inlined_cachefunc bool slc_data_bypass(void) { /* * If L1 data cache is disabled SL$ is bypassed and all load/store @@ -261,7 +271,7 @@ static inline bool ioc_enabled(void) return false; } -static void __slc_entire_op(const int op) +static inlined_cachefunc void __slc_entire_op(const int op) { unsigned int ctrl; @@ -469,13 +479,17 @@ void icache_enable(void) void icache_disable(void) { - if (icache_exists()) - write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) | - IC_CTRL_CACHE_DISABLE); + if (!icache_exists()) + return; + + __ic_entire_invalidate(); + + write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) | + IC_CTRL_CACHE_DISABLE); } /* IC supports only invalidation */ -static inline void __ic_entire_invalidate(void) +static inlined_cachefunc void __ic_entire_invalidate(void) { if (!icache_enabled()) return; @@ -525,6 +539,17 @@ void dcache_disable(void) if (!dcache_exists()) return; + __dc_entire_op(OP_FLUSH_N_INV); + + /* + * As SLC will be bypassed for data after L1 D$ disable we need to + * flush it first before L1 D$ disable. Also we invalidate SLC to + * avoid any inconsistent data problems after enabling L1 D$ again with + * dcache_enable function. + */ + if (is_isa_arcv2()) + __slc_entire_op(OP_FLUSH_N_INV); + write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) | DC_CTRL_CACHE_DISABLE); } @@ -553,7 +578,7 @@ static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz, } } -static void __before_dc_op(const int op) +static inlined_cachefunc void __before_dc_op(const int op) { unsigned int ctrl; @@ -568,13 +593,13 @@ static void __before_dc_op(const int op) write_aux_reg(ARC_AUX_DC_CTRL, ctrl); } -static void __after_dc_op(const int op) +static inlined_cachefunc void __after_dc_op(const int op) { if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS); } -static inline void __dc_entire_op(const int cacheop) +static inlined_cachefunc void __dc_entire_op(const int cacheop) { int aux; From 6b85b26e4bc76bcc92571ae90c913b57ae32423c Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 21 Mar 2018 15:59:05 +0300 Subject: [PATCH 25/26] ARC: Cache: Refactor arc_ioc_setup() Move all checks before cache flush and IOC setup. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 99776066d3..8203fae145 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -386,11 +386,15 @@ static void arc_ioc_setup(void) if (!dcache_enabled()) panic("Try to enable IOC but L1 D$ is disabled"); - flush_n_invalidate_dcache_all(); - if (!is_power_of_2(ap_size) || ap_size < 4096) panic("IOC Aperture size must be power of 2 and bigger 4Kib"); + /* IOC Aperture start must be aligned to the size of the aperture */ + if (ap_base % ap_size != 0) + panic("IOC Aperture start must be aligned to the size of the aperture"); + + flush_n_invalidate_dcache_all(); + /* * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, * so setting 0x11 implies 512M, 0x12 implies 1G... @@ -398,10 +402,6 @@ static void arc_ioc_setup(void) write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE, order_base_2(ap_size / 1024) - 2); - /* IOC Aperture start must be aligned to the size of the aperture */ - if (ap_base % ap_size != 0) - panic("IOC Aperture start must be aligned to the size of the aperture"); - write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12); write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1); write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1); From f665c14f04d225597cd3cb0c6faa35ddc2a1b2df Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 23 Mar 2018 15:35:03 +0300 Subject: [PATCH 26/26] ARC: bootm: Refactor GO and PREP subcommands implementation Refactor GO and PREP subcommands implementation for a simpler override in the boards platform code. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/bootm.c | 51 +++++++++++++++++++--------------- board/synopsys/axs10x/axs10x.c | 12 ++++++++ board/synopsys/hsdk/hsdk.c | 11 ++++++++ 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/arch/arc/lib/bootm.c b/arch/arc/lib/bootm.c index f1ab344773..4f04aad34a 100644 --- a/arch/arc/lib/bootm.c +++ b/arch/arc/lib/bootm.c @@ -46,35 +46,47 @@ static int cleanup_before_linux(void) return 0; } +__weak int board_prep_linux(bootm_headers_t *images) { return 0; } + /* Subcommand: PREP */ -static void boot_prep_linux(bootm_headers_t *images) +static int boot_prep_linux(bootm_headers_t *images) { - if (image_setup_linux(images)) - hang(); + int ret; + + ret = image_setup_linux(images); + if (ret) + return ret; + + return board_prep_linux(images); } -__weak void smp_set_core_boot_addr(unsigned long addr, int corenr) {} -__weak void smp_kick_all_cpus(void) {} +/* Generic implementation for single core CPU */ +__weak void board_jump_and_run(ulong entry, int zero, int arch, uint params) +{ + void (*kernel_entry)(int zero, int arch, uint params); + + kernel_entry = (void (*)(int, int, uint))entry; + + kernel_entry(zero, arch, params); +} /* Subcommand: GO */ static void boot_jump_linux(bootm_headers_t *images, int flag) { - void (*kernel_entry)(int zero, int arch, uint params); + ulong kernel_entry; unsigned int r0, r2; int fake = (flag & BOOTM_STATE_OS_FAKE_GO); - kernel_entry = (void (*)(int, int, uint))images->ep; + kernel_entry = images->ep; debug("## Transferring control to Linux (at address %08lx)...\n", - (ulong) kernel_entry); + kernel_entry); bootstage_mark(BOOTSTAGE_ID_RUN_OS); printf("\nStarting kernel ...%s\n\n", fake ? "(fake run for tracing)" : ""); bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel"); - cleanup_before_linux(); - if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) { r0 = 2; r2 = (unsigned int)images->ft_addr; @@ -83,11 +95,10 @@ static void boot_jump_linux(bootm_headers_t *images, int flag) r2 = (unsigned int)env_get("bootargs"); } - if (!fake) { - smp_set_core_boot_addr((unsigned long)kernel_entry, -1); - smp_kick_all_cpus(); - kernel_entry(r0, 0, r2); - } + cleanup_before_linux(); + + if (!fake) + board_jump_and_run(kernel_entry, r0, 0, r2); } int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images) @@ -96,17 +107,13 @@ int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images) if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE)) return -1; - if (flag & BOOTM_STATE_OS_PREP) { - boot_prep_linux(images); - return 0; - } + if (flag & BOOTM_STATE_OS_PREP) + return boot_prep_linux(images); if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) { boot_jump_linux(images, flag); return 0; } - boot_prep_linux(images); - boot_jump_linux(images, flag); - return 0; + return -1; } diff --git a/board/synopsys/axs10x/axs10x.c b/board/synopsys/axs10x/axs10x.c index e6b69da3da..18f7666b15 100644 --- a/board/synopsys/axs10x/axs10x.c +++ b/board/synopsys/axs10x/axs10x.c @@ -47,6 +47,18 @@ int board_early_init_f(void) } #ifdef CONFIG_ISA_ARCV2 + +void board_jump_and_run(ulong entry, int zero, int arch, uint params) +{ + void (*kernel_entry)(int zero, int arch, uint params); + + kernel_entry = (void (*)(int, int, uint))entry; + + smp_set_core_boot_addr(entry, -1); + smp_kick_all_cpus(); + kernel_entry(zero, arch, params); +} + #define RESET_VECTOR_ADDR 0x0 void smp_set_core_boot_addr(unsigned long addr, int corenr) diff --git a/board/synopsys/hsdk/hsdk.c b/board/synopsys/hsdk/hsdk.c index 7641978a7b..5b3a063b69 100644 --- a/board/synopsys/hsdk/hsdk.c +++ b/board/synopsys/hsdk/hsdk.c @@ -58,6 +58,17 @@ int board_mmc_init(bd_t *bis) return 0; } +void board_jump_and_run(ulong entry, int zero, int arch, uint params) +{ + void (*kernel_entry)(int zero, int arch, uint params); + + kernel_entry = (void (*)(int, int, uint))entry; + + smp_set_core_boot_addr(entry, -1); + smp_kick_all_cpus(); + kernel_entry(zero, arch, params); +} + #define RESET_VECTOR_ADDR 0x0 void smp_set_core_boot_addr(unsigned long addr, int corenr)