# Patch summary (free text before the first "diff --git" header is skipped by
# patch tools):
#   Makefile                         - move clean/distclean to the end of the
#                                      file, let distclean also remove the
#                                      buildroot marker, drop the stray "make"
#                                      goal from the buildroot sub-makes
#   bare_metal_test/Makefile         - build the bare-metal test with -O2
#   bare_metal_test/bare             - rebuilt binary (opaque)
#   bare_metal_test/bare.c           - add print_hex, memcpy_manual, memcpy_csr
#                                      and the printing/memcpy selftests
#   bare_metal_test/init.S           - "move" -> "mv"; stop reloading x2 on
#                                      trap exit
#   linux-_pi_-patches-for-rvc.patch - enable the CSR memcpy path for copies of
#                                      exactly 4096 bytes
#   src/uart.h                       - emit THR as a character, not a decimal
#
# NOTE(review): this text was received with its newlines collapsed. Line
# breaks were restored from the hunk line counts; indentation width and the
# position of a few blank context lines are a best guess. If a hunk is
# rejected, retry with `git apply --ignore-whitespace`.
#
# Change relative to the received text: the memcpy selftest in main() tracks
# failure with an explicit flag instead of using ~0 as an in-band sentinel
# (a copied word of 0xFFFFFFFF used to hide the failure). That hunk header
# grew from +106,52 to +106,57 and the post-image hash on the bare.c index
# line no longer matches.
diff --git a/Makefile b/Makefile
index f3f043da..dafb17fb 100644
--- a/Makefile
+++ b/Makefile
@@ -17,16 +17,6 @@ CFLAGS=-g
 $(TARGET): $(OBJS)
 	$(CC) $(LDFLAGS) -I./elfy/elfy.h $(OBJS) -o $@ $(LOADLIBES) $(LDLIBS) -L./elfy/target/release/ -Wl,--no-as-needed -ldl -lpthread -lelfy
 
-.PHONY: clean
-clean:
-	$(RM) $(TARGET) $(OBJS) $(DEPS)
-
-.PHONY: distclean
-distclean: clean
-	$(RM) $(TARGET) $(OBJS) $(DEPS) *_payload.*
-	$(MAKE) -C opensbi clean
-	$(MAKE) -C linux clean
-
 -include $(DEPS)
 
 # build device tree
@@ -39,9 +29,9 @@ LINUX_PAYLOAD=linux/arch/riscv/boot/Image
 BUILDROOT_MARKER=buildroot-2021.05/build.marker
 
 $(BUILDROOT_MARKER):
-	$(MAKE) -C buildroot-2021.05 make
+	$(MAKE) -C buildroot-2021.05
 	cd buildroot-2021.05 && cp init output/target/ && cp pi.js output/target/
-	$(MAKE) -C buildroot-2021.05 make
+	$(MAKE) -C buildroot-2021.05
 	touch $@
 
 $(PAYLOAD): $(shell find rust_payload/src -type f)
@@ -101,3 +91,15 @@ run: fw_payload.bin $(TARGET) dts.dtb
 
 run-v1: fw_payload.bin $(TARGET) dts.dtb
 	./rvc -b fw_payload.bin -d dts.dtb -v1
+
+
+.PHONY: clean
+clean:
+	$(RM) $(TARGET) $(OBJS) $(DEPS)
+
+.PHONY: distclean
+distclean: clean
+	$(RM) $(TARGET) $(OBJS) $(DEPS) *_payload.*
+	$(MAKE) -C opensbi clean
+	$(MAKE) -C linux clean
+	$(RM) $(BUILDROOT_MARKER)
diff --git a/bare_metal_test/Makefile b/bare_metal_test/Makefile
index 0b316554..35966e11 100644
--- a/bare_metal_test/Makefile
+++ b/bare_metal_test/Makefile
@@ -15,7 +15,7 @@ DEPS := $(OBJS:.o=.d)
 INC_DIRS := $(shell find $(SRC_DIRS) -type d)
 INC_FLAGS := $(addprefix -I,$(INC_DIRS))
 
-CFLAGS += -O3 -march=rv32ima -mabi=ilp32 -fno-stack-protector -fno-pie
+CFLAGS += -O2 -march=rv32ima -mabi=ilp32 -fno-stack-protector -fno-pie
 ASFLAGS += -march=rv32ima -mabi=ilp32
 LDFLAGS += -static -nostdlib -nostartfiles -nodefaultlibs -Triscv32.ld -Wl,--no-as-needed
 
diff --git a/bare_metal_test/bare b/bare_metal_test/bare
index 45102ac3..7008f504 100755
Binary files a/bare_metal_test/bare and b/bare_metal_test/bare differ
diff --git a/bare_metal_test/bare.c b/bare_metal_test/bare.c
index 79c66384..5adea961 100644
--- a/bare_metal_test/bare.c
+++ b/bare_metal_test/bare.c
@@ -16,26 +16,89 @@ void print(const char* str) {
     }
 }
 
+void print_hex(unsigned int number) {
+    for (int i = 0; i < 8; i++) {
+        switch (number & 0xF0000000) {
+            case 0x00000000: uart_putc('0'); break;
+            case 0x10000000: uart_putc('1'); break;
+            case 0x20000000: uart_putc('2'); break;
+            case 0x30000000: uart_putc('3'); break;
+            case 0x40000000: uart_putc('4'); break;
+            case 0x50000000: uart_putc('5'); break;
+            case 0x60000000: uart_putc('6'); break;
+            case 0x70000000: uart_putc('7'); break;
+            case 0x80000000: uart_putc('8'); break;
+            case 0x90000000: uart_putc('9'); break;
+            case 0xA0000000: uart_putc('A'); break;
+            case 0xB0000000: uart_putc('B'); break;
+            case 0xC0000000: uart_putc('C'); break;
+            case 0xD0000000: uart_putc('D'); break;
+            case 0xE0000000: uart_putc('E'); break;
+            case 0xF0000000: uart_putc('F'); break;
+        }
+        number <<= 4;
+    }
+}
+
 static bool got_ssip = false;
 volatile static long long a = 0xaaaaffffffff;
 
+static int pattern_good = 0x12345678;
+static int pattern_bad = 0xdeadbeef;
+
+static char print_buf[256];
+
 // runs in supervisor mode via mideleg (machine-mode trap delegation)
 void handle_trap(void) {
     print("[bare/trap] got SSIP!\n");
     got_ssip = true;
 }
 
+// byte-for-byte memcpy
+void memcpy_manual(unsigned char *dst, const unsigned char* src, int n) {
+    for (int i = 0; i < n; i++) {
+        dst[i] = src[i];
+    }
+}
+
+// CSR-based memcpy
+void memcpy_csr(unsigned char *dst, const unsigned char* src, int n) {
+    print("[bare/memcpy_csr] src=0x");
+    print_hex((unsigned int)src);
+    print(" dst=0x");
+    print_hex((unsigned int)dst);
+    print("\n");
+    __asm volatile ("csrw 0x0b1, %0" :: "r" (src));
+    __asm volatile ("csrw 0x0b2, %0" :: "r" (dst));
+    __asm volatile ("csrw 0x0b3, %0" :: "r" (n));
+    __asm volatile ("csrw 0x0b0, %0" :: "r" (1) : "memory");
+}
+
 // called from assembly, runs in supervisor mode (via sret)
 int main(void) {
     print("[bare] in main()\n");
 
+    print("[bare] testing printing...\n[bare] 0x");
+    print_hex(0xabcd1234);
+    uart_putc('\n');
+
     print("[bare] testing arithmetic...\n");
     long long b = a * 400LLU * -1;
 
     if (b != -0x10AAB2FFFFFFE70LL) {
-        print("[bare] got wrong result :(\n");
+        print("[bare/error] got wrong result :(\n[bare/error] low=0x");
+        print_hex((unsigned int)b);
+        print("\n[bare/error] high=0x");
+        print_hex((unsigned int)(b >> 32));
+        print("\n");
+        __asm volatile ("ebreak");
+        while (true) {}
     } else {
-        print("[bare] got correct result!\n");
+        print("[bare] got correct result!\n[bare] low=0x");
+        print_hex((unsigned int)b);
+        print("\n[bare] high=0x");
+        print_hex((unsigned int)(b >> 32));
+        print("\n");
     }
 
     print("[bare] testing trap handler...\n");
@@ -43,6 +106,57 @@ int main(void) {
 
     while (!got_ssip) {}
 
+    print("[bare] testing memcpy...\n");
+    int src[128];
+    int dst_man[128];
+    int dst_csr[128];
+
+    for (int i = 0; i < 128; i++) {
+        src[i] = pattern_good;
+        dst_man[i] = pattern_bad;
+        dst_csr[i] = pattern_bad;
+    }
+
+    memcpy_manual((unsigned char*)dst_man,
+                  (const unsigned char*)src,
+                  sizeof(src));
+
+    memcpy_csr((unsigned char*)dst_csr,
+               (const unsigned char*)src,
+               sizeof(src));
+
+    for (int i = 0; i < 128; i++) {
+        // failure is tracked separately from the offending word: every
+        // 32-bit value, 0xFFFFFFFF included, is a legal word to report
+        bool failed = false;
+        int bad = 0;
+
+        if (dst_man[i] != src[i] || dst_man[i] != pattern_good) {
+            print("[bare/error] memcpy (manual) failed\n");
+            failed = true;
+            bad = dst_man[i];
+        }
+        if (dst_csr[i] != src[i] || dst_csr[i] != pattern_good) {
+            print("[bare/error] memcpy (CSR) failed\n");
+            failed = true;
+            bad = dst_csr[i];
+        }
+
+        if (failed) {
+            print("[bare/error] dst[0x");
+            print_hex(i);
+            print("] = 0x");
+            print_hex(bad);
+            print(" src[0x");
+            print_hex(i);
+            print("] = 0x");
+            print_hex(src[i]);
+            print("\n");
+            __asm volatile ("ebreak");
+            while (true) {}
+        }
+    }
+
     print("[bare] selftest done!\n");
     print("[bare] now go run something more exciting...\n");
 
diff --git a/bare_metal_test/init.S b/bare_metal_test/init.S
index 16d2a6b8..9d37a06e 100644
--- a/bare_metal_test/init.S
+++ b/bare_metal_test/init.S
@@ -88,7 +88,7 @@ _s_trap_entry:
     STORE t2, 34*REGBYTES(x2)
     STORE t3, 35*REGBYTES(x2)
 
-    move a0, sp
+    mv a0, sp
     jal handle_trap
 
     mv a0, sp
@@ -101,7 +101,6 @@ _s_trap_entry:
 
     # restore x registers
     LOAD x1,1*REGBYTES(a0)
-    LOAD x2,2*REGBYTES(a0)
     LOAD x3,3*REGBYTES(a0)
     LOAD x4,4*REGBYTES(a0)
     LOAD x5,5*REGBYTES(a0)
diff --git a/linux-_pi_-patches-for-rvc.patch b/linux-_pi_-patches-for-rvc.patch
index 225a4ba7..a940012a 100644
--- a/linux-_pi_-patches-for-rvc.patch
+++ b/linux-_pi_-patches-for-rvc.patch
@@ -1,44 +1,45 @@
-From fbba62aff7a727723aa75715e1d1838b484ba842 Mon Sep 17 00:00:00 2001
+From 30022eebb78abc5a8a9289aecbd30a155fafcff0 Mon Sep 17 00:00:00 2001
 From: Stefan
 Date: Sun, 1 Aug 2021 20:58:20 +0200
 Subject: [PATCH] _pi_ patches for rvc
 
-* use MEMOP extension for faster memcpy (disabled atm)
+* use MEMOP extension for faster memcpy
 * don't poison init kernel area, we don't care about security
 * print more messages about initramfs loading, since otherwise it just looks
   like it got stuck
 
 Signed-off-by: Stefan
 ---
- arch/riscv/lib/memcpy.S | 13 +++++++++++++
+ arch/riscv/lib/memcpy.S | 14 ++++++++++++++
  include/linux/mm.h      |  2 +-
  init/initramfs.c        | 19 +++++++++++++++++--
- 3 files changed, 31 insertions(+), 3 deletions(-)
+ 3 files changed, 32 insertions(+), 3 deletions(-)
 
 diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S
-index 51ab716253fa..67636579413d 100644
+index 51ab716253fa..67e3cf24d9d0 100644
 --- a/arch/riscv/lib/memcpy.S
 +++ b/arch/riscv/lib/memcpy.S
-@@ -41,6 +41,18 @@ WEAK(memcpy)
+@@ -41,6 +41,19 @@ WEAK(memcpy)
  	beqz a4, 4f
  	add a3, a1, a4
  3:
-+	#lla t0, 10f
-+	#li t2, 1025
-+	#bltu a2, t2, 11f
++	# fast memcpy for page-sized chunks
++	lla t0, 10f
++	li t2, 4096
++	bne a2, t2, 11f
 +
-+	#csrw 0x0b1, a1 # src
-+	#csrw 0x0b2, t6 # dst
-+	#csrw 0x0b3, a2 # n
-+	#li t2, 0x1
-+	#csrw 0x0b0, t2 # memcpy
-+	#j 10f
++	csrw 0x0b1, a1 # src
++	csrw 0x0b2, t6 # dst
++	csrw 0x0b3, a2 # n
++	li t2, 0x1
++	csrw 0x0b0, t2 # memcpy
++	j 10f
 +
 +11:
  	REG_L a4,       0(a1)
  	REG_L a5,   SZREG(a1)
  	REG_L a6, 2*SZREG(a1)
-@@ -76,6 +88,7 @@ WEAK(memcpy)
+@@ -76,6 +89,7 @@ WEAK(memcpy)
  	REG_S t1, 15*SZREG(t6)
  	addi t6, t6, 16*SZREG
  	bltu a1, a3, 3b
diff --git a/src/uart.h b/src/uart.h
index 530a779f..eb366489 100644
--- a/src/uart.h
+++ b/src/uart.h
@@ -57,7 +57,7 @@ void uart_tick(cpu_t *cpu) {
 
     uint thr = UART_GET1(THR);
     if ((cpu->clock % 0x16) == 0 && thr != 0) {
-        printf(" %d", (unsigned char)thr);
+        printf("%c", (unsigned char)thr);
         fflush(stdout);
         UART_SET1(THR, 0);
         UART_SET2(LSR, (UART_GET2(LSR) | LSR_THR_EMPTY));