From 15ea618a79886024f50cabac19d7f91811dec5fb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 28 Nov 2016 18:56:57 -0800 Subject: Pass argc and argv to main() --- bsp/env/freedom-e300-arty/init.c | 2 -- bsp/env/freedom-e300-hifive1/init.c | 2 -- bsp/env/start.S | 3 +++ 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/bsp/env/freedom-e300-arty/init.c b/bsp/env/freedom-e300-arty/init.c index c766e98..0a80cbb 100644 --- a/bsp/env/freedom-e300-arty/init.c +++ b/bsp/env/freedom-e300-arty/init.c @@ -59,8 +59,6 @@ void _init() printf("core freq at %d Hz\n", get_cpu_freq()); write_csr(mtvec, &trap_entry); - - // _exit(main(0, NULL)); } diff --git a/bsp/env/freedom-e300-hifive1/init.c b/bsp/env/freedom-e300-hifive1/init.c index c088079..167d652 100644 --- a/bsp/env/freedom-e300-hifive1/init.c +++ b/bsp/env/freedom-e300-hifive1/init.c @@ -188,8 +188,6 @@ void _init() write_csr(mstatus, MSTATUS_FS); // allow FPU instructions without trapping write_csr(fcsr, 0); // initialize rounding mode, undefined at reset } - - //_exit(main(0, NULL)); } void _fini() diff --git a/bsp/env/start.S b/bsp/env/start.S index 77e223d..b526411 100644 --- a/bsp/env/start.S +++ b/bsp/env/start.S @@ -47,5 +47,8 @@ _start: 1: #endif + /* argc = argv = 0 */ + li a0, 0 + li a1, 0 call main tail exit -- cgit v1.2.3 From 8c445a9e23ea4ccf34dfc52ee1030aa29681afbf Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 19 Dec 2016 18:50:53 -0800 Subject: Enable RVC by default --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ab02199..d19e956 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ openocd: $(openocd_dest)/bin/openocd $(toolchain_dest)/bin/$(target32)-gcc: $(toolchain_srcdir) mkdir -p $(toolchain32_wrkdir) - cd $(toolchain32_wrkdir); $(toolchain_srcdir)/configure --prefix=$(toolchain_dest) --with-arch=rv32ima --with-abi=ilp32 + cd $(toolchain32_wrkdir); $(toolchain_srcdir)/configure --prefix=$(toolchain_dest) 
--with-arch=rv32imac --with-abi=ilp32 $(MAKE) -C $(toolchain32_wrkdir) $(openocd_dest)/bin/openocd: $(openocd_srcdir) -- cgit v1.2.3 From adecf626f023ee974e502703bdd024aceb9c7678 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 20 Dec 2016 15:46:12 -0800 Subject: Avoid jal to weak symbols The symbol may be overridden and end up out of range of JAL. --- bsp/env/entry.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bsp/env/entry.S b/bsp/env/entry.S index cbf26eb..b433628 100644 --- a/bsp/env/entry.S +++ b/bsp/env/entry.S @@ -46,7 +46,7 @@ trap_entry: csrr a0, mcause csrr a1, mepc mv a2, sp - jal handle_trap + call handle_trap csrw mepc, a0 # Remain in M-mode after mret @@ -90,6 +90,7 @@ trap_entry: .weak handle_trap handle_trap: - j handle_trap +1: + j 1b #endif -- cgit v1.2.3 From 0f7bac0247ae4bbb6b4e51c72d4d5a25560ae2fe Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 20 Dec 2016 15:46:45 -0800 Subject: Separate .text.unlikely and .text.startup from .text This improves the instruction stream's spatial locality. 
--- bsp/env/freedom-e300-hifive1/link.lds | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bsp/env/freedom-e300-hifive1/link.lds b/bsp/env/freedom-e300-hifive1/link.lds index e224273..90e5c8f 100644 --- a/bsp/env/freedom-e300-hifive1/link.lds +++ b/bsp/env/freedom-e300-hifive1/link.lds @@ -26,6 +26,8 @@ SECTIONS .text : { + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) *(.text .text.*) *(.gnu.linkonce.t.*) } >flash AT>flash :flash -- cgit v1.2.3 From b1f048f991e7d743e46be9c48c73d00a843f400c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 21 Dec 2016 11:09:10 -0800 Subject: Keep mtvec 4-byte aligned --- bsp/env/entry.S | 1 + 1 file changed, 1 insertion(+) diff --git a/bsp/env/entry.S b/bsp/env/entry.S index b433628..1f5de24 100644 --- a/bsp/env/entry.S +++ b/bsp/env/entry.S @@ -7,6 +7,7 @@ #include "sifive/bits.h" .section .text.entry + .align 2 .global trap_entry trap_entry: addi sp, sp, -32*REGBYTES -- cgit v1.2.3 From 2398dfda399f445cf114e29b61d9331fddb09b4e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 3 Jan 2017 17:45:07 -0800 Subject: Improve HiFive1 CPU frequency measurement routine - Warm up I$ first. - Correct for integer division truncation error. - Wait for an RTC clock edge before starting the timing loop, which removes an error proportional to the number of loop iterations, allowing us to run for far less time. 
--- bsp/env/freedom-e300-hifive1/init.c | 38 ++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/bsp/env/freedom-e300-hifive1/init.c b/bsp/env/freedom-e300-hifive1/init.c index 167d652..61a1ae3 100644 --- a/bsp/env/freedom-e300-hifive1/init.c +++ b/bsp/env/freedom-e300-hifive1/init.c @@ -5,8 +5,6 @@ #include "platform.h" #include "encoding.h" -uint32_t cpu_freq = 0; - extern int main(int argc, char** argv); extern void trap_entry(); @@ -116,21 +114,40 @@ static void use_default_clocks() use_hfrosc(4, 16); } -void measure_cpu_freq(size_t n, size_t mtime_freq) +static unsigned long __attribute__((noinline)) measure_cpu_freq(size_t n) { - uint32_t start_mtime = mtime_lo(); - uint32_t start_mcycle = mcycle_lo(); + unsigned long start_mtime, delta_mtime; + unsigned long mtime_freq = get_timer_freq(); + + // Don't start measuring until we see an mtime tick + unsigned long tmp = mtime_lo(); + do { + start_mtime = mtime_lo(); + } while (start_mtime == tmp); + + unsigned long start_mcycle = read_csr(mcycle); - while (mtime_lo() - start_mtime < n) ; + do { + delta_mtime = mtime_lo() - start_mtime; + } while (delta_mtime < n); - uint32_t end_mtime = mtime_lo(); - uint32_t end_mcycle = mcycle_lo(); + unsigned long delta_mcycle = read_csr(mcycle) - start_mcycle; - cpu_freq = (end_mcycle-start_mcycle)/n*mtime_freq; + return (delta_mcycle / delta_mtime) * mtime_freq + + ((delta_mcycle % delta_mtime) * mtime_freq) / delta_mtime; } -uint32_t get_cpu_freq() +unsigned long get_cpu_freq() { + static uint32_t cpu_freq; + + if (!cpu_freq) { + // warm up I$ + measure_cpu_freq(1); + // measure for real + cpu_freq = measure_cpu_freq(10); + } + return cpu_freq; } @@ -178,7 +195,6 @@ void _init() { use_default_clocks(); use_pll(0, 0, 1, 31, 1); - measure_cpu_freq(1000, 32768); uart_init(115200); printf("core freq at %d Hz\n", get_cpu_freq()); -- cgit v1.2.3 From 005b1a8f84ff743710ebd693b70d208da583098d Mon Sep 17 00:00:00 2001 From: Andrew
Waterman Date: Tue, 3 Jan 2017 17:45:33 -0800 Subject: Regularize timing code Provide get_timer_value() and get_timer_freq() and use them. On Arty, they use mcycle and the known-fixed core frequency, whereas on HiFive1 they use mtime and the known-fixed mtime frequency. --- bsp/env/freedom-e300-arty/init.c | 21 ++++++++++++++++++- bsp/env/freedom-e300-arty/platform.h | 3 +++ bsp/env/freedom-e300-hifive1/init.c | 36 +++++++++++++++++++++++++++------ bsp/env/freedom-e300-hifive1/platform.h | 4 +++- software/dhrystone/dhry_stubs.c | 7 ++----- 5 files changed, 58 insertions(+), 13 deletions(-) diff --git a/bsp/env/freedom-e300-arty/init.c b/bsp/env/freedom-e300-arty/init.c index 0a80cbb..35b1104 100644 --- a/bsp/env/freedom-e300-arty/init.c +++ b/bsp/env/freedom-e300-arty/init.c @@ -9,11 +9,30 @@ extern int main(int argc, char** argv); extern void trap_entry(); -uint32_t get_cpu_freq() +static unsigned long get_cpu_freq() { return 65000000; } +unsigned long get_timer_freq() +{ + return get_cpu_freq(); +} + +uint64_t get_timer_value() +{ +#if __riscv_xlen == 32 + while (1) { + uint32_t hi = read_csr(mcycleh); + uint32_t lo = read_csr(mcycle); + if (hi == read_csr(mcycleh)) + return ((uint64_t)hi << 32) | lo; + } +#else + return read_csr(mcycle); +#endif +} + static void uart_init(size_t baud_rate) { GPIO_REG(GPIO_IOF_SEL) &= ~IOF0_UART0_MASK; diff --git a/bsp/env/freedom-e300-arty/platform.h b/bsp/env/freedom-e300-arty/platform.h index 1f62956..d5d6dda 100644 --- a/bsp/env/freedom-e300-arty/platform.h +++ b/bsp/env/freedom-e300-arty/platform.h @@ -119,4 +119,7 @@ #define HAS_BOARD_BUTTONS #include "hifive1.h" +unsigned long get_timer_freq(void); +uint64_t get_timer_value(void); + #endif /* _SIFIVE_PLATFORM_H */ diff --git a/bsp/env/freedom-e300-hifive1/init.c b/bsp/env/freedom-e300-hifive1/init.c index 61a1ae3..71e1659 100644 --- a/bsp/env/freedom-e300-hifive1/init.c +++ b/bsp/env/freedom-e300-hifive1/init.c @@ -8,16 +8,40 @@ extern int main(int argc, char** argv); 
extern void trap_entry(); -uint32_t mtime_lo(void) +static unsigned long mtime_lo(void) { - return *(volatile uint32_t *)(CLINT_BASE_ADDR + CLINT_MTIME); + return *(volatile unsigned long *)(CLINT_BASE_ADDR + CLINT_MTIME); } -uint32_t mcycle_lo(void) +#ifdef __riscv32 + +static uint32_t mtime_hi(void) +{ + return *(volatile uint32_t *)(CLINT_BASE_ADDR + CLINT_MTIME + 4); +} + +uint64_t get_timer_value() +{ + while (1) { + uint32_t hi = mtime_hi(); + uint32_t lo = mtime_lo(); + if (hi == mtime_hi()) + return ((uint64_t)hi << 32) | lo; + } +} + +#else /* __riscv32 */ + +uint64_t get_timer_value() +{ + return mtime_lo(); +} + +#endif + +unsigned long get_timer_freq() { - uint32_t t; - asm volatile ("csrr %0, mcycle" : "=r" (t)); - return t; + return 32768; } static void use_hfrosc(int div, int trim) diff --git a/bsp/env/freedom-e300-hifive1/platform.h b/bsp/env/freedom-e300-hifive1/platform.h index eca708e..63efc9e 100644 --- a/bsp/env/freedom-e300-hifive1/platform.h +++ b/bsp/env/freedom-e300-hifive1/platform.h @@ -126,6 +126,8 @@ #include "hifive1.h" -uint32_t get_cpu_freq(); +unsigned long get_cpu_freq(void); +unsigned long get_timer_freq(void); +uint64_t get_timer_value(void); #endif /* _SIFIVE_PLATFORM_H */ diff --git a/software/dhrystone/dhry_stubs.c b/software/dhrystone/dhry_stubs.c index d3bd14c..0616f86 100644 --- a/software/dhrystone/dhry_stubs.c +++ b/software/dhrystone/dhry_stubs.c @@ -3,16 +3,13 @@ /* The functions in this file are only meant to support Dhrystone on an * embedded RV32 system and are obviously incorrect in general. 
*/ -// return the cycle counter as though it were the current time long time(void) { - long t; - asm volatile ("csrr %0, mcycle" : "=r" (t)); - return t / (get_cpu_freq() / 1000); + return get_timer_value() / get_timer_freq(); } // set the number of dhrystone iterations void __wrap_scanf(const char* fmt, int* n) { - *n = 1500000; + *n = 100000000; } -- cgit v1.2.3 From 628d2b3559be5e9e651801d289a075d68df820e8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 3 Jan 2017 17:58:18 -0800 Subject: Compile Dhrystone without RVC Branch target misalignment reduces performance by about 10%. --- software/dhrystone/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/software/dhrystone/Makefile b/software/dhrystone/Makefile index a55b1ec..78a7b23 100644 --- a/software/dhrystone/Makefile +++ b/software/dhrystone/Makefile @@ -5,7 +5,7 @@ C_SRCS := dhry_stubs.c dhry_printf.c HEADERS := dhry.h DHRY_SRCS := dhry_1.c dhry_2.c -DHRY_CFLAGS := -O2 -DTIME -fno-inline -fno-builtin-printf -Wno-implicit +DHRY_CFLAGS := -O2 -DTIME -fno-inline -fno-builtin-printf -Wno-implicit -march=rv32ima XLEN ?= 32 CFLAGS := -Os -fno-common -- cgit v1.2.3 From 15dc05331b9821cb449e7bf433a24ddfdfa3c295 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 3 Jan 2017 18:54:30 -0800 Subject: Add CoreMark build skeleton --- software/coremark/.gitignore | 9 +++++++ software/coremark/Makefile | 20 ++++++++++++++ software/coremark/core_portme.c | 52 ++++++++++++++++++++++++++++++++++++ software/coremark/core_portme.h | 58 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 software/coremark/.gitignore create mode 100644 software/coremark/Makefile create mode 100644 software/coremark/core_portme.c create mode 100644 software/coremark/core_portme.h diff --git a/software/coremark/.gitignore b/software/coremark/.gitignore new file mode 100644 index 0000000..6bd8438 --- /dev/null +++ b/software/coremark/.gitignore @@ -0,0 +1,9 @@ +/*.o 
+/coremark +/core_list_join.c +/core_main.c +/coremark.h +/core_matrix.c +/core_state.c +/core_util.c +/trans.c diff --git a/software/coremark/Makefile b/software/coremark/Makefile new file mode 100644 index 0000000..b57151e --- /dev/null +++ b/software/coremark/Makefile @@ -0,0 +1,20 @@ +TARGET := coremark + +C_SRCS := \ + core_list_join.c \ + core_main.c \ + core_matrix.c \ + core_state.c \ + core_util.c \ + core_portme.c \ + +HEADERS := \ + coremark.h \ + core_portme.h \ + +CFLAGS := -O2 -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 +CFLAGS += -DFLAGS_STR=\""$(CFLAGS)"\" +CFLAGS += -DITERATIONS=10000 -DPERFORMANCE_RUN=1 + +BSP_BASE = ../../bsp +include $(BSP_BASE)/env/common.mk diff --git a/software/coremark/core_portme.c b/software/coremark/core_portme.c new file mode 100644 index 0000000..cbf1396 --- /dev/null +++ b/software/coremark/core_portme.c @@ -0,0 +1,52 @@ +#include +#include +#include "coremark.h" +#include "platform.h" +#include "encoding.h" + +#if VALIDATION_RUN + volatile ee_s32 seed1_volatile=0x3415; + volatile ee_s32 seed2_volatile=0x3415; + volatile ee_s32 seed3_volatile=0x66; +#endif + +#if PERFORMANCE_RUN + volatile ee_s32 seed1_volatile=0x0; + volatile ee_s32 seed2_volatile=0x0; + volatile ee_s32 seed3_volatile=0x66; +#endif + +#if PROFILE_RUN + volatile ee_s32 seed1_volatile=0x8; + volatile ee_s32 seed2_volatile=0x8; + volatile ee_s32 seed3_volatile=0x8; +#endif + +volatile ee_s32 seed4_volatile=ITERATIONS; +volatile ee_s32 seed5_volatile=0; + +static CORE_TICKS t0, t1; + +void start_time(void) +{ + t0 = get_timer_value(); +} + +void stop_time(void) +{ + t1 = get_timer_value(); +} + +CORE_TICKS get_time(void) +{ + return t1 - t0; +} + +secs_ret time_in_secs(CORE_TICKS ticks) +{ + // scale timer down to avoid uint64_t -> double conversion in RV32 + int scale = 256; + uint32_t delta = ticks / scale; + uint32_t freq = get_timer_freq() / scale; + return delta / 
(double)freq; +} diff --git a/software/coremark/core_portme.h b/software/coremark/core_portme.h new file mode 100644 index 0000000..82298d6 --- /dev/null +++ b/software/coremark/core_portme.h @@ -0,0 +1,58 @@ +#ifndef FESDK_CORE_PORTME_H +#define FESDK_CORE_PORTME_H + +#include +#include + +#define HAS_FLOAT 1 +#define HAS_TIME_H 1 +#define USE_CLOCK 1 +#define HAS_STDIO 1 +#define HAS_PRINTF 1 +#define SEED_METHOD SEED_VOLATILE +#define CORE_TICKS uint64_t +#define ee_u8 uint8_t +#define ee_u16 uint16_t +#define ee_u32 uint32_t +#define ee_s16 int16_t +#define ee_s32 int32_t +#define ee_ptr_int uintptr_t +#define ee_size_t size_t +#define COMPILER_FLAGS FLAGS_STR + +#define align_mem(x) (void *)(((ee_ptr_int)(x) + sizeof(ee_u32) - 1) & -sizeof(ee_u32)) + +#ifdef __GNUC__ +# define COMPILER_VERSION "GCC"__VERSION__ +#else +# error +#endif + +#define MEM_METHOD MEM_STACK +#define MEM_LOCATION "STACK" + +#define MAIN_HAS_NOARGC 0 +#define MAIN_HAS_NORETURN 0 + +#define MULTITHREAD 1 +#define USE_PTHREAD 0 +#define USE_FORK 0 +#define USE_SOCKET 0 + +#define default_num_contexts MULTITHREAD + +typedef int core_portable; +static void portable_init(core_portable *p, int *argc, char *argv[]) {} +static void portable_fini(core_portable *p) {} + +#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN) +#if (TOTAL_DATA_SIZE==1200) +#define PROFILE_RUN 1 +#elif (TOTAL_DATA_SIZE==2000) +#define PERFORMANCE_RUN 1 +#else +#define VALIDATION_RUN 1 +#endif +#endif + +#endif -- cgit v1.2.3 From b8057b191231dea4b973f1d5066f59afa3881dba Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 3 Jan 2017 18:55:20 -0800 Subject: Add benchmarking README --- README.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/README.md b/README.md index 58664b0..d0dfc29 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,53 @@ cd freedom-e-sdk make help ``` +### Benchmarking ### + +#### Dhrystone #### + +After setting 
up the software and debug toolchains, you can build and +execute everyone's favorite benchmark as follows: + +- Compile the benchmark with the command `make software PROGRAM=dhrystone`. +- Run on the HiFive1 board with the command `make upload PROGRAM=dhrystone`. + This will take a few minutes. Sample output is provided below. +- Compute DMIPS by dividing the Dhrystones per Second result by 1757, which + was the VAX 11/780's performance. In the example below, 729927 / 1757 = + 415 DMIPS. +- Compute DMIPS/MHz by dividing by the clock rate: in the example below, + 415 / 260 = 1.60 DMIPS/MHz. + +``` +core freq at 259830579 Hz + +Dhrystone Benchmark, Version 2.1 (Language: C) + + + +Microseconds for one run through Dhrystone: 1.3 +Dhrystones per Second: 729927.0 +``` + +#### CoreMark #### + +We cannot distribute the CoreMark benchmark, but the following are instructions +to download and run the benchmark on the HiFive1 board: + +- Download the CoreMark archive from EEMBC's web site at + http://www.eembc.org/coremark/download.php and extract it. +- Copy the following files from the extracted archive into the + `software/coremark` directory in this repository: + - `core_list_join.c` + - `core_main.c` + - `coremark.h` + - `core_matrix.c` + - `core_state.c` + - `core_util.c` +- Compile the benchmark with the command `make software PROGRAM=coremark`. +- Run on the HiFive1 board with the command `make upload PROGRAM=coremark`. +- Divide the reported Iterations/Sec by the reported core frequency in MHz to + obtain a CoreMarks/MHz value. + ### For More Information ### Documentation, Forums, and much more available at -- cgit v1.2.3