diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | README.md | 47 | ||||
-rw-r--r-- | bsp/env/entry.S | 6 | ||||
-rw-r--r-- | bsp/env/freedom-e300-arty/init.c | 23 | ||||
-rw-r--r-- | bsp/env/freedom-e300-arty/platform.h | 3 | ||||
-rw-r--r-- | bsp/env/freedom-e300-hifive1/init.c | 76 | ||||
-rw-r--r-- | bsp/env/freedom-e300-hifive1/link.lds | 2 | ||||
-rw-r--r-- | bsp/env/freedom-e300-hifive1/platform.h | 4 | ||||
-rw-r--r-- | bsp/env/start.S | 3 | ||||
-rw-r--r-- | software/coremark/.gitignore | 9 | ||||
-rw-r--r-- | software/coremark/Makefile | 20 | ||||
-rw-r--r-- | software/coremark/core_portme.c | 52 | ||||
-rw-r--r-- | software/coremark/core_portme.h | 58 | ||||
-rw-r--r-- | software/dhrystone/Makefile | 2 | ||||
-rw-r--r-- | software/dhrystone/dhry_stubs.c | 7 |
15 files changed, 282 insertions, 32 deletions
@@ -64,7 +64,7 @@ openocd: $(openocd_dest)/bin/openocd $(toolchain_dest)/bin/$(target32)-gcc: $(toolchain_srcdir) mkdir -p $(toolchain32_wrkdir) - cd $(toolchain32_wrkdir); $(toolchain_srcdir)/configure --prefix=$(toolchain_dest) --with-arch=rv32ima --with-abi=ilp32 + cd $(toolchain32_wrkdir); $(toolchain_srcdir)/configure --prefix=$(toolchain_dest) --with-arch=rv32imac --with-abi=ilp32 $(MAKE) -C $(toolchain32_wrkdir) $(openocd_dest)/bin/openocd: $(openocd_srcdir) @@ -42,6 +42,53 @@ cd freedom-e-sdk make help ``` +### Benchmarking ### + +#### Dhrystone #### + +After setting up the software and debug toolchains, you can build and +execute everyone's favorite benchmark as follows: + +- Compile the benchmark with the command `make software PROGRAM=dhrystone`. +- Run on the HiFive1 board with the command `make upload PROGRAM=dhrystone`. + This will take a few minutes. Sample output is provided below. +- Compute DMIPS by dividing the Dhrystones per Second result by 1757, which + was the VAX 11/780's performance. In the example below, 729927 / 1757 = + 415 DMIPS. +- Compute DMIPS/MHz by dividing by the clock rate: in the example below, + 415 / 260 = 1.60 DMIPS/MHz. + +``` +core freq at 259830579 Hz + +Dhrystone Benchmark, Version 2.1 (Language: C) + +<snip> + +Microseconds for one run through Dhrystone: 1.3 +Dhrystones per Second: 729927.0 +``` + +#### CoreMark #### + +We cannot distribute the CoreMark benchmark, but the following are instructions +to download and run the benchmark on the HiFive1 board: + +- Download the CoreMark archive from EEMBC's web site at + http://www.eembc.org/coremark/download.php and extract it. +- Copy the following files from the extracted archive into the + `software/coremark` directory in this repository: + - `core_list_join.c` + - `core_main.c` + - `coremark.h` + - `core_matrix.c` + - `core_state.c` + - `core_util.c` +- Compile the benchmark with the command `make software PROGRAM=coremark`. 
+- Run on the HiFive1 board with the command `make upload PROGRAM=coremark`. +- Divide the reported Iterations/Sec by the reported core frequency in MHz to + obtain a CoreMarks/MHz value. + ### For More Information ### Documentation, Forums, and much more available at diff --git a/bsp/env/entry.S b/bsp/env/entry.S index cbf26eb..1f5de24 100644 --- a/bsp/env/entry.S +++ b/bsp/env/entry.S @@ -7,6 +7,7 @@ #include "sifive/bits.h" .section .text.entry + .align 2 .global trap_entry trap_entry: addi sp, sp, -32*REGBYTES @@ -46,7 +47,7 @@ trap_entry: csrr a0, mcause csrr a1, mepc mv a2, sp - jal handle_trap + call handle_trap csrw mepc, a0 # Remain in M-mode after mret @@ -90,6 +91,7 @@ trap_entry: .weak handle_trap handle_trap: - j handle_trap +1: + j 1b #endif diff --git a/bsp/env/freedom-e300-arty/init.c b/bsp/env/freedom-e300-arty/init.c index c766e98..35b1104 100644 --- a/bsp/env/freedom-e300-arty/init.c +++ b/bsp/env/freedom-e300-arty/init.c @@ -9,11 +9,30 @@ extern int main(int argc, char** argv); extern void trap_entry(); -uint32_t get_cpu_freq() +static unsigned long get_cpu_freq() { return 65000000; } +unsigned long get_timer_freq() +{ + return get_cpu_freq(); +} + +uint64_t get_timer_value() +{ +#if __riscv_xlen == 32 + while (1) { + uint32_t hi = read_csr(mcycleh); + uint32_t lo = read_csr(mcycle); + if (hi == read_csr(mcycleh)) + return ((uint64_t)hi << 32) | lo; + } +#else + return read_csr(mcycle); +#endif +} + static void uart_init(size_t baud_rate) { GPIO_REG(GPIO_IOF_SEL) &= ~IOF0_UART0_MASK; @@ -59,8 +78,6 @@ void _init() printf("core freq at %d Hz\n", get_cpu_freq()); write_csr(mtvec, &trap_entry); - - // _exit(main(0, NULL)); } diff --git a/bsp/env/freedom-e300-arty/platform.h b/bsp/env/freedom-e300-arty/platform.h index 1f62956..d5d6dda 100644 --- a/bsp/env/freedom-e300-arty/platform.h +++ b/bsp/env/freedom-e300-arty/platform.h @@ -119,4 +119,7 @@ #define HAS_BOARD_BUTTONS #include "hifive1.h" +unsigned long get_timer_freq(void); +uint64_t 
get_timer_value(void); + #endif /* _SIFIVE_PLATFORM_H */ diff --git a/bsp/env/freedom-e300-hifive1/init.c b/bsp/env/freedom-e300-hifive1/init.c index c088079..71e1659 100644 --- a/bsp/env/freedom-e300-hifive1/init.c +++ b/bsp/env/freedom-e300-hifive1/init.c @@ -5,21 +5,43 @@ #include "platform.h" #include "encoding.h" -uint32_t cpu_freq = 0; - extern int main(int argc, char** argv); extern void trap_entry(); -uint32_t mtime_lo(void) +static unsigned long mtime_lo(void) +{ + return *(volatile unsigned long *)(CLINT_BASE_ADDR + CLINT_MTIME); +} + +#ifdef __riscv32 + +static uint32_t mtime_hi(void) { - return *(volatile uint32_t *)(CLINT_BASE_ADDR + CLINT_MTIME); + return *(volatile uint32_t *)(CLINT_BASE_ADDR + CLINT_MTIME + 4); } -uint32_t mcycle_lo(void) +uint64_t get_timer_value() { - uint32_t t; - asm volatile ("csrr %0, mcycle" : "=r" (t)); - return t; + while (1) { + uint32_t hi = mtime_hi(); + uint32_t lo = mtime_lo(); + if (hi == mtime_hi()) + return ((uint64_t)hi << 32) | lo; + } +} + +#else /* __riscv32 */ + +uint64_t get_timer_value() +{ + return mtime_lo(); +} + +#endif + +unsigned long get_timer_freq() +{ + return 32768; } static void use_hfrosc(int div, int trim) @@ -116,21 +138,40 @@ static void use_default_clocks() use_hfrosc(4, 16); } -void measure_cpu_freq(size_t n, size_t mtime_freq) +static unsigned long __attribute__((noinline)) measure_cpu_freq(size_t n) { - uint32_t start_mtime = mtime_lo(); - uint32_t start_mcycle = mcycle_lo(); + unsigned long start_mtime, delta_mtime; + unsigned long mtime_freq = get_timer_freq(); + + // Don't start measuring until we see an mtime tick + unsigned long tmp = mtime_lo(); + do { + start_mtime = mtime_lo(); + } while (start_mtime == tmp); - while (mtime_lo() - start_mtime < n) ; + unsigned long start_mcycle = read_csr(mcycle); - uint32_t end_mtime = mtime_lo(); - uint32_t end_mcycle = mcycle_lo(); + do { + delta_mtime = mtime_lo() - start_mtime; + } while (delta_mtime < n); - cpu_freq = 
(end_mcycle-start_mcycle)/n*mtime_freq; + unsigned long delta_mcycle = read_csr(mcycle) - start_mcycle; + + return (delta_mcycle / delta_mtime) * mtime_freq + + ((delta_mcycle % delta_mtime) * mtime_freq) / delta_mtime; } -uint32_t get_cpu_freq() +unsigned long get_cpu_freq() { + static uint32_t cpu_freq; + + if (!cpu_freq) { + // warm up I$ + measure_cpu_freq(1); + // measure for real + cpu_freq = measure_cpu_freq(10); + } + return cpu_freq; } @@ -178,7 +219,6 @@ void _init() { use_default_clocks(); use_pll(0, 0, 1, 31, 1); - measure_cpu_freq(1000, 32768); uart_init(115200); printf("core freq at %d Hz\n", get_cpu_freq()); @@ -188,8 +228,6 @@ void _init() write_csr(mstatus, MSTATUS_FS); // allow FPU instructions without trapping write_csr(fcsr, 0); // initialize rounding mode, undefined at reset } - - //_exit(main(0, NULL)); } void _fini() diff --git a/bsp/env/freedom-e300-hifive1/link.lds b/bsp/env/freedom-e300-hifive1/link.lds index e224273..90e5c8f 100644 --- a/bsp/env/freedom-e300-hifive1/link.lds +++ b/bsp/env/freedom-e300-hifive1/link.lds @@ -26,6 +26,8 @@ SECTIONS .text : { + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) *(.text .text.*) *(.gnu.linkonce.t.*) } >flash AT>flash :flash diff --git a/bsp/env/freedom-e300-hifive1/platform.h b/bsp/env/freedom-e300-hifive1/platform.h index eca708e..63efc9e 100644 --- a/bsp/env/freedom-e300-hifive1/platform.h +++ b/bsp/env/freedom-e300-hifive1/platform.h @@ -126,6 +126,8 @@ #include "hifive1.h" -uint32_t get_cpu_freq(); +unsigned long get_cpu_freq(void); +unsigned long get_timer_freq(void); +uint64_t get_timer_value(void); #endif /* _SIFIVE_PLATFORM_H */ diff --git a/bsp/env/start.S b/bsp/env/start.S index 77e223d..b526411 100644 --- a/bsp/env/start.S +++ b/bsp/env/start.S @@ -47,5 +47,8 @@ _start: 1: #endif + /* argc = argv = 0 */ + li a0, 0 + li a1, 0 call main tail exit diff --git a/software/coremark/.gitignore b/software/coremark/.gitignore new file mode 100644 index 0000000..6bd8438 --- 
/dev/null +++ b/software/coremark/.gitignore @@ -0,0 +1,9 @@ +/*.o +/coremark +/core_list_join.c +/core_main.c +/coremark.h +/core_matrix.c +/core_state.c +/core_util.c +/trans.c diff --git a/software/coremark/Makefile b/software/coremark/Makefile new file mode 100644 index 0000000..b57151e --- /dev/null +++ b/software/coremark/Makefile @@ -0,0 +1,20 @@ +TARGET := coremark + +C_SRCS := \ + core_list_join.c \ + core_main.c \ + core_matrix.c \ + core_state.c \ + core_util.c \ + core_portme.c \ + +HEADERS := \ + coremark.h \ + core_portme.h \ + +CFLAGS := -O2 -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 +CFLAGS += -DFLAGS_STR=\""$(CFLAGS)"\" +CFLAGS += -DITERATIONS=10000 -DPERFORMANCE_RUN=1 + +BSP_BASE = ../../bsp +include $(BSP_BASE)/env/common.mk diff --git a/software/coremark/core_portme.c b/software/coremark/core_portme.c new file mode 100644 index 0000000..cbf1396 --- /dev/null +++ b/software/coremark/core_portme.c @@ -0,0 +1,52 @@ +#include <stdio.h> +#include <stdlib.h> +#include "coremark.h" +#include "platform.h" +#include "encoding.h" + +#if VALIDATION_RUN + volatile ee_s32 seed1_volatile=0x3415; + volatile ee_s32 seed2_volatile=0x3415; + volatile ee_s32 seed3_volatile=0x66; +#endif + +#if PERFORMANCE_RUN + volatile ee_s32 seed1_volatile=0x0; + volatile ee_s32 seed2_volatile=0x0; + volatile ee_s32 seed3_volatile=0x66; +#endif + +#if PROFILE_RUN + volatile ee_s32 seed1_volatile=0x8; + volatile ee_s32 seed2_volatile=0x8; + volatile ee_s32 seed3_volatile=0x8; +#endif + +volatile ee_s32 seed4_volatile=ITERATIONS; +volatile ee_s32 seed5_volatile=0; + +static CORE_TICKS t0, t1; + +void start_time(void) +{ + t0 = get_timer_value(); +} + +void stop_time(void) +{ + t1 = get_timer_value(); +} + +CORE_TICKS get_time(void) +{ + return t1 - t0; +} + +secs_ret time_in_secs(CORE_TICKS ticks) +{ + // scale timer down to avoid uint64_t -> double conversion in RV32 + int scale = 256; + 
uint32_t delta = ticks / scale; + uint32_t freq = get_timer_freq() / scale; + return delta / (double)freq; +} diff --git a/software/coremark/core_portme.h b/software/coremark/core_portme.h new file mode 100644 index 0000000..82298d6 --- /dev/null +++ b/software/coremark/core_portme.h @@ -0,0 +1,58 @@ +#ifndef FESDK_CORE_PORTME_H +#define FESDK_CORE_PORTME_H + +#include <stdint.h> +#include <stddef.h> + +#define HAS_FLOAT 1 +#define HAS_TIME_H 1 +#define USE_CLOCK 1 +#define HAS_STDIO 1 +#define HAS_PRINTF 1 +#define SEED_METHOD SEED_VOLATILE +#define CORE_TICKS uint64_t +#define ee_u8 uint8_t +#define ee_u16 uint16_t +#define ee_u32 uint32_t +#define ee_s16 int16_t +#define ee_s32 int32_t +#define ee_ptr_int uintptr_t +#define ee_size_t size_t +#define COMPILER_FLAGS FLAGS_STR + +#define align_mem(x) (void *)(((ee_ptr_int)(x) + sizeof(ee_u32) - 1) & -sizeof(ee_u32)) + +#ifdef __GNUC__ +# define COMPILER_VERSION "GCC"__VERSION__ +#else +# error +#endif + +#define MEM_METHOD MEM_STACK +#define MEM_LOCATION "STACK" + +#define MAIN_HAS_NOARGC 0 +#define MAIN_HAS_NORETURN 0 + +#define MULTITHREAD 1 +#define USE_PTHREAD 0 +#define USE_FORK 0 +#define USE_SOCKET 0 + +#define default_num_contexts MULTITHREAD + +typedef int core_portable; +static void portable_init(core_portable *p, int *argc, char *argv[]) {} +static void portable_fini(core_portable *p) {} + +#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN) +#if (TOTAL_DATA_SIZE==1200) +#define PROFILE_RUN 1 +#elif (TOTAL_DATA_SIZE==2000) +#define PERFORMANCE_RUN 1 +#else +#define VALIDATION_RUN 1 +#endif +#endif + +#endif diff --git a/software/dhrystone/Makefile b/software/dhrystone/Makefile index a55b1ec..78a7b23 100644 --- a/software/dhrystone/Makefile +++ b/software/dhrystone/Makefile @@ -5,7 +5,7 @@ C_SRCS := dhry_stubs.c dhry_printf.c HEADERS := dhry.h DHRY_SRCS := dhry_1.c dhry_2.c -DHRY_CFLAGS := -O2 -DTIME -fno-inline -fno-builtin-printf -Wno-implicit +DHRY_CFLAGS := -O2 -DTIME 
-fno-inline -fno-builtin-printf -Wno-implicit -march=rv32ima XLEN ?= 32 CFLAGS := -Os -fno-common diff --git a/software/dhrystone/dhry_stubs.c b/software/dhrystone/dhry_stubs.c index d3bd14c..0616f86 100644 --- a/software/dhrystone/dhry_stubs.c +++ b/software/dhrystone/dhry_stubs.c @@ -3,16 +3,13 @@ /* The functions in this file are only meant to support Dhrystone on an * embedded RV32 system and are obviously incorrect in general. */ -// return the cycle counter as though it were the current time long time(void) { - long t; - asm volatile ("csrr %0, mcycle" : "=r" (t)); - return t / (get_cpu_freq() / 1000); + return get_timer_value() / get_timer_freq(); } // set the number of dhrystone iterations void __wrap_scanf(const char* fmt, int* n) { - *n = 1500000; + *n = 100000000; } |