summaryrefslogtreecommitdiff
path: root/software/performance_counters/performance_counters.c
blob: e7c12a3436177ac7debcf93ecd52a82eea2b5d88 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
// See LICENSE for license details.

// This demo shows how to use basic
// RISC-V profiling counters, mcycle
// (counts the number of processor cycles)
// and minstret (counts the number of retired instructions). 
// Note that both are writable as well.

#include <stdio.h>

// The CSR encodings are in this header.
#include "encoding.h"

// The mcycle counter is a 64-bit counter, but since
// Freedom E platforms use RV32, we must access it as
// two 32-bit registers (mcycleh and mcycle). At 256MHz,
// the lower 32 bits roll over approx. every 17 seconds
// (2^32 / 256e6), so we check for rollover with this
// routine as suggested by the RISC-V Privileged
// Architecture Specification: read the high half, then
// the low half, then the high half again, and retry if
// the two high-half reads differ.
//
// x is a pointer through which the combined 64-bit value
// is stored (callers in this file pass a uint64_t *).
// The body is wrapped in do { } while (0) so that
// `rdmcycle(p);` behaves as a single statement, and the
// temporaries carry a macro-specific prefix so they cannot
// capture identifiers used in the argument expression.

#define rdmcycle(x)  do {					\
    uint32_t rdmcycle_lo, rdmcycle_hi, rdmcycle_hi2;		\
    __asm__ __volatile__ ("1:\n\t"				\
			  "csrr %0, mcycleh\n\t"		\
			  "csrr %1, mcycle\n\t"			\
			  "csrr %2, mcycleh\n\t"		\
			  "bne  %0, %2, 1b\n\t"			\
			  : "=r" (rdmcycle_hi),			\
			    "=r" (rdmcycle_lo),			\
			    "=r" (rdmcycle_hi2));		\
    *(x) = rdmcycle_lo | ((uint64_t) rdmcycle_hi << 32);	\
  } while (0)


// The minstret counter is a 64-bit counter, but since
// Freedom E platforms use RV32, we must access it as
// two 32-bit registers (minstreth and minstret), same
// as for mcycle: read high, low, high again, and retry
// if the high half changed between the two reads.
//
// x is a pointer through which the combined 64-bit value
// is stored (callers in this file pass a uint64_t *).
// The body is wrapped in do { } while (0) so that
// `rdminstret(p);` behaves as a single statement, and the
// temporaries carry a macro-specific prefix so they cannot
// capture identifiers used in the argument expression.

#define rdminstret(x)  do {					\
    uint32_t rdminstret_lo, rdminstret_hi, rdminstret_hi2;	\
    __asm__ __volatile__ ("1:\n\t"				\
			  "csrr %0, minstreth\n\t"		\
			  "csrr %1, minstret\n\t"		\
			  "csrr %2, minstreth\n\t"		\
			  "bne  %0, %2, 1b\n\t"			\
			  : "=r" (rdminstret_hi),		\
			    "=r" (rdminstret_lo),		\
			    "=r" (rdminstret_hi2));		\
    *(x) = rdminstret_lo | ((uint64_t) rdminstret_hi << 32);	\
  } while (0)

// Simple program to measure the performance of.
//
// Returns i! (1 * 2 * ... * i); returns 1 when i <= 0.
//
// The product is accumulated in unsigned arithmetic so that
// values too large for the type wrap around instead of
// invoking signed-overflow undefined behaviour. Large
// arguments therefore yield a wrapped (not mathematically
// exact) result; the function serves as a workload here.

int factorial(int i){

  unsigned int result = 1;
  for (int ii = 1; ii <= i; ii++) {
    // Multiply by the loop index, not by the argument:
    // multiplying by i would compute i^i rather than i!.
    result = result * (unsigned int) ii;
  }

  return (int) result;

}


// Runs two demos, three iterations each, around a call to
// factorial(100):
//   Demo 1 samples mcycle/minstret before and after the call
//          and prints the differences.
//   Demo 2 writes zero to the counters before the call and
//          prints the counter values read afterwards.
// Returns 0.
int main(void)
{

  uint64_t before_cycle;
  uint64_t before_instret;
  
  uint64_t after_cycle;
  uint64_t after_instret;
    
  printf("\n\nDemo 1: Using Counter Differences.\n");
  
  for (int ii = 0; ii < 3; ii++){
    rdmcycle(&before_cycle);
    rdminstret(&before_instret);

    // volatile so the store of the result is not dropped as unused.
    volatile int result = factorial (100);
    
    rdmcycle(&after_cycle);
    rdminstret(&after_instret);
    
    // The differences are truncated to 32 bits (as before) and then
    // widened to unsigned long so the arguments match %lu exactly;
    // the previous %d mismatched the unsigned 32-bit arguments.
    printf("Loop %d: Retired %lu instructions in %lu cycles\n",
	   ii,
	   (unsigned long)(uint32_t)(after_instret - before_instret), 
	   (unsigned long)(uint32_t)(after_cycle - before_cycle));
  }

  printf("\n\nDemo 2: Clearing Counters, Using Values Directly.\n");

  for (int ii = 0; ii < 3; ii++){

    // Zero both halves of each 64-bit counter.
    write_csr(mcycle,  0);
    write_csr(mcycleh, 0);
    write_csr(minstret, 0);
    write_csr(minstreth, 0);
    
    // volatile so the store of the result is not dropped as unused.
    volatile int result = factorial (100);
    
    rdmcycle(&after_cycle);
    rdminstret(&after_instret);
    
    // Low 32 bits of each counter, printed with a matching specifier.
    printf("Loop %d: Retired %lu instructions in %lu cycles\n",
	   ii,
	   (unsigned long)(uint32_t)(after_instret),
	   (unsigned long)(uint32_t)(after_cycle));

  }

  return 0;

}