File: hsatimer.cpp

package info (click to toggle)
rocr-runtime 6.4.3%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 12,928 kB
  • sloc: cpp: 126,824; ansic: 41,837; lisp: 1,225; asm: 905; sh: 452; python: 117; makefile: 59
file content (190 lines) | stat: -rw-r--r-- 3,865 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#include "hsatimer.h"

// Calibrates the TSC once at construction time.
// MeasureTSCFreqHz busy-waits until 1e9 TSC ticks have elapsed, so creating a
// PerfTimer is not free. The stored value is the TSC frequency expressed in
// units of 100 MHz and is consumed by StopTimer on the _AMD path.
PerfTimer::PerfTimer()
{
    const uint64_t calibratedFreq = MeasureTSCFreqHz();
    freq_in_100mhz = calibratedFreq;
}

// Frees every Timer that CreateTimer allocated and empties the container.
// Timers are deleted newest-first (from the back of _timers to the front).
PerfTimer::~PerfTimer()
{
	for (int slot = (int)_timers.size() - 1; slot >= 0; --slot)
	{
		delete _timers[slot];
	}
	_timers.clear();
}

//a new cretaed timer instantance index will be returned
int PerfTimer::CreateTimer()
{
    Timer *newTimer = new Timer;
	newTimer->_start = 0;
	newTimer->_clocks = 0;

#ifdef _WIN32
    QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->_freq);       
#else
	newTimer->_freq = (long long)1.0E3;
#endif

	/* Push back the address of new Timer instance created */
	_timers.push_back(newTimer);
	return (int)(_timers.size() - 1);
}

int PerfTimer::StartTimer(int index)
{
	if(index >= (int)_timers.size())
	{
		Error("Cannot reset timer. Invalid handle.");
		return HSA_FAILURE;
	}
	
#ifdef _WIN32
        // General Windows timing method
       #ifndef _AMD
	long long tmpStart;
	QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart));
	_timers[index]->_start = (double)tmpStart;
       #else
       // AMD Windows timing method      

       #endif
	   
#else
       // General Linux timing method
      #ifndef _AMD
	struct timeval s;
	gettimeofday(&s, 0);
	_timers[index]->_start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3)); 
       #else

       // AMD timing method

	unsigned int unused;
	_timers[index]->_start = __rdtscp(&unused);

       #endif
	   
#endif

	return HSA_SUCCESS;
}


int PerfTimer::StopTimer(int index)
{
	double n=0;
	if(index >= (int)_timers.size())
	{
		Error("Cannot reset timer. Invalid handle.");
		return HSA_FAILURE;
	}
#ifdef _WIN32
       #ifndef _AMD
	long long n1;
	QueryPerformanceCounter((LARGE_INTEGER*)&(n1));
	n = (double) n1;
	#else
	
        // AMD Window Timing
        
	#endif
	
#else
        // General Linux timing method
        #ifndef _AMD
	struct timeval s;
	gettimeofday(&s, 0);
	n = s.tv_sec * 1.0E3+ (double)(s.tv_usec/1.0E3);
	#else
       // AMD Linux timing

	unsigned int unused;
	n = __rdtscp(&unused);
	#endif
	
#endif

	n -= _timers[index]->_start;
	_timers[index]->_start = 0;

	#ifndef _AMD
	_timers[index]->_clocks += n;
	#else
        //_timers[index]->_clocks += 10 * n /freq_in_100mhz;      // unit is ns
	_timers[index]->_clocks += 1.0E-6 * 10  * n /freq_in_100mhz;  // convert to ms
	cout << "_AMD is enabled!!!" << endl;
	#endif
	
	return HSA_SUCCESS;
}

// Reports an error message by writing it to cout, followed by a newline
// and a stream flush.
void PerfTimer::Error(string str)
{
    cout << str;
    cout << endl;
}


double PerfTimer::ReadTimer(int index)
{

	if(index >= (int)_timers.size())
	{
		Error("Cannot read timer. Invalid handle.");
		return HSA_FAILURE;
	}
	
	double reading = double(_timers[index]->_clocks);
	
	reading = double(reading / _timers[index]->_freq);
	
	return reading;
}


// Returns a coarse timestamp in microseconds.
// Windows: derived from the performance counter and its frequency.
// Elsewhere: read from CLOCK_MONOTONIC_RAW.
// Only differences between two returned values are used in this file.
uint64_t PerfTimer::CoarseTimestampUs()
{
#ifdef _WIN32
	uint64_t counterHz, counter;
	QueryPerformanceFrequency((LARGE_INTEGER *)&counterHz);
	QueryPerformanceCounter((LARGE_INTEGER *)&counter);

	// Shrink counter and frequency by the same factor of 16 until
	// counter * 1000000 is guaranteed to fit in uint64_t.
	const uint64_t kMaxCounter = 1ULL << 44;
	while (counter > kMaxCounter) {
		counter >>= 4;
		counterHz >>= 4;
	}

	const uint64_t kUsPerSecond = 1000000;
	return (counter * kUsPerSecond) / counterHz;
#else
	struct timespec now;
	clock_gettime(CLOCK_MONOTONIC_RAW, &now);

	const uint64_t wholeSecondsUs = uint64_t(now.tv_sec) * 1000000;
	const uint64_t fractionUs = now.tv_nsec / 1000;
	return wholeSecondsUs + fractionUs;
#endif
}

// Estimates the TSC frequency by busy-waiting until 1e9 TSC ticks have
// elapsed and comparing that against the coarse clock.
//
// NOTE: despite the name, the result is not in Hz. It is ticks * 10 divided
// by elapsed nanoseconds, i.e. the frequency in units of 100 MHz, rounded to
// the nearest integer.
uint64_t PerfTimer::MeasureTSCFreqHz()
{
	const uint64_t kCalibrationTicks = 1000000000;
	unsigned int aux;

	// Bracket the TSC interval with coarse timestamps: coarse clock first,
	// then the TSC on the way in; TSC first, then coarse clock on the way out.
	const uint64_t wallStartUs = CoarseTimestampUs();
	const uint64_t tscStart = __rdtscp(&aux);

	uint64_t tscNow = __rdtscp(&aux);
	while (tscNow - tscStart < kCalibrationTicks)
	{
		tscNow = __rdtscp(&aux);
	}

	const uint64_t wallStopUs = CoarseTimestampUs();

	// Adding half the divisor before dividing rounds to the nearest unit.
	const uint64_t elapsedNs = (wallStopUs - wallStartUs) * 1000;
	const uint64_t elapsedTicks = tscNow - tscStart;
	return (elapsedTicks * 10 + (elapsedNs / 2)) / elapsedNs;
}