File: papi_bipartite.h

package info (click to toggle)
papi 5.7.0+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 9,856 kB
  • sloc: ansic: 93,265; fortran: 3,338; xml: 2,460; makefile: 815; sh: 290
file content (189 lines) | stat: -rw-r--r-- 6,344 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
/*
* File:    papi_bipartite.h
* Author:  Dan Terpstra
*          terpstra@eecs.utk.edu
* Mods:    
*          
*/
/* This file contains one function: _papi_bipartite_alloc()
   Its role is to act as an execution harness for implementing a recursive
   Modified Bipartite Graph allocation of counter resources for those
   platforms that don't have built-in smart counter allocation.
   It is intended to be #included in the cpu component source to minimize
   other disruption to the build process.
   
   This routine presumes the existence of a half dozen "bpt_" helper routines. 
   Prototypes for these routines are given below.
 
    success  return 1
    fail     return 0
*/

/* This function examines the event to determine
    if it can be mapped to counter ctr.
    Returns true if it can, false if it can't. */
static int
_bpt_map_avail( hwd_reg_alloc_t * dst, int ctr );

/* This function forces the event to
    be mapped to only counter ctr.
    Returns nothing.  */
static void
_bpt_map_set( hwd_reg_alloc_t * dst, int ctr );

/* This function examines the event to determine
   if it has a single exclusive mapping.
   Returns true if exclusive, false if non-exclusive.  */
static int
_bpt_map_exclusive( hwd_reg_alloc_t * dst );

/* This function compares the dst and src events
    to determine if any resources are shared. Typically the src event
    is exclusive, so this detects a conflict if true.
    Returns true if conflict, false if no conflict.  */
static int
_bpt_map_shared( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src );

/* This function removes shared resources available to the src event
    from the resources available to the dst event,
    and reduces the rank of the dst event accordingly. Typically,
    the src event will be exclusive, but the code shouldn't assume it.
    Returns nothing.  */
static void
_bpt_map_preempt( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src );

/* This function copies the mapping result from the src event into
    the dst event. In _papi_bipartite_alloc() below it is used to
    propagate counter assignments computed on a scratch copy of the
    unmapped events back into the caller's original event list.
    Returns nothing.  */
static void
_bpt_map_update( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src );


/* Allocate hardware counters to 'count' events using a recursive
   Modified Bipartite Graph algorithm.
   event_list: array of 'count' component-specific allocation records
               (indexed as raw bytes, since hwd_reg_alloc_t size varies
               per component and is given by size.reg_alloc).
   count:      number of events to allocate.
   cidx:       component index into the _papi_hwd component table.
   Returns 1 on success (all events mapped), 0 on failure.
   NOTE(review): the scratch buffers below are sized for num_cntrs
   events, so this presumes count <= cmp_info.num_cntrs — presumably
   enforced by the caller before allocation is attempted; confirm. */
static int
_papi_bipartite_alloc( hwd_reg_alloc_t * event_list, int count, int cidx )
{
	int i, j;
	/* byte pointer so events can be addressed as ptr[size * i] */
	char *ptr = ( char * ) event_list;
	int idx_q[count];				   /* queue of indexes of lowest rank events */
	int map_q[count];				   /* queue of mapped events (TRUE if mapped) */
	int head, tail;
	/* size in bytes of one component-specific allocation record */
	int size = _papi_hwd[cidx]->size.reg_alloc;

	/* build a queue of indexes to all events 
	   that live on one counter only (rank == 1) */
	head = 0;				 /* points to top of queue */
	tail = 0;				 /* points to bottom of queue */
	for ( i = 0; i < count; i++ ) {
		map_q[i] = 0;
		if ( _bpt_map_exclusive( ( hwd_reg_alloc_t * ) & ptr[size * i] ) )
			idx_q[tail++] = i;
	}
	/* scan the single counter queue looking for events that share counters.
	   If two events can live only on one counter, return failure.
	   If the second event lives on more than one counter, remove shared counter
	   from its selector and reduce its rank. 
	   Mark first event as mapped to its counter. */
	/* Constraint-propagation phase: each queued event is pinned to its
	   only counter; any other event sharing that counter loses it.  An
	   event preempted down to rank 1 joins the queue in turn.  Each
	   event can enter the queue at most once (a second conflict on an
	   already-exclusive event fails immediately), so tail <= count. */
	while ( head < tail ) {
		for ( i = 0; i < count; i++ ) {
			if ( i != idx_q[head] ) {
				if ( _bpt_map_shared( ( hwd_reg_alloc_t * ) & ptr[size * i],
									 ( hwd_reg_alloc_t * ) & ptr[size *
																 idx_q
																 [head]] ) ) {
					/* both share a counter; if second is exclusive, mapping fails */
					if ( _bpt_map_exclusive( ( hwd_reg_alloc_t * ) &
											ptr[size * i] ) )
						return 0;
					else {
						_bpt_map_preempt( ( hwd_reg_alloc_t * ) &
											 ptr[size * i],
											 ( hwd_reg_alloc_t * ) & ptr[size *
																		 idx_q
																		 [head]] );
						/* preemption may have reduced this event to a
						   single counter; if so, propagate it too */
						if ( _bpt_map_exclusive( ( hwd_reg_alloc_t * ) &
												ptr[size * i] ) )
							idx_q[tail++] = i;
					}
				}
			}
		}
		map_q[idx_q[head]] = 1;	/* mark this event as mapped */
		head++;
	}
	if ( tail == count ) {
		return 1;			 /* idx_q includes all events; everything is successfully mapped */
	} else {
		/* Search phase: some events still have multiple candidate
		   counters.  Work on a scratch copy of the unmapped events,
		   trying each counter for the first one and recursing on the
		   rest; a pristine backup allows retrying after a failed try. */
		char *rest_event_list;
		char *copy_rest_event_list;
		int remainder;

		rest_event_list =
			papi_calloc(  _papi_hwd[cidx]->cmp_info.num_cntrs, 
				      size );

		copy_rest_event_list =
		        papi_calloc( _papi_hwd[cidx]->cmp_info.num_cntrs,
				     size );

		/* bail out cleanly if either scratch allocation failed */
		if ( !rest_event_list || !copy_rest_event_list ) {
			if ( rest_event_list )
				papi_free( rest_event_list );
			if ( copy_rest_event_list )
				papi_free( copy_rest_event_list );
			return ( 0 );
		}

		/* copy all unmapped events to a second list and make a backup */
		for ( i = 0, j = 0; i < count; i++ ) {
			if ( map_q[i] == 0 ) {
				memcpy( &copy_rest_event_list[size * j++], &ptr[size * i],
						( size_t ) size );
			}
		}
		remainder = j;

		memcpy( rest_event_list, copy_rest_event_list,
				( size_t ) size * ( size_t ) remainder );

		/* try each possible mapping until you fail or find one that works */
		for ( i = 0; i < _papi_hwd[cidx]->cmp_info.num_cntrs; i++ ) {
			/* for the first unmapped event, try every possible counter */
			if ( _bpt_map_avail( ( hwd_reg_alloc_t * ) rest_event_list, i ) ) {
				 _bpt_map_set( ( hwd_reg_alloc_t * ) rest_event_list, i );
				/* remove selected counter from all other unmapped events */
				for ( j = 1; j < remainder; j++ ) {
					if ( _bpt_map_shared( ( hwd_reg_alloc_t * ) &
										 rest_event_list[size * j],
										 ( hwd_reg_alloc_t * )
										 rest_event_list ) )
						_bpt_map_preempt( ( hwd_reg_alloc_t * ) &
											 rest_event_list[size * j],
											 ( hwd_reg_alloc_t * )
											 rest_event_list );
				}
				/* if recursive call to allocation works, break out of the loop */
				if ( _papi_bipartite_alloc
					 ( ( hwd_reg_alloc_t * ) rest_event_list, remainder,
					   cidx ) )
					break;

				/* recursive mapping failed; copy the backup list and try the next combination */
				memcpy( rest_event_list, copy_rest_event_list,
						( size_t ) size * ( size_t ) remainder );
			}
		}
		/* loop ran to completion without a break => no counter worked */
		if ( i == _papi_hwd[cidx]->cmp_info.num_cntrs ) {
			papi_free( rest_event_list );
			papi_free( copy_rest_event_list );
			return 0;		 /* fail to find mapping */
		}
		/* success: copy the solved mappings from the scratch list back
		   into the caller's event list, in the same relative order the
		   unmapped events were extracted above */
		for ( i = 0, j = 0; i < count; i++ ) {
			if ( map_q[i] == 0 )
				_bpt_map_update( ( hwd_reg_alloc_t * ) & ptr[size * i],
									( hwd_reg_alloc_t * ) & rest_event_list[size
																			*
																			j++] );
		}
		papi_free( rest_event_list );
		papi_free( copy_rest_event_list );
		return 1;
	}
}