File: gpu_array_tile.h

package info (click to toggle)

llvm-toolchain-13 1%3A13.0.1-11

links: PTS, VCS
area: main
in suites: bookworm
size: 1,418,840 kB
sloc: cpp: 5,290,826; ansic: 996,570; asm: 544,593; python: 188,212; objc: 72,027; lisp: 30,291; f90: 25,395; sh: 24,898; javascript: 9,780; pascal: 9,398; perl: 7,484; ml: 5,432; awk: 3,523; makefile: 2,913; xml: 953; cs: 573; fortran: 539

file content (59 lines) | stat: -rw-r--r-- 1,657 bytes

parent folder | download | duplicates (16)

#ifndef GPU_ARRAY_TILE_H
#define GPU_ARRAY_TILE_H

#include <isl/aff_type.h>
#include <isl/map_type.h>
#include <isl/val.h>

/* The fields stride and shift only contain valid information
 * if shift != NULL.
 * If so, they express that current index is such that if you add shift,
 * then the result is always a multiple of stride.
 * Let D represent the initial tile->depth dimensions of the computed schedule.
 * The spaces of "lb" and "shift" are of the form
 *
 *	D -> [b]
 */
struct gpu_array_bound {
	isl_val *size;
	isl_aff *lb;

	isl_val *stride;
	isl_aff *shift;
};

/* A tile of an outer array.
 *
 * requires_unroll is set if the schedule dimensions that are mapped
 * to threads need to be unrolled for this (private) tile to be used.
 *
 * "depth" reflects the number of schedule dimensions that affect the tile.
 * The copying into and/or out of the tile is performed at that depth.
 *
 * n is the dimension of the array.
 * bound is an array of size "n" representing the lower bound
 *	and size for each index.
 *
 * tiling maps a tile in the global array to the corresponding
 * shared/private memory tile and is of the form
 *
 *	{ [D[i] -> A[a]] -> T[(a + shift(i))/stride - lb(i)] }
 *
 * where D represents the initial "depth" dimensions
 * of the computed schedule.
 */
struct gpu_array_tile {
	isl_ctx *ctx;
	int requires_unroll;
	int depth;
	int n;
	struct gpu_array_bound *bound;
	isl_multi_aff *tiling;
};

struct gpu_array_tile *gpu_array_tile_create(isl_ctx *ctx, int n_index);
struct gpu_array_tile *gpu_array_tile_free(struct gpu_array_tile *tile);

__isl_give isl_val *gpu_array_tile_size(struct gpu_array_tile *tile);

#endif