1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
|
//===- File.cpp - Parsing sparse tensors from files -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements parsing and printing of files in one of the
// following external formats:
//
// (1) Matrix Market Exchange (MME): *.mtx
// https://math.nist.gov/MatrixMarket/formats.html
//
// (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns
// http://frostt.io/tensors/file-formats.html
//
// This file is part of the lightweight runtime support library for sparse
// tensor manipulations. The functionality of the support library is meant
// to simplify benchmarking, testing, and debugging MLIR code operating on
// sparse tensors. However, the provided functionality is **not** part of
// core MLIR itself.
//
//===----------------------------------------------------------------------===//
#include "mlir/ExecutionEngine/SparseTensor/File.h"
#include <cctype>
#include <cstring>
using namespace mlir::sparse_tensor;
/// Opens `filename` in text-read mode and stores the handle in `file`.
/// Reports through MLIR_SPARSETENSOR_FATAL when a handle is already held
/// or when `fopen` does not return one.
void SparseTensorReader::openFile() {
  // A non-null handle means a previous openFile() was never closed.
  if (file != nullptr)
    MLIR_SPARSETENSOR_FATAL("Already opened file %s\n", filename);
  file = fopen(filename, "r");
  if (file == nullptr)
    MLIR_SPARSETENSOR_FATAL("Cannot find file %s\n", filename);
}
/// Closes the currently held file handle, if any, and resets it to null.
/// Does nothing when no file is open.
void SparseTensorReader::closeFile() {
  if (!file)
    return;
  fclose(file);
  file = nullptr;
}
/// Reads the next line of the file into the `line` buffer (at most
/// `kColWidth - 1` characters, as bounded by `fgets`). Reports through
/// MLIR_SPARSETENSOR_FATAL when `fgets` yields nothing.
void SparseTensorReader::readLine() {
  const char *const got = fgets(line, kColWidth, file);
  if (got == nullptr)
    MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename);
}
/// Parses the header of the opened file, choosing the parser from the
/// filename: names containing ".mtx" are treated as Matrix Market, names
/// containing ".tns" as extended FROSTT. Any other name is reported
/// through MLIR_SPARSETENSOR_FATAL.
void SparseTensorReader::readHeader() {
  assert(file && "Attempt to readHeader() before openFile()");
  // Note: this is a substring test, so ".mtx" takes precedence whenever
  // both markers occur anywhere in the name.
  const bool looksLikeMME = strstr(filename, ".mtx") != nullptr;
  if (looksLikeMME) {
    readMMEHeader();
  } else if (strstr(filename, ".tns") != nullptr) {
    readExtFROSTTHeader();
  } else {
    MLIR_SPARSETENSOR_FATAL("Unknown format %s\n", filename);
  }
  assert(isValid() && "Failed to read the header");
}
/// Asserts that `rank` equals the rank from the header and that every
/// entry of `shape` is either zero (which matches any size) or equal to
/// the corresponding dimension size from the header. Only meaningful
/// after the header has been parsed.
void SparseTensorReader::assertMatchesShape(uint64_t rank,
                                            const uint64_t *shape) const {
  assert(rank == getRank() && "Rank mismatch");
  // The dimension sizes live in `idata` right after its first two entries.
  for (uint64_t d = 0; d != rank; ++d) {
    assert((!shape[d] || idata[d + 2] == shape[d]) &&
           "Dimension size mismatch");
  }
}
/// Returns whether values of the kind recorded in the header may be read
/// as the primary type `valTy`. Must be called after the header has been
/// parsed.
bool SparseTensorReader::canReadAs(PrimaryType valTy) const {
  switch (valueKind_) {
  case ValueKind::kInvalid:
    assert(false && "Must readHeader() before calling canReadAs()");
    // Still give a definite answer when assertions are compiled out.
    return false;
  case ValueKind::kPattern:
    // Pattern files are accepted for every primary type.
    return true;
  case ValueKind::kInteger:
    // Files declared as integer may also be read with floating
    // primary-types (implicit conversion is allowed).
  case ValueKind::kUndefined:
    // The "extended" FROSTT format does not declare a ValueKind, so the
    // stored values may be read as integer or floating primary-types.
    return isRealPrimaryType(valTy);
  case ValueKind::kReal:
    // Files declared as real/floating are never implicitly converted to
    // integer primary-types.
    return isFloatingPrimaryType(valTy);
  case ValueKind::kComplex:
    // Complex files demand a complex primary-type.
    return isComplexPrimaryType(valTy);
  }
  MLIR_SPARSETENSOR_FATAL("Unknown ValueKind: %d\n",
                          static_cast<uint8_t>(valueKind_));
}
/// Converts a C-style string (i.e., '\0' terminated) to lower case, in
/// place.
///
/// Each character is passed to `tolower` as an `unsigned char`: the
/// <cctype> functions have undefined behavior for negative arguments other
/// than EOF, which is what a plain `char` holding a byte >= 0x80 becomes
/// on platforms where `char` is signed.
static inline void toLower(char *token) {
  for (char *c = token; *c; ++c)
    *c = static_cast<char>(tolower(static_cast<unsigned char>(*c)));
}
/// Returns true iff the two C-style strings compare equal under `strcmp`.
static inline bool streq(const char *lhs, const char *rhs) {
  const int ordering = strcmp(lhs, rhs);
  return !ordering;
}
/// Returns true iff the two C-style strings differ under `strcmp`.
static inline bool strne(const char *lhs, const char *rhs) {
  const int ordering = strcmp(lhs, rhs);
  return ordering != 0;
}
/// Read the MME header of a general sparse matrix of type real.
void SparseTensorReader::readMMEHeader() {
char header[64];
char object[64];
char format[64];
char field[64];
char symmetry[64];
// Read header line.
if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field,
symmetry) != 5)
MLIR_SPARSETENSOR_FATAL("Corrupt header in %s\n", filename);
// Convert all to lowercase up front (to avoid accidental redundancy).
toLower(header);
toLower(object);
toLower(format);
toLower(field);
toLower(symmetry);
// Process `field`, which specify pattern or the data type of the values.
if (streq(field, "pattern"))
valueKind_ = ValueKind::kPattern;
else if (streq(field, "real"))
valueKind_ = ValueKind::kReal;
else if (streq(field, "integer"))
valueKind_ = ValueKind::kInteger;
else if (streq(field, "complex"))
valueKind_ = ValueKind::kComplex;
else
MLIR_SPARSETENSOR_FATAL("Unexpected header field value in %s\n", filename);
// Set properties.
isSymmetric_ = streq(symmetry, "symmetric");
// Make sure this is a general sparse matrix.
if (strne(header, "%%matrixmarket") || strne(object, "matrix") ||
strne(format, "coordinate") ||
(strne(symmetry, "general") && !isSymmetric_))
MLIR_SPARSETENSOR_FATAL("Cannot find a general sparse matrix in %s\n",
filename);
// Skip comments.
while (true) {
readLine();
if (line[0] != '%')
break;
}
// Next line contains M N NNZ.
idata[0] = 2; // rank
if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3,
idata + 1) != 3)
MLIR_SPARSETENSOR_FATAL("Cannot find size in %s\n", filename);
}
/// Reads the "extended" FROSTT header. Although not part of the documented
/// format, the file is assumed to start with optional '#' comment lines,
/// followed by one line holding the rank and the number of nonzeros, and
/// one line holding the dimension sizes (one per rank).
///
/// The rank is stored in idata[0], the number of nonzeros in idata[1], and
/// the dimension sizes from idata[2] onwards. Malformed input is reported
/// through MLIR_SPARSETENSOR_FATAL.
void SparseTensorReader::readExtFROSTTHeader() {
  // Skip comment lines; afterwards `line` holds the first line that does
  // not start with '#'.
  do {
    readLine();
  } while (line[0] == '#');

  // That line contains RANK and NNZ.
  const int numParsed =
      sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1);
  if (numParsed != 2)
    MLIR_SPARSETENSOR_FATAL("Cannot find metadata in %s\n", filename);

  // Followed by a line with the dimension sizes (one per rank).
  // NOTE(review): the rank comes straight from the file and is not checked
  // against the capacity of `idata` here -- confirm it is validated
  // elsewhere.
  const uint64_t rank = idata[0];
  for (uint64_t d = 0; d < rank; ++d) {
    if (fscanf(file, "%" PRIu64, idata + 2 + d) != 1)
      MLIR_SPARSETENSOR_FATAL("Cannot find dimension size %s\n", filename);
  }
  readLine(); // end of line

  // The FROSTT format does not define the data type of the nonzero elements.
  valueKind_ = ValueKind::kUndefined;
}
|