File: halide_image_io.h

package info (click to toggle)
halide 21.0.0-4
links: PTS, VCS
area: main
in suites: forky, sid
size: 55,752 kB
sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (2794 lines) | stat: -rw-r--r-- 91,166 bytes
parent folder | download | duplicates (3)
// This simple IO library works with the Halide::Buffer<T> type or any
// other image type with the same API.

#ifndef HALIDE_IMAGE_IO_H
#define HALIDE_IMAGE_IO_H

#include <algorithm>
#include <array>
#include <cctype>
#include <cmath>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <map>
#include <set>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#ifndef HALIDE_NO_PNG
#include "png.h"
#endif

#ifndef HALIDE_NO_JPEG
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif
#include "jpeglib.h"
#endif

#include "HalideRuntime.h"  // for halide_type_t

namespace Halide {
namespace Tools {

// One (element type, dimensionality) combination that an image format can
// store. Ordered so that it can be kept in a std::set.
struct FormatInfo {
    halide_type_t type;
    int dimensions;

    // Lexicographic comparison on (type.code, type.bits, type.lanes, dimensions).
    bool operator<(const FormatInfo &other) const {
        if (type.code != other.type.code) {
            return type.code < other.type.code;
        }
        if (type.bits != other.type.bits) {
            return type.bits < other.type.bits;
        }
        if (type.lanes != other.type.lanes) {
            return type.lanes < other.type.lanes;
        }
        return dimensions < other.dimensions;
    }
};

namespace Internal {

// Signature of an error-check policy: it receives the condition being
// verified plus a message describing the failure, and returns a bool that
// callers test to decide whether to continue.
typedef bool (*CheckFunc)(bool condition, const char *msg);

// Fatal check policy: when condition is false, prints msg (followed by a
// newline) to stderr and aborts the process; otherwise returns true.
inline bool CheckFail(bool condition, const char *msg) {
    if (condition) {
        return true;
    }
    fprintf(stderr, "%s\n", msg);
    abort();
    return false;  // not reached
}

// Non-fatal check policy: simply hands the condition back to the caller.
inline bool CheckReturn(bool condition, const char *msg) {
    (void)msg;  // the message is deliberately ignored by this policy
    return condition;
}

// Converts one element of type From into type To. It is only declared here;
// behavior is supplied by the explicit specializations.
template<typename To, typename From>
To convert(const From &from);

// Convert to bool: zero becomes false, every other value becomes true.
template<>
inline bool convert<bool, bool>(const bool &value) {
    return value;
}
template<>
inline bool convert<bool, uint8_t>(const uint8_t &value) {
    return value != 0;
}
template<>
inline bool convert<bool, uint16_t>(const uint16_t &value) {
    return value != 0;
}
template<>
inline bool convert<bool, uint32_t>(const uint32_t &value) {
    return value != 0;
}
template<>
inline bool convert<bool, uint64_t>(const uint64_t &value) {
    return value != 0;
}
template<>
inline bool convert<bool, int8_t>(const int8_t &value) {
    return value != 0;
}
template<>
inline bool convert<bool, int16_t>(const int16_t &value) {
    return value != 0;
}
template<>
inline bool convert<bool, int32_t>(const int32_t &value) {
    return value != 0;
}
template<>
inline bool convert<bool, int64_t>(const int64_t &value) {
    return value != 0;
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline bool convert<bool, _Float16>(const _Float16 &value) {
    return static_cast<float>(value) != 0;
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline bool convert<bool, float>(const float &value) {
    return value != 0;
}
template<>
inline bool convert<bool, double>(const double &value) {
    return value != 0;
}

// Convert to u8
template<>
inline uint8_t convert(const bool &in) {
    return in;
}
template<>
inline uint8_t convert(const uint8_t &in) {
    return in;
}
template<>
inline uint8_t convert(const uint16_t &in) {
    uint32_t tmp = (uint32_t)(in) + 0x80;
    // Fast approximation of div-by-257: see http://research.swtch.com/divmult
    return ((tmp * 255 + 255) >> 16);
}
template<>
inline uint8_t convert(const uint32_t &in) {
    return (uint8_t)((((uint64_t)in) + 0x00808080) / 0x01010101);
}
// uint64 -> 8 just discards the lower 32 bits: if you were expecting more precision, well, sorry
template<>
inline uint8_t convert(const uint64_t &in) {
    return convert<uint8_t, uint32_t>(uint32_t(in >> 32));
}
template<>
inline uint8_t convert(const int8_t &in) {
    return convert<uint8_t, uint8_t>(in);
}
template<>
inline uint8_t convert(const int16_t &in) {
    return convert<uint8_t, uint16_t>(in);
}
template<>
inline uint8_t convert(const int32_t &in) {
    return convert<uint8_t, uint32_t>(in);
}
template<>
inline uint8_t convert(const int64_t &in) {
    return convert<uint8_t, uint64_t>(in);
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline uint8_t convert(const _Float16 &in) {
    return (uint8_t)std::lround((float)in * 255.0f);
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline uint8_t convert(const float &in) {
    return (uint8_t)std::lround(in * 255.0f);
}
template<>
inline uint8_t convert(const double &in) {
    return (uint8_t)std::lround(in * 255.0);
}

// Convert to u16
template<>
inline uint16_t convert(const bool &in) {
    return in;
}
template<>
inline uint16_t convert(const uint8_t &in) {
    return uint16_t(in) * 0x0101;
}
template<>
inline uint16_t convert(const uint16_t &in) {
    return in;
}
template<>
inline uint16_t convert(const uint32_t &in) {
    return in >> 16;
}
template<>
inline uint16_t convert(const uint64_t &in) {
    return in >> 48;
}
template<>
inline uint16_t convert(const int8_t &in) {
    return convert<uint16_t, uint8_t>(in);
}
template<>
inline uint16_t convert(const int16_t &in) {
    return convert<uint16_t, uint16_t>(in);
}
template<>
inline uint16_t convert(const int32_t &in) {
    return convert<uint16_t, uint32_t>(in);
}
template<>
inline uint16_t convert(const int64_t &in) {
    return convert<uint16_t, uint64_t>(in);
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline uint16_t convert(const _Float16 &in) {
    return (uint16_t)std::lround((float)in * 65535.0f);
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline uint16_t convert(const float &in) {
    return (uint16_t)std::lround(in * 65535.0f);
}
template<>
inline uint16_t convert(const double &in) {
    return (uint16_t)std::lround(in * 65535.0);
}

// Convert to u32
template<>
inline uint32_t convert(const bool &in) {
    return in;
}
template<>
inline uint32_t convert(const uint8_t &in) {
    return uint32_t(in) * 0x01010101;
}
template<>
inline uint32_t convert(const uint16_t &in) {
    return uint32_t(in) * 0x00010001;
}
template<>
inline uint32_t convert(const uint32_t &in) {
    return in;
}
template<>
inline uint32_t convert(const uint64_t &in) {
    return (uint32_t)(in >> 32);
}
template<>
inline uint32_t convert(const int8_t &in) {
    return convert<uint32_t, uint8_t>(in);
}
template<>
inline uint32_t convert(const int16_t &in) {
    return convert<uint32_t, uint16_t>(in);
}
template<>
inline uint32_t convert(const int32_t &in) {
    return convert<uint32_t, uint32_t>(in);
}
template<>
inline uint32_t convert(const int64_t &in) {
    return convert<uint32_t, uint64_t>(in);
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline uint32_t convert(const _Float16 &in) {
    return (uint32_t)std::llround((float)in * 4294967295.0);
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline uint32_t convert(const float &in) {
    return (uint32_t)std::llround(in * 4294967295.0);
}
template<>
inline uint32_t convert(const double &in) {
    return (uint32_t)std::llround(in * 4294967295.0);
}

// Convert to u64. Narrower unsigned inputs are widened by replicating their
// bit pattern across all 64 bits. Signed inputs are reinterpreted as the
// unsigned type of the same width and then widened the same way.
// Floating-point inputs are first converted to a 32-bit value (scaled by
// 2^32 - 1 and rounded) and then widened.
template<>
inline uint64_t convert(const bool &in) {
    return in;
}
template<>
inline uint64_t convert(const uint8_t &in) {
    return uint64_t(in) * 0x0101010101010101LL;
}
template<>
inline uint64_t convert(const uint16_t &in) {
    return uint64_t(in) * 0x0001000100010001LL;
}
template<>
inline uint64_t convert(const uint32_t &in) {
    return uint64_t(in) * 0x0000000100000001LL;
}
template<>
inline uint64_t convert(const uint64_t &in) {
    return in;
}
template<>
inline uint64_t convert(const int8_t &in) {
    return convert<uint64_t, uint8_t>(in);
}
template<>
inline uint64_t convert(const int16_t &in) {
    return convert<uint64_t, uint16_t>(in);
}
template<>
inline uint64_t convert(const int32_t &in) {
    // Go through the same-width unsigned type, like the int8/int16 cases, so
    // the 32-bit pattern is replicated rather than merely sign-extended.
    return convert<uint64_t, uint32_t>(in);
}
template<>
inline uint64_t convert(const int64_t &in) {
    return convert<uint64_t, uint64_t>(in);
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline uint64_t convert(const _Float16 &in) {
    return convert<uint64_t, uint32_t>((uint32_t)std::llround((float)in * 4294967295.0));
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline uint64_t convert(const float &in) {
    return convert<uint64_t, uint32_t>((uint32_t)std::llround(in * 4294967295.0));
}
template<>
inline uint64_t convert(const double &in) {
    return convert<uint64_t, uint32_t>((uint32_t)std::llround(in * 4294967295.0));
}

// Convert to i8
template<>
inline int8_t convert(const bool &in) {
    return in;
}
template<>
inline int8_t convert(const uint8_t &in) {
    return convert<uint8_t, uint8_t>(in);
}
template<>
inline int8_t convert(const uint16_t &in) {
    return convert<uint8_t, uint16_t>(in);
}
template<>
inline int8_t convert(const uint32_t &in) {
    return convert<uint8_t, uint32_t>(in);
}
template<>
inline int8_t convert(const uint64_t &in) {
    return convert<uint8_t, uint64_t>(in);
}
template<>
inline int8_t convert(const int8_t &in) {
    return convert<uint8_t, int8_t>(in);
}
template<>
inline int8_t convert(const int16_t &in) {
    return convert<uint8_t, int16_t>(in);
}
template<>
inline int8_t convert(const int32_t &in) {
    return convert<uint8_t, int32_t>(in);
}
template<>
inline int8_t convert(const int64_t &in) {
    return convert<uint8_t, int64_t>(in);
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline int8_t convert(const _Float16 &in) {
    return convert<uint8_t, float>((float)in);
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline int8_t convert(const float &in) {
    return convert<uint8_t, float>(in);
}
template<>
inline int8_t convert(const double &in) {
    return convert<uint8_t, double>(in);
}

// Convert to i16
template<>
inline int16_t convert(const bool &in) {
    return in;
}
template<>
inline int16_t convert(const uint8_t &in) {
    return convert<uint16_t, uint8_t>(in);
}
template<>
inline int16_t convert(const uint16_t &in) {
    return convert<uint16_t, uint16_t>(in);
}
template<>
inline int16_t convert(const uint32_t &in) {
    return convert<uint16_t, uint32_t>(in);
}
template<>
inline int16_t convert(const uint64_t &in) {
    return convert<uint16_t, uint64_t>(in);
}
template<>
inline int16_t convert(const int8_t &in) {
    return convert<uint16_t, int8_t>(in);
}
template<>
inline int16_t convert(const int16_t &in) {
    return convert<uint16_t, int16_t>(in);
}
template<>
inline int16_t convert(const int32_t &in) {
    return convert<uint16_t, int32_t>(in);
}
template<>
inline int16_t convert(const int64_t &in) {
    return convert<uint16_t, int64_t>(in);
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline int16_t convert(const _Float16 &in) {
    return convert<uint16_t, float>((float)in);
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline int16_t convert(const float &in) {
    return convert<uint16_t, float>(in);
}
template<>
inline int16_t convert(const double &in) {
    return convert<uint16_t, double>(in);
}

// Convert to i32
template<>
inline int32_t convert(const bool &in) {
    return in;
}
template<>
inline int32_t convert(const uint8_t &in) {
    return convert<uint32_t, uint8_t>(in);
}
template<>
inline int32_t convert(const uint16_t &in) {
    return convert<uint32_t, uint16_t>(in);
}
template<>
inline int32_t convert(const uint32_t &in) {
    return convert<uint32_t, uint32_t>(in);
}
template<>
inline int32_t convert(const uint64_t &in) {
    return convert<uint32_t, uint64_t>(in);
}
template<>
inline int32_t convert(const int8_t &in) {
    return convert<uint32_t, int8_t>(in);
}
template<>
inline int32_t convert(const int16_t &in) {
    return convert<uint32_t, int16_t>(in);
}
template<>
inline int32_t convert(const int32_t &in) {
    return convert<uint32_t, int32_t>(in);
}
template<>
inline int32_t convert(const int64_t &in) {
    return convert<uint32_t, int64_t>(in);
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline int32_t convert(const _Float16 &in) {
    return convert<uint32_t, float>((float)in);
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline int32_t convert(const float &in) {
    return convert<uint32_t, float>(in);
}
template<>
inline int32_t convert(const double &in) {
    return convert<uint32_t, double>(in);
}

// Convert to i64
template<>
inline int64_t convert(const bool &in) {
    return in;
}
template<>
inline int64_t convert(const uint8_t &in) {
    return convert<uint64_t, uint8_t>(in);
}
template<>
inline int64_t convert(const uint16_t &in) {
    return convert<uint64_t, uint16_t>(in);
}
template<>
inline int64_t convert(const uint32_t &in) {
    return convert<uint64_t, uint32_t>(in);
}
template<>
inline int64_t convert(const uint64_t &in) {
    return convert<uint64_t, uint64_t>(in);
}
template<>
inline int64_t convert(const int8_t &in) {
    return convert<uint64_t, int8_t>(in);
}
template<>
inline int64_t convert(const int16_t &in) {
    return convert<uint64_t, int16_t>(in);
}
template<>
inline int64_t convert(const int32_t &in) {
    return convert<uint64_t, int32_t>(in);
}
template<>
inline int64_t convert(const int64_t &in) {
    return convert<uint64_t, int64_t>(in);
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline int64_t convert(const _Float16 &in) {
    return convert<uint64_t, float>((float)in);
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline int64_t convert(const float &in) {
    return convert<uint64_t, float>(in);
}
template<>
inline int64_t convert(const double &in) {
    return convert<uint64_t, double>(in);
}

#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
// Convert to f16. Unsigned integers are divided by their type's maximum
// value (uint64 uses only its top 32 bits); signed integers are reinterpreted
// as the unsigned type of the same width first; float and double are
// narrowed directly.
template<>
inline _Float16 convert(const bool &in) {
    return in;
}
template<>
inline _Float16 convert(const uint8_t &in) {
    return (_Float16)(in / 255.0f);
}
template<>
inline _Float16 convert(const uint16_t &in) {
    return (_Float16)(in / 65535.0f);
}
template<>
inline _Float16 convert(const uint32_t &in) {
    return (_Float16)(in / 4294967295.0);
}
template<>
inline _Float16 convert(const uint64_t &in) {
    return convert<_Float16, uint32_t>(uint32_t(in >> 32));
}
template<>
inline _Float16 convert(const int8_t &in) {
    return convert<_Float16, uint8_t>(in);
}
template<>
inline _Float16 convert(const int16_t &in) {
    return convert<_Float16, uint16_t>(in);
}
template<>
inline _Float16 convert(const int32_t &in) {
    // Must go through uint32_t: the uint64_t path keeps only the top 32 bits,
    // which for a widened int32_t holds nothing but the sign extension.
    return convert<_Float16, uint32_t>(in);
}
template<>
inline _Float16 convert(const int64_t &in) {
    return convert<_Float16, uint64_t>(in);
}
template<>
inline _Float16 convert(const _Float16 &in) {
    return in;
}
template<>
inline _Float16 convert(const float &in) {
    return (_Float16)in;
}
template<>
inline _Float16 convert(const double &in) {
    return (_Float16)in;
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16

// Convert to f32. Unsigned integers are divided by their type's maximum
// value (uint64 uses only its top 32 bits); signed integers are reinterpreted
// as the unsigned type of the same width first; double is narrowed directly.
template<>
inline float convert(const bool &in) {
    return in;
}
template<>
inline float convert(const uint8_t &in) {
    return in / 255.0f;
}
template<>
inline float convert(const uint16_t &in) {
    return in / 65535.0f;
}
template<>
inline float convert(const uint32_t &in) {
    return (float)(in / 4294967295.0);
}
template<>
inline float convert(const uint64_t &in) {
    return convert<float, uint32_t>(uint32_t(in >> 32));
}
template<>
inline float convert(const int8_t &in) {
    return convert<float, uint8_t>(in);
}
template<>
inline float convert(const int16_t &in) {
    return convert<float, uint16_t>(in);
}
template<>
inline float convert(const int32_t &in) {
    // Must go through uint32_t: the uint64_t path keeps only the top 32 bits,
    // which for a widened int32_t holds nothing but the sign extension (so
    // every non-negative input would become 0 and every negative one 1).
    return convert<float, uint32_t>(in);
}
template<>
inline float convert(const int64_t &in) {
    return convert<float, uint64_t>(in);
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline float convert(const _Float16 &in) {
    return (float)in;
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline float convert(const float &in) {
    return in;
}
template<>
inline float convert(const double &in) {
    return (float)in;
}

// Convert to f64. Unsigned integers are divided by their type's maximum
// value (uint64 uses only its top 32 bits); signed integers are reinterpreted
// as the unsigned type of the same width first; narrower floating-point
// types are widened directly.
template<>
inline double convert(const bool &in) {
    return in;
}
template<>
inline double convert(const uint8_t &in) {
    // Divide in double precision; a float divisor would round the quotient
    // to float before it is widened.
    return in / 255.0;
}
template<>
inline double convert(const uint16_t &in) {
    // Divide in double precision (see the uint8_t case).
    return in / 65535.0;
}
template<>
inline double convert(const uint32_t &in) {
    return (double)(in / 4294967295.0);
}
template<>
inline double convert(const uint64_t &in) {
    return convert<double, uint32_t>(uint32_t(in >> 32));
}
template<>
inline double convert(const int8_t &in) {
    return convert<double, uint8_t>(in);
}
template<>
inline double convert(const int16_t &in) {
    return convert<double, uint16_t>(in);
}
template<>
inline double convert(const int32_t &in) {
    // Must go through uint32_t: the uint64_t path keeps only the top 32 bits,
    // which for a widened int32_t holds nothing but the sign extension.
    return convert<double, uint32_t>(in);
}
template<>
inline double convert(const int64_t &in) {
    return convert<double, uint64_t>(in);
}
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline double convert(const _Float16 &in) {
    return (double)in;
}
#endif  // HALIDE_CPP_COMPILER_HAS_FLOAT16
template<>
inline double convert(const float &in) {
    return (double)in;
}
template<>
inline double convert(const double &in) {
    return in;
}

// Returns a copy of s with every character passed through tolower().
inline std::string to_lowercase(const std::string &s) {
    std::string r = s;
    // tolower() has undefined behavior for negative arguments other than EOF,
    // so each char is routed through unsigned char first.
    std::transform(r.begin(), r.end(), r.begin(),
                   [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });
    return r;
}

// Returns the lowercased text after the last '.' in path, or an empty string
// when path contains no '.'.
inline std::string get_lowercase_extension(const std::string &path) {
    const size_t dot_pos = path.rfind('.');
    const bool has_dot = (dot_pos != std::string::npos);
    return has_dot ? to_lowercase(path.substr(dot_pos + 1)) : std::string("");
}

// Decodes one ElemType stored big-endian at src. Specialized for uint8_t and
// uint16_t.
template<typename ElemType>
ElemType read_big_endian(const uint8_t *src);

template<>
inline uint8_t read_big_endian<uint8_t>(const uint8_t *src) {
    return src[0];
}

template<>
inline uint16_t read_big_endian<uint16_t>(const uint8_t *src) {
    const uint16_t high_byte = src[0];
    const uint16_t low_byte = src[1];
    return static_cast<uint16_t>((high_byte << 8) | low_byte);
}

// Encodes src at dst in big-endian byte order. Specialized for uint8_t and
// uint16_t.
template<typename ElemType>
void write_big_endian(const ElemType &src, uint8_t *dst);

template<>
inline void write_big_endian<uint8_t>(const uint8_t &src, uint8_t *dst) {
    dst[0] = src;
}

template<>
inline void write_big_endian<uint16_t>(const uint16_t &src, uint8_t *dst) {
    // Most significant byte first.
    dst[1] = src & 0xff;
    dst[0] = src >> 8;
}

// Owning wrapper around a C FILE*: the file is opened in the constructor and
// closed in the destructor. `f` is nullptr when the file could not be opened;
// the I/O helpers below then report failure instead of touching the stream.
struct FileOpener {
    FileOpener(const std::string &filename, const char *mode)
        : f(fopen(filename.c_str(), mode)) {
        // nothing
    }

    ~FileOpener() {
        if (f != nullptr) {
            fclose(f);
        }
    }

    // The destructor closes the handle, so a copy would close it a second
    // time. Copying is therefore disabled.
    FileOpener(const FileOpener &) = delete;
    FileOpener &operator=(const FileOpener &) = delete;

    // Read a line of data into buf (at most maxlen - 1 characters), skipping
    // lines that begin with '#'. Returns buf, or nullptr on EOF/error.
    char *read_line(char *buf, int maxlen) {
        if (f == nullptr) {
            return nullptr;
        }
        char *status;
        do {
            status = fgets(buf, maxlen, f);
        } while (status && buf[0] == '#');
        return (status);
    }

    // Call read_line and do a sscanf() on it. Returns the number of fields
    // assigned, or 0 when no line could be read.
    int scan_line(const char *fmt, ...) {
        char buf[1024];
        if (!read_line(buf, 1024)) {
            return 0;
        }
        va_list args;
        va_start(args, fmt);
        int result = vsscanf(buf, fmt, args);
        va_end(args);
        return result;
    }

    // Reads exactly count bytes into data; false on a short read.
    bool read_bytes(void *data, size_t count) {
        return f != nullptr && fread(data, 1, count, f) == count;
    }

    // Fills a fixed-size array from the file.
    template<typename T, size_t N>
    bool read_array(T (&data)[N]) {
        return read_bytes(&data[0], sizeof(T) * N);
    }

    // Fills the vector's existing elements (its size is not changed).
    template<typename T>
    bool read_vector(std::vector<T> *v) {
        return read_bytes(v->data(), v->size() * sizeof(T));
    }

    // Writes exactly count bytes from data; false on a short write.
    bool write_bytes(const void *data, size_t count) {
        return f != nullptr && fwrite(data, 1, count, f) == count;
    }

    template<typename T>
    bool write_vector(const std::vector<T> &v) {
        return write_bytes(v.data(), v.size() * sizeof(T));
    }

    template<typename T, size_t N>
    bool write_array(const T (&data)[N]) {
        return write_bytes(&data[0], sizeof(T) * N);
    }

    FILE *const f;
};

// Passed as the dimension-count template argument to ImageType::as<>() by the
// row helpers below, which handle both 2-D and 3-D images at runtime.
constexpr int AnyDims = -1;

// Read a row of ElemTypes from a byte buffer and copy them into a specific image row.
// Multibyte elements are assumed to be big-endian.
// For images with more than two dimensions the source is interleaved: all
// channels of one pixel are consecutive, with x varying slowest.
template<typename ElemType, typename ImageType>
void read_big_endian_row(const uint8_t *src, int y, ImageType *im) {
    auto im_typed = im->template as<ElemType, AnyDims>();
    const int xmin = im_typed.dim(0).min();
    const int xmax = im_typed.dim(0).max();
    if (im_typed.dimensions() > 2) {
        const int cmin = im_typed.dim(2).min();
        const int cmax = im_typed.dim(2).max();
        for (int x = xmin; x <= xmax; x++) {
            for (int c = cmin; c <= cmax; c++) {
                // c already ranges over [cmin, cmax], so it is used as the
                // coordinate directly; adding cmin again would index past the
                // channel range whenever cmin is nonzero.
                im_typed(x, y, c) = read_big_endian<ElemType>(src);
                src += sizeof(ElemType);
            }
        }
    } else {
        for (int x = xmin; x <= xmax; x++) {
            im_typed(x, y) = read_big_endian<ElemType>(src);
            src += sizeof(ElemType);
        }
    }
}

// Serialize row y of an image into a byte buffer.
// Multibyte elements are emitted most-significant byte first. For images with
// more than two dimensions the channels of each pixel are written
// consecutively.
template<typename ElemType, typename ImageType>
void write_big_endian_row(const ImageType &im, int y, uint8_t *dst) {
    auto typed = im.template as<const ElemType, AnyDims>();
    const int x_first = typed.dim(0).min();
    const int x_last = typed.dim(0).max();

    if (typed.dimensions() <= 2) {
        for (int x = x_first; x <= x_last; x++, dst += sizeof(ElemType)) {
            write_big_endian<ElemType>(typed(x, y), dst);
        }
        return;
    }

    const int c_first = typed.dim(2).min();
    const int c_last = typed.dim(2).max();
    for (int x = x_first; x <= x_last; x++) {
        for (int c = c_first; c <= c_last; c++, dst += sizeof(ElemType)) {
            write_big_endian<ElemType>(typed(x, y, c), dst);
        }
    }
}

#ifndef HALIDE_NO_PNG

template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool load_png(const std::string &filename, ImageType *im) {
    static_assert(!ImageType::has_static_halide_type, "");

    /* open file and test for it being a png */
    Internal::FileOpener f(filename, "rb");
    if (!check(f.f != nullptr, "File could not be opened for reading")) {
        return false;
    }
    png_byte header[8];
    if (!check(f.read_array(header), "File ended before end of header")) {
        return false;
    }
    if (!check(!png_sig_cmp(header, 0, 8), "File is not recognized as a PNG file")) {
        return false;
    }

    /* initialize stuff */
    png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
    if (!check(png_ptr != nullptr, "png_create_read_struct failed")) {
        return false;
    }

    png_infop info_ptr = png_create_info_struct(png_ptr);
    if (!check(info_ptr != nullptr, "png_create_info_struct failed")) {
        return false;
    }

    if (!check(!setjmp(png_jmpbuf(png_ptr)), "Error loading PNG")) {
        return false;
    }

    png_init_io(png_ptr, f.f);
    png_set_sig_bytes(png_ptr, 8);

    png_read_info(png_ptr, info_ptr);

    const int width = png_get_image_width(png_ptr, info_ptr);
    const int height = png_get_image_height(png_ptr, info_ptr);
    const int channels = png_get_channels(png_ptr, info_ptr);
    const int bit_depth = png_get_bit_depth(png_ptr, info_ptr);

    const halide_type_t im_type(halide_type_uint, bit_depth);
    std::vector<int> im_dimensions = {width, height};
    if (channels != 1) {
        im_dimensions.push_back(channels);
    }

    *im = ImageType(im_type, im_dimensions);

    png_read_update_info(png_ptr, info_ptr);

    auto copy_to_image = bit_depth == 8 ?
                             Internal::read_big_endian_row<uint8_t, ImageType> :
                             Internal::read_big_endian_row<uint16_t, ImageType>;

    std::vector<uint8_t> row(png_get_rowbytes(png_ptr, info_ptr));
    const int ymin = im->dim(1).min();
    const int ymax = im->dim(1).max();
    for (int y = ymin; y <= ymax; ++y) {
        png_read_row(png_ptr, row.data(), nullptr);
        copy_to_image(row.data(), y, im);
    }

    png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);

    return true;
}

inline const std::set<FormatInfo> &query_png() {
    static std::set<FormatInfo> info = {
        {halide_type_t(halide_type_uint, 8), 2},
        {halide_type_t(halide_type_uint, 16), 2},
        {halide_type_t(halide_type_uint, 8), 3},
        {halide_type_t(halide_type_uint, 16), 3}};
    return info;
}

// "im" is not const-ref because copy_to_host() is not const.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool save_png(ImageType &im, const std::string &filename) {
    static_assert(!ImageType::has_static_halide_type, "");

    if (!check(im.copy_to_host() == halide_error_code_success, "copy_to_host() failed.")) {
        return false;
    }

    const int width = im.width();
    const int height = im.height();
    const int channels = im.channels();

    if (!check(channels >= 1 && channels <= 4,
               "Can't write PNG files that have other than 1, 2, 3, or 4 channels")) {
        return false;
    }

    const png_byte color_types[4] = {
        PNG_COLOR_TYPE_GRAY,
        PNG_COLOR_TYPE_GRAY_ALPHA,
        PNG_COLOR_TYPE_RGB,
        PNG_COLOR_TYPE_RGB_ALPHA};
    png_byte color_type = color_types[channels - 1];

    // open file
    Internal::FileOpener f(filename, "wb");
    if (!check(f.f != nullptr, "[write_png_file] File could not be opened for writing")) {
        return false;
    }

    // initialize stuff
    png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
    if (!check(png_ptr != nullptr, "[write_png_file] png_create_write_struct failed")) {
        return false;
    }

    png_infop info_ptr = png_create_info_struct(png_ptr);
    if (!check(info_ptr != nullptr, "[write_png_file] png_create_info_struct failed")) {
        return false;
    }

    if (!check(!setjmp(png_jmpbuf(png_ptr)), "Error saving PNG")) {
        return false;
    }

    png_init_io(png_ptr, f.f);

    const halide_type_t im_type = im.type();
    const int bit_depth = im_type.bits;

    png_set_IHDR(png_ptr, info_ptr, width, height,
                 bit_depth, color_type, PNG_INTERLACE_NONE,
                 PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);

    png_write_info(png_ptr, info_ptr);

    auto copy_from_image = bit_depth == 8 ?
                               Internal::write_big_endian_row<uint8_t, ImageType> :
                               Internal::write_big_endian_row<uint16_t, ImageType>;

    std::vector<uint8_t> row(png_get_rowbytes(png_ptr, info_ptr));
    const int ymin = im.dim(1).min();
    const int ymax = im.dim(1).max();
    for (int y = ymin; y <= ymax; ++y) {
        copy_from_image(im, y, row.data());
        png_write_row(png_ptr, row.data());
    }
    png_write_end(png_ptr, nullptr);
    png_destroy_write_struct(&png_ptr, &info_ptr);

    return true;
}

#endif  // not HALIDE_NO_PNG

// Parses the three header lines of a binary PNM file from f: the magic token
// (compared case-insensitively against hdr_fmt), "width height", and the
// maximum sample value. A maximum of 255 yields *bit_depth = 8 and 65535
// yields 16; any other value sets *bit_depth to 0 and fails.
// Returns true on success; failures are reported through `check`.
template<Internal::CheckFunc check>
bool read_pnm_header(Internal::FileOpener &f, const std::string &hdr_fmt, int *width, int *height, int *bit_depth) {
    const bool is_open = (f.f != nullptr);
    if (!check(is_open, "File could not be opened for reading")) {
        return false;
    }

    char magic[256];
    const int magic_fields = f.scan_line("%255s", magic);
    if (!check(magic_fields == 1, "Could not read header")) {
        return false;
    }

    const bool magic_matches = (to_lowercase(hdr_fmt) == to_lowercase(magic));
    if (!check(magic_matches, "Unexpected file header")) {
        return false;
    }

    const int size_fields = f.scan_line("%d %d\n", width, height);
    if (!check(size_fields == 2, "Could not read width and height")) {
        return false;
    }

    int max_sample;
    const int max_fields = f.scan_line("%d", &max_sample);
    if (!check(max_fields == 1, "Could not read max value")) {
        return false;
    }

    switch (max_sample) {
    case 255:
        *bit_depth = 8;
        return true;
    case 65535:
        *bit_depth = 16;
        return true;
    default:
        *bit_depth = 0;
        return check(false, "Invalid bit depth");
    }
}

// Loads a binary PNM file into *im. `channels` selects the expected format:
// 3 expects a "P6" header, any other value expects "P5". On success *im is
// replaced by a new uint8 or uint16 image of dimensions {width, height}, plus
// a channel dimension when channels > 1, and true is returned.
// Failures are reported through `check`; when it returns, load_pnm returns
// false.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool load_pnm(const std::string &filename, int channels, ImageType *im) {
    static_assert(!ImageType::has_static_halide_type, "");

    const char *hdr_fmt = channels == 3 ? "P6" : "P5";

    Internal::FileOpener f(filename, "rb");
    int width, height, bit_depth;
    if (!Internal::read_pnm_header<check>(f, hdr_fmt, &width, &height, &bit_depth)) {
        return false;
    }

    // The header comes from the file, so reject sizes that would make the
    // row-buffer computation below meaningless.
    if (!check(width > 0 && height > 0, "Invalid image dimensions")) {
        return false;
    }

    const halide_type_t im_type(halide_type_uint, bit_depth);
    std::vector<int> im_dimensions = {width, height};
    if (channels > 1) {
        im_dimensions.push_back(channels);
    }
    *im = ImageType(im_type, im_dimensions);

    auto copy_to_image = bit_depth == 8 ?
                             Internal::read_big_endian_row<uint8_t, ImageType> :
                             Internal::read_big_endian_row<uint16_t, ImageType>;

    // Computed in size_t so that large widths cannot overflow int.
    const size_t row_bytes = static_cast<size_t>(width) *
                             static_cast<size_t>(channels) *
                             static_cast<size_t>(bit_depth / 8);
    std::vector<uint8_t> row(row_bytes);
    const int ymin = im->dim(1).min();
    const int ymax = im->dim(1).max();
    for (int y = ymin; y <= ymax; ++y) {
        if (!check(f.read_vector(&row), "Could not read data")) {
            return false;
        }
        copy_to_image(row.data(), y, im);
    }

    return true;
}

// Writes `im` to `filename` as a binary PNM file. `channels` must equal
// im.channels(); 3 writes a "P6" header, any other value writes "P5". The
// image's element type must be 8 or 16 bits wide; 16-bit samples are written
// big-endian.
// Failures are reported through `check`; when it returns, save_pnm returns
// false.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool save_pnm(ImageType &im, const int channels, const std::string &filename) {
    static_assert(!ImageType::has_static_halide_type, "");

    if (!check(im.channels() == channels, "Wrong number of channels")) {
        return false;
    }

    if (!check(im.copy_to_host() == halide_error_code_success, "copy_to_host() failed.")) {
        return false;
    }

    const halide_type_t im_type = im.type();
    const int width = im.width();
    const int height = im.height();
    const int bit_depth = im_type.bits;

    // Checked before the file is created: the row writer below only handles
    // 8- and 16-bit samples, and (1 << bit_depth) is undefined for depths of
    // 32 or more.
    if (!check(bit_depth == 8 || bit_depth == 16,
               "Can only write PNM files with 8 or 16 bits per sample")) {
        return false;
    }

    Internal::FileOpener f(filename, "wb");
    if (!check(f.f != nullptr, "File could not be opened for writing")) {
        return false;
    }
    const char *hdr_fmt = channels == 3 ? "P6" : "P5";
    const int header_chars = fprintf(f.f, "%s\n%d %d\n%d\n", hdr_fmt, width, height, (1 << bit_depth) - 1);
    if (!check(header_chars > 0, "Could not write header")) {
        return false;
    }

    auto copy_from_image = bit_depth == 8 ?
                               Internal::write_big_endian_row<uint8_t, ImageType> :
                               Internal::write_big_endian_row<uint16_t, ImageType>;

    // Computed in size_t so that large widths cannot overflow int.
    const size_t row_bytes = static_cast<size_t>(width) *
                             static_cast<size_t>(channels) *
                             static_cast<size_t>(bit_depth / 8);
    std::vector<uint8_t> row(row_bytes);
    const int ymin = im.dim(1).min();
    const int ymax = im.dim(1).max();
    for (int y = ymin; y <= ymax; ++y) {
        copy_from_image(im, y, row.data());
        if (!check(f.write_vector(row), "Could not write data")) {
            return false;
        }
    }

    return true;
}

// Loads a PGM file by delegating to load_pnm with a single channel.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool load_pgm(const std::string &filename, ImageType *im) {
    constexpr int pgm_channels = 1;
    return Internal::load_pnm<ImageType, check>(filename, pgm_channels, im);
}

inline const std::set<FormatInfo> &query_pgm() {
    static std::set<FormatInfo> info = {
        {halide_type_t(halide_type_uint, 8), 2},
        {halide_type_t(halide_type_uint, 16), 2}};
    return info;
}

// Saves a PGM file by delegating to save_pnm with a single channel.
// "im" is taken by non-const reference because copy_to_host() is not const.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool save_pgm(ImageType &im, const std::string &filename) {
    constexpr int pgm_channels = 1;
    return Internal::save_pnm<ImageType, check>(im, pgm_channels, filename);
}

// Loads a PPM file by delegating to load_pnm with three channels.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool load_ppm(const std::string &filename, ImageType *im) {
    constexpr int ppm_channels = 3;
    return Internal::load_pnm<ImageType, check>(filename, ppm_channels, im);
}

inline const std::set<FormatInfo> &query_ppm() {
    static std::set<FormatInfo> info = {
        {halide_type_t(halide_type_uint, 8), 3},
        {halide_type_t(halide_type_uint, 16), 3}};
    return info;
}

// Saves a PPM file by delegating to save_pnm with three channels.
// "im" is taken by non-const reference because copy_to_host() is not const.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool save_ppm(ImageType &im, const std::string &filename) {
    constexpr int ppm_channels = 3;
    return Internal::save_pnm<ImageType, check>(im, ppm_channels, filename);
}

// -------------- .npy file format
// Based on documentation at https://numpy.org/devdocs/reference/generated/numpy.lib.format.html
// and elsewhere

// Host byte order, decided at preprocessing time.
//
// __BYTE_ORDER / __BIG_ENDIAN are only defined once a libc header such as
// <endian.h> has been included, so the compiler-predefined
// __BYTE_ORDER__ / __ORDER_BIG_ENDIAN__ pair (GCC, Clang) is consulted as well.
// Defining HALIDE_FORCE_BIG_ENDIAN overrides detection. If none of the macros
// is available the host is treated as little-endian.
#if defined(HALIDE_FORCE_BIG_ENDIAN) ||                       \
    (defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN) || \
    (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
constexpr bool host_is_big_endian = true;
#else
constexpr bool host_is_big_endian = false;
#endif

// Byte-order characters used as the first character of a .npy 'descr' string.
constexpr char little_endian_char = '<';
constexpr char big_endian_char = '>';
constexpr char no_endian_char = '|';  // used for the single-byte types
constexpr char host_endian_char = (host_is_big_endian ? big_endian_char : little_endian_char);

// The three components of a .npy 'descr' string such as "<f4".
struct npy_dtype_info_t {
    char byte_order;  // first character of the descr string
    char type_code;   // second character of the descr string
    char type_bytes;  // element size; rendered as a decimal number

    // Returns the descr string: byte_order, then type_code, then type_bytes
    // printed as a decimal integer.
    std::string descr() const {
        std::string text;
        text.push_back(byte_order);
        text.push_back(type_code);
        text += std::to_string(static_cast<int>(type_bytes));
        return text;
    }
};

// Table pairing each Halide element type that the .npy code handles with its
// .npy dtype description. load_npy() searches it by (type_code, type_bytes)
// and save_npy() searches it by Halide type. Single-byte types use
// no_endian_char; all wider types use the host's byte order.
inline static const std::array<std::pair<halide_type_t, npy_dtype_info_t>, 11> npy_dtypes = {{
    {halide_type_t(halide_type_float, 16), {host_endian_char, 'f', 2}},
    {halide_type_of<float>(), {host_endian_char, 'f', sizeof(float)}},
    {halide_type_of<double>(), {host_endian_char, 'f', sizeof(double)}},
    {halide_type_of<int8_t>(), {no_endian_char, 'i', sizeof(int8_t)}},
    {halide_type_of<int16_t>(), {host_endian_char, 'i', sizeof(int16_t)}},
    {halide_type_of<int32_t>(), {host_endian_char, 'i', sizeof(int32_t)}},
    {halide_type_of<int64_t>(), {host_endian_char, 'i', sizeof(int64_t)}},
    {halide_type_of<uint8_t>(), {no_endian_char, 'u', sizeof(uint8_t)}},
    {halide_type_of<uint16_t>(), {host_endian_char, 'u', sizeof(uint16_t)}},
    {halide_type_of<uint32_t>(), {host_endian_char, 'u', sizeof(uint32_t)}},
    {halide_type_of<uint64_t>(), {host_endian_char, 'u', sizeof(uint64_t)}},
}};

// The six bytes that open every .npy file: 0x93 followed by "NUMPY".
inline static const std::array<char, 6> npy_magic_string = {{'\x93', 'N', 'U', 'M', 'P', 'Y'}};
// Version bytes (major 1, minor 0) written right after the magic string by save_npy().
inline static const std::array<char, 2> npy_v1_bytes = {{'\x01', '\x00'}};

// Returns a copy of s with leading and trailing spaces, tabs and newlines
// removed. Other characters (including '\r') are not treated as whitespace.
// A string consisting only of those three characters yields "".
inline std::string trim_whitespace(const std::string &s) {
    const char *const blanks = " \t\n";
    const size_t begin = s.find_first_not_of(blanks);
    if (begin == std::string::npos) {
        return std::string();
    }
    // A non-blank character exists, so find_last_not_of cannot return npos here.
    const size_t end = s.find_last_not_of(blanks) + 1;
    return std::string(s, begin, end - begin);
}

// Parsed contents of the Python-dict-literal header of a .npy file.
struct NpyHeader {
    // Zero-initialized so that a header lacking a 'descr' entry yields values
    // that match no supported dtype instead of indeterminate ones.
    char type_code = 0;
    int type_bytes = 0;
    std::vector<int> extents;

    // Parses a header of the form
    //   {'descr': '<f4', 'fortran_order': False, 'shape': (3, 4), }
    // Keys may appear in any order; each may be followed by a comma and/or a
    // single space. Only the exact text 'fortran_order': False is accepted for
    // that key, and only '<' or '|' is accepted as the descr byte-order
    // character (so '>' headers are rejected).
    //
    // Returns true once the closing '}' is reached, false on anything that is
    // not recognized (including a truncated header). On failure the members
    // may have been partially updated.
    bool parse(const std::string &header) {
        // c_str() is NUL-terminated, which is what stops sscanf/strncmp below.
        const char *ptr = header.c_str();
        if (*ptr++ != '{') {
            return false;
        }
        while (true) {
            char endian = 0;
            int consumed = 0;
            if (std::sscanf(ptr, "'descr': '%c%c%d'%n", &endian, &type_code, &type_bytes, &consumed) == 3) {
                // sscanf reports 3 conversions as soon as %d matches; %n is only
                // stored if the closing quote matched too. A zero here means
                // the quote was missing.
                if (consumed <= 0) {
                    return false;
                }
                if (endian != '<' && endian != '|') {
                    return false;
                }
                ptr += consumed;
            } else if (std::strncmp(ptr, "'fortran_order': False", 22) == 0) {
                ptr += 22;
            } else if (std::strncmp(ptr, "'shape': (", 10) == 0) {
                ptr += 10;
                int n;
                while (std::sscanf(ptr, "%d%n", &n, &consumed) == 1) {
                    extents.push_back(n);
                    ptr += consumed;
                    if (*ptr == ',') {
                        ptr++;
                    }
                    if (*ptr == ' ') {
                        ptr++;
                    }
                }
                if (*ptr++ != ')') {
                    return false;
                }
            } else if (*ptr == '}') {
                return true;
            } else {
                // Unknown key, or the NUL terminator of a truncated header.
                return false;
            }
            if (*ptr == ',') {
                ptr++;
            }
            if (*ptr == ' ') {
                ptr++;
            }
            // No bounds assertion is needed: ptr only ever advances over
            // non-NUL characters, so it can at most rest on the terminator,
            // where the next iteration takes the final 'return false' branch.
        }
    }
};

// Returns true iff the buffer's storage has no padding between any
// elements and its dimensions are laid out in strictly planar order
// (strides non-decreasing from dimension 0 upward).
template<typename ImageType>
bool buffer_is_compact_planar(ImageType &im) {
    // The span [begin, end) must be exactly number_of_elements elements wide.
    const size_t bytes_per_elem = (im.type().bits / 8);
    const uint8_t *const first_byte = (const uint8_t *)im.begin();
    const uint8_t *const expected_end = first_byte + (im.number_of_elements() * bytes_per_elem);
    if (expected_end != (const uint8_t *)im.end()) {
        return false;
    }

    for (int outer = 1; outer < im.dimensions(); ++outer) {
        const int inner = outer - 1;
        if (im.dim(inner).stride() > im.dim(outer).stride()) {
            return false;
        }
        // Two adjacent dimensions may share a stride only when the inner one
        // has extent 1 (this happens when dimensions are added artificially,
        // e.g. to write a .tmp file).
        const bool same_stride = (im.dim(inner).stride() == im.dim(outer).stride());
        if (same_stride && im.dim(inner).extent() != 1) {
            return false;
        }
    }
    return true;
}

// Load a NumPy .npy file into *im.
//
// Accepts format versions 1.0, 2.0 and 3.0. Version 1 stores the header length
// as 2 little-endian bytes, the others as 4. The header dict is parsed with
// NpyHeader::parse(), the element type is looked up in npy_dtypes, *im is
// replaced by a freshly constructed ImageType(type, extents), and the payload
// is read straight into it.
//
// Every failure is reported through `check`; the function returns false when
// `check` returns.
template<typename ImageType, CheckFunc check = CheckReturn>
bool load_npy(const std::string &filename, ImageType *im) {
    static_assert(!ImageType::has_static_halide_type, "");

    FileOpener f(filename, "rb");
    if (!check(f.f != nullptr, "File could not be opened for reading")) {
        return false;
    }

    // Six magic bytes followed by the major and minor version bytes.
    char magic_and_version[8];
    if (!check(f.read_bytes(magic_and_version, 8), "Could not read .npy header")) {
        return false;
    }
    if (memcmp(magic_and_version, npy_magic_string.data(), npy_magic_string.size()) != 0) {
        return check(false, "Bad .npy magic string");
    }
    if ((magic_and_version[6] != 1 && magic_and_version[6] != 2 && magic_and_version[6] != 3) || magic_and_version[7] != 0) {
        return check(false, "Bad .npy version");
    }

    // Decode the little-endian header length. Each byte is widened to size_t
    // before shifting: shifting the promoted int by 24 would move a high byte
    // into the sign bit and then sign-extend into a bogus, enormous length.
    size_t header_len;
    uint8_t header_len_le[4];
    if (magic_and_version[6] == 1) {
        if (!check(f.read_bytes(header_len_le, 2), "Could not read .npy header")) {
            return false;
        }
        header_len = ((size_t)header_len_le[0] << 0) | ((size_t)header_len_le[1] << 8);
        if (!check((6 + 2 + 2 + header_len) % 64 == 0, ".npy header is not aligned properly")) {
            return false;
        }
    } else {
        if (!check(f.read_bytes(header_len_le, 4), "Could not read .npy header")) {
            return false;
        }
        header_len = ((size_t)header_len_le[0] << 0) |
                     ((size_t)header_len_le[1] << 8) |
                     ((size_t)header_len_le[2] << 16) |
                     ((size_t)header_len_le[3] << 24);
        if (!check((6 + 2 + 4 + header_len) % 64 == 0, ".npy header is not aligned properly")) {
            return false;
        }
    }

    // One extra trailing space is kept after the bytes read from the file.
    std::string header(header_len + 1, ' ');
    if (!check(f.read_bytes(header.data(), header_len), "Could not read .npy header string")) {
        return false;
    }

    NpyHeader h;
    if (!check(h.parse(header), "Could not parse .npy header dict")) {
        return false;
    }

    // bits == 0 marks "no matching dtype found".
    halide_type_t im_type((halide_type_code_t)0, 0, 0);
    for (const auto &d : npy_dtypes) {
        if (h.type_code == d.second.type_code && h.type_bytes == d.second.type_bytes) {
            im_type = d.first;
            break;
        }
    }
    if (!check(im_type.bits != 0, "Unsupported type in load_npy")) {
        return false;
    }

    *im = ImageType(im_type, h.extents);

    // This should never fail unless the default Buffer<> constructor behavior changes.
    if (!check(buffer_is_compact_planar(*im), "load_npy() requires compact planar images")) {
        return false;
    }

    if (!check(f.read_bytes(im->begin(), im->size_in_bytes()), "Could not read .npy payload")) {
        return false;
    }

    im->set_host_dirty();
    return true;
}

template<typename ImageType, CheckFunc check = CheckReturn>
bool write_planar_payload(ImageType &im, FileOpener &f) {
    if (im.dimensions() == 0 || buffer_is_compact_planar(im)) {
        // Contiguous buffer! Write it all in one swell foop.
        if (!check(f.write_bytes(im.begin(), im.size_in_bytes()), "Count not write planar payload")) {
            return false;
        }
    } else {
        // We have to do this the hard way.
        int d = im.dimensions() - 1;
        for (int i = im.dim(d).min(); i <= im.dim(d).max(); i++) {
            auto slice = im.sliced(d, i);
            if (!write_planar_payload(slice, f)) {
                return false;
            }
        }
    }
    return true;
}

// Save im as a version-1.0 NumPy .npy file.
//
// The element type must appear in npy_dtypes. The header dict is padded with
// spaces so that magic + version + length field + dict is a multiple of 64
// bytes, and its length must fit the 2-byte v1 length field.
//
// Every failure is reported through `check`; the function returns false when
// `check` returns.
template<typename ImageType, CheckFunc check = CheckReturn>
bool save_npy(ImageType &im, const std::string &filename) {
    static_assert(!ImageType::has_static_halide_type, "");

    if (!check(im.copy_to_host() == halide_error_code_success, "copy_to_host() failed.")) {
        return false;
    }

    // byte_order == 0 marks "no matching dtype found".
    const halide_type_t im_type = im.type();
    npy_dtype_info_t di = {0, 0, 0};
    for (const auto &d : npy_dtypes) {
        if (d.first == im_type) {
            di = d.second;
            break;
        }
    }
    if (!check(di.byte_order != 0, "Unsupported type in save_npy")) {
        return false;
    }

    // NOTE(review): extents are emitted in Halide dimension order (dim 0
    // first) next to 'fortran_order': False — confirm this is the ordering
    // NumPy consumers are expected to see.
    std::string shape = "(";
    for (int d = 0; d < im.dimensions(); ++d) {
        if (d > 0) {
            shape += ",";
        }
        shape += std::to_string(im.dim(d).extent());
        if (im.dimensions() == 1) {
            shape += ",";  // special-case for single-element tuples
        }
    }
    shape += ")";

    std::string header_dict_str = "{'descr': '" + di.descr() + "', 'fortran_order': False, 'shape': " + shape + "}\n";

    // The "+ 2" is the little-endian uint16 length field of a v1 file.
    const size_t unpadded_length = npy_magic_string.size() + npy_v1_bytes.size() + 2 + header_dict_str.size();
    const size_t padded_length = (unpadded_length + 64 - 1) & ~(64 - 1);
    const size_t padding = padded_length - unpadded_length;
    header_dict_str += std::string(padding, ' ');

    if (!check(header_dict_str.size() <= 65535, "Header is too large for v1 .npy file")) {
        return false;
    }
    const uint16_t header_len = (uint16_t)(header_dict_str.size());
    const uint8_t header_len_le[2] = {
        (uint8_t)((header_len >> 0) & 0xff),
        (uint8_t)((header_len >> 8) & 0xff)};

    FileOpener f(filename, "wb");
    // Bail out before the first write if the file could not be opened,
    // matching the other save_* functions.
    if (!check(f.f != nullptr, "File could not be opened for writing")) {
        return false;
    }
    if (!check(f.write_bytes(npy_magic_string.data(), npy_magic_string.size()), ".npy write failed")) {
        return false;
    }
    if (!check(f.write_bytes(npy_v1_bytes.data(), npy_v1_bytes.size()), ".npy write failed")) {
        return false;
    }
    if (!check(f.write_bytes(header_len_le, 2), ".npy write failed")) {
        return false;
    }
    if (!check(f.write_bytes(header_dict_str.data(), header_dict_str.size()), ".npy write failed")) {
        return false;
    }

    if (!write_planar_payload<ImageType, check>(im, f)) {
        return false;
    }

    return true;
}

inline const std::set<FormatInfo> &query_npy() {
    auto build_set = []() -> std::set<FormatInfo> {
        // NumPy doesn't support bfloat16, not sure if they plan to,
        // so we don't attempt to support it here
        std::set<FormatInfo> s;
        for (halide_type_code_t code : {halide_type_int, halide_type_uint, halide_type_float}) {
            for (int bits : {8, 16, 32, 64}) {
                if (code == halide_type_float && bits < 16) {
                    continue;
                }
                for (int dims : {1, 2, 3, 4}) {
                    s.insert({halide_type_t(code, bits), dims});
                }
            }
        }
        return s;
    };

    static std::set<FormatInfo> info = build_set();
    return info;
}

#ifndef HALIDE_NO_JPEG

// Load a JPEG file into *im using libjpeg.
//
// *im is replaced by a uint8 image of extents {width, height}, with a third
// dimension of size `channels` appended only when the decoder reports more
// than one output component. Rows are decoded one scanline at a time.
//
// Only the file-open failure goes through `check`.
// NOTE(review): the return values of the libjpeg calls are not inspected;
// decode errors are handled by whatever jpeg_std_error() installs.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool load_jpg(const std::string &filename, ImageType *im) {
    static_assert(!ImageType::has_static_halide_type, "");

    Internal::FileOpener f(filename, "rb");
    if (!check(f.f != nullptr, "File could not be opened for reading")) {
        return false;
    }

    struct jpeg_decompress_struct cinfo;
    struct jpeg_error_mgr jerr;
    cinfo.err = jpeg_std_error(&jerr);
    jpeg_create_decompress(&cinfo);
    jpeg_stdio_src(&cinfo, f.f);
    jpeg_read_header(&cinfo, TRUE);
    jpeg_start_decompress(&cinfo);

    const int width = cinfo.output_width;
    const int height = cinfo.output_height;
    const int channels = cinfo.output_components;

    const halide_type_t im_type(halide_type_uint, 8);
    std::vector<int> im_dimensions = {width, height};
    if (channels > 1) {
        im_dimensions.push_back(channels);
    }
    *im = ImageType(im_type, im_dimensions);

    auto copy_to_image = Internal::read_big_endian_row<uint8_t, ImageType>;

    // One interleaved scanline of width * channels samples.
    std::vector<uint8_t> row(width * channels);
    const int ymin = im->dim(1).min();
    const int ymax = im->dim(1).max();
    for (int y = ymin; y <= ymax; ++y) {
        uint8_t *src = row.data();
        jpeg_read_scanlines(&cinfo, &src, 1);
        copy_to_image(row.data(), y, im);
    }

    jpeg_finish_decompress(&cinfo);
    jpeg_destroy_decompress(&cinfo);

    // The host copy was just filled in; mark it dirty like the other loaders do.
    im->set_host_dirty();
    return true;
}

inline const std::set<FormatInfo> &query_jpg() {
    static std::set<FormatInfo> info = {
        {halide_type_t(halide_type_uint, 8), 2},
        {halide_type_t(halide_type_uint, 8), 3},
    };
    return info;
}

template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool save_jpg(ImageType &im, const std::string &filename) {
    static_assert(!ImageType::has_static_halide_type, "");

    if (!check(im.copy_to_host() == halide_error_code_success, "copy_to_host() failed.")) {
        return false;
    }

    const int width = im.width();
    const int height = im.height();
    const int channels = im.channels();
    if (!check(channels == 1 || channels == 3, "Wrong number of channels")) {
        return false;
    }

    Internal::FileOpener f(filename, "wb");
    if (!check(f.f != nullptr, "File could not be opened for writing")) {
        return false;
    }

    // TODO: Make this an argument?
    constexpr int quality = 99;

    struct jpeg_compress_struct cinfo;
    struct jpeg_error_mgr jerr;
    cinfo.err = jpeg_std_error(&jerr);
    jpeg_create_compress(&cinfo);
    jpeg_stdio_dest(&cinfo, f.f);
    cinfo.image_width = width;
    cinfo.image_height = height;
    cinfo.input_components = channels;
    cinfo.in_color_space = (channels == 3) ? JCS_RGB : JCS_GRAYSCALE;
    jpeg_set_defaults(&cinfo);
    jpeg_set_quality(&cinfo, quality, TRUE);
    jpeg_start_compress(&cinfo, TRUE);

    auto copy_from_image = Internal::write_big_endian_row<uint8_t, ImageType>;

    std::vector<uint8_t> row(width * channels);
    const int ymin = im.dim(1).min();
    const int ymax = im.dim(1).max();
    for (int y = ymin; y <= ymax; ++y) {
        uint8_t *dst = row.data();
        copy_from_image(im, y, dst);
        jpeg_write_scanlines(&cinfo, &dst, 1);
    }

    jpeg_finish_compress(&cinfo);
    jpeg_destroy_compress(&cinfo);

    return true;
}

#endif  // not HALIDE_NO_JPEG

// Number of element-type codes understood by the .tmp reader/writer.
constexpr int kNumTmpCodes = 10;

// Returns a table of kNumTmpCodes entries mapping a .tmp type code (the
// array index) to the Halide element type it denotes.
inline const halide_type_t *tmp_code_to_halide_type() {
    static const halide_type_t types_by_code[kNumTmpCodes] = {
        halide_type_t(halide_type_float, 32),  // code 0
        halide_type_t(halide_type_float, 64),  // code 1
        halide_type_t(halide_type_uint, 8),    // code 2
        halide_type_t(halide_type_int, 8),     // code 3
        halide_type_t(halide_type_uint, 16),   // code 4
        halide_type_t(halide_type_int, 16),    // code 5
        halide_type_t(halide_type_uint, 32),   // code 6
        halide_type_t(halide_type_int, 32),    // code 7
        halide_type_t(halide_type_uint, 64),   // code 8
        halide_type_t(halide_type_int, 64),    // code 9
    };
    return types_by_code;
}

// ".tmp" is a file format used by the ImageStack tool (see https://github.com/abadams/ImageStack)
//
// Load a .tmp file into *im. The file starts with five int32 values: four
// extents (each must be positive) followed by a type code that indexes
// tmp_code_to_halide_type(). *im is replaced by a 4-dimensional image of that
// type and the payload is read straight into it.
//
// Every failure is reported through `check`; the function returns false when
// `check` returns.
template<typename ImageType, CheckFunc check = CheckReturn>
bool load_tmp(const std::string &filename, ImageType *im) {
    static_assert(!ImageType::has_static_halide_type, "");

    FileOpener f(filename, "rb");
    if (!check(f.f != nullptr, "File could not be opened for reading")) {
        return false;
    }

    int32_t header[5];
    if (!check(f.read_array(header), "Could not read .tmp header")) {
        return false;
    }

    if (!check(header[0] > 0 && header[1] > 0 && header[2] > 0 && header[3] > 0 &&
                   header[4] >= 0 && header[4] < kNumTmpCodes,
               "Bad header on .tmp file")) {
        return false;
    }

    const halide_type_t im_type = tmp_code_to_halide_type()[header[4]];
    std::vector<int> im_dimensions = {header[0], header[1], header[2], header[3]};
    *im = ImageType(im_type, im_dimensions);

    // This should never fail unless the default Buffer<> constructor behavior changes.
    if (!check(buffer_is_compact_planar(*im), "load_tmp() requires compact planar images")) {
        return false;
    }

    if (!check(f.read_bytes(im->begin(), im->size_in_bytes()), "Could not read .tmp payload")) {
        return false;
    }

    im->set_host_dirty();
    return true;
}

inline const std::set<FormatInfo> &query_tmp() {
    // TMP files require exactly 4 dimensions.
    static std::set<FormatInfo> info = {
        {halide_type_t(halide_type_float, 32), 4},
        {halide_type_t(halide_type_float, 64), 4},
        {halide_type_t(halide_type_uint, 8), 4},
        {halide_type_t(halide_type_int, 8), 4},
        {halide_type_t(halide_type_uint, 16), 4},
        {halide_type_t(halide_type_int, 16), 4},
        {halide_type_t(halide_type_uint, 32), 4},
        {halide_type_t(halide_type_int, 32), 4},
        {halide_type_t(halide_type_uint, 64), 4},
        {halide_type_t(halide_type_int, 64), 4},
    };
    return info;
}

// ".tmp" is a file format used by the ImageStack tool (see https://github.com/abadams/ImageStack)
//
// Save im as a .tmp file. The header is five int32 values: four extents
// (missing dimensions are written as 1) followed by the index of the image's
// type in tmp_code_to_halide_type(). Images with more than 4 dimensions or
// with a type not in that table are rejected.
//
// Every failure is reported through `check`; the function returns false when
// `check` returns.
template<typename ImageType, CheckFunc check = CheckReturn>
bool save_tmp(ImageType &im, const std::string &filename) {
    static_assert(!ImageType::has_static_halide_type, "");

    if (!check(im.copy_to_host() == halide_error_code_success, "copy_to_host() failed.")) {
        return false;
    }

    // The header only has room for four extents; anything more would
    // overwrite the type-code slot and run off the end of the array.
    if (!check(im.dimensions() <= 4, "Can only save .tmp files with <= 4 dimensions")) {
        return false;
    }

    // header[4] stays -1 until a matching type code is found.
    int32_t header[5] = {1, 1, 1, 1, -1};
    for (int i = 0; i < im.dimensions(); ++i) {
        header[i] = im.dim(i).extent();
    }
    const auto *table = tmp_code_to_halide_type();
    for (int i = 0; i < kNumTmpCodes; i++) {
        if (im.type() == table[i]) {
            header[4] = i;
            break;
        }
    }
    if (!check(header[4] >= 0, "Unsupported type for .tmp file")) {
        return false;
    }

    FileOpener f(filename, "wb");
    if (!check(f.f != nullptr, "File could not be opened for writing")) {
        return false;
    }
    if (!check(f.write_array(header), "Could not write .tmp header")) {
        return false;
    }

    if (!write_planar_payload<ImageType, check>(im, f)) {
        return false;
    }

    return true;
}

// ".mat" is the matlab level 5 format documented here:
// http://www.mathworks.com/help/pdf_doc/matlab/matfile_format.pdf

// Data-type codes stored in the first word of a .mat data element header.
// load_mat() compares header words against these values and save_mat()
// writes them.
enum MatlabTypeCode {
    // Integer element types
    miINT8 = 1,
    miUINT8 = 2,
    miINT16 = 3,
    miUINT16 = 4,
    miINT32 = 5,
    miUINT32 = 6,
    // Floating-point element types
    miSINGLE = 7,
    miDOUBLE = 9,
    // 64-bit integer element types
    miINT64 = 12,
    miUINT64 = 13,
    // Container / compressed elements
    miMATRIX = 14,
    miCOMPRESSED = 15,
    // Text encodings
    miUTF8 = 16,
    miUTF16 = 17,
    miUTF32 = 18,
};

// Array class codes. save_mat() stores one of these in the third word of the
// array-flags element to identify the element type of the saved array.
enum MatlabClassCode {
    mxCHAR_CLASS = 3,
    // Floating-point arrays
    mxDOUBLE_CLASS = 6,
    mxSINGLE_CLASS = 7,
    // Integer arrays, signed and unsigned in pairs of increasing width
    mxINT8_CLASS = 8,
    mxUINT8_CLASS = 9,
    mxINT16_CLASS = 10,
    mxUINT16_CLASS = 11,
    mxINT32_CLASS = 12,
    mxUINT32_CLASS = 13,
    mxINT64_CLASS = 14,
    mxUINT64_CLASS = 15,
};

// Load the first array of a .mat file into *im.
//
// The reader expects, in order: the 128-byte file header (read but not
// inspected), a miMATRIX element header, an 8-byte miUINT32 array-flags
// element, an miINT32 shape element, the array name, and finally the payload
// element whose type code selects the Halide element type. *im is replaced
// by a freshly constructed ImageType(type, extents) and the payload is read
// straight into it.
//
// Every failure is reported through `check`; the function returns false when
// `check` returns.
template<typename ImageType, CheckFunc check = CheckReturn>
bool load_mat(const std::string &filename, ImageType *im) {
    static_assert(!ImageType::has_static_halide_type, "");

    FileOpener f(filename, "rb");
    if (!check(f.f != nullptr, "File could not be opened for reading")) {
        return false;
    }

    uint8_t header[128];
    if (!check(f.read_array(header), "Could not read .mat header\n")) {
        return false;
    }

    // Matrix header
    uint32_t matrix_header[2];
    if (!check(f.read_array(matrix_header), "Could not read .mat header\n")) {
        return false;
    }
    if (!check(matrix_header[0] == miMATRIX, "Could not parse this .mat file: bad matrix header\n")) {
        return false;
    }

    // Array flags
    uint32_t flags[4];
    if (!check(f.read_array(flags), "Could not read .mat header\n")) {
        return false;
    }
    if (!check(flags[0] == miUINT32 && flags[1] == 8, "Could not parse this .mat file: bad flags\n")) {
        return false;
    }

    // Shape
    uint32_t shape_header[2];
    if (!check(f.read_array(shape_header), "Could not read .mat header\n")) {
        return false;
    }
    if (!check(shape_header[0] == miINT32, "Could not parse this .mat file: bad shape header\n")) {
        return false;
    }
    // shape_header[1] is a byte count; each extent is a 4-byte int.
    int dims = shape_header[1] / 4;
    std::vector<int> extents(dims);
    if (!check(f.read_vector(&extents), "Could not read .mat header\n")) {
        return false;
    }
    // An odd number of extents is followed by 4 bytes of padding.
    if (dims & 1) {
        uint32_t padding;
        if (!check(f.read_bytes(&padding, 4), "Could not read .mat header\n")) {
            return false;
        }
    }

    // Skip over the name
    uint32_t name_header[2];
    if (!check(f.read_array(name_header), "Could not read .mat header\n")) {
        return false;
    }

    if (name_header[0] >> 16) {
        // Name must be fewer than 4 chars, and so the whole name
        // field was stored packed into 8 bytes
    } else {
        if (!check(name_header[0] == miINT8, "Could not parse this .mat file: bad name header\n")) {
            return false;
        }
        // The name is stored padded up to a multiple of 8 bytes.
        std::vector<uint64_t> scratch((name_header[1] + 7) / 8);
        if (!check(f.read_vector(&scratch), "Could not read .mat header\n")) {
            return false;
        }
    }

    // Payload header
    uint32_t payload_header[2];
    if (!check(f.read_array(payload_header), "Could not read .mat header\n")) {
        return false;
    }
    halide_type_t type;
    switch (payload_header[0]) {
    case miINT8:
        type = halide_type_of<int8_t>();
        break;
    case miINT16:
        type = halide_type_of<int16_t>();
        break;
    case miINT32:
        type = halide_type_of<int32_t>();
        break;
    case miINT64:
        type = halide_type_of<int64_t>();
        break;
    case miUINT8:
        type = halide_type_of<uint8_t>();
        break;
    case miUINT16:
        type = halide_type_of<uint16_t>();
        break;
    case miUINT32:
        type = halide_type_of<uint32_t>();
        break;
    case miUINT64:
        type = halide_type_of<uint64_t>();
        break;
    case miSINGLE:
        type = halide_type_of<float>();
        break;
    case miDOUBLE:
        type = halide_type_of<double>();
        break;
    default:
        check(false, "Unknown header");
        return false;
    }

    *im = ImageType(type, extents);

    // This should never fail unless the default Buffer<> constructor behavior changes.
    if (!check(buffer_is_compact_planar(*im), "load_mat() requires compact planar images")) {
        return false;
    }

    if (!check(f.read_bytes(im->begin(), im->size_in_bytes()), "Could not read .mat payload")) {
        return false;
    }

    im->set_host_dirty();
    return true;
}

// Formats reported for .mat: int and uint at 8/16/32/64 bits plus float at
// 32/64 bits, each with 2 through 15 dimensions.
// The set is built once on first use and shared by every later call.
inline const std::set<FormatInfo> &query_mat() {
    // MAT files must have at least 2 dimensions, but there's no upper
    // bound. Our support arbitrarily stops short of 16 dimensions.
    auto build_set = []() -> std::set<FormatInfo> {
        std::set<FormatInfo> formats;
        for (int dims = 2; dims < 16; ++dims) {
            for (halide_type_code_t code : {halide_type_uint, halide_type_int}) {
                for (int bits : {8, 16, 32, 64}) {
                    formats.insert({halide_type_t(code, bits), dims});
                }
            }
            for (int bits : {32, 64}) {
                formats.insert({halide_type_t(halide_type_float, bits), dims});
            }
        }
        return formats;
    };

    static std::set<FormatInfo> info = build_set();
    return info;
}

template<typename ImageType, CheckFunc check = CheckReturn>
bool save_mat(ImageType &im, const std::string &filename) {
    static_assert(!ImageType::has_static_halide_type, "");

    if (!check(im.copy_to_host() == halide_error_code_success, "copy_to_host() failed.")) {
        return false;
    }

    uint32_t class_code = 0, type_code = 0;
    switch (im.raw_buffer()->type.code) {
    case halide_type_int:
        switch (im.raw_buffer()->type.bits) {
        case 8:
            class_code = mxINT8_CLASS;
            type_code = miINT8;
            break;
        case 16:
            class_code = mxINT16_CLASS;
            type_code = miINT16;
            break;
        case 32:
            class_code = mxINT32_CLASS;
            type_code = miINT32;
            break;
        case 64:
            class_code = mxINT64_CLASS;
            type_code = miINT64;
            break;
        default:
            check(false, "unreachable");
        };
        break;
    case halide_type_uint:
        switch (im.raw_buffer()->type.bits) {
        case 8:
            class_code = mxUINT8_CLASS;
            type_code = miUINT8;
            break;
        case 16:
            class_code = mxUINT16_CLASS;
            type_code = miUINT16;
            break;
        case 32:
            class_code = mxUINT32_CLASS;
            type_code = miUINT32;
            break;
        case 64:
            class_code = mxUINT64_CLASS;
            type_code = miUINT64;
            break;
        default:
            check(false, "unreachable");
        };
        break;
    case halide_type_float:
        switch (im.raw_buffer()->type.bits) {
        case 16:
            check(false, "float16 not supported by .mat");
            break;
        case 32:
            class_code = mxSINGLE_CLASS;
            type_code = miSINGLE;
            break;
        case 64:
            class_code = mxDOUBLE_CLASS;
            type_code = miDOUBLE;
            break;
        default:
            check(false, "unreachable");
        };
        break;
    case halide_type_bfloat:
        check(false, "bfloat not supported by .mat");
        break;
    default:
        check(false, "unreachable");
    }

    FileOpener f(filename, "wb");
    if (!check(f.f != nullptr, "File could not be opened for writing")) {
        return false;
    }

    // Pick a name for the array
    size_t idx = filename.rfind('.');
    std::string name = filename.substr(0, idx);
    idx = filename.rfind('/');
    if (idx != std::string::npos) {
        name = name.substr(idx + 1);
    }

    // Matlab variable names conform to similar rules as C
    if (name.empty() || !std::isalpha(name[0])) {
        name = "v" + name;
    }
    for (char &c : name) {
        if (!std::isalnum(c)) {
            c = '_';
        }
    }

    uint32_t name_size = (int)name.size();
    while (name.size() & 0x7) {
        name += '\0';
    }

    char header[128] = "MATLAB 5.0 MAT-file, produced by Halide";
    int len = strlen(header);
    memset(header + len, ' ', sizeof(header) - len);

    // Version
    *((uint16_t *)(header + 124)) = 0x0100;

    // Endianness check
    header[126] = 'I';
    header[127] = 'M';

    uint64_t payload_bytes = im.size_in_bytes();

    if (!check((payload_bytes >> 32) == 0, "Buffer too large to save as .mat")) {
        return false;
    }

    int dims = im.dimensions();
    if (dims < 2) {
        dims = 2;
    }
    int padded_dims = dims + (dims & 1);

    uint32_t padding_bytes = 7 - ((payload_bytes - 1) & 7);

    // Matrix header
    uint32_t matrix_header[2] = {
        miMATRIX, 40 + padded_dims * 4 + (uint32_t)name.size() + (uint32_t)payload_bytes + padding_bytes};

    // Array flags
    uint32_t flags[4] = {
        miUINT32, 8, class_code, 1};

    // Shape
    int32_t shape[2] = {
        miINT32,
        im.dimensions() * 4,
    };
    std::vector<int> extents(im.dimensions());
    for (int d = 0; d < im.dimensions(); d++) {
        extents[d] = im.dim(d).extent();
    }
    while ((int)extents.size() < dims) {
        extents.push_back(1);
    }
    while ((int)extents.size() < padded_dims) {
        extents.push_back(0);
    }

    // Name
    uint32_t name_header[2] = {
        miINT8, name_size};

    // Payload header
    uint32_t payload_header[2] = {
        type_code, (uint32_t)payload_bytes};

    bool success =
        f.write_array(header) &&
        f.write_array(matrix_header) &&
        f.write_array(flags) &&
        f.write_array(shape) &&
        f.write_vector(extents) &&
        f.write_array(name_header) &&
        f.write_bytes(&name[0], name.size()) &&
        f.write_array(payload_header);

    if (!check(success, "Could not write .mat header")) {
        return false;
    }

    if (!write_planar_payload<ImageType, check>(im, f)) {
        return false;
    }

    // Padding
    if (!check(padding_bytes < 8, "Too much padding!\n")) {
        return false;
    }
    uint64_t padding = 0;
    if (!f.write_bytes(&padding, padding_bytes)) {
        return false;
    }

    return true;
}

// Placeholder: TIFF reading is not implemented.
// Reports the failure through `check`, then returns false regardless of what
// `check` returned. Neither argument is used.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool load_tiff(const std::string &filename, ImageType *im) {
    static_assert(!ImageType::has_static_halide_type, "");
    (void)filename;
    (void)im;
    check(false, "Reading TIFF is not yet supported");
    return false;
}

inline const std::set<FormatInfo> &query_tiff() {
    auto build_set = []() -> std::set<FormatInfo> {
        std::set<FormatInfo> s;
        for (halide_type_code_t code : {halide_type_int, halide_type_uint, halide_type_float}) {
            for (int bits : {8, 16, 32, 64}) {
                for (int dims : {1, 2, 3, 4}) {
                    if (code == halide_type_float && bits < 32) {
                        continue;
                    }
                    s.insert({halide_type_t(code, bits), dims});
                }
            }
        }
        return s;
    };

    static std::set<FormatInfo> info = build_set();
    return info;
}

// Both structs below are laid out with 2-byte packing so that their in-memory
// image has no padding between the 16-bit and 32-bit fields.
#pragma pack(push)
#pragma pack(2)

// One 12-byte directory entry: tag, field type, value count and an inline value.
struct halide_tiff_tag {
    uint16_t tag_code;
    int16_t type_code;
    int32_t count;
    union {
        int8_t i8;
        int16_t i16;
        int32_t i32;
    } value;

    // Fill in an entry of field type 3 (SHORT). Only the 16-bit member of the
    // value union is written.
    void assign16(uint16_t tag_code, int32_t count, int16_t value) {
        halide_tiff_tag &self = *this;
        self.tag_code = tag_code;
        self.type_code = 3;  // SHORT
        self.count = count;
        self.value.i16 = value;
    }

    // Fill in an entry of field type 4 (LONG).
    void assign32(uint16_t tag_code, int32_t count, int32_t value) {
        assign32(tag_code, static_cast<int16_t>(4) /* LONG */, count, value);
    }

    // Fill in an entry with a caller-chosen field type and a 32-bit value.
    void assign32(uint16_t tag_code, int16_t type_code, int32_t count, int32_t value) {
        halide_tiff_tag &self = *this;
        self.tag_code = tag_code;
        self.type_code = type_code;
        self.count = count;
        self.value.i32 = value;
    }
};

// File header immediately followed by one directory of 15 entries and the
// two resolution value pairs.
struct halide_tiff_header {
    int16_t byte_order_marker;
    int16_t version;
    int32_t ifd0_offset;
    int16_t entry_count;
    halide_tiff_tag entries[15];
    int32_t ifd0_end;
    int32_t width_resolution[2];
    int32_t height_resolution[2];
};

#pragma pack(pop)

// Buffered element sink: elements passed to operator() are collected in a
// fixed-size array and handed to FileOpener::write_bytes() whenever the array
// fills up, on an explicit flush(), and on destruction.
//
// After the first failed write `ok` becomes false and every later call is a
// no-op; callers inspect `ok` to learn whether all writes succeeded.
template<typename ElemType, int BUFFER_SIZE = 1024>
struct ElemWriter {
    ElemWriter(FileOpener *f)
        : f(f), next(buf) {
    }
    ~ElemWriter() {
        flush();
    }

    // Append one element, flushing if that fills the buffer.
    void operator()(const ElemType &elem) {
        if (ok) {
            *next++ = elem;
            if (next == buf + BUFFER_SIZE) {
                flush();
            }
        }
    }

    // Write out any buffered elements and rewind the buffer.
    void flush() {
        if (!ok || next <= buf) {
            return;
        }
        const size_t pending_bytes = (next - buf) * sizeof(ElemType);
        if (!f->write_bytes(buf, pending_bytes)) {
            ok = false;
        }
        next = buf;
    }

    FileOpener *const f;
    ElemType buf[BUFFER_SIZE];
    ElemType *next;  // one past the last buffered element
    bool ok = true;
};

// Write `im` to `filename` as an uncompressed TIFF.
//
// Note that this is a fairly simpleminded TIFF writer that doesn't
// do any compression. It would be desirable to (optionally) support using libtiff
// here instead, which would also allow us to provide a useful implementation
// for TIFF reading.
//
// The image must be dynamically typed (enforced by the static_assert), have
// at most 4 dimensions, and have a type code below 3 (i.e. not a handle).
// Every failure is routed through `check`; the function returns false if
// `check` returns false, and true once the header and all pixel data have
// been written.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool save_tiff(ImageType &im, const std::string &filename) {
    static_assert(!ImageType::has_static_halide_type, "");

    if (!check(im.copy_to_host() == halide_error_code_success, "copy_to_host() failed.")) {
        return false;
    }

    if (!check(im.dimensions() <= 4, "Can only save TIFF files with <= 4 dimensions")) {
        return false;
    }

    FileOpener f(filename, "wb");
    if (!check(f.f != nullptr, "File could not be opened for writing")) {
        return false;
    }

    // Capture the shape, padding any missing trailing dimensions with
    // extent 1 so that the code below can always index shape[0..3].
    const size_t elements = im.number_of_elements();
    halide_dimension_t shape[4];
    for (int i = 0; i < im.dimensions() && i < 4; i++) {
        const auto &d = im.dim(i);
        shape[i].min = d.min();
        shape[i].extent = d.extent();
        shape[i].stride = d.stride();
    }
    for (int i = im.dimensions(); i < 4; i++) {
        shape[i].min = 0;
        shape[i].extent = 1;
        shape[i].stride = 0;
    }
    const halide_type_t im_type = im.type();
    // The type code is used below as an index into a 3-entry table.
    if (!check(im_type.code >= 0 && im_type.code < 3, "Unsupported image type")) {
        return false;
    }
    const int32_t bytes_per_element = im_type.bytes();
    const int32_t width = shape[0].extent;
    const int32_t height = shape[1].extent;
    int32_t depth = shape[2].extent;
    int32_t channels = shape[3].extent;

    // When the 4th dimension is absent/trivial and the 3rd is small (< 5),
    // treat the 3rd dimension as the channel count rather than as depth.
    if ((channels == 0 || channels == 1) && (depth < 5)) {
        channels = depth;
        depth = 1;
    }

    // Lookup table from Halide type code to the value stored in the TIFF
    // SampleFormat tag. The table is indexed by im_type.code:
    //     0 => Signed int
    //     1 => Unsigned int
    //     2 => Floating-point
    // and the entries are the TIFF SampleFormat values (per the TIFF 6.0
    // spec: 1 = unsigned int, 2 = signed int, 3 = IEEE floating-point).
    static const int16_t type_code_to_tiff_sample_type[] = {
        2, 1, 3};

    struct halide_tiff_header header;
    memset(&header, 0, sizeof(header));

    const int32_t MMII = 0x4d4d4949;
    // Select the appropriate two bytes signaling byte order automatically
    const char *c = (const char *)&MMII;
    header.byte_order_marker = (c[0] << 8) | c[1];
    header.version = 42;
    header.ifd0_offset = offsetof(halide_tiff_header, entry_count);
    header.entry_count = sizeof(header.entries) / sizeof(header.entries[0]);

    // Fill in the IFD entries. The number of assignments below must equal
    // header.entry_count; the assert after the list verifies that.
    static_assert(sizeof(halide_tiff_tag) == 12, "Unexpected halide_tiff_tag packing");
    halide_tiff_tag *tag = &header.entries[0];
    tag++->assign32(256, 1, width);                           // ImageWidth
    tag++->assign32(257, 1, height);                          // ImageLength
    tag++->assign16(258, 1, int16_t(bytes_per_element * 8));  // BitsPerSample
    tag++->assign16(259, 1, 1);                               // Compression -- none
    tag++->assign16(262, 1, channels >= 3 ? 2 : 1);           // PhotometricInterpretation -- black is zero or RGB
    tag++->assign32(273, channels, sizeof(header));           // StripOffsets
    tag++->assign16(277, 1, int16_t(channels));               // SamplesPerPixel
    tag++->assign32(278, 1, height);                          // RowsPerStrip
    tag++->assign32(279, channels,                            // StripByteCounts
                    (channels == 1) ?
                        elements * bytes_per_element :
                        sizeof(header) + channels * sizeof(int32_t));  // for channels > 1, this is an offset
    tag++->assign32(282, 5, 1,
                    offsetof(halide_tiff_header, width_resolution));  // XResolution
    tag++->assign32(283, 5, 1,
                    offsetof(halide_tiff_header, height_resolution));      // YResolution
    tag++->assign16(284, 1, channels == 1 ? 1 : 2);                        // PlanarConfiguration -- contig or planar
    tag++->assign16(296, 1, 1);                                            // ResolutionUnit -- none
    tag++->assign16(339, 1, type_code_to_tiff_sample_type[im_type.code]);  // SampleFormat
    tag++->assign32(32997, 1, depth);                                      // Image depth

    // Verify we used exactly the number we declared
    assert(tag == &header.entries[header.entry_count]);

    header.ifd0_end = 0;
    header.width_resolution[0] = 1;
    header.width_resolution[1] = 1;
    header.height_resolution[0] = 1;
    header.height_resolution[1] = 1;

    if (!check(f.write_bytes(&header, sizeof(header)), "TIFF write failed")) {
        return false;
    }

    // With more than one channel, the StripOffsets and StripByteCounts tags
    // above hold file offsets rather than values; the two arrays they point
    // at (one int32 per channel each) are written here, directly after the
    // header. Pixel data therefore starts after both arrays.
    if (channels > 1) {
        // Fill in the values for StripOffsets
        int32_t offset = sizeof(header) + channels * sizeof(int32_t) * 2;
        for (int32_t i = 0; i < channels; i++) {
            if (!check(f.write_bytes(&offset, sizeof(offset)), "TIFF write failed")) {
                return false;
            }
            offset += width * height * depth * bytes_per_element;
        }
        // Fill in the values for StripByteCounts
        int32_t count = width * height * depth * bytes_per_element;
        for (int32_t i = 0; i < channels; i++) {
            if (!check(f.write_bytes(&count, sizeof(count)), "TIFF write failed")) {
                return false;
            }
        }
    }

    // If image is dense, we can write it in one fell swoop
    // NOTE(review): this writes the buffer in whatever order it has in
    // memory; presumably that is expected to match the layout declared in
    // the header -- confirm for buffers with non-default stride ordering.
    if (elements * bytes_per_element == im.size_in_bytes()) {
        if (!check(f.write_bytes(im.data(), im.size_in_bytes()), "TIFF write failed")) {
            return false;
        }
        return true;
    }

    // Otherwise, write it out via manual traversal.
    // Each HANDLE_CASE expands to one `case` of the switch below: it views
    // the image as the given element type, streams every value through an
    // ElemWriter via for_each_value(), and fails if any write failed.
#define HANDLE_CASE(CODE, BITS, TYPE)                             \
    case halide_type_t(CODE, BITS).as_u32(): {                    \
        ElemWriter<TYPE> ew(&f);                                  \
        im.template as<const TYPE, AnyDims>().for_each_value(ew); \
        if (!check(ew.ok, "TIFF write failed")) {                 \
            return false;                                         \
        }                                                         \
        break;                                                    \
    }

    switch (im_type.element_of().as_u32()) {
        HANDLE_CASE(halide_type_float, 32, float)
        HANDLE_CASE(halide_type_float, 64, double)
        HANDLE_CASE(halide_type_int, 8, int8_t)
        HANDLE_CASE(halide_type_int, 16, int16_t)
        HANDLE_CASE(halide_type_int, 32, int32_t)
        HANDLE_CASE(halide_type_int, 64, int64_t)
        HANDLE_CASE(halide_type_uint, 1, bool)
        HANDLE_CASE(halide_type_uint, 8, uint8_t)
        HANDLE_CASE(halide_type_uint, 16, uint16_t)
        HANDLE_CASE(halide_type_uint, 32, uint32_t)
        HANDLE_CASE(halide_type_uint, 64, uint64_t)
    // Note that we don't attempt to handle halide_type_handle here.
    default:
        // Any element type without a HANDLE_CASE above ends up here; this
        // path does not go through `check`.
        assert(false && "Unsupported type");
        return false;
    }
#undef HANDLE_CASE

    return true;
}

// Type-level helper: `type` is whatever an rvalue ImageType's as<>() yields
// when the element type is kept and the dimensionality is AnyDims, e.g.
// ImageType<Foo, 2> becomes ImageType<Foo, AnyDims>.
template<typename ImageType>
struct ImageTypeWithDynamicDims {
private:
    using SameElemType = typename ImageType::ElemType;

public:
    using type = decltype(std::declval<ImageType>().template as<SameElemType, AnyDims>());
};

// Type-level helper: `type` is whatever an rvalue ImageType's as<>() yields
// when asked for element type ElemType and dimensionality AnyDims, e.g.
// ImageType<Foo> with ElemType = Bar becomes ImageType<Bar, AnyDims>.
template<typename ImageType, typename ElemType>
struct ImageTypeWithElemType {
private:
    using Retyped = decltype(std::declval<ImageType>().template as<ElemType, AnyDims>());

public:
    using type = Retyped;
};

// Type-level helper: like ImageTypeWithElemType, but const is added to the
// requested element type first, e.g. ImageType<Foo> with ElemType = Bar
// becomes ImageType<const Bar, AnyDims>.
template<typename ImageType, typename ElemType>
struct ImageTypeWithConstElemType {
private:
    using ConstElem = typename std::add_const<ElemType>::type;

public:
    using type = decltype(std::declval<ImageType>().template as<ConstElem, AnyDims>());
};

template<typename ImageType, Internal::CheckFunc check>
struct ImageIO {
    using ConstImageType = typename ImageTypeWithConstElemType<ImageType, typename ImageType::ElemType>::type;

    std::function<bool(const std::string &, ImageType *)> load;
    std::function<bool(ConstImageType &im, const std::string &)> save;
    std::function<const std::set<FormatInfo> &()> query;
};

template<typename ImageType, Internal::CheckFunc check>
bool find_imageio(const std::string &filename, ImageIO<ImageType, check> *result) {
    static_assert(!ImageType::has_static_halide_type, "");
    using ConstImageType = typename ImageTypeWithConstElemType<ImageType, typename ImageType::ElemType>::type;

    const std::map<std::string, ImageIO<ImageType, check>> m = {
#ifndef HALIDE_NO_JPEG
        {"jpeg", {load_jpg<ImageType, check>, save_jpg<ConstImageType, check>, query_jpg}},
        {"jpg", {load_jpg<ImageType, check>, save_jpg<ConstImageType, check>, query_jpg}},
#endif
        {"npy", {load_npy<ImageType, check>, save_npy<ConstImageType, check>, query_npy}},
        {"pgm", {load_pgm<ImageType, check>, save_pgm<ConstImageType, check>, query_pgm}},
#ifndef HALIDE_NO_PNG
        {"png", {load_png<ImageType, check>, save_png<ConstImageType, check>, query_png}},
#endif
        {"ppm", {load_ppm<ImageType, check>, save_ppm<ConstImageType, check>, query_ppm}},
        {"tmp", {load_tmp<ImageType, check>, save_tmp<ConstImageType, check>, query_tmp}},
        {"mat", {load_mat<ImageType, check>, save_mat<ConstImageType, check>, query_mat}},
        {"tiff", {load_tiff<ImageType, check>, save_tiff<ConstImageType, check>, query_tiff}},
    };
    std::string ext = Internal::get_lowercase_extension(filename);
    auto it = m.find(ext);
    if (it != m.end()) {
        *result = it->second;
        return true;
    }

    std::string err = "unsupported file extension \"" + ext + "\", supported are:";
    for (auto &it : m) {
        err += " " + it.first;
    }
    err += "\n";
    return check(false, err.c_str());
}

// Choose, from `info`, the format description that is the closest fit for
// `im`. Each candidate gets a penalty (0 = exact match, larger = worse) and
// the candidate with the smallest penalty wins; on a tie the one that comes
// first in the set's iteration order is kept. If `info` is empty, a
// value-initialised FormatInfo is returned.
template<typename ImageType>
FormatInfo best_save_format(const ImageType &im, const std::set<FormatInfo> &info) {
    const halide_type_t src_type = im.type();
    const int src_dims = im.dimensions();

    // A bit ad hoc, but will do for now.
    const auto penalty = [&](const FormatInfo &candidate) -> int {
        // Too few dimensions in the format: very bad.
        const int missing_dims = std::max(0, src_dims - candidate.dimensions);
        // Too few bits in the format: pretty bad.
        const int missing_bits = std::max(0, src_type.bits - candidate.type.bits);
        // Too many bits in the format: a little bad.
        const int surplus_bits = std::max(0, candidate.type.bits - src_type.bits);
        // Different type code: a little bad.
        const int code_differs = (candidate.type.code != src_type.code) ? 1 : 0;
        return missing_dims * 1024 + missing_bits * 8 + surplus_bits + code_differs;
    };

    FormatInfo winner{};
    int winner_penalty = 0x7fffffff;
    for (const FormatInfo &candidate : info) {
        const int candidate_penalty = penalty(candidate);
        if (candidate_penalty < winner_penalty) {
            winner_penalty = candidate_penalty;
            winner = candidate;
        }
    }

    return winner;
}

}  // namespace Internal

// Collection of convert_image() overloads covering the four combinations of
// statically/dynamically typed source and destination images. Overload
// selection is done with enable_if on ImageType::has_static_halide_type and
// on whether DstElemType is void. The per-element work is always done by
// Internal::convert<>(); the dynamically-typed overloads merely inspect the
// runtime type and forward to a statically-typed one.
struct ImageTypeConversion {
    // Convert an Image from one ElemType to another, where the src and
    // dst types are statically known (e.g. Buffer<uint8_t> -> Buffer<float>).
    // Note that this does conversion with scaling -- interpreting integers
    // as fixed-point numbers between 0 and 1 -- not merely C-style casting.
    //
    // You'd normally call this with an explicit type for DstElemType and
    // allow ImageType to be inferred, e.g.
    //     Buffer<uint8_t> src = ...;
    //     Buffer<float> dst = convert_image<float>(src);
    //
    // Returns a new image with the shape of src, marked host-dirty.
    template<typename DstElemType, typename ImageType,
             typename std::enable_if<ImageType::has_static_halide_type && !std::is_void<DstElemType>::value>::type * = nullptr>
    static auto convert_image(const ImageType &src) ->
        typename Internal::ImageTypeWithElemType<ImageType, DstElemType>::type {
        // The enable_if ensures this will never fire; this is here primarily
        // as documentation and a backstop against breakage.
        static_assert(ImageType::has_static_halide_type,
                      "This variant of convert_image() requires a statically-typed image");

        using SrcImageType = ImageType;
        using SrcElemType = typename SrcImageType::ElemType;

        using DstImageType = typename Internal::ImageTypeWithElemType<ImageType, DstElemType>::type;

        DstImageType dst = DstImageType::make_with_shape_of(src);
        const auto converter = [](DstElemType &dst_elem, SrcElemType src_elem) {
            dst_elem = Internal::convert<DstElemType>(src_elem);
        };
        dst.for_each_value(converter, src);
        dst.set_host_dirty();

        return dst;
    }

    // Convert an Image from one ElemType to another, where the dst type is statically
    // known but the src type is not (e.g. Buffer<> -> Buffer<float>).
    // You'd normally call this with an explicit type for DstElemType and
    // allow ImageType to be inferred, e.g.
    //     Buffer<> src = ...;
    //     Buffer<float> dst = convert_image<float>(src);
    //
    // The runtime type of src selects which static-to-static conversion is
    // used. For a src type not listed below this asserts (in builds where
    // assert is active) and returns a default-constructed image.
    template<typename DstElemType, typename ImageType,
             typename std::enable_if<!ImageType::has_static_halide_type && !std::is_void<DstElemType>::value>::type * = nullptr>
    static auto convert_image(const ImageType &src) ->
        typename Internal::ImageTypeWithElemType<ImageType, DstElemType>::type {
        // The enable_if ensures this will never fire; this is here primarily
        // as documentation and a backstop against breakage.
        static_assert(!ImageType::has_static_halide_type,
                      "This variant of convert_image() requires a dynamically-typed image");
        constexpr int AnyDims = Internal::AnyDims;

        const halide_type_t src_type = src.type();
        switch (src_type.element_of().as_u32()) {
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
        case halide_type_t(halide_type_float, 16).as_u32():
            return convert_image<DstElemType>(src.template as<_Float16, AnyDims>());
#endif
        case halide_type_t(halide_type_float, 32).as_u32():
            return convert_image<DstElemType>(src.template as<float, AnyDims>());
        case halide_type_t(halide_type_float, 64).as_u32():
            return convert_image<DstElemType>(src.template as<double, AnyDims>());
        case halide_type_t(halide_type_int, 8).as_u32():
            return convert_image<DstElemType>(src.template as<int8_t, AnyDims>());
        case halide_type_t(halide_type_int, 16).as_u32():
            return convert_image<DstElemType>(src.template as<int16_t, AnyDims>());
        case halide_type_t(halide_type_int, 32).as_u32():
            return convert_image<DstElemType>(src.template as<int32_t, AnyDims>());
        case halide_type_t(halide_type_int, 64).as_u32():
            return convert_image<DstElemType>(src.template as<int64_t, AnyDims>());
        case halide_type_t(halide_type_uint, 1).as_u32():
            return convert_image<DstElemType>(src.template as<bool, AnyDims>());
        case halide_type_t(halide_type_uint, 8).as_u32():
            return convert_image<DstElemType>(src.template as<uint8_t, AnyDims>());
        case halide_type_t(halide_type_uint, 16).as_u32():
            return convert_image<DstElemType>(src.template as<uint16_t, AnyDims>());
        case halide_type_t(halide_type_uint, 32).as_u32():
            return convert_image<DstElemType>(src.template as<uint32_t, AnyDims>());
        case halide_type_t(halide_type_uint, 64).as_u32():
            return convert_image<DstElemType>(src.template as<uint64_t, AnyDims>());
        default:
            assert(false && "Unsupported type");
            using DstImageType = typename Internal::ImageTypeWithElemType<ImageType, DstElemType>::type;
            return DstImageType();
        }
    }

    // Convert an Image from one ElemType to another, where the src type
    // is statically known but the dst type is not
    // (e.g. Buffer<uint8_t> -> Buffer<>(halide_type_t)).
    //
    // dst_type selects which static-to-static conversion is used. For a
    // dst_type not listed below this asserts (in builds where assert is
    // active) and returns a default-constructed image.
    template<typename DstElemType = void,
             typename ImageType,
             typename std::enable_if<ImageType::has_static_halide_type && std::is_void<DstElemType>::value>::type * = nullptr>
    static auto convert_image(const ImageType &src, const halide_type_t &dst_type) ->
        typename Internal::ImageTypeWithElemType<ImageType, void>::type {
        // The enable_if ensures this will never fire; this is here primarily
        // as documentation and a backstop against breakage.
        static_assert(ImageType::has_static_halide_type,
                      "This variant of convert_image() requires a statically-typed image");

        // Call the appropriate static-to-static conversion routine
        // based on the desired dst type.
        switch (dst_type.element_of().as_u32()) {
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
        case halide_type_t(halide_type_float, 16).as_u32():
            return convert_image<_Float16>(src);
#endif
        case halide_type_t(halide_type_float, 32).as_u32():
            return convert_image<float>(src);
        case halide_type_t(halide_type_float, 64).as_u32():
            return convert_image<double>(src);
        case halide_type_t(halide_type_int, 8).as_u32():
            return convert_image<int8_t>(src);
        case halide_type_t(halide_type_int, 16).as_u32():
            return convert_image<int16_t>(src);
        case halide_type_t(halide_type_int, 32).as_u32():
            return convert_image<int32_t>(src);
        case halide_type_t(halide_type_int, 64).as_u32():
            return convert_image<int64_t>(src);
        case halide_type_t(halide_type_uint, 1).as_u32():
            return convert_image<bool>(src);
        case halide_type_t(halide_type_uint, 8).as_u32():
            return convert_image<uint8_t>(src);
        case halide_type_t(halide_type_uint, 16).as_u32():
            return convert_image<uint16_t>(src);
        case halide_type_t(halide_type_uint, 32).as_u32():
            return convert_image<uint32_t>(src);
        case halide_type_t(halide_type_uint, 64).as_u32():
            return convert_image<uint64_t>(src);
        default:
            assert(false && "Unsupported type");
            using RetImageType = typename Internal::ImageTypeWithDynamicDims<ImageType>::type;
            return RetImageType();
        }
    }

    // Convert an Image from one ElemType to another, where neither src type
    // nor dst type are statically known
    // (e.g. Buffer<>(halide_type_t) -> Buffer<>(halide_type_t)).
    //
    // For a src type not listed below this asserts (in builds where assert
    // is active) and returns a default-constructed image.
    template<typename DstElemType = void,
             typename ImageType,
             typename std::enable_if<!ImageType::has_static_halide_type && std::is_void<DstElemType>::value>::type * = nullptr>
    static auto convert_image(const ImageType &src, const halide_type_t &dst_type) ->
        typename Internal::ImageTypeWithElemType<ImageType, void>::type {
        // The enable_if ensures this will never fire; this is here primarily
        // as documentation and a backstop against breakage.
        static_assert(!ImageType::has_static_halide_type,
                      "This variant of convert_image() requires a dynamically-typed image");
        constexpr int AnyDims = Internal::AnyDims;

        // Sniff the runtime type of src, coerce it to that type using as<>(),
        // and call the static-to-dynamic variant of this method. (Note that
        // this forces instantiation of the complete any-to-any conversion
        // matrix of code.)
        const halide_type_t src_type = src.type();
        switch (src_type.element_of().as_u32()) {
#ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16
        // Keep the set of accepted source types in step with the
        // dynamic-to-static overload above, which also accepts float16
        // sources when the compiler supports _Float16.
        case halide_type_t(halide_type_float, 16).as_u32():
            return convert_image(src.template as<_Float16, AnyDims>(), dst_type);
#endif
        case halide_type_t(halide_type_float, 32).as_u32():
            return convert_image(src.template as<float, AnyDims>(), dst_type);
        case halide_type_t(halide_type_float, 64).as_u32():
            return convert_image(src.template as<double, AnyDims>(), dst_type);
        case halide_type_t(halide_type_int, 8).as_u32():
            return convert_image(src.template as<int8_t, AnyDims>(), dst_type);
        case halide_type_t(halide_type_int, 16).as_u32():
            return convert_image(src.template as<int16_t, AnyDims>(), dst_type);
        case halide_type_t(halide_type_int, 32).as_u32():
            return convert_image(src.template as<int32_t, AnyDims>(), dst_type);
        case halide_type_t(halide_type_int, 64).as_u32():
            return convert_image(src.template as<int64_t, AnyDims>(), dst_type);
        case halide_type_t(halide_type_uint, 1).as_u32():
            return convert_image(src.template as<bool, AnyDims>(), dst_type);
        case halide_type_t(halide_type_uint, 8).as_u32():
            return convert_image(src.template as<uint8_t, AnyDims>(), dst_type);
        case halide_type_t(halide_type_uint, 16).as_u32():
            return convert_image(src.template as<uint16_t, AnyDims>(), dst_type);
        case halide_type_t(halide_type_uint, 32).as_u32():
            return convert_image(src.template as<uint32_t, AnyDims>(), dst_type);
        case halide_type_t(halide_type_uint, 64).as_u32():
            return convert_image(src.template as<uint64_t, AnyDims>(), dst_type);
        default:
            assert(false && "Unsupported type");
            using RetImageType = typename Internal::ImageTypeWithDynamicDims<ImageType>::type;
            return RetImageType();
        }
    }
};

// Load the Image from the given file.
// If output Image has a static type, and the loaded image cannot be stored
// in such an image without losing data, fail.
// Returns false upon failure.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool load(const std::string &filename, ImageType *im) {
    using DynamicImageType = typename Internal::ImageTypeWithElemType<ImageType, void>::type;
    Internal::ImageIO<DynamicImageType, check> imageio;
    if (!Internal::find_imageio<DynamicImageType, check>(filename, &imageio)) {
        return false;
    }
    using DynamicImageType = typename Internal::ImageTypeWithElemType<ImageType, void>::type;
    DynamicImageType im_d;
    if (!imageio.load(filename, &im_d)) {
        return false;
    }
    // Allow statically-typed images to be passed as the out-param, but do
    // a runtime check to ensure
    if (ImageType::has_static_halide_type) {
        const halide_type_t expected_type = ImageType::static_halide_type();
        if (!check(im_d.type() == expected_type, "Image loaded did not match the expected type")) {
            return false;
        }
    }
    *im = im_d.template as<typename ImageType::ElemType, Internal::AnyDims>();
    im->set_host_dirty();
    return true;
}

// Save the Image in the format associated with the filename's extension.
// If the format can't represent the Image without losing data, fail.
// Returns false upon failure.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool save(ImageType &im, const std::string &filename) {
    using DynamicImageType = typename Internal::ImageTypeWithElemType<ImageType, void>::type;
    Internal::ImageIO<DynamicImageType, check> imageio;
    if (!Internal::find_imageio<DynamicImageType, check>(filename, &imageio)) {
        return false;
    }
    if (!check(imageio.query().count({im.type(), im.dimensions()}) > 0, "Image cannot be saved in this format")) {
        return false;
    }

    // Allow statically-typed images to be passed in, but quietly pass them on
    // as dynamically-typed images.
    auto im_d = im.template as<const void, Internal::AnyDims>();
    return imageio.save(im_d, filename);
}

// Return a set of FormatInfo structs that contain the legal type-and-dimensions
// that can be saved in this format. Most applications won't ever need to use
// this call. Returns false upon failure.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckReturn>
bool save_query(const std::string &filename, std::set<FormatInfo> *info) {
    using DynamicImageType = typename Internal::ImageTypeWithElemType<ImageType, void>::type;
    Internal::ImageIO<DynamicImageType, check> imageio;
    if (!Internal::find_imageio<DynamicImageType, check>(filename, &imageio)) {
        return false;
    }
    *info = imageio.query();
    return true;
}

// Fancy wrapper to call load() with CheckFail, inferring the return type;
// this allows you to simply use
//
//    Image im = load_image("filename");
//
// without bothering to check error results (all errors simply abort).
//
// Note that if the image being loaded doesn't match the static type and
// dimensions of of the image on the LHS, a runtime error will occur.
class load_image {
public:
    load_image(const std::string &f)
        : filename(f) {
    }

    template<typename ImageType>
    operator ImageType() {
        using DynamicImageType = typename Internal::ImageTypeWithElemType<ImageType, void>::type;
        DynamicImageType im_d;
        Internal::CheckFail(load<DynamicImageType, Internal::CheckFail>(filename, &im_d), "load() failed");
        Internal::CheckFail(ImageType::can_convert_from(im_d),
                            "Type mismatch assigning the result of load_image. "
                            "Did you mean to use load_and_convert_image?");
        return im_d.template as<typename ImageType::ElemType, Internal::AnyDims>();
    }

private:
    const std::string filename;
};

// Like load_image, but quietly convert the loaded image to the type of the LHS
// if necessary, discarding information if necessary.
class load_and_convert_image {
public:
    load_and_convert_image(const std::string &f)
        : filename(f) {
    }

    template<typename ImageType>
    inline operator ImageType() {
        using DynamicImageType = typename Internal::ImageTypeWithElemType<ImageType, void>::type;
        DynamicImageType im_d;
        Internal::CheckFail(load<DynamicImageType, Internal::CheckFail>(filename, &im_d), "load() failed");
        const halide_type_t expected_type = ImageType::static_halide_type();
        if (im_d.type() == expected_type) {
            return im_d.template as<typename ImageType::ElemType, Internal::AnyDims>();
        } else {
            return ImageTypeConversion::convert_image<typename ImageType::ElemType>(im_d);
        }
    }

private:
    const std::string filename;
};

// Fancy wrapper to call save() with CheckFail; this allows you to simply use
//
//    save_image(im, "filename");
//
// without bothering to check error results (all errors simply abort).
//
// If the specified image file format cannot represent the image without
// losing data (e.g, a float32 or 4-dimensional image saved as a JPEG),
// a runtime error will occur.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckFail>
void save_image(ImageType &im, const std::string &filename) {
    auto im_d = im.template as<const void, Internal::AnyDims>();
    (void)save<decltype(im_d), check>(im_d, filename);
}

// Like save_image, but quietly convert the saved image to a type that the
// specified image file format can hold, discarding information if necessary.
// (Note that the input image is unaffected!)
//
// The target type/dimensionality is picked with Internal::best_save_format()
// from the set the format reports via save_query(). All failures are
// reported through `check`; with a non-aborting `check` the function simply
// returns without writing anything.
template<typename ImageType, Internal::CheckFunc check = Internal::CheckFail>
void convert_and_save_image(ImageType &im, const std::string &filename) {
    // We'll be doing any conversion on the CPU
    if (!check(im.copy_to_host() == halide_error_code_success, "copy_to_host() failed.")) {
        return;
    }

    std::set<FormatInfo> info;
    if (!save_query<typename Internal::ImageTypeWithDynamicDims<ImageType>::type, check>(filename, &info)) {
        // The extension was not recognised (already reported through `check`
        // by the lookup). Stop here: with no format information there is no
        // meaningful type to convert to, and save() would fail the same way.
        return;
    }

    const FormatInfo best = Internal::best_save_format(im, info);
    if (best.type == im.type() && best.dimensions == im.dimensions()) {
        // It's an exact match, we can save as-is.
        using DynamicImageDims = typename Internal::ImageTypeWithDynamicDims<ImageType>::type;
        (void)save<DynamicImageDims, check>(im.template as<typename ImageType::ElemType, Internal::AnyDims>(), filename);
    } else {
        using DynamicImageType = typename Internal::ImageTypeWithElemType<ImageType, void>::type;
        DynamicImageType im_converted = ImageTypeConversion::convert_image(im, best.type);
        // The format may require more dimensions than the image has; pad
        // with extra dimensions until it matches.
        while (im_converted.dimensions() < best.dimensions) {
            im_converted.add_dimension();
        }
        (void)save<DynamicImageType, check>(im_converted, filename);
    }
}

}  // namespace Tools
}  // namespace Halide

#endif  // HALIDE_IMAGE_IO_H