File: ReadWrite2DTable.cpp

package info (click to toggle)
bornagain 23.0-4
links: PTS, VCS
area: main
in suites: forky, sid
size: 103,936 kB
sloc: cpp: 423,131; python: 40,997; javascript: 11,167; awk: 630; sh: 318; ruby: 173; xml: 130; makefile: 51; ansic: 24
file content (253 lines) | stat: -rw-r--r-- 8,250 bytes
//  ************************************************************************************************
//
//  BornAgain: simulate and fit reflection and scattering
//
//! @file      Device/IO/ReadWrite2DTable.cpp
//! @brief     Implements functions read2DTable and write2DTable.
//!
//! @homepage  http://www.bornagainproject.org
//! @license   GNU General Public License v3 or higher (see COPYING)
//! @copyright Forschungszentrum Jülich GmbH 2018
//! @authors   Scientific Computing Group at MLZ (see CITATION, AUTHORS)
//
//  ************************************************************************************************

#include "Device/IO/ReadWrite2DTable.h"
#include "Base/Axis/MakeScale.h"
#include "Base/Axis/Scale.h"
#include "Base/Math/Numeric.h"
#include "Base/Util/Assert.h"
#include "Base/Util/StringUtil.h"
#include "Device/Data/DataUtil.h"
#include "Device/Data/Datafield.h"
#include "Device/IO/ImportSettings.h"
#include <algorithm>
#include <iomanip>
#include <numbers>
#include <string>
#include <vector>

using std::numbers::pi;

namespace {

//! Returns true if @p c can open a numeric data line: an ASCII digit, '-' or '+'.
//!
//! A leading '.' is not accepted.
bool isDoubleStartChar(char c)
{
    // Explicit range test instead of isdigit(): handing isdigit() a negative char value
    // (any non-ASCII byte on platforms where char is signed) is undefined behaviour.
    const bool is_ascii_digit = c >= '0' && c <= '9';
    return is_ascii_digit || c == '-' || c == '+';
}

//! Writes a one-axis Datafield as two columns: bin center of axis 0, then the value.
//!
//! After the header line the stream is switched to the classic locale and to scientific
//! notation with precision 12; these settings remain on @p output_stream afterwards.
//! Each value is passed through Numeric::ignoreDenormalized before printing.
//!
//! @param data           field to write; indexed with the same index as the bin centers
//! @param output_stream  destination stream, flushed once before returning
void write1DRepresentation(const Datafield& data, std::ostream& output_stream)
{
    // The header goes out before the locale is changed.
    output_stream << "# coordinates         intensities" << '\n';
    output_stream.imbue(std::locale::classic());
    output_stream << std::scientific << std::setprecision(12);

    const std::vector<double> axis_values = data.axis(0).binCenters();

    // printing coordinate and associated intensity;
    // '\n' instead of std::endl so that the stream is not flushed after every single line
    for (size_t i = 0, nrows = axis_values.size(); i < nrows; ++i)
        output_stream << axis_values[i] << "    " << Numeric::ignoreDenormalized(data[i])
                      << '\n';

    // one flush at the end keeps the "everything is written on return" property
    output_stream.flush();
}

void write2DRepresentation(const Datafield& data, std::ostream& output_stream)
{
    const size_t nrows = data.axis(1).size();
    const size_t ncols = data.axis(0).size();

    output_stream << "# [nrows=" << nrows << ", ncols=" << ncols << "]" << std::endl;

    double2d_t dataArray = data.values2D();
    output_stream.imbue(std::locale::classic());
    output_stream << std::scientific << std::setprecision(12);

    // write in row-major order, especially with NumPy in mind
    for (size_t i = 0; i < nrows; i++) {
        for (size_t j = 0; j < ncols; j++) {
            double z_value = dataArray[i][j];
            output_stream << Numeric::ignoreDenormalized(z_value) << "    ";
        }
        output_stream << std::endl;
    }
}

//! Tokens that getNextLine() hands to Base::String::replaceItemsFromString, with " " as the
//! replacement, before a line is inspected: comma, double quote, parentheses, square brackets.
const std::vector<std::string> to_replace{",", "\"", "(", ")", "[", "]"};

//! Advances @p input_stream to the next line that looks like numeric data.
//!
//! Every line read is first passed through Base::String::trim, then through
//! Base::String::replaceItemsFromString with the tokens of to_replace and " " as replacement.
//! The first non-empty result whose leading character satisfies isDoubleStartChar is left
//! in @p line.
//!
//! @return true if such a line was found; false once std::getline fails
//!
//! NOTE(review): the tokens are replaced after trimming, so a line that begins with one of
//! them (e.g. "[1, 2]") presumably ends up starting with a blank and is skipped here;
//! confirm that this is intended.
bool getNextLine(std::istream& input_stream, std::string& line)
{
    for (;;) {
        if (!std::getline(input_stream, line))
            return false;

        line = Base::String::trim(line);
        Base::String::replaceItemsFromString(line, to_replace, " ");

        if (line.empty())
            continue;
        if (isDoubleStartChar(line[0]))
            return true;
    }
}

//! Collects every line accepted by getNextLine as one row of doubles.
//!
//! @param input_stream  text source, consumed until getNextLine reports no further line
//! @return the parsed rows in input order; empty if no line was accepted
//! @throws std::runtime_error if a row has a different number of elements than the first row
double2d_t parseFile(std::istream& input_stream)
{
    double2d_t rows;
    std::string text;

    // Read numbers from input stream; the first row fixes the expected row length.
    while (getNextLine(input_stream, text)) {
        std::vector<double> values = Base::String::parse_doubles(text);

        const bool matches_first_row = rows.empty() || values.size() == rows.front().size();
        if (!matches_first_row)
            throw std::runtime_error("Number of elements is not the same for all rows");

        rows.push_back(values);
    }
    return rows;
}

//! Builds a Datafield from a table that holds values only, without axis information.
//!
//! A table with a single row or a single column gives a field with one axis; any other table
//! gives a field with two axes and the values stored row after row. The axes are created with
//! newEquiDivision over the range from 0 to the number of bins, and are named "u (bin)"
//! (along a row) and "v (bin)" (along a column).
//!
//! @param input_stream  text source, parsed with parseFile
//! @throws std::runtime_error if parseFile delivers no rows or empty rows
Datafield readBareIntensity(std::istream& input_stream)
{
    double2d_t table = parseFile(input_stream);
    const size_t n_rows = table.size();
    const size_t n_cols = table.empty() ? 0 : table[0].size();

    if (n_rows == 0 || n_cols == 0)
        throw std::runtime_error("No data found in table");

    // single row: values run along u
    if (n_rows == 1) {
        std::vector<const Scale*> axes{
            newEquiDivision("u (bin)", n_cols, 0.0, static_cast<double>(n_cols))};
        return {axes, table[0]};
    }

    // single column: values run along v
    if (n_cols == 1) {
        std::vector<const Scale*> axes{
            newEquiDivision("v (bin)", n_rows, 0.0, static_cast<double>(n_rows))};
        std::vector<double> column;
        column.reserve(n_rows);
        for (const auto& row : table)
            column.push_back(row[0]);
        return {axes, column};
    }

    // general case: concatenate the rows
    std::vector<double> flat;
    flat.reserve(n_rows * n_cols);
    for (const auto& row : table)
        flat.insert(flat.end(), row.begin(), row.end());

    return {{newEquiDivision("u (bin)", n_cols, 0.0, static_cast<double>(n_cols)),
             newEquiDivision("v (bin)", n_rows, 0.0, static_cast<double>(n_rows))},
            flat};
}

} // namespace

//! Reads a numeric text table into a Datafield.
//!
//! If @p pars is null or pars->has_axes is false, the whole table is handed to
//! readBareIntensity. Otherwise one row and one column of the table are taken as axis
//! coordinates: the first row if pars->first_row is set, else the last row; the first column
//! if pars->first_col is set, else the last column. The remaining cells are the values.
//!
//! Axes whose first coordinate is larger than their last one are reversed, together with the
//! data. If the unit of pars->xCoord is "1/angstrom", both axes are multiplied by 10 and
//! labelled with unit "1/nm"; if it is "deg", both are multiplied by pi/180 and labelled with
//! unit "rad". With pars->swap_axes set, the column coordinates become the first axis and the
//! data is transposed.
//!
//! @param input_stream  text source, parsed with parseFile
//! @param pars          import settings; may be null
//! @throws std::runtime_error if the table has fewer than 2 rows or 2 columns, or if an axis
//!         holds the same coordinate at two neighbouring positions
Datafield Util::RW::read2DTable(std::istream& input_stream, const ImportSettings2D* pars)
{
    if (!(pars && pars->has_axes))
        return readBareIntensity(input_stream);

    // read table with axes info
    double2d_t table = parseFile(input_stream);
    const size_t n_lines = table.size();
    const size_t n_fields = n_lines > 0 ? table.front().size() : 0;

    if (n_lines < 2)
        throw std::runtime_error("Table should contain at least 2 rows");
    if (n_fields < 2)
        throw std::runtime_error("Table should contain at least 2 columns");

    // lift the axis row out of the table
    std::vector<double> q_row;
    if (pars->first_row) {
        q_row = table.front();
        table.erase(table.begin());
    } else {
        q_row = table.back();
        table.pop_back();
    }

    // lift the axis column out of the remaining rows, and drop the corner cell from the axis row
    std::vector<double> q_col;
    q_col.reserve(table.size());
    if (pars->first_col) {
        q_row.erase(q_row.begin());
        for (auto& line : table) {
            q_col.push_back(line.front());
            line.erase(line.begin());
        }
    } else {
        q_row.pop_back();
        for (auto& line : table) {
            q_col.push_back(line.back());
            line.pop_back();
        }
    }

    // validate axes: only neighbouring coordinates are compared
    const auto rejectNeighbouringRepeats = [](const std::vector<double>& axis,
                                              const std::string& prefix) {
        const auto hit = std::adjacent_find(axis.begin(), axis.end());
        if (hit != axis.end())
            throw std::runtime_error(prefix + std::to_string(*hit)
                                     + "\nAll axes arguments must be unique");
    };
    rejectNeighbouringRepeats(q_row, "Axis row contains repeated argument ");
    rejectNeighbouringRepeats(q_col, "Axis column contains repeated argument ");

    // sort axes: a descending axis is reversed together with the data
    // NOTE(review): the axis index convention of DataUtil::invertAxis is not visible here;
    // confirm that index 1 belongs to the column coordinates and index 0 to the row coordinates.
    if (q_col.back() < q_col.front()) {
        std::reverse(q_col.begin(), q_col.end());
        table = DataUtil::invertAxis(1, table);
    }
    if (q_row.back() < q_row.front()) {
        std::reverse(q_row.begin(), q_row.end());
        table = DataUtil::invertAxis(0, table);
    }

    // scale axes
    // NOTE(review): only the unit of xCoord is inspected; the same factor and output unit are
    // applied to yCoord whatever its own unit is. Confirm that both always share one unit.
    double fac = 1.;
    Coordinate xOutCoord = pars->xCoord;
    Coordinate yOutCoord = pars->yCoord;
    const auto in_unit = pars->xCoord.unit();
    if (in_unit == "1/angstrom") {
        fac = 10;
        xOutCoord = {pars->xCoord.name(), "1/nm"};
        yOutCoord = {pars->yCoord.name(), "1/nm"};
    } else if (in_unit == "deg") {
        fac = pi / 180.;
        xOutCoord = {pars->xCoord.name(), "rad"};
        yOutCoord = {pars->yCoord.name(), "rad"};
    }
    for (double& q : q_row)
        q *= fac;
    for (double& q : q_col)
        q *= fac;

    // interpret axes: with swap_axes the column coordinates go first and the data is transposed
    std::vector<double>& qy = pars->swap_axes ? q_col : q_row;
    std::vector<double>& qz = pars->swap_axes ? q_row : q_col;
    if (pars->swap_axes)
        table = DataUtil::transpose(table);

    // flatten values row after row
    std::vector<double> outvec;
    for (const auto& line : table)
        outvec.insert(outvec.end(), line.begin(), line.end());

    return {{newListScan(xOutCoord.label(), qy), newListScan(yOutCoord.label(), qz)}, outvec};
}

//! Writes @p data as plain text, preceded by two '#' comment lines.
//!
//! Data of rank 1 is written by write1DRepresentation, data of rank 2 by
//! write2DRepresentation. Any other rank reaches ASSERT_NEVER, after the two comment lines
//! have already been written.
//!
//! @param data           field to write
//! @param output_stream  destination stream
void Util::RW::write2DTable(const Datafield& data, std::ostream& output_stream)
{
    output_stream << "# BornAgain Intensity Data" << std::endl
                  << "# Simple array suitable for numpy, matlab etc." << std::endl;

    const size_t rank = data.rank();
    if (rank == 1) {
        write1DRepresentation(data, output_stream);
    } else if (rank == 2) {
        write2DRepresentation(data, output_stream);
    } else {
        ASSERT_NEVER;
    }
}