#!/usr/bin/env python3

"""
Extract reflectometry data from NIST.

Specifically written for https://www.nist.gov/document/spinelfilmzip.

Output columns are: ["Qz", "counts per incident count", "uncertainty", "resolution"].
"""

import io, numpy, re

angstrom = 0.1

def normalizeData(data):
    """
    Removes duplicate q values from the input data,
    normalizes it such that the maximum of the reflectivity is
    unity and rescales the q-axis to inverse nm
    """

    r0 = numpy.where(data[0] - numpy.roll(data[0], 1) == 0)
    data = numpy.delete(data, r0, 1)

    data[0] = data[0]/angstrom
    data[3] = data[3]/angstrom

    norm = numpy.max(data[1])
    data[1] = data[1]/norm
    data[2] = data[2]/norm

    so = numpy.argsort(data[0])
    data = data[:, so]

    return data

def xtract(rawdata, out_fname, code):

    table = re.match(
        r'.*# "polarization": "' + code + r'"\n#.*?\n# "units".*?\n(.*?)#.*',
        rawdata, re.DOTALL).group(1)

    data1 = numpy.genfromtxt(io.BytesIO(table.encode()), unpack=True)

    data2 = normalizeData(data1)

    n = len(data2[0])
    data3 = [(data2[0][i], data2[1][i], data2[2][i], data2[3][i]) for i in range(n)]

    with open(out_fname, 'w') as f:
        for a in data3:
            for v in a:
                f.write('%21.16e ' % v)
            f.write('\n')

rawdata = open("MAFO_Saturated.refl").read()

xtract(rawdata, "MAFO_Saturated_pp.tab", r'\+\+')
xtract(rawdata, "MAFO_Saturated_mm.tab", r'\-\-')
