File: Convolve.cpp

package info (click to toggle)
bornagain 23.0-4
links: PTS, VCS
area: main
in suites: forky, sid
size: 103,936 kB
sloc: cpp: 423,131; python: 40,997; javascript: 11,167; awk: 630; sh: 318; ruby: 173; xml: 130; makefile: 51; ansic: 24
file content (279 lines) | stat: -rw-r--r-- 9,326 bytes
parent folder | download | duplicates (2)
//  ************************************************************************************************
//
//  BornAgain: simulate and fit reflection and scattering
//
//! @file      Device/Resolution/Convolve.cpp
//! @brief     Implements class Convolve.
//!
//! @homepage  http://www.bornagainproject.org
//! @license   GNU General Public License v3 or higher (see COPYING)
//! @copyright Forschungszentrum Jülich GmbH 2018
//! @authors   Scientific Computing Group at MLZ (see CITATION, AUTHORS)
//
//  ************************************************************************************************

#include "Device/Resolution/Convolve.h"
#include "Base/Util/Assert.h"

namespace {

// Returns a true if number can be factorized using only favorite fftw3 factors
bool is_optimal(int n)
{
    static const std::vector<int> favored_factors{13, 11, 7, 5, 3, 2};
    if (n == 1)
        return false;
    size_t ntest = n;
    for (int favored_factor : favored_factors)
        while ((ntest % favored_factor) == 0)
            ntest = ntest / favored_factor;
    return ntest == 1;
}

// Returns next higher number that has only favored prime factors
int find_closest_factor(int n)
{
    while (!is_optimal(n))
        ++n;
    return n;
}

} // namespace


Convolve::Convolve()
    : m_mode(FFTW_LINEAR_SAME)
{
}

Convolve::Workspace::~Workspace()
{
    clear();
}

void Convolve::Workspace::clear()
{
    h_src = 0;
    w_src = 0;
    h_kernel = 0;
    w_kernel = 0;

    delete[] in_src;
    in_src = nullptr;

    if (out_src)
        fftw_free((fftw_complex*)out_src);
    out_src = nullptr;


    delete[] in_kernel;
    in_kernel = nullptr;

    if (out_kernel)
        fftw_free((fftw_complex*)out_kernel);
    out_kernel = nullptr;


    delete[] dst_fft;
    dst_fft = nullptr;

    h_offset = 0;
    w_offset = 0;

    if (p_forw_src != nullptr)
        fftw_destroy_plan(p_forw_src);
    if (p_forw_kernel != nullptr)
        fftw_destroy_plan(p_forw_kernel);
    if (p_back != nullptr)
        fftw_destroy_plan(p_back);

    fftw_cleanup();
}

void Convolve::fftconvolve2D(const double2d_t& source, const double2d_t& kernel, double2d_t& result)
{
    int h_src = (int)source.size();
    int w_src = (int)(!source.empty() ? source[0].size() : 0);
    int h_kernel = (int)kernel.size();
    int w_kernel = (!kernel.empty() ? (int)kernel[0].size() : 0);

    // initialization
    init(h_src, w_src, h_kernel, w_kernel);
    // Compute the circular convolution
    fftw_circular_convolution(source, kernel);

    // results
    result.clear();
    result.resize(ws.h_dst);
    for (int i = 0; i < ws.h_dst; i++) {
        result[i].resize(ws.w_dst, 0);
        for (int j = 0; j < ws.w_dst; j++) {
            // result[i][j]=ws.dst[i*ws.w_dst+j];
            if (m_mode == FFTW_CIRCULAR_SAME_SHIFTED)
                result[i][j] = ws.dst_fft[((i + int(ws.h_kernel / 2.0)) % ws.h_fftw) * ws.w_fftw
                                          + (j + int(ws.w_kernel / 2.0)) % ws.w_fftw];
            else
                result[i][j] = ws.dst_fft[(i + ws.h_offset) * ws.w_fftw + j + ws.w_offset];
        }
    }
}

void Convolve::fftconvolve1D(const std::vector<double>& source, const std::vector<double>& kernel,
                             std::vector<double>& result)
{
    // we simply create 2d arrays with length of first dimension equal to 1, and call 2d convolution
    double2d_t source2d, kernel2d;
    source2d.push_back(source);
    kernel2d.push_back(kernel);

    double2d_t result2d;
    fftconvolve2D(source2d, kernel2d, result2d);
    ASSERT(result2d.size() == 1);
    result = result2d[0];
}

void Convolve::init(int h_src, int w_src, int h_kernel, int w_kernel)
{
    ASSERT(h_src);
    ASSERT(w_src);
    ASSERT(h_kernel);
    ASSERT(w_kernel);

    ws.clear();
    ws.h_src = h_src;
    ws.w_src = w_src;
    ws.h_kernel = h_kernel;
    ws.w_kernel = w_kernel;
    switch (m_mode) {
    case FFTW_LINEAR_FULL:
        // Full Linear convolution
        ws.h_fftw = find_closest_factor(h_src + h_kernel - 1);
        ws.w_fftw = find_closest_factor(w_src + w_kernel - 1);
        ws.h_dst = h_src + h_kernel - 1;
        ws.w_dst = w_src + w_kernel - 1;
        // Here we just keep the first [0:h_dst-1 ; 0:w_dst-1] real part elements of out_src
        ws.h_offset = 0;
        ws.w_offset = 0;
        break;
    case FFTW_LINEAR_SAME_UNPADDED:
        // Same Linear convolution
        ws.h_fftw = h_src + int(h_kernel / 2.0);
        ws.w_fftw = w_src + int(w_kernel / 2.0);
        ws.h_dst = h_src;
        ws.w_dst = w_src;
        // Here we just keep the first [h_filt/2:h_filt/2+h_dst-1 ; w_filt/2:w_filt/2+w_dst-1]
        // real part elements of out_src
        ws.h_offset = int(ws.h_kernel / 2.0);
        ws.w_offset = int(ws.w_kernel / 2.0);
        break;
    case FFTW_LINEAR_SAME:
        // Same Linear convolution
        ws.h_fftw = find_closest_factor(h_src + int(h_kernel / 2.0));
        ws.w_fftw = find_closest_factor(w_src + int(w_kernel / 2.0));
        ws.h_dst = h_src;
        ws.w_dst = w_src;
        // Here we just keep the first [h_filt/2:h_filt/2+h_dst-1 ; w_filt/2:w_filt/2+w_dst-1]
        // real part elements of out_src
        ws.h_offset = int(ws.h_kernel / 2.0);
        ws.w_offset = int(ws.w_kernel / 2.0);
        break;
    case FFTW_LINEAR_VALID:
        // Valid Linear convolution
        ASSERT(ws.h_kernel <= ws.h_src);
        ASSERT(ws.w_kernel <= ws.w_src);
        ws.h_fftw = find_closest_factor(h_src);
        ws.w_fftw = find_closest_factor(w_src);
        ws.h_dst = h_src - h_kernel + 1;
        ws.w_dst = w_src - w_kernel + 1;
        // Here we just take [h_dst x w_dst] elements starting at [h_kernel-1;w_kernel-1]
        ws.h_offset = ws.h_kernel - 1;
        ws.w_offset = ws.w_kernel - 1;
        break;
    case FFTW_CIRCULAR_SAME:
    case FFTW_CIRCULAR_SAME_SHIFTED:
        // Circular convolution, modulo N
        // We don't padd with zeros because if we do, we need to padd with at least h_kernel/2;
        // w_kernel/2 elements plus the wrapp around, which in facts leads to too much
        // computations compared to the gain obtained with the optimal size
        ws.h_fftw = h_src;
        ws.w_fftw = w_src;
        ws.h_dst = h_src;
        ws.w_dst = w_src;
        // We copy the first [0:h_dst-1 ; 0:w_dst-1] real part elements of out_src
        ws.h_offset = 0;
        ws.w_offset = 0;
        break;
    default:
        ASSERT_NEVER;
    }

    ws.in_src = new double[ws.h_fftw * ws.w_fftw];
    ws.out_src = (double*)fftw_malloc(sizeof(fftw_complex) * ws.h_fftw * (ws.w_fftw / 2 + 1));
    ws.in_kernel = new double[ws.h_fftw * ws.w_fftw];
    ws.out_kernel = (double*)fftw_malloc(sizeof(fftw_complex) * ws.h_fftw * (ws.w_fftw / 2 + 1));

    ws.dst_fft = new double[ws.h_fftw * ws.w_fftw];
    // ws.dst = new double[ws.h_dst * ws.w_dst];

    // Initialization of the plans
    ws.p_forw_src = fftw_plan_dft_r2c_2d(ws.h_fftw, ws.w_fftw, ws.in_src, (fftw_complex*)ws.out_src,
                                         FFTW_ESTIMATE);
    ASSERT(ws.p_forw_src);
    ws.p_forw_kernel = fftw_plan_dft_r2c_2d(ws.h_fftw, ws.w_fftw, ws.in_kernel,
                                            (fftw_complex*)ws.out_kernel, FFTW_ESTIMATE);
    ASSERT(ws.p_forw_kernel);

    // The backward FFT takes ws.out_kernel as input
    ws.p_back = fftw_plan_dft_c2r_2d(ws.h_fftw, ws.w_fftw, (fftw_complex*)ws.out_kernel, ws.dst_fft,
                                     FFTW_ESTIMATE);
    ASSERT(ws.p_back);
}

void Convolve::fftw_circular_convolution(const double2d_t& src, const double2d_t& kernel)
{
    ASSERT(ws.h_fftw > 0);
    ASSERT(ws.w_fftw > 0);

    double *ptr, *ptr_end, *ptr2;

    // Reset the content of ws.in_src
    for (ptr = ws.in_src, ptr_end = ws.in_src + ws.h_fftw * ws.w_fftw; ptr != ptr_end; ++ptr)
        *ptr = 0.0;
    for (ptr = ws.in_kernel, ptr_end = ws.in_kernel + ws.h_fftw * ws.w_fftw; ptr != ptr_end; ++ptr)
        *ptr = 0.0;

    // Then we build our periodic signals
    // ptr = src;
    for (int i = 0; i < ws.h_src; ++i)
        for (int j = 0; j < ws.w_src; ++j, ++ptr)
            ws.in_src[(i % ws.h_fftw) * ws.w_fftw + (j % ws.w_fftw)] += src[i][j];

    // ptr = kernel;
    for (int i = 0; i < ws.h_kernel; ++i)
        for (int j = 0; j < ws.w_kernel; ++j, ++ptr)
            ws.in_kernel[(i % ws.h_fftw) * ws.w_fftw + (j % ws.w_fftw)] += kernel[i][j];

    // And we compute their packed FFT
    fftw_execute(ws.p_forw_src);
    fftw_execute(ws.p_forw_kernel);

    // Compute the element-wise product on the packed terms
    // Let's put the elementwise products in ws.in_kernel
    for (ptr = ws.out_src, ptr2 = ws.out_kernel,
        ptr_end = ws.out_src + 2 * ws.h_fftw * (ws.w_fftw / 2 + 1);
         ptr != ptr_end; ++ptr, ++ptr2) {
        double re_s = *ptr;
        double im_s = *(++ptr);
        double re_k = *ptr2;
        double im_k = *(++ptr2);
        *(ptr2 - 1) = re_s * re_k - im_s * im_k;
        *ptr2 = re_s * im_k + im_s * re_k;
    }

    // Compute the backward FFT
    // Carefull, The backward FFT does not preserve the output
    fftw_execute(ws.p_back);
    // Scale the transform
    for (ptr = ws.dst_fft, ptr_end = ws.dst_fft + ws.w_fftw * ws.h_fftw; ptr != ptr_end; ++ptr)
        *ptr /= double(ws.h_fftw * ws.w_fftw);
}