File: ecalc.cpp

package info (click to toggle)
gemmi 0.6.5%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 5,836 kB
  • sloc: cpp: 54,719; python: 4,743; ansic: 3,972; sh: 384; makefile: 73; f90: 42; javascript: 12
file content (142 lines) | stat: -rw-r--r-- 5,579 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Copyright 2023 Global Phasing Ltd.

#include <cstdio>   // for printf, fprintf
#include "gemmi/binner.hpp"  // for Binner
#include "gemmi/mtz.hpp"     // for Mtz
#include "gemmi/ecalc.hpp"   // for Mean

#define GEMMI_PROG ecalc
#include "options.h"

namespace {

enum OptionIndex {
  LabelF=4, LabelE, NoSigma, Method, BinSize, MaxBins
};

struct EcalcArg: public Arg {
  static option::ArgStatus MethodChoice(const option::Option& option, bool msg) {
    return Arg::Choice(option, msg, {/*"ma",*/ "2", "3", "ec"});
  }
};

const option::Descriptor Usage[] = {
  { NoOp, 0, "", "", Arg::None,
    "Usage:\n " EXE_NAME " [options] INPUT.mtz OUTPUT.mtz"
    "\n\nCalculates E (normalised structure amplitudes) from F." },
  CommonUsage[Help],
  CommonUsage[Version],
  CommonUsage[Verbose],
  { LabelF, 0, "", "F", Arg::Required,
    "  --F=LABEL  \tLabel of the input column (default: F)." },
  { LabelE, 0, "", "E", Arg::Required,
    "  --E=LABEL  \tLabel of the output column (default: E)." },
  { NoSigma, 0, "", "no-sigma", Arg::Required,
    "  --no-sigma  \tDo not use sigma columns (SIGF->SIGE)." },
  { Method, 0, "", "method", EcalcArg::MethodChoice,
    "  --method=METHOD  \tMethod of calculating <F^2>, one of:\n\t"
    "ma - moving average (not implemented yet),\n\t"
    "2 - resolution bins equispaced in 1/d^2,\n\t"
    "3 - resolution bins in 1/d^3 (default),\n\t"
    "ec - bins with equal count of reflections." },
  { BinSize, 0, "", "binsize", Arg::Int,
    "  --binsize=N  \tNumber of reflections per bin or in moving window\n\t"
    "(default: 200)." },
  { MaxBins, 0, "", "maxbins", Arg::Int,
    "  --maxbins=N  \tMaximum number of bins (default: 100)." },
  { NoOp, 0, "", "", Arg::None,
    "\nColumn for SIGF is always the next column after F (the type must be Q)."
    "\nIf INPUT.mtz has column E, it is replaced in OUTPUT.mtz."
    "\nOtherwise, a new column is appended."
    "\nThe name for SIGE, if it is added as a column, is 'SIG' + label of E." },
  { 0, 0, 0, 0, 0, 0 }
};

} // anonymous namespace

int GEMMI_MAIN(int argc, char **argv) {
  using gemmi::Mtz;
  OptParser p(EXE_NAME);
  p.simple_parse(argc, argv, Usage);
  p.require_positional_args(2);
  const char* input = p.nonOption(0);
  const char* output = p.nonOption(1);
  const char* f_label = p.options[LabelF] ? p.options[LabelF].arg : "F";
  const char* e_label = p.options[LabelE] ? p.options[LabelE].arg : "E";
  bool use_sigma = !p.options[NoSigma];
  int verbose = p.options[Verbose].count();
  if (verbose > 0)
    std::fprintf(stderr, "Reading %s ...\n", input);
  try {
    Mtz mtz;
    mtz.read_file_gz(input);
    const Mtz::Column& fcol_before = mtz.get_column_with_label(f_label);
    if (use_sigma && !fcol_before.get_next_column_if_type('Q'))
        gemmi::fail("Column ", f_label, " not followed by sigma column. Use --no-sigma.\n");
    int e_idx = -1;
    if (Mtz::Column* e = mtz.column_with_label(e_label)) {
      if (e->type != 'E')
        gemmi::fail("Column for E exists, but is not of type E");
      if (use_sigma && !e->get_next_column_if_type('Q'))
        gemmi::fail("Column ", e_label, " not followed by sigma column. Use --no-sigma.\n");
      e_idx = (int)e->idx;
    }
    if (verbose > 0)
      std::fprintf(stderr, "%s column %s ...\n",
                   (e_idx >= 0 ? "Replacing existing" : "Adding"), e_label);
    std::vector<std::string> trailing_cols(use_sigma ? 1 : 0);
    int fcol_idx = fcol_before.idx; // fcol_before gets invalidated in the next line
    Mtz::Column& ecol = mtz.copy_column(e_idx, fcol_before, trailing_cols);
    //const Mtz::Column* fcol = &mtz.columns[fcol_idx];
    ecol.label = e_label;
    ecol.type = 'E';
    if (use_sigma) {
      assert(ecol.get_next_column_if_type('Q') != nullptr);
      (&ecol + 1)->label = "SIG" + ecol.label;
    }
    if (!mtz.has_data())  // shouldn't happen
      gemmi::fail("no data");
    int f_count = 0;
    for (size_t n = 0; n < mtz.data.size(); n += mtz.columns.size())
      if (!std::isnan(mtz.data[n + fcol_idx]))
        ++f_count;
    int binsize = p.integer_or(BinSize, 200);
    int nbins = gemmi::iround((double)f_count / binsize);
    nbins = std::min(nbins, p.integer_or(MaxBins, 100));

    if (verbose > 0 || nbins < 3)
      std::fprintf(stderr, "%d reflections, %d Fs, %d shells\n",
                   mtz.nreflections, f_count, nbins);
    if (nbins < 3)
      gemmi::fail("not enough resolution bins");
    //bool use_moving_average = false;
    auto method = gemmi::Binner::Method::Dstar3;
    if (p.options[Method])
      switch (p.options[Method].arg[0]) {
        case '2': method = gemmi::Binner::Method::Dstar2; break;
        case 'e': method = gemmi::Binner::Method::EqualCount; break;
        //case 'm': use_moving_average = true; break;
      }
    if (verbose > 0)
      std::fprintf(stderr, "Calculating E ...\n");
    gemmi::Binner binner;
    gemmi::MtzDataProxy data_proxy{mtz};
    binner.setup(nbins, method, data_proxy, nullptr, /*with_mids=*/true, fcol_idx);
    std::vector<double> multipliers
      = gemmi::calculate_amplitude_normalizers(data_proxy, fcol_idx, binner);

    if (verbose > 0)
      std::fprintf(stderr, "Writing %s ...\n", output);
    for (size_t i = 0, n = 0; n < mtz.data.size(); n += mtz.columns.size(), ++i) {
      mtz.data[n + ecol.idx] *= float(multipliers[i]);
      if (use_sigma)
        mtz.data[n + ecol.idx + 1] *= float(multipliers[i]);
    }
    mtz.write_to_file(output);
  } catch (std::runtime_error& e) {
    std::fprintf(stderr, "ERROR: %s\n", e.what());
    return 1;
  }
  return 0;
}