1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
|
/*
* Copyright (C) 2014-2021 Brian L. Browning
*
* This file is part of Beagle
*
* Beagle is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Beagle is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main;
import blbutil.Const;
import blbutil.FileUtil;
import blbutil.Utilities;
import java.io.File;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Optional;
import phase.PhaseData;
import vcf.GT;
import vcf.Marker;
import vcf.Markers;
import vcf.RefGT;
import vcf.Window;
/**
* <p>Class {@code RunStats} contains methods for storing and printing
* statistics describing a Beagle analysis.</p>
*
* <p>Instances of class {@code RunStats} are not thread-safe.</p>
*
* @author Brian L. Browning {@code <browning@uw.edu>}
*/
public class RunStats {
private final Par par;
private final PrintWriter log;
private final long startNanos;
private long totalPhaseNanos = 0;
private long imputeNanos = 0;
private long totalImputeNanos = 0;
/**
* Constructs a new {@code RunStats} instance.
* @param par the analysis parameters
* @throws NullPointerException if {@code par == null}
*/
RunStats(Par par) {
this.startNanos = System.nanoTime();
this.par = par;
this.log = FileUtil.printWriter(new File(par.out() + ".log"));
}
/**
* Prints initial information about the analysis to a log
* file and to standard output.
*/
public void printStartInfo() {
String[] argList = par.args();
if (par.noNThreads()) {
// add nthreads parameter to list of command line parameters
int oldLength = argList.length;
argList = Arrays.copyOf(argList, argList.length+1);
argList[oldLength] = "nthreads=" + par.nthreads();
}
Utilities.duoPrint(log, Main.SHORT_HELP + Const.nl);
Utilities.duoPrintln(log, "Start time: " + Utilities.timeStamp());
Utilities.duoPrint(log, Utilities.commandLine(Main.PROGRAM, argList));
if (par.ped() != null) {
String s = Const.nl + "WARNING: This version will not model"
+ " duos or trios in the pedigree file";
Utilities.duoPrintln(log, s);
}
if (par.map() == null) {
String s = Const.nl + "No genetic map is specified: using 1 cM = 1 Mb";
Utilities.duoPrintln(log, s);
}
log.flush();
}
/**
* Returns the analysis parameters.
* @return the analysis parameters
*/
public Par par() {
return par;
}
/**
* Prints information about the samples to a log
* file and to standard output.
* @param ped the pedigree data for the target samples
* @param window the input data for the current marker window
* @throws NullPointerException if
* {@code this.par().ped() != null && ped == null}
*/
public void printSampleSummary(Pedigree ped, Window window) {
Optional<RefGT> optRefGT = window.refGT();
int nRefSamples = optRefGT.isPresent() ? optRefGT.get().nSamples() : 0;
Utilities.duoPrint(log, Const.nl);
Utilities.duoPrint(log, String.format("Reference samples: %,20d%n",
nRefSamples));
Utilities.duoPrint(log, String.format("Study samples: %,20d%n",
window.targGT().nSamples()));
if (par.ped() != null) {
Utilities.duoPrint(log, " ");
Utilities.duoPrint(log, String.valueOf(ped.nSingles()));
Utilities.duoPrintln(log, " singles");
Utilities.duoPrint(log, " ");
Utilities.duoPrint(log, String.valueOf(ped.nDuos()));
Utilities.duoPrintln(log, " duos");
Utilities.duoPrint(log, " ");
Utilities.duoPrint(log, String.valueOf(ped.nTrios()));
Utilities.duoPrintln(log, " trios");
}
log.flush();
}
/**
* Prints information about the marker window to a log
* file and to standard output.
* @param window the input genotype data for the marker window
*/
public void printWindowUpdate(Window window) {
GT targGT = window.targGT();
Optional<RefGT> refGT = window.refGT();
Markers markers = refGT.isPresent() ? refGT.get().markers() :
targGT.markers();
Marker first = markers.marker(0);
Marker last = markers.marker(markers.size() - 1);
StringBuilder sb = new StringBuilder(30);
sb.append(Const.nl);
sb.append("Window ");
sb.append(window.windowIndex());
sb.append(" [");
String chr = first.chrom();
if (chr.equals(Const.MISSING_DATA_STRING)==false) {
sb.append(chr);
sb.append(Const.colon);
}
sb.append(first.pos());
sb.append(Const.hyphen);
if (chr.equals(last.chrom())==false) {
sb.append(last.chrom());
sb.append(Const.colon);
}
sb.append(last.pos());
sb.append(']');
sb.append(Const.nl);
if (refGT.isPresent()) {
sb.append(String.format("Reference markers: %,20d%n", markers.size()));
}
sb.append(String.format("Study markers: %,20d%n", targGT.nMarkers()));
Utilities.duoPrint(log, sb.toString());
log.flush();
}
/**
* Prints information about the complete analysis to a log
* file and to standard output, and closes the log file.
* @param nTargetMarkers the total number of target markers analyzed
* @param nMarkers the total number of markers analyzed
*/
public void printSummaryAndClose(int nTargetMarkers, int nMarkers) {
long totalTime = System.nanoTime() - startNanos;
Utilities.duoPrint(log, Const.nl);
Utilities.duoPrintln(log, "Cumulative Statistics:" + Const.nl);
if (nTargetMarkers != nMarkers) {
Utilities.duoPrint(log,
String.format("Reference markers: %,20d%n", nMarkers));
}
Utilities.duoPrint(log,
String.format("Study markers: %,20d%n%n", nTargetMarkers));
if (totalPhaseNanos > 1000) {
duoPrintNanos("Haplotype phasing time: ", totalPhaseNanos);
}
if (totalImputeNanos > 0) {
duoPrintNanos("Imputation time: ", totalImputeNanos);
}
duoPrintNanos( "Total time: ", totalTime);
Utilities.duoPrintln(log, Const.nl + "End time: "
+ Utilities.timeStamp());
Utilities.duoPrintln(log, Main.PROGRAM + " finished");
log.close();
}
/**
* Increases the cumulative phasing time by the specified number of
* nanoseconds.
* @param nanos the elapsed nanoseconds for updating the haplotype
* estimates
*/
public void phaseNanos(long nanos) {
totalPhaseNanos += nanos;
}
/**
* Stores the time for imputing ungenotyped marker and increases
* the cumulative imputation time by the specified number
* of nanoseconds.
* @param nanos the nanoseconds required to impute ungenotyped
* markers
*/
public void imputationNanos(long nanos) {
imputeNanos = nanos;
totalImputeNanos += nanos;
}
/**
* Prints run time for most recent imputation to a log file
* and to standard output.
*/
public void printImputationUpdate() {
Utilities.duoPrint(log, Const.nl);
duoPrintNanos("Imputation time: ", imputeNanos);
log.flush();
}
/**
* Prints the specified string to the log file and to standard out.
* @param msg the message to be printed
*/
public void println(String msg) {
Utilities.duoPrintln(log, msg);
log.flush();
}
/**
* Prints information about the specified iteration, and adds the
* specified elapsed nanoseconds to the total phasing time.
* @param pd estimated phased genotypes at stage 1 markers
* @param elapsedNanos the elapsed nanoseconds for the iteration
*/
public void printStage1Info(PhaseData pd, long elapsedNanos) {
if (pd.it()==par.burnin() && par.em()) {
printEstimatedParameters(pd.ne(), pd.pMismatch());
}
phaseNanos(elapsedNanos);
String msg;
int it = pd.it();
if (it < par.burnin()) {
if (it==0) {
println("");
}
msg = "Burnin iteration " + (it+1) + ":"; // count from 1
}
else {
it -= par.burnin();
if (it==0) {
println("");
}
msg = "Phasing iteration " + (it+1) + ":"; // count from 1
}
duoPrintNanos(String.format("%1$-31s", msg), elapsedNanos);
}
/**
* Prints the specified elapsed nanoseconds for stage 2 phasing.
* @param elapsedNanos the elapsed nanoseconds for stage 2 phasing
*/
public void printStage2Info(long elapsedNanos) {
phaseNanos(elapsedNanos);
String msg = "Low frequency phasing:";
duoPrintNanos(String.format("%1$-31s", msg), elapsedNanos);
}
/**
* Prints the specified estimated effective population size.
* @param ne the estimated effective population size
* @param pMismatch the estimated allele mismatch parameter
*/
public void printEstimatedParameters(long ne, float pMismatch) {
Utilities.duoPrintln(log, "");
Utilities.duoPrint(log, String.format("%1$-31s", "Estimated ne:"));
Utilities.duoPrintln(log, String.valueOf(ne));
Utilities.duoPrint(log, String.format("%1$-31s", "Estimated err:"));
Utilities.duoPrintln(log, String.format("%1$7.1e", pMismatch));
}
/**
* Print the specified message followed by the human
* elapsed time as formatted by
* {@code blbutil.Utilities.elapsedNanos(nanos)}
* @param message the message to be printed
* @param nanos the elapsed time in nanoseconds
*/
public void duoPrintNanos(String message, long nanos) {
Utilities.duoPrint(log, message);
Utilities.duoPrintln(log, Utilities.elapsedNanos(nanos));
}
}
|