File: snp.d

package info (click to toggle)
sambamba 1.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 3,528 kB
  • sloc: sh: 220; python: 166; ruby: 147; makefile: 103
file content (76 lines) | stat: -rw-r--r-- 1,671 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/*
    This file is part of BioD.

    Copyright (C) 2018 Pjotr Prins <pjotr.prins@thebird.nl>
*/

module bio.std.genotype.snp;

import bio.std.range.splitter;

import std.algorithm;
import std.array;
import std.conv;
import std.exception;
import std.experimental.logger;
import std.stdio;

/*

  Functions around SNPs

*/

struct SNP {
  string name;
  string chr;
  long pos;   // -1 if NA
  double cm;  // centimorgan, -1 if NA
}

immutable NullSNP = SNP("unknown","NA",-1,-1.0);

/*
  Fetch SNP annotations from tab delimited file that looks like name,
  pos, chr, cM

  rs3668922       111771071       13      65.0648
  rs13480515      17261714        10      4.72355
  rs13483034      53249416        17      30.175

  NA values can be used and the last cM column is optional.

  Note: this function should be generalized to return a named tuple, based on a
  named list
*/

SNP[] fetch_snp_annotations(string filen) {
  SNP[] list;

  bool[string] names;

  info("Parsing ",filen);
  File f = File(filen);

  try {
    foreach(line; f.byLine) {
      auto fields = array(SimpleSplitConv!(char[])(line));
      auto name = to!string(fields[0]);
      if (name in names)
        throw new Exception("SNP name "~name~" appears multiple times in "~filen);
      auto pos = (fields[1] == "NA" ? -1 : to!ulong(fields[1]));
      auto chr = to!string(fields[2]);
      double cm = -1.0;
      if (fields.length > 3)
        cm = (fields[3] == "NA" ? -1.0 : to!double(fields[3]));
      auto snp = SNP(name,chr,pos,cm);
      list ~= snp;
    }
  }
  catch(Exception e) {
    writeln(e.msg); // need to test and give proper output FIXME
    throw e;
  }

  return list;
}