File: build_markov_model.cpp

package info (click to toggle)
seqan2 2.4.0%2Bdfsg-16
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 224,180 kB
  • sloc: cpp: 256,886; ansic: 91,672; python: 8,330; sh: 995; xml: 570; makefile: 252; awk: 51; javascript: 21
file content (46 lines) | stat: -rw-r--r-- 1,238 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#include <iostream>
#include <fstream>

#include <seqan/index.h>
#include <seqan/statistics.h>
#include <seqan/seq_io.h>

using namespace seqan;

int main()
{
    // Build path to background FASTA file.
    CharString bgPath = getAbsolutePath("/demos/statistics/background.fa");

    // Read the background from a file into X.
    StringSet<DnaString> X;
    SeqFileIn seqFile;
    if (!open(seqFile, toCString(bgPath)))
    {
        std::cerr << "ERROR: Could not open " << bgPath << "\n";
        return 1;
    }
    StringSet<CharString> ids;  // will be ignored
    readRecords(ids, X, seqFile);

    // Create MarkovModel of order 3 from the background.
    MarkovModel<Dna> mm(3);
    buildMarkovModel(mm, X);

    // Build set of words that we want to compute the zscore of.
    StringSet<DnaString> W;
    appendValue(W, "CCCAAAGC");
    appendValue(W, "CCCAAAGTAAATT");

    // Compute and print zscore.
    std::cout << "zscore: " << zscore(W, X, mm, AhoCorasick()) << "\n";

// //TODO his path has to be set explicitely when calling the demo
//  FILE *fd = fopen("projects/library/demos/zscore_human_mm.3","r");
//  read(fd, mm);
//  fclose(fd);

    //std::cout << zscore(W, X, mm, WuManber()) << std::endl;

    return 0;
}