1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
|
#include <iostream>
#include <seqan/store.h>
#include <seqan/misc/interval_tree.h>
#include <seqan/parallel.h>
using namespace seqan2;
// define used types
typedef FragmentStore<> TStore;
typedef Value<TStore::TAnnotationStore>::Type TAnnotation;
typedef TAnnotation::TId TId;
typedef TAnnotation::TPos TPos;
typedef IntervalAndCargo<TPos, TId> TInterval;
typedef IntervalTree<TPos, TId> TIntervalTree;
//
// 1. Load annotations and alignments from files
//
bool loadFiles(TStore & store, std::string const & annotationFileName, std::string const & alignmentFileName)
{
BamFileIn alignmentFile;
if (!open(alignmentFile, alignmentFileName.c_str()))
{
std::cerr << "Couldn't open alignment file " << alignmentFileName << std::endl;
return false;
}
std::cout << "Loading read alignments ..... " << std::flush;
readRecords(store, alignmentFile);
std::cout << "[" << length(store.alignedReadStore) << "]" << std::endl;
// load annotations
GffFileIn annotationFile;
if (!open(annotationFile, toCString(annotationFileName)))
{
std::cerr << "Couldn't open annotation file" << annotationFileName << std::endl;
return false;
}
std::cout << "Loading genome annotation ... " << std::flush;
readRecords(store, annotationFile);
std::cout << "[" << length(store.annotationStore) << "]" << std::endl;
return true;
}
//
// 2. Extract intervals from gene annotations (grouped by contigId)
//
void extractGeneIntervals(String<String<TInterval> > & intervals, TStore const & store)
{
// extract intervals from gene annotations (grouped by contigId)
resize(intervals, length(store.contigStore));
Iterator<TStore const, AnnotationTree<> >::Type it = begin(store, AnnotationTree<>());
if (!goDown(it))
return;
do
{
SEQAN_ASSERT_EQ(getType(it), "gene");
TPos beginPos = getAnnotation(it).beginPos;
TPos endPos = getAnnotation(it).endPos;
TId contigId = getAnnotation(it).contigId;
if (beginPos > endPos)
std::swap(beginPos, endPos);
// insert forward-strand interval of the gene and its annotation id
appendValue(intervals[contigId], TInterval(beginPos, endPos, value(it)));
}
while (goRight(it));
}
//![solution]
//
// 3. Construct interval trees
//
void constructIntervalTrees(String<TIntervalTree> & intervalTrees,
String<String<TInterval> > & intervals)
{
int numContigs = length(intervals);
resize(intervalTrees, numContigs);
SEQAN_OMP_PRAGMA(parallel for)
for (int i = 0; i < numContigs; ++i)
createIntervalTree(intervalTrees[i], intervals[i]);
}
//![solution]
//![main]
int main(int, char const **)
{
TStore store;
String<String<TInterval> > intervals;
String<TIntervalTree> intervalTrees;
std::string annotationFileName = getAbsolutePath("demos/tutorial/simple_rna_seq/example.gtf");
std::string alignmentFileName = getAbsolutePath("demos/tutorial/simple_rna_seq/example.sam");
if (!loadFiles(store, annotationFileName, alignmentFileName))
return 1;
extractGeneIntervals(intervals, store);
constructIntervalTrees(intervalTrees, intervals);
return 0;
}
//![main]
|