1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
|
#include <alignment/format/SAMPrinter.hpp>
#include <algorithm> //reverse
using namespace SAMOutput;
void SAMOutput::BuildFlag(T_AlignmentCandidate &alignment, AlignmentContext &context,
uint16_t &flag)
{
/*
* Much of the flags are commented out for now since they do not
* generate PICARD compliant SAM. This needs to be worked out.
*/
//
// Without supporting strobe, assume 1 segment per template.
flag = 0;
/*
if (context.nSubreads > 1) {
flag |= MULTI_SEGMENTS;
}*/
// if (context.AllSubreadsAligned() and context.nSubreads > 1) {
// flag |= ALL_SEGMENTS_ALIGNED;
// }
if (alignment.tStrand == 1) {
flag |= SEQ_REVERSED;
}
/*
if (context.hasNextSubreadPos == false and context.nSubreads > 1) {
flag |= NEXT_SEGMENT_UNMAPPED;
}
if (context.nextSubreadDir == 1) {
flag |= SEQ_NEXT_REVERSED;
}
if (context.IsFirst() and context.nSubreads > 1) {
flag |= FIRST_SEGMENT;
}
*/
else if (context.nSubreads > 1) {
/*
* Remember, if you're not first, you're last.
*/
// flag |= LAST_SEGMENT;
}
if (context.isPrimary == false) {
flag |= SECONDARY_ALIGNMENT;
}
}
void SAMOutput::CreateDNAString(DNASequence &seq, DNASequence &clippedSeq,
//
// Trimming is used for both hard non-clipping
// so it is called trim instead of clip.
//
DNALength trimFront, DNALength trimEnd)
{
assert(seq.length >= trimEnd and seq.length - trimEnd >= trimFront);
clippedSeq.seq = &seq.seq[trimFront];
clippedSeq.length = seq.length - trimEnd - trimFront;
}
void SAMOutput::AddGaps(T_AlignmentCandidate &alignment, int gapIndex, std::vector<int> &opSize,
std::vector<char> &opChar)
{
for (size_t g = 0; g < alignment.gaps[gapIndex].size(); g++) {
if (alignment.gaps[gapIndex][g].seq == blasr::Gap::Query) {
opSize.push_back(alignment.gaps[gapIndex][g].length);
opChar.push_back('D');
} else if (alignment.gaps[gapIndex][g].seq == blasr::Gap::Target) {
opSize.push_back(alignment.gaps[gapIndex][g].length);
opChar.push_back('I');
}
}
}
void SAMOutput::AddMatchBlockCigarOps(DNASequence &qSeq, DNASequence &tSeq, blasr::Block &b,
DNALength &qSeqPos, DNALength &tSeqPos,
std::vector<int> &opSize, std::vector<char> &opChar)
{
DNALength qPos = qSeqPos + b.qPos, tPos = tSeqPos + b.tPos;
bool started = false, prevSeqMatch = false;
for (DNALength i = 0; i < b.length; i++) {
bool curSeqMatch = (qSeq[qPos + i] == tSeq[tPos + i]);
if (started) {
if (curSeqMatch == prevSeqMatch)
opSize[opSize.size() - 1]++;
else {
opSize.push_back(1);
opChar.push_back(curSeqMatch ? '=' : 'X');
}
} else {
started = true;
opSize.push_back(1);
opChar.push_back(curSeqMatch ? '=' : 'X');
}
prevSeqMatch = curSeqMatch;
}
}
void SAMOutput::MergeAdjacentIndels(std::vector<int> &opSize, std::vector<char> &opChar,
const char mismatchChar)
{
assert(opSize.size() == opChar.size() and not opSize.empty());
size_t i, j;
for (i = 0, j = 1; i < opSize.size() and j < opSize.size(); j++) {
const int ni = opSize[i], nj = opSize[j];
const char ci = opChar[i], cj = opChar[j];
if (ci == cj) {
opSize[i] = ni + nj; // merge i and j to i
} else {
if ((ci == 'I' and cj == 'D') or (ci == 'D' and cj == 'I')) {
opSize[i] = std::min(ni, nj);
opChar[i] = mismatchChar; // merge 'I' and 'D' to Mismatch
if (i != 0 and i != opSize.size() and opChar[i] == opChar[i - 1]) {
assert(i >= 1);
opSize[i - 1] += opSize[i]; // merge with i - 1?
i--;
}
if (ni != nj) {
i++; // the remaining 'I' or 'D'
opSize[i] = std::abs(ni - nj);
opChar[i] = (ni > nj) ? ci : cj;
}
} else {
i++; // move forward.
opSize[i] = nj;
opChar[i] = cj;
}
}
}
assert(i < opSize.size());
opSize.erase(opSize.begin() + i + 1, opSize.end());
opChar.erase(opChar.begin() + i + 1, opChar.end());
}
void SAMOutput::CreateNoClippingCigarOps(T_AlignmentCandidate &alignment, std::vector<int> &opSize,
std::vector<char> &opChar, bool cigarUseSeqMatch,
const bool allowAdjacentIndels)
{
//
// Create the cigar string for the aligned region of a read,
// excluding the clipping.
//
int b;
// Each block creates a match NM (N=block.length)
int nBlocks = alignment.blocks.size();
int nGaps = alignment.gaps.size();
opSize.clear();
opChar.clear();
//
// Add gaps at the beginning of the alignment.
//
if (nGaps > 0) {
AddGaps(alignment, 0, opSize, opChar);
}
for (b = 0; b < nBlocks; b++) {
//
// Determine the gap before printing the match, since it is
// possible that the qurey and target are gapped at the same
// time, which merges into a mismatch.
//
int qGap = 0, tGap = 0;
int matchLength = alignment.blocks[b].length;
if (nGaps == 0) {
if (b + 1 < nBlocks) {
qGap = alignment.blocks[b + 1].qPos - alignment.blocks[b].qPos -
alignment.blocks[b].length;
tGap = alignment.blocks[b + 1].tPos - alignment.blocks[b].tPos -
alignment.blocks[b].length;
int commonGap;
commonGap = std::abs(qGap - tGap);
qGap -= commonGap;
tGap -= commonGap;
matchLength += commonGap;
if (cigarUseSeqMatch) {
AddMatchBlockCigarOps(alignment.qAlignedSeq, alignment.tAlignedSeq,
alignment.blocks[b], alignment.qPos, alignment.tPos,
opSize, opChar);
} else {
opSize.push_back(matchLength);
opChar.push_back('M');
}
assert((qGap > 0 and tGap == 0) or (qGap == 0 and tGap > 0));
if (qGap > 0) {
opSize.push_back(qGap);
opChar.push_back('I');
}
if (tGap > 0) {
opSize.push_back(tGap);
opChar.push_back('D');
}
}
} else {
if (cigarUseSeqMatch) {
AddMatchBlockCigarOps(alignment.qAlignedSeq, alignment.tAlignedSeq,
alignment.blocks[b], alignment.qPos, alignment.tPos, opSize,
opChar);
} else {
opSize.push_back(matchLength);
opChar.push_back('M');
}
int gapIndex = b + 1;
AddGaps(alignment, gapIndex, opSize, opChar);
}
}
if (alignment.tStrand == 1) {
std::reverse(opSize.begin(), opSize.end());
std::reverse(opChar.begin(), opChar.end());
}
if (not allowAdjacentIndels) {
MergeAdjacentIndels(opSize, opChar, (cigarUseSeqMatch ? 'X' : 'M'));
}
}
void SAMOutput::CigarOpsToString(std::vector<int> &opSize, std::vector<char> &opChar,
std::string &cigarString)
{
std::stringstream sstrm;
int i, nElem;
for (i = 0, nElem = opSize.size(); i < nElem; i++) {
sstrm << opSize[i] << opChar[i];
}
cigarString = sstrm.str();
}
|