1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
|
// ==========================================================================
// SeqAn - The Library for Sequence Analysis
// ==========================================================================
// Copyright (c) 2006-2026, Knut Reinert, FU Berlin
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of Knut Reinert or the FU Berlin nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//
// ==========================================================================
// Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
// Author: David Weese <david.weese@fu-berlin.de>
// ==========================================================================
// Code for writing BAM.
// ==========================================================================
#ifndef INCLUDE_SEQAN_BAM_IO_WRITE_BAM_H_
#define INCLUDE_SEQAN_BAM_IO_WRITE_BAM_H_
namespace seqan2 {
// ============================================================================
// Functions
// ============================================================================
// ----------------------------------------------------------------------------
// Function writeRecord() BamHeader
// ----------------------------------------------------------------------------
template <typename TTarget, typename TNameStore, typename TNameStoreCache, typename TStorageSpec>
void write(TTarget & target,
BamHeader const & header,
BamIOContext<TNameStore, TNameStoreCache, TStorageSpec> & context,
Bam const & /*tag*/)
{
write(target, "BAM\1");
clear(context.buffer);
write(context.buffer, header, context, Sam());
// Note that we do not write out a null-character to terminate the header. This would be valid by the SAM standard
// but the samtools do not expect this and write out the '\0' when converting from BAM to SAM.
// appendValue(context.buffer, '\0');
// Write text header.
appendRawPod(target, static_cast<int32_t>(length(context.buffer)));
write(target, context.buffer);
// Write references.
if (!empty(context._contigNames))
{
int32_t nRef = _max(length(contigNames(context)), length(contigLengths(context)));
appendRawPod(target, nRef);
for (int32_t i = 0; i < nRef; ++i)
{
if (i < (int32_t)length(contigNames(context)))
{
appendRawPod(target, static_cast<int32_t>(length(contigNames(context)[i]) + 1));
write(target, contigNames(context)[i]);
}
else
{
appendRawPod(target, static_cast<int32_t>(1));
}
writeValue(target, '\0');
int32_t lRef = 0;
if (i < (int32_t)length(contigLengths(context)))
lRef = contigLengths(context)[i];
appendRawPod(target, lRef);
}
}
}
// ----------------------------------------------------------------------------
// Function writeRecord() BamAlignmentRecord
// ----------------------------------------------------------------------------
static inline int _reg2Bin(uint32_t beg, uint32_t end)
{
--end;
if (beg >> 14 == end >> 14)
return 4681 + (beg >> 14);
if (beg >> 17 == end >> 17)
return 585 + (beg >> 17);
if (beg >> 20 == end >> 20)
return 73 + (beg >> 20);
if (beg >> 23 == end >> 23)
return 9 + (beg >> 23);
if (beg >> 26 == end >> 26)
return 1 + (beg >> 26);
return 0;
}
inline uint32_t
updateLengths(BamAlignmentRecord const & record)
{
// update internal lengths.
record._l_qname = length(record.qName) + 1;
record._n_cigar = length(record.cigar);
record._l_qseq = length(record.seq);
return sizeof(BamAlignmentRecordCore) + record._l_qname +
record._n_cigar * 4 + (record._l_qseq + 1) / 2 + record._l_qseq +
length(record.tags);
}
template <typename TTarget>
inline void
_writeBamRecord(TTarget & target,
BamAlignmentRecord const & record,
Bam const & /*tag*/)
{
typedef typename Iterator<String<CigarElement<> > const, Standard>::Type SEQAN_RESTRICT TCigarIter;
typedef typename Iterator<IupacString const, Standard>::Type SEQAN_RESTRICT TSeqIter;
typedef typename Iterator<CharString const, Standard>::Type SEQAN_RESTRICT TQualIter;
// bin_mq_nl
unsigned l = 0;
_getLengthInRef(l, record.cigar);
record.bin =_reg2Bin(record.beginPos, record.beginPos + std::max(1u, l));
// Write fixed-size BamAlignmentRecordCore.
appendRawPod(target, (BamAlignmentRecordCore &)record);
// read_name
write(target, record.qName);
writeValue(target, '\0');
// cigar
static unsigned char const MAP[256] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
0, 0, 0, 0, 2, 0, 0, 0, 5, 1, 0, 0, 0, 0, 3, 0,
6, 0, 0, 4, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
TCigarIter citEnd = end(record.cigar, Standard());
for (TCigarIter cit = begin(record.cigar, Standard()); cit != citEnd; ++cit)
appendRawPod(target, ((uint32_t)cit->count << 4) | MAP[(unsigned char)cit->operation]);
// seq
TSeqIter sit = begin(record.seq, Standard());
TSeqIter sitEnd = sit + (record._l_qseq & ~1);
while (sit != sitEnd)
{
unsigned char x = (ordValue(getValue(sit++)) << 4);
writeValue(target, x | ordValue(getValue(sit++)));
}
if (record._l_qseq & 1)
writeValue(target, ordValue(getValue(sit++)) << 4);
// qual
SEQAN_ASSERT_LEQ(length(record.qual), length(record.seq));
TQualIter qit = begin(record.qual, Standard());
TQualIter qitEnd = end(record.qual, Standard());
TQualIter qitVirtEnd = qit + record._l_qseq;
while (qit != qitEnd)
writeValue(target, *qit++ - '!');
for (; qit != qitVirtEnd; ++qit)
writeValue(target, '\xff'); // fill with zero qualities
// tags
write(target, record.tags);
}
template <typename TTarget>
inline void
_writeBamRecordWrapper(TTarget & target,
BamAlignmentRecord const & record,
Nothing & /* range */,
uint32_t size,
Bam const & tag)
{
appendRawPod(target, size);
_writeBamRecord(target, record, tag);
}
template <typename TTarget, typename TOValue>
inline void
_writeBamRecordWrapper(TTarget & target,
BamAlignmentRecord const & record,
Range<TOValue*> & range,
uint32_t size,
Bam const & tag)
{
if (SEQAN_LIKELY(size + 4 <= length(range)))
{
appendRawPod(range.begin, size);
_writeBamRecord(range.begin, record, tag);
advanceChunk(target, size + 4);
}
else
{
appendRawPod(target, size);
_writeBamRecord(target, record, tag);
}
}
template <typename TTarget, typename TNameStore, typename TNameStoreCache, typename TStorageSpec>
void write(TTarget & target,
BamAlignmentRecord const & record,
BamIOContext<TNameStore, TNameStoreCache, TStorageSpec> & context,
Bam const & tag)
{
#ifdef SEQAN_DEBUG_OR_TEST_
// Check for valid IO Context.
if (record.rID != BamAlignmentRecord::INVALID_REFID)
{
SEQAN_ASSERT_LT_MSG(record.rID, static_cast<int32_t>(length(contigNames(context))),
"BAM IO Assertion: Unknown REF ID!");
}
if (record.rNextId != BamAlignmentRecord::INVALID_REFID)
{
SEQAN_ASSERT_LT_MSG(record.rNextId, static_cast<int32_t>(length(contigNames(context))),
"BAM IO Assertion: Unknown NEXT REF ID!");
}
#endif
ignoreUnusedVariableWarning(context);
// Update internal lengths
uint32_t size = updateLengths(record);
// Reserve chunk memory
reserveChunk(target, 4 + size, Output());
// Write length and record
typename Chunk<TTarget>::Type ochunk;
getChunk(ochunk, target, Output());
_writeBamRecordWrapper(target, record, ochunk, size, tag);
}
} // namespace seqan2
#endif // #ifndef INCLUDE_SEQAN_BAM_IO_WRITE_BAM_H_
|