File: bgzf_writer.d

package info (click to toggle)
sambamba 1.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 3,528 kB
  • sloc: sh: 220; python: 166; ruby: 147; makefile: 103
file content (244 lines) | stat: -rw-r--r-- 7,742 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
/*
    New style BGZF writer. This file is part of Sambamba.
    Copyright (C) 2017 Pjotr Prins <pjotr.prins@thebird.nl>

    Sambamba is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published
    by the Free Software Foundation; either version 2 of the License,
    or (at your option) any later version.

    Sambamba is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA

*/

// Based on the original by Artem Tarasov.

module bio.std.experimental.hts.bgzf_writer;

// import core.stdc.stdlib : malloc, free;
import core.memory: pureMalloc, pureFree;
import core.stdc.stdio: fopen, fread, fclose;
import std.bitmanip;
import std.conv;
import std.exception;
import std.typecons;
import std.parallelism;
import std.array;
import std.algorithm : max;
import std.stdio;
import std.typecons;

// depends on the older bio.core modules (BGZF compression, ring buffer)
import bio.core.bgzf.compress;
import bio.core.utils.roundbuf;

// import undead.stream;

import bio.std.hts.bam.constants: BGZF_MAX_BLOCK_SIZE, BGZF_BLOCK_SIZE, BGZF_EOF;
import bio.std.experimental.hts.bgzf;
import bio.std.experimental.hts.constants;

/// Callback type taking two `ubyte[]` slices. In this module it is invoked
/// with (uncompressed, compressed) block data just before the compressed
/// block is written out.
alias BlockWriteHandler = void delegate(ubyte[], ubyte[]);

/// Task-pool entry point: runs `bgzfCompress` on one block and bundles the
/// result with the data needed to write it out later.
///
/// Params:
///   input         = uncompressed block data, passed through unchanged
///   level         = compression level forwarded to `bgzfCompress`
///   output_buffer = buffer forwarded to `bgzfCompress`
///   handler       = write handler carried along untouched (may be null)
///
/// Returns: the tuple `(input, result of bgzfCompress, handler)`.
Tuple!(ubyte[], ubyte[], BlockWriteHandler) bgzfCompressFunc(ubyte[] input,
                                                             int level,
                                                             ubyte[] output_buffer,
                                                             BlockWriteHandler handler)
{
  return tuple(input, bgzfCompress(input, level, output_buffer), handler);
}

/// BGZF compression - this is a port of the original that used the
/// undead.stream library.
///
/// Incoming bytes are collected in a block-sized buffer. Every full (or
/// explicitly flushed) block is handed to the task pool for compression
/// via `bgzfCompressFunc`, and finished blocks are written to the output
/// file in the order in which they were submitted. `close()` appends the
/// `BGZF_EOF` marker block.
struct BgzfWriter {

private:
  File f;              // output file, opened in "wb" mode by the constructor
  TaskPool task_pool;  // pool that executes the compression tasks

  ubyte[] buffer; // a slice into compression_buf (uncompressed data)
  ubyte[] tmp;    // a slice into compression_buf (compressed data)
  size_t current_size;   // number of bytes of `buffer` filled so far
  int compression_level; // level passed on to bgzfCompressFunc

  alias Task!(bgzfCompressFunc,
              ubyte[], int, ubyte[], BlockWriteHandler) CompressionTask;
  RoundBuf!(CompressionTask*) _compression_tasks; // tasks in submission order
  // malloc'ed backing store for all `buffer`/`tmp` slots; a null pointer
  // means "never opened or already closed"
  ubyte[] compression_buf;

public:

  /// Create new BGZF output stream with a multi-threaded writer
  ///
  /// Params:
  ///   fn                 = path of the output file (opened with mode "wb")
  ///   _compression_level = compression level, must lie in [-1, 9]
  ///
  /// Throws: BgzfException when the compression level is out of range.
  this(string fn, int _compression_level=-1) {
    import core.exception : onOutOfMemoryError;

    f = File(fn,"wb");
    // Validate the argument. The member `compression_level` is still at its
    // default value here, so checking it would never reject anything.
    enforce1(-1 <= _compression_level && _compression_level <= 9,
            "BGZF compression level must be a number in interval [-1, 9]");
    size_t max_block_size = BGZF_MAX_BLOCK_SIZE;
    size_t block_size     = BGZF_BLOCK_SIZE;
    task_pool             = taskPool();
    compression_level     = _compression_level;

    // create a ring buffer that is large enough
    size_t n_tasks = max(task_pool.size, 1) * 16;
    _compression_tasks = RoundBuf!(CompressionTask*)(n_tasks);

    // create extra block to which we can write while n_tasks are
    // executed: (n_tasks + 1) slots, each holding one uncompressed and one
    // compressed area of max_block_size bytes
    auto comp_buf_size = (2 * n_tasks + 2) * max_block_size;
    auto p = cast(ubyte*)pureMalloc(comp_buf_size);
    if (p is null)
      onOutOfMemoryError(); // never slice a null pointer
    compression_buf = p[0 .. comp_buf_size];
    buffer          = compression_buf[0 .. block_size];
    tmp             = compression_buf[max_block_size .. max_block_size * 2];
  }

  /// Calls close(). There is no try/catch here, so anything close() throws
  /// propagates out of the destructor.
  ~this() {
    close();
  }

  @disable this(this); // BgzfWriter does not have copy semantics;

  /// Throw a BgzfException whose message contains the output file name,
  /// the given source location and `msg`.
  void throwBgzfException(string msg, string file = __FILE__, size_t line = __LINE__) {
    throw new BgzfException("Error writing BGZF block starting in "~f.name ~
                            " (" ~ file ~ ":" ~ to!string(line) ~ "): " ~ msg);
  }

  /// Throw a BgzfException (via throwBgzfException) when `check` is false.
  /// `msg` is lazy, so it is only evaluated on failure.
  void enforce1(bool check, lazy string msg, string file = __FILE__, int line = __LINE__) {
    if (!check)
      throwBgzfException(msg,file,line);
  }

  /// Append `size` bytes starting at `buf` to the stream. Whenever the
  /// current block buffer becomes full it is submitted with flush_block().
  ///
  /// Throws: BgzfException when the writer was never opened or is closed.
  void write(const void* buf, size_t size) {
    // After close() the block buffer is gone; without this check the loop
    // below could never make progress on a zero-length buffer.
    enforce1(compression_buf.ptr !is null,
             "attempt to write to a BGZF writer that is not open");

    if (size + current_size >= buffer.length) {
      size_t room;
      ubyte[] data = (cast(ubyte*)buf)[0 .. size];

      // Fill up the current block, submit it, and repeat while the
      // remaining data still fills a whole block.
      while (data.length + current_size >= buffer.length) {
        room = buffer.length - current_size;
        buffer[$ - room .. $] = data[0 .. room];
        data = data[room .. $];

        current_size = buffer.length;

        flush_block(); // switches `buffer` to the next slot, resets current_size
      }

      // The tail is smaller than one block; it starts the fresh block.
      buffer[0 .. data.length] = data[];
      current_size = data.length;
    } else {
      buffer[current_size .. current_size + size] = (cast(ubyte*)buf)[0 .. size];
      current_size += size;
    }
  }

  /// Append the contents of `buf` to the stream.
  void write(ubyte[] buf) {
    write(buf.ptr, buf.length);
  }

  /// Append the raw bytes of the string `s` to the stream.
  void write(string s) {
    write(cast(ubyte[])s);
  }

  /// Append `value` in little-endian byte order (T.sizeof bytes).
  void write(T)(T value) { // int values
    // Fixed-size stack buffer: no GC allocation per call and no size limit
    // that needs asserting.
    ubyte[T.sizeof] bytes = nativeToLittleEndian(value);
    write(bytes[]);
  }

  /// Force flushing current block, even if it is not yet filled.
  /// Should also be used when it's not desired to have records
  /// crossing block borders.
  void flush_block() {
    if (current_size == 0)
      return;

    // If the task ring is full, wait for the oldest task and take it out
    // to make room; its result is written further below.
    Tuple!(ubyte[], ubyte[], BlockWriteHandler) front_result;
    if (_compression_tasks.full) {
      front_result = _compression_tasks.front.yieldForce();
      _compression_tasks.popFront();
    }

    auto compression_task = task!bgzfCompressFunc(buffer[0 .. current_size],
                                                  compression_level, tmp,
                                                  _before_write);
    _compression_tasks.put(compression_task);
    task_pool.put(compression_task);

    // Advance `buffer`/`tmp` to the next slot of compression_buf, wrapping
    // around at the end. A slot is 2 * N bytes: uncompressed area followed
    // by compressed area.
    size_t offset = buffer.ptr - compression_buf.ptr;
    immutable N = tmp.length;
    offset += 2 * N;
    if (offset == compression_buf.length)
      offset = 0;
    buffer = compression_buf[offset .. offset + buffer.length];
    tmp = compression_buf[offset + N .. offset + 2 * N];
    current_size = 0;

    if (front_result[0] !is null)
      writeResult(front_result);

    // Write out every task at the front of the ring that has already
    // finished, stopping at the first one still running so that blocks are
    // written in submission order.
    while (!_compression_tasks.empty) {
      auto task = _compression_tasks.front;
      if (!task.done())
        break;
      auto result = task.yieldForce();
      writeResult(result);
      _compression_tasks.popFront();
    }
  }

  // Handler captured by each compression task at submission time.
  private void delegate(ubyte[], ubyte[]) _before_write;

  /// Install a handler that is called with (uncompressed, compressed) data
  /// of each block right before the compressed block is written. It applies
  /// to blocks submitted after this call.
  void setWriteHandler(void delegate(ubyte[], ubyte[]) handler) {
    _before_write = handler;
  }

  /// Invoke the block's write handler (if any) and write the compressed
  /// data to the output file.
  private void writeResult(Tuple!(ubyte[], ubyte[], BlockWriteHandler) block) {
    auto uncompressed = block[0];
    auto compressed = block[1];
    auto handler = block[2];
    if (handler) {// write handler enabled
      handler(uncompressed, compressed);
    }
    f.rawWrite(compressed);
  }

  /// Flush all remaining BGZF blocks and underlying stream.
  void flush() {
    flush_block();

    // Wait for every outstanding task, oldest first, and write its block.
    while (!_compression_tasks.empty) {
      auto task = _compression_tasks.front;
      auto block = task.yieldForce();
      writeResult(block);
      _compression_tasks.popFront();
    }

    f.flush();
    current_size = 0;
  }

  /// Flush all remaining BGZF blocks and close source stream.
  /// Automatically adds empty block at the end, serving as indicator
  /// of end of stream. This function is automatically called on
  /// destruction.
  ///
  /// Calling it on a writer that was never opened, or calling it more than
  /// once, does nothing.
  void close() {
    // The destructor always calls close(); without this guard an explicit
    // close() followed by destruction would write to a closed file and free
    // the buffer twice.
    if (compression_buf.ptr is null)
      return;

    flush();
    f.rawWrite(BGZF_EOF);
    f.close();

    pureFree(compression_buf.ptr);
    // Drop every slice into the freed memory and mark the writer closed.
    compression_buf = null;
    buffer          = null;
    tmp             = null;
  }
}