1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
|
#ifdef WITH_TRANSCODING
#include "mix_buffer.h"
#include <libavutil/samplefmt.h>
#include <stdlib.h>
#include <assert.h>
#include <glib.h>
#include "ssrc.h"
// Signature shared by all mix-in routines: combine `num` elements read from `src`
// into the elements already present at `dst`. Both pointers are restrict-qualified,
// so the two buffers must not overlap.
typedef void mix_in_fn_t(void *restrict dst, const void *restrict src, unsigned int num);
// Sample-format specific details of a mix buffer.
struct mix_buffer_impl {
// size in bytes of one sample of a single channel
unsigned int sample_size;
// routine used to mix new input into data that is already in the buffer
mix_in_fn_t *mix_in;
};
// Write state of a single input source, keyed by SSRC in the buffer's ssrc_hash.
typedef struct {
// must be first: entries are handed out as `struct ssrc_entry *` (see
// mix_buffer_ssrc_new) and used as mix_buffer_ssrc_source elsewhere in this file
struct ssrc_entry h; // must be first
// position (in samples) in the ring buffer where this source writes next
unsigned int write_pos;
// wrap-around counter belonging to write_pos; compared against the buffer's own
// `loops` counter to tell positions in different passes over the ring apart
unsigned int loops;
} mix_buffer_ssrc_source;
#if defined(__x86_64__)
// x86-64 specific variants of the s16 mix-in routine. They are only declared here;
// the definitions live outside this file (the names in the comments below are
// presumably the assembly source files providing them -- confirm against the build).
// mix_in_x64_sse2.S
mix_in_fn_t s16_mix_in_sse2;
// mix_in_x64_avx2.S
mix_in_fn_t s16_mix_in_avx2;
// mix_in_x64_avx512.S
mix_in_fn_t s16_mix_in_avx512;
#endif
// Portable C implementation of the signed 16-bit mix-in.
//
// Adds each of the `samples` int16_t values at `src` to the corresponding value
// at `dst`, in place, saturating the result to the range [-32768, 32767].
// `dst` and `src` must not overlap (both are restrict-qualified).
static void s16_mix_in_c(void *restrict dst, const void *restrict src, unsigned int samples) {
	int16_t *out = dst;
	const int16_t *in = src;

	for (unsigned int i = 0; i < samples; i++) {
		// sum in a wider type so the result is exact; this avoids depending on
		// the implementation-defined conversion of an out-of-range value to int16_t
		int32_t sum = (int32_t) out[i] + (int32_t) in[i];

		// saturate/clamp
		if (sum > 32767)
			sum = 32767;
		else if (sum < -32768)
			sum = -32768;

		out[i] = (int16_t) sum;
	}
}
// Selection of the routine behind `s16_mix_in`.
// The run-time dispatch below is compiled only on x86-64 glibc builds without
// ASAN_BUILD and where HAS_ATTR(ifunc) holds; in all other builds `s16_mix_in`
// is just another name for the C implementation.
#if defined(__x86_64__) && !defined(ASAN_BUILD) && HAS_ATTR(ifunc) && defined(__GLIBC__)
// Resolver referenced by the ifunc attribute below. Returns the first variant
// whose CPU flag is reported by rtpe_has_cpu_flag(), checked in the order
// AVX512BW, AVX2, SSE2, and falls back to the C implementation.
static mix_in_fn_t *resolve_s16_mix_in(void) {
if (rtpe_has_cpu_flag(RTPE_CPU_FLAG_AVX512BW))
return s16_mix_in_avx512;
if (rtpe_has_cpu_flag(RTPE_CPU_FLAG_AVX2))
return s16_mix_in_avx2;
if (rtpe_has_cpu_flag(RTPE_CPU_FLAG_SSE2))
return s16_mix_in_sse2;
return s16_mix_in_c;
}
// indirect function: the actual implementation is whatever resolve_s16_mix_in() returns
static mix_in_fn_t s16_mix_in __attribute__ ((ifunc ("resolve_s16_mix_in")));
#else
// no run-time dispatch available: always use the C implementation
#define s16_mix_in s16_mix_in_c
#endif
// Implementation descriptor for signed 16-bit samples.
// Note that despite the `_c` suffix, `.mix_in` is `s16_mix_in`, which is either the
// ifunc-dispatched routine or the plain C implementation depending on the build.
const struct mix_buffer_impl impl_s16_c = {
.sample_size = sizeof(int16_t),
.mix_in = s16_mix_in,
};
// Pads the buffer with zeroed samples at the write head until it holds at least
// `up_to` samples. Returns immediately if the fill level is already sufficient.
// `up_to` must not exceed the total buffer size (asserted).
// Advances head_write_pos and fill, and bumps the buffer's loop counter if the
// padding wraps around the end of the ring.
// The caller must already hold the buffer lock.
static void fill_up_to(struct mix_buffer *mb, unsigned int up_to) {
	if (up_to <= mb->fill)
		return;

	assert(up_to <= mb->size);

	unsigned int missing = up_to - mb->fill;

	// first stretch: from the write head towards the end of the buffer
	unsigned int until_end = mb->size - mb->head_write_pos;
	unsigned int first = MIN(until_end, missing);
	memset(mb->buf.c + mb->head_write_pos * mb->sample_size_channels, 0,
			first * mb->sample_size_channels);
	mb->head_write_pos += first;
	mb->fill += first;
	missing -= first;

	if (!missing)
		return;

	// second stretch: hit the end of the buffer, continue from its beginning
	memset(mb->buf.c, 0, missing * mb->sample_size_channels);
	mb->head_write_pos = missing;
	mb->fill += missing;
	mb->loops++;
}
// Attempts to read `samples` samples without copying.
//
// On success returns a pointer directly into the internal buffer and stores the
// length of the returned data in bytes in `*size`; the read position and fill
// level are advanced. If less data than requested is buffered, the buffer is
// first padded with zeroed samples.
//
// Returns NULL with `*size == 0` if the request exceeds the buffer size or the
// buffer is not active. Returns NULL with `*size != 0` if the requested range
// wraps around the end of the ring; nothing is consumed in that case and the
// caller must follow up with mix_buffer_read_slow() and a buffer of `*size` bytes.
void *mix_buffer_read_fast(struct mix_buffer *mb, unsigned int samples, unsigned int *size) {
	LOCK(&mb->lock);

	// error or inactive
	if (!mb->active || samples > mb->size) {
		*size = 0;
		return NULL;
	}

	fill_up_to(mb, samples);

	*size = samples * mb->sample_size_channels;

	// direct extraction is only possible if the range does not wrap around
	int read_end = mb->read_pos + samples;
	if (read_end > mb->size)
		return NULL; // caller has to go through a temporary buffer

	void *data = mb->buf.c + mb->read_pos * mb->sample_size_channels;

	mb->fill -= samples;
	if (read_end == mb->size)
		mb->read_pos = 0;
	else
		mb->read_pos = read_end;

	return data;
}
// Copying read for the case where the requested range wraps around the end of
// the ring buffer.
//
// Must only be called after mix_buffer_read_fast() returned NULL with a non-zero
// `*size`, using the same `samples` value and an `outbuf` of at least `*size` bytes.
// Copies the part up to the end of the ring first, then the remainder from the
// beginning of the ring, and advances the read position and fill level.
// No bounds checks are done here: `samples` is expected to reach past the end of
// the ring from the current read position, as established by the preceding
// mix_buffer_read_fast() call.
void mix_buffer_read_slow(struct mix_buffer *mb, void *outbuf, unsigned int samples) {
	LOCK(&mb->lock);

	// byte-wise cursor: arithmetic on `void *` is a GNU extension, not ISO C
	char *out = outbuf;

	// part from the read position up to the end of the ring
	unsigned int tail_part = mb->size - mb->read_pos;
	memcpy(out, mb->buf.c + mb->read_pos * mb->sample_size_channels,
			tail_part * mb->sample_size_channels);
	mb->fill -= samples;
	samples -= tail_part;

	// remainder from the beginning of the ring
	memcpy(out + tail_part * mb->sample_size_channels, mb->buf.c,
			samples * mb->sample_size_channels);
	mb->read_pos = samples;
}
// Hands the embedded ssrc_entry header of a source to obj_put()
// (presumably dropping one reference -- confirm against the obj API).
static void mix_ssrc_put(mix_buffer_ssrc_source *source) {
	obj_put(&source->h);
}
// makes g_autoptr(mix_buffer_ssrc_source) variables call mix_ssrc_put() on scope exit
G_DEFINE_AUTOPTR_CLEANUP_FUNC(mix_buffer_ssrc_source, mix_ssrc_put)
// Appends `samples` samples from `buf` at the write head as a plain copy, without
// any mixing, wrapping around the end of the ring if necessary.
// Advances the write head and fill level, and moves the source's write position
// (and, on wrap-around, its loop counter) to the new write head.
// Returns false, without writing anything, if the data does not fit into the
// free space of the buffer; returns true otherwise.
// The caller must already hold the buffer lock.
static bool mix_buffer_write_fast(struct mix_buffer *mb, mix_buffer_ssrc_source *src,
		const void *buf, unsigned int samples)
{
	// refuse to overflow the buffer
	if (mb->fill + samples > mb->size)
		return false;

	const char *in = buf;

	// does the write reach (or pass) the end of the ring?
	if (mb->head_write_pos + samples >= mb->size) {
		// first the part that goes up to the end of the ring
		unsigned int to_end = mb->size - mb->head_write_pos;
		memcpy(mb->buf.c + mb->head_write_pos * mb->sample_size_channels, in,
				to_end * mb->sample_size_channels);
		in += to_end * mb->sample_size_channels;
		samples -= to_end;
		mb->fill += to_end;

		// continue at the beginning; src->write_pos is set further down
		mb->head_write_pos = 0;
		mb->loops++;
		src->loops = mb->loops;
	}

	// whatever is left (possibly nothing) goes in as one piece
	memcpy(mb->buf.c + mb->head_write_pos * mb->sample_size_channels, in,
			samples * mb->sample_size_channels);
	mb->head_write_pos += samples;
	mb->fill += samples;
	src->write_pos = mb->head_write_pos;

	return true;
}
// Writes `samples` samples from `buf` starting at the source's own write position,
// which lies behind the write head: the input is mixed into the data that is
// already buffered (via the implementation's mix_in routine, which is given the
// number of individual values, i.e. samples times channels). Anything that extends
// beyond the write head is appended with mix_buffer_write_fast().
// The source's write position (and loop counter on wrap-around) is advanced by
// the amount written.
// Returns true on success, or the result of mix_buffer_write_fast() for the
// appended part (false if that part does not fit into the buffer).
// The caller must already hold the buffer lock.
static bool mix_buffer_write_slow(struct mix_buffer *mb, mix_buffer_ssrc_source *src,
		const void *buf, unsigned int samples)
{
	// mix-in up to the current write-head, or end of buffer in case of wrap-around
	if (mb->head_write_pos < src->write_pos) {
		// wrap-around: mix-in to end of buffer
		unsigned int tail_part = mb->size - src->write_pos;
		if (tail_part > samples)
			tail_part = samples;
		mb->impl->mix_in(mb->buf.c + src->write_pos * mb->sample_size_channels, buf,
				tail_part * mb->channels);
		samples -= tail_part;
		buf = ((const char *) buf) + tail_part * mb->sample_size_channels;
		src->write_pos += tail_part;
		if (src->write_pos == mb->size) {
			src->write_pos = 0;
			src->loops++;
		}
		if (samples == 0)
			return true;
	}

	// mix-in to current write-head
	unsigned int mix_part = mb->head_write_pos - src->write_pos;
	if (mix_part > samples)
		mix_part = samples;
	mb->impl->mix_in(mb->buf.c + src->write_pos * mb->sample_size_channels, buf, mix_part * mb->channels);
	samples -= mix_part;
	src->write_pos += mix_part;
	buf = ((const char *) buf) + mix_part * mb->sample_size_channels;

	// Everything was mixed in and the source is still behind the write-head: stop
	// here. mix_buffer_write_fast() unconditionally moves the source's write
	// position to the write-head, which would make this source skip over the
	// buffered data it has not written to yet.
	if (samples == 0 && src->write_pos != mb->head_write_pos)
		return true;

	// anything that's left, just copy-in (with nothing left, the source sits exactly
	// at the write-head and this only normalises a pending wrap-around)
	return mix_buffer_write_fast(mb, src, buf, samples);
}
// Moves a source's write position `samples` samples further ahead and, if that
// puts it beyond the current write head, pads the buffer with zeroed samples up
// to the new position. Does nothing for a delay of zero.
// NOTE(review): only a single wrap-around is applied to the write position, so
// `samples` is apparently expected to be smaller than the buffer size -- confirm.
static void mix_buffer_src_add_delay(struct mix_buffer *mb, mix_buffer_ssrc_source *src,
		unsigned int samples)
{
	if (samples == 0)
		return;

	// advance the source's position, wrapping around the end of the ring
	src->write_pos += samples;
	if (src->write_pos >= mb->size) {
		src->write_pos -= mb->size;
		src->loops++;
	}

	// pad the buffer if the source is now ahead of the write head
	if (src->loops == mb->loops) {
		// same pass over the ring: ahead only if the position is larger
		if (src->write_pos > mb->head_write_pos)
			fill_up_to(mb, mb->fill + src->write_pos - mb->head_write_pos);
	}
	else if (src->loops == mb->loops + 1) {
		// source is one pass ahead of the write head
		if (src->write_pos < mb->head_write_pos)
			fill_up_to(mb, mb->fill + src->write_pos + mb->size - mb->head_write_pos);
	}
}
// (Re-)initialises a source's write position: it starts at the buffer's current
// read position, shifted ahead by the buffer's configured delay.
static void mix_buffer_src_init_pos(struct mix_buffer *mb, mix_buffer_ssrc_source *src) {
	src->write_pos = mb->read_pos;

	// if the write head is at a lower position than the read position, the
	// source's starting point belongs to the previous pass over the ring
	if (mb->head_write_pos < src->write_pos)
		src->loops = mb->loops - 1;
	else
		src->loops = mb->loops;

	mix_buffer_src_add_delay(mb, src, mb->delay);
}
// Adds an extra write delay to a source, derived from the time elapsed between
// the time stamps `last` and `now` (microseconds, going by the scaling factor used
// below) and the buffer's clock rate. Nothing happens if either time stamp is zero
// or if `now` is not later than `last`.
static void mix_buff_src_shift_delay(struct mix_buffer *mb, mix_buffer_ssrc_source *src,
		const int64_t last, const int64_t now)
{
	if (last == 0 || now == 0)
		return;

	const int64_t elapsed_us = now - last;
	if (elapsed_us > 0) {
		// scale the elapsed time to a number of samples
		unsigned int delay_samples = mb->clockrate * elapsed_us / 1000000;
		mix_buffer_src_add_delay(mb, src, delay_samples);
	}
}
// Writes `samples` samples from `buf` into the mix buffer on behalf of the source
// identified by `ssrc`, and marks the buffer as active.
//
// The source entry is obtained from the buffer's SSRC hash via get_ssrc_full().
// For a newly created source, the difference between the time stamps `last` and
// `now`, scaled to the buffer's clock rate, is added as an additional write-delay.
//
// Depending on where the source's write position is relative to the read position
// and the write head, the data is either appended (fast path) or mixed into what
// is already buffered (slow path). If the source's position is not within the
// currently buffered range, it is reset relative to the read position first.
//
// Returns false if no source entry could be obtained, otherwise the result of the
// fast/slow write function that ends up being used.
bool mix_buffer_write_delay(struct mix_buffer *mb, uint32_t ssrc, const void *buf, unsigned int samples,
const int64_t last, const int64_t now)
{
LOCK(&mb->lock);
// only read after `src` has been checked for NULL
bool created;
// released through mix_ssrc_put() when `src` goes out of scope
g_autoptr(mix_buffer_ssrc_source) src = get_ssrc_full(ssrc, &mb->ssrc_hash, &created);
if (!src)
return false;
if (created)
mix_buff_src_shift_delay(mb, src, last, now);
mb->active = true;
// loop twice at the most to re-run logic after a reset
while (true) {
// shortcut if we're at the write head
if (src->write_pos == mb->head_write_pos && src->loops == mb->loops)
return mix_buffer_write_fast(mb, src, buf, samples);
// not at the write head... did we fall behind what has been read already?
if (mb->head_write_pos >= mb->read_pos) {
// buffered data is one contiguous stretch between R (read pos) and W (write head):
// |--------------|###################|------------|
//                R                   W
//                ^--- slow mix-in ---^
if (src->write_pos >= mb->read_pos && src->write_pos < mb->head_write_pos
&& src->loops == mb->loops)
return mix_buffer_write_slow(mb, src, buf, samples);
}
else {
// buffered data wraps around the end of the ring; the part before W belongs to
// the current loop, the part from R onwards to the previous one:
// |#########|-----------------------------|#######|
//           W                             R
// ^-- slow -^                             ^- slow-^
if ((src->write_pos < mb->head_write_pos && src->loops == mb->loops)
|| (src->write_pos >= mb->read_pos && src->loops + 1 == mb->loops))
return mix_buffer_write_slow(mb, src, buf, samples);
}
// we fell behind. reset write position to current read pos and try again
mix_buffer_src_init_pos(mb, src);
}
}
// Constructor callback registered with the buffer's SSRC hash: allocates a
// zeroed source entry, sets its initial write position for the mix buffer passed
// as `p`, and returns the embedded ssrc_entry header.
static struct ssrc_entry *mix_buffer_ssrc_new(void *p) {
	mix_buffer_ssrc_source *source = obj_alloc0(mix_buffer_ssrc_source, NULL);
	struct mix_buffer *buffer = p;

	mix_buffer_src_init_pos(buffer, source);

	return &source->h;
}
// Initialises a mix buffer. The struct must have been zeroed by the caller.
//
// fmt:       sample format; only AV_SAMPLE_FMT_S16 is supported
// clockrate: sample rate, used to convert the millisecond values to samples
// channels:  number of channels per sample
// size_ms:   total capacity of the buffer in milliseconds
// delay_ms:  initial write delay applied to sources, in milliseconds
// active:    initial value of the buffer's `active` flag
//
// Returns false, leaving the struct untouched, for an unsupported sample format;
// true otherwise.
bool mix_buffer_init_active(struct mix_buffer *mb, enum AVSampleFormat fmt, unsigned int clockrate,
		unsigned int channels, unsigned int size_ms, unsigned int delay_ms, bool active)
{
	if (fmt != AV_SAMPLE_FMT_S16)
		return false;

	mb->impl = &impl_s16_c;

	// both converted from milliseconds to samples
	unsigned int total_samples = clockrate * size_ms / 1000;
	unsigned int delay_samples = clockrate * delay_ms / 1000;

	mutex_init(&mb->lock);

	mb->clockrate = clockrate;
	mb->channels = channels;
	mb->active = active;
	mb->delay = delay_samples;
	mb->size = total_samples;

	// bytes taken up by one sample across all channels
	mb->sample_size_channels = channels * mb->impl->sample_size;
	mb->buf.v = g_malloc(mb->sample_size_channels * total_samples);

	ssrc_hash_full_init(&mb->ssrc_hash, mix_buffer_ssrc_new, mb);

	return true;
}
// Releases the resources set up by mix_buffer_init_active(): the sample buffer,
// the SSRC hash and the lock. The struct itself is not freed.
// The buffer pointer is cleared so that no dangling pointer is left behind; this
// does not make it safe to destroy the same buffer twice, since the hash and the
// lock are torn down unconditionally.
void mix_buffer_destroy(struct mix_buffer *mb) {
	g_free(mb->buf.v);
	mb->buf.v = NULL;
	ssrc_hash_destroy(&mb->ssrc_hash);
	mutex_destroy(&mb->lock);
}
#endif
|