File: audio_stream.cpp

package info (click to toggle)
pytorch-vision 0.21.0-3
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 20,228 kB
sloc: python: 65,904; cpp: 11,406; ansic: 2,459; java: 550; sh: 265; xml: 79; objc: 56; makefile: 33
file content (120 lines) | stat: -rw-r--r-- 3,723 bytes
#include "audio_stream.h"
#include <c10/util/Logging.h>
#include <limits>
#include "util.h"

namespace ffmpeg {

namespace {
// Returns the number of audio channels, read from `frame` when it is non-null
// and from `codec` otherwise. Whichever of the two is consulted must be a
// valid pointer. libavutil >= 57.28.100 exposes the count through
// `ch_layout.nb_channels`; older versions use the plain `channels` field.
static int get_nb_channels(const AVFrame* frame, const AVCodecContext* codec) {
  if (frame != nullptr) {
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
    return frame->ch_layout.nb_channels;
#else
    return frame->channels;
#endif
  }
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
  return codec->ch_layout.nb_channels;
#else
  return codec->channels;
#endif
}

// Compares an AudioFormat against a decoded frame: equal when the sample rate
// (stored in `x.samples`), the channel count and the sample format all match.
bool operator==(const AudioFormat& x, const AVFrame& y) {
  if (x.samples != static_cast<size_t>(y.sample_rate)) {
    return false;
  }
  if (x.channels != static_cast<size_t>(get_nb_channels(&y, nullptr))) {
    return false;
  }
  return x.format == y.format;
}

// Compares an AudioFormat against a codec context: equal when the sample rate
// (stored in `x.samples`), the channel count and the sample format all match.
bool operator==(const AudioFormat& x, const AVCodecContext& y) {
  if (x.samples != static_cast<size_t>(y.sample_rate)) {
    return false;
  }
  if (x.channels != static_cast<size_t>(get_nb_channels(nullptr, &y))) {
    return false;
  }
  return x.format == y.sample_fmt;
}

// Overwrites `x` with the sample rate, channel count and sample format of the
// frame `y`, and returns `x` to allow chaining.
AudioFormat& toAudioFormat(AudioFormat& x, const AVFrame& y) {
  const int channelCount = get_nb_channels(&y, nullptr);
  x.format = y.format;
  x.channels = channelCount;
  x.samples = y.sample_rate;
  return x;
}

// Overwrites `x` with the sample rate, channel count and sample format of the
// codec context `y`, and returns `x` to allow chaining.
AudioFormat& toAudioFormat(AudioFormat& x, const AVCodecContext& y) {
  const int channelCount = get_nb_channels(nullptr, &y);
  x.format = y.sample_fmt;
  x.channels = channelCount;
  x.samples = y.sample_rate;
  return x;
}
} // namespace

// Constructs an audio stream by forwarding to the Stream base constructor.
// `format` and `index` are combined into a MediaFormat via
// MediaFormat::makeMediaFormat; `inputCtx` and `convertPtsToWallTime` are
// passed through unchanged.
AudioStream::AudioStream(
    AVFormatContext* inputCtx,
    int index,
    bool convertPtsToWallTime,
    const AudioFormat& format)
    : Stream(
          inputCtx,
          MediaFormat::makeMediaFormat(format, index),
          convertPtsToWallTime,
          // NOTE(review): meaning of this trailing 0 is defined by the Stream
          // constructor, which is not visible here - confirm against stream.h.
          0) {}

// Shuts down and releases the sampler, if one was ever created.
AudioStream::~AudioStream() {
  if (!sampler_) {
    return;
  }
  sampler_->shutdown();
  sampler_.reset();
}

// Completes the output audio format: every field the caller left unset
// (0 for samples/channels, AV_SAMPLE_FMT_NONE for the sample format) is taken
// from the codec context. Returns 0 when all three fields are set afterwards
// and -1 otherwise.
int AudioStream::initFormat() {
  auto& audio = format_.format.audio;

  if (audio.samples == 0) {
    audio.samples = codecCtx_->sample_rate;
  }

  if (audio.channels == 0) {
    // The channel count moved into ch_layout in libavutil 57.28.100.
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
    audio.channels = codecCtx_->ch_layout.nb_channels;
#else
    audio.channels = codecCtx_->channels;
#endif
  }

  if (audio.format == AV_SAMPLE_FMT_NONE) {
    audio.format = codecCtx_->sample_fmt;
  }

  const bool complete = audio.samples != 0 && audio.channels != 0 &&
      audio.format != AV_SAMPLE_FMT_NONE;
  return complete ? 0 : -1;
}

// Copies audio sample bytes into `out` (via the swr_convert call in
// audio_sampler.cpp). The sampler is created on first use and re-initialized
// whenever the input format differs from the one it currently holds. When
// `flush` is true the input format is taken from the codec context and no
// frame is handed to the sampler; otherwise the current frame is used for both.
// Returns -1 if the sampler cannot be (re)initialized, otherwise whatever
// the sampler's sample() call returns.
int AudioStream::copyFrameBytes(ByteStorage* out, bool flush) {
  if (!sampler_) {
    sampler_ = std::make_unique<AudioSampler>(codecCtx_);
  }

  // Does the sampler's current input format still describe the data source?
  const bool inputUnchanged = flush
      ? (sampler_->getInputFormat().audio == *codecCtx_)
      : (sampler_->getInputFormat().audio == *frame_);

  if (!inputUnchanged) {
    // Input format changed: rebuild the sampler parameters and re-init.
    SamplerParameters params;
    params.type = format_.type;
    params.out = format_.format;
    params.in = FormatUnion();
    if (flush) {
      toAudioFormat(params.in.audio, *codecCtx_);
    } else {
      toAudioFormat(params.in.audio, *frame_);
    }

    if (!sampler_->init(params)) {
      return -1;
    }

    VLOG(1) << "Set input audio sampler format"
            << ", samples: " << params.in.audio.samples
            << ", channels: " << params.in.audio.channels
            << ", format: " << params.in.audio.format
            << " : output audio sampler format"
            << ", samples: " << format_.format.audio.samples
            << ", channels: " << format_.format.audio.channels
            << ", format: " << format_.format.audio.format;
  }

  // Hand the frame (or nullptr when flushing) to the sampler, which converts
  // the samples and writes them to the out buffer.
  return sampler_->sample(flush ? nullptr : frame_, out);
}

} // namespace ffmpeg