File: sox_utils.py

package info (click to toggle)
pytorch-audio 0.7.2-1
links: PTS, VCS
area: main
in suites: bullseye
size: 5,512 kB
sloc: python: 15,606; cpp: 1,352; sh: 257; makefile: 21
file content (100 lines) | stat: -rw-r--r-- 3,168 bytes
import subprocess


def get_encoding(dtype):
    """Return the encoding name associated with a dtype name.

    Args:
        dtype (str): One of ``'float32'``, ``'int32'``, ``'int16'``, ``'uint8'``.

    Returns:
        str: ``'floating-point'``, ``'signed-integer'`` or ``'unsigned-integer'``.

    Raises:
        KeyError: If ``dtype`` is not one of the names listed above.
    """
    # NOTE(review): the values look like the names sox takes for ``--encoding``.
    name_by_dtype = dict(
        float32='floating-point',
        int32='signed-integer',
        int16='signed-integer',
        uint8='unsigned-integer',
    )
    return name_by_dtype[dtype]


def get_bit_depth(dtype):
    """Return the number of bits per sample for a dtype name.

    Args:
        dtype (str): One of ``'float32'``, ``'int32'``, ``'int16'``, ``'uint8'``.

    Returns:
        int: ``32``, ``16`` or ``8``.

    Raises:
        KeyError: If ``dtype`` is not one of the names listed above.
    """
    width_by_dtype = dict(float32=32, int32=32, int16=16, uint8=8)
    return width_by_dtype[dtype]


def gen_audio_file(
        path, sample_rate, num_channels,
        *, encoding=None, bit_depth=None, compression=None, attenuation=None, duration=1,
):
    """Generate synthetic audio file with `sox` command.

    The assembled command line is printed before it is executed.

    Args:
        path: Destination file. A ``str`` or any object whose ``str()`` is the
            path (e.g. ``pathlib.Path``). Must not end with ``.wav``.
        sample_rate: Value passed to ``--rate``.
        num_channels: Value passed to ``--channels``.
        encoding: Optional value passed to ``--encoding``.
        bit_depth: Optional value passed to ``--bits`` (given both before and
            after ``--null``).
        compression: Optional value passed to ``--compression``.
        attenuation: Optional attenuation; appended as ``vol -<attenuation>dB``.
        duration: Length handed to ``synth`` [sec]. Defaults to 1.

    Raises:
        RuntimeError: If ``path`` ends with ``.wav``.
        subprocess.CalledProcessError: If ``sox`` exits with a non-zero status.
    """
    # Normalise once so the suffix check also works for path-like objects;
    # calling ``.endswith`` on a ``pathlib.Path`` raises AttributeError.
    path = str(path)
    if path.endswith('.wav'):
        raise RuntimeError(
            'Use get_wav_data and save_wav to generate wav file for accurate result.')
    command = [
        'sox',
        '-V3',  # verbose
        '-R',
        # -R is supposed to be repeatable, though the implementation looks suspicious
        # and not setting the seed to a fixed value.
        # https://fossies.org/dox/sox-14.4.2/sox_8c_source.html
        # search "sox_globals.repeatable"
    ]
    # ``--bits`` appears twice on purpose: per the sox synopsis, format options
    # bind to the file that follows them. This one precedes the ``--null`` input,
    # the second one below precedes the output file.
    if bit_depth is not None:
        command += ['--bits', str(bit_depth)]
    command += [
        '--rate', str(sample_rate),
        '--null',  # no input
        '--channels', str(num_channels),
    ]
    if compression is not None:
        command += ['--compression', str(compression)]
    if bit_depth is not None:
        command += ['--bits', str(bit_depth)]
    if encoding is not None:
        command += ['--encoding', str(encoding)]
    command += [
        path,
        'synth', str(duration),  # synthesizes for the given duration [sec]
        'sawtooth', '1',
        # saw tooth covers the both ends of value range, which is a good property for test.
        # similar to linspace(-1., 1.)
        # this introduces bigger boundary effect than sine when converted to mp3
    ]
    if attenuation is not None:
        command += ['vol', f'-{attenuation}dB']
    print(' '.join(command))
    subprocess.run(command, check=True)


def convert_audio_file(
        src_path, dst_path,
        *, bit_depth=None, compression=None):
    """Convert audio file with `sox` command.

    The assembled command line is printed before it is executed.

    Args:
        src_path: Input file; converted with ``str()``.
        dst_path: Output file; converted with ``str()``.
        bit_depth: Optional value passed to ``--bits`` ahead of the output file.
        compression: Optional value passed to ``--compression`` ahead of the
            output file.

    Raises:
        subprocess.CalledProcessError: If ``sox`` exits with a non-zero status.
    """
    command = ['sox', '-V3', '-R', str(src_path)]
    if bit_depth is not None:
        command += ['--bits', str(bit_depth)]
    if compression is not None:
        command += ['--compression', str(compression)]
    # Stringify like ``src_path``: ``' '.join`` below requires every element to
    # be ``str`` and would raise TypeError on a path-like destination.
    command += [str(dst_path)]
    print(' '.join(command))
    subprocess.run(command, check=True)


def _flattern(effects):
    if not effects:
        return effects
    if isinstance(effects[0], str):
        return effects
    return [item for sublist in effects for item in sublist]


def run_sox_effect(input_file, output_file, effect, *, output_sample_rate=None, output_bitdepth=None):
    """Run sox effects

    The assembled command line is printed before it is executed.

    Args:
        input_file: Input file; converted with ``str()``.
        output_file: Output file; converted with ``str()``.
        effect: Effect arguments, either a flat list of strings or a list of
            lists of strings (flattened by one level before use).
        output_sample_rate: When truthy, ``rate <output_sample_rate>`` is
            appended after the given effects.
        output_bitdepth: When truthy, passed to ``--bits`` ahead of the
            output file.

    Raises:
        subprocess.CalledProcessError: If ``sox`` exits with a non-zero status.
    """
    effect = _flattern(effect)
    # Stringify both paths: ``' '.join`` below requires every element to be
    # ``str`` and would raise TypeError on path-like arguments.
    command = ['sox', '-V', '--no-dither', str(input_file)]
    if output_bitdepth:
        command += ['--bits', str(output_bitdepth)]
    command += [str(output_file)] + effect
    if output_sample_rate:
        command += ['rate', str(output_sample_rate)]
    print(' '.join(command))
    subprocess.run(command, check=True)