1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
from pathlib import Path
from typing import List, Tuple, Union
import torch
import torchaudio
from torch.utils.data import Dataset
SampleType = Tuple[int, torch.Tensor, List[torch.Tensor]]
class WSJ0Mix(Dataset):
"""Create a Dataset for wsj0-mix.
Args:
root (str or Path): Path to the directory where the dataset is found.
num_speakers (int): The number of speakers, which determines the directories
to traverse. The Dataset will traverse ``s1`` to ``sN`` directories to collect
N source audios.
sample_rate (int): Expected sample rate of audio files. If any of the audio has a
different sample rate, raises ``ValueError``.
audio_ext (str, optional): The extension of audio files to find. (default: ".wav")
"""
def __init__(
self,
root: Union[str, Path],
num_speakers: int,
sample_rate: int,
audio_ext: str = ".wav",
):
self.root = Path(root)
self.sample_rate = sample_rate
self.mix_dir = (self.root / "mix").resolve()
self.src_dirs = [(self.root / f"s{i+1}").resolve() for i in range(num_speakers)]
self.files = [p.name for p in self.mix_dir.glob(f"*{audio_ext}")]
self.files.sort()
def _load_audio(self, path) -> torch.Tensor:
waveform, sample_rate = torchaudio.load(path)
if sample_rate != self.sample_rate:
raise ValueError(
f"The dataset contains audio file of sample rate {sample_rate}, "
f"but the requested sample rate is {self.sample_rate}."
)
return waveform
def _load_sample(self, filename) -> SampleType:
mixed = self._load_audio(str(self.mix_dir / filename))
srcs = []
for i, dir_ in enumerate(self.src_dirs):
src = self._load_audio(str(dir_ / filename))
if mixed.shape != src.shape:
raise ValueError(f"Different waveform shapes. mixed: {mixed.shape}, src[{i}]: {src.shape}")
srcs.append(src)
return self.sample_rate, mixed, srcs
def __len__(self) -> int:
return len(self.files)
def __getitem__(self, key: int) -> SampleType:
"""Load the n-th sample from the dataset.
Args:
key (int): The index of the sample to be loaded
Returns:
tuple: ``(sample_rate, mix_waveform, list_of_source_waveforms)``
"""
return self._load_sample(self.files[key])
|