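"""Test torchaudio.datasets.iemocap.IEMOCAP against a mocked IEMOCAP directory tree.

The mock writes short white-noise WAV files under ``SessionN/sentences/wav`` and
tab-separated label lines under ``SessionN/dialog/EmoEvaluation`` so the dataset can
be constructed and iterated without the real corpus being present.
"""
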
import os
import random

from torchaudio.datasets import iemocap
from torchaudio_unittest.common_utils import get_whitenoise, save_wav, TempDirMixin, TorchaudioTestCase

# Emotion labels used in the mock annotations. In IEMOCAP, "xxx" marks utterances
# without an agreed-upon emotion label; such entries are skipped when building the
# expected sample lists below.
LABELS = ["neu", "hap", "ang", "sad", "exc", "fru", "xxx"]
SAMPLE_RATE = 16000


def _save_wav(filepath: str, seed: int):
    """Generate a short white-noise waveform, save it to ``filepath``, and return it."""
    wav = get_whitenoise(
        sample_rate=SAMPLE_RATE,
        duration=0.01,
        n_channels=1,
        seed=seed,
    )
    save_wav(filepath, wav, SAMPLE_RATE)
    return wav


def _save_label(label_folder: str, filename: str, wav_stem: str):
    """Append a random emotion label for ``wav_stem`` to the label file and return the label."""
    label = random.choice(LABELS)
    # One line in the EmoEvaluation format: the "[xxx]" and "[yyy]" fields are
    # placeholders for the time range and dimensional ratings of a real file.
    line = f"[xxx]\t{wav_stem}\t{label}\t[yyy]"
    filepath = os.path.join(label_folder, filename)
    with open(filepath, "a") as f:
        f.write(line + "\n")
    return label


def _get_samples(dataset_dir: str, session: int):
    """Create mock WAV and label files for one session and return the expected samples."""
    session_folder = os.path.join(dataset_dir, f"Session{session}")
    os.makedirs(session_folder, exist_ok=True)

    wav_folder = os.path.join(session_folder, "sentences", "wav")
    label_folder = os.path.join(session_folder, "dialog", "EmoEvaluation")
    os.makedirs(wav_folder, exist_ok=True)
    os.makedirs(label_folder, exist_ok=True)

    # Build utterance stems such as "Ses01F_impro00_F000", covering both genders
    # and both utterance types (improvised and scripted).
    wav_stems = []
    for i in range(5):
        for g in ["F", "M"]:
            for utt in ["impro", "script"]:
                speaker = f"Ses0{session}{g}"
                subfolder = f"{speaker}_{utt}0{i}"
                subfolder_path = os.path.join(wav_folder, subfolder)
                os.makedirs(subfolder_path, exist_ok=True)
                for j in range(5):
                    wav_stem = f"{subfolder}_F00{j}"
                    wav_stems.append(wav_stem)

    all_samples = []
    impro_samples = []
    script_samples = []
    # Sort the stems so the expected samples line up with the dataset's iteration order.
    wav_stems = sorted(wav_stems)
    for wav_stem in wav_stems:
        subfolder = wav_stem[:-5]
        speaker = subfolder.split("_")[0]
        wav_file = os.path.join(wav_folder, subfolder, wav_stem + ".wav")
        wav = _save_wav(wav_file, seed=0)
        label = _save_label(label_folder, subfolder + ".txt", wav_stem)
        # Utterances labeled "xxx" are not yielded by the dataset, so leave them
        # out of the expected samples.
        if label == "xxx":
            continue
        sample = (wav, SAMPLE_RATE, wav_stem, label, speaker)
        all_samples.append(sample)
        if "impro" in subfolder:
            impro_samples.append(sample)
        else:
            script_samples.append(sample)
    return all_samples, script_samples, impro_samples


def get_mock_dataset(dataset_dir: str):
    """Populate ``dataset_dir`` with mock sessions and return (all, scripted, improvised) samples."""
    os.makedirs(dataset_dir, exist_ok=True)
    all_samples = []
    script_samples = []
    impro_samples = []
    for session in range(1, 4):
        samples = _get_samples(dataset_dir, session)
        all_samples += samples[0]
        script_samples += samples[1]
        impro_samples += samples[2]
    return all_samples, script_samples, impro_samples


class TestIemocap(TempDirMixin, TorchaudioTestCase):
    root_dir = None
    backend = "default"

    all_samples = []
    script_samples = []
    impro_samples = []

    @classmethod
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        dataset_dir = os.path.join(cls.root_dir, "IEMOCAP")
        cls.all_samples, cls.script_samples, cls.impro_samples = get_mock_dataset(dataset_dir)

    def _testIEMOCAP(self, dataset, samples):
        num_samples = 0
        for i, data in enumerate(dataset):
            self.assertEqual(data, samples[i])
            num_samples += 1
        assert num_samples == len(samples)

    def testIEMOCAPFullDataset(self):
        dataset = iemocap.IEMOCAP(self.root_dir)
        self._testIEMOCAP(dataset, self.all_samples)

    def testIEMOCAPScriptedDataset(self):
        dataset = iemocap.IEMOCAP(self.root_dir, utterance_type="scripted")
        self._testIEMOCAP(dataset, self.script_samples)

    def testIEMOCAPImprovisedDataset(self):
        dataset = iemocap.IEMOCAP(self.root_dir, utterance_type="improvised")
        self._testIEMOCAP(dataset, self.impro_samples)