1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
import csv
import os
import random
import string
from torchaudio.datasets import fluentcommands
from torchaudio_unittest.common_utils import get_whitenoise, save_wav, TempDirMixin, TorchaudioTestCase
# Column names written as the first row of every mocked CSV manifest.
# The leading empty name belongs to the running row-index column.
HEADER = [
    "",
    "path",
    "speakerId",
    "transcription",
    "action",
    "object",
    "location",
]

# Names of the three label columns that close each row of HEADER.
SLOTS = [
    "action",
    "object",
    "location",
]

# Candidate label values; one of each is drawn at random for every mocked row.
ACTIONS = [
    "activate",
    "deactivate",
]
OBJECTS = [
    "lights",
    "volume",
]
LOCATIONS = [
    "none",
    "kitchen",
    "bedroom",
]

# Size of each mocked subset: NUM_SPEAKERS * SAMPLES_PER_SPEAKER rows.
NUM_SPEAKERS = 5
SAMPLES_PER_SPEAKER = 10

# Sample rate used both to generate and to save the white-noise waveforms.
SAMPLE_RATE = 16000
def _gen_rand_str(n: int, seed: int) -> str:
    """Return a reproducible random string of ``n`` ASCII letters and digits.

    A private ``random.Random`` instance seeded with ``seed`` is used instead
    of reseeding the module-level generator. The produced string is the same
    as the one obtained by calling ``random.seed(seed)`` followed by
    ``random.choices(...)``, but the process-wide RNG state is left untouched.

    Args:
        n: Number of characters to generate.
        seed: Seed that fully determines the returned string.

    Returns:
        A string of length ``n`` drawn from
        ``string.ascii_letters + string.digits``.
    """
    rng = random.Random(seed)
    return "".join(rng.choices(string.ascii_letters + string.digits, k=n))
def _gen_csv(dataset_dir: str, subset: str, init_seed: int) -> list:
    """Write the mocked ``<subset>_data.csv`` manifest and return its rows.

    For each of ``NUM_SPEAKERS`` speakers a directory is created under
    ``<dataset_dir>/wavs/speakers`` and ``SAMPLES_PER_SPEAKER`` rows are
    generated. The seed is incremented once per row; the file name and the
    action/object/location labels of a row are all derived from that row's
    seed, so the output is fully determined by ``init_seed``.

    Args:
        dataset_dir: Root directory of the mocked dataset.
        subset: Subset name used in the CSV file name (``{subset}_data.csv``).
        init_seed: Seed used for the first speaker id; row seeds follow it.

    Returns:
        The rows written to the CSV file, with ``HEADER`` as the first
        element followed by one list per sample.
    """
    rows = [HEADER]
    idx = 0
    seed = init_seed
    for _ in range(NUM_SPEAKERS):
        # The speaker id uses the seed as it stands when the speaker starts,
        # i.e. ``init_seed`` or the seed of the previous speaker's last row.
        speaker_id = _gen_rand_str(5, seed=seed)
        speaker_dir = os.path.join(dataset_dir, "wavs", "speakers", speaker_id)
        os.makedirs(speaker_dir, exist_ok=True)
        for _ in range(SAMPLES_PER_SPEAKER):
            seed += 1
            filename = _gen_rand_str(10, seed=seed)
            path = f"wavs/speakers/{speaker_id}/{filename}.wav"
            # A local generator yields the same draws as reseeding the global
            # one would, without disturbing process-wide RNG state.
            rng = random.Random(seed)
            transcription = ""
            act = rng.choice(ACTIONS)
            obj = rng.choice(OBJECTS)
            loc = rng.choice(LOCATIONS)
            rows.append([idx, path, speaker_id, transcription, act, obj, loc])
            idx += 1

    # Create the manifest directory so the function also works when it is
    # called before anything else has prepared the dataset layout.
    csv_dir = os.path.join(dataset_dir, "data")
    os.makedirs(csv_dir, exist_ok=True)
    csv_path = os.path.join(csv_dir, f"{subset}_data.csv")
    with open(csv_path, "w", newline="") as csv_file:
        csv.writer(csv_file).writerows(rows)
    return rows
def _save_samples(dataset_dir: str, subset: str, seed: int):
    """Create the CSV manifest and wav files for one subset.

    The manifest is produced by ``_gen_csv``; for every data row a short
    single-channel white-noise waveform is generated and saved under
    ``<dataset_dir>/wavs/speakers/<speaker_id>/<filename>.wav``. The waveform
    seed starts at ``seed`` and grows by one per row.

    Args:
        dataset_dir: Root directory of the mocked dataset.
        subset: Subset name forwarded to ``_gen_csv``.
        seed: Initial seed for both the manifest and the waveforms.

    Returns:
        A list with one tuple per row:
        ``(wav, SAMPLE_RATE, filename, speaker_id, transcription, act, obj, loc)``.
    """
    # First row is the header, the remaining ones describe the samples.
    header, *rows = _gen_csv(dataset_dir, subset, seed)
    path_col = header.index("path")

    samples = []
    for wav_seed, row in enumerate(rows, start=seed):
        waveform = get_whitenoise(
            sample_rate=SAMPLE_RATE,
            duration=0.01,
            n_channels=1,
            seed=wav_seed,
        )
        # Last path component, cut at its first dot, is the bare file name.
        filename = row[path_col].rpartition("/")[2].partition(".")[0]
        speaker_id, transcription, act, obj, loc = row[2:]
        wav_file = os.path.join(dataset_dir, "wavs", "speakers", speaker_id, f"{filename}.wav")
        save_wav(wav_file, waveform, SAMPLE_RATE)
        samples.append((waveform, SAMPLE_RATE, filename, speaker_id, transcription, act, obj, loc))
    return samples
def get_mock_dataset(dataset_dir: str):
    """Build the mocked dataset on disk and return its expected samples.

    Ensures ``<dataset_dir>/data`` and ``<dataset_dir>/wavs/speakers`` exist,
    then generates the "train", "valid" and "test" subsets with the fixed
    seeds 1, 111 and 1111 respectively.

    Args:
        dataset_dir: Root directory in which the mocked dataset is created.

    Returns:
        A 3-tuple ``(train_samples, valid_samples, test_samples)`` holding the
        sample lists returned by ``_save_samples`` for each subset.
    """
    for parts in (("data",), ("wavs", "speakers")):
        os.makedirs(os.path.join(dataset_dir, *parts), exist_ok=True)

    subset_seeds = (("train", 1), ("valid", 111), ("test", 1111))
    return tuple(_save_samples(dataset_dir, subset, seed) for subset, seed in subset_seeds)
class TestFluentSpeechCommands(TempDirMixin, TorchaudioTestCase):
    """Check ``fluentcommands.FluentSpeechCommands`` against a mocked dataset.

    The mocked dataset is generated once per class under the base temporary
    directory; each test loads one subset and compares every item with the
    sample that was generated for it.
    """

    # Filled in by setUpClass.
    root_dir = None
    mocked_train_samples = []
    mocked_valid_samples = []
    mocked_test_samples = []

    @classmethod
    def setUpClass(cls):
        """Generate the mocked dataset and remember the expected samples."""
        cls.root_dir = cls.get_base_temp_dir()
        dataset_dir = os.path.join(cls.root_dir, "fluent_speech_commands_dataset")
        (
            cls.mocked_train_samples,
            cls.mocked_valid_samples,
            cls.mocked_test_samples,
        ) = get_mock_dataset(dataset_dir)

    def _testFluentCommands(self, dataset, samples):
        """Assert that ``dataset`` yields exactly ``samples``, in order.

        Args:
            dataset: Iterable dataset under test.
            samples: Expected items, as returned by ``get_mock_dataset``.
        """
        num_samples = 0
        for i, data in enumerate(dataset):
            self.assertEqual(data, samples[i])
            num_samples += 1

        # Use the unittest assertion rather than a bare ``assert`` so the
        # check survives ``python -O`` and reports both values on failure.
        self.assertEqual(num_samples, len(samples))

    def testFluentCommandsTrain(self):
        """The "train" subset matches the mocked train samples."""
        dataset = fluentcommands.FluentSpeechCommands(self.root_dir, subset="train")
        self._testFluentCommands(dataset, self.mocked_train_samples)

    def testFluentCommandsValid(self):
        """The "valid" subset matches the mocked validation samples."""
        dataset = fluentcommands.FluentSpeechCommands(self.root_dir, subset="valid")
        self._testFluentCommands(dataset, self.mocked_valid_samples)

    def testFluentCommandsTest(self):
        """The "test" subset matches the mocked test samples."""
        dataset = fluentcommands.FluentSpeechCommands(self.root_dir, subset="test")
        self._testFluentCommands(dataset, self.mocked_test_samples)
|