FluentSpeech
An audio dataset of short spoken commands in which each sample carries multiple targets (class id, action, object, location).
You need to install the SoundFile library (`pip install soundfile`) to use this dataset.
You will probably also want to add a custom transformation that gives every audio clip the same length, so that clips can be batched together.
from functools import partial

import numpy as np
import torch.nn.functional as F
from continuum import ClassIncremental, ContinualScenario
from continuum.datasets import FluentSpeech
from torch.utils.data import DataLoader
# Build the dataset from the local data folder, requesting the train split (train=True).
dataset = FluentSpeech("/my/data/folder", train=True)
def trunc(x, max_len):  # transformation
    """Center-crop or right-pad ``x`` so its length is exactly ``max_len``.

    Args:
        x: 1-D audio signal. Padding goes through ``F.pad``, so ``x`` is
            presumably a ``torch.Tensor`` (assumption -- confirm against the
            dataset loader).
        max_len: Target number of samples.

    Returns:
        ``x`` unchanged if it already has ``max_len`` samples; otherwise the
        ``max_len`` samples around its center (if longer) or ``x`` followed by
        zeros up to ``max_len`` (if shorter).
    """
    length = len(x)
    if length > max_len:
        # Take exactly max_len samples starting from the centered offset.
        # Using `start + max_len` as the end (rather than a symmetric
        # half-width on each side) keeps the output length correct when
        # max_len is odd.
        start = length // 2 - max_len // 2
        x = x[start:start + max_len]
    elif length < max_len:
        # Pad only on the right with silence (zeros).
        x = F.pad(x, (0, max_len - length), value=0.)
    return x
# Class-incremental setting: with increment=1 each task introduces a single
# new class, so the scenario walks through the 31 possible classes.
scenario = ClassIncremental(
    dataset,
    increment=1,
    transformations=[partial(trunc, max_len=32000)],
)
for taskset in scenario:
    loader = DataLoader(taskset, batch_size=32)
    # Only inspect the first batch of each task, then move on.
    for x, y, t in loader:
        print(x.shape, y.shape, t.shape, np.unique(y[:, 0]))
        break
# Speaker-incremental setting: the scenario walks through the 77 existing
# speakers, one task at a time.
scenario = ContinualScenario(
    dataset,
    transformations=[partial(trunc, max_len=32000)],
)
for taskset in scenario:
    loader = DataLoader(taskset, batch_size=32)
    # Only inspect the first batch of each task, then move on.
    for x, y, t in loader:
        print(x.shape, y.shape, t.shape)
        break