# Source code for numpy_datasets.timeseries.vocalset

import os
import gzip
import urllib.request
import numpy as np
import time
import zipfile
import io
from scipy.io.wavfile import read as wav_read
from tqdm import tqdm
from ..utils import download_dataset

_urls = {
    "https://zenodo.org/record/1442513/files/VocalSet11.zip?download=1": "VocalSet11.zip"
}

_name = "vocalset"


def load(path=None):
    """Load the VocalSet singing-voice dataset (singer / gender / vowel).

    Source: https://zenodo.org/record/1442513#.W7OaFBNKjx4

    We present VocalSet, a singing voice dataset consisting of 10.1 hours of
    monophonic recorded audio of professional singers demonstrating both
    standard and extended vocal techniques on all 5 vowels. Existing singing
    voice datasets aim to capture a focused subset of singing voice
    characteristics, and generally consist of just a few singers. VocalSet
    contains recordings from 20 different singers (9 male, 11 female) and a
    range of voice types. VocalSet aims to improve the state of existing
    singing voice datasets and singing voice research by capturing not only a
    range of vowels, but also a diverse set of voices on many different vocal
    techniques, sung in contexts of scales, arpeggios, long tones, and
    excerpts.

    Parameters
    ----------
    path: str (optional)
        Directory where the data is loaded from (and downloaded to if not
        present). When ``None``, the ``DATASET_PATH`` environment variable is
        used.

    Returns
    -------
    dataset: dict
        A dictionary with four lists of equal length, one entry per retained
        recording:

        ``"singers"``
            the second component of the archive member path, as a string
            (presumably ``male1``, ``female2``, ... -- verify against the
            archive layout)
        ``"genders"``
            the alphabetic characters of the singer entry (e.g. ``male``)
        ``"vowels"``
            the vowel being pronounced, one of ``a``, ``e``, ``i``, ``o``,
            ``u``
        ``"wavs"``
            the waveforms as ``float32`` arrays, not all of equal length

    Raises
    ------
    KeyError
        If ``path`` is ``None`` and ``DATASET_PATH`` is not set.
    """
    if path is None:
        path = os.environ["DATASET_PATH"]
    download_dataset(path, _name, _urls)

    singers = []
    genders = []
    vowels = []
    data = []

    # The context manager guarantees the archive handle is released, even if
    # decoding one of the members fails.
    with zipfile.ZipFile(os.path.join(path, "vocalset/VocalSet11.zip")) as archive:
        for filename in tqdm(archive.namelist(), ascii=True):
            # Keep only wav members; skip the "excerpts" recordings and any
            # member whose path starts with an underscore.
            if (
                ".wav" not in filename
                or "excerpts" in filename
                or filename.startswith("_")
            ):
                continue

            # The character right before the 4-character extension is taken
            # as the vowel; members that do not end in a vowel are skipped.
            vowel = filename[-5]
            if vowel not in ("a", "e", "i", "o", "u"):
                continue

            # wav_read returns (sample_rate, samples); only samples are kept.
            samples = wav_read(io.BytesIO(archive.read(filename)))[1]

            singer = filename.split("/")[1]

            vowels.append(vowel)
            data.append(samples.astype("float32"))
            genders.append("".join(c for c in singer if c.isalpha()))
            singers.append(singer)

    return {
        "singers": singers,
        "genders": genders,
        "vowels": vowels,
        "wavs": data,
    }