Source code for numpy_datasets.timeseries.vocalset

import os
import gzip
import urllib.request
import numpy as np
import time
import zipfile
import io
from scipy.io.wavfile import read as wav_read
from tqdm import tqdm
from ..utils import download_dataset

_urls = {
    "https://zenodo.org/record/1442513/files/VocalSet11.zip?download=1": "VocalSet11.zip"
}

_name = "vocalset"


def load(path=None):
    """singer/gender/vowel of singing voices

    source: https://zenodo.org/record/1442513#.W7OaFBNKjx4

    We present VocalSet, a singing voice dataset consisting of 10.1 hours
    of monophonic recorded audio of professional singers demonstrating both
    standard and extended vocal techniques on all 5 vowels. Existing
    singing voice datasets aim to capture a focused subset of singing
    voice characteristics, and generally consist of just a few singers.
    VocalSet contains recordings from 20 different singers (9 male, 11
    female) and a range of voice types.  VocalSet aims to improve the
    state of existing singing voice datasets and singing voice research by
    capturing not only a range of vowels, but also a diverse set of voices
    on many different vocal techniques, sung in contexts of scales,
    arpeggios, long tones, and excerpts.

    Only ``.wav`` entries of the archive are loaded. Entries whose path
    contains ``excerpts``, whose path starts with ``_``, or whose file name
    does not end with one of the five vowels right before the extension
    are skipped.

    Parameters
    ----------

    path: str (optional)
        a string where to load the data and download if not present. When
        omitted, the ``DATASET_PATH`` environment variable is used.

    Returns
    -------

    dataset: dict
        a dictionary with the keys below; each value is a list with one
        entry per loaded recording (all lists are aligned):

        singers: list
            the singer of each recording as a string, as in male1,
            male2, ... (second component of the path inside the archive)

        genders: list
            the gender of each singer as in male, male, female, ...
            (the alphabetic characters of the singer string)

        vowels: list
            the vowel being pronounced, one of a, e, i, o, u

        wavs: list
            the list of waveforms as float32 arrays, not all equal length

    Raises
    ------

    KeyError
        if ``path`` is None and ``DATASET_PATH`` is not set.

    """
    if path is None:
        path = os.environ["DATASET_PATH"]

    download_dataset(path, _name, _urls)

    allowed_vowels = ("a", "e", "i", "o", "u")

    # init. the per-recording containers (kept aligned index by index)
    singers = []
    genders = []
    vowels = []
    wavs = []

    archive_path = os.path.join(path, "vocalset/VocalSet11.zip")
    # the context manager guarantees the archive handle is released even if
    # decoding one of the wav files fails
    with zipfile.ZipFile(archive_path) as archive:
        for filename in tqdm(archive.namelist(), ascii=True):
            if (
                ".wav" not in filename
                or "excerpts" in filename
                # presumably archive metadata folders (e.g. __MACOSX)
                or filename.startswith("_")
            ):
                continue

            # the character right before ".wav" encodes the vowel
            vowel = filename[-5]
            if vowel not in allowed_vowels:
                continue

            singer = filename.split("/")[1]
            # wav_read returns (sample_rate, samples); only samples are kept
            samples = wav_read(io.BytesIO(archive.read(filename)))[1]

            vowels.append(vowel)
            wavs.append(samples.astype("float32"))
            # e.g. "female1" -> "female"
            genders.append("".join(c for c in singer if c.isalpha()))
            singers.append(singer)
            # NOTE: the technique label is not extracted from the file name.

    dataset = {
        "singers": singers,
        "genders": genders,
        "vowels": vowels,
        "wavs": wavs,
    }
    return dataset