Source code for numpy_datasets.timeseries.irmas

import os
import zipfile
import io
from tqdm import tqdm
import numpy as np
import time
from scipy.io.wavfile import read as wav_read
from ..utils import download_dataset


_urls = {
    "https://zenodo.org/record/1290750/files/IRMAS-TrainingData.zip?download=1": "IRMAS-TrainingData.zip",
    "https://zenodo.org/record/1290750/files/IRMAS-TestingData-Part1.zip?download=1": "IRMAS-TestingData-Part1.zip",
    "https://zenodo.org/record/1290750/files/IRMAS-TestingData-Part2.zip?download=1": "IRMAS-TestingData-Part2.zip",
    "https://zenodo.org/record/1290750/files/IRMAS-TestingData-Part3.zip?download=1": "IRMAS-TestingData-Part3.zip",
}


[docs]def load(path=None):
    """music instrument classification

    ref https://zenodo.org/record/1290750#.WzCwSRyxXMU

    This dataset includes musical audio excerpts with annotations of the
    predominant instrument(s) present. It was used for the evaluation in the
    following article:

    Bosch, J. J., Janer, J., Fuhrmann, F., & Herrera, P. “A Comparison of Sound
    Segregation Techniques for Predominant Instrument Recognition in Musical
    Audio Signals”, in Proc. ISMIR (pp. 559-564), 2012

    Please Acknowledge IRMAS in Academic Research

    IRMAS is intended to be used for training and testing methods for the
    automatic recognition of predominant instruments in musical audio. The
    instruments considered are: cello, clarinet, flute, acoustic guitar,
    electric guitar, organ, piano, saxophone, trumpet, violin, and human singing
    voice. This dataset is derived from the one compiled by Ferdinand Fuhrmann
    in his PhD thesis, with the difference that we provide audio data in stereo
    format, the annotations in the testing dataset are limited to specific
    pitched instruments, and there is a different amount and lenght of excerpts.
    """
    if path is None:
        path = os.environ["DATASET_PATH"]

    download_dataset(path, "irmas", _urls)

    t0 = time.time()

    train_wavs = list()
    train_labels = list()
    test_wavs = list()
    test_labels = list()

    # loading the training set
    f = zipfile.ZipFile(path + "irmas/IRMAS-TrainingData.zip")
    namelist = f.namelist()
    for filename in tqdm(namelist, ascii=True):
        if ".wav" not in filename:
            continue
        wavfile = f.read(filename)
        byt = io.BytesIO(wavfile)
        train_wavs.append(wav_read(byt)[1].astype("float32"))
        train_labels.append(filename.split("/")[-2])

    base = "irmas/IRMAS-TestingData-Part{}.zip"
    for part in ["1", "2", "3"]:
        f = zipfile.ZipFile(path + base.format(part))
        namelist = f.namelist()
        for filename in tqdm(namelist, ascii=True, desc="Test data {}/3".format(part)):
            if ".wav" not in filename:
                continue

            byt = io.BytesIO(f.read(filename))
            test_wavs.append(wav_read(byt)[1].astype("float32"))

            byt = io.BytesIO(f.read(filename.replace(".wav", ".txt")))
            test_labels.append(np.loadtxt(byt, dtype="str"))

    data = {
        "train_set/wavs": np.array(train_wavs),
        "train_set/labels": train_labels,
        "test_wavs": np.array(test_wavs),
        "test_labels": test_labels,
    }

    print("Dataset IRMAS loaded in {0:.2f}s.".format(time.time() - t0))
    return data