# Source code for numpy_datasets.timeseries.esc

import os
import pickle, gzip
import urllib.request
import numpy as np
import time
import tarfile
from tqdm import tqdm
import zipfile
from scipy.io.wavfile import read as wav_read
import io

fine_to_coarse = {
    "dog": 0,
    "rooster": 0,
    "pig": 0,
    "cow": 0,
    "frog": 0,
    "cat": 0,
    "hen": 0,
    "insects": 0,
    "sheep": 0,
    "crow": 0,
    "rain": 1,
    "sea_waves": 1,
    "crackling_fire": 1,
    "crickets": 1,
    "chirping_birds": 1,
    "water_drops": 1,
    "wind": 1,
    "pouring_water": 1,
    "toilet_flush": 1,
    "thunderstorm": 1,
    "crying_baby": 2,
    "sneezing": 2,
    "clapping": 2,
    "breathing": 2,
    "coughing": 2,
    "footsteps": 2,
    "laughing": 2,
    "brushing_teeth": 2,
    "snoring": 2,
    "drinking_sipping": 2,
    "door_wood_knock": 3,
    "mouse_click": 3,
    "keyboard_typing": 3,
    "door_wood_creaks": 3,
    "can_opening": 3,
    "washing_machine": 3,
    "vacuum_cleaner": 3,
    "clock_alarm": 3,
    "clock_tick": 3,
    "glass_breaking": 3,
    "helicopter": 4,
    "chainsaw": 4,
    "siren": 4,
    "car_horn": 4,
    "engine": 4,
    "train": 4,
    "church_bells": 4,
    "airplane": 4,
    "fireworks": 4,
    "hand_saw": 4,
}

_urls = {
    "https://github.com/karoldvl/ESC-50/archive/master.zip": "master.zip",
}


def load(path=None):
    """Load the ESC-50 (and ESC-10) environmental sound dataset.

    ESC-10/50: Environmental Sound Classification
    https://github.com/karolpiczak/ESC-50#download

    The ESC-50 dataset is a labeled collection of 2000 environmental audio
    recordings suitable for benchmarking methods of environmental sound
    classification. It consists of 5-second-long recordings organized into
    50 semantic classes (with 40 examples per class) loosely arranged into
    5 major categories:

    - Animals
    - Natural soundscapes & water sounds
    - Human, non-speech sounds
    - Interior/domestic sounds
    - Exterior/urban noises

    Clips in this dataset have been manually extracted from public field
    recordings gathered by the Freesound.org project. The dataset has been
    prearranged into 5 folds for comparable cross-validation, making sure
    that fragments from the same original source file are contained in a
    single fold.

    Parameters
    ----------
    path : str, optional
        Directory in which to look for the data, and where the data is
        downloaded if not present. Defaults to the ``DATASET_PATH``
        environment variable. The archive location is built by plain string
        concatenation (``path + "esc50/master.zip"``), so ``path`` must end
        with a path separator.

    Returns
    -------
    dict
        A dictionary with the following entries, all aligned on the rows of
        ``meta/esc50.csv``:

        ``"wavs"`` : array
            Matrix with one recording per row and time along the second
            axis. Its width is the length of the longest clip; shorter
            clips are centered and zero-padded.
        ``"fine_labels"`` : array of int32
            Label of the fine class (50 different ones).
        ``"coarse_labels"`` : array of int32
            Label of the major category (5 of them), obtained through
            ``fine_to_coarse``.
        ``"folds"`` : array of int32
            Cross-validation fold of each recording. A fold should not be
            split between train and test: that would put different parts of
            the same original recording on both sides and optimistically
            bias the results.
        ``"esc10"`` : array of bool
            Whether the recording belongs to the ESC-10 subset. To load
            ESC-10, load ESC-50 and select the rows with this mask.

    Raises
    ------
    KeyError
        If ``path`` is None and ``DATASET_PATH`` is not set.
    ValueError
        If the archive contains a wav file that is not listed in
        ``meta/esc50.csv``.
    """
    if path is None:
        path = os.environ["DATASET_PATH"]

    # NOTE(review): ``download_dataset``, ``_dataset`` and ``_baseurl`` are
    # not defined in the part of the module visible here; they are presumably
    # imported/defined elsewhere -- confirm.
    download_dataset(path, _dataset, _urls, _baseurl)

    wavs = []
    order = []
    # The context manager makes sure the archive handle is released even if
    # parsing the metadata or decoding a wav file fails.
    with zipfile.ZipFile(path + "esc50/master.zip") as archive:
        meta = np.loadtxt(
            io.BytesIO(archive.read("ESC-50-master/meta/esc50.csv")),
            delimiter=",",
            skiprows=1,
            dtype="str",
        )

        # Row index of every filename, built once so each lookup in the loop
        # is O(1). ``setdefault`` keeps the first occurrence, like
        # ``list.index`` would.
        row_of = {}
        for row, name in enumerate(meta[:, 0]):
            row_of.setdefault(str(name), row)

        for member in tqdm(archive.namelist(), ascii=True):
            if not member.endswith(".wav"):
                continue
            basename = member.split("/")[-1]
            if basename not in row_of:
                # Same exception type ``list.index`` raised on a miss.
                raise ValueError(
                    "{!r} is not listed in esc50.csv".format(basename)
                )
            # ``wav_read`` returns (sample rate, samples); only the samples
            # are kept.
            samples = wav_read(io.BytesIO(archive.read(member)))[1]
            wavs.append(samples.astype("float32"))
            order.append(row_of[basename])

    folds = meta[:, 1].astype("int32")
    fine_labels = meta[:, 2].astype("int32")
    categories = meta[:, 3]
    esc10 = meta[:, 4] == "True"
    coarse_labels = np.array([fine_to_coarse[c] for c in categories])
    coarse_labels = coarse_labels.astype("int32")

    # Center every clip in a zero matrix as wide as the longest clip, placing
    # it on the row of its metadata entry so that wavs and labels line up.
    longest = max((len(wav) for wav in wavs), default=0)
    all_wavs = np.zeros((len(wavs), longest))
    for row, wav in zip(order, wavs):
        left = (longest - len(wav)) // 2
        all_wavs[row, left : left + len(wav)] = wav

    data = {
        "wavs": all_wavs,
        "fine_labels": fine_labels,
        "coarse_labels": coarse_labels,
        "folds": folds,
        "esc10": esc10,
    }
    return data