Source code for numpy_datasets.timeseries.sonycust

import os
import numpy as np
import tarfile
from tqdm import tqdm
from scipy.io.wavfile import read as wav_read
from ..utils import download_dataset


_urls = {
    "https://zenodo.org/record/3233082/files/audio-dev.tar.gz?download=1": "audio-dev.tar.gz",
    "https://zenodo.org/record/3233082/files/annotations-dev.csv?download=1": "annotations-dev.csv",
}
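# Maps each download URL to the local filename that `download_dataset` stores
# under "<path>/ust/" (layout inferred from how `load` reads the files back below).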


coarse_labels = [
    "engine",
    "machinery-impact",
    "non-machinery-impact" "powered-saw",
    "alert-signal",
    "music",
    "human-voice",
    "dog",
]

fine_labels = [
    "small-sounding-engine",
    "medium-sounding-engine",
    "large-sounding-engine",
    "engine-of-uncertain-size",
    "rock-drill",
    "jackhammer",
    "hoe-ram",
    "pile-driver",
    "other-unknown-impact-machinery",
    "non-machinery-impact",
    "chainsaw",
    "small-medium-rotating-saw",
    "large-rotating-saw",
    "other-unknown-powered-saw",
    "car-horn",
    "car-alarm",
    "siren",
    "reverse-beeper",
    "other-unknown-alert-signal",
    "stationary-music",
    "mobile-music",
    "ice-cream-truck",
    "music-from-uncertain-source",
    "person-or-small-group-talking",
    "person-or-small-group-shouting",
    "large-crowd",
    "amplified-speech",
    "other-unknown-human-voice",
    "dog-barking-whining",
]
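# The 29 fine labels above form contiguous blocks, one per coarse class:
# engine (4), machinery-impact (5), non-machinery-impact (1), powered-saw (4),
# alert-signal (5), music (4), human-voice (5), dog (1). `load` uses these
# block boundaries ([0, 4, 9, 10, 14, 19, 23, 28, 29]) to derive coarse labels.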


def load(path=None):
    """multilabel urban sound classification

    Reference at https://zenodo.org/record/3233082

    Description
    -----------
    SONYC Urban Sound Tagging (SONYC-UST) is a dataset for the development
    and evaluation of machine listening systems for realistic urban noise
    monitoring. The audio was recorded from the SONYC acoustic sensor
    network. Volunteers on the Zooniverse citizen science platform tagged the
    presence of 23 classes that were chosen in consultation with the New York
    City Department of Environmental Protection. These 23 fine-grained
    classes can be grouped into 8 coarse-grained classes. The recordings are
    split into three subsets: training, validation, and test. These sets are
    disjoint with respect to the sensor from which each recording came. For
    increased reliability, three volunteers annotated each recording, and
    members of the SONYC team subsequently created a set of ground-truth tags
    for the validation set using a two-stage annotation procedure in which
    two annotators independently tagged and then collectively resolved any
    disagreements. For more details on the motivation and creation of this
    dataset see the DCASE 2019 Urban Sound Tagging Task website.

    Audio data
    ----------
    The provided audio has been acquired using the SONYC acoustic sensor
    network for urban noise pollution monitoring. Over 50 different sensors
    have been deployed in New York City, and these sensors have collectively
    gathered the equivalent of 37 years of audio data, of which we provide a
    small subset. The data was sampled by selecting the nearest neighbors on
    VGGish features of recordings known to have classes of interest. All
    recordings are 10 seconds and were recorded with identical microphones at
    identical gain settings. To maintain privacy, the recordings in this
    release have been distributed in time and location, and the time and
    location of the recordings are not included in the metadata.

    Labels
    ------
    There are fine and coarse labels::

        engine
            1: small-sounding-engine
            2: medium-sounding-engine
            3: large-sounding-engine
            X: engine-of-uncertain-size
        machinery-impact
            1: rock-drill
            2: jackhammer
            3: hoe-ram
            4: pile-driver
            X: other-unknown-impact-machinery
        non-machinery-impact
            1: non-machinery-impact
        powered-saw
            1: chainsaw
            2: small-medium-rotating-saw
            3: large-rotating-saw
            X: other-unknown-powered-saw
        alert-signal
            1: car-horn
            2: car-alarm
            3: siren
            4: reverse-beeper
            X: other-unknown-alert-signal
        music
            1: stationary-music
            2: mobile-music
            3: ice-cream-truck
            X: music-from-uncertain-source
        human-voice
            1: person-or-small-group-talking
            2: person-or-small-group-shouting
            3: large-crowd
            4: amplified-speech
            X: other-unknown-human-voice
        dog
            1: dog-barking-whining
    """
    if path is None:
        path = os.environ["DATASET_PATH"]
    download_dataset(path, "ust", _urls)

    # open the audio archive and load the annotation table
    files = tarfile.open(path + "ust/audio-dev.tar.gz", "r:gz")
    annotations = np.loadtxt(
        path + "ust/annotations-dev.csv",
        delimiter=",",
        skiprows=1,
        dtype="str",
    )

    # build "<split>/<audio_filename>" names matching the tar member paths
    annot_names = list(annotations[:, 2])
    for i in range(len(annot_names)):
        annot_names[i] = annotations[i, 0] + "/" + str(annot_names[i])

    # fine-label matrix and the column limits of each coarse class
    fine_matrix = annotations[:, 4:33].astype("float32").astype("int32")
    class_limits = [0, 4, 9, 10, 14, 19, 23, 28, 29]
    n_classes = len(class_limits) - 1
    n_samples = len(annotations)

    # a coarse class is present if any of its fine classes is present
    llabels = np.zeros((n_samples, n_classes), dtype="int")
    for k in range(n_classes):
        block = fine_matrix[:, class_limits[k] : class_limits[k + 1]]
        llabels[:, k] = block.max(1)

    wavs = np.zeros((2794, 441000), dtype="float32")
    coarse = np.zeros((2794, 8), dtype="int32")
    fine = np.zeros((2794, 29), dtype="int32")
    cpt = 0
    for name in tqdm(files.getnames(), ascii=True):
        if ".wav" not in name:
            continue
        # look the tar member up in the annotation rows (first matching row)
        row = annot_names.index(name)
        wavs[cpt] = wav_read(files.extractfile(name))[1].astype("float32")
        coarse[cpt] = llabels[row]
        fine[cpt] = fine_matrix[row]
        cpt += 1

    data = {"wavs": wavs, "fine_labels": fine, "coarse_labels": coarse}
    return data
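

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: load the dataset
    # and inspect the returned arrays. Assumes the DATASET_PATH environment
    # variable points at a directory (with a trailing separator) where the
    # "ust" folder lives or can be downloaded into.
    data = load()
    print(data["wavs"].shape)           # (2794, 441000): 10 s clips at 44.1 kHz
    print(data["coarse_labels"].shape)  # (2794, 8) multilabel coarse targets
    print(data["fine_labels"].shape)    # (2794, 29) multilabel fine targets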