# Source code for numpy_datasets.timeseries.birdvox_dcase_20k

#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = "Randall Balestriero"

import io
import os
import pickle, gzip
import urllib.request
import numpy as np
import time
import zipfile
from tqdm import tqdm
from scipy.io.wavfile import read as wav_read
from ..utils import download_dataset


# Remote files that make up the dataset, keyed by download URL; each value is
# a local file name. `load` passes this mapping to `download_dataset` and then
# reads files with these names from "<path>/birdvox_dcase_20k/".
# NOTE(review): presumably `download_dataset` saves each URL under the mapped
# file name -- confirm against ..utils.download_dataset.
_urls = {
    "https://zenodo.org/record/1208080/files/BirdVox-DCASE-20k.zip?download=1": "BirdVox-DCASE-20k.zip",
    "https://ndownloader.figshare.com/files/10853300": "data_labels.csv",
}

# Dataset identifier handed to `download_dataset`; it matches the directory
# name that `load` reads the downloaded files from.
_name = "birdvox_dcase_20k"


# BibTeX entry for the publication that the `load` docstring asks users of
# this dataset to cite.
# NOTE(review): `published` and `venue` are not standard BibTeX fields (the
# usual ones are `publisher` and `address`); the text is kept verbatim here --
# confirm against the upstream citation before changing it.
cite = """
@inproceedings{lostanlen2018icassp,
    title = {BirdVox-full-night: a dataset and benchmark for avian
    flight call detection},
    author = {Lostanlen, Vincent and Salamon, Justin and Farnsworth,
    Andrew and Kelling, Steve and Bello, Juan Pablo},
    booktitle = {Proc. IEEE ICASSP},
    year = {2018},
    published = {IEEE},
    venue = {Calgary, Canada},
    month = {April},
    }
    """


def load(path=None):
    """Load the BirdVox-DCASE-20k binary bird detection dataset.

    The dataset is 16.5 GB compressed.

    BirdVox-DCASE-20k: a dataset for bird audio detection in 10-second clips.
    Version 2.0, March 2018. `link <https://wp.nyu.edu/birdvox>`_

    **Description**

    The BirdVox-DCASE-20k dataset contains 20,000 ten-second audio
    recordings. These recordings come from ROBIN autonomous recording units,
    placed near Ithaca, NY, USA during the fall 2015. They were captured on
    the night of September 23rd, 2015, by six different sensors, originally
    numbered 1, 2, 3, 5, 7, and 10.

    Out of these 20,000 recordings, 10,017 (50.09%) contain at least one bird
    vocalization (either song, call, or chatter).

    The dataset is a derivative work of the BirdVox-full-night dataset [1],
    containing almost as much data but formatted into ten-second excerpts
    rather than ten-hour full night recordings.

    In addition, the BirdVox-DCASE-20k dataset is provided as a development
    set in the context of the "Bird Audio Detection" challenge, organized by
    DCASE (Detection and Classification of Acoustic Scenes and Events) and
    the IEEE Signal Processing Society.

    The dataset can be used, among other things, for the development and
    evaluation of bioacoustic classification models.

    We refer the reader to [1] for details on the distribution of the data
    and [2] for details on the hardware of ROBIN recording units.

    [1] V. Lostanlen, J. Salamon, A. Farnsworth, S. Kelling, J.P. Bello.
    "BirdVox-full-night: a dataset and benchmark for avian flight call
    detection", Proc. IEEE ICASSP, 2018.

    [2] J. Salamon, J. P. Bello, A. Farnsworth, M. Robbins, S. Keen,
    H. Klinck, and S. Kelling. Towards the Automatic Classification of Avian
    Flight Calls for Bioacoustic Monitoring. PLoS One, 2016.

    **Data Files**

    The wav folder contains the recordings as WAV files, sampled at 44.1 kHz,
    with a single channel (mono). The original sample rate was 24 kHz.

    The name of each wav file is a random 128-bit UUID (Universal Unique
    IDentifier) string, which is randomized with respect to the origin of the
    recording in BirdVox-full-night, both in terms of time (UTC hour at the
    start of the excerpt) and space (location of the sensor). The origin of
    each 10-second excerpt is known by the challenge organizers, but not
    disclosed to the participants.

    **Please Acknowledge BirdVox-DCASE-20k in Academic Research**

    When BirdVox-DCASE-20k is used for academic research, we would highly
    appreciate it if scientific publications of works partly based on this
    dataset cite the following publication:

    V. Lostanlen, J. Salamon, A. Farnsworth, S. Kelling, J. Bello.
    "BirdVox-full-night: a dataset and benchmark for avian flight call
    detection", Proc. IEEE ICASSP, 2018.

    The creation of this dataset was supported by NSF grants 1125098
    (BIRDCAST) and 1633259 (BIRDVOX), a Google Faculty Award, the Leon Levy
    Foundation, and two anonymous donors.

    Parameters
    ----------
    path: str (optional)
        default ($DATASET_PATH), the path to look for the data and
        where the data will be downloaded if not present

    Returns
    -------
    dataset: dict
        A dictionary with two entries:

        ``"wavs"``: float32 array
            the waveforms in the time amplitude domain, one per labelled
            clip found in the archive
        ``"labels"``: int32 array
            binary values representing the presence or not of an avian
            vocalization, aligned with ``"wavs"``

    Raises
    ------
    KeyError
        If ``path`` is None and the ``DATASET_PATH`` environment variable is
        not set.

    Notes
    -----
    Every selected clip is decoded and kept in memory as float32, so loading
    the full dataset needs on the order of tens of GB of RAM (20,000 clips of
    ten seconds at 44.1 kHz).
    """
    if path is None:
        path = os.environ["DATASET_PATH"]

    download_dataset(path, _name, _urls)

    t0 = time.time()

    # Both files are read from "<path>/birdvox_dcase_20k/".
    dataset_dir = os.path.join(path, _name)
    archive_path = os.path.join(dataset_dir, "BirdVox-DCASE-20k.zip")
    csv_path = os.path.join(dataset_dir, "data_labels.csv")

    # The CSV has one header row; column 0 is matched against the wav file
    # stems and column 2 is used as the integer label. ndmin=2 keeps the
    # table two-dimensional even if the file holds a single data row.
    table = np.loadtxt(
        csv_path, skiprows=1, delimiter=",", dtype="str", ndmin=2
    )

    # Build the name -> label lookup once so each archive member is resolved
    # in O(1) instead of scanning a ~20k-element list twice per member.
    # setdefault keeps the first occurrence of a duplicated name, matching
    # the behaviour of list.index().
    label_by_name = {}
    for clip_name, clip_label in zip(table[:, 0], table[:, 2].astype("int")):
        label_by_name.setdefault(str(clip_name), clip_label)

    wavs = []
    labels = []
    # The context manager guarantees the archive handle is closed even if
    # decoding one of the clips raises.
    with zipfile.ZipFile(archive_path) as archive:
        for member in tqdm(archive.namelist(), ascii=True):
            # Zip member names always use "/" as the separator.
            stem, ext = os.path.splitext(member.split("/")[-1])
            if ext != ".wav" or stem not in label_by_name:
                continue
            # wav_read returns (sample_rate, samples); only samples are kept.
            _, samples = wav_read(io.BytesIO(archive.read(member)))
            wavs.append(samples.astype("float32"))
            labels.append(label_by_name[stem])

    wavs = np.array(wavs, dtype="float32")
    labels = np.array(labels, dtype="int32")

    print("Dataset birdvox_dcase_20k loaded in {0:.2f}s.".format(time.time() - t0))

    return {"wavs": wavs, "labels": labels}