Source code for numpy_datasets.timeseries.birdvox_dcase_20k

#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = "Randall Balestriero"

import io
import os
import pickle, gzip
import urllib.request
import numpy as np
import time
import zipfile
from tqdm import tqdm
from scipy.io.wavfile import read as wav_read
from ..utils import download_dataset


# Download sources for the dataset: each key is a remote URL and each value
# is the file name that `load` later looks for under `<path>/<_name>/`.
# NOTE(review): presumably `download_dataset` saves each URL under that
# name -- confirm against `..utils.download_dataset`.
_urls = {
    "https://zenodo.org/record/1208080/files/BirdVox-DCASE-20k.zip?download=1": "BirdVox-DCASE-20k.zip",
    "https://ndownloader.figshare.com/files/10853300": "data_labels.csv",
}

# Dataset identifier handed to `download_dataset`; `load` reads its files
# from a sub-directory of the dataset path bearing the same name.
_name = "birdvox_dcase_20k"


# BibTeX entry of the publication to cite when using this dataset.
cite = """
@inproceedings{lostanlen2018icassp,
    title = {BirdVox-full-night: a dataset and benchmark for avian
    flight call detection},
    author = {Lostanlen, Vincent and Salamon, Justin and Farnsworth,
    Andrew and Kelling, Steve and Bello, Juan Pablo},
    booktitle = {Proc. IEEE ICASSP},
    year = {2018},
    published = {IEEE},
    venue = {Calgary, Canada},
    month = {April},
    }
    """


def load(path=None):
    """Load the BirdVox-DCASE-20k binary bird-detection dataset.

    Dataset is 16.5 GB compressed.

    BirdVox-DCASE-20k: a dataset for bird audio detection in 10-second
    clips

    Version 2.0, March 2018.

    `link <https://wp.nyu.edu/birdvox>`_

    Description

    The BirdVox-DCASE-20k dataset contains 20,000 ten-second audio
    recordings. These recordings come from ROBIN autonomous recording
    units, placed near Ithaca, NY, USA during the fall 2015. They were
    captured on the night of September 23rd, 2015, by six different
    sensors, originally numbered 1, 2, 3, 5, 7, and 10.

    Out of these 20,000 recordings, 10,017 (50.09%) contain at least one
    bird vocalization (either song, call, or chatter).

    The dataset is a derivative work of the BirdVox-full-night dataset
    [1], containing almost as much data but formatted into ten-second
    excerpts rather than ten-hour full night recordings.

    In addition, the BirdVox-DCASE-20k dataset is provided as a
    development set in the context of the "Bird Audio Detection"
    challenge, organized by DCASE (Detection and Classification of
    Acoustic Scenes and Events) and the IEEE Signal Processing Society.

    The dataset can be used, among other things, for the development and
    evaluation of bioacoustic classification models.

    We refer the reader to [1] for details on the distribution of the
    data and [2] for details on the hardware of ROBIN recording units.

    [1] V. Lostanlen, J. Salamon, A. Farnsworth, S. Kelling, J.P. Bello.
    "BirdVox-full-night: a dataset and benchmark for avian flight call
    detection", Proc. IEEE ICASSP, 2018.

    [2] J. Salamon, J. P. Bello, A. Farnsworth, M. Robbins, S. Keen,
    H. Klinck, and S. Kelling. Towards the Automatic Classification of
    Avian Flight Calls for Bioacoustic Monitoring. PLoS One, 2016.

    Data Files

    The wav folder contains the recordings as WAV files, sampled at
    44.1 kHz, with a single channel (mono). The original sample rate
    was 24 kHz.

    The name of each wav file is a random 128-bit UUID (Universal
    Unique IDentifier) string, which is randomized with respect to the
    origin of the recording in BirdVox-full-night, both in terms of
    time (UTC hour at the start of the excerpt) and space (location of
    the sensor).

    The origin of each 10-second excerpt is known by the challenge
    organizers, but not disclosed to the participants.

    Please Acknowledge BirdVox-DCASE-20k in Academic Research

    When BirdVox-DCASE-20k is used for academic research, we would
    highly appreciate it if scientific publications of works partly
    based on this dataset cite the following publication:

    V. Lostanlen, J. Salamon, A. Farnsworth, S. Kelling, J. Bello.
    "BirdVox-full-night: a dataset and benchmark for avian flight call
    detection", Proc. IEEE ICASSP, 2018.

    The creation of this dataset was supported by NSF grants 1125098
    (BIRDCAST) and 1633259 (BIRDVOX), a Google Faculty Award, the Leon
    Levy Foundation, and two anonymous donors.

    Parameters
    ----------
    path: str (optional)
        default ($DATASET_PATH), the path to look for the data and
        where the data will be downloaded if not present

    Returns
    -------
    dataset: dict
        a dictionary with two entries:

        ``"wavs"``: array
            the waveforms in the time amplitude domain, cast to float32,
            in the order the files appear in the zip archive

        ``"labels"``: array
            int32 binary values representing the presence or not of a
            bird vocalization, aligned with ``"wavs"``

    Raises
    ------
    KeyError
        if ``path`` is None and the ``DATASET_PATH`` environment
        variable is not set
    """

    if path is None:
        path = os.environ["DATASET_PATH"]

    download_dataset(path, _name, _urls)

    t0 = time.time()

    # Both files live in the sub-directory named after the dataset.
    dataset_dir = os.path.join(path, _name)
    archive_path = os.path.join(dataset_dir, "BirdVox-DCASE-20k.zip")

    # ndmin=2 keeps the table two-dimensional even when the CSV holds a
    # single data row, so the column slicing below always works.
    table = np.loadtxt(
        os.path.join(dataset_dir, "data_labels.csv"),
        skiprows=1,
        delimiter=",",
        dtype="str",
        ndmin=2,
    )

    # Column 0 is the clip identifier and column 2 its binary label.
    # A dict gives O(1) lookups; setdefault keeps the first occurrence of a
    # duplicated identifier, matching what list.index() used to return.
    label_by_name = {}
    for item_id, has_bird in zip(table[:, 0], table[:, 2].astype("int")):
        label_by_name.setdefault(str(item_id), has_bird)

    wavs = []
    labels = []
    # The context manager guarantees the archive handle is released even if
    # decoding one of the members raises.
    with zipfile.ZipFile(archive_path) as archive:
        for name in tqdm(archive.namelist(), ascii=True):
            if not name.endswith(".wav"):
                continue
            # Strip the directory part and the ".wav" suffix to recover the
            # identifier used in the label file.
            stem = name.split("/")[-1][: -len(".wav")]
            if stem not in label_by_name:
                continue
            # wav_read returns (sample_rate, samples); only samples are kept.
            samples = wav_read(io.BytesIO(archive.read(name)))[1]
            wavs.append(samples.astype("float32"))
            labels.append(label_by_name[stem])

    wavs = np.array(wavs).astype("float32")
    labels = np.array(labels).astype("int32")

    print("Dataset birdvox_dcase_20k loaded in {0:.2f}s.".format(time.time() - t0))
    dataset = {"wavs": wavs, "labels": labels}
    return dataset