Source code for numpy_datasets.timeseries.birdvox_70k

#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = "Randall Balestriero"
import os
import pickle, gzip
import urllib.request
import numpy as np
import time
import h5py
from tqdm import tqdm
from ..utils import download_dataset


_urls = {
    "https://zenodo.org/record/1226427/files/BirdVox-70k_unit{}.hdf5?download=1".format(
        i
    ): "BirdVox-70k_unit{}.hdf5".format(i)
    for i in ["01", "02", "03", "05", "07", "10"]
}


[docs]def load(path=None):
    """a dataset for avian flight call detection in half-second clips

    Version 1.0, April 2018.

    Created By

    Vincent Lostanlen (1, 2, 3), Justin Salamon (2, 3), Andrew Farnsworth (1),
    Steve Kelling (1), and Juan Pablo Bello (2, 3).

    (1): Cornell Lab of Ornithology (CLO)
    (2): Center for Urban Science and Progress, New York University
    (3): Music and Audio Research Lab, New York University

    https://wp.nyu.edu/birdvox

    Description

    The BirdVox-70k dataset contains 70k half-second clips from 6 audio
    recordings in the BirdVox-full-night dataset, each about ten hours in
    duration. These recordings come from ROBIN autonomous recording units,
    placed near Ithaca, NY, USA during the fall 2015. They were captured on the
    night of September 23rd, 2015, by six different sensors, originally
    numbered 1, 2, 3, 5, 7, and 10.

    Andrew Farnsworth used the Raven software to pinpoint every avian flight
    call in time and frequency. He found 35402 flight calls in total.
    He estimates that about 25 different species of passerines (thrushes,
    warblers, and sparrows) are present in this recording. Species are not
    labeled in BirdVox-70k, but it is possible to tell apart thrushes from
    warblers and sparrows by looking at the center frequencies of their calls.
    The annotation process took 102 hours.

    The dataset can be used, among other things, for the research,development
    and testing of bioacoustic classification models, including the
    reproduction of the results reported in [1].

    For details on the hardware of ROBIN recording units, we refer the reader
    to [2].

    [1] V. Lostanlen, J. Salamon, A. Farnsworth, S. Kelling, J. Bello. BirdVox-full-night: a dataset and benchmark for avian flight call detection. Proc. IEEE ICASSP, 2018.

    [2] J. Salamon, J. P. Bello, A. Farnsworth, M. Robbins, S. Keen, H. Klinck, and S. Kelling. Towards the Automatic Classification of Avian Flight Calls for Bioacoustic Monitoring. PLoS One, 2016.

    @inproceedings{lostanlen2018icassp,
    title = {BirdVox-full-night: a dataset and benchmark for avian flight call detection},
    author = {Lostanlen, Vincent and Salamon, Justin and Farnsworth, Andrew and Kelling, Steve and Bello, Juan Pablo},
    booktitle = {Proc. IEEE ICASSP},
    year = {2018},
    published = {IEEE},
    venue = {Calgary, Canada},
    month = {April},
    }

    Parameters
    ----------
        path: str (optional)
            default ($DATASET_PATH), the path to look for the data and
            where the data will be downloaded if not present

    Returns
    -------

        wavs: array(70804, 12000)
            the waveforms in the time amplitude domain

        labels: array(70804,)
            binary values representing the presence or not of an avian

        recording: array(70804,)
            the file number from which the sample has been extracted

    """

    if path is None:
        path = os.environ["DATASET_PATH"]

    if path is None:
        path = os.environ["DATASET_PATH"]
    download_dataset(path, "birdvox_70k", _urls)

    # Loading the file
    path += "birdvox_70k/"
    names = ["01", "02", "03", "05", "07", "10"]
    basefile = "BirdVox-70k_unit{}.hdf5"
    wavs = list()
    label = list()
    recording = list()
    for name in names:
        f = h5py.File(path + basefile.format(name), "r")
        for filename in tqdm(
            f["waveforms"].keys(), ascii=True, desc="recording {}".format(name)
        ):
            wavs.append(f["waveforms"][filename][...])
            label.append(int(filename[-1]))
            recording.append(int(name))

    data = {
        "wavs": np.array(wavs).astype("float32"),
        "labels": np.array(label).astype("int32"),
        "recording": np.array(recording).astype("int32"),
    }

    print("Dataset birdvox_70k loaded in {0:.2f}s.".format(time.time() - t0))

    return data