# Source code for numpy_datasets.timeseries.univariate_timeseries

import os
import pickle, gzip
import urllib.request
import numpy as np
import time
import zipfile
from scipy.io import arff
import io
from tqdm import tqdm


def download(path):
    """
    Download the univariate_timeseries archive if it is not already present.

    The archive is stored as
    ``<path>univariate_timeseries/Univariate2018_arff.zip``. Nothing is
    downloaded when that file already exists.

    Parameters
    ----------

        path: str
            the path where the downloaded files will be stored. It is
            joined by plain string concatenation, so it must end with a
            path separator. If the ``univariate_timeseries`` directory
            (or any of its parents) does not exist, it is created.

    Raises
    ------

        urllib.error.URLError
            if the archive cannot be fetched.
    """

    folder = path + "univariate_timeseries"
    archive = folder + "/Univariate2018_arff.zip"

    # Check if directory exists
    if not os.path.isdir(folder):
        print("Creating univariate_timeseries Directory")
        # makedirs also creates missing parents; exist_ok guards against a
        # concurrent creation between the check above and this call.
        os.makedirs(folder, exist_ok=True)

    # Check if file exists
    if os.path.exists(archive):
        return

    print("Downloading univariate timeseries")
    url = "http://www.timeseriesclassification.com/Downloads/Archives/Univariate2018_arff.zip"

    # Fetch into a side file and only move it into place once complete, so
    # an interrupted download is never mistaken for a finished archive.
    partial = archive + ".part"
    try:
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, archive)
    finally:
        # Only still present when the download or the rename failed.
        if os.path.exists(partial):
            os.remove(partial)


def load(path=None):
    """
    Load every TRAIN/TEST text file of the univariate timeseries archive.

    The archive is downloaded first if it is not already present under
    ``path``.

    Parameters
    ----------

        path: str (optional)
            default ($DATASET_PATH), the path to look for the data and
            where the data will be downloaded if not present

    Returns
    -------

        all_data: dict
            maps ``"<dataset>/TRAIN"`` and ``"<dataset>/TEST"`` to the
            array returned by ``np.loadtxt`` for the corresponding
            ``*TRAIN.txt`` / ``*TEST.txt`` archive member, where
            ``<dataset>`` is the name of the member's parent directory.

    Raises
    ------

        KeyError
            if ``path`` is None and ``DATASET_PATH`` is not set.
    """

    if path is None:
        path = os.environ["DATASET_PATH"]

    download(path)

    # init. the data array
    all_data = {}

    # Open the archive in a context manager so the file handle is released
    # even if parsing one of the members fails.
    with zipfile.ZipFile(
        path + "univariate_timeseries/Univariate2018_arff.zip"
    ) as archive:
        for name in tqdm(archive.namelist()):
            if "TRAIN.txt" not in name and "TEST.txt" not in name:
                continue

            raw = archive.read(name)
            if "PLAID" in name:
                # The PLAID members are parsed as comma-separated values.
                data = np.loadtxt(io.BytesIO(raw), delimiter=",")
            else:
                # Everything else uses np.loadtxt's default whitespace split.
                data = np.loadtxt(io.BytesIO(raw))

            # Parent directory of the member is used as the dataset name.
            dataset = name[:-4].split("/")[-2]
            part = "TRAIN" if "TRAIN" in name else "TEST"
            all_data[dataset + "/" + part] = data

    return all_data