Source code for numpy_datasets.images.ibeans

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import io
import os
import time
import zipfile
import matplotlib.image as mpimg
import urllib
import numpy as np
from ..utils import download_dataset


__author__ = "Randall Balestriero"

_name = "ibeans"

classes = ["angular_leaf_spot", "bean_rust", "healthy"]

_urls = {
    "https://storage.googleapis.com/ibeans/train.zip": "train.zip",
    "https://storage.googleapis.com/ibeans/test.zip": "test.zip",
    "https://storage.googleapis.com/ibeans/validation.zip": "validation.zip",
}
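

# The mapping above pairs each remote archive with the local filename it is
# saved under.  The actual download logic lives in
# ``numpy_datasets.utils.download_dataset`` (not shown on this page); the
# helper below is only an illustrative sketch of how such a mapping could be
# consumed -- its name, signature and directory layout are assumptions, not
# part of this module's API.
def _example_fetch_archives(path, name, urls):
    """Illustrative sketch only: download each archive of ``urls`` into
    ``<path>/<name>/`` unless it is already present."""
    import urllib.request  # module-level ``import urllib`` alone does not expose ``urllib.request``

    target_dir = os.path.join(path, name)
    os.makedirs(target_dir, exist_ok=True)
    for url, filename in urls.items():
        target = os.path.join(target_dir, filename)
        if not os.path.exists(target):
            urllib.request.urlretrieve(url, target)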


def load(path=None):
    """Plant images classification.

    This dataset consists of leaf images taken in the field in different
    districts in Uganda by the Makerere AI Lab in collaboration with the
    National Crops Resources Research Institute (NaCRRI), the national body
    in charge of research in agriculture in Uganda. The goal is to build a
    robust machine learning model that is able to distinguish between
    diseases of the bean plant. Beans are an important food crop for Africa
    grown by many small-holder farmers, and a significant source of protein
    for school-age children in East Africa.

    The data consists of leaf images representing 3 classes: a healthy class
    and two disease classes, Angular Leaf Spot and Bean Rust. The model
    should be able to distinguish between these 3 classes with high
    accuracy; the end goal is a robust model that can be deployed on a
    mobile device and used in the field by a farmer. Images were taken in
    the field/garden with a basic smartphone and then annotated by experts
    from NaCRRI, who determined for each image which disease was manifested.
    The experts were part of the data collection team and images were
    annotated directly during the data collection process in the field.

    =================  ========
    Class              Examples
    =================  ========
    Healthy            428
    Angular Leaf Spot  432
    Bean Rust          436
    Total              1,296
    =================  ========

    :Data Released: 20-January-2020
    :License: MIT
    :Credits: Makerere AI Lab

    Parameters
    ----------

    path: str (optional)
        default ($DATASET_PATH), the path to look for the data and
        where the data will be downloaded if not present

    Returns
    -------

    dataset: dict
        a dictionary with keys ``"train_set/images"``, ``"train_set/labels"``,
        ``"valid_set/images"``, ``"valid_set/labels"``, ``"test_set/images"``
        and ``"test_set/labels"``, each holding a ``numpy`` array
    """
    if path is None:
        # default location, as documented above
        path = os.environ["DATASET_PATH"]

    download_dataset(path, _name, _urls)

    t0 = time.time()

    # Load the training archive; the class name is the directory component
    # of each entry inside the zip file.
    train_images = list()
    train_labels = list()
    f = zipfile.ZipFile(os.path.join(path, "ibeans", "train.zip"))
    for filename in f.namelist():
        if ".jpg" not in filename:
            continue
        train_images.append(mpimg.imread(io.BytesIO(f.read(filename)), "jpg"))
        train_labels.append(classes.index(filename.split("/")[1]))

    # Load the test archive
    test_images = list()
    test_labels = list()
    f = zipfile.ZipFile(os.path.join(path, "ibeans", "test.zip"))
    for filename in f.namelist():
        if ".jpg" not in filename:
            continue
        test_images.append(mpimg.imread(io.BytesIO(f.read(filename)), "jpg"))
        test_labels.append(classes.index(filename.split("/")[1]))

    # Load the validation archive
    valid_images = list()
    valid_labels = list()
    f = zipfile.ZipFile(os.path.join(path, "ibeans", "validation.zip"))
    for filename in f.namelist():
        if ".jpg" not in filename:
            continue
        valid_images.append(mpimg.imread(io.BytesIO(f.read(filename)), "jpg"))
        valid_labels.append(classes.index(filename.split("/")[1]))

    dataset = {
        "train_set/images": np.array(train_images),
        "test_set/images": np.array(test_images),
        "valid_set/images": np.array(valid_images),
        "train_set/labels": np.array(train_labels),
        "test_set/labels": np.array(test_labels),
        "valid_set/labels": np.array(valid_labels),
    }

    print("Dataset ibeans loaded in {0:.2f}s.".format(time.time() - t0))

    return dataset
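

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original module): load the three
    # splits and inspect their shapes.  Assumes $DATASET_PATH points to a
    # writable directory where the archives can be downloaded if missing.
    data = load()
    print(data["train_set/images"].shape, data["train_set/labels"].shape)
    print(data["test_set/images"].shape, data["valid_set/images"].shape)
    # Map the first training label back to its class name.
    print(classes[data["train_set/labels"][0]])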