# Source code for numpy_datasets.images.celeb

import io
import os
import subprocess
import time
import zipfile
from typing import Any, Callable, List, Iterable, Optional, TypeVar

import matplotlib.image as mpimg
import numpy as np
from tqdm import tqdm

# BibTeX entry (as exported by dblp/bibsonomy) for the paper that introduced
# the dataset handled by this module: "Deep Learning Face Attributes in the
# Wild", Liu et al., ICCV 2015. The constant is kept as citation metadata; it
# is not referenced by the functions visible in this module.
_CITATION = """\
@inproceedings{conf/iccv/LiuLWT15,
  added-at = {2018-10-09T00:00:00.000+0200},
  author = {Liu, Ziwei and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
  biburl = {https://www.bibsonomy.org/bibtex/250e4959be61db325d2f02c1d8cd7bfbb/dblp},
  booktitle = {ICCV},
  crossref = {conf/iccv/2015},
  ee = {http://doi.ieeecomputersociety.org/10.1109/ICCV.2015.425},
  interhash = {3f735aaa11957e73914bbe2ca9d5e702},
  intrahash = {50e4959be61db325d2f02c1d8cd7bfbb},
  isbn = {978-1-4673-8391-2},
  keywords = {dblp},
  pages = {3730-3738},
  publisher = {IEEE Computer Society},
  timestamp = {2018-10-11T11:43:28.000+0200},
  title = {Deep Learning Face Attributes in the Wild.},
  url = {http://dblp.uni-trier.de/db/conf/iccv/iccv2015.html#LiuLWT15},
  year = 2015
}
"""


def download(path: str) -> None:
    """Fetch the CelebA archive from Kaggle into ``path`` if it is missing.

    The directory ``path`` is created when it does not exist, including any
    missing parent directories. If ``celeba-dataset.zip`` is already present
    in it, nothing else happens. Otherwise the ``kaggle`` command-line tool
    is invoked to download the ``jessicali9530/celeba-dataset`` dataset, with
    ``path`` as the working directory of the child process.

    The download is best-effort: a missing ``kaggle`` executable or a
    non-zero exit status is reported on standard output and the function
    still returns normally, so the caller finds out when it tries to open
    the archive.

    Parameters
    ----------

    path: str
        directory in which ``celeba-dataset.zip`` is looked for and into
        which it is downloaded
    """
    # makedirs (unlike mkdir) also creates missing parents and tolerates an
    # already existing directory.
    os.makedirs(path, exist_ok=True)
    if os.path.exists(os.path.join(path, "celeba-dataset.zip")):
        return

    command = [
        "kaggle",
        "datasets",
        "download",
        "-d",
        "jessicali9530/celeba-dataset",
    ]
    try:
        # cwd= only affects the child process: the caller's working
        # directory is never changed, so there is nothing to restore.
        status = subprocess.run(command, cwd=path, check=False).returncode
    except OSError as err:
        # Raised when the executable cannot be started, e.g. the kaggle
        # command-line tool is not installed or not on PATH.
        print("Could not run the kaggle command-line tool: {}".format(err))
        return
    if status != 0:
        print("kaggle download exited with status {}.".format(status))


def load(path=None):
    """Face images with attributes.

    CelebFaces Attributes Dataset (CelebA) is a large-scale face attributes
    dataset with more than 200K celebrity images, each with 40 attribute
    annotations. The images in this dataset cover large pose variations and
    background clutter. CelebA has large diversities, large quantities, and
    rich annotations, including

    - 10,177 number of identities,
    - 202,599 number of face images, and
    - 5 landmark locations, 40 binary attributes annotations per image.

    The dataset can be employed as the training and test sets for the
    following computer vision tasks: face attribute recognition, face
    detection, and landmark (or facial part) localization.

    Note: CelebA dataset may contain potential bias. The fairness indicators
    `https://github.com/tensorflow/fairness-indicators/blob/master/fairness_indicators/documentation/examples/Fairness_Indicators_TFCO_CelebA_Case_Study.ipynb`
    go into detail about several considerations to keep in mind while using
    the CelebA dataset.

    Parameters
    ----------

    path: str (optional)
        default ($DATASET_PATH), the path to look for the data and
        where the data will be downloaded if not present

    Returns
    -------

    dataset: dict
        a dictionary with three entries:

        - ``"images"``: array built from every decoded ``.jpg`` member of
          the archive, in the order the archive lists them
        - ``"attributes"``: float32 array holding the content of
          ``list_attr_celeba.csv`` without its header row and without its
          first column
        - ``"names"``: array of str, the header row of
          ``list_attr_celeba.csv`` without its first column

    Raises
    ------

    KeyError
        if ``path`` is None and the ``DATASET_PATH`` environment variable
        is not set
    """
    if path is None:
        path = os.environ["DATASET_PATH"]

    download(os.path.join(path, "celebA"))

    t0 = time.time()

    images = []
    # Context managers make sure the archive and every member handle are
    # closed, even when decoding fails part-way through.
    with zipfile.ZipFile(
        os.path.join(path, "celebA", "celeba-dataset.zip"), "r"
    ) as archive:
        for name in tqdm(archive.namelist()):
            # Only real image members; directory entries and the CSV files
            # are skipped.
            if not name.endswith(".jpg"):
                continue
            with archive.open(name) as member:
                images.append(mpimg.imread(member, "jpg"))

        with archive.open("list_attr_celeba.csv") as member:
            atts = np.loadtxt(member, delimiter=",", dtype=str)

    # First row is the CSV header; its first cell labels the first column,
    # which is dropped everywhere below.
    names = atts[0, 1:]

    # Other CSV files listed by the original author, not loaded here:
    # list_bbox_celeba.csv
    # list_eval_partition.csv
    # list_landmarks_align_celeba.csv

    print("Dataset celebA loaded in {0:.2f}s.".format(time.time() - t0))

    # NOTE(review): images follow the archive order while attribute rows
    # follow the CSV order; nothing here checks that the two agree.
    dataset = {
        "images": np.array(images),
        "attributes": atts[1:, 1:].astype("float32"),
        "names": names,
    }
    return dataset