Shortcuts

Source code for torchvision.datasets.coco

from .vision import VisionDataset
from PIL import Image
import os
import os.path


[docs]class CocoCaptions(VisionDataset): """`MS Coco Captions <http://mscoco.org/dataset/#captions-challenge2015>`_ Dataset. Args: root (string): Root directory where images are downloaded to. annFile (string): Path to json annotation file. transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. E.g, ``transforms.ToTensor`` target_transform (callable, optional): A function/transform that takes in the target and transforms it. transforms (callable, optional): A function/transform that takes input sample and its target as entry and returns a transformed version. Example: .. code:: python import torchvision.datasets as dset import torchvision.transforms as transforms cap = dset.CocoCaptions(root = 'dir where images are', annFile = 'json annotation file', transform=transforms.ToTensor()) print('Number of samples: ', len(cap)) img, target = cap[3] # load 4th sample print("Image Size: ", img.size()) print(target) Output: :: Number of samples: 82783 Image Size: (3L, 427L, 640L) [u'A plane emitting smoke stream flying over a mountain.', u'A plane darts across a bright blue sky behind a mountain covered in snow', u'A plane leaves a contrail above the snowy mountain top.', u'A mountain that has a plane flying overheard in the distance.', u'A mountain view with a plume of smoke in the background'] """ def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None): super(CocoCaptions, self).__init__(root, transforms, transform, target_transform) from pycocotools.coco import COCO self.coco = COCO(annFile) self.ids = list(sorted(self.coco.imgs.keys()))
[docs] def __getitem__(self, index): """ Args: index (int): Index Returns: tuple: Tuple (image, target). target is a list of captions for the image. """ coco = self.coco img_id = self.ids[index] ann_ids = coco.getAnnIds(imgIds=img_id) anns = coco.loadAnns(ann_ids) target = [ann['caption'] for ann in anns] path = coco.loadImgs(img_id)[0]['file_name'] img = Image.open(os.path.join(self.root, path)).convert('RGB') if self.transforms is not None: img, target = self.transforms(img, target) return img, target
def __len__(self): return len(self.ids)
[docs]class CocoDetection(VisionDataset): """`MS Coco Detection <http://mscoco.org/dataset/#detections-challenge2016>`_ Dataset. Args: root (string): Root directory where images are downloaded to. annFile (string): Path to json annotation file. transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. E.g, ``transforms.ToTensor`` target_transform (callable, optional): A function/transform that takes in the target and transforms it. transforms (callable, optional): A function/transform that takes input sample and its target as entry and returns a transformed version. """ def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None): super(CocoDetection, self).__init__(root, transforms, transform, target_transform) from pycocotools.coco import COCO self.coco = COCO(annFile) self.ids = list(sorted(self.coco.imgs.keys()))
[docs] def __getitem__(self, index): """ Args: index (int): Index Returns: tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``. """ coco = self.coco img_id = self.ids[index] ann_ids = coco.getAnnIds(imgIds=img_id) target = coco.loadAnns(ann_ids) path = coco.loadImgs(img_id)[0]['file_name'] img = Image.open(os.path.join(self.root, path)).convert('RGB') if self.transforms is not None: img, target = self.transforms(img, target) return img, target
def __len__(self): return len(self.ids)

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Find development resources and get your questions answered

View Resources