Implementing custom dataset reading in Python

Time: 2020-07-30

This article takes reading the VOC2012 semantic segmentation dataset as an example. See the comments in the code below for details:
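
The class below assumes the standard VOCdevkit directory layout (JPEGImages, SegmentationClass, and ImageSets/Segmentation under the dataset root). As a minimal sanity-check sketch (not part of the original files, using the same root path as Train.py below), the expected folders can be verified like this:

import os

root = '../data/VOCdevkit/VOC2012' # assumed dataset root, matching Train.py below
for sub in ('JPEGImages', 'SegmentationClass', 'ImageSets/Segmentation'):
  print(sub, os.path.isdir(os.path.join(root, sub)))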

VocDataset.py

from PIL import Image
import torch
import torch.utils.data as data
import numpy as np
import os
import torchvision
import torchvision.transforms as transforms

#Color labels corresponding to the VOC dataset classes
VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
        [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128],
        [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0],
        [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
        [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0],
        [0, 64, 128]]

#Map the color label space to the serial-number (class index) label space. Note: this lookup is the most time-consuming step.
def voc_label_indices(colormap, colormap2label):
  """Assign label indices for Pascal VOC2012 Dataset."""
  idx = ((colormap[:, :, 2] * 256 + colormap[:, :, 1]) * 256 + colormap[:, :, 0])
  out = colormap2label[idx]
  out = out.astype(np.int64) #data type conversion
  return out

class MyDataset(data.Dataset): # custom dataset reading class
  def __init__(self, root, is_train, crop_size=(320, 480)):
    self.rgb_mean = (0.485, 0.456, 0.406)
    self.rgb_std = (0.229, 0.224, 0.225)
    self.root = root
    self.crop_size = crop_size
    txt_fname = '%s/ImageSets/Segmentation/%s' % (root, 'train.txt' if is_train else 'val.txt')
    with open(txt_fname, 'r') as f:
      self.images = f.read().split()
    #Collect the image and label file paths for each name
    self.files = []
    for name in self.images:
      img_file = os.path.join(self.root, "JPEGImages/%s.jpg" % name)
      label_file = os.path.join(self.root, "SegmentationClass/%s.png" % name)
      self.files.append({
        "img": img_file,
        "label": label_file,
        "name": name
      })
    self.colormap2label = np.zeros(256**3)
    #This loop builds a lookup table that maps each color label to a single-channel class index
    for i, cm in enumerate(VOC_COLORMAP):
      self.colormap2label[(cm[2] * 256 + cm[1]) * 256 + cm[0]] = i
  #Read one sample (image and label) according to the index
  def __getitem__(self, index):
    
    datafiles = self.files[index]
    name = datafiles["name"]
    image = Image.open(datafiles["img"])
    label = Image.open(datafiles["label"]).convert('RGB') # the PNG label must be converted to RGB, otherwise the color lookup gives wrong results
    #Crop a fixed-size patch centered on the image; if the image is smaller than the crop size, it is padded with zeros automatically
    imgCenterCrop = transforms.Compose([
       transforms.CenterCrop(self.crop_size),
       transforms.ToTensor(),
       transforms.Normalize(self.rgb_mean, self.rgb_std) # normalize the image data
     ])
    labelCenterCrop = transforms.CenterCrop(self.crop_size)
    cropImage = imgCenterCrop(image)
    croplabel = labelCenterCrop(label)
    croplabel = torch.from_numpy(np.array(croplabel)).long() # convert the label to a torch LongTensor
    
    #Convert the color label map to an index label map
    mylabel = voc_label_indices(croplabel, self.colormap2label)

    return cropImage, mylabel
  #Return the number of samples in the dataset
  def __len__(self):
    return len(self.files)
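
As a minimal sketch of how the lookup table works (not part of the original files): each RGB color is packed into a single integer key with (B * 256 + G) * 256 + R, and colormap2label maps that key to the class index. Using the VOC_COLORMAP defined above:

import numpy as np

colormap2label = np.zeros(256 ** 3)
for i, cm in enumerate(VOC_COLORMAP):
  colormap2label[(cm[2] * 256 + cm[1]) * 256 + cm[0]] = i

pixel = [128, 0, 0] # VOC_COLORMAP[1]
key = (pixel[2] * 256 + pixel[1]) * 256 + pixel[0] # -> 128
print(colormap2label[key]) # -> 1.0, i.e. class index 1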

Train.py

import matplotlib.pyplot as plt
import torch.utils.data as data
import torchvision.transforms as transforms
import numpy as np

from PIL import Image
from VocDataset import MyDataset

#Color labels corresponding to the VOC dataset classes
VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
        [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128],
        [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0],
        [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
        [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0],
        [0, 64, 128]]

root = '../data/VOCdevkit/VOC2012'
train_data = MyDataset(root, True)
trainloader = data.DataLoader(train_data, batch_size=4)

#Take one batch of data from the dataset
for i, batch in enumerate(trainloader):
  getimgs, labels = batch # each batch is an (images, labels) pair
  img = transforms.ToPILImage()(getimgs[0])

  labels = labels.numpy() # from tensor to numpy
  labels = labels[0] # take one label image from the batch of labels
  labels = labels.transpose((1, 0)) # swap the two axes so the array is indexed as (x, y), matching putpixel below

  ##Map single channel index label image back to color label image
  newIm = Image.new('RGB', (480, 320)) # create an image of the same size as the label to display the label colors
  for i in range(0, 480):
    for j in range(0, 320):
      sele = labels[i][j] # get the class index of the pixel at this coordinate
      newIm.putpixel((i, j), (int(VOC_COLORMAP[sele][0]), int(VOC_COLORMAP[sele][1]), int(VOC_COLORMAP[sele][2])))

  #Display images and labels
  plt.figure("image")
  ax1 = plt.subplot(1,2,1)
  ax2 = plt.subplot(1,2,2)
  plt.sca(ax1)
  plt.imshow(img)
  plt.sca(ax2)
  plt.imshow(newIm)
  plt.show()
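
The pixel-by-pixel putpixel loop above is easy to follow but slow. As an alternative sketch (not from the original post), the same index-to-color mapping can be vectorized with NumPy, assuming labels is the (H, W) index map taken from the batch before the transpose:

palette = np.array(VOC_COLORMAP, dtype=np.uint8) # shape (21, 3), one RGB row per class
color_label = palette[labels] # shape (H, W, 3): look up every pixel's color at once
newIm = Image.fromarray(color_label)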

That's all for this article on implementing custom dataset reading in Python. I hope it provides a useful reference, and I hope you will continue to support developeppaer.