This repository has been archived on 2024-07-12. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
rabat-illness-yolov5/utils/dataloader.py
2024-06-13 12:13:54 +08:00

505 lines
22 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from random import sample, shuffle
import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data.dataset import Dataset
from utils.utils import cvtColor, preprocess_input
class YoloDataset(Dataset):
def __init__(self, annotation_lines, input_shape, num_classes, anchors, anchors_mask, epoch_length, \
mosaic, mixup, mosaic_prob, mixup_prob, train, special_aug_ratio = 0.7):
super(YoloDataset, self).__init__()
self.annotation_lines = annotation_lines
self.input_shape = input_shape
self.num_classes = num_classes
self.anchors = anchors
self.anchors_mask = anchors_mask
self.epoch_length = epoch_length
self.mosaic = mosaic
self.mosaic_prob = mosaic_prob
self.mixup = mixup
self.mixup_prob = mixup_prob
self.train = train
self.special_aug_ratio = special_aug_ratio
self.epoch_now = -1
self.length = len(self.annotation_lines)
self.bbox_attrs = 5 + num_classes
self.threshold = 4
def __len__(self):
return self.length
def __getitem__(self, index):
index = index % self.length
#---------------------------------------------------#
# 训练时进行数据的随机增强
# 验证时不进行数据的随机增强
#---------------------------------------------------#
if self.mosaic and self.rand() < self.mosaic_prob and self.epoch_now < self.epoch_length * self.special_aug_ratio:
lines = sample(self.annotation_lines, 3)
lines.append(self.annotation_lines[index])
shuffle(lines)
image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
if self.mixup and self.rand() < self.mixup_prob:
lines = sample(self.annotation_lines, 1)
image_2, box_2 = self.get_random_data(lines[0], self.input_shape, random = self.train)
image, box = self.get_random_data_with_MixUp(image, box, image_2, box_2)
else:
image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
box = np.array(box, dtype=np.float32)
if len(box) != 0:
#---------------------------------------------------#
# 对真实框进行归一化调整到0-1之间
#---------------------------------------------------#
box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
#---------------------------------------------------#
# 序号为0、1的部分为真实框的中心
# 序号为2、3的部分为真实框的宽高
# 序号为4的部分为真实框的种类
#---------------------------------------------------#
box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
y_true = self.get_target(box)
return image, box, y_true
def rand(self, a=0, b=1):
return np.random.rand()*(b-a) + a
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
line = annotation_line.split()
#------------------------------#
# 读取图像并转换成RGB图像
#------------------------------#
image = Image.open(line[0])
image = cvtColor(image)
#------------------------------#
# 获得图像的高宽与目标高宽
#------------------------------#
iw, ih = image.size
h, w = input_shape
#------------------------------#
# 获得预测框
#------------------------------#
box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
if not random:
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
dx = (w-nw)//2
dy = (h-nh)//2
#---------------------------------#
# 将图像多余的部分加上灰条
#---------------------------------#
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image, np.float32)
#---------------------------------#
# 对真实框进行调整
#---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
return image_data, box
#------------------------------------------#
# 对图像进行缩放并且进行长和宽的扭曲
#------------------------------------------#
new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
scale = self.rand(.25, 2)
if new_ar < 1:
nh = int(scale*h)
nw = int(nh*new_ar)
else:
nw = int(scale*w)
nh = int(nw/new_ar)
image = image.resize((nw,nh), Image.BICUBIC)
#------------------------------------------#
# 将图像多余的部分加上灰条
#------------------------------------------#
dx = int(self.rand(0, w-nw))
dy = int(self.rand(0, h-nh))
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image = new_image
#------------------------------------------#
# 翻转图像
#------------------------------------------#
flip = self.rand()<.5
if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
image_data = np.array(image, np.uint8)
#---------------------------------#
# 对图像进行色域变换
# 计算色域变换的参数
#---------------------------------#
r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
#---------------------------------#
# 将图像转到HSV上
#---------------------------------#
hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
dtype = image_data.dtype
#---------------------------------#
# 应用变换
#---------------------------------#
x = np.arange(0, 256, dtype=r.dtype)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)
#---------------------------------#
# 对真实框进行调整
#---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
if flip: box[:, [0,2]] = w - box[:, [2,0]]
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)]
return image_data, box
def merge_bboxes(self, bboxes, cutx, cuty):
merge_bbox = []
for i in range(len(bboxes)):
for box in bboxes[i]:
tmp_box = []
x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
if i == 0:
if y1 > cuty or x1 > cutx:
continue
if y2 >= cuty and y1 <= cuty:
y2 = cuty
if x2 >= cutx and x1 <= cutx:
x2 = cutx
if i == 1:
if y2 < cuty or x1 > cutx:
continue
if y2 >= cuty and y1 <= cuty:
y1 = cuty
if x2 >= cutx and x1 <= cutx:
x2 = cutx
if i == 2:
if y2 < cuty or x2 < cutx:
continue
if y2 >= cuty and y1 <= cuty:
y1 = cuty
if x2 >= cutx and x1 <= cutx:
x1 = cutx
if i == 3:
if y1 > cuty or x2 < cutx:
continue
if y2 >= cuty and y1 <= cuty:
y2 = cuty
if x2 >= cutx and x1 <= cutx:
x1 = cutx
tmp_box.append(x1)
tmp_box.append(y1)
tmp_box.append(x2)
tmp_box.append(y2)
tmp_box.append(box[-1])
merge_bbox.append(tmp_box)
return merge_bbox
def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
h, w = input_shape
min_offset_x = self.rand(0.3, 0.7)
min_offset_y = self.rand(0.3, 0.7)
image_datas = []
box_datas = []
index = 0
for line in annotation_line:
#---------------------------------#
# 每一行进行分割
#---------------------------------#
line_content = line.split()
#---------------------------------#
# 打开图片
#---------------------------------#
image = Image.open(line_content[0])
image = cvtColor(image)
#---------------------------------#
# 图片的大小
#---------------------------------#
iw, ih = image.size
#---------------------------------#
# 保存框的位置
#---------------------------------#
box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]])
#---------------------------------#
# 是否翻转图片
#---------------------------------#
flip = self.rand()<.5
if flip and len(box)>0:
image = image.transpose(Image.FLIP_LEFT_RIGHT)
box[:, [0,2]] = iw - box[:, [2,0]]
#------------------------------------------#
# 对图像进行缩放并且进行长和宽的扭曲
#------------------------------------------#
new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
scale = self.rand(.4, 1)
if new_ar < 1:
nh = int(scale*h)
nw = int(nh*new_ar)
else:
nw = int(scale*w)
nh = int(nw/new_ar)
image = image.resize((nw, nh), Image.BICUBIC)
#-----------------------------------------------#
# 将图片进行放置,分别对应四张分割图片的位置
#-----------------------------------------------#
if index == 0:
dx = int(w*min_offset_x) - nw
dy = int(h*min_offset_y) - nh
elif index == 1:
dx = int(w*min_offset_x) - nw
dy = int(h*min_offset_y)
elif index == 2:
dx = int(w*min_offset_x)
dy = int(h*min_offset_y)
elif index == 3:
dx = int(w*min_offset_x)
dy = int(h*min_offset_y) - nh
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image)
index = index + 1
box_data = []
#---------------------------------#
# 对box进行重新处理
#---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)]
box_data = np.zeros((len(box),5))
box_data[:len(box)] = box
image_datas.append(image_data)
box_datas.append(box_data)
#---------------------------------#
# 将图片分割,放在一起
#---------------------------------#
cutx = int(w * min_offset_x)
cuty = int(h * min_offset_y)
new_image = np.zeros([h, w, 3])
new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
new_image = np.array(new_image, np.uint8)
#---------------------------------#
# 对图像进行色域变换
# 计算色域变换的参数
#---------------------------------#
r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
#---------------------------------#
# 将图像转到HSV上
#---------------------------------#
hue, sat, val = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
dtype = new_image.dtype
#---------------------------------#
# 应用变换
#---------------------------------#
x = np.arange(0, 256, dtype=r.dtype)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)
#---------------------------------#
# 对框进行进一步的处理
#---------------------------------#
new_boxes = self.merge_bboxes(box_datas, cutx, cuty)
return new_image, new_boxes
def get_random_data_with_MixUp(self, image_1, box_1, image_2, box_2):
new_image = np.array(image_1, np.float32) * 0.5 + np.array(image_2, np.float32) * 0.5
if len(box_1) == 0:
new_boxes = box_2
elif len(box_2) == 0:
new_boxes = box_1
else:
new_boxes = np.concatenate([box_1, box_2], axis=0)
return new_image, new_boxes
def get_near_points(self, x, y, i, j):
sub_x = x - i
sub_y = y - j
if sub_x > 0.5 and sub_y > 0.5:
return [[0, 0], [1, 0], [0, 1]]
elif sub_x < 0.5 and sub_y > 0.5:
return [[0, 0], [-1, 0], [0, 1]]
elif sub_x < 0.5 and sub_y < 0.5:
return [[0, 0], [-1, 0], [0, -1]]
else:
return [[0, 0], [1, 0], [0, -1]]
def get_target(self, targets):
#-----------------------------------------------------------#
# 一共有三个特征层数
#-----------------------------------------------------------#
num_layers = len(self.anchors_mask)
input_shape = np.array(self.input_shape, dtype='int32')
grid_shapes = [input_shape // {0:32, 1:16, 2:8, 3:4}[l] for l in range(num_layers)]
y_true = [np.zeros((len(self.anchors_mask[l]), grid_shapes[l][0], grid_shapes[l][1], self.bbox_attrs), dtype='float32') for l in range(num_layers)]
box_best_ratio = [np.zeros((len(self.anchors_mask[l]), grid_shapes[l][0], grid_shapes[l][1]), dtype='float32') for l in range(num_layers)]
if len(targets) == 0:
return y_true
for l in range(num_layers):
in_h, in_w = grid_shapes[l]
anchors = np.array(self.anchors) / {0:32, 1:16, 2:8, 3:4}[l]
batch_target = np.zeros_like(targets)
#-------------------------------------------------------#
# 计算出正样本在特征层上的中心点
#-------------------------------------------------------#
batch_target[:, [0,2]] = targets[:, [0,2]] * in_w
batch_target[:, [1,3]] = targets[:, [1,3]] * in_h
batch_target[:, 4] = targets[:, 4]
#-------------------------------------------------------#
# wh : num_true_box, 2
# np.expand_dims(wh, 1) : num_true_box, 1, 2
# anchors : 9, 2
# np.expand_dims(anchors, 0) : 1, 9, 2
#
# ratios_of_gt_anchors代表每一个真实框和每一个先验框的宽高的比值
# ratios_of_gt_anchors : num_true_box, 9, 2
# ratios_of_anchors_gt代表每一个先验框和每一个真实框的宽高的比值
# ratios_of_anchors_gt : num_true_box, 9, 2
#
# ratios : num_true_box, 9, 4
# max_ratios代表每一个真实框和每一个先验框的宽高的比值的最大值
# max_ratios : num_true_box, 9
#-------------------------------------------------------#
ratios_of_gt_anchors = np.expand_dims(batch_target[:, 2:4], 1) / np.expand_dims(anchors, 0)
ratios_of_anchors_gt = np.expand_dims(anchors, 0) / np.expand_dims(batch_target[:, 2:4], 1)
ratios = np.concatenate([ratios_of_gt_anchors, ratios_of_anchors_gt], axis = -1)
max_ratios = np.max(ratios, axis = -1)
for t, ratio in enumerate(max_ratios):
#-------------------------------------------------------#
# ratio : 9
#-------------------------------------------------------#
over_threshold = ratio < self.threshold
over_threshold[np.argmin(ratio)] = True
for k, mask in enumerate(self.anchors_mask[l]):
if not over_threshold[mask]:
continue
#----------------------------------------#
# 获得真实框属于哪个网格点
# x 1.25 => 1
# y 3.75 => 3
#----------------------------------------#
i = int(np.floor(batch_target[t, 0]))
j = int(np.floor(batch_target[t, 1]))
offsets = self.get_near_points(batch_target[t, 0], batch_target[t, 1], i, j)
for offset in offsets:
local_i = i + offset[0]
local_j = j + offset[1]
if local_i >= in_w or local_i < 0 or local_j >= in_h or local_j < 0:
continue
if box_best_ratio[l][k, local_j, local_i] != 0:
if box_best_ratio[l][k, local_j, local_i] > ratio[mask]:
y_true[l][k, local_j, local_i, :] = 0
else:
continue
#----------------------------------------#
# 取出真实框的种类
#----------------------------------------#
c = int(batch_target[t, 4])
#----------------------------------------#
# tx、ty代表中心调整参数的真实值
#----------------------------------------#
y_true[l][k, local_j, local_i, 0] = batch_target[t, 0]
y_true[l][k, local_j, local_i, 1] = batch_target[t, 1]
y_true[l][k, local_j, local_i, 2] = batch_target[t, 2]
y_true[l][k, local_j, local_i, 3] = batch_target[t, 3]
y_true[l][k, local_j, local_i, 4] = 1
y_true[l][k, local_j, local_i, c + 5] = 1
#----------------------------------------#
# 获得当前先验框最好的比例
#----------------------------------------#
box_best_ratio[l][k, local_j, local_i] = ratio[mask]
return y_true
# DataLoader中collate_fn使用
def yolo_dataset_collate(batch):
images = []
bboxes = []
y_trues = [[] for _ in batch[0][2]]
for img, box, y_true in batch:
images.append(img)
bboxes.append(box)
for i, sub_y_true in enumerate(y_true):
y_trues[i].append(sub_y_true)
images = torch.from_numpy(np.array(images)).type(torch.FloatTensor)
bboxes = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in bboxes]
y_trues = [torch.from_numpy(np.array(ann, np.float32)).type(torch.FloatTensor) for ann in y_trues]
return images, bboxes,y_trues