Initial

2024-06-13 12:13:54 +08:00
commit db40d1af1b
38 changed files with 5006 additions and 0 deletions
--- a/utils/dataloader.py
+++ b/utils/dataloader.py
@@ -0,0 +1,504 @@
+from random import sample, shuffle
+
+import cv2
+import numpy as np
+import torch
+from PIL import Image
+from torch.utils.data.dataset import Dataset
+
+from utils.utils import cvtColor, preprocess_input
+
+
+class YoloDataset(Dataset):
+    def __init__(self, annotation_lines, input_shape, num_classes, anchors, anchors_mask, epoch_length, \
+                        mosaic, mixup, mosaic_prob, mixup_prob, train, special_aug_ratio = 0.7):
+        super(YoloDataset, self).__init__()
+        self.annotation_lines   = annotation_lines
+        self.input_shape        = input_shape
+        self.num_classes        = num_classes
+        self.anchors            = anchors
+        self.anchors_mask       = anchors_mask
+        self.epoch_length       = epoch_length
+        self.mosaic             = mosaic
+        self.mosaic_prob        = mosaic_prob
+        self.mixup              = mixup
+        self.mixup_prob         = mixup_prob
+        self.train              = train
+        self.special_aug_ratio  = special_aug_ratio
+
+        self.epoch_now          = -1
+        self.length             = len(self.annotation_lines)
+        
+        self.bbox_attrs         = 5 + num_classes
+        self.threshold          = 4
+
+    def __len__(self):
+        return self.length
+
+    def __getitem__(self, index):
+        index       = index % self.length
+
+        #---------------------------------------------------#
+        #   训练时进行数据的随机增强
+        #   验证时不进行数据的随机增强
+        #---------------------------------------------------#
+        if self.mosaic and self.rand() < self.mosaic_prob and self.epoch_now < self.epoch_length * self.special_aug_ratio:
+            lines = sample(self.annotation_lines, 3)
+            lines.append(self.annotation_lines[index])
+            shuffle(lines)
+            image, box  = self.get_random_data_with_Mosaic(lines, self.input_shape)
+            
+            if self.mixup and self.rand() < self.mixup_prob:
+                lines           = sample(self.annotation_lines, 1)
+                image_2, box_2  = self.get_random_data(lines[0], self.input_shape, random = self.train)
+                image, box      = self.get_random_data_with_MixUp(image, box, image_2, box_2)
+        else:
+            image, box      = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
+
+        image       = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
+        box         = np.array(box, dtype=np.float32)
+        if len(box) != 0:
+            #---------------------------------------------------#
+            #   对真实框进行归一化，调整到0-1之间
+            #---------------------------------------------------#
+            box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
+            box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
+            #---------------------------------------------------#
+            #   序号为0、1的部分，为真实框的中心
+            #   序号为2、3的部分，为真实框的宽高
+            #   序号为4的部分，为真实框的种类
+            #---------------------------------------------------#
+            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
+            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
+        y_true = self.get_target(box)
+        return image, box, y_true
+
+    def rand(self, a=0, b=1):
+        return np.random.rand()*(b-a) + a
+
+    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
+        line    = annotation_line.split()
+        #------------------------------#
+        #   读取图像并转换成RGB图像
+        #------------------------------#
+        image   = Image.open(line[0])
+        image   = cvtColor(image)
+        #------------------------------#
+        #   获得图像的高宽与目标高宽
+        #------------------------------#
+        iw, ih  = image.size
+        h, w    = input_shape
+        #------------------------------#
+        #   获得预测框
+        #------------------------------#
+        box     = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
+
+        if not random:
+            scale = min(w/iw, h/ih)
+            nw = int(iw*scale)
+            nh = int(ih*scale)
+            dx = (w-nw)//2
+            dy = (h-nh)//2
+
+            #---------------------------------#
+            #   将图像多余的部分加上灰条
+            #---------------------------------#
+            image       = image.resize((nw,nh), Image.BICUBIC)
+            new_image   = Image.new('RGB', (w,h), (128,128,128))
+            new_image.paste(image, (dx, dy))
+            image_data  = np.array(new_image, np.float32)
+
+            #---------------------------------#
+            #   对真实框进行调整
+            #---------------------------------#
+            if len(box)>0:
+                np.random.shuffle(box)
+                box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
+                box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
+                box[:, 0:2][box[:, 0:2]<0] = 0
+                box[:, 2][box[:, 2]>w] = w
+                box[:, 3][box[:, 3]>h] = h
+                box_w = box[:, 2] - box[:, 0]
+                box_h = box[:, 3] - box[:, 1]
+                box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
+
+            return image_data, box
+                
+        #------------------------------------------#
+        #   对图像进行缩放并且进行长和宽的扭曲
+        #------------------------------------------#
+        new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
+        scale = self.rand(.25, 2)
+        if new_ar < 1:
+            nh = int(scale*h)
+            nw = int(nh*new_ar)
+        else:
+            nw = int(scale*w)
+            nh = int(nw/new_ar)
+        image = image.resize((nw,nh), Image.BICUBIC)
+
+        #------------------------------------------#
+        #   将图像多余的部分加上灰条
+        #------------------------------------------#
+        dx = int(self.rand(0, w-nw))
+        dy = int(self.rand(0, h-nh))
+        new_image = Image.new('RGB', (w,h), (128,128,128))
+        new_image.paste(image, (dx, dy))
+        image = new_image
+
+        #------------------------------------------#
+        #   翻转图像
+        #------------------------------------------#
+        flip = self.rand()<.5
+        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
+
+        image_data      = np.array(image, np.uint8)
+        #---------------------------------#
+        #   对图像进行色域变换
+        #   计算色域变换的参数
+        #---------------------------------#
+        r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
+        #---------------------------------#
+        #   将图像转到HSV上
+        #---------------------------------#
+        hue, sat, val   = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
+        dtype           = image_data.dtype
+        #---------------------------------#
+        #   应用变换
+        #---------------------------------#
+        x       = np.arange(0, 256, dtype=r.dtype)
+        lut_hue = ((x * r[0]) % 180).astype(dtype)
+        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
+        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
+
+        image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
+        image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)
+
+        #---------------------------------#
+        #   对真实框进行调整
+        #---------------------------------#
+        if len(box)>0:
+            np.random.shuffle(box)
+            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
+            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
+            if flip: box[:, [0,2]] = w - box[:, [2,0]]
+            box[:, 0:2][box[:, 0:2]<0] = 0
+            box[:, 2][box[:, 2]>w] = w
+            box[:, 3][box[:, 3]>h] = h
+            box_w = box[:, 2] - box[:, 0]
+            box_h = box[:, 3] - box[:, 1]
+            box = box[np.logical_and(box_w>1, box_h>1)] 
+        
+        return image_data, box
+    
+    def merge_bboxes(self, bboxes, cutx, cuty):
+        merge_bbox = []
+        for i in range(len(bboxes)):
+            for box in bboxes[i]:
+                tmp_box = []
+                x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
+
+                if i == 0:
+                    if y1 > cuty or x1 > cutx:
+                        continue
+                    if y2 >= cuty and y1 <= cuty:
+                        y2 = cuty
+                    if x2 >= cutx and x1 <= cutx:
+                        x2 = cutx
+
+                if i == 1:
+                    if y2 < cuty or x1 > cutx:
+                        continue
+                    if y2 >= cuty and y1 <= cuty:
+                        y1 = cuty
+                    if x2 >= cutx and x1 <= cutx:
+                        x2 = cutx
+
+                if i == 2:
+                    if y2 < cuty or x2 < cutx:
+                        continue
+                    if y2 >= cuty and y1 <= cuty:
+                        y1 = cuty
+                    if x2 >= cutx and x1 <= cutx:
+                        x1 = cutx
+
+                if i == 3:
+                    if y1 > cuty or x2 < cutx:
+                        continue
+                    if y2 >= cuty and y1 <= cuty:
+                        y2 = cuty
+                    if x2 >= cutx and x1 <= cutx:
+                        x1 = cutx
+                tmp_box.append(x1)
+                tmp_box.append(y1)
+                tmp_box.append(x2)
+                tmp_box.append(y2)
+                tmp_box.append(box[-1])
+                merge_bbox.append(tmp_box)
+        return merge_bbox
+
+    def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
+        h, w = input_shape
+        min_offset_x = self.rand(0.3, 0.7)
+        min_offset_y = self.rand(0.3, 0.7)
+
+        image_datas = [] 
+        box_datas   = []
+        index       = 0
+        for line in annotation_line:
+            #---------------------------------#
+            #   每一行进行分割
+            #---------------------------------#
+            line_content = line.split()
+            #---------------------------------#
+            #   打开图片
+            #---------------------------------#
+            image = Image.open(line_content[0])
+            image = cvtColor(image)
+            
+            #---------------------------------#
+            #   图片的大小
+            #---------------------------------#
+            iw, ih = image.size
+            #---------------------------------#
+            #   保存框的位置
+            #---------------------------------#
+            box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]])
+            
+            #---------------------------------#
+            #   是否翻转图片
+            #---------------------------------#
+            flip = self.rand()<.5
+            if flip and len(box)>0:
+                image = image.transpose(Image.FLIP_LEFT_RIGHT)
+                box[:, [0,2]] = iw - box[:, [2,0]]
+
+            #------------------------------------------#
+            #   对图像进行缩放并且进行长和宽的扭曲
+            #------------------------------------------#
+            new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
+            scale = self.rand(.4, 1)
+            if new_ar < 1:
+                nh = int(scale*h)
+                nw = int(nh*new_ar)
+            else:
+                nw = int(scale*w)
+                nh = int(nw/new_ar)
+            image = image.resize((nw, nh), Image.BICUBIC)
+
+            #-----------------------------------------------#
+            #   将图片进行放置，分别对应四张分割图片的位置
+            #-----------------------------------------------#
+            if index == 0:
+                dx = int(w*min_offset_x) - nw
+                dy = int(h*min_offset_y) - nh
+            elif index == 1:
+                dx = int(w*min_offset_x) - nw
+                dy = int(h*min_offset_y)
+            elif index == 2:
+                dx = int(w*min_offset_x)
+                dy = int(h*min_offset_y)
+            elif index == 3:
+                dx = int(w*min_offset_x)
+                dy = int(h*min_offset_y) - nh
+            
+            new_image = Image.new('RGB', (w,h), (128,128,128))
+            new_image.paste(image, (dx, dy))
+            image_data = np.array(new_image)
+
+            index = index + 1
+            box_data = []
+            #---------------------------------#
+            #   对box进行重新处理
+            #---------------------------------#
+            if len(box)>0:
+                np.random.shuffle(box)
+                box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
+                box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
+                box[:, 0:2][box[:, 0:2]<0] = 0
+                box[:, 2][box[:, 2]>w] = w
+                box[:, 3][box[:, 3]>h] = h
+                box_w = box[:, 2] - box[:, 0]
+                box_h = box[:, 3] - box[:, 1]
+                box = box[np.logical_and(box_w>1, box_h>1)]
+                box_data = np.zeros((len(box),5))
+                box_data[:len(box)] = box
+            
+            image_datas.append(image_data)
+            box_datas.append(box_data)
+
+        #---------------------------------#
+        #   将图片分割，放在一起
+        #---------------------------------#
+        cutx = int(w * min_offset_x)
+        cuty = int(h * min_offset_y)
+
+        new_image = np.zeros([h, w, 3])
+        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
+        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
+        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
+        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
+
+        new_image       = np.array(new_image, np.uint8)
+        #---------------------------------#
+        #   对图像进行色域变换
+        #   计算色域变换的参数
+        #---------------------------------#
+        r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
+        #---------------------------------#
+        #   将图像转到HSV上
+        #---------------------------------#
+        hue, sat, val   = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
+        dtype           = new_image.dtype
+        #---------------------------------#
+        #   应用变换
+        #---------------------------------#
+        x       = np.arange(0, 256, dtype=r.dtype)
+        lut_hue = ((x * r[0]) % 180).astype(dtype)
+        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
+        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
+
+        new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
+        new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)
+
+        #---------------------------------#
+        #   对框进行进一步的处理
+        #---------------------------------#
+        new_boxes = self.merge_bboxes(box_datas, cutx, cuty)
+
+        return new_image, new_boxes
+
+    def get_random_data_with_MixUp(self, image_1, box_1, image_2, box_2):
+        new_image = np.array(image_1, np.float32) * 0.5 + np.array(image_2, np.float32) * 0.5
+        if len(box_1) == 0:
+            new_boxes = box_2
+        elif len(box_2) == 0:
+            new_boxes = box_1
+        else:
+            new_boxes = np.concatenate([box_1, box_2], axis=0)
+        return new_image, new_boxes
+    
+    def get_near_points(self, x, y, i, j):
+        sub_x = x - i
+        sub_y = y - j
+        if sub_x > 0.5 and sub_y > 0.5:
+            return [[0, 0], [1, 0], [0, 1]]
+        elif sub_x < 0.5 and sub_y > 0.5:
+            return [[0, 0], [-1, 0], [0, 1]]
+        elif sub_x < 0.5 and sub_y < 0.5:
+            return [[0, 0], [-1, 0], [0, -1]]
+        else:
+            return [[0, 0], [1, 0], [0, -1]]
+
+    def get_target(self, targets):
+        #-----------------------------------------------------------#
+        #   一共有三个特征层数
+        #-----------------------------------------------------------#
+        num_layers  = len(self.anchors_mask)
+        
+        input_shape = np.array(self.input_shape, dtype='int32')
+        grid_shapes = [input_shape // {0:32, 1:16, 2:8, 3:4}[l] for l in range(num_layers)]
+        y_true      = [np.zeros((len(self.anchors_mask[l]), grid_shapes[l][0], grid_shapes[l][1], self.bbox_attrs), dtype='float32') for l in range(num_layers)]
+        box_best_ratio = [np.zeros((len(self.anchors_mask[l]), grid_shapes[l][0], grid_shapes[l][1]), dtype='float32') for l in range(num_layers)]
+        
+        if len(targets) == 0:
+            return y_true
+        
+        for l in range(num_layers):
+            in_h, in_w      = grid_shapes[l]
+            anchors         = np.array(self.anchors) / {0:32, 1:16, 2:8, 3:4}[l]
+            
+            batch_target = np.zeros_like(targets)
+            #-------------------------------------------------------#
+            #   计算出正样本在特征层上的中心点
+            #-------------------------------------------------------#
+            batch_target[:, [0,2]]  = targets[:, [0,2]] * in_w
+            batch_target[:, [1,3]]  = targets[:, [1,3]] * in_h
+            batch_target[:, 4]      = targets[:, 4]
+            #-------------------------------------------------------#
+            #   wh                          : num_true_box, 2
+            #   np.expand_dims(wh, 1)       : num_true_box, 1, 2
+            #   anchors                     : 9, 2
+            #   np.expand_dims(anchors, 0)  : 1, 9, 2
+            #   
+            #   ratios_of_gt_anchors代表每一个真实框和每一个先验框的宽高的比值
+            #   ratios_of_gt_anchors    : num_true_box, 9, 2
+            #   ratios_of_anchors_gt代表每一个先验框和每一个真实框的宽高的比值
+            #   ratios_of_anchors_gt    : num_true_box, 9, 2
+            #
+            #   ratios                  : num_true_box, 9, 4
+            #   max_ratios代表每一个真实框和每一个先验框的宽高的比值的最大值
+            #   max_ratios              : num_true_box, 9
+            #-------------------------------------------------------#
+            ratios_of_gt_anchors = np.expand_dims(batch_target[:, 2:4], 1) / np.expand_dims(anchors, 0)
+            ratios_of_anchors_gt = np.expand_dims(anchors, 0) / np.expand_dims(batch_target[:, 2:4], 1)
+            ratios               = np.concatenate([ratios_of_gt_anchors, ratios_of_anchors_gt], axis = -1)
+            max_ratios           = np.max(ratios, axis = -1)
+            
+            for t, ratio in enumerate(max_ratios):
+                #-------------------------------------------------------#
+                #   ratio : 9
+                #-------------------------------------------------------#
+                over_threshold = ratio < self.threshold
+                over_threshold[np.argmin(ratio)] = True
+                for k, mask in enumerate(self.anchors_mask[l]):
+                    if not over_threshold[mask]:
+                        continue
+                    #----------------------------------------#
+                    #   获得真实框属于哪个网格点
+                    #   x  1.25     => 1
+                    #   y  3.75     => 3
+                    #----------------------------------------#
+                    i = int(np.floor(batch_target[t, 0]))
+                    j = int(np.floor(batch_target[t, 1]))
+                    
+                    offsets = self.get_near_points(batch_target[t, 0], batch_target[t, 1], i, j)
+                    for offset in offsets:
+                        local_i = i + offset[0]
+                        local_j = j + offset[1]
+
+                        if local_i >= in_w or local_i < 0 or local_j >= in_h or local_j < 0:
+                            continue
+
+                        if box_best_ratio[l][k, local_j, local_i] != 0:
+                            if box_best_ratio[l][k, local_j, local_i] > ratio[mask]:
+                                y_true[l][k, local_j, local_i, :] = 0
+                            else:
+                                continue
+                            
+                        #----------------------------------------#
+                        #   取出真实框的种类
+                        #----------------------------------------#
+                        c = int(batch_target[t, 4])
+
+                        #----------------------------------------#
+                        #   tx、ty代表中心调整参数的真实值
+                        #----------------------------------------#
+                        y_true[l][k, local_j, local_i, 0] = batch_target[t, 0]
+                        y_true[l][k, local_j, local_i, 1] = batch_target[t, 1]
+                        y_true[l][k, local_j, local_i, 2] = batch_target[t, 2]
+                        y_true[l][k, local_j, local_i, 3] = batch_target[t, 3]
+                        y_true[l][k, local_j, local_i, 4] = 1
+                        y_true[l][k, local_j, local_i, c + 5] = 1
+                        #----------------------------------------#
+                        #   获得当前先验框最好的比例
+                        #----------------------------------------#
+                        box_best_ratio[l][k, local_j, local_i] = ratio[mask]
+                        
+        return y_true
+    
+# DataLoader中collate_fn使用
+def yolo_dataset_collate(batch):
+    images  = []
+    bboxes  = []
+    y_trues = [[] for _ in batch[0][2]]
+    for img, box, y_true in batch:
+        images.append(img)
+        bboxes.append(box)
+        for i, sub_y_true in enumerate(y_true):
+            y_trues[i].append(sub_y_true)
+            
+    images  = torch.from_numpy(np.array(images)).type(torch.FloatTensor)
+    bboxes  = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in bboxes]
+    y_trues = [torch.from_numpy(np.array(ann, np.float32)).type(torch.FloatTensor) for ann in y_trues]
+    return images, bboxes,y_trues