Initial
This commit is contained in:
		
							
								
								
									
										504
									
								
								utils/dataloader.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										504
									
								
								utils/dataloader.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,504 @@ | ||||
| from random import sample, shuffle | ||||
|  | ||||
| import cv2 | ||||
| import numpy as np | ||||
| import torch | ||||
| from PIL import Image | ||||
| from torch.utils.data.dataset import Dataset | ||||
|  | ||||
| from utils.utils import cvtColor, preprocess_input | ||||
|  | ||||
|  | ||||
class YoloDataset(Dataset):
    """Dataset yielding augmented images together with YOLO training targets.

    Every annotation line is a whitespace-separated record: an image path
    followed by zero or more ``x1,y1,x2,y2,class`` integer boxes given as
    corner coordinates in pixels of the source image.

    ``__getitem__`` returns ``(image, box, y_true)`` where ``image`` is the
    channel-first float array, ``box`` holds the normalised
    ``cx, cy, w, h, class`` rows and ``y_true`` is the list of per-layer
    target grids built by :meth:`get_target`.
    """

    # Down-sampling stride of each detection layer, keyed by layer index.
    _LAYER_STRIDES = {0: 32, 1: 16, 2: 8, 3: 4}

    def __init__(self, annotation_lines, input_shape, num_classes, anchors, anchors_mask, epoch_length,
                 mosaic, mixup, mosaic_prob, mixup_prob, train, special_aug_ratio=0.7):
        """Store the dataset configuration.

        Args:
            annotation_lines: sequence of annotation strings (see class doc).
            input_shape: ``(height, width)`` of the network input.
            num_classes: number of object classes.
            anchors: array of ``(width, height)`` anchor sizes in input pixels.
            anchors_mask: for each detection layer, the indices into
                ``anchors`` used by that layer.
            epoch_length: total number of epochs; combined with
                ``special_aug_ratio`` to decide when mosaic is switched off.
            mosaic: enable mosaic augmentation.
            mixup: enable mix-up on top of mosaic samples.
            mosaic_prob: probability of using mosaic for a sample.
            mixup_prob: probability of applying mix-up to a mosaic sample.
            train: when true, non-mosaic samples get random augmentation.
            special_aug_ratio: mosaic is only used while
                ``epoch_now < epoch_length * special_aug_ratio``.
        """
        super().__init__()
        self.annotation_lines   = annotation_lines
        self.input_shape        = input_shape
        self.num_classes        = num_classes
        self.anchors            = anchors
        self.anchors_mask       = anchors_mask
        self.epoch_length       = epoch_length
        self.mosaic             = mosaic
        self.mosaic_prob        = mosaic_prob
        self.mixup              = mixup
        self.mixup_prob         = mixup_prob
        self.train              = train
        self.special_aug_ratio  = special_aug_ratio

        # NOTE(review): presumably updated by the training loop each epoch;
        # nothing in this class changes it.
        self.epoch_now          = -1
        self.length             = len(self.annotation_lines)

        # x, y, w, h, objectness + one slot per class.
        self.bbox_attrs         = 5 + num_classes
        # Maximum allowed width/height ratio between a box and an anchor.
        self.threshold          = 4

    def __len__(self):
        """Return the number of annotation lines."""
        return self.length

    def __getitem__(self, index):
        """Build one training sample.

        Returns:
            ``(image, box, y_true)``: the preprocessed channel-first image,
            the boxes as normalised ``cx, cy, w, h, class`` rows (float32)
            and the per-layer targets from :meth:`get_target`.
        """
        index = index % self.length

        #---------------------------------------------------#
        #   Mosaic (optionally followed by mix-up) is only
        #   used during the first part of training; otherwise
        #   a single image is loaded, randomly augmented only
        #   when self.train is set.
        #---------------------------------------------------#
        use_mosaic = (self.mosaic and self.rand() < self.mosaic_prob
                      and self.epoch_now < self.epoch_length * self.special_aug_ratio)
        if use_mosaic:
            if self.length >= 3:
                lines = sample(self.annotation_lines, 3)
            else:
                # Too few images to draw three distinct ones; sample() would
                # raise ValueError, so draw with replacement instead.
                from random import choices
                lines = choices(self.annotation_lines, k=3)
            lines.append(self.annotation_lines[index])
            shuffle(lines)
            image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)

            if self.mixup and self.rand() < self.mixup_prob:
                lines           = sample(self.annotation_lines, 1)
                image_2, box_2  = self.get_random_data(lines[0], self.input_shape, random=self.train)
                image, box      = self.get_random_data_with_MixUp(image, box, image_2, box_2)
        else:
            image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random=self.train)

        # preprocess_input is defined in utils.utils; the result is moved
        # from HWC to CHW layout here.
        image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
        box   = np.array(box, dtype=np.float32)
        if len(box) != 0:
            #---------------------------------------------------#
            #   Normalise the corner coordinates to the 0-1 range
            #---------------------------------------------------#
            box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
            box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
            #---------------------------------------------------#
            #   Columns 0, 1 become the box centre,
            #   columns 2, 3 become the box width and height,
            #   column 4 stays the class index.
            #---------------------------------------------------#
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        y_true = self.get_target(box)
        return image, box, y_true

    def rand(self, a=0, b=1):
        """Return a uniform random float in ``[a, b)``."""
        return np.random.rand()*(b-a) + a

    def _load_annotation(self, annotation_line):
        """Open the image of an annotation line and parse its boxes.

        Returns the image (passed through ``cvtColor``) and an integer array
        with one ``x1, y1, x2, y2, class`` row per box (empty when the line
        lists no boxes).
        """
        fields = annotation_line.split()
        image  = cvtColor(Image.open(fields[0]))
        box    = np.array([np.array(list(map(int, item.split(',')))) for item in fields[1:]])
        return image, box

    def _fit_boxes(self, box, iw, ih, nw, nh, dx, dy, w, h, flip=False):
        """Map boxes from the source image onto the ``w`` x ``h`` canvas.

        The boxes are shuffled, scaled from ``(iw, ih)`` to ``(nw, nh)``,
        shifted by ``(dx, dy)``, optionally mirrored horizontally, clipped to
        the canvas, and boxes not larger than one pixel in both dimensions
        are discarded. ``box`` is modified in place; the filtered array is
        returned.
        """
        if len(box) == 0:
            return box
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]]*nw/iw + dx
        box[:, [1, 3]] = box[:, [1, 3]]*nh/ih + dy
        if flip:
            box[:, [0, 2]] = w - box[:, [2, 0]]
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        return box[np.logical_and(box_w > 1, box_h > 1)]

    def _random_hsv(self, image_data, hue, sat, val):
        """Apply a random hue / saturation / value gain to an RGB uint8 image.

        Each gain is drawn uniformly from ``1 +/- hue``, ``1 +/- sat`` and
        ``1 +/- val`` respectively and applied through lookup tables.
        """
        gains = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        #---------------------------------#
        #   Convert the image to HSV
        #---------------------------------#
        hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
        dtype = image_data.dtype
        #---------------------------------#
        #   Apply the gains via lookup tables
        #---------------------------------#
        x       = np.arange(0, 256, dtype=gains.dtype)
        lut_hue = ((x * gains[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * gains[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * gains[2], 0, 255).astype(dtype)

        shifted = cv2.merge((cv2.LUT(hue_ch, lut_hue), cv2.LUT(sat_ch, lut_sat), cv2.LUT(val_ch, lut_val)))
        return cv2.cvtColor(shifted, cv2.COLOR_HSV2RGB)

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
        """Load one annotated image and fit it to ``input_shape``.

        With ``random=False`` the image is letterboxed (aspect ratio kept,
        grey padding, centred) and returned as float32. With ``random=True``
        the image is randomly rescaled and distorted, placed at a random
        offset, randomly mirrored and colour-jittered, and returned as uint8.

        Returns:
            ``(image_data, box)`` with ``box`` as ``x1, y1, x2, y2, class``
            rows in canvas pixel coordinates.
        """
        #------------------------------#
        #   Read the image and its boxes
        #------------------------------#
        image, box = self._load_annotation(annotation_line)
        #------------------------------#
        #   Source size and target size
        #------------------------------#
        iw, ih  = image.size
        h, w    = input_shape

        if not random:
            scale = min(w/iw, h/ih)
            nw = int(iw*scale)
            nh = int(ih*scale)
            dx = (w-nw)//2
            dy = (h-nh)//2

            #---------------------------------#
            #   Pad the unused area with grey
            #---------------------------------#
            image       = image.resize((nw, nh), Image.BICUBIC)
            new_image   = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data  = np.array(new_image, np.float32)

            #---------------------------------#
            #   Adjust the ground-truth boxes
            #---------------------------------#
            box = self._fit_boxes(box, iw, ih, nw, nh, dx, dy, w, h)
            return image_data, box

        #------------------------------------------#
        #   Rescale the image and distort its
        #   aspect ratio
        #------------------------------------------#
        new_ar = iw/ih * self.rand(1-jitter, 1+jitter) / self.rand(1-jitter, 1+jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        #------------------------------------------#
        #   Paste at a random offset on a grey canvas
        #------------------------------------------#
        dx = int(self.rand(0, w-nw))
        dy = int(self.rand(0, h-nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        #------------------------------------------#
        #   Random horizontal flip
        #------------------------------------------#
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        #---------------------------------#
        #   Colour jitter in HSV space
        #---------------------------------#
        image_data = self._random_hsv(np.array(image, np.uint8), hue, sat, val)

        #---------------------------------#
        #   Adjust the ground-truth boxes
        #---------------------------------#
        box = self._fit_boxes(box, iw, ih, nw, nh, dx, dy, w, h, flip=flip)

        return image_data, box

    def merge_bboxes(self, bboxes, cutx, cuty):
        """Clip the boxes of the four mosaic tiles to their quadrants.

        ``bboxes[0..3]`` hold the boxes of the top-left, bottom-left,
        bottom-right and top-right tile respectively; ``cutx`` / ``cuty`` are
        the pixel coordinates of the mosaic seams. Boxes lying entirely
        outside their quadrant are dropped, boxes straddling a seam are
        clipped to it, and boxes left without positive width and height
        after clipping are discarded.

        Returns:
            A list of ``[x1, y1, x2, y2, class]`` lists.
        """
        merge_bbox = []
        for i, tile_boxes in enumerate(bboxes):
            for box in tile_boxes:
                x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

                if i < 4:
                    top  = i in (0, 3)
                    left = i in (0, 1)

                    # Entirely on the wrong side of a seam: not visible.
                    outside_y = y1 > cuty if top else y2 < cuty
                    outside_x = x1 > cutx if left else x2 < cutx
                    if outside_y or outside_x:
                        continue

                    # Straddling a seam: keep only the visible part.
                    if y1 <= cuty <= y2:
                        if top:
                            y2 = cuty
                        else:
                            y1 = cuty
                    if x1 <= cutx <= x2:
                        if left:
                            x2 = cutx
                        else:
                            x1 = cutx

                # A box touching the seam collapses to zero width/height when
                # clipped; such a box would cause a division by zero during
                # anchor matching, so it is dropped here.
                if x2 <= x1 or y2 <= y1:
                    continue

                merge_bbox.append([x1, y1, x2, y2, box[-1]])
        return merge_bbox

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
        """Build a mosaic sample from four annotation lines.

        Each image is randomly mirrored (only when it has boxes), rescaled
        and distorted, then anchored to the randomly chosen seam point so
        that it fills one quadrant. The quadrants are stitched together,
        colour-jittered, and the boxes are clipped with :meth:`merge_bboxes`.

        Args:
            annotation_line: sequence of four annotation strings.

        Returns:
            ``(new_image, new_boxes)``: the uint8 RGB mosaic and a list of
            ``[x1, y1, x2, y2, class]`` boxes.
        """
        h, w = input_shape
        min_offset_x = self.rand(0.3, 0.7)
        min_offset_y = self.rand(0.3, 0.7)
        #---------------------------------#
        #   Pixel position of the seams
        #---------------------------------#
        cutx = int(w * min_offset_x)
        cuty = int(h * min_offset_y)

        image_datas = []
        box_datas   = []
        for index, line in enumerate(annotation_line):
            #---------------------------------#
            #   Open the image, parse its boxes
            #---------------------------------#
            image, box = self._load_annotation(line)
            iw, ih = image.size

            #---------------------------------#
            #   Random horizontal flip
            #---------------------------------#
            flip = self.rand() < .5
            if flip and len(box) > 0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            #------------------------------------------#
            #   Rescale the image and distort its
            #   aspect ratio
            #------------------------------------------#
            new_ar = iw/ih * self.rand(1-jitter, 1+jitter) / self.rand(1-jitter, 1+jitter)
            scale = self.rand(.4, 1)
            if new_ar < 1:
                nh = int(scale*h)
                nw = int(nh*new_ar)
            else:
                nw = int(scale*w)
                nh = int(nw/new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            #-----------------------------------------------#
            #   Anchor the image to the seam point:
            #   0 top-left, 1 bottom-left,
            #   2 bottom-right, 3 top-right
            #-----------------------------------------------#
            dx = cutx - nw if index in (0, 1) else cutx
            dy = cuty - nh if index in (0, 3) else cuty

            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_datas.append(np.array(new_image))

            #---------------------------------#
            #   Move the boxes onto the canvas
            #---------------------------------#
            box_data = []
            if len(box) > 0:
                box = self._fit_boxes(box, iw, ih, nw, nh, dx, dy, w, h)
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box
            box_datas.append(box_data)

        #---------------------------------#
        #   Stitch the four quadrants
        #---------------------------------#
        new_image = np.zeros([h, w, 3], dtype=np.uint8)
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        #---------------------------------#
        #   Colour jitter in HSV space
        #---------------------------------#
        new_image = self._random_hsv(new_image, hue, sat, val)

        #---------------------------------#
        #   Clip the boxes to the quadrants
        #---------------------------------#
        new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

        return new_image, new_boxes

    def get_random_data_with_MixUp(self, image_1, box_1, image_2, box_2):
        """Blend two samples 50/50 and combine their boxes.

        Returns:
            ``(new_image, new_boxes)``: the float32 average of both images
            and the concatenation of both box sets (or the non-empty one
            when the other is empty).
        """
        new_image = np.array(image_1, np.float32) * 0.5 + np.array(image_2, np.float32) * 0.5
        if len(box_1) == 0:
            new_boxes = box_2
        elif len(box_2) == 0:
            new_boxes = box_1
        else:
            new_boxes = np.concatenate([box_1, box_2], axis=0)
        return new_image, new_boxes

    def get_near_points(self, x, y, i, j):
        """Return the grid-cell offsets that are responsible for a box centre.

        ``(x, y)`` is the centre in grid units and ``(i, j)`` its cell. The
        result always contains the cell itself plus the horizontal and the
        vertical neighbour selected by comparing the centre's position
        inside the cell with 0.5.
        """
        sub_x = x - i
        sub_y = y - j
        if sub_x > 0.5 and sub_y > 0.5:
            return [[0, 0], [1, 0], [0, 1]]
        elif sub_x < 0.5 and sub_y > 0.5:
            return [[0, 0], [-1, 0], [0, 1]]
        elif sub_x < 0.5 and sub_y < 0.5:
            return [[0, 0], [-1, 0], [0, -1]]
        else:
            return [[0, 0], [1, 0], [0, -1]]

    def get_target(self, targets):
        """Build the per-layer training targets for one image.

        Args:
            targets: float array of normalised ``cx, cy, w, h, class`` rows.

        Returns:
            A list with one float32 array per detection layer, shaped
            ``(anchors_in_layer, grid_h, grid_w, 5 + num_classes)``. A cell
            holds the box in grid units, objectness 1 and a one-hot class
            wherever a box was matched to that anchor; everything else is 0.
            Rows of ``targets`` without positive width and height are
            ignored.
        """
        #-----------------------------------------------------------#
        #   One target grid per detection layer
        #-----------------------------------------------------------#
        num_layers  = len(self.anchors_mask)

        input_shape = np.array(self.input_shape, dtype='int32')
        grid_shapes = [input_shape // self._LAYER_STRIDES[layer] for layer in range(num_layers)]
        y_true      = [np.zeros((len(self.anchors_mask[layer]), grid_shapes[layer][0], grid_shapes[layer][1], self.bbox_attrs), dtype='float32') for layer in range(num_layers)]
        # Best (smallest) box/anchor ratio already stored in each cell; 0 = free.
        box_best_ratio = [np.zeros((len(self.anchors_mask[layer]), grid_shapes[layer][0], grid_shapes[layer][1]), dtype='float32') for layer in range(num_layers)]

        if len(targets) == 0:
            return y_true

        # A zero-size box makes the anchor/box ratio infinite (division by
        # zero) and would be force-matched to anchor 0 through argmin below,
        # so such rows are dropped before matching.
        usable = np.logical_and(targets[:, 2] > 0, targets[:, 3] > 0)
        if not usable.all():
            targets = targets[usable]
            if len(targets) == 0:
                return y_true

        for layer in range(num_layers):
            in_h, in_w  = grid_shapes[layer]
            anchors     = np.array(self.anchors) / self._LAYER_STRIDES[layer]

            batch_target = np.zeros_like(targets)
            #-------------------------------------------------------#
            #   Box centre and size expressed in grid units
            #-------------------------------------------------------#
            batch_target[:, [0, 2]] = targets[:, [0, 2]] * in_w
            batch_target[:, [1, 3]] = targets[:, [1, 3]] * in_h
            batch_target[:, 4]      = targets[:, 4]
            #-------------------------------------------------------#
            #   batch_target[:, 2:4] expanded : num_true_box, 1, 2
            #   anchors expanded              : 1, num_anchors, 2
            #
            #   ratios_of_gt_anchors : box size / anchor size
            #                          num_true_box, num_anchors, 2
            #   ratios_of_anchors_gt : anchor size / box size
            #                          num_true_box, num_anchors, 2
            #
            #   ratios     : num_true_box, num_anchors, 4
            #   max_ratios : worst width/height ratio of each
            #                box/anchor pair
            #                num_true_box, num_anchors
            #-------------------------------------------------------#
            ratios_of_gt_anchors = np.expand_dims(batch_target[:, 2:4], 1) / np.expand_dims(anchors, 0)
            ratios_of_anchors_gt = np.expand_dims(anchors, 0) / np.expand_dims(batch_target[:, 2:4], 1)
            ratios               = np.concatenate([ratios_of_gt_anchors, ratios_of_anchors_gt], axis=-1)
            max_ratios           = np.max(ratios, axis=-1)

            for t, ratio in enumerate(max_ratios):
                #-------------------------------------------------------#
                #   ratio : num_anchors
                #   An anchor matches when its worst ratio is below the
                #   threshold; the best anchor always matches.
                #-------------------------------------------------------#
                over_threshold = ratio < self.threshold
                over_threshold[np.argmin(ratio)] = True
                for k, mask in enumerate(self.anchors_mask[layer]):
                    if not over_threshold[mask]:
                        continue
                    #----------------------------------------#
                    #   Grid cell containing the box centre
                    #   x  1.25     => 1
                    #   y  3.75     => 3
                    #----------------------------------------#
                    i = int(np.floor(batch_target[t, 0]))
                    j = int(np.floor(batch_target[t, 1]))

                    offsets = self.get_near_points(batch_target[t, 0], batch_target[t, 1], i, j)
                    for offset in offsets:
                        local_i = i + offset[0]
                        local_j = j + offset[1]

                        if local_i >= in_w or local_i < 0 or local_j >= in_h or local_j < 0:
                            continue

                        # Cell already taken: keep whichever box fits the
                        # anchor better (smaller ratio).
                        if box_best_ratio[layer][k, local_j, local_i] != 0:
                            if box_best_ratio[layer][k, local_j, local_i] > ratio[mask]:
                                y_true[layer][k, local_j, local_i, :] = 0
                            else:
                                continue

                        #----------------------------------------#
                        #   Class index of the box
                        #----------------------------------------#
                        c = int(batch_target[t, 4])

                        #----------------------------------------#
                        #   Box centre / size in grid units,
                        #   objectness and one-hot class
                        #----------------------------------------#
                        y_true[layer][k, local_j, local_i, 0] = batch_target[t, 0]
                        y_true[layer][k, local_j, local_i, 1] = batch_target[t, 1]
                        y_true[layer][k, local_j, local_i, 2] = batch_target[t, 2]
                        y_true[layer][k, local_j, local_i, 3] = batch_target[t, 3]
                        y_true[layer][k, local_j, local_i, 4] = 1
                        y_true[layer][k, local_j, local_i, c + 5] = 1
                        #----------------------------------------#
                        #   Remember the ratio stored in this cell
                        #----------------------------------------#
                        box_best_ratio[layer][k, local_j, local_i] = ratio[mask]

        return y_true
|      | ||||
# Used as the collate_fn of the DataLoader
def yolo_dataset_collate(batch):
    """Collate ``(image, box, y_true)`` samples into batch tensors.

    Returns:
        ``(images, bboxes, y_trues)``: a float tensor stacking all images,
        a list with one float tensor of boxes per sample, and a list with
        one stacked float tensor of targets per detection layer.
    """
    # The first sample decides how many detection layers there are.
    layer_count = len(batch[0][2])
    per_layer   = [[] for _ in range(layer_count)]
    image_list  = []
    box_list    = []

    for sample_image, sample_boxes, sample_targets in batch:
        image_list.append(sample_image)
        box_list.append(sample_boxes)
        for layer_idx, layer_target in enumerate(sample_targets):
            per_layer[layer_idx].append(layer_target)

    images = torch.from_numpy(np.array(image_list)).float()
    bboxes = [torch.from_numpy(boxes).float() for boxes in box_list]
    y_trues = []
    for layer_targets in per_layer:
        stacked = np.array(layer_targets, np.float32)
        y_trues.append(torch.from_numpy(stacked).float())
    return images, bboxes, y_trues
		Reference in New Issue
	
	Block a user