Initial
This commit is contained in:
BIN
utils/__pycache__/callbacks.cpython-310.pyc
Normal file
BIN
utils/__pycache__/callbacks.cpython-310.pyc
Normal file
Binary file not shown.
BIN
utils/__pycache__/dataloader.cpython-310.pyc
Normal file
BIN
utils/__pycache__/dataloader.cpython-310.pyc
Normal file
Binary file not shown.
BIN
utils/__pycache__/utils.cpython-310.pyc
Normal file
BIN
utils/__pycache__/utils.cpython-310.pyc
Normal file
Binary file not shown.
BIN
utils/__pycache__/utils_bbox.cpython-310.pyc
Normal file
BIN
utils/__pycache__/utils_bbox.cpython-310.pyc
Normal file
Binary file not shown.
BIN
utils/__pycache__/utils_fit.cpython-310.pyc
Normal file
BIN
utils/__pycache__/utils_fit.cpython-310.pyc
Normal file
Binary file not shown.
BIN
utils/__pycache__/utils_map.cpython-310.pyc
Normal file
BIN
utils/__pycache__/utils_map.cpython-310.pyc
Normal file
Binary file not shown.
232
utils/callbacks.py
Normal file
232
utils/callbacks.py
Normal file
@@ -0,0 +1,232 @@
|
||||
import datetime
|
||||
import os
|
||||
|
||||
import torch
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
import scipy.signal
|
||||
from matplotlib import pyplot as plt
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
import shutil
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from tqdm import tqdm
|
||||
from .utils import cvtColor, preprocess_input, resize_image
|
||||
from .utils_bbox import DecodeBox
|
||||
from .utils_map import get_coco_map, get_map
|
||||
|
||||
|
||||
class LossHistory():
    """Record per-epoch train/validation loss to text files, TensorBoard and a PNG plot.

    Args:
        log_dir: Directory receiving ``epoch_loss.txt``, ``epoch_val_loss.txt``,
            ``epoch_loss.png`` and the TensorBoard event files.
        model: Model handed to ``SummaryWriter.add_graph`` (best effort).
        input_shape: Indexable whose items 0 and 1 are used as the last two
            dimensions of the dummy input traced by ``add_graph``.
    """

    def __init__(self, log_dir, model, input_shape):
        self.log_dir = log_dir
        self.losses = []
        self.val_loss = []

        # exist_ok: do not crash when the log directory is already present
        # (e.g. a resumed run); append_loss() re-creates it on demand anyway.
        os.makedirs(self.log_dir, exist_ok=True)
        self.writer = SummaryWriter(self.log_dir)
        try:
            dummy_input = torch.randn(2, 3, input_shape[0], input_shape[1])
            self.writer.add_graph(model, dummy_input)
        except Exception:
            # Exporting the graph is optional; a model that cannot be traced
            # must not prevent training from starting.
            pass

    def append_loss(self, epoch, loss, val_loss):
        """Store one epoch's losses, append them to the log files and redraw the plot.

        Args:
            epoch: Step value passed to ``SummaryWriter.add_scalar``.
            loss: Training loss of the epoch.
            val_loss: Validation loss of the epoch.
        """
        os.makedirs(self.log_dir, exist_ok=True)

        self.losses.append(loss)
        self.val_loss.append(val_loss)

        with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f:
            f.write(str(loss))
            f.write("\n")
        with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f:
            f.write(str(val_loss))
            f.write("\n")

        self.writer.add_scalar('loss', loss, epoch)
        self.writer.add_scalar('val_loss', val_loss, epoch)
        self.loss_plot()

    def loss_plot(self):
        """Render raw (and, when possible, smoothed) loss curves to ``epoch_loss.png``."""
        iters = range(len(self.losses))

        plt.figure()
        plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss')
        plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss')
        try:
            # Smoothing window: 5 points for short histories, 15 otherwise.
            num = 5 if len(self.losses) < 25 else 15

            plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss')
            plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss')
        except Exception:
            # Presumably the filter rejects histories shorter than the window;
            # in that case only the raw curves are drawn.
            pass

        plt.grid(True)
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend(loc="upper right")

        plt.savefig(os.path.join(self.log_dir, "epoch_loss.png"))

        plt.cla()
        plt.close("all")
|
||||
|
||||
class EvalCallback():
    """Periodically run the detector on the validation lines and log the mAP curve.

    Each evaluated epoch writes ground-truth and detection text files below
    ``map_out_path``, computes a mAP value from them, appends it to
    ``<log_dir>/epoch_map.txt``, redraws ``<log_dir>/epoch_map.png`` and finally
    deletes ``map_out_path`` again.
    """

    def __init__(self, net, input_shape, anchors, anchors_mask, class_names, num_classes, val_lines, log_dir, cuda,
                 map_out_path=".temp_map_out", max_boxes=100, confidence=0.05, nms_iou=0.5, letterbox_image=True, MINOVERLAP=0.5, eval_flag=True, period=1):
        super(EvalCallback, self).__init__()

        self.net = net
        self.input_shape = input_shape
        self.anchors = anchors
        self.anchors_mask = anchors_mask
        self.class_names = class_names
        self.num_classes = num_classes
        self.val_lines = val_lines
        self.log_dir = log_dir
        self.cuda = cuda
        self.map_out_path = map_out_path
        self.max_boxes = max_boxes
        self.confidence = confidence
        self.nms_iou = nms_iou
        self.letterbox_image = letterbox_image
        self.MINOVERLAP = MINOVERLAP
        self.eval_flag = eval_flag
        self.period = period

        self.bbox_util = DecodeBox(self.anchors, self.num_classes, (self.input_shape[0], self.input_shape[1]), self.anchors_mask)

        # Both histories start with a 0 entry so the curve begins at the origin.
        self.maps = [0]
        self.epoches = [0]
        if self.eval_flag:
            # Make sure the log directory exists before appending to the map log.
            os.makedirs(self.log_dir, exist_ok=True)
            with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f:
                f.write(str(0))
                f.write("\n")

    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Run detection on one image and write ``detection-results/<image_id>.txt``.

        The result file is always created; it stays empty when the
        non-maximum-suppression step returns no detections.

        Args:
            image_id: File stem used for the result file name.
            image: Image to run through the network (it must provide ``.size``
                and ``.convert`` as used by the helper functions).
            class_names: Detections whose class name is not in this collection
                are skipped.
            map_out_path: Directory that contains ``detection-results``.
        """
        result_path = os.path.join(map_out_path, "detection-results/"+image_id+".txt")
        # The context manager closes the file on the early return and on errors.
        with open(result_path, "w", encoding='utf-8') as f:
            image_shape = np.array(np.shape(image)[0:2])
            # Convert to RGB so that non-RGB inputs do not break prediction.
            image = cvtColor(image)
            # Resize to the network input size (letterboxed with gray bars when
            # letterbox_image is set, plain resize otherwise).
            image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
            # Normalise, move channels first and add the batch dimension.
            image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

            with torch.no_grad():
                images = torch.from_numpy(image_data)
                if self.cuda:
                    images = images.cuda()
                # Forward pass followed by box decoding.
                outputs = self.net(images)
                outputs = self.bbox_util.decode_box(outputs)
                # Stack the predictions of all heads and apply non-maximum suppression.
                results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape,
                            image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou)

                if results[0] is None:
                    return

                # Column 6 is used as class index, columns 4 and 5 are multiplied
                # into the score and columns 0-3 are the box coordinates.
                top_label = np.array(results[0][:, 6], dtype = 'int32')
                top_conf = results[0][:, 4] * results[0][:, 5]
                top_boxes = results[0][:, :4]

            # Keep only the max_boxes highest scoring detections.
            top_100 = np.argsort(top_conf)[::-1][:self.max_boxes]
            top_boxes = top_boxes[top_100]
            top_conf = top_conf[top_100]
            top_label = top_label[top_100]

            for i, c in enumerate(top_label):
                predicted_class = self.class_names[int(c)]
                box = top_boxes[i]
                score = str(top_conf[i])

                top, left, bottom, right = box
                if predicted_class not in class_names:
                    continue

                f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))
        return

    def on_epoch_end(self, epoch, model_eval):
        """Evaluate ``model_eval`` when ``epoch`` is a multiple of ``period``.

        Args:
            epoch: Current epoch number.
            model_eval: Network to evaluate; it replaces ``self.net``.
        """
        if epoch % self.period == 0 and self.eval_flag:
            self.net = model_eval
            for sub_dir in ("ground-truth", "detection-results"):
                os.makedirs(os.path.join(self.map_out_path, sub_dir), exist_ok=True)
            print("Get map.")
            for annotation_line in tqdm(self.val_lines):
                line = annotation_line.split()
                # NOTE(review): everything after the first '.' of the file name is
                # dropped, so "a.b.jpg" and "a.c.jpg" would share one id - confirm
                # that validation file names contain a single dot.
                image_id = os.path.basename(line[0]).split('.')[0]
                # Ground-truth boxes: comma separated integers, one group per box.
                gt_boxes = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
                # Open the image and write the detection file; the context manager
                # releases the file handle afterwards.
                with Image.open(line[0]) as image:
                    self.get_map_txt(image_id, image, self.class_names, self.map_out_path)

                # Write the ground-truth file.
                with open(os.path.join(self.map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f:
                    for box in gt_boxes:
                        left, top, right, bottom, obj = box
                        obj_name = self.class_names[obj]
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))

            print("Calculate Map.")
            try:
                temp_map = get_coco_map(class_names = self.class_names, path = self.map_out_path)[1]
            except Exception:
                # Fall back to the second mAP implementation when the COCO-style
                # computation fails for any reason.
                temp_map = get_map(self.MINOVERLAP, False, path = self.map_out_path)
            self.maps.append(temp_map)
            self.epoches.append(epoch)

            with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f:
                f.write(str(temp_map))
                f.write("\n")

            plt.figure()
            plt.plot(self.epoches, self.maps, 'red', linewidth = 2, label='train map')

            plt.grid(True)
            plt.xlabel('Epoch')
            plt.ylabel('Map %s'%str(self.MINOVERLAP))
            plt.title('A Map Curve')
            plt.legend(loc="upper right")

            plt.savefig(os.path.join(self.log_dir, "epoch_map.png"))
            plt.cla()
            plt.close("all")

            print("Get map done.")
            # The temporary evaluation files are not kept between epochs.
            shutil.rmtree(self.map_out_path)
|
||||
504
utils/dataloader.py
Normal file
504
utils/dataloader.py
Normal file
@@ -0,0 +1,504 @@
|
||||
from random import sample, shuffle
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from torch.utils.data.dataset import Dataset
|
||||
|
||||
from utils.utils import cvtColor, preprocess_input
|
||||
|
||||
|
||||
class YoloDataset(Dataset):
    """Detection dataset yielding an image, its boxes and per-layer training targets.

    Every annotation line is ``<image path> x1,y1,x2,y2,cls x1,y1,x2,y2,cls ...``
    (whitespace separated box groups of comma separated integers).
    """

    # Down-sampling factor of each feature layer, keyed by layer index.
    _STRIDES = {0: 32, 1: 16, 2: 8, 3: 4}

    def __init__(self, annotation_lines, input_shape, num_classes, anchors, anchors_mask, epoch_length,
                 mosaic, mixup, mosaic_prob, mixup_prob, train, special_aug_ratio = 0.7):
        super(YoloDataset, self).__init__()
        self.annotation_lines = annotation_lines
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.anchors = anchors
        self.anchors_mask = anchors_mask
        self.epoch_length = epoch_length
        self.mosaic = mosaic
        self.mosaic_prob = mosaic_prob
        self.mixup = mixup
        self.mixup_prob = mixup_prob
        self.train = train
        self.special_aug_ratio = special_aug_ratio

        # Not updated anywhere in this class - presumably set by the training loop.
        self.epoch_now = -1
        self.length = len(self.annotation_lines)

        # 4 box values + objectness + one slot per class.
        self.bbox_attrs = 5 + num_classes
        # Maximum width/height ratio between a box and an anchor for a match.
        self.threshold = 4

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        """Return ``(image, box, y_true)`` for one sample.

        ``image`` is a channel-first float32 array, ``box`` holds normalised
        ``(cx, cy, w, h, cls)`` rows and ``y_true`` is the output of
        ``get_target`` for those boxes.
        """
        index = index % self.length

        # Mosaic (optionally followed by MixUp) is only used during the first
        # special_aug_ratio share of the epochs; otherwise a single image is
        # loaded, randomly augmented only when self.train is set.
        if self.mosaic and self.rand() < self.mosaic_prob and self.epoch_now < self.epoch_length * self.special_aug_ratio:
            lines = sample(self.annotation_lines, 3)
            lines.append(self.annotation_lines[index])
            shuffle(lines)
            image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)

            if self.mixup and self.rand() < self.mixup_prob:
                lines = sample(self.annotation_lines, 1)
                image_2, box_2 = self.get_random_data(lines[0], self.input_shape, random = self.train)
                image, box = self.get_random_data_with_MixUp(image, box, image_2, box_2)
        else:
            image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)

        image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
        box = np.array(box, dtype=np.float32)
        if len(box) != 0:
            # Normalise the corner coordinates to the 0-1 range.
            box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
            box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
            # Convert corners to centre/size: columns 0-1 become the centre,
            # columns 2-3 the width/height, column 4 stays the class.
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        y_true = self.get_target(box)
        return image, box, y_true

    def rand(self, a=0, b=1):
        """Return a uniform random number in ``[a, b)`` drawn from ``np.random``."""
        return np.random.rand()*(b-a) + a

    def _fit_boxes(self, box, nw, nh, iw, ih, dx, dy, w, h, flip=False):
        """Shuffle, rescale, shift, optionally mirror and clip boxes to the canvas.

        ``box`` is modified in place; the returned array keeps only boxes that
        are more than one pixel wide and high after clipping.
        """
        np.random.shuffle(box)
        box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
        box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
        if flip: box[:, [0,2]] = w - box[:, [2,0]]
        box[:, 0:2][box[:, 0:2]<0] = 0
        box[:, 2][box[:, 2]>w] = w
        box[:, 3][box[:, 3]>h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        return box[np.logical_and(box_w>1, box_h>1)]

    def _jitter_hsv(self, image_data, hue, sat, val):
        """Apply a random hue/saturation/value gain to an RGB image array."""
        # One random gain per channel, centred on 1.
        r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
        dtype = image_data.dtype
        # The gains are applied through 256-entry lookup tables.
        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        image_data = cv2.merge((cv2.LUT(hue_ch, lut_hue), cv2.LUT(sat_ch, lut_sat), cv2.LUT(val_ch, lut_val)))
        return cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
        """Load one annotated image and fit it to ``input_shape``.

        With ``random=False`` the image is letterboxed on a gray canvas. With
        ``random=True`` it is randomly rescaled, distorted, placed, mirrored
        and colour jittered. Returns ``(image_data, box)``.
        """
        line = annotation_line.split()
        # Read the image and make sure it is RGB.
        image = Image.open(line[0])
        image = cvtColor(image)
        # Source size and target size.
        iw, ih = image.size
        h, w = input_shape
        # Boxes of this image.
        box = np.array([np.array(list(map(int, item.split(',')))) for item in line[1:]])

        if not random:
            scale = min(w/iw, h/ih)
            nw = int(iw*scale)
            nh = int(ih*scale)
            dx = (w-nw)//2
            dy = (h-nh)//2

            # Pad the unused area with gray bars.
            image = image.resize((nw,nh), Image.BICUBIC)
            new_image = Image.new('RGB', (w,h), (128,128,128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image, np.float32)

            # Move the boxes along with the image.
            if len(box)>0:
                box = self._fit_boxes(box, nw, nh, iw, ih, dx, dy, w, h)

            return image_data, box

        # Random scale together with a random aspect-ratio distortion.
        new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        image = image.resize((nw,nh), Image.BICUBIC)

        # Random placement on a gray canvas.
        dx = int(self.rand(0, w-nw))
        dy = int(self.rand(0, h-nh))
        new_image = Image.new('RGB', (w,h), (128,128,128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Random horizontal flip.
        flip = self.rand()<.5
        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Colour jitter in HSV space.
        image_data = self._jitter_hsv(np.array(image, np.uint8), hue, sat, val)

        # Move the boxes along with the image.
        if len(box)>0:
            box = self._fit_boxes(box, nw, nh, iw, ih, dx, dy, w, h, flip=flip)

        return image_data, box

    def merge_bboxes(self, bboxes, cutx, cuty):
        """Clip the boxes of the four mosaic tiles to their quadrant and merge them.

        Args:
            bboxes: One box collection per tile, in the order top-left,
                bottom-left, bottom-right, top-right; a box is
                ``(x1, y1, x2, y2, ..., cls)``.
            cutx: x coordinate of the vertical cut line.
            cuty: y coordinate of the horizontal cut line.

        Returns:
            List of ``[x1, y1, x2, y2, cls]`` lists. Boxes outside their
            quadrant are dropped, and so are boxes that collapse to zero width
            or height because one of their edges lies exactly on a cut line.
        """
        merge_bbox = []
        for i, tile_boxes in enumerate(bboxes):
            for box in tile_boxes:
                x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

                if i == 0:
                    if y1 > cuty or x1 > cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y2 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x2 = cutx

                if i == 1:
                    if y2 < cuty or x1 > cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y1 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x2 = cutx

                if i == 2:
                    if y2 < cuty or x2 < cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y1 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x1 = cutx

                if i == 3:
                    if y1 > cuty or x2 < cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y2 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x1 = cutx

                # A box touching the cut line from the inside of a neighbouring
                # quadrant is clipped to zero area; such a box would later cause
                # a division by zero in get_target().
                if x2 <= x1 or y2 <= y1:
                    continue
                merge_bbox.append([x1, y1, x2, y2, box[-1]])
        return merge_bbox

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
        """Build one mosaic image from four annotation lines.

        Returns ``(new_image, new_boxes)`` where ``new_boxes`` is the list
        produced by ``merge_bboxes``.
        """
        h, w = input_shape
        min_offset_x = self.rand(0.3, 0.7)
        min_offset_y = self.rand(0.3, 0.7)

        image_datas = []
        box_datas = []
        for index, line in enumerate(annotation_line):
            # Split the line into the image path and the box groups.
            line_content = line.split()
            # Open the image.
            image = Image.open(line_content[0])
            image = cvtColor(image)

            # Image size.
            iw, ih = image.size
            # Boxes of this image.
            box = np.array([np.array(list(map(int, item.split(',')))) for item in line_content[1:]])

            # Random horizontal flip (only applied when the image has boxes).
            flip = self.rand()<.5
            if flip and len(box)>0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0,2]] = iw - box[:, [2,0]]

            # Random scale together with a random aspect-ratio distortion.
            new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
            scale = self.rand(.4, 1)
            if new_ar < 1:
                nh = int(scale*h)
                nw = int(nh*new_ar)
            else:
                nw = int(scale*w)
                nh = int(nw/new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            # Anchor each image at the cut point so that it fills its quadrant.
            if index == 0:
                dx = int(w*min_offset_x) - nw
                dy = int(h*min_offset_y) - nh
            elif index == 1:
                dx = int(w*min_offset_x) - nw
                dy = int(h*min_offset_y)
            elif index == 2:
                dx = int(w*min_offset_x)
                dy = int(h*min_offset_y)
            elif index == 3:
                dx = int(w*min_offset_x)
                dy = int(h*min_offset_y) - nh

            new_image = Image.new('RGB', (w,h), (128,128,128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            box_data = []
            # Move the boxes along with the image.
            if len(box)>0:
                box = self._fit_boxes(box, nw, nh, iw, ih, dx, dy, w, h)
                box_data = np.zeros((len(box),5))
                box_data[:len(box)] = box

            image_datas.append(image_data)
            box_datas.append(box_data)

        # Cut the four canvases at the cut point and stitch the quadrants together.
        cutx = int(w * min_offset_x)
        cuty = int(h * min_offset_y)

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        # Colour jitter in HSV space.
        new_image = self._jitter_hsv(np.array(new_image, np.uint8), hue, sat, val)

        # Clip the boxes of every tile to its quadrant.
        new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

        return new_image, new_boxes

    def get_random_data_with_MixUp(self, image_1, box_1, image_2, box_2):
        """Blend two images 50/50 and combine their boxes."""
        new_image = np.array(image_1, np.float32) * 0.5 + np.array(image_2, np.float32) * 0.5
        if len(box_1) == 0:
            new_boxes = box_2
        elif len(box_2) == 0:
            new_boxes = box_1
        else:
            new_boxes = np.concatenate([box_1, box_2], axis=0)
        return new_image, new_boxes

    def get_near_points(self, x, y, i, j):
        """Return the offsets of the cell ``(i, j)`` and its two nearest neighbours.

        The neighbours are chosen from the position of ``(x, y)`` relative to
        the cell centre; each offset is ``[di, dj]``.
        """
        sub_x = x - i
        sub_y = y - j
        if sub_x > 0.5 and sub_y > 0.5:
            return [[0, 0], [1, 0], [0, 1]]
        elif sub_x < 0.5 and sub_y > 0.5:
            return [[0, 0], [-1, 0], [0, 1]]
        elif sub_x < 0.5 and sub_y < 0.5:
            return [[0, 0], [-1, 0], [0, -1]]
        else:
            return [[0, 0], [1, 0], [0, -1]]

    def get_target(self, targets):
        """Build the per-layer training targets for normalised boxes.

        Args:
            targets: Array of ``(cx, cy, w, h, cls)`` rows in the 0-1 range.

        Returns:
            One float32 array per feature layer, indexed
            ``[anchor, grid_y, grid_x, attr]`` with ``attr`` being the box in
            grid units, objectness and a one-hot class vector.
        """
        # One entry of anchors_mask per feature layer.
        num_layers = len(self.anchors_mask)

        input_shape = np.array(self.input_shape, dtype='int32')
        strides = [self._STRIDES[l] for l in range(num_layers)]
        grid_shapes = [input_shape // stride for stride in strides]
        y_true = [np.zeros((len(self.anchors_mask[l]), grid_shapes[l][0], grid_shapes[l][1], self.bbox_attrs), dtype='float32') for l in range(num_layers)]
        box_best_ratio = [np.zeros((len(self.anchors_mask[l]), grid_shapes[l][0], grid_shapes[l][1]), dtype='float32') for l in range(num_layers)]

        if len(targets) == 0:
            return y_true

        for l in range(num_layers):
            in_h, in_w = grid_shapes[l]
            anchors = np.array(self.anchors) / strides[l]

            batch_target = np.zeros_like(targets)
            # Box centres and sizes expressed in grid units of this layer.
            batch_target[:, [0,2]] = targets[:, [0,2]] * in_w
            batch_target[:, [1,3]] = targets[:, [1,3]] * in_h
            batch_target[:, 4] = targets[:, 4]
            # Width/height ratios between every box and every anchor, in both
            # directions; the largest of the four values decides the match.
            #   ratios_of_gt_anchors : num_true_box, num_anchors, 2
            #   ratios_of_anchors_gt : num_true_box, num_anchors, 2
            #   ratios               : num_true_box, num_anchors, 4
            #   max_ratios           : num_true_box, num_anchors
            ratios_of_gt_anchors = np.expand_dims(batch_target[:, 2:4], 1) / np.expand_dims(anchors, 0)
            ratios_of_anchors_gt = np.expand_dims(anchors, 0) / np.expand_dims(batch_target[:, 2:4], 1)
            ratios = np.concatenate([ratios_of_gt_anchors, ratios_of_anchors_gt], axis = -1)
            max_ratios = np.max(ratios, axis = -1)

            for t, ratio in enumerate(max_ratios):
                # Anchors within the ratio threshold match; the best anchor
                # always matches so that every box gets at least one.
                over_threshold = ratio < self.threshold
                over_threshold[np.argmin(ratio)] = True
                for k, mask in enumerate(self.anchors_mask[l]):
                    if not over_threshold[mask]:
                        continue
                    # Grid cell containing the box centre,
                    # e.g. x 1.25 => 1, y 3.75 => 3.
                    i = int(np.floor(batch_target[t, 0]))
                    j = int(np.floor(batch_target[t, 1]))

                    offsets = self.get_near_points(batch_target[t, 0], batch_target[t, 1], i, j)
                    for offset in offsets:
                        local_i = i + offset[0]
                        local_j = j + offset[1]

                        if local_i >= in_w or local_i < 0 or local_j >= in_h or local_j < 0:
                            continue

                        # A cell that is already taken is only overwritten by
                        # a box with a smaller (better) ratio.
                        if box_best_ratio[l][k, local_j, local_i] != 0:
                            if box_best_ratio[l][k, local_j, local_i] > ratio[mask]:
                                y_true[l][k, local_j, local_i, :] = 0
                            else:
                                continue

                        # Class index of the box.
                        c = int(batch_target[t, 4])

                        # Box in grid units, objectness flag and one-hot class.
                        y_true[l][k, local_j, local_i, 0] = batch_target[t, 0]
                        y_true[l][k, local_j, local_i, 1] = batch_target[t, 1]
                        y_true[l][k, local_j, local_i, 2] = batch_target[t, 2]
                        y_true[l][k, local_j, local_i, 3] = batch_target[t, 3]
                        y_true[l][k, local_j, local_i, 4] = 1
                        y_true[l][k, local_j, local_i, c + 5] = 1
                        # Remember the ratio that currently owns this cell.
                        box_best_ratio[l][k, local_j, local_i] = ratio[mask]

        return y_true
|
||||
|
||||
# Used as the collate_fn of the DataLoader.
def yolo_dataset_collate(batch):
    """Merge ``(image, box, y_true)`` samples into batched float tensors.

    Returns:
        ``(images, bboxes, y_trues)``: the stacked images, one box tensor per
        sample, and one stacked target tensor per entry of ``y_true``.
    """
    image_list = []
    box_list = []
    # One bucket per target level, sized from the first sample.
    level_buckets = [[] for _ in batch[0][2]]
    for sample_image, sample_boxes, sample_targets in batch:
        image_list.append(sample_image)
        box_list.append(sample_boxes)
        for level, level_target in enumerate(sample_targets):
            level_buckets[level].append(level_target)

    stacked_images = np.array(image_list)
    images = torch.from_numpy(stacked_images).type(torch.FloatTensor)
    bboxes = [torch.from_numpy(boxes).type(torch.FloatTensor) for boxes in box_list]
    y_trues = [
        torch.from_numpy(np.array(bucket, np.float32)).type(torch.FloatTensor)
        for bucket in level_buckets
    ]
    return images, bboxes, y_trues
|
||||
117
utils/utils.py
Normal file
117
utils/utils.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
|
||||
|
||||
#---------------------------------------------------------#
#   Convert the image to RGB so that e.g. grayscale images
#   do not raise during prediction. Only RGB prediction is
#   supported; every other image type is converted to RGB.
#---------------------------------------------------------#
def cvtColor(image):
    """Return ``image`` unchanged if it has three channels, else ``image.convert('RGB')``."""
    dims = np.shape(image)
    has_three_channels = len(dims) == 3 and dims[2] == 3
    return image if has_three_channels else image.convert('RGB')
|
||||
|
||||
#---------------------------------------------------#
#   Resize the input image
#---------------------------------------------------#
def resize_image(image, size, letterbox_image):
    """Resize ``image`` to ``size`` given as ``(width, height)``.

    With ``letterbox_image`` set the aspect ratio is kept and the image is
    centred on a gray ``(128, 128, 128)`` canvas; otherwise it is stretched.
    """
    src_w, src_h = image.size
    dst_w, dst_h = size
    if not letterbox_image:
        return image.resize((dst_w, dst_h), Image.BICUBIC)

    # Largest scale at which the whole image still fits into the target.
    ratio = min(dst_w/src_w, dst_h/src_h)
    fit_w = int(src_w*ratio)
    fit_h = int(src_h*ratio)

    resized = image.resize((fit_w, fit_h), Image.BICUBIC)
    canvas = Image.new('RGB', size, (128,128,128))
    canvas.paste(resized, ((dst_w-fit_w)//2, (dst_h-fit_h)//2))
    return canvas
|
||||
|
||||
#---------------------------------------------------#
#   Load the class names
#---------------------------------------------------#
def get_classes(classes_path):
    """Read one class name per line from a UTF-8 text file.

    Surrounding whitespace is stripped and blank lines are skipped, so a
    trailing empty line does not add an empty class or inflate the count.

    Returns:
        ``(class_names, number_of_classes)``.
    """
    with open(classes_path, encoding='utf-8') as f:
        stripped = [line.strip() for line in f]
    class_names = [name for name in stripped if name]
    return class_names, len(class_names)
|
||||
|
||||
#---------------------------------------------------#
#   Load the anchor boxes
#---------------------------------------------------#
def get_anchors(anchors_path):
    """Load the anchors from the first line of a file.

    The line holds comma separated numbers that are grouped into pairs.

    Returns:
        ``(anchors, number_of_anchors)`` with ``anchors`` an ``(N, 2)`` array.
    """
    with open(anchors_path, encoding='utf-8') as f:
        first_line = f.readline()
    values = [float(token) for token in first_line.split(',')]
    pairs = np.array(values).reshape(-1, 2)
    return pairs, len(pairs)
|
||||
|
||||
#---------------------------------------------------#
#   Read the learning rate
#---------------------------------------------------#
def get_lr(optimizer):
    """Return the ``'lr'`` of the first parameter group, or ``None`` if there is none."""
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']
|
||||
|
||||
#---------------------------------------------------#
#   Set the random seeds
#---------------------------------------------------#
def seed_everything(seed=11):
    """Seed ``random``, NumPy and PyTorch (CPU and CUDA) and make cuDNN deterministic."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
|
||||
|
||||
#---------------------------------------------------#
#   Set the seeds of a DataLoader worker
#---------------------------------------------------#
def worker_init_fn(worker_id, rank, seed):
    """Seed ``random``, NumPy and PyTorch inside one DataLoader worker.

    The seed is derived from ``seed``, ``rank`` and ``worker_id`` together, so
    every worker of every rank gets its own reproducible random stream.
    Seeding with ``rank + seed`` alone would make all workers of a rank replay
    the same sequence of random numbers.

    Args:
        worker_id: Index of the worker inside its DataLoader.
        rank: Index of the process the DataLoader belongs to.
        seed: Base seed of the run.

    All three values must be non-negative integers.
    """
    # SeedSequence hashes the three values into one 32-bit seed, which avoids
    # the collisions a plain sum would have (rank 0/worker 1 vs rank 1/worker 0).
    worker_seed = int(np.random.SeedSequence([seed, rank, worker_id]).generate_state(1)[0])
    random.seed(worker_seed)
    np.random.seed(worker_seed)
    torch.manual_seed(worker_seed)
|
||||
|
||||
def preprocess_input(image):
    """Divide ``image`` by 255 in place and return the same object."""
    scale = 255.0
    # In-place division: the caller's array itself is modified.
    image /= scale
    return image
|
||||
|
||||
def show_config(**kwargs):
    """Print the given keyword arguments as a two-column table on stdout.

    Keys are right-aligned in a 25-character column and values in a
    40-character column; both are converted with ``str``.
    """
    rule = '-' * 70
    row_format = '|%25s | %40s|'

    print('Configurations:')
    print(rule)
    print(row_format % ('keys', 'values'))
    print(rule)
    for name in kwargs:
        print(row_format % (str(name), str(kwargs[name])))
    print(rule)
|
||||
|
||||
def download_weights(backbone, phi, model_dir="./model_data"):
    """Download the pretrained backbone weights into ``model_dir``.

    Args:
        backbone: backbone name. For ``"cspdarknet"`` the size suffix ``phi``
            is appended (``"cspdarknet_" + phi``) before the lookup.
        phi: size suffix used only for the ``"cspdarknet"`` backbone.
        model_dir: directory the weights are downloaded to; created if it
            does not exist.

    Raises:
        KeyError: if no download URL is known for the resolved backbone name.
    """
    import os

    from torch.hub import load_state_dict_from_url

    if backbone == "cspdarknet":
        backbone = backbone + "_" + phi

    download_urls = {
        "convnext_tiny"         : "https://github.com/bubbliiiing/yolov5-pytorch/releases/download/v1.0/convnext_tiny_1k_224_ema_no_jit.pth",
        "convnext_small"        : "https://github.com/bubbliiiing/yolov5-pytorch/releases/download/v1.0/convnext_small_1k_224_ema_no_jit.pth",
        "cspdarknet_s"          : 'https://github.com/bubbliiiing/yolov5-pytorch/releases/download/v1.0/cspdarknet_s_backbone.pth',
        'cspdarknet_m'          : 'https://github.com/bubbliiiing/yolov5-pytorch/releases/download/v1.0/cspdarknet_m_backbone.pth',
        'cspdarknet_l'          : 'https://github.com/bubbliiiing/yolov5-pytorch/releases/download/v1.0/cspdarknet_l_backbone.pth',
        'cspdarknet_x'          : 'https://github.com/bubbliiiing/yolov5-pytorch/releases/download/v1.0/cspdarknet_x_backbone.pth',
        # The key spelling below is kept as-is: callers look it up by this exact string.
        'swin_transfomer_tiny'  : "https://github.com/bubbliiiing/yolov5-pytorch/releases/download/v1.0/swin_tiny_patch4_window7.pth",
    }
    try:
        url = download_urls[backbone]
    except KeyError:
        # Same exception type as the plain lookup, but with an actionable message.
        raise KeyError(
            "no pretrained weights for backbone %r; supported: %s"
            % (backbone, ", ".join(sorted(download_urls)))
        ) from None

    # exist_ok avoids failing when the directory appears between a check and the creation.
    os.makedirs(model_dir, exist_ok=True)
    load_state_dict_from_url(url, model_dir)
|
||||
637
utils/utils_bbox.py
Normal file
637
utils/utils_bbox.py
Normal file
@@ -0,0 +1,637 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
from torchvision.ops import nms
|
||||
|
||||
|
||||
class DecodeBox():
    """Decode raw YOLO head outputs into boxes and filter them with NMS (PyTorch)."""

    def __init__(self, anchors, num_classes, input_shape, anchors_mask = [[6,7,8], [3,4,5], [0,1,2]]):
        """Store the decoding configuration.

        Args:
            anchors: array of ``(width, height)`` anchor sizes; it is indexed
                with each entry of ``anchors_mask``.
            num_classes: number of object classes.
            input_shape: ``(height, width)`` of the network input.
            anchors_mask: per feature level, the list of anchor indices used
                by that level. The entries must stay lists because they are
                used to index ``anchors``. The default list is shared between
                instances and is only read here.
        """
        super(DecodeBox, self).__init__()
        self.anchors        = anchors
        self.num_classes    = num_classes
        # Channels per anchor: 4 box terms + 1 objectness + class scores.
        self.bbox_attrs     = 5 + num_classes
        self.input_shape    = input_shape
        #-----------------------------------------------------------#
        #   Per the original notes (for a 640x640 input and the usual
        #   anchor file):
        #   20x20 feature map -> anchors [116,90],[156,198],[373,326]
        #   40x40 feature map -> anchors [30,61],[62,45],[59,119]
        #   80x80 feature map -> anchors [10,13],[16,30],[33,23]
        #-----------------------------------------------------------#
        self.anchors_mask   = anchors_mask

    def decode_box(self, inputs):
        """Turn each feature map into normalised boxes plus scores.

        Args:
            inputs: sequence of tensors, one per feature level, each of shape
                ``(batch, num_anchors * (5 + num_classes), height, width)``.

        Returns:
            A list with one detached tensor per level of shape
            ``(batch, num_anchors * height * width, 5 + num_classes)`` holding
            ``cx, cy, w, h`` (divided by the feature-map width/height),
            objectness and per-class scores, all after a sigmoid.
        """
        outputs = []
        for level, feature in enumerate(inputs):
            mask            = self.anchors_mask[level]
            num_anchors     = len(mask)
            batch_size      = feature.size(0)
            input_height    = feature.size(2)
            input_width     = feature.size(3)
            # Build helper tensors on the input's own device so a non-default
            # GPU index does not cause a device mismatch.
            device          = feature.device

            # Size of one feature-map cell in input pixels.
            stride_h = self.input_shape[0] / input_height
            stride_w = self.input_shape[1] / input_width

            # Anchor sizes expressed in feature-map cells.
            scaled_anchors = np.array(
                [(anchor_width / stride_w, anchor_height / stride_h)
                 for anchor_width, anchor_height in self.anchors[mask]],
                dtype=np.float32,
            )
            anchor_table = torch.from_numpy(scaled_anchors).to(device)
            anchor_w = anchor_table[:, 0].view(1, num_anchors, 1, 1)
            anchor_h = anchor_table[:, 1].view(1, num_anchors, 1, 1)

            # (batch, A * attrs, H, W) -> (batch, A, H, W, attrs)
            prediction = feature.view(
                batch_size, num_anchors, self.bbox_attrs, input_height, input_width
            ).permute(0, 1, 3, 4, 2).contiguous()

            # Centre offsets, size factors, objectness and class scores.
            x           = torch.sigmoid(prediction[..., 0])
            y           = torch.sigmoid(prediction[..., 1])
            w           = torch.sigmoid(prediction[..., 2])
            h           = torch.sigmoid(prediction[..., 3])
            conf        = torch.sigmoid(prediction[..., 4])
            pred_cls    = torch.sigmoid(prediction[..., 5:])

            # Cell indices, broadcast over batch and anchors.
            grid_x = torch.arange(input_width, dtype=torch.float32, device=device).view(1, 1, 1, input_width)
            grid_y = torch.arange(input_height, dtype=torch.float32, device=device).view(1, 1, input_height, 1)

            #----------------------------------------------------------#
            #   x, y: 0~1 -> 0~2 -> -0.5~1.5, added to the cell index
            #   w, h: 0~1 -> 0~2 -> 0~4, multiplied by the anchor size
            #----------------------------------------------------------#
            pred_boxes = torch.stack((
                x * 2. - 0.5 + grid_x,
                y * 2. - 0.5 + grid_y,
                (w * 2) ** 2 * anchor_w,
                (h * 2) ** 2 * anchor_h,
            ), dim=-1).float()

            # Normalise the boxes by the feature-map size.
            _scale = torch.tensor([input_width, input_height, input_width, input_height],
                                  dtype=torch.float32, device=device)
            output = torch.cat((pred_boxes.reshape(batch_size, -1, 4) / _scale,
                                conf.view(batch_size, -1, 1),
                                pred_cls.view(batch_size, -1, self.num_classes)), -1)
            outputs.append(output.detach())
        return outputs

    def yolo_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image):
        """Map normalised centre/size boxes to corner boxes in image pixels.

        Args:
            box_xy: NumPy array ``(..., 2)`` of box centres ``(x, y)``.
            box_wh: NumPy array ``(..., 2)`` of box sizes ``(w, h)``.
            input_shape: ``(height, width)`` of the network input.
            image_shape: ``(height, width)`` of the original image.
            letterbox_image: when truthy, undo the letterbox padding offset
                and scale before converting.

        Returns:
            NumPy array ``(..., 4)`` ordered ``y_min, x_min, y_max, x_max``.
            The input arrays are not modified.
        """
        # y goes first so the boxes can be multiplied by (height, width) directly.
        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]
        input_shape = np.array(input_shape)
        image_shape = np.array(image_shape)

        if letterbox_image:
            # new_shape is the size of the resized image inside the padded
            # input; offset is the padding relative to the input's top-left.
            new_shape = np.round(image_shape * np.min(input_shape/image_shape))
            offset  = (input_shape - new_shape)/2./input_shape
            scale   = input_shape/new_shape

            box_yx  = (box_yx - offset) * scale
            # Not in-place: box_hw is a view of the caller's box_wh.
            box_hw  = box_hw * scale

        box_mins    = box_yx - (box_hw / 2.)
        box_maxes   = box_yx + (box_hw / 2.)
        boxes  = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1)
        boxes *= np.concatenate([image_shape, image_shape], axis=-1)
        return boxes

    def non_max_suppression(self, prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4):
        """Filter decoded predictions by score and per-class NMS.

        Args:
            prediction: tensor ``(batch, num_boxes, 5 + num_classes)`` with
                ``cx, cy, w, h, objectness, class scores``. It is not modified.
            num_classes: number of class-score columns to consider.
            input_shape: ``(height, width)`` of the network input.
            image_shape: ``(height, width)`` of the original image.
            letterbox_image: forwarded to ``yolo_correct_boxes``.
            conf_thres: minimum ``objectness * class score`` to keep a box.
            nms_thres: IoU threshold passed to ``nms``.

        Returns:
            A list with one entry per image: ``None`` when nothing passes the
            threshold, otherwise a NumPy array ``(kept, 7)`` with
            ``y_min, x_min, y_max, x_max, objectness, class score, class id``.
        """
        # Centre/size -> corners, built as a new tensor so the caller's
        # prediction is left untouched.
        centers     = prediction[:, :, 0:2]
        half_sizes  = prediction[:, :, 2:4] / 2
        prediction  = torch.cat((centers - half_sizes, centers + half_sizes, prediction[:, :, 4:]), dim=-1)

        output = [None for _ in range(len(prediction))]
        for i, image_pred in enumerate(prediction):
            # Best class score and its index for every box, each (num_boxes, 1).
            class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)

            # First filter: combined score. The mask is already 1-D; squeezing
            # it would collapse it to 0-D when there is exactly one box.
            conf_mask = image_pred[:, 4] * class_conf[:, 0] >= conf_thres

            image_pred = image_pred[conf_mask]
            class_conf = class_conf[conf_mask]
            class_pred = class_pred[conf_mask]
            if not image_pred.size(0):
                continue

            # Columns: x1, y1, x2, y2, obj_conf, class_conf, class_pred.
            detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)

            kept = []
            # unique() returns the class ids on the same device as detections.
            for c in detections[:, -1].unique():
                detections_class = detections[detections[:, -1] == c]
                # Per-class NMS on the combined score.
                keep = nms(
                    detections_class[:, :4],
                    detections_class[:, 4] * detections_class[:, 5],
                    nms_thres
                )
                kept.append(detections_class[keep])

            result = torch.cat(kept).cpu().numpy()
            box_xy = (result[:, 0:2] + result[:, 2:4]) / 2
            box_wh = result[:, 2:4] - result[:, 0:2]
            result[:, :4] = self.yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
            output[i] = result
        return output
|
||||
|
||||
|
||||
class DecodeBoxNP():
    """NumPy counterpart of the box decoder: decode YOLO outputs and run NMS."""

    def __init__(self, anchors, num_classes, input_shape, anchors_mask = [[6,7,8], [3,4,5], [0,1,2]]):
        """Store the decoding configuration.

        Args:
            anchors: NumPy array of ``(width, height)`` anchor sizes; it is
                indexed with each entry of ``anchors_mask``.
            num_classes: number of object classes.
            input_shape: ``(height, width)`` of the network input.
            anchors_mask: per feature level, the list of anchor indices used
                by that level. The entries must stay lists because they are
                used to index ``anchors``. The default list is shared between
                instances and is only read here.
        """
        super(DecodeBoxNP, self).__init__()
        self.anchors        = anchors
        self.num_classes    = num_classes
        # Channels per anchor: 4 box terms + 1 objectness + class scores.
        self.bbox_attrs     = 5 + num_classes
        self.input_shape    = input_shape
        self.anchors_mask   = anchors_mask

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def decode_box(self, inputs):
        """Turn each feature map into normalised boxes plus scores.

        Args:
            inputs: sequence of arrays, one per feature level, each of shape
                ``(batch, num_anchors * (5 + num_classes), height, width)``.

        Returns:
            A list with one array per level of shape
            ``(batch, num_anchors * height * width, 5 + num_classes)`` holding
            ``cx, cy, w, h`` (divided by the feature-map width/height),
            objectness and per-class scores, all after a sigmoid.
        """
        outputs = []
        for level, feature in enumerate(inputs):
            mask            = self.anchors_mask[level]
            num_anchors     = len(mask)
            batch_size      = np.shape(feature)[0]
            input_height    = np.shape(feature)[2]
            input_width     = np.shape(feature)[3]

            # Size of one feature-map cell in input pixels.
            stride_h = self.input_shape[0] / input_height
            stride_w = self.input_shape[1] / input_width

            # Anchor sizes expressed in feature-map cells, shaped to
            # broadcast over (batch, A, H, W).
            scaled_anchors = np.array(
                [(anchor_width / stride_w, anchor_height / stride_h)
                 for anchor_width, anchor_height in self.anchors[mask]]
            )
            anchor_w = scaled_anchors[:, 0].reshape(num_anchors, 1, 1)
            anchor_h = scaled_anchors[:, 1].reshape(num_anchors, 1, 1)

            # (batch, A * attrs, H, W) -> (batch, A, H, W, attrs)
            prediction = np.transpose(
                np.reshape(feature, (batch_size, num_anchors, self.bbox_attrs, input_height, input_width)),
                (0, 1, 3, 4, 2),
            )

            # Centre offsets, size factors, objectness and class scores.
            x           = self.sigmoid(prediction[..., 0])
            y           = self.sigmoid(prediction[..., 1])
            w           = self.sigmoid(prediction[..., 2])
            h           = self.sigmoid(prediction[..., 3])
            conf        = self.sigmoid(prediction[..., 4])
            pred_cls    = self.sigmoid(prediction[..., 5:])

            # Cell indices; broadcasting replaces the explicit tiling.
            grid_x = np.arange(input_width, dtype=np.float64)
            grid_y = np.arange(input_height, dtype=np.float64).reshape(input_height, 1)

            #----------------------------------------------------------#
            #   x, y: 0~1 -> 0~2 -> -0.5~1.5, added to the cell index
            #   w, h: 0~1 -> 0~2 -> 0~4, multiplied by the anchor size
            #----------------------------------------------------------#
            pred_boxes = np.stack([
                x * 2. - 0.5 + grid_x,
                y * 2. - 0.5 + grid_y,
                (w * 2) ** 2 * anchor_w,
                (h * 2) ** 2 * anchor_h,
            ], axis=-1)

            # Normalise the boxes by the feature-map size.
            _scale = np.array([input_width, input_height, input_width, input_height])
            output = np.concatenate([np.reshape(pred_boxes, (batch_size, -1, 4)) / _scale,
                                     np.reshape(conf, (batch_size, -1, 1)),
                                     np.reshape(pred_cls, (batch_size, -1, self.num_classes))], -1)
            outputs.append(output)
        return outputs

    def bbox_iou(self, box1, box2, x1y1x2y2=True):
        """Compute the IoU between rows of ``box1`` and ``box2`` (broadcast).

        Args:
            box1: array ``(n, >=4)`` of boxes.
            box2: array ``(m, >=4)`` of boxes, broadcastable against ``box1``.
            x1y1x2y2: when true the first four columns are corners
                ``x1, y1, x2, y2``; otherwise ``cx, cy, w, h``.

        Returns:
            Array of IoU values; the union is clamped to at least ``1e-6``.
        """
        if not x1y1x2y2:
            b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
            b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
            b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
            b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
        else:
            b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
            b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

        inter_rect_x1 = np.maximum(b1_x1, b2_x1)
        inter_rect_y1 = np.maximum(b1_y1, b2_y1)
        inter_rect_x2 = np.minimum(b1_x2, b2_x2)
        inter_rect_y2 = np.minimum(b1_y2, b2_y2)

        # Negative extents mean no overlap on that axis.
        inter_area = np.maximum(inter_rect_x2 - inter_rect_x1, 0) * \
                     np.maximum(inter_rect_y2 - inter_rect_y1, 0)

        b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
        b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)

        iou = inter_area / np.maximum(b1_area + b2_area - inter_area, 1e-6)
        return iou

    def yolo_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image):
        """Map normalised centre/size boxes to corner boxes in image pixels.

        Args:
            box_xy: array ``(..., 2)`` of box centres ``(x, y)``.
            box_wh: array ``(..., 2)`` of box sizes ``(w, h)``.
            input_shape: ``(height, width)`` of the network input.
            image_shape: ``(height, width)`` of the original image.
            letterbox_image: when truthy, undo the letterbox padding offset
                and scale before converting.

        Returns:
            Array ``(..., 4)`` ordered ``y_min, x_min, y_max, x_max``. The
            input arrays are not modified.
        """
        # y goes first so the boxes can be multiplied by (height, width) directly.
        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]
        input_shape = np.array(input_shape)
        image_shape = np.array(image_shape)

        if letterbox_image:
            # new_shape is the size of the resized image inside the padded
            # input; offset is the padding relative to the input's top-left.
            new_shape = np.round(image_shape * np.min(input_shape/image_shape))
            offset  = (input_shape - new_shape)/2./input_shape
            scale   = input_shape/new_shape

            box_yx  = (box_yx - offset) * scale
            # Not in-place: box_hw is a view of the caller's box_wh.
            box_hw  = box_hw * scale

        box_mins    = box_yx - (box_hw / 2.)
        box_maxes   = box_yx + (box_hw / 2.)
        boxes  = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1)
        boxes *= np.concatenate([image_shape, image_shape], axis=-1)
        return boxes

    def _nms_single_class(self, detections_class, nms_thres):
        """Greedy NMS over one class; rows are x1, y1, x2, y2, obj, cls_conf, cls."""
        # Highest combined score first.
        order = np.argsort(detections_class[:, 4] * detections_class[:, 5])[::-1]
        remaining = detections_class[order]

        kept = []
        while np.shape(remaining)[0]:
            # Keep the best remaining box, then drop everything overlapping it.
            kept.append(remaining[0:1])
            if len(remaining) == 1:
                break
            ious = self.bbox_iou(kept[-1], remaining[1:])
            remaining = remaining[1:][ious < nms_thres]
        return np.concatenate(kept, 0)

    def non_max_suppression(self, prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4):
        """Filter decoded predictions by score and per-class NMS.

        Args:
            prediction: array ``(batch, num_boxes, 5 + num_classes)`` with
                ``cx, cy, w, h, objectness, class scores``. It is not modified.
            num_classes: number of class-score columns to consider.
            input_shape: ``(height, width)`` of the network input.
            image_shape: ``(height, width)`` of the original image.
            letterbox_image: forwarded to ``yolo_correct_boxes``.
            conf_thres: minimum ``objectness * class score`` to keep a box.
            nms_thres: boxes whose IoU with a kept box is not below this value
                are suppressed.

        Returns:
            A list with one entry per image: ``None`` when nothing passes the
            threshold, otherwise an array ``(kept, 7)`` with
            ``y_min, x_min, y_max, x_max, objectness, class score, class id``.
        """
        # Centre/size -> corners, built as a new array so the caller's
        # prediction is left untouched.
        centers     = prediction[:, :, 0:2]
        half_sizes  = prediction[:, :, 2:4] / 2
        prediction  = np.concatenate([centers - half_sizes, centers + half_sizes, prediction[:, :, 4:]], axis=-1)

        output = [None for _ in range(len(prediction))]
        for i, image_pred in enumerate(prediction):
            # Best class score and its index for every box, each (num_boxes, 1).
            class_scores = image_pred[:, 5:5 + num_classes]
            class_conf = np.max(class_scores, 1, keepdims=True)
            class_pred = np.expand_dims(np.argmax(class_scores, 1), -1)

            # First filter: combined score. The mask is already 1-D; squeezing
            # it would collapse it to 0-D when there is exactly one box.
            conf_mask = image_pred[:, 4] * class_conf[:, 0] >= conf_thres

            image_pred = image_pred[conf_mask]
            class_conf = class_conf[conf_mask]
            class_pred = class_pred[conf_mask]
            if not np.shape(image_pred)[0]:
                continue

            # Columns: x1, y1, x2, y2, obj_conf, class_conf, class_pred.
            detections = np.concatenate((image_pred[:, :5], class_conf, class_pred), 1)

            kept = [
                self._nms_single_class(detections[detections[:, -1] == c], nms_thres)
                for c in np.unique(detections[:, -1])
            ]
            result = np.concatenate(kept)

            box_xy = (result[:, 0:2] + result[:, 2:4]) / 2
            box_wh = result[:, 2:4] - result[:, 0:2]
            result[:, :4] = self.yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
            output[i] = result
        return output
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    import numpy as np

    #---------------------------------------------------#
    #   Demo: decode one feature level and plot the
    #   anchors next to the decoded boxes.
    #---------------------------------------------------#
    def get_anchors_and_decode(input, input_shape, anchors, anchors_mask, num_classes):
        """Decode ``input`` with the anchors of ``anchors_mask[2]`` and plot the result.

        Args:
            input: tensor ``(batch, 3 * (4 + 1 + num_classes), height, width)``.
            input_shape: ``[height, width]`` of the network input.
            anchors: NumPy array of ``(width, height)`` anchor sizes.
            anchors_mask: list of anchor-index lists; entry 2 is used.
            num_classes: number of classes.

        The plot loads ``img/street.jpg`` and converts feature-map units to
        pixels with a fixed factor of 32.
        """
        from PIL import Image

        level_mask      = anchors_mask[2]
        batch_size      = input.size(0)
        input_height    = input.size(2)
        input_width     = input.size(3)

        # For a 640x640 input and a 20x20 map: 640 / 20 = 32.
        stride_h = input_shape[0] / input_height
        stride_w = input_shape[1] / input_width

        # Anchor sizes expressed in feature-map cells.
        scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h)
                          for anchor_width, anchor_height in anchors[level_mask]]

        # (batch, 3 * attrs, H, W) -> (batch, 3, attrs, H, W) -> (batch, 3, H, W, attrs)
        prediction = input.view(batch_size, len(level_mask), num_classes + 5, input_height, input_width)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Centre offsets, size factors, objectness and class scores (all 0~1).
        x           = torch.sigmoid(prediction[..., 0])
        y           = torch.sigmoid(prediction[..., 1])
        w           = torch.sigmoid(prediction[..., 2])
        h           = torch.sigmoid(prediction[..., 3])
        conf        = torch.sigmoid(prediction[..., 4])
        pred_cls    = torch.sigmoid(prediction[..., 5:])

        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor  = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor

        #----------------------------------------------------------#
        #   Cell indices tiled to (batch, 3, H, W): every row of
        #   grid_x is 0..W-1, every column of grid_y is 0..H-1.
        #----------------------------------------------------------#
        tiles   = batch_size * len(level_mask)
        columns = torch.linspace(0, input_width - 1, input_width)
        rows    = torch.linspace(0, input_height - 1, input_height)
        grid_x  = columns.repeat(input_height, 1).repeat(tiles, 1, 1).view(x.shape).type(FloatTensor)
        grid_y  = rows.repeat(input_width, 1).t().repeat(tiles, 1, 1).view(y.shape).type(FloatTensor)

        # Anchor widths/heights tiled to (batch, 3, H, W).
        cells           = input_height * input_width
        anchor_table    = FloatTensor(scaled_anchors)
        anchor_w = anchor_table.index_select(1, LongTensor([0])).repeat(batch_size, 1).repeat(1, 1, cells).view(w.shape)
        anchor_h = anchor_table.index_select(1, LongTensor([1])).repeat(batch_size, 1).repeat(1, 1, cells).view(h.shape)

        #----------------------------------------------------------#
        #   x, y: 0~1 -> 0~2 -> -0.5~1.5, plus the cell index
        #   w, h: 0~1 -> 0~2 -> 0~4, times the anchor size
        #----------------------------------------------------------#
        pred_boxes          = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0]  = x.data * 2. - 0.5 + grid_x
        pred_boxes[..., 1]  = y.data * 2. - 0.5 + grid_y
        pred_boxes[..., 2]  = (w.data * 2) ** 2 * anchor_w
        pred_boxes[..., 3]  = (h.data * 2) ** 2 * anchor_h

        # Cell whose anchors and predictions are highlighted.
        point_h = 5
        point_w = 5

        # Feature-map units -> pixels (fixed factor of 32).
        box_xy      = pred_boxes[..., 0:2].cpu().numpy() * 32
        box_wh      = pred_boxes[..., 2:4].cpu().numpy() * 32
        grid_x      = grid_x.cpu().numpy() * 32
        grid_y      = grid_y.cpu().numpy() * 32
        anchor_w    = anchor_w.cpu().numpy() * 32
        anchor_h    = anchor_h.cpu().numpy() * 32

        def draw_background(image):
            # Image, axis limits, all grid points and the highlighted cell.
            plt.imshow(image, alpha=0.5)
            plt.ylim(-30, 650)
            plt.xlim(-30, 650)
            plt.scatter(grid_x, grid_y)
            plt.scatter(point_h * 32, point_w * 32, c='black')

        def add_boxes(axes, left, top, width, height):
            # One red outline per anchor slot (0, 1, 2) at the highlighted cell.
            for slot in range(3):
                cell = (0, slot, point_h, point_w)
                axes.add_patch(plt.Rectangle([left[cell], top[cell]],
                                             width[cell], height[cell], color="r", fill=False))

        fig = plt.figure()

        # Left panel: the raw anchors centred on the grid point.
        ax  = fig.add_subplot(121)
        img = Image.open("img/street.jpg").resize([640, 640])
        draw_background(img)
        plt.gca().invert_yaxis()

        anchor_left = grid_x - anchor_w / 2
        anchor_top  = grid_y - anchor_h / 2
        add_boxes(ax, anchor_left, anchor_top, anchor_w, anchor_h)

        # Right panel: the decoded boxes and their centres.
        ax = fig.add_subplot(122)
        draw_background(img)
        plt.scatter(box_xy[0, :, point_h, point_w, 0], box_xy[0, :, point_h, point_w, 1], c='r')
        plt.gca().invert_yaxis()

        pre_left    = box_xy[..., 0] - box_wh[..., 0] / 2
        pre_top     = box_xy[..., 1] - box_wh[..., 1] / 2
        add_boxes(ax, pre_left, pre_top, box_wh[..., 0], box_wh[..., 1])

        plt.show()

    # Random feature map for a 20x20 level with 80 classes.
    feat            = torch.from_numpy(np.random.normal(0.2, 0.5, [4, 255, 20, 20])).float()
    anchors         = np.array([[116, 90], [156, 198], [373, 326], [30,61], [62,45], [59,119], [10,13], [16,30], [33,23]])
    anchors_mask    = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    get_anchors_and_decode(feat, [640, 640], anchors, anchors_mask, 80)
|
||||
149
utils/utils_fit.py
Normal file
149
utils/utils_fit.py
Normal file
@@ -0,0 +1,149 @@
|
||||
import os
|
||||
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils.utils import get_lr
|
||||
|
||||
def fit_one_epoch(model_train, model, ema, yolo_loss, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir, local_rank=0):
    """Run one training pass and one validation pass, then log and checkpoint.

    Args:
        model_train: Module used for the forward/backward passes.
        model: Module whose ``state_dict`` is saved when no EMA is in use.
        ema: Optional EMA wrapper; when truthy, ``ema.update`` is called after
            every optimizer step and ``ema.ema`` is used for validation and
            for the saved weights.
        yolo_loss: Callable ``(head_index, head_output, targets, y_true)``
            returning the loss of one output head.
        loss_history: Object exposing ``append_loss`` and a ``val_loss`` list.
        eval_callback: Object exposing ``on_epoch_end(epoch, model)``.
        optimizer: Optimizer stepped once per training batch.
        epoch: Zero-based index of the current epoch.
        epoch_step: Maximum number of training batches consumed from ``gen``.
        epoch_step_val: Maximum number of validation batches from ``gen_val``.
        gen: Iterable of training batches ``(images, targets, y_trues)``.
        gen_val: Iterable of validation batches with the same layout.
        Epoch: Total number of epochs (used for display and the final save).
        cuda: When true, batches are moved to CUDA device ``local_rank``.
        fp16: When true, the forward pass runs under autocast and ``scaler``
            drives the backward pass and optimizer step.
        scaler: Gradient scaler used only when ``fp16`` is true.
        save_period: A numbered checkpoint is written every this many epochs.
        save_dir: Directory receiving the checkpoint files.
        local_rank: Process rank; only rank 0 prints, logs and saves.
    """
    is_main_process = local_rank == 0
    epoch_no = epoch + 1
    bar_title = f'Epoch {epoch_no}/{Epoch}'

    def to_device(batch):
        # Unpack a batch and, if requested, move every tensor to the GPU.
        images, targets, y_trues = batch[0], batch[1], batch[2]
        with torch.no_grad():
            if cuda:
                images = images.cuda(local_rank)
                targets = [ann.cuda(local_rank) for ann in targets]
                y_trues = [ann.cuda(local_rank) for ann in y_trues]
        return images, targets, y_trues

    def summed_loss(outputs, targets, y_trues):
        # Add up the loss of every output head.
        total = 0
        for head_index, head_output in enumerate(outputs):
            total += yolo_loss(head_index, head_output, targets, y_trues[head_index])
        return total

    loss = 0
    val_loss = 0

    # ------------------------------ training ------------------------------ #
    if is_main_process:
        print('Start Train')
        pbar = tqdm(total=epoch_step, desc=bar_title, postfix=dict, mininterval=0.3)
    model_train.train()
    for iteration, batch in enumerate(gen):
        if iteration >= epoch_step:
            break

        images, targets, y_trues = to_device(batch)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        if fp16:
            from torch.cuda.amp import autocast
            # Forward pass and loss under autocast; backward through the scaler.
            with autocast():
                loss_value = summed_loss(model_train(images), targets, y_trues)
            scaler.scale(loss_value).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            # Plain full-precision forward pass, loss and back-propagation.
            loss_value = summed_loss(model_train(images), targets, y_trues)
            loss_value.backward()
            optimizer.step()
        if ema:
            ema.update(model_train)

        loss += loss_value.item()

        if is_main_process:
            pbar.set_postfix(**{'loss': loss / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)

    # ----------------------------- validation ----------------------------- #
    if is_main_process:
        pbar.close()
        print('Finish Train')
        print('Start Validation')
        pbar = tqdm(total=epoch_step_val, desc=bar_title, postfix=dict, mininterval=0.3)

    model_train_eval = ema.ema if ema else model_train.eval()

    for iteration, batch in enumerate(gen_val):
        if iteration >= epoch_step_val:
            break
        with torch.no_grad():
            images, targets, y_trues = to_device(batch)
            optimizer.zero_grad()
            loss_value = summed_loss(model_train_eval(images), targets, y_trues)

        val_loss += loss_value.item()
        if is_main_process:
            pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)})
            pbar.update(1)

    # Only the main process reports results and writes checkpoints.
    if not is_main_process:
        return

    pbar.close()
    print('Finish Validation')
    mean_loss = loss / epoch_step
    mean_val_loss = val_loss / epoch_step_val
    loss_history.append_loss(epoch_no, mean_loss, mean_val_loss)
    eval_callback.on_epoch_end(epoch_no, model_train_eval)
    print('Epoch:'+ str(epoch_no) + '/' + str(Epoch))
    print('Total Loss: %.3f || Val Loss: %.3f ' % (mean_loss, mean_val_loss))

    # Save weights: the EMA copy when available, otherwise the raw model.
    save_state_dict = ema.ema.state_dict() if ema else model.state_dict()

    if epoch_no % save_period == 0 or epoch_no == Epoch:
        torch.save(save_state_dict, os.path.join(save_dir, "ep%03d-loss%.3f-val_loss%.3f.pth" % (epoch_no, mean_loss, mean_val_loss)))

    if len(loss_history.val_loss) <= 1 or mean_val_loss <= min(loss_history.val_loss):
        print('Save best model to best_epoch_weights.pth')
        torch.save(save_state_dict, os.path.join(save_dir, "best_epoch_weights.pth"))

    torch.save(save_state_dict, os.path.join(save_dir, "last_epoch_weights.pth"))
|
||||
923
utils/utils_map.py
Normal file
923
utils/utils_map.py
Normal file
@@ -0,0 +1,923 @@
|
||||
import glob
|
||||
import json
|
||||
import math
|
||||
import operator
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
try:
|
||||
from pycocotools.coco import COCO
|
||||
from pycocotools.cocoeval import COCOeval
|
||||
except:
|
||||
pass
|
||||
import cv2
|
||||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
from matplotlib import pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
'''
|
||||
0,0 ------> x (width)
|
||||
|
|
||||
| (Left,Top)
|
||||
| *_________
|
||||
| | |
|
||||
| |
|
||||
y |_________|
|
||||
(height) *
|
||||
(Right,Bottom)
|
||||
'''
|
||||
|
||||
def log_average_miss_rate(precision, fp_cumsum, num_images):
    """Compute the log-average miss rate (LAMR).

    The miss rate is sampled at 9 false-positives-per-image (FPPI) points
    evenly spaced in log-space between 1e-2 and 1e0, and the samples are
    averaged geometrically.

    Args:
        precision: Array-like of per-detection values; the miss rate is taken
            as ``1 - precision``. (``get_map`` in this file passes the
            cumulative recall here.)
        fp_cumsum: Array-like of cumulative false-positive counts, aligned
            with ``precision``.
        num_images: Number of images used to normalise ``fp_cumsum``.

    Returns:
        tuple: ``(lamr, mr, fppi)`` where ``lamr`` is the log-average miss
        rate, ``mr`` the miss-rate array and ``fppi`` the false positives per
        image. For empty input the result is ``(0, 1, 0)``.

    References:
        [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the
            State of the Art." Pattern Analysis and Machine Intelligence, IEEE
            Transactions on 34.4 (2012): 743 - 761.
    """
    # Accept plain sequences as well as ndarrays.
    precision = np.asarray(precision)
    fp_cumsum = np.asarray(fp_cumsum)

    if precision.size == 0:
        return 0, 1, 0

    fppi = fp_cumsum / float(num_images)
    mr = 1 - precision

    # Sentinel entry (fppi=-1, mr=1) guarantees every reference point has at
    # least one sample at or below it.
    fppi_tmp = np.insert(fppi, 0, -1.0)
    mr_tmp = np.insert(mr, 0, 1.0)

    reference_fppi = np.logspace(-2.0, 0.0, num=9)
    sampled_mr = np.empty_like(reference_fppi)
    for i, ref_point in enumerate(reference_fppi):
        # Last sample whose FPPI does not exceed the reference point.
        j = np.where(fppi_tmp <= ref_point)[-1][-1]
        sampled_mr[i] = mr_tmp[j]

    # Clamp before the log so a zero miss rate does not produce -inf.
    lamr = math.exp(np.mean(np.log(np.maximum(1e-10, sampled_mr))))

    return lamr, mr, fppi
|
||||
|
||||
"""
|
||||
throw error and exit
|
||||
"""
|
||||
def error(msg):
    """Report a fatal error and terminate the process.

    The message goes to standard error and the process exits with status 1,
    so that shells and calling scripts can detect the failure.

    Args:
        msg: Text describing the error.

    Raises:
        SystemExit: Always.
    """
    print(msg, file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
"""
|
||||
check if the number is a float between 0.0 and 1.0
|
||||
"""
|
||||
def is_float_between_0_and_1(value):
    """Return True when *value* converts to a float strictly inside (0, 1).

    Args:
        value: Anything accepted by ``float()``; values that cannot be
            converted (including ``None``) yield ``False`` rather than
            raising.

    Returns:
        bool: ``True`` only if ``0.0 < float(value) < 1.0``.
    """
    try:
        return 0.0 < float(value) < 1.0
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric inputs such as None.
        return False
|
||||
|
||||
"""
|
||||
Calculate the AP given the recall and precision array
|
||||
1st) We compute a version of the measured precision/recall curve with
|
||||
precision monotonically decreasing
|
||||
2nd) We compute the AP as the area under this curve by numerical integration.
|
||||
"""
|
||||
def voc_ap(rec, prec):
    """Compute average precision following the VOC2012 procedure.

    Equivalent to the official MATLAB code::

        mrec=[0 ; rec ; 1];
        mpre=[0 ; prec ; 0];
        for i=numel(mpre)-1:-1:1
            mpre(i)=max(mpre(i),mpre(i+1));
        end
        i=find(mrec(2:end)~=mrec(1:end-1))+1;
        ap=sum((mrec(i)-mrec(i-1)).*mpre(i));

    The inputs are not modified; padded copies are built instead.

    Args:
        rec: Iterable of recall values.
        prec: Iterable of precision values, aligned with ``rec``.

    Returns:
        tuple: ``(ap, mrec, mpre)`` where ``mrec`` is ``rec`` padded with 0.0
        and 1.0, and ``mpre`` is ``prec`` padded with 0.0 on both ends and
        made monotonically non-increasing. Both are new lists.
    """
    mrec = [0.0, *rec, 1.0]
    mpre = [0.0, *prec, 0.0]

    # Walk from the end to the start so each precision becomes the maximum of
    # itself and everything to its right (the precision envelope).
    for i in range(len(mpre) - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])

    # Integrate: add a rectangle wherever the recall value changes.
    ap = 0.0
    for i in range(1, len(mrec)):
        if mrec[i] != mrec[i - 1]:
            ap += (mrec[i] - mrec[i - 1]) * mpre[i]
    return ap, mrec, mpre
|
||||
|
||||
|
||||
"""
|
||||
Convert the lines of a file to a list
|
||||
"""
|
||||
def file_lines_to_list(path):
    """Return the lines of the text file at *path*, whitespace-stripped.

    Args:
        path: Location of the text file to read.

    Returns:
        list: One string per line, with leading and trailing whitespace
        (including the newline) removed.
    """
    with open(path) as handle:
        raw_lines = handle.readlines()
    return [raw.strip() for raw in raw_lines]
|
||||
|
||||
"""
|
||||
Draws text in image
|
||||
"""
|
||||
def draw_text_in_image(img, text, pos, color, line_width):
    """Draw *text* on *img* and return the image with the updated line width.

    Args:
        img: Image passed to ``cv2.putText``.
        text: String to render.
        pos: Bottom-left corner of the text, as ``(x, y)``.
        color: Text colour passed to ``cv2.putText``.
        line_width: Width already used on the current line, in pixels.

    Returns:
        tuple: ``(img, line_width + text_width)`` where ``text_width`` is the
        rendered width reported by ``cv2.getTextSize``.
    """
    font_face = cv2.FONT_HERSHEY_PLAIN
    scale = 1
    # Passed positionally as the argument after ``color`` in putText and as
    # the last argument of getTextSize.
    stroke = 1
    cv2.putText(img, text, pos, font_face, scale, color, stroke)
    text_size = cv2.getTextSize(text, font_face, scale, stroke)[0]
    text_width, _ = text_size
    return img, (line_width + text_width)
|
||||
|
||||
"""
|
||||
Plot - adjust axes
|
||||
"""
|
||||
def adjust_axes(r, t, fig, axes):
    """Widen the x-axis so the text label *t* fits inside the figure.

    Args:
        r: Renderer used to measure the text.
        t: Text object whose rendered width is measured.
        fig: Figure providing the DPI and the current width in inches.
        axes: Axes whose upper x-limit is scaled.
    """
    # Width of the text in inches (window extent is in pixels).
    text_width_inches = t.get_window_extent(renderer=r).width / fig.dpi
    fig_width_inches = fig.get_figwidth()
    # Ratio by which the figure would need to grow to hold the text.
    growth = (fig_width_inches + text_width_inches) / fig_width_inches
    x_min, x_max = axes.get_xlim()
    axes.set_xlim([x_min, x_max * growth])
|
||||
|
||||
"""
|
||||
Draw plot using Matplotlib
|
||||
"""
|
||||
def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar):
    """Draw a horizontal bar chart of per-class values and save it.

    Args:
        dictionary: Mapping of class name to the value plotted for it.
        n_classes: Number of bars (used for the y positions and the height).
        window_title: Title given to the figure window, when supported.
        plot_title: Title drawn above the plot.
        x_label: Label of the x-axis.
        output_path: File the figure is saved to.
        to_show: When true, ``plt.show()`` is called after saving.
        plot_color: Bar and label colour for the single-colour chart.
        true_p_bar: Either ``""`` for a single-colour chart, or a mapping of
            class name to true-positive count; in that case each bar is
            stacked as false positives (red) plus true positives (green).
    """
    # Ascending by value, so the last entry is the largest bar.
    sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1))
    sorted_keys, sorted_values = zip(*sorted_dic_by_value)
    last_index = len(sorted_values) - 1

    if true_p_bar != "":
        # Stacked chart:
        #   green -> TP: detection matches a ground-truth object
        #   red   -> FP: detection does not match any ground-truth object
        fp_sorted = [dictionary[key] - true_p_bar[key] for key in sorted_keys]
        tp_sorted = [true_p_bar[key] for key in sorted_keys]
        plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive')
        plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted)
        plt.legend(loc='lower right')

        # Write the numbers beside each bar.
        fig = plt.gcf()
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            fp_str_val = " " + str(fp_sorted[i])
            tp_str_val = fp_str_val + " " + str(tp_sorted[i])
            # Two-colour label trick: paint the full string in green, then
            # repaint the leading FP number in red on top of it.
            t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold')
            plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold')
            if i == last_index:  # largest bar
                adjust_axes(r, t, fig, axes)
    else:
        plt.barh(range(n_classes), sorted_values, color=plot_color)

        # Write the number beside each bar.
        fig = plt.gcf()
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            str_val = " " + str(val)
            if val < 1.0:
                str_val = " {0:.2f}".format(val)
            t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold')
            # Extend the axis so the label of the largest bar stays inside.
            if i == last_index:  # largest bar
                adjust_axes(r, t, fig, axes)

    # Set the window title. ``canvas.set_window_title`` was removed from
    # recent Matplotlib releases in favour of the figure manager's method, so
    # prefer the manager and fall back to the canvas on old versions.
    manager = getattr(fig.canvas, "manager", None)
    if manager is not None and hasattr(manager, "set_window_title"):
        manager.set_window_title(window_title)
    elif hasattr(fig.canvas, "set_window_title"):
        fig.canvas.set_window_title(window_title)

    # Class names on the y-axis.
    tick_font_size = 12
    plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)

    # Grow the figure height when there are too many classes to fit.
    init_height = fig.get_figheight()
    dpi = fig.dpi
    height_pt = n_classes * (tick_font_size * 1.4)  # 1.4 leaves some spacing
    height_in = height_pt / dpi
    top_margin = 0.15     # fraction of the figure height
    bottom_margin = 0.05  # fraction of the figure height
    figure_height = height_in / (1 - top_margin - bottom_margin)
    if figure_height > init_height:
        fig.set_figheight(figure_height)

    plt.title(plot_title, fontsize=14)
    plt.xlabel(x_label, fontsize='large')
    fig.tight_layout()
    fig.savefig(output_path)
    if to_show:
        plt.show()
    plt.close()
|
||||
|
||||
def get_map(MINOVERLAP, draw_plot, score_threhold=0.5, path = './map_out'):
    """Compute VOC-style mAP from ground-truth and detection text files.

    Expected layout under *path*:

    * ``ground-truth/*.txt`` - lines ``class left top right bottom`` with an
      optional trailing ``difficult`` token.
    * ``detection-results/*.txt`` - lines
      ``class confidence left top right bottom``.
    * ``images-optional/`` - optional images; when present and non-empty the
      matching of every detection is drawn and shown.

    Class names may contain spaces: the numeric fields are taken from the
    right-hand end of each line. Blank lines are ignored.

    Results are written to ``<path>/results`` (recreated on every call).

    Args:
        MINOVERLAP: Minimum IoU for a detection to count as a match.
        draw_plot: When true, per-class curves and summary charts are saved.
        score_threhold: Confidence at which the reported F1, recall and
            precision of each class are read.
        path: Root directory containing the folders described above.

    Returns:
        The mean average precision over all ground-truth classes, or ``0``
        when no class was found.
    """
    GT_PATH = os.path.join(path, 'ground-truth')
    DR_PATH = os.path.join(path, 'detection-results')
    IMG_PATH = os.path.join(path, 'images-optional')
    TEMP_FILES_PATH = os.path.join(path, '.temp_files')
    RESULTS_FILES_PATH = os.path.join(path, 'results')

    def set_window_title(figure, title):
        # ``canvas.set_window_title`` is gone from recent Matplotlib; prefer
        # the manager's method and fall back to the canvas on old versions.
        manager = getattr(figure.canvas, "manager", None)
        if manager is not None and hasattr(manager, "set_window_title"):
            manager.set_window_title(title)
        elif hasattr(figure.canvas, "set_window_title"):
            figure.canvas.set_window_title(title)

    def load_json(json_path):
        # Read a JSON file and close the handle straight away.
        with open(json_path) as handle:
            return json.load(handle)

    # The animation is only shown when the optional image folder has files.
    show_animation = True
    if os.path.exists(IMG_PATH):
        for dirpath, dirnames, files in os.walk(IMG_PATH):
            if not files:
                show_animation = False
    else:
        show_animation = False

    if not os.path.exists(TEMP_FILES_PATH):
        os.makedirs(TEMP_FILES_PATH)

    # Start from an empty results directory. It must be recreated after the
    # removal, otherwise results.txt cannot be opened when no plot
    # sub-directories are created below.
    if os.path.exists(RESULTS_FILES_PATH):
        shutil.rmtree(RESULTS_FILES_PATH)
    os.makedirs(RESULTS_FILES_PATH)
    if draw_plot:
        try:
            matplotlib.use('TkAgg')
        except Exception:
            # Best effort: keep the current backend if TkAgg is unavailable.
            pass
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "AP"))
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "F1"))
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "Recall"))
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "Precision"))
    if show_animation:
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "images", "detections_one_by_one"))

    # ------------------------------------------------------------------ #
    # Ground truth: parse every file and cache it as JSON.
    # ------------------------------------------------------------------ #
    ground_truth_files_list = glob.glob(GT_PATH + '/*.txt')
    if len(ground_truth_files_list) == 0:
        error("Error: No ground-truth files found!")
    ground_truth_files_list.sort()
    gt_counter_per_class = {}
    counter_images_per_class = {}

    for txt_file in ground_truth_files_list:
        file_id = txt_file.split(".txt", 1)[0]
        file_id = os.path.basename(os.path.normpath(file_id))
        temp_path = os.path.join(DR_PATH, (file_id + ".txt"))
        if not os.path.exists(temp_path):
            error_msg = "Error. File not found: {}\n".format(temp_path)
            error(error_msg)
        lines_list = file_lines_to_list(txt_file)
        bounding_boxes = []
        already_seen_classes = []
        for line in lines_list:
            line_split = line.split()
            if not line_split:
                continue
            # Numeric fields are read from the right so that class names
            # containing spaces are handled.
            is_difficult = "difficult" in line
            if is_difficult:
                left, top, right, bottom, _difficult = line_split[-5:]
                class_name = " ".join(line_split[:-5])
            else:
                left, top, right, bottom = line_split[-4:]
                class_name = " ".join(line_split[:-4])

            bbox = left + " " + top + " " + right + " " + bottom
            if is_difficult:
                # Difficult objects are stored but not counted.
                bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False, "difficult":True})
            else:
                bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False})
                gt_counter_per_class[class_name] = gt_counter_per_class.get(class_name, 0) + 1

                if class_name not in already_seen_classes:
                    counter_images_per_class[class_name] = counter_images_per_class.get(class_name, 0) + 1
                    already_seen_classes.append(class_name)

        with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile:
            json.dump(bounding_boxes, outfile)

    gt_classes = sorted(gt_counter_per_class.keys())
    n_classes = len(gt_classes)

    # ------------------------------------------------------------------ #
    # Detections: one JSON file per class, sorted by descending confidence.
    # ------------------------------------------------------------------ #
    dr_files_list = glob.glob(DR_PATH + '/*.txt')
    dr_files_list.sort()
    for class_index, class_name in enumerate(gt_classes):
        bounding_boxes = []
        for txt_file in dr_files_list:
            file_id = txt_file.split(".txt",1)[0]
            file_id = os.path.basename(os.path.normpath(file_id))
            temp_path = os.path.join(GT_PATH, (file_id + ".txt"))
            if class_index == 0:
                if not os.path.exists(temp_path):
                    error_msg = "Error. File not found: {}\n".format(temp_path)
                    error(error_msg)
            for line in file_lines_to_list(txt_file):
                line_split = line.split()
                if not line_split:
                    continue
                confidence, left, top, right, bottom = line_split[-5:]
                tmp_class_name = " ".join(line_split[:-5])

                if tmp_class_name == class_name:
                    bbox = left + " " + top + " " + right + " " +bottom
                    bounding_boxes.append({"confidence":confidence, "file_id":file_id, "bbox":bbox})

        bounding_boxes.sort(key=lambda x:float(x['confidence']), reverse=True)
        with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile:
            json.dump(bounding_boxes, outfile)

    # ------------------------------------------------------------------ #
    # Per-class AP.
    # ------------------------------------------------------------------ #
    sum_AP = 0.0
    ap_dictionary = {}
    lamr_dictionary = {}
    with open(RESULTS_FILES_PATH + "/results.txt", 'w') as results_file:
        results_file.write("# AP and precision/recall per class\n")
        count_true_positives = {}

        for class_index, class_name in enumerate(gt_classes):
            count_true_positives[class_name] = 0
            dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json"
            dr_data = load_json(dr_file)

            nd = len(dr_data)
            tp = [0] * nd
            fp = [0] * nd
            score = [0] * nd
            # Index of the last detection whose score reaches the threshold.
            score_threhold_idx = 0
            for idx, detection in enumerate(dr_data):
                file_id = detection["file_id"]
                score[idx] = float(detection["confidence"])
                if score[idx] >= score_threhold:
                    score_threhold_idx = idx

                if show_animation:
                    ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*")
                    if len(ground_truth_img) == 0:
                        error("Error. Image not found with id: " + file_id)
                    elif len(ground_truth_img) > 1:
                        error("Error. Multiple image with id: " + file_id)
                    else:
                        img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0])
                        img_cumulative_path = RESULTS_FILES_PATH + "/images/" + ground_truth_img[0]
                        if os.path.isfile(img_cumulative_path):
                            img_cumulative = cv2.imread(img_cumulative_path)
                        else:
                            img_cumulative = img.copy()
                        # Black strip below the image for the status text.
                        bottom_border = 60
                        BLACK = [0, 0, 0]
                        img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK)

                # Find the same-class ground-truth box with the highest IoU.
                gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json"
                ground_truth_data = load_json(gt_file)
                ovmax = -1
                gt_match = -1
                bb = [float(x) for x in detection["bbox"].split()]
                for obj in ground_truth_data:
                    if obj["class_name"] == class_name:
                        bbgt = [ float(x) for x in obj["bbox"].split() ]
                        bi = [max(bb[0],bbgt[0]), max(bb[1],bbgt[1]), min(bb[2],bbgt[2]), min(bb[3],bbgt[3])]
                        iw = bi[2] - bi[0] + 1
                        ih = bi[3] - bi[1] + 1
                        if iw > 0 and ih > 0:
                            ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0]
                                            + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
                            ov = iw * ih / ua
                            if ov > ovmax:
                                ovmax = ov
                                gt_match = obj

                if show_animation:
                    status = "NO MATCH FOUND!"

                min_overlap = MINOVERLAP
                if ovmax >= min_overlap:
                    # A match with a "difficult" object counts as neither a
                    # true nor a false positive.
                    if "difficult" not in gt_match:
                        if not bool(gt_match["used"]):
                            tp[idx] = 1
                            gt_match["used"] = True
                            count_true_positives[class_name] += 1
                            # Persist the "used" flag for later detections.
                            with open(gt_file, 'w') as f:
                                f.write(json.dumps(ground_truth_data))
                            if show_animation:
                                status = "MATCH!"
                        else:
                            fp[idx] = 1
                            if show_animation:
                                status = "REPEATED MATCH!"
                else:
                    fp[idx] = 1
                    if ovmax > 0:
                        status = "INSUFFICIENT OVERLAP"

                # Draw the current detection for the animation.
                if show_animation:
                    height, widht = img.shape[:2]
                    white = (255,255,255)
                    light_blue = (255,200,100)
                    green = (0,255,0)
                    light_red = (30,30,255)
                    margin = 10
                    # First text line.
                    v_pos = int(height - margin - (bottom_border / 2.0))
                    text = "Image: " + ground_truth_img[0] + " "
                    img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0)
                    text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " "
                    img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue, line_width)
                    if ovmax != -1:
                        color = light_red
                        if status == "INSUFFICIENT OVERLAP":
                            text = "IoU: {0:.2f}% ".format(ovmax*100) + "< {0:.2f}% ".format(min_overlap*100)
                        else:
                            text = "IoU: {0:.2f}% ".format(ovmax*100) + ">= {0:.2f}% ".format(min_overlap*100)
                            color = green
                        img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width)
                    # Second text line.
                    v_pos += int(bottom_border / 2.0)
                    rank_pos = str(idx+1)
                    text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format(float(detection["confidence"])*100)
                    img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0)
                    color = light_red
                    if status == "MATCH!":
                        color = green
                    text = "Result: " + status + " "
                    img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width)

                    font = cv2.FONT_HERSHEY_SIMPLEX
                    if ovmax > 0:
                        bbgt = [ int(round(float(x))) for x in gt_match["bbox"].split() ]
                        cv2.rectangle(img,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2)
                        cv2.rectangle(img_cumulative,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2)
                        cv2.putText(img_cumulative, class_name, (bbgt[0],bbgt[1] - 5), font, 0.6, light_blue, 1, cv2.LINE_AA)
                    bb = [int(i) for i in bb]
                    cv2.rectangle(img,(bb[0],bb[1]),(bb[2],bb[3]),color,2)
                    cv2.rectangle(img_cumulative,(bb[0],bb[1]),(bb[2],bb[3]),color,2)
                    cv2.putText(img_cumulative, class_name, (bb[0],bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA)

                    cv2.imshow("Animation", img)
                    cv2.waitKey(20)
                    output_img_path = RESULTS_FILES_PATH + "/images/detections_one_by_one/" + class_name + "_detection" + str(idx) + ".jpg"
                    cv2.imwrite(output_img_path, img)
                    cv2.imwrite(img_cumulative_path, img_cumulative)

            # Turn the per-detection flags into cumulative counts.
            cumsum = 0
            for idx, val in enumerate(fp):
                fp[idx] += cumsum
                cumsum += val

            cumsum = 0
            for idx, val in enumerate(tp):
                tp[idx] += cumsum
                cumsum += val

            rec = tp[:]
            for idx, val in enumerate(tp):
                rec[idx] = float(tp[idx]) / np.maximum(gt_counter_per_class[class_name], 1)

            prec = tp[:]
            for idx, val in enumerate(tp):
                prec[idx] = float(tp[idx]) / np.maximum((fp[idx] + tp[idx]), 1)

            ap, mrec, mprec = voc_ap(rec[:], prec[:])
            # F1 with a guard against 0/0 where precision + recall is zero.
            F1 = np.array(rec)*np.array(prec)*2 / np.where((np.array(prec)+np.array(rec))==0, 1, (np.array(prec)+np.array(rec)))

            sum_AP += ap
            text = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP "

            if len(prec)>0:
                F1_text = "{0:.2f}".format(F1[score_threhold_idx]) + " = " + class_name + " F1 "
                Recall_text = "{0:.2f}%".format(rec[score_threhold_idx]*100) + " = " + class_name + " Recall "
                Precision_text = "{0:.2f}%".format(prec[score_threhold_idx]*100) + " = " + class_name + " Precision "
            else:
                F1_text = "0.00" + " = " + class_name + " F1 "
                Recall_text = "0.00%" + " = " + class_name + " Recall "
                Precision_text = "0.00%" + " = " + class_name + " Precision "

            rounded_prec = [ '%.2f' % elem for elem in prec ]
            rounded_rec = [ '%.2f' % elem for elem in rec ]
            results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n")

            if len(prec)>0:
                print(text + "\t||\tscore_threhold=" + str(score_threhold) + " : " + "F1=" + "{0:.2f}".format(F1[score_threhold_idx])\
                    + " ; Recall=" + "{0:.2f}%".format(rec[score_threhold_idx]*100) + " ; Precision=" + "{0:.2f}%".format(prec[score_threhold_idx]*100))
            else:
                print(text + "\t||\tscore_threhold=" + str(score_threhold) + " : " + "F1=0.00% ; Recall=0.00% ; Precision=0.00%")
            ap_dictionary[class_name] = ap

            n_images = counter_images_per_class[class_name]
            lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images)
            lamr_dictionary[class_name] = lamr

            if draw_plot:
                # Precision/recall curve with the integrated area shaded.
                plt.plot(rec, prec, '-o')
                area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]]
                area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]]
                plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r')

                fig = plt.gcf()
                set_window_title(fig, 'AP ' + class_name)

                plt.title('class: ' + text)
                plt.xlabel('Recall')
                plt.ylabel('Precision')
                axes = plt.gca()
                axes.set_xlim([0.0,1.0])
                axes.set_ylim([0.0,1.05])
                fig.savefig(RESULTS_FILES_PATH + "/AP/" + class_name + ".png")
                plt.cla()

                # F1, recall and precision against the score threshold; the
                # y-label doubles as the output sub-directory name.
                for values, line_style, line_color, title_text, y_label in (
                    (F1, "-", 'orangered', F1_text, 'F1'),
                    (rec, "-H", 'gold', Recall_text, 'Recall'),
                    (prec, "-s", 'palevioletred', Precision_text, 'Precision'),
                ):
                    plt.plot(score, values, line_style, color=line_color)
                    plt.title('class: ' + title_text + "\nscore_threhold=" + str(score_threhold))
                    plt.xlabel('Score_Threhold')
                    plt.ylabel(y_label)
                    axes = plt.gca()
                    axes.set_xlim([0.0,1.0])
                    axes.set_ylim([0.0,1.05])
                    fig.savefig(RESULTS_FILES_PATH + "/" + y_label + "/" + class_name + ".png")
                    plt.cla()

        if show_animation:
            cv2.destroyAllWindows()
        if n_classes == 0:
            print("未检测到任何种类,请检查标签信息与get_map.py中的classes_path是否修改。")
            return 0
        results_file.write("\n# mAP of all classes\n")
        mAP = sum_AP / n_classes
        text = "mAP = {0:.2f}%".format(mAP*100)
        results_file.write(text + "\n")
        print(text)

    shutil.rmtree(TEMP_FILES_PATH)

    # Count the detections per class.
    det_counter_per_class = {}
    for txt_file in dr_files_list:
        for line in file_lines_to_list(txt_file):
            line_split = line.split()
            if not line_split:
                continue
            class_name = line_split[0]
            det_counter_per_class[class_name] = det_counter_per_class.get(class_name, 0) + 1
    dr_classes = list(det_counter_per_class.keys())

    # Number of ground-truth objects per class.
    with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file:
        results_file.write("\n# Number of ground-truth objects per class\n")
        for class_name in sorted(gt_counter_per_class):
            results_file.write(class_name + ": " + str(gt_counter_per_class[class_name]) + "\n")

    # Classes that were detected but never appear in the ground truth have
    # no true positives.
    for class_name in dr_classes:
        if class_name not in gt_classes:
            count_true_positives[class_name] = 0

    # Number of detected objects per class.
    with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file:
        results_file.write("\n# Number of detected objects per class\n")
        for class_name in sorted(dr_classes):
            n_det = det_counter_per_class[class_name]
            text = class_name + ": " + str(n_det)
            text += " (tp:" + str(count_true_positives[class_name]) + ""
            text += ", fp:" + str(n_det - count_true_positives[class_name]) + ")\n"
            results_file.write(text)

    if draw_plot:
        # Number of ground-truth objects per class.
        plot_title = "ground-truth\n"
        plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)"
        draw_plot_func(
            gt_counter_per_class,
            n_classes,
            "ground-truth-info",
            plot_title,
            "Number of objects per class",
            RESULTS_FILES_PATH + "/ground-truth-info.png",
            False,
            'forestgreen',
            '',
            )

        # Log-average miss rate per class.
        draw_plot_func(
            lamr_dictionary,
            n_classes,
            "lamr",
            "log-average miss rate",
            "log-average miss rate",
            RESULTS_FILES_PATH + "/lamr.png",
            False,
            'royalblue',
            ""
            )

        # AP per class; this is the only chart that is also shown.
        draw_plot_func(
            ap_dictionary,
            n_classes,
            "mAP",
            "mAP = {0:.2f}%".format(mAP*100),
            "Average Precision",
            RESULTS_FILES_PATH + "/mAP.png",
            True,
            'royalblue',
            ""
            )
    return mAP
|
||||
|
||||
def preprocess_gt(gt_path, class_names):
    """Build a COCO-style ground-truth dictionary from per-image text files.

    Every entry of ``gt_path`` is read with ``file_lines_to_list``. Each
    non-blank line is parsed as
    ``<class name> <left> <top> <right> <bottom> [difficult]``, where the
    class name may consist of several space-separated words and the
    trailing ``difficult`` token is optional.

    Args:
        gt_path: Directory containing one ground-truth text file per image.
        class_names: Sequence of known class names. The category id of a
            class is its (first) position in this sequence plus one. Boxes
            whose class is not listed are dropped.

    Returns:
        dict: A dictionary with the keys ``'images'``, ``'categories'`` and
        ``'annotations'``.

    Raises:
        ValueError: If a non-blank line has fewer than four coordinate
            fields or a coordinate cannot be converted to ``float``.
    """
    # Resolve class name -> category id once instead of calling
    # ``class_names.index`` for every box. ``setdefault`` keeps the first
    # occurrence, which is what ``list.index`` would have returned.
    category_ids = {}
    for position, name in enumerate(class_names):
        category_ids.setdefault(name, position + 1)

    images = []
    annotations = []
    for file_name in os.listdir(gt_path):
        lines_list = file_lines_to_list(os.path.join(gt_path, file_name))
        image_id = os.path.splitext(file_name)[0]

        # -----------------------------------------------------------------#
        #   Original author's note (translated): thanks to a user's
        #   reminder, storing the image id as a string resolved the
        #   'Results do not correspond to current coco set' problem.
        # -----------------------------------------------------------------#
        images.append({
            'file_name': image_id + '.jpg',
            'width': 1,
            'height': 1,
            'id': str(image_id),
        })

        for line in lines_list:
            tokens = line.split()
            if not tokens:
                # Blank line: nothing to parse.
                continue

            # Only a trailing "difficult" token marks the box as difficult.
            # A plain substring test on the whole line would also fire on
            # class names that merely contain the word.
            difficult = 0
            if tokens[-1] == "difficult":
                difficult = 1
                tokens = tokens[:-1]

            # The last four fields are the coordinates; everything before
            # them is the (possibly multi-word) class name.
            *name_parts, left, top, right, bottom = tokens
            class_name = " ".join(name_parts)

            left, top, right, bottom = float(left), float(top), float(right), float(bottom)
            cls_id = category_ids.get(class_name)
            if cls_id is None:
                continue

            box_width = right - left
            box_height = bottom - top
            annotations.append({
                # NOTE(review): the area is reduced by a fixed 10.0; the
                # reason is not evident from this function - confirm before
                # changing it.
                'area': box_width * box_height - 10.0,
                'category_id': cls_id,
                'image_id': str(image_id),
                # The "difficult" flag is exported as the iscrowd field.
                'iscrowd': difficult,
                # Box is stored as [x, y, width, height].
                'bbox': [left, top, box_width, box_height],
                # Running index over all kept boxes of all images.
                'id': len(annotations),
            })

    categories = [
        {'supercategory': cls, 'name': cls, 'id': position + 1}
        for position, cls in enumerate(class_names)
    ]

    return {
        'images': images,
        'categories': categories,
        'annotations': annotations,
    }
|
||||
|
||||
def preprocess_dr(dr_path, class_names):
    """Collect detection results from per-image text files into a flat list.

    Every entry of ``dr_path`` is read with ``file_lines_to_list``. Each line
    is parsed as ``<class name> <confidence> <left> <top> <right> <bottom>``,
    where the class name may consist of several space-separated words.

    Args:
        dr_path: Directory containing one detection-result text file per
            image.
        class_names: List of known class names. The category id of a class
            is its position in this list plus one. Detections whose class is
            not listed are dropped.

    Returns:
        list: One dict per kept detection with the keys ``"image_id"``,
        ``"category_id"``, ``"bbox"`` (``[x, y, width, height]``) and
        ``"score"``.
    """
    detections = []
    for file_name in os.listdir(dr_path):
        rows = file_lines_to_list(os.path.join(dr_path, file_name))
        # The image id is the file name without its extension.
        stem = os.path.splitext(file_name)[0]

        for row in rows:
            tokens = row.split()
            # The last five fields are fixed; everything before them is the
            # (possibly multi-word) class name.
            confidence, left, top, right, bottom = tokens[-5:]
            class_name = " ".join(tokens[:-5])

            x_min, y_min, x_max, y_max = float(left), float(top), float(right), float(bottom)

            if class_name in class_names:
                detections.append({
                    "image_id": str(stem),
                    "category_id": class_names.index(class_name) + 1,
                    "bbox": [x_min, y_min, x_max - x_min, y_max - y_min],
                    "score": float(confidence),
                })
    return detections
|
||||
|
||||
def get_coco_map(class_names, path):
    """Run a COCO bounding-box evaluation over the result files under ``path``.

    Ground truth is read from ``<path>/ground-truth`` and detections from
    ``<path>/detection-results``. Both are converted with ``preprocess_gt``
    and ``preprocess_dr`` and written as JSON into ``<path>/coco_eval``
    (created when missing) before being handed to ``COCO`` / ``COCOeval``.

    Args:
        class_names: Class names forwarded to the two preprocessing helpers.
        path: Root directory holding the ``ground-truth`` and
            ``detection-results`` sub-directories.

    Returns:
        ``cocoEval.stats`` after ``evaluate``, ``accumulate`` and
        ``summarize`` have run, or a list of twelve zeros when no detection
        was found.
    """
    gt_dir = os.path.join(path, 'ground-truth')
    dr_dir = os.path.join(path, 'detection-results')
    coco_dir = os.path.join(path, 'coco_eval')

    if not os.path.exists(coco_dir):
        os.makedirs(coco_dir)

    gt_json_path = os.path.join(coco_dir, 'instances_gt.json')
    dr_json_path = os.path.join(coco_dir, 'instances_dr.json')

    with open(gt_json_path, "w") as gt_file:
        ground_truth = preprocess_gt(gt_dir, class_names)
        json.dump(ground_truth, gt_file, indent=4)

    with open(dr_json_path, "w") as dr_file:
        detections = preprocess_dr(dr_dir, class_names)
        json.dump(detections, dr_file, indent=4)
        if len(detections) == 0:
            # Nothing was detected: report it and skip the evaluation.
            print("未检测到任何目标。")
            return [0] * 12

    coco_gt = COCO(gt_json_path)
    coco_dt = coco_gt.loadRes(dr_json_path)
    evaluator = COCOeval(coco_gt, coco_dt, 'bbox')
    evaluator.evaluate()
    evaluator.accumulate()
    evaluator.summarize()

    return evaluator.stats
|
||||
Reference in New Issue
Block a user