在上一章,我們了解了demo,現(xiàn)在開始準備根據(jù)自己的場景去訓練自己的數(shù)據(jù),實現(xiàn)自己的目標檢測。
在前面我們知道機器要識別一個問題,需要一個訓練的過程,所以我們首先第一步是要對物體進行標注。這里就不使用官方的數(shù)據(jù)集了,先自己準備一個自己需要學習的圖片集合(大概每個種類100張左右)
第一步:建立圖片文件夾
mkdir images
然后把需要學習的圖片放到文件夾里面
第二步:用labelme進行標記(如何安裝,請自行百度),標注結(jié)束后,會在images下新增和圖片一一對應(yīng)的json文件
第三步:把labelme格式的文件轉(zhuǎn)為coco格式(PaddleDetection不支持labelme格式),這里可以使用PaddleDetection下的工具tools/x2coco.py,
python3 tools/tc_x2coco.py \
--dataset_type labelme \
--json_input_dir dataset/lables/images_yt \
--image_input_dir dataset/lables/images_yt \
--output_dir dataset/lables/images_yt/output \
--train_proportion 0.8 \
--val_proportion 0.2 \
--test_proportion 0.0
這里存在兩個坑。
坑一:labelme生成的json和圖片默認是在同一個文件夾下,這里用tools/x2coco.py會導致生成的訓練文件和驗證文件減少一半,主要是由于上面圖中的代碼導致,并且容易出現(xiàn)json和圖片不對應(yīng)的問題,導致最終訓練結(jié)果失敗,我開始就是被這里坑了,以為是自己標注問題,來來回回刪除了好幾次標注由重新標,最后發(fā)現(xiàn)不是自己的問題,是被這里的bug坑了
坑二,如果你有幸json文件和圖片文件不在一個文件夾,是分為兩個文件夾,這里還有一個小坑,不過問題不嚴重,你可以看到上圖的count=1,這里導致生成的train比應(yīng)該生成的少了一個,應(yīng)該為count=0
填完上面的兩個坑后,會在輸出文件夾下得到三個文件夾
Annotation、train、val
這里就是需要訓練的素材了,按照自己的實際路徑修改configs/yolov3_mobilenet_v1_coco.yml的配置,其中分類數(shù)量也需要改為自己的實際分類數(shù)量,然后開始訓練
python3 tools/train.py -c configs/yolov3_mobilenet_v1_coco.yml --eval -o use_gpu=false
漫長的訓練過程…………
這里其實有幾個地方需要注意:
1、batch_size 每次訓練的批次大小,和內(nèi)存相關(guān),內(nèi)存大的可以調(diào)整大一點,但是需要控制內(nèi)存使用率不能超過80%,否則更加慢,我自己的是設(shè)置batch_size: 2,
2、worker_num,和自己的cpu核數(shù)有關(guān),略小于自己的核數(shù)比較合適,我的是i5是6C,我這里設(shè)置worker_num: 4,否則容易cpu跑滿,夯住
這里附上我自己修改后的x2coco
#!/usr/bin/env python
# coding: utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import glob
import json
import os
import os.path as osp
import sys
import shutil
import xml.etree.ElementTree as ET
from tqdm import tqdm
import re
import numpy as np
import PIL.ImageDraw
label_to_num = {}
categories_list = []
labels_list = []
class MyEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
elif isinstance(obj, np.floating):
return float(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
else:
return super(MyEncoder, self).default(obj)
def getbbox(self, points):
polygons = points
mask = self.polygons_to_mask([self.height, self.width], polygons)
return self.mask2box(mask)
def images_labelme(data, num):
image = {}
image['height'] = data['imageHeight']
image['width'] = data['imageWidth']
image['id'] = num + 1
image['file_name'] = data['imagePath'].split('/')[-1]
return image
def images_cityscape(data, num, img_file):
image = {}
image['height'] = data['imgHeight']
image['width'] = data['imgWidth']
image['id'] = num + 1
image['file_name'] = img_file
return image
def categories(label, labels_list):
category = {}
category['supercategory'] = 'component'
category['id'] = len(labels_list) + 1
category['name'] = label
return category
def annotations_rectangle(points, label, image_num, object_num, label_to_num):
annotation = {}
seg_points = np.asarray(points).copy()
seg_points[1, :] = np.asarray(points)[2, :]
seg_points[2, :] = np.asarray(points)[1, :]
annotation['segmentation'] = [list(seg_points.flatten())]
annotation['iscrowd'] = 0
annotation['image_id'] = image_num + 1
annotation['bbox'] = list(
map(float, [
points[0][0], points[0][1], points[1][0] - points[0][0], points[1][
1] - points[0][1]
]))
annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3]
annotation['category_id'] = label_to_num[label]
annotation['id'] = object_num + 1
return annotation
def annotations_polygon(height, width, points, label, image_num, object_num,
label_to_num):
annotation = {}
annotation['segmentation'] = [list(np.asarray(points).flatten())]
annotation['iscrowd'] = 0
annotation['image_id'] = image_num + 1
annotation['bbox'] = list(map(float, get_bbox(height, width, points)))
annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3]
annotation['category_id'] = label_to_num[label]
annotation['id'] = object_num + 1
return annotation
def get_bbox(height, width, points):
polygons = points
mask = np.zeros([height, width], dtype=np.uint8)
mask = PIL.Image.fromarray(mask)
xy = list(map(tuple, polygons))
PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
mask = np.array(mask, dtype=bool)
index = np.argwhere(mask == 1)
rows = index[:, 0]
clos = index[:, 1]
left_top_r = np.min(rows)
left_top_c = np.min(clos)
right_bottom_r = np.max(rows)
right_bottom_c = np.max(clos)
return [
left_top_c, left_top_r, right_bottom_c - left_top_c,
right_bottom_r - left_top_r
]
def deal_json(ds_type, img_path, json_path):
data_coco = {}
images_list = []
annotations_list = []
image_num = -1
object_num = -1
for img_file in get_filelist_from_dir(img_path,support_ext=".jpg|.jpeg|.png"):
img_label = os.path.splitext(img_file)[0]
if img_file.split('.')[
-1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']:
continue
label_file = osp.join(json_path, img_label + '.json')
print('Generating dataset from:', label_file)
image_num = image_num + 1
with open(label_file) as f:
data = json.load(f)
if ds_type == 'labelme':
images_list.append(images_labelme(data, image_num))
elif ds_type == 'cityscape':
images_list.append(images_cityscape(data, image_num, img_file))
if ds_type == 'labelme':
for shapes in data['shapes']:
object_num = object_num + 1
label = shapes['label']
if label not in labels_list:
categories_list.append(categories(label, labels_list))
labels_list.append(label)
label_to_num[label] = len(labels_list)
p_type = shapes['shape_type']
if p_type == 'polygon':
points = shapes['points']
annotations_list.append(
annotations_polygon(data['imageHeight'], data[
'imageWidth'], points, label, image_num,
object_num, label_to_num))
if p_type == 'rectangle':
(x1, y1), (x2, y2) = shapes['points']
x1, x2 = sorted([x1, x2])
y1, y2 = sorted([y1, y2])
points = [[x1, y1], [x2, y2], [x1, y2], [x2, y1]]
annotations_list.append(
annotations_rectangle(points, label, image_num,
object_num, label_to_num))
elif ds_type == 'cityscape':
for shapes in data['objects']:
object_num = object_num + 1
label = shapes['label']
if label not in labels_list:
categories_list.append(categories(label, labels_list))
labels_list.append(label)
label_to_num[label] = len(labels_list)
points = shapes['polygon']
annotations_list.append(
annotations_polygon(data['imgHeight'], data[
'imgWidth'], points, label, image_num, object_num,
label_to_num))
data_coco['images'] = images_list
data_coco['categories'] = categories_list
data_coco['annotations'] = annotations_list
return data_coco
def voc_get_label_anno(ann_dir_path, ann_ids_path, labels_path):
with open(labels_path, 'r') as f:
labels_str = f.read().split()
labels_ids = list(range(1, len(labels_str) + 1))
with open(ann_ids_path, 'r') as f:
ann_ids = f.read().split()
ann_paths = []
for aid in ann_ids:
if aid.endswith('xml'):
ann_path = os.path.join(ann_dir_path, aid)
else:
ann_path = os.path.join(ann_dir_path, aid + '.xml')
ann_paths.append(ann_path)
return dict(zip(labels_str, labels_ids)), ann_paths
def voc_get_image_info(annotation_root, im_id):
filename = annotation_root.findtext('filename')
assert filename is not None
img_name = os.path.basename(filename)
size = annotation_root.find('size')
width = float(size.findtext('width'))
height = float(size.findtext('height'))
image_info = {
'file_name': filename,
'height': height,
'width': width,
'id': im_id
}
return image_info
def voc_get_coco_annotation(obj, label2id):
label = obj.findtext('name')
assert label in label2id, "label is not in label2id."
category_id = label2id[label]
bndbox = obj.find('bndbox')
xmin = float(bndbox.findtext('xmin')) - 1
ymin = float(bndbox.findtext('ymin')) - 1
xmax = float(bndbox.findtext('xmax'))
ymax = float(bndbox.findtext('ymax'))
assert xmax > xmin and ymax > ymin, "Box size error."
o_width = xmax - xmin
o_height = ymax - ymin
anno = {
'area': o_width * o_height,
'iscrowd': 0,
'bbox': [xmin, ymin, o_width, o_height],
'category_id': category_id,
'ignore': 0,
}
return anno
def voc_xmls_to_cocojson(annotation_paths, label2id, output_dir, output_file):
output_json_dict = {
"images": [],
"type": "instances",
"annotations": [],
"categories": []
}
bnd_id = 1 # bounding box start id
im_id = 0
print('Start converting !')
for a_path in tqdm(annotation_paths):
# Read annotation xml
ann_tree = ET.parse(a_path)
ann_root = ann_tree.getroot()
img_info = voc_get_image_info(ann_root, im_id)
im_id += 1
img_id = img_info['id']
output_json_dict['images'].append(img_info)
for obj in ann_root.findall('object'):
ann = voc_get_coco_annotation(obj=obj, label2id=label2id)
ann.update({'image_id': img_id, 'id': bnd_id})
output_json_dict['annotations'].append(ann)
bnd_id = bnd_id + 1
for label, label_id in label2id.items():
category_info = {'supercategory': 'none', 'id': label_id, 'name': label}
output_json_dict['categories'].append(category_info)
output_file = os.path.join(output_dir, output_file)
with open(output_file, 'w') as f:
output_json = json.dumps(output_json_dict)
f.write(output_json)
def get_filelist_from_dir(dir, support_ext=".jpg|.jpeg|.png"):
'''
獲取指定目錄下的指定后綴文件名,返回列表,不帶路徑
'''
if (not os.path.exists(dir) or not os.path.isdir(dir)):
raise Exception("Image Directory [%s] invalid" % dir)
files = []
for item in os.listdir(dir):
ext = os.path.splitext(item)[1][1:].strip().lower()
if (len(ext) > 0 and ext in support_ext):
files.append(item)
return files
def main():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--dataset_type', help='the type of dataset')
parser.add_argument('--json_input_dir', help='input annotated directory')
parser.add_argument('--image_input_dir', help='image directory')
parser.add_argument(
'--output_dir', help='output dataset directory', default='./')
parser.add_argument(
'--train_proportion',
help='the proportion of train dataset',
type=float,
default=1.0)
parser.add_argument(
'--val_proportion',
help='the proportion of validation dataset',
type=float,
default=0.0)
parser.add_argument(
'--test_proportion',
help='the proportion of test dataset',
type=float,
default=0.0)
parser.add_argument(
'--voc_anno_dir',
help='In Voc format dataset, path to annotation files directory.',
type=str,
default=None)
parser.add_argument(
'--voc_anno_list',
help='In Voc format dataset, path to annotation files ids list.',
type=str,
default=None)
parser.add_argument(
'--voc_label_list',
help='In Voc format dataset, path to label list. The content of each line is a category.',
type=str,
default=None)
parser.add_argument(
'--voc_out_name',
type=str,
default='voc.json',
help='In Voc format dataset, path to output json file')
args = parser.parse_args()
try:
assert args.dataset_type in ['voc', 'labelme', 'cityscape']
except AssertionError as e:
print(
'當前只支持 voc, cityscape dataset and labelme 的數(shù)據(jù)格式轉(zhuǎn)換!!')
os._exit(0)
if args.dataset_type == 'voc':
assert args.voc_anno_dir and args.voc_anno_list and args.voc_label_list
label2id, ann_paths = voc_get_label_anno(
args.voc_anno_dir, args.voc_anno_list, args.voc_label_list)
voc_xmls_to_cocojson(
annotation_paths=ann_paths,
label2id=label2id,
output_dir=args.output_dir,
output_file=args.voc_out_name)
else:
try:
assert os.path.exists(args.json_input_dir)
except AssertionError as e:
print('The json folder does not exist!')
os._exit(0)
try:
assert os.path.exists(args.image_input_dir)
except AssertionError as e:
print('The image folder does not exist!')
os._exit(0)
try:
assert abs(args.train_proportion + args.val_proportion \
+ args.test_proportion - 1.0) < 1e-5
except AssertionError as e:
print(
'The sum of pqoportion of training, validation and test datase must be 1!'
)
os._exit(0)
# Allocate the dataset.
total_num = len(glob.glob(osp.join(args.json_input_dir, '*.json')))
if args.train_proportion != 0:
train_num = int(total_num * args.train_proportion)
# 增加判斷,判斷輸出的文件夾是否存在
exist=os.path.exists(args.output_dir)
if exist:
print('目標文件夾已經(jīng)存在,正在刪除!')
shutil.rmtree(args.output_dir)
os.makedirs(args.output_dir + '/train')
else:
train_num = 0
if args.val_proportion == 0.0:
val_num = 0
test_num = total_num - train_num
if args.test_proportion != 0.0:
os.makedirs(args.output_dir + '/test')
else:
val_num = int(total_num * args.val_proportion)
test_num = total_num - train_num - val_num
os.makedirs(args.output_dir + '/val')
if args.test_proportion != 0.0:
os.makedirs(args.output_dir + '/test')
count = 0
# 這里有bug,沒有過濾文件夾下的文件類型,如果json文件和圖片文件在一個文件夾下會導致訓練樣本和驗證樣本不準確
for img_name in get_filelist_from_dir(args.image_input_dir,support_ext=".jpg|.jpeg|.png"):
jsonfilename=os.path.basename(img_name).rsplit('.',1)[0]+'.json'
if count <= train_num:
if osp.exists(args.output_dir + '/train/'):
shutil.copyfile(
osp.join(args.image_input_dir, img_name),
osp.join(args.output_dir + '/train/', img_name))
shutil.copyfile(
osp.join(args.json_input_dir, jsonfilename),
osp.join(args.output_dir + '/train/', jsonfilename))
else:
if count <= train_num + val_num:
if osp.exists(args.output_dir + '/val/'):
shutil.copyfile(
osp.join(args.image_input_dir, img_name),
osp.join(args.output_dir + '/val/', img_name))
shutil.copyfile(
osp.join(args.json_input_dir, jsonfilename),
osp.join(args.output_dir + '/val/', jsonfilename))
else:
if osp.exists(args.output_dir + '/test/'):
shutil.copyfile(
osp.join(args.image_input_dir, img_name),
osp.join(args.output_dir + '/test/', img_name))
shutil.copyfile(
osp.join(args.json_input_dir, jsonfilename),
osp.join(args.output_dir + '/test/', jsonfilename))
count = count + 1
# Deal with the json files.
if not os.path.exists(args.output_dir + '/annotations'):
os.makedirs(args.output_dir + '/annotations')
if args.train_proportion != 0:
train_data_coco = deal_json(args.dataset_type,
args.output_dir + '/train',
args.json_input_dir)
train_json_path = osp.join(args.output_dir + '/annotations',
'train.json')
json.dump(
train_data_coco,
open(train_json_path, 'w'),
indent=4,
cls=MyEncoder)
if args.val_proportion != 0:
val_data_coco = deal_json(args.dataset_type,
args.output_dir + '/val',
args.json_input_dir)
val_json_path = osp.join(args.output_dir + '/annotations',
'val.json')
json.dump(
val_data_coco,
open(val_json_path, 'w'),
indent=4,
cls=MyEncoder)
if args.test_proportion != 0:
test_data_coco = deal_json(args.dataset_type,
args.output_dir + '/test',
args.json_input_dir)
test_json_path = osp.join(args.output_dir + '/annotations',
'test.json')
json.dump(
test_data_coco,
open(test_json_path, 'w'),
indent=4,
cls=MyEncoder)
if __name__ == '__main__':
main()