VGG-16卷积神经网络
VGG-16模型是由牛津大学的Visual Geometry Group(VGG)在2014年提出的一种卷积神经网络(CNN)架构。
它以其深度而闻名,包括16层,其中包括13个卷积层和3个全连接层。
VGG-16以其简洁和高效而著称,能够在各种计算机视觉任务上取得出色的表现,包括图像分类和物体识别。
该模型的架构包括一系列卷积层,后跟最大池化层,深度逐渐增加。
这种设计使模型能够学习复杂的视觉特征的分层表示,从而实现强大且准确的预测。
尽管与更近期的架构相比,VGG-16相对简单,但由于其多功能性和出色的性能,仍然是许多深度学习应用的热门选择。
该模型在李飞飞的ImageNet数据集上实现了92.7%的 top-5 测试准确率,该数据集包含属于1000个类别的1400万张图像。
电气元件图像分类Demo
目的:用于识别原理图上的电气元件分类
导入包
# 文件相关
import os
from os.path import join
import shutil
# 图像处理
from PIL import Image, ImageChops, ImageDraw, ImageFont
# 机器学习
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.models import load_model
import tensorflow as tf
from sklearn.model_selection import train_test_split
# 工具
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import random
print(f"TF version {tf.__version__}")
TF version 2.13.0
# A single physical GPU can expose multiple logical GPUs; enabling memory
# growth lets TensorFlow allocate GPU memory on demand instead of grabbing
# it all at once, which uses GPU resources more efficiently during training.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth must be configured before any GPU is initialized.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "物理GPU,", len(logical_gpus), "逻辑GPU")
    except RuntimeError as e:
        # set_memory_growth raises RuntimeError when called after GPU init.
        print(e)
gpus  # bare expression: displays the device list when run in a notebook cell
定义一个字典,包含一系列用于图像处理和模型训练的参数。
# Classes: Resistor, Capacitor, Inductor, Diode,
# Corner, Junction, Cross, GND (ground).
# Hyper-parameters for image augmentation, dataset layout and training.
# (Plain dict literal: the original `dict({...})` built a dict and then
# copied it for no benefit.)
SPICENET_PARAMS = {
    "src_path": "img_src",            # directory holding the reference symbol images
    "dataset_path": "dataset",        # output root for the train/test split
    "resize": 224,                    # VGG16 input resolution (224x224)
    "offset_x": range(-30, 30, 5),    # horizontal shift augmentation, pixels
    "offset_y": range(-30, 30, 5),    # vertical shift augmentation, pixels
    "rotation": range(0, 180, 90),    # rotation angles in degrees
    "zoom": range(100, 110, 1),       # zoom factors in percent
    "letter_noise_n": 1,              # random component labels drawn per image
    "contrast_thresh": 170,           # binarization threshold (0-255)
    "train_test_ratio": 0.2,          # fraction of images held out for testing
    "only_train_test_split": True,    # delete intermediate data after splitting
    "batches": 20,                    # batch size for the generators
    "epochs": 2,                      # training epochs
    "class_list": ["Resistor",
                   "Capacitor",
                   "Inductor",
                   "Diode",
                   "Corner",
                   "Junction",
                   "Cross",
                   "GND"],
}
数据生成
定义了几个函数,用于创建数据集、添加噪声、缩放图像以及将数据集划分为训练集和测试集。
def create_dataset_from_original(reference_image_path: str, output_path: str):
    """Generate augmented training images from one reference symbol image.

    Applies letter noise, x/y offsets, binarization, rotations and zooms
    according to SPICENET_PARAMS, saving each variant as
    ``<output_path><index>.png``.
    """
    reference_image = Image.open(reference_image_path)
    i = 2  # file index starts at 2 — presumably index 1 is the original; TODO confirm
    reference_image = add_letter_noise(reference_image,
                                       amount=SPICENET_PARAMS["letter_noise_n"])
    # Binarization function is loop-invariant: hoist it out of the loops.
    threshold = SPICENET_PARAMS["contrast_thresh"]

    def binarize(pixel):
        return 255 if pixel > threshold else 0

    for offset_value_x in SPICENET_PARAMS["offset_x"]:
        for offset_value_y in SPICENET_PARAMS["offset_y"]:
            offset_img = ImageChops.offset(reference_image, offset_value_x, offset_value_y)
            offset_img = offset_img.convert('L').point(binarize, mode='1')
            for rot_value in SPICENET_PARAMS["rotation"]:
                rotated_img = offset_img.rotate(-rot_value)
                for zoom_value in SPICENET_PARAMS["zoom"]:
                    # BUG FIX: the original reassigned `new_img`, so each zoom
                    # was applied on top of the previous zoom, compounding the
                    # scale. Each zoom level must start from the rotated image.
                    zoomed_img = scale_image(rotated_img, (0.5, 0.5), zoom_value / 100)
                    zoomed_img.save(output_path + str(i) + ".png", "PNG")
                    i += 1
def add_letter_noise(img: Image, amount=1):
    """Draw ``amount`` random component labels (R1, C2, ...) onto *img*.

    Mimics the reference-designator text that appears next to symbols on
    real schematics, so the classifier learns to ignore it. Mutates *img*
    in place and returns it.
    """
    components = ['R', 'R1', 'R2', 'R3', 'C', 'C1', 'C2', 'C3', 'L', 'L1', 'L2', 'L3']
    font_size = 30
    # NOTE(review): "arial.ttf" resolves via system font paths and is
    # platform-dependent; on non-Windows hosts a full path may be required.
    font = ImageFont.truetype("arial.ttf", font_size)
    draw = ImageDraw.Draw(img)
    for _ in range(amount):
        component = random.choice(components)
        # Clamp the upper bound so randint never receives an empty range
        # when the image is smaller than the font size.
        x = random.randint(0, max(0, img.width - font_size))
        y = random.randint(0, max(0, img.height - font_size))
        color = (0, 0, 0)  # black text
        draw.text((x, y), component, font=font, fill=color)
    return img
def scale_image(img: Image, xy, scale_factor):
    """Zoom into *img* by *scale_factor*, centered on the relative point *xy*.

    *xy* is a pair of fractions (e.g. ``(0.5, 0.5)`` for the center); the
    crop box is clipped to the image bounds. Returns the cropped image.
    """
    center_x = img.size[0] * xy[0]
    center_y = img.size[1] * xy[1]
    # Half-extent of the crop window: a larger scale_factor crops a
    # smaller region, i.e. zooms in.
    half_w = (img.size[0] / scale_factor) / 2
    half_h = (img.size[1] / scale_factor) / 2
    box = (
        max(0, int(center_x - half_w)),            # left
        max(0, int(center_y - half_h)),            # upper
        min(img.size[0], int(center_x + half_w)),  # right
        min(img.size[1], int(center_y + half_h)),  # lower
    )
    return img.crop(box)
def train_valid_dataset(dataset_path, train_path, test_path):
    """Randomly split the files in *dataset_path* into train/test sets and
    copy them into *train_path* / *test_path*.

    The split ratio comes from SPICENET_PARAMS["train_test_ratio"].
    """
    symbols = os.listdir(dataset_path)
    symbols.sort()  # deterministic order before the random split
    # train_test_split returns [train_items, test_items]; pair each list
    # with its destination directory instead of indexing parallel lists.
    splits = train_test_split(symbols, test_size=SPICENET_PARAMS["train_test_ratio"])
    for split, dest_path in zip(splits, (train_path, test_path)):
        for image in split:
            # join() instead of manual '/' string concatenation.
            shutil.copy(join(dataset_path, image), join(dest_path, image))
根据参数和文件路径,创建数据集目录,并将原始数据集按照训练集和测试集进行划分。
# Choose the working directory: a throwaway temp dir when only the final
# train/test split is kept, otherwise a persistent raw_dataset dir.
if SPICENET_PARAMS["only_train_test_split"]:
    data_dir = "./temp"
else:
    data_dir = "./raw_dataset"
# Recreate the directory from scratch (os.path.exists instead of the
# awkward `os.access(...) == True`).
if os.path.exists(data_dir):
    shutil.rmtree(data_dir)  # NOTE(review): original comment said "doesnt work" — verify cleanup succeeds
os.mkdir(data_dir)
# Collect the reference images (one per class) from the source directory.
parts = [p for p in os.listdir(SPICENET_PARAMS["src_path"]) if p.endswith((".png", ".PNG",
                                                                           ".jpg", ".jpeg",
                                                                           ".JPG", ".JPEG"))]
# Generate the augmented dataset for every reference image. Each file is
# expected to be named "<digit><name>.<ext>" where the leading digit (1-8)
# selects the class in SPICENET_PARAMS["class_list"] — TODO confirm naming.
for part in parts:
    part_path = join(data_dir, part[0])
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair (race-free).
    os.makedirs(part_path, exist_ok=True)
    output_path = join(part_path, os.path.splitext(part)[0] + "_")
    reference_image_path = join(SPICENET_PARAMS["src_path"], part)
    print(f"创建 '{(SPICENET_PARAMS['class_list'])[int(part[0])-1]}' 符号...")
    create_dataset_from_original(reference_image_path, output_path)
# Rebuild the dataset root with empty train/ and test/ subdirectories.
dataset_path = SPICENET_PARAMS["dataset_path"]
# os.path.exists instead of `os.access(...) == True`.
if os.path.exists(dataset_path):
    shutil.rmtree(dataset_path)
# makedirs creates the dataset root and the subdirectory in one call.
os.makedirs(join(dataset_path, "train"))
os.makedirs(join(dataset_path, "test"))
print("创建数据集目录...")
# Split every class directory into train/test, then copy the hand-picked
# validation images alongside them.
for part in parts:
    part_train_path = join(dataset_path, "train", part[0])
    part_test_path = join(dataset_path, "test", part[0])
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pairs.
    os.makedirs(part_train_path, exist_ok=True)
    os.makedirs(part_test_path, exist_ok=True)
    part_path = join(data_dir, part[0])
    train_valid_dataset(dataset_path=part_path,
                        train_path=part_train_path,
                        test_path=part_test_path)
# The validation set lives in ./validation and is copied as-is.
shutil.copytree('./validation', join(dataset_path, 'valid'))
if SPICENET_PARAMS["only_train_test_split"]:
    # Drop the intermediate augmented images; only the split remains.
    shutil.rmtree(data_dir)
print("Done.")
训练集共有126,687 个图片,8 个分类。
# Display every reference symbol image in a single row of subplots.
parts = os.listdir(SPICENET_PARAMS["src_path"])
SPICENET_PARAMS["num_classes"] = len(SPICENET_PARAMS["class_list"])
fig, axs = plt.subplots(nrows=1, ncols=len(parts), figsize=(50, 50))
for axis, part in zip(axs, parts):
    symbol_img = plt.imread(join(SPICENET_PARAMS["src_path"], part))
    axis.imshow(symbol_img)
    axis.title.set_text(os.path.splitext(part)[0])
# Display one sample image from each class folder of the training set.
path = join(SPICENET_PARAMS["dataset_path"], "train")
parts = [p for p in os.listdir(path) if os.path.isdir(join(path, p))]
fig, axs = plt.subplots(nrows=1, ncols=len(parts), figsize=(50, 50))
for axis, part in zip(axs, parts):
    first_file = os.listdir(join(path, part))[0]
    sample_img = plt.imread(join(path, part, first_file))
    axis.imshow(sample_img)
    axis.title.set_text(part)
开始训练
# Data generator that applies the VGG16-specific preprocessing to every image.
data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
# Stream labelled training batches directly from the train/ directory tree.
train_generator = data_generator.flow_from_directory(
    join(SPICENET_PARAMS["dataset_path"], "train"),
    target_size=(SPICENET_PARAMS["resize"],) * 2,  # square input, e.g. (224, 224)
    batch_size=SPICENET_PARAMS["batches"],
    class_mode='categorical')
Found 126687 images belonging to 8 classes.
# Test-set generator: same preprocessing, streamed from the test/ directory.
test_generator = data_generator.flow_from_directory(
    join(SPICENET_PARAMS["dataset_path"], "test"),
    target_size=(SPICENET_PARAMS["resize"],) * 2,  # square input, e.g. (224, 224)
    batch_size=SPICENET_PARAMS["batches"],
    class_mode='categorical')
Found 89709 images belonging to 8 classes.
# Sequential is Keras's simplest model type: layers are stacked in order.
# The first layer is a pretrained VGG16 network used as a feature extractor.
base_model = VGG16(
    include_top=False,   # drop the ImageNet classification head
    pooling='avg',       # global average pooling -> flat feature vector
    weights='imagenet',  # start from ImageNet-pretrained weights
)
vgg16_model = Sequential()
vgg16_model.add(base_model)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
58889256/58889256 [==============================] - 3s 0us/step
# Append a softmax classification head sized to the number of classes,
# freeze the pretrained VGG16 base (layer 0) so only the head is trained,
# then print an architecture/parameter summary.
vgg16_model.add(Dense(SPICENET_PARAMS["num_classes"], activation='softmax'))
base_layer = vgg16_model.layers[0]
base_layer.trainable = False  # keep the ImageNet weights fixed
vgg16_model.summary()
# Compile with the Adam optimizer and categorical cross-entropy loss;
# accuracy is tracked as the evaluation metric.
vgg16_model.compile(optimizer='adam',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
# One epoch = one full pass over each generator (len() = number of batches).
steps_per_epoch_training = len(train_generator)
steps_per_epoch_test = len(test_generator)
# 对 VGG16 模型的训练,并将训练好的模型保存到了本地文件中,以备将来使用。您可以根据 fit_history 中的记录来分析模型的训练情况,以及模型在验证集上的表现。
# Train the classification head; the test generator doubles as the
# validation stream. fit() returns the per-epoch history (loss/accuracy)
# for later analysis of training and validation performance.
fit_history = vgg16_model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch_training,
    epochs=SPICENET_PARAMS["epochs"],
    validation_data=test_generator,
    validation_steps=steps_per_epoch_test,
    validation_batch_size=SPICENET_PARAMS["batches"],
    verbose=1,
)
# Persist the trained model for later reuse. (Plain string literal: the
# original f-string had no placeholders.)
vgg16_model.save('SPICEnet.h5')
昨晚开始训练,花了6个小时。
Epoch 1/2
6335/6335 [==============================] - 11678s 2s/step - loss: 0.1048 - accuracy: 0.9743 - val_loss: 0.0142 - val_accuracy: 0.9964
Epoch 2/2
6335/6335 [==============================] - 10713s 2s/step - loss: 0.0128 - accuracy: 0.9962 - val_loss: 0.0101 - val_accuracy: 0.9972
评估
# Validation generator: unshuffled so that prediction order matches the
# directory order (one image per class in valid/).
evaluate_generator = data_generator.flow_from_directory(
    join(SPICENET_PARAMS["dataset_path"], "valid"),
    target_size=(SPICENET_PARAMS["resize"],) * 2,  # square input, e.g. (224, 224)
    shuffle=False)
Found 8 images belonging to 8 classes.
# Evaluate loss and accuracy on the held-out validation images.
vgg16_evaluate_history = vgg16_model.evaluate(evaluate_generator, verbose=1)
1/1 [==============================] - 0s 394ms/step - loss: 0.0060 - accuracy: 1.0000
print("VGG16 准确率:", vgg16_evaluate_history[1])
VGG16 准确率: 1.0
使用
# 使用 VGG16 模型进行图像预测,并可视化预测结果
# Predict class probabilities for the validation images, then map each
# argmax index back to its class name.
vgg16_predict = vgg16_model.predict(evaluate_generator)
vgg16_predict  # bare expression: displays the raw probabilities in a notebook
result_index = [SPICENET_PARAMS["class_list"][idx]
                for idx in vgg16_predict.argmax(axis=1)]
result_index  # displays the predicted class names
# Font config must be in effect before the figure is drawn so the Chinese
# suptitle renders with a CJK-capable font (SimHei).
plt.rcParams['font.sans-serif'] = ['SimHei']
# BUG FIX: the original called .next() twice, advancing the generator and
# potentially displaying a different batch than the one whose predictions
# are in result_index. Draw a single batch and reuse it.
batch = next(evaluate_generator)
batch_images = batch[0]
fig, axs = plt.subplots(nrows=1, ncols=len(SPICENET_PARAMS["class_list"]), figsize=(40, 5))
for ind, ax1 in enumerate(axs):
    # NOTE(review): these images went through preprocess_input (mean-subtracted
    # floats), while vmin/vmax assume a 0-255 range — colors may look off; verify.
    ax1.imshow(batch_images[ind], vmin=0, vmax=255)
    ax1.title.set_text(result_index[ind])
fig.suptitle('电气符号图像测试')
plt.show()