VGG-16卷积神经网络
VGG-16模型是由牛津大学的Visual Geometry Group(VGG)在2014年提出的一种卷积神经网络(CNN)架构。
它以其深度而闻名,包括16层,其中包括13个卷积层和3个全连接层。
VGG-16以其简洁和高效而著称,能够在各种计算机视觉任务上取得出色的表现,包括图像分类和物体识别。
该模型的架构包括一系列卷积层,后跟最大池化层,深度逐渐增加。
这种设计使模型能够学习复杂的视觉特征的分层表示,从而实现强大且准确的预测。
尽管与更近期的架构相比,VGG-16相对简单,但由于其多功能性和出色的性能,仍然是许多深度学习应用的热门选择。
该模型在李飞飞的ImageNet数据集上实现了92.7%的 top-5 测试准确率,该数据集包含属于1000个类别的1400万张图像。
电气元件图像分类Demo
目的:用于识别原理图上的电气元件分类
导入包
# 文件相关
import os
from os.path import join
import shutil
# 图像处理
from PIL import Image, ImageChops, ImageDraw, ImageFont
# 机器学习
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.models import load_model
import tensorflow as tf
from sklearn.model_selection import train_test_split
# 工具
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import random
print(f"TF version {tf.__version__}")
TF version 2.13.0
# A single physical GPU can expose multiple logical GPUs; enabling memory
# growth lets TensorFlow allocate GPU memory on demand instead of grabbing
# it all at once, which uses GPU resources more efficiently during training.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth must be configured before any GPU is initialized.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "物理GPU,", len(logical_gpus), "逻辑GPU")
    except RuntimeError as e:
        # set_memory_growth raises RuntimeError when called after GPU init.
        print(e)
gpus  # bare expression: displays the device list when run in a notebook cell
定义一个字典,包含一系列用于图像处理和模型训练的参数。
# Classes: Resistor, Capacitor, Inductor, Diode,
# Corner, Junction, Cross, GND (ground).
# Hyper-parameters for image augmentation, dataset layout and training.
# (Plain dict literal: the original `dict({...})` built a dict and then
# copied it for no benefit.)
SPICENET_PARAMS = {
    "src_path": "img_src",            # directory holding the reference symbol images
    "dataset_path": "dataset",        # output root for the train/test split
    "resize": 224,                    # VGG16 input resolution (224x224)
    "offset_x": range(-30, 30, 5),    # horizontal shift augmentation, pixels
    "offset_y": range(-30, 30, 5),    # vertical shift augmentation, pixels
    "rotation": range(0, 180, 90),    # rotation angles in degrees
    "zoom": range(100, 110, 1),       # zoom factors in percent
    "letter_noise_n": 1,              # random component labels drawn per image
    "contrast_thresh": 170,           # binarization threshold (0-255)
    "train_test_ratio": 0.2,          # fraction of images held out for testing
    "only_train_test_split": True,    # delete intermediate data after splitting
    "batches": 20,                    # batch size for the generators
    "epochs": 2,                      # training epochs
    "class_list": ["Resistor",
                   "Capacitor",
                   "Inductor",
                   "Diode",
                   "Corner",
                   "Junction",
                   "Cross",
                   "GND"],
}
数据生成
定义了几个函数,用于创建数据集、添加噪声、缩放图像以及将数据集划分为训练集和测试集。
def create_dataset_from_original(reference_image_path: str, output_path: str):
    """Generate augmented training images from one reference symbol image.

    Applies letter noise, x/y offsets, binarization, rotations and zooms
    according to SPICENET_PARAMS, saving each variant as
    ``<output_path><index>.png``.
    """
    reference_image = Image.open(reference_image_path)
    i = 2  # file index starts at 2 — presumably index 1 is the original; TODO confirm
    reference_image = add_letter_noise(reference_image,
                                       amount=SPICENET_PARAMS["letter_noise_n"])
    # Binarization function is loop-invariant: hoist it out of the loops.
    threshold = SPICENET_PARAMS["contrast_thresh"]

    def binarize(pixel):
        return 255 if pixel > threshold else 0

    for offset_value_x in SPICENET_PARAMS["offset_x"]:
        for offset_value_y in SPICENET_PARAMS["offset_y"]:
            offset_img = ImageChops.offset(reference_image, offset_value_x, offset_value_y)
            offset_img = offset_img.convert('L').point(binarize, mode='1')
            for rot_value in SPICENET_PARAMS["rotation"]:
                rotated_img = offset_img.rotate(-rot_value)
                for zoom_value in SPICENET_PARAMS["zoom"]:
                    # BUG FIX: the original reassigned `new_img`, so each zoom
                    # was applied on top of the previous zoom, compounding the
                    # scale. Each zoom level must start from the rotated image.
                    zoomed_img = scale_image(rotated_img, (0.5, 0.5), zoom_value / 100)
                    zoomed_img.save(output_path + str(i) + ".png", "PNG")
                    i += 1
def add_letter_noise(img: Image, amount=1):
    """Draw ``amount`` random component labels (R1, C2, ...) onto *img*.

    Mimics the reference-designator text that appears next to symbols on
    real schematics, so the classifier learns to ignore it. Mutates *img*
    in place and returns it.
    """
    components = ['R', 'R1', 'R2', 'R3', 'C', 'C1', 'C2', 'C3', 'L', 'L1', 'L2', 'L3']
    font_size = 30
    # NOTE(review): "arial.ttf" resolves via system font paths and is
    # platform-dependent; on non-Windows hosts a full path may be required.
    font = ImageFont.truetype("arial.ttf", font_size)
    draw = ImageDraw.Draw(img)
    for _ in range(amount):
        component = random.choice(components)
        # Clamp the upper bound so randint never receives an empty range
        # when the image is smaller than the font size.
        x = random.randint(0, max(0, img.width - font_size))
        y = random.randint(0, max(0, img.height - font_size))
        color = (0, 0, 0)  # black text
        draw.text((x, y), component, font=font, fill=color)
    return img
def scale_image(img: Image, xy, scale_factor):
    """Zoom into *img* by *scale_factor*, centered on the relative point *xy*.

    *xy* is a pair of fractions (e.g. ``(0.5, 0.5)`` for the center); the
    crop box is clipped to the image bounds. Returns the cropped image.
    """
    center_x = img.size[0] * xy[0]
    center_y = img.size[1] * xy[1]
    # Half-extent of the crop window: a larger scale_factor crops a
    # smaller region, i.e. zooms in.
    half_w = (img.size[0] / scale_factor) / 2
    half_h = (img.size[1] / scale_factor) / 2
    box = (
        max(0, int(center_x - half_w)),            # left
        max(0, int(center_y - half_h)),            # upper
        min(img.size[0], int(center_x + half_w)),  # right
        min(img.size[1], int(center_y + half_h)),  # lower
    )
    return img.crop(box)
def train_valid_dataset(dataset_path, train_path, test_path):
    """Randomly split the files in *dataset_path* into train/test sets and
    copy them into *train_path* / *test_path*.

    The split ratio comes from SPICENET_PARAMS["train_test_ratio"].
    """
    symbols = os.listdir(dataset_path)
    symbols.sort()  # deterministic order before the random split
    # train_test_split returns [train_items, test_items]; pair each list
    # with its destination directory instead of indexing parallel lists.
    splits = train_test_split(symbols, test_size=SPICENET_PARAMS["train_test_ratio"])
    for split, dest_path in zip(splits, (train_path, test_path)):
        for image in split:
            # join() instead of manual '/' string concatenation.
            shutil.copy(join(dataset_path, image), join(dest_path, image))
根据参数和文件路径,创建数据集目录,并将原始数据集按照训练集和测试集进行划分。
# Choose the working directory: a throwaway temp dir when only the final
# train/test split is kept, otherwise a persistent raw_dataset dir.
if SPICENET_PARAMS["only_train_test_split"]:
    data_dir = "./temp"
else:
    data_dir = "./raw_dataset"
# Recreate the directory from scratch (os.path.exists instead of the
# awkward `os.access(...) == True`).
if os.path.exists(data_dir):
    shutil.rmtree(data_dir)  # NOTE(review): original comment said "doesnt work" — verify cleanup succeeds
os.mkdir(data_dir)
# Collect the reference images (one per class) from the source directory.
parts = [p for p in os.listdir(SPICENET_PARAMS["src_path"]) if p.endswith((".png", ".PNG",
                                                                           ".jpg", ".jpeg",
                                                                           ".JPG", ".JPEG"))]
# Generate the augmented dataset for every reference image. Each file is
# expected to be named "<digit><name>.<ext>" where the leading digit (1-8)
# selects the class in SPICENET_PARAMS["class_list"] — TODO confirm naming.
for part in parts:
    part_path = join(data_dir, part[0])
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair (race-free).
    os.makedirs(part_path, exist_ok=True)
    output_path = join(part_path, os.path.splitext(part)[0] + "_")
    reference_image_path = join(SPICENET_PARAMS["src_path"], part)
    print(f"创建 '{(SPICENET_PARAMS['class_list'])[int(part[0])-1]}' 符号...")
    create_dataset_from_original(reference_image_path, output_path)
# Rebuild the dataset root with empty train/ and test/ subdirectories.
dataset_path = SPICENET_PARAMS["dataset_path"]
# os.path.exists instead of `os.access(...) == True`.
if os.path.exists(dataset_path):
    shutil.rmtree(dataset_path)
# makedirs creates the dataset root and the subdirectory in one call.
os.makedirs(join(dataset_path, "train"))
os.makedirs(join(dataset_path, "test"))
print("创建数据集目录...")
# Split every class directory into train/test, then copy the hand-picked
# validation images alongside them.
for part in parts:
    part_train_path = join(dataset_path, "train", part[0])
    part_test_path = join(dataset_path, "test", part[0])
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pairs.
    os.makedirs(part_train_path, exist_ok=True)
    os.makedirs(part_test_path, exist_ok=True)
    part_path = join(data_dir, part[0])
    train_valid_dataset(dataset_path=part_path,
                        train_path=part_train_path,
                        test_path=part_test_path)
# The validation set lives in ./validation and is copied as-is.
shutil.copytree('./validation', join(dataset_path, 'valid'))
if SPICENET_PARAMS["only_train_test_split"]:
    # Drop the intermediate augmented images; only the split remains.
    shutil.rmtree(data_dir)
print("Done.")
训练集共有126,687 个图片,8 个分类。
# Display every reference symbol image in a single row of subplots.
parts = os.listdir(SPICENET_PARAMS["src_path"])
SPICENET_PARAMS["num_classes"] = len(SPICENET_PARAMS["class_list"])
fig, axs = plt.subplots(nrows=1, ncols=len(parts), figsize=(50, 50))
for axis, part in zip(axs, parts):
    symbol_img = plt.imread(join(SPICENET_PARAMS["src_path"], part))
    axis.imshow(symbol_img)
    axis.title.set_text(os.path.splitext(part)[0])
# Display one sample image from each class folder of the training set.
path = join(SPICENET_PARAMS["dataset_path"], "train")
parts = [p for p in os.listdir(path) if os.path.isdir(join(path, p))]
fig, axs = plt.subplots(nrows=1, ncols=len(parts), figsize=(50, 50))
for axis, part in zip(axs, parts):
    first_file = os.listdir(join(path, part))[0]
    sample_img = plt.imread(join(path, part, first_file))
    axis.imshow(sample_img)
    axis.title.set_text(part)
开始训练
# Data generator that applies the VGG16-specific preprocessing to every image.
data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
# Stream labelled training batches directly from the train/ directory tree.
train_generator = data_generator.flow_from_directory(
    join(SPICENET_PARAMS["dataset_path"], "train"),
    target_size=(SPICENET_PARAMS["resize"],) * 2,  # square input, e.g. (224, 224)
    batch_size=SPICENET_PARAMS["batches"],
    class_mode='categorical')
Found 126687 images belonging to 8 classes.
# Test-set generator: same preprocessing, streamed from the test/ directory.
test_generator = data_generator.flow_from_directory(
    join(SPICENET_PARAMS["dataset_path"], "test"),
    target_size=(SPICENET_PARAMS["resize"],) * 2,  # square input, e.g. (224, 224)
    batch_size=SPICENET_PARAMS["batches"],
    class_mode='categorical')
Found 89709 images belonging to 8 classes.
# Sequential is Keras's simplest model type: layers are stacked in order.
# The first layer is a pretrained VGG16 network used as a feature extractor.
base_model = VGG16(
    include_top=False,   # drop the ImageNet classification head
    pooling='avg',       # global average pooling -> flat feature vector
    weights='imagenet',  # start from ImageNet-pretrained weights
)
vgg16_model = Sequential()
vgg16_model.add(base_model)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
58889256/58889256 [==============================] - 3s 0us/step
# Append a softmax classification head sized to the number of classes,
# freeze the pretrained VGG16 base (layer 0) so only the head is trained,
# then print an architecture/parameter summary.
vgg16_model.add(Dense(SPICENET_PARAMS["num_classes"], activation='softmax'))
base_layer = vgg16_model.layers[0]
base_layer.trainable = False  # keep the ImageNet weights fixed
vgg16_model.summary()
# Compile with the Adam optimizer and categorical cross-entropy loss;
# accuracy is tracked as the evaluation metric.
vgg16_model.compile(optimizer='adam',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
# One epoch = one full pass over each generator (len() = number of batches).
steps_per_epoch_training = len(train_generator)
steps_per_epoch_test = len(test_generator)
# 对 VGG16 模型的训练,并将训练好的模型保存到了本地文件中,以备将来使用。您可以根据 fit_history 中的记录来分析模型的训练情况,以及模型在验证集上的表现。
# Train the classification head; the test generator doubles as the
# validation stream. fit() returns the per-epoch history (loss/accuracy)
# for later analysis of training and validation performance.
fit_history = vgg16_model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch_training,
    epochs=SPICENET_PARAMS["epochs"],
    validation_data=test_generator,
    validation_steps=steps_per_epoch_test,
    validation_batch_size=SPICENET_PARAMS["batches"],
    verbose=1,
)
# Persist the trained model for later reuse. (Plain string literal: the
# original f-string had no placeholders.)
vgg16_model.save('SPICEnet.h5')
昨晚开始训练,花了6个小时。
Epoch 1/2
6335/6335 [==============================] - 11678s 2s/step - loss: 0.1048 - accuracy: 0.9743 - val_loss: 0.0142 - val_accuracy: 0.9964
Epoch 2/2
6335/6335 [==============================] - 10713s 2s/step - loss: 0.0128 - accuracy: 0.9962 - val_loss: 0.0101 - val_accuracy: 0.9972
评估
# Validation generator: unshuffled so that prediction order matches the
# directory order (one image per class in valid/).
evaluate_generator = data_generator.flow_from_directory(
    join(SPICENET_PARAMS["dataset_path"], "valid"),
    target_size=(SPICENET_PARAMS["resize"],) * 2,  # square input, e.g. (224, 224)
    shuffle=False)
Found 8 images belonging to 8 classes.
# Evaluate loss and accuracy on the held-out validation images.
vgg16_evaluate_history = vgg16_model.evaluate(evaluate_generator, verbose=1)
1/1 [==============================] - 0s 394ms/step - loss: 0.0060 - accuracy: 1.0000
print("VGG16 准确率:", vgg16_evaluate_history[1])
VGG16 准确率: 1.0
使用
# 使用 VGG16 模型进行图像预测,并可视化预测结果
# Predict class probabilities for the validation images, then map each
# argmax index back to its class name.
vgg16_predict = vgg16_model.predict(evaluate_generator)
vgg16_predict  # bare expression: displays the raw probabilities in a notebook
result_index = [SPICENET_PARAMS["class_list"][idx]
                for idx in vgg16_predict.argmax(axis=1)]
result_index  # displays the predicted class names
# Font config must be in effect before the figure is drawn so the Chinese
# suptitle renders with a CJK-capable font (SimHei).
plt.rcParams['font.sans-serif'] = ['SimHei']
# BUG FIX: the original called .next() twice, advancing the generator and
# potentially displaying a different batch than the one whose predictions
# are in result_index. Draw a single batch and reuse it.
batch = next(evaluate_generator)
batch_images = batch[0]
fig, axs = plt.subplots(nrows=1, ncols=len(SPICENET_PARAMS["class_list"]), figsize=(40, 5))
for ind, ax1 in enumerate(axs):
    # NOTE(review): these images went through preprocess_input (mean-subtracted
    # floats), while vmin/vmax assume a 0-255 range — colors may look off; verify.
    ax1.imshow(batch_images[ind], vmin=0, vmax=255)
    ax1.title.set_text(result_index[ind])
fig.suptitle('电气符号图像测试')
plt.show()