「感知器」是一种最简单的人工神经网络,它的激活函数通常是 符号函数 或 阈值函数,这类激活函数是不可导的。为了计算和推导的方便,人们引入了 Sigmoid 函数,其中最著名的是 Logistic 函数。通常写程序的时候加入一个alpha参数,通过调整这个参数来调整函数f(x)趋向于1或0的速度。
$$f(x) = \frac{1}{1 + e^{-\alpha x}}$$

$$f'(x) = \frac{\alpha\, e^{-\alpha x}}{\left(1 + e^{-\alpha x}\right)^{2}} = \alpha\, f(x)\,\bigl(1 - f(x)\bigr)$$
今天我们介绍的 Delta 学习规则就是采用这样的激活函数,通过梯度下降的方式来调整权值和阈值,以使得网络总误差最小。
需要注意的是,这篇图文是基于以往两篇图文的,在往下看之前最好复习一下。
- [如何利用 C# 对神经网络模型进行抽象?]
- [如何利用 C# 实现神经网络的感知器模型?]
Delta学习规则
该学习规则用于训练由具有连续激活函数的神经元构成的单层神经网络,常用的激活函数是 Sigmoid 函数。详细介绍可以参见维基百科的相应部分。
https://en.wikipedia.org/wiki/Delta_rule
Sigmoid 函数
详细介绍可以参见维基百科相应部分。
https://en.wikipedia.org/wiki/Sigmoid_function
通过以上的介绍,大家对 Delta 学习规则应该有了基本了解,下面我们进行代码实现以及具体应用。
1. 实现神经元的激活函数 IActivationFunction。
/// <summary>
/// Logistic sigmoid activation function: f(x) = 1 / (1 + exp(-Alpha * x)).
/// </summary>
public class SigmoidFunction : IActivationFunction
{
    /// <summary>
    /// Alpha value that controls how smooth the curve is, i.e. how quickly
    /// the output approaches 0 or 1. Defaults to 2.
    /// </summary>
    public double Alpha { get; set; } = 2;

    /// <summary>Creates the function with the default <see cref="Alpha"/>.</summary>
    public SigmoidFunction()
    {
    }

    /// <summary>Creates the function with the given alpha value.</summary>
    /// <param name="alpha">Value assigned to <see cref="Alpha"/>.</param>
    public SigmoidFunction(double alpha)
    {
        Alpha = alpha;
    }

    /// <summary>Evaluates the function at <paramref name="x"/>.</summary>
    /// <param name="x">Function input.</param>
    /// <returns>1 / (1 + exp(-Alpha * x)).</returns>
    public double Function(double x) => 1 / (1 + Math.Exp(-Alpha * x));

    /// <summary>Evaluates the derivative at the input value <paramref name="x"/>.</summary>
    /// <param name="x">Function input.</param>
    /// <returns>Alpha * f(x) * (1 - f(x)).</returns>
    public double Derivative(double x) => Derivative2(Function(x));

    /// <summary>
    /// Evaluates the derivative from an already computed function output,
    /// which avoids evaluating the exponential again.
    /// </summary>
    /// <param name="y">Function output, i.e. the value returned by <see cref="Function"/>.</param>
    /// <returns>Alpha * y * (1 - y).</returns>
    public double Derivative2(double y) => Alpha * y * (1 - y);
}
2. 实现监督学习算法 ISupervisedLearning。
/// <summary>
/// Delta rule learning: adjusts the weights and thresholds of a single-layer
/// activation network in proportion to the output error and the derivative
/// of the activation function.
/// </summary>
public class DeltaRuleLearning : ISupervisedLearning
{
    private readonly ActivationNetwork _network;
    private double _learningRate = 0.1;

    /// <summary>
    /// Learning rate. Assigned values are clamped to the range [0, 1];
    /// the initial value is 0.1.
    /// </summary>
    public double LearningRate
    {
        get { return _learningRate; }
        set { _learningRate = Math.Max(0.0, Math.Min(1.0, value)); }
    }

    /// <summary>Creates a teacher bound to the given network.</summary>
    /// <param name="network">Network to train; it must have exactly one layer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="network"/> is null.</exception>
    /// <exception cref="ArgumentException">The network does not have exactly one layer.</exception>
    public DeltaRuleLearning(ActivationNetwork network)
    {
        if (network == null)
        {
            throw new ArgumentNullException(nameof(network));
        }

        if (network.Layers.Length != 1)
        {
            throw new ArgumentException("无效的神经网络,它应该只有一层。");
        }

        _network = network;
    }

    /// <summary>
    /// Performs one learning step on a single sample: computes the network
    /// output, then updates every neuron's weights and threshold.
    /// </summary>
    /// <param name="input">Input vector of the sample.</param>
    /// <param name="output">Desired output vector, one value per neuron.</param>
    /// <returns>
    /// Half of the sum of squared differences between the desired output and
    /// the network output computed before the update.
    /// </returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="output"/> has fewer elements than the layer has neurons.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// A neuron of the layer is not an <c>ActivationNeuron</c>.
    /// </exception>
    public double Run(double[] input, double[] output)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        if (output == null)
        {
            throw new ArgumentNullException(nameof(output));
        }

        Layer layer = _network.Layers[0];

        // Validate before touching the network so that a short target vector
        // cannot leave only some of the neurons updated.
        if (output.Length < layer.Neurons.Length)
        {
            throw new ArgumentException("期望输出的长度不能小于神经元的个数。", nameof(output));
        }

        double[] networkOutput = _network.Compute(input);
        double error = 0.0;

        for (int j = 0; j < layer.Neurons.Length; j++)
        {
            ActivationNeuron neuron = layer.Neurons[j] as ActivationNeuron;
            if (neuron == null)
            {
                throw new InvalidOperationException("神经元为null。");
            }

            double e = output[j] - networkOutput[j];

            // Derivative2 takes the function output rather than its input.
            double functionDerivative = neuron.ActivationFunction.Derivative2(networkOutput[j]);

            // Loop-invariant part of the update, shared by all weights and the threshold.
            double step = _learningRate * e * functionDerivative;

            for (int i = 0; i < neuron.Weights.Length; i++)
            {
                neuron.Weights[i] += step * input[i];
            }

            neuron.Threshold += step;
            error += e * e;
        }

        return error / 2;
    }

    /// <summary>
    /// Runs one learning step for every sample, in order.
    /// </summary>
    /// <param name="input">Input vectors, one per sample.</param>
    /// <param name="output">Desired output vectors, paired with <paramref name="input"/> by index.</param>
    /// <returns>Sum of the values returned by <see cref="Run"/> for each sample.</returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="output"/> has fewer samples than <paramref name="input"/>.
    /// </exception>
    public double RunEpoch(double[][] input, double[][] output)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        if (output == null)
        {
            throw new ArgumentNullException(nameof(output));
        }

        // Fail before training starts instead of part-way through the epoch.
        if (output.Length < input.Length)
        {
            throw new ArgumentException("期望输出的样本数不能少于输入的样本数。", nameof(output));
        }

        double error = 0.0;

        for (int i = 0, n = input.Length; i < n; i++)
        {
            error += Run(input[i], output[i]);
        }

        return error;
    }
}
3. Delta 学习规则的应用。
首先,我们利用 Delta 学习规则解决 And 问题。
// Training set for the logical AND problem: only (1,1) maps to 1.
double[][] inputs =
{
    new double[] {0, 0},
    new double[] {0, 1},
    new double[] {1, 0},
    new double[] {1, 1}
};
double[][] outputs =
{
    new double[] {0},
    new double[] {0},
    new double[] {0},
    new double[] {1}
};

const double targetError = 0.1; // stop once the average error per sample is this low
const int maxIterations = 1000; // upper bound on the number of training epochs

// Single layer, two inputs, one sigmoid neuron.
ActivationNetwork network = new ActivationNetwork(new SigmoidFunction(), 2, 1);
DeltaRuleLearning teacher = new DeltaRuleLearning(network);
teacher.LearningRate = 0.1;

for (int iteration = 1; iteration <= maxIterations; iteration++)
{
    // Average the epoch error over the number of samples.
    double error = teacher.RunEpoch(inputs, outputs) / inputs.Length;
    Console.WriteLine("迭代次数:{0},错误率:{1}", iteration, error);

    if (error <= targetError)
    {
        break;
    }
}

Console.WriteLine();

// Direct cast: an unexpected neuron type fails here with InvalidCastException
// instead of a NullReferenceException on first use.
ActivationNeuron neuron = (ActivationNeuron) network.Layers[0].Neurons[0];
Console.WriteLine("Weight 1:{0}", neuron.Weights[0].ToString("F3"));
Console.WriteLine("Weight 2:{0}", neuron.Weights[1].ToString("F3"));
Console.WriteLine("Threshold:{0}", neuron.Threshold.ToString("F3"));
训练结果如下:
其次,我们利用 Delta 学习规则解决 Or 问题。
// Training set for the logical OR problem: only (0,0) maps to 0.
double[][] inputs =
{
    new double[] {0, 0},
    new double[] {0, 1},
    new double[] {1, 0},
    new double[] {1, 1}
};
double[][] outputs =
{
    new double[] {0},
    new double[] {1},
    new double[] {1},
    new double[] {1}
};

const double targetError = 0.06; // stop once the average error per sample is this low
const int maxIterations = 1000;  // upper bound on the number of training epochs

// Single layer, two inputs, one sigmoid neuron.
ActivationNetwork network = new ActivationNetwork(new SigmoidFunction(), 2, 1);
DeltaRuleLearning teacher = new DeltaRuleLearning(network);
teacher.LearningRate = 0.1;

for (int iteration = 1; iteration <= maxIterations; iteration++)
{
    // Average the epoch error over the number of samples.
    double error = teacher.RunEpoch(inputs, outputs) / inputs.Length;
    Console.WriteLine("迭代次数:{0},错误率:{1}", iteration, error);

    if (error <= targetError)
    {
        break;
    }
}

Console.WriteLine();

// Direct cast: an unexpected neuron type fails here with InvalidCastException
// instead of a NullReferenceException on first use.
ActivationNeuron neuron = (ActivationNeuron) network.Layers[0].Neurons[0];
Console.WriteLine("Weight 1:{0}", neuron.Weights[0].ToString("F3"));
Console.WriteLine("Weight 2:{0}", neuron.Weights[1].ToString("F3"));
Console.WriteLine("Threshold:{0}", neuron.Threshold.ToString("F3"));
训练结果如下:
接着,我们利用 Delta 学习规则处理一个通常的二分类问题。
// Training set for a two-class problem over three binary inputs:
// in this data the label equals the first input component.
double[][] inputs =
{
    new double[] {0, 0, 0},
    new double[] {0, 0, 1},
    new double[] {0, 1, 0},
    new double[] {0, 1, 1},
    new double[] {1, 0, 0},
    new double[] {1, 0, 1},
    new double[] {1, 1, 0},
    new double[] {1, 1, 1}
};
double[][] outputs =
{
    new double[] {0},
    new double[] {0},
    new double[] {0},
    new double[] {0},
    new double[] {1},
    new double[] {1},
    new double[] {1},
    new double[] {1}
};

const double targetError = 0.1; // stop once the average error per sample is this low
const int maxIterations = 1000; // upper bound on the number of training epochs

// Single layer, three inputs, one sigmoid neuron.
ActivationNetwork network = new ActivationNetwork(new SigmoidFunction(), 3, 1);
DeltaRuleLearning teacher = new DeltaRuleLearning(network);
teacher.LearningRate = 0.1;

for (int iteration = 1; iteration <= maxIterations; iteration++)
{
    // Average the epoch error over the number of samples.
    double error = teacher.RunEpoch(inputs, outputs) / inputs.Length;
    Console.WriteLine("迭代次数:{0},错误率:{1}", iteration, error);

    if (error <= targetError)
    {
        break;
    }
}

Console.WriteLine();

// Direct cast: an unexpected neuron type fails here with InvalidCastException
// instead of a NullReferenceException on first use.
// Only the first two of the three weights are printed, as in the original listing.
ActivationNeuron neuron = (ActivationNeuron) network.Layers[0].Neurons[0];
Console.WriteLine("Weight 1:{0}", neuron.Weights[0].ToString("F3"));
Console.WriteLine("Weight 2:{0}", neuron.Weights[1].ToString("F3"));
Console.WriteLine("Threshold:{0}", neuron.Threshold.ToString("F3"));
训练结果如下:
最后,我们利用 Delta 学习规则处理一个稍微复杂的多分类问题。
const int classCount = 3; // one output neuron per class

// Training set for a three-class problem over two real-valued inputs,
// five samples per class, with one-hot target vectors.
double[][] inputs =
{
    // class 1
    new double[] {0.1, 0.1},
    new double[] {0.2, 0.3},
    new double[] {0.3, 0.4},
    new double[] {0.1, 0.3},
    new double[] {0.2, 0.5},
    // class 2
    new double[] {0.1, 1.0},
    new double[] {0.2, 1.1},
    new double[] {0.3, 0.9},
    new double[] {0.4, 0.8},
    new double[] {0.2, 0.9},
    // class 3
    new double[] {1.0, 0.4},
    new double[] {0.9, 0.5},
    new double[] {0.8, 0.6},
    new double[] {0.9, 0.4},
    new double[] {1.0, 0.5}
};
double[][] outputs =
{
    // class 1
    new double[] {1, 0, 0},
    new double[] {1, 0, 0},
    new double[] {1, 0, 0},
    new double[] {1, 0, 0},
    new double[] {1, 0, 0},
    // class 2
    new double[] {0, 1, 0},
    new double[] {0, 1, 0},
    new double[] {0, 1, 0},
    new double[] {0, 1, 0},
    new double[] {0, 1, 0},
    // class 3
    new double[] {0, 0, 1},
    new double[] {0, 0, 1},
    new double[] {0, 0, 1},
    new double[] {0, 0, 1},
    new double[] {0, 0, 1}
};

const double targetError = 0.05; // stop once the average error per sample is this low
const int maxIterations = 1000;  // upper bound on the number of training epochs

// Single layer, two inputs, one sigmoid neuron per class.
ActivationNetwork network = new ActivationNetwork(new SigmoidFunction(), 2, classCount);
DeltaRuleLearning teacher = new DeltaRuleLearning(network);
teacher.LearningRate = 0.1;

for (int iteration = 1; iteration <= maxIterations; iteration++)
{
    // Average the epoch error over the number of samples.
    double error = teacher.RunEpoch(inputs, outputs) / inputs.Length;
    Console.WriteLine("迭代次数:{0},错误率:{1}", iteration, error);

    if (error <= targetError)
    {
        break;
    }
}

Console.WriteLine();

// Direct casts: an unexpected layer or neuron type fails at the cast with
// InvalidCastException instead of a NullReferenceException on first use.
ActivationLayer layer = (ActivationLayer) network.Layers[0];
for (int i = 0; i < classCount; i++)
{
    ActivationNeuron neuron = (ActivationNeuron) layer.Neurons[i];
    Console.WriteLine("神经元:{0}", i + 1);
    Console.WriteLine("Weight 1:{0}", neuron.Weights[0]);
    Console.WriteLine("Weight 2:{0}", neuron.Weights[1]);
    Console.WriteLine("Threshold:{0}", neuron.Threshold);
}
训练结果如下:
到此为止,有关 Delta 学习规则的神经网络实现就全部介绍完了。后面我计划把 BP 和 SOM 神经网络写完就结束这个系列。还是我一直强调的观点,在学习阶段最好是自己动手来具体实现,到了真正工程应用的时候再来用 Python 或 Matlab 这样的工具,调 Package 配 Parameter 啊!当然写论文就无所谓了,可以编啊!
今天就到这里吧!See You!