#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 *
 *
 * Source based on
 * http://crsouza.blogspot.co.at/2010/04/kernel-support-vector-machines-for.html,
 * licensed under GNU GPL
 *
 */
#endregion

using System;
using System.Collections.Generic;

namespace HeuristicLab.Analysis.AlgorithmBehavior.Analyzers {
  public interface IKernel {
    double Function(double[] x, double[] y);
  }

  public class LinearKernel : IKernel {
    public double Function(double[] x, double[] y) {
      double sum = 0.0;
      for (int i = 0; i < x.Length; i++) {
        sum += x[i] * y[i];
      }
      return sum;
    }
  }
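
  // The usage examples below construct the kernel machine with "new Gaussian(0.1)",
  // but no such type is defined in this file. The following is a minimal sketch of
  // what such a Gaussian (RBF) kernel could look like; the class name, the sigma
  // parameter, and its interpretation are assumptions, not part of the original source.
  public class Gaussian : IKernel {
    private readonly double sigma;

    public Gaussian(double sigma) {
      this.sigma = sigma;
    }

    // k(x, y) = exp(-||x - y||^2 / (2 * sigma^2))
    public double Function(double[] x, double[] y) {
      double norm = 0.0;
      for (int i = 0; i < x.Length; i++) {
        double d = x[i] - y[i];
        norm += d * d;
      }
      return Math.Exp(-norm / (2.0 * sigma * sigma));
    }
  }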

  /// <summary>
  /// Sparse Linear Support Vector Machine (SVM)
  /// </summary>
  public class SupportVectorMachine {

    private int inputCount;
    private double[][] supportVectors;
    private double[] weights;
    private double threshold;

    /// <summary>
    /// Creates a new Support Vector Machine
    /// </summary>
    public SupportVectorMachine(int inputs) {
      this.inputCount = inputs;
    }

    /// <summary>
    /// Gets the number of inputs accepted by this SVM.
    /// </summary>
    public int Inputs {
      get { return inputCount; }
    }

    /// <summary>
    /// Gets or sets the collection of support vectors used by this machine.
    /// </summary>
    public double[][] SupportVectors {
      get { return supportVectors; }
      set { supportVectors = value; }
    }

    /// <summary>
    /// Gets or sets the collection of weights used by this machine.
    /// </summary>
    public double[] Weights {
      get { return weights; }
      set { weights = value; }
    }

    /// <summary>
    /// Gets or sets the threshold (bias) term for this machine.
    /// </summary>
    public double Threshold {
      get { return threshold; }
      set { threshold = value; }
    }

    /// <summary>
    /// Computes the given input to produce the corresponding output.
    /// </summary>
    /// <param name="input">An input vector.</param>
    /// <returns>The output for the given input.</returns>
    public virtual double Compute(double[] input) {
      double s = threshold;
      for (int i = 0; i < supportVectors.Length; i++) {
        double p = 0;
        for (int j = 0; j < input.Length; j++)
          p += supportVectors[i][j] * input[j];

        s += weights[i] * p;
      }

      return s;
    }

    /// <summary>
    /// Computes the given inputs to produce the corresponding outputs.
    /// </summary>
    public double[] Compute(double[][] inputs) {
      double[] outputs = new double[inputs.Length];

      for (int i = 0; i < inputs.Length; i++)
        outputs[i] = Compute(inputs[i]);

      return outputs;
    }

  }
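
  // A minimal usage sketch for the linear machine above: the decision function is
  // f(x) = threshold + sum_i w_i * <sv_i, x>, so a machine whose weights and support
  // vectors are set by hand can be evaluated directly. The class and values here are
  // illustrative only and not part of the original source.
  internal static class SupportVectorMachineExample {
    public static double Demo() {
      var machine = new SupportVectorMachine(2);
      machine.SupportVectors = new double[][] { new double[] { 1.0, 2.0 } };
      machine.Weights = new double[] { 0.5 };
      machine.Threshold = -1.0;

      // f(x) = -1.0 + 0.5 * (1.0 * 3.0 + 2.0 * 4.0) = 4.5
      return machine.Compute(new double[] { 3.0, 4.0 });
    }
  }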


  /// <summary>
  /// Sparse Kernel Support Vector Machine (kSVM)
  /// </summary>
  /// <remarks>
  /// <para>
  /// The original optimal hyperplane algorithm (SVM) proposed by Vladimir Vapnik in 1963 was a
  /// linear classifier. However, in 1992, Bernhard Boser, Isabelle Guyon and Vapnik suggested
  /// a way to create non-linear classifiers by applying the kernel trick (originally proposed
  /// by Aizerman et al.) to maximum-margin hyperplanes. The resulting algorithm is formally
  /// similar, except that every dot product is replaced by a non-linear kernel function.</para>
  /// <para>
  /// This allows the algorithm to fit the maximum-margin hyperplane in a transformed feature space.
  /// The transformation may be non-linear and the transformed space high dimensional; thus though
  /// the classifier is a hyperplane in the high-dimensional feature space, it may be non-linear in
  /// the original input space.</para>
  /// <para>
  /// References:
  /// <list type="bullet">
  /// <item><description><a href="http://en.wikipedia.org/wiki/Support_vector_machine">
  /// http://en.wikipedia.org/wiki/Support_vector_machine</a></description></item>
  /// <item><description><a href="http://www.kernel-machines.org/">
  /// http://www.kernel-machines.org/</a></description></item>
  /// </list></para>
  /// </remarks>
  ///
  /// <example>
  /// <code>
  /// // Example XOR problem
  /// double[][] inputs =
  /// {
  ///   new double[] { 0, 0 }, // 0 xor 0: 0 (label +1)
  ///   new double[] { 0, 1 }, // 0 xor 1: 1 (label -1)
  ///   new double[] { 1, 0 }, // 1 xor 0: 1 (label -1)
  ///   new double[] { 1, 1 }  // 1 xor 1: 0 (label +1)
  /// };
  ///
  /// // Dichotomy SVM outputs should be given as [-1;+1]
  /// int[] labels =
  /// {
  ///   // xor: 0, 1, 1, 0
  ///   1, -1, -1, 1
  /// };
  ///
  /// // Create a Kernel Support Vector Machine for the given inputs
  /// KernelSupportVectorMachine machine = new KernelSupportVectorMachine(new Gaussian(0.1), inputs[0].Length);
  ///
  /// // Instantiate a new learning algorithm for SVMs
  /// SequentialMinimalOptimization smo = new SequentialMinimalOptimization(machine, inputs, labels);
  ///
  /// // Set up the learning algorithm
  /// smo.Complexity = 1.0;
  ///
  /// // Run the learning algorithm
  /// double error = smo.Run();
  ///
  /// // Compute the decision output for one of the input vectors
  /// int decision = System.Math.Sign(machine.Compute(inputs[0]));
  /// </code>
  /// </example>
  ///
  public class KernelSupportVectorMachine : SupportVectorMachine {

    private IKernel kernel;


    /// <summary>
    /// Creates a new Kernel Support Vector Machine.
    /// </summary>
    ///
    /// <param name="kernel">The chosen kernel for the machine.</param>
    /// <param name="inputs">The number of inputs for the machine.</param>
    ///
    /// <remarks>
    /// If the number of inputs is zero, the machine accepts an
    /// indefinite number of inputs. This is often the case for
    /// kernel vector machines using a sequence kernel.
    /// </remarks>
    ///
    public KernelSupportVectorMachine(IKernel kernel, int inputs)
      : base(inputs) {
      if (kernel == null) throw new ArgumentNullException("kernel");

      this.kernel = kernel;
    }

    /// <summary>
    /// Gets or sets the kernel used by this machine.
    /// </summary>
    ///
    public IKernel Kernel {
      get { return kernel; }
      set { kernel = value; }
    }

    /// <summary>
    /// Computes the given input to produce the corresponding output.
    /// </summary>
    ///
    /// <remarks>
    /// For a binary decision problem, the decision for the negative
    /// or positive class is typically computed by taking the sign of
    /// the machine's output.
    /// </remarks>
    ///
    /// <param name="inputs">An input vector.</param>
    /// <returns>The output for the given input.</returns>
    ///
    public override double Compute(double[] inputs) {
      double s = Threshold;

      for (int i = 0; i < SupportVectors.Length; i++)
        s += Weights[i] * kernel.Function(SupportVectors[i], inputs);

      return s;
    }
  }


  /// <summary>
  /// Sequential Minimal Optimization (SMO) Algorithm
  /// </summary>
  ///
  /// <remarks>
  /// <para>
  /// The SMO algorithm is an algorithm for solving large quadratic programming (QP)
  /// optimization problems, widely used for the training of support vector machines.
  /// First developed by John C. Platt in 1998, SMO breaks up large QP problems into
  /// a series of smallest possible QP problems, which are then solved analytically.</para>
  /// <para>
  /// This class follows the original algorithm by Platt as strictly as possible.</para>
  ///
  /// <para>
  /// References:
  /// <list type="bullet">
  /// <item><description>
  /// <a href="http://en.wikipedia.org/wiki/Sequential_Minimal_Optimization">
  /// Wikipedia, The Free Encyclopedia. Sequential Minimal Optimization. Available on:
  /// http://en.wikipedia.org/wiki/Sequential_Minimal_Optimization </a></description></item>
  /// <item><description>
  /// <a href="http://research.microsoft.com/en-us/um/people/jplatt/smoTR.pdf">
  /// John C. Platt, Sequential Minimal Optimization: A Fast Algorithm for Training Support
  /// Vector Machines. 1998. Available on: http://research.microsoft.com/en-us/um/people/jplatt/smoTR.pdf </a></description></item>
  /// <item><description>
  /// <a href="http://www.idiom.com/~zilla/Work/Notes/svmtutorial.pdf">
  /// J. P. Lewis. A Short SVM (Support Vector Machine) Tutorial. Available on:
  /// http://www.idiom.com/~zilla/Work/Notes/svmtutorial.pdf </a></description></item>
  /// </list></para>
  /// </remarks>
  ///
  /// <example>
  /// <code>
  /// // Example XOR problem
  /// double[][] inputs =
  /// {
  ///   new double[] { 0, 0 }, // 0 xor 0: 0 (label +1)
  ///   new double[] { 0, 1 }, // 0 xor 1: 1 (label -1)
  ///   new double[] { 1, 0 }, // 1 xor 0: 1 (label -1)
  ///   new double[] { 1, 1 }  // 1 xor 1: 0 (label +1)
  /// };
  ///
  /// // Dichotomy SVM outputs should be given as [-1;+1]
  /// int[] labels =
  /// {
  ///   1, -1, -1, 1
  /// };
  ///
  /// // Create a Kernel Support Vector Machine for the given inputs
  /// KernelSupportVectorMachine machine = new KernelSupportVectorMachine(new Gaussian(0.1), inputs[0].Length);
  ///
  /// // Instantiate a new learning algorithm for SVMs
  /// SequentialMinimalOptimization smo = new SequentialMinimalOptimization(machine, inputs, labels);
  ///
  /// // Set up the learning algorithm
  /// smo.Complexity = 1.0;
  ///
  /// // Run the learning algorithm
  /// double error = smo.Run();
  ///
  /// // Compute the decision output for one of the input vectors
  /// int decision = System.Math.Sign(machine.Compute(inputs[0]));
  /// </code>
  /// </example>
  ///
  public class SequentialMinimalOptimization {
    private static Random random = new Random();

    // Training data
    private double[][] inputs;
    private int[] outputs;

    // Learning algorithm parameters
    private double c = 1.0;
    private double tolerance = 1e-3;
    private double epsilon = 1e-3;
    private bool useComplexityHeuristic;

    // Support Vector Machine parameters
    private SupportVectorMachine machine;
    private IKernel kernel;
    private double[] alpha;
    private double bias;


    // Error cache to speed up computations
    private double[] errors;


    /// <summary>
    /// Initializes a new instance of a Sequential Minimal Optimization (SMO) algorithm.
    /// </summary>
    ///
    /// <param name="machine">A Support Vector Machine.</param>
    /// <param name="inputs">The input data points as row vectors.</param>
    /// <param name="outputs">The classification label for each data point in the range [-1;+1].</param>
    ///
    public SequentialMinimalOptimization(SupportVectorMachine machine,
      double[][] inputs, int[] outputs) {

      // Initial argument checking
      if (machine == null)
        throw new ArgumentNullException("machine");

      if (inputs == null)
        throw new ArgumentNullException("inputs");

      if (outputs == null)
        throw new ArgumentNullException("outputs");

      if (inputs.Length != outputs.Length)
        throw new ArgumentException("The number of inputs and outputs does not match.", "outputs");

      for (int i = 0; i < outputs.Length; i++) {
        if (outputs[i] != 1 && outputs[i] != -1)
          throw new ArgumentOutOfRangeException("outputs", "One of the labels in the output vector is neither +1 nor -1.");
      }

      if (machine.Inputs > 0) {
        // This machine has a fixed input vector size
        for (int i = 0; i < inputs.Length; i++)
          if (inputs[i].Length != machine.Inputs)
            throw new ArgumentException(
              "The size of the input vectors does not match the expected number of inputs of the machine");
      }


      // Machine
      this.machine = machine;

      // Kernel (if applicable)
      KernelSupportVectorMachine ksvm = machine as KernelSupportVectorMachine;
      this.kernel = (ksvm != null) ? ksvm.Kernel : new LinearKernel();


      // Learning data
      this.inputs = inputs;
      this.outputs = outputs;
    }


    //---------------------------------------------


    #region Properties

    /// <summary>
    /// Complexity (cost) parameter C. Increasing the value of C forces the creation
    /// of a more accurate model that may not generalize well. The default value is 1.0;
    /// when <see cref="UseComplexityHeuristic"/> is enabled, C is set to the number of
    /// examples divided by the trace of the kernel matrix.
    /// </summary>
    /// <remarks>
    /// The cost parameter C controls the trade-off between allowing training
    /// errors and forcing rigid margins. It creates a soft margin that permits
    /// some misclassifications. Increasing the value of C increases the cost of
    /// misclassifying points and forces the creation of a more accurate model
    /// that may not generalize well.
    /// </remarks>
    public double Complexity {
      get { return this.c; }
      set { this.c = value; }
    }


    /// <summary>
    /// Gets or sets a value indicating whether the Complexity parameter C
    /// should be computed automatically by employing a heuristic rule.
    /// </summary>
    /// <value>
    /// <c>true</c> if complexity should be computed automatically; otherwise, <c>false</c>.
    /// </value>
    public bool UseComplexityHeuristic {
      get { return useComplexityHeuristic; }
      set { useComplexityHeuristic = value; }
    }

    /// <summary>
    /// Insensitivity zone ε. Increasing the value of ε can result in fewer support
    /// vectors in the created model. Default value is 1e-3.
    /// </summary>
    /// <remarks>
    /// Parameter ε controls the width of the ε-insensitive zone, used to fit the training
    /// data. The value of ε can affect the number of support vectors used to construct the
    /// regression function. The larger ε is, the fewer support vectors are selected. On the
    /// other hand, larger values of ε result in flatter estimates.
    /// </remarks>
    public double Epsilon {
      get { return epsilon; }
      set { epsilon = value; }
    }

    /// <summary>
    /// Convergence tolerance. Default value is 1e-3.
    /// </summary>
    /// <remarks>
    /// The criterion for completing the model training process. The default is 0.001.
    /// </remarks>
    public double Tolerance {
      get { return this.tolerance; }
      set { this.tolerance = value; }
    }

    #endregion


    //---------------------------------------------


    /// <summary>
    /// Runs the SMO algorithm.
    /// </summary>
    ///
    /// <param name="computeError">
    /// True to compute the error after the training
    /// process completes, false otherwise. Default is true.
    /// </param>
    ///
    /// <returns>
    /// The misclassification error rate of
    /// the resulting support vector machine.
    /// </returns>
    ///
    public double Run(bool computeError) {

      // The SMO algorithm chooses to solve the smallest possible optimization problem
      // at every step. At every step, SMO chooses two Lagrange multipliers to jointly
      // optimize, finds the optimal values for these multipliers, and updates the SVM
      // to reflect the new optimal values.
      //
      // Reference: http://research.microsoft.com/en-us/um/people/jplatt/smoTR.pdf


      // Initialize variables
      int N = inputs.Length;

      if (useComplexityHeuristic)
        c = ComputeComplexity();

      // Lagrange multipliers
      this.alpha = new double[N];

      // Error cache
      this.errors = new double[N];

      // Algorithm:
      int numChanged = 0;
      int examineAll = 1;

      while (numChanged > 0 || examineAll > 0) {
        numChanged = 0;
        if (examineAll > 0) {
          // loop I over all training examples
          for (int i = 0; i < N; i++)
            numChanged += examineExample(i);
        } else {
          // loop I over examples where alpha is not 0 and not C
          for (int i = 0; i < N; i++)
            if (alpha[i] != 0 && alpha[i] != c)
              numChanged += examineExample(i);
        }

        if (examineAll == 1)
          examineAll = 0;
        else if (numChanged == 0)
          examineAll = 1;
      }


      // Store the support vectors in the SV machine. Only vectors whose Lagrange
      // multipliers are greater than zero are stored, as only those are actually
      // required during evaluation.
      List<int> indices = new List<int>();
      for (int i = 0; i < N; i++) {
        // Only store vectors with multipliers > 0
        if (alpha[i] > 0) indices.Add(i);
      }

      int vectors = indices.Count;
      machine.SupportVectors = new double[vectors][];
      machine.Weights = new double[vectors];
      for (int i = 0; i < vectors; i++) {
        int j = indices[i];
        machine.SupportVectors[i] = inputs[j];
        machine.Weights[i] = alpha[j] * outputs[j];
      }
      machine.Threshold = -bias;


      // Compute error if required.
      return (computeError) ? ComputeError(inputs, outputs) : 0.0;
    }

    /// <summary>
    /// Runs the SMO algorithm.
    /// </summary>
    ///
    /// <returns>
    /// The misclassification error rate of
    /// the resulting support vector machine.
    /// </returns>
    ///
    public double Run() {
      return Run(true);
    }

    /// <summary>
    /// Computes the error rate for a given set of inputs and outputs.
    /// </summary>
    ///
    public double ComputeError(double[][] inputs, int[] expectedOutputs) {
      // Compute errors
      int count = 0;
      for (int i = 0; i < inputs.Length; i++) {
        if (Math.Sign(Compute(inputs[i])) != Math.Sign(expectedOutputs[i]))
          count++;
      }

      // Return misclassification error ratio
      return (double)count / inputs.Length;
    }

    //---------------------------------------------


    /// <summary>
    /// Chooses which multipliers to optimize using heuristics.
    /// </summary>
    ///
    private int examineExample(int i2) {
      double[] p2 = inputs[i2];  // Input point at index i2
      double y2 = outputs[i2];   // Classification label for p2
      double alph2 = alpha[i2];  // Lagrange multiplier for p2

      // SVM output on p2 - y2. Check if it has already been computed
      double e2 = (alph2 > 0 && alph2 < c) ? errors[i2] : Compute(p2) - y2;

      double r2 = y2 * e2;


      // Heuristic 01 (for the first multiplier choice):
      // - Testing for KKT conditions within the tolerance margin
      if (!(r2 < -tolerance && alph2 < c) && !(r2 > tolerance && alph2 > 0))
        return 0;


      // Heuristic 02 (for the second multiplier choice):
      // - Once a first Lagrange multiplier is chosen, SMO chooses the second Lagrange multiplier to
      // maximize the size of the step taken during joint optimization. Because evaluating the kernel
      // function is time consuming, SMO approximates the step size by the absolute value of the
      // error difference.
      int i1 = -1;
      double max = 0;
      for (int i = 0; i < inputs.Length; i++) {
        if (alpha[i] > 0 && alpha[i] < c) {
          double error1 = errors[i];
          double aux = System.Math.Abs(e2 - error1);

          if (aux > max) {
            max = aux;
            i1 = i;
          }
        }
      }

      if (i1 >= 0 && takeStep(i1, i2)) return 1;


      // Heuristic 03:
      // - Under unusual circumstances, SMO cannot make positive progress using the second
      // choice heuristic above. If this is the case, SMO starts iterating through the
      // non-bound examples, searching for a second example that can make positive progress.

      int start = random.Next(inputs.Length);
      for (i1 = start; i1 < inputs.Length; i1++) {
        if (alpha[i1] > 0 && alpha[i1] < c)
          if (takeStep(i1, i2)) return 1;
      }
      for (i1 = 0; i1 < start; i1++) {
        if (alpha[i1] > 0 && alpha[i1] < c)
          if (takeStep(i1, i2)) return 1;
      }


      // Heuristic 04:
      // - If none of the non-bound examples make positive progress, then SMO starts iterating
      // through the entire training set until an example is found that makes positive progress.
      // Both the iteration through the non-bound examples and the iteration through the entire
      // training set are started at random locations, in order not to bias SMO towards the
      // examples at the beginning of the training set.

      start = random.Next(inputs.Length);
      for (i1 = start; i1 < inputs.Length; i1++) {
        if (takeStep(i1, i2)) return 1;
      }
      for (i1 = 0; i1 < start; i1++) {
        if (takeStep(i1, i2)) return 1;
      }


      // In extremely degenerate circumstances, none of the examples will make an adequate second
      // example. When this happens, the first example is skipped and SMO continues with another
      // chosen first example.
      return 0;
    }

    /// <summary>
    /// Analytically solves the optimization problem for two Lagrange multipliers.
    /// </summary>
    ///
    private bool takeStep(int i1, int i2) {
      if (i1 == i2) return false;

      double[] p1 = inputs[i1];  // Input point at index i1
      double alph1 = alpha[i1];  // Lagrange multiplier for p1
      double y1 = outputs[i1];   // Classification label for p1

      // SVM output on p1 - y1. Check if it has already been computed
      double e1 = (alph1 > 0 && alph1 < c) ? errors[i1] : Compute(p1) - y1;

      double[] p2 = inputs[i2];  // Input point at index i2
      double alph2 = alpha[i2];  // Lagrange multiplier for p2
      double y2 = outputs[i2];   // Classification label for p2

      // SVM output on p2 - y2. Check if it has already been computed
      double e2 = (alph2 > 0 && alph2 < c) ? errors[i2] : Compute(p2) - y2;


      double s = y1 * y2;


      // Compute L and H according to equations (13) and (14) (Platt, 1998)
      double L, H;
      if (y1 != y2) {
        // If the target y1 does not equal the target y2,
        // the following bounds apply to a2 (eq. 13):
        L = Math.Max(0, alph2 - alph1);
        H = Math.Min(c, c + alph2 - alph1);
      } else {
        // If the target y1 does equal the target y2,
        // the following bounds apply to a2 (eq. 14):
        L = Math.Max(0, alph2 + alph1 - c);
        H = Math.Min(c, alph2 + alph1);
      }

      if (L == H) return false;

      double k11, k22, k12, eta;
      k11 = kernel.Function(p1, p1);
      k12 = kernel.Function(p1, p2);
      k22 = kernel.Function(p2, p2);
      eta = k11 + k22 - 2.0 * k12;

      double a1, a2;

      if (eta > 0) {
        a2 = alph2 - y2 * (e2 - e1) / eta;

        if (a2 < L) a2 = L;
        else if (a2 > H) a2 = H;
      } else {
        // Compute the objective function values Lobj and Hobj at
        // a2=L and a2=H respectively, using (eq. 19)

        double L1 = alph1 + s * (alph2 - L);
        double H1 = alph1 + s * (alph2 - H);
        double f1 = y1 * (e1 + bias) - alph1 * k11 - s * alph2 * k12;
        double f2 = y2 * (e2 + bias) - alph2 * k22 - s * alph1 * k12;
        double Lobj = -0.5 * L1 * L1 * k11 - 0.5 * L * L * k22 - s * L * L1 * k12 - L1 * f1 - L * f2;
        double Hobj = -0.5 * H1 * H1 * k11 - 0.5 * H * H * k22 - s * H * H1 * k12 - H1 * f1 - H * f2;

        if (Lobj > Hobj + epsilon) a2 = L;
        else if (Lobj < Hobj - epsilon) a2 = H;
        else a2 = alph2;
      }

      if (Math.Abs(a2 - alph2) < epsilon * (a2 + alph2 + epsilon))
        return false;

      a1 = alph1 + s * (alph2 - a2);

      if (a1 < 0) {
        a2 += s * a1;
        a1 = 0;
      } else if (a1 > c) {
        double d = a1 - c;
        a2 += s * d;
        a1 = c;
      }


      // Update threshold (bias) to reflect change in Lagrange multipliers
      double b1 = 0, b2 = 0;
      double new_b = 0, delta_b;

      if (a1 > 0 && a1 < c) {
        // a1 is not at bounds
        new_b = e1 + y1 * (a1 - alph1) * k11 + y2 * (a2 - alph2) * k12 + bias;
      } else {
        if (a2 > 0 && a2 < c) {
          // a1 is at bounds but a2 is not.
          new_b = e2 + y1 * (a1 - alph1) * k12 + y2 * (a2 - alph2) * k22 + bias;
        } else {
          // Both new Lagrange multipliers are at bound. The SMO algorithm
          // chooses the threshold to be halfway in between b1 and b2.
          b1 = e1 + y1 * (a1 - alph1) * k11 + y2 * (a2 - alph2) * k12 + bias;
          b2 = e2 + y1 * (a1 - alph1) * k12 + y2 * (a2 - alph2) * k22 + bias;
          new_b = (b1 + b2) / 2;
        }
      }

      delta_b = new_b - bias;
      bias = new_b;


      // Update error cache using new Lagrange multipliers
      double t1 = y1 * (a1 - alph1);
      double t2 = y2 * (a2 - alph2);

      for (int i = 0; i < inputs.Length; i++) {
        if (0 < alpha[i] && alpha[i] < c) {
          double[] point = inputs[i];
          errors[i] +=
            t1 * kernel.Function(p1, point) +
            t2 * kernel.Function(p2, point) -
            delta_b;
        }
      }

      errors[i1] = 0.0;
      errors[i2] = 0.0;


      // Update Lagrange multipliers
      alpha[i1] = a1;
      alpha[i2] = a2;


      return true;
    }

    /// <summary>
    /// Computes the SVM output for a given point.
    /// </summary>
    ///
    private double Compute(double[] point) {
      double sum = -bias;
      for (int i = 0; i < inputs.Length; i++) {
        if (alpha[i] > 0)
          sum += alpha[i] * outputs[i] * kernel.Function(inputs[i], point);
      }

      return sum;
    }

    private double ComputeComplexity() {
      // Compute initial value for C as the number of examples
      // divided by the trace of the input sample kernel matrix.
      double sum = 0.0;
      for (int i = 0; i < inputs.Length; i++)
        sum += kernel.Function(inputs[i], inputs[i]);
      return inputs.Length / sum;
    }

  }
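
  // End-to-end usage sketch for the classes in this file, mirroring the XOR example
  // from the XML documentation above. It relies on the Gaussian kernel sketched earlier
  // in this file (itself an assumption); everything else uses only types defined here.
  // Illustrative only, not part of the original source.
  internal static class SmoXorExample {
    public static int Demo() {
      // XOR problem: label +1 where xor == 0, label -1 where xor == 1
      double[][] inputs = {
        new double[] { 0, 0 },
        new double[] { 0, 1 },
        new double[] { 1, 0 },
        new double[] { 1, 1 }
      };
      int[] labels = { 1, -1, -1, 1 };

      // Train a kernel SVM with SMO
      var machine = new KernelSupportVectorMachine(new Gaussian(0.1), inputs[0].Length);
      var smo = new SequentialMinimalOptimization(machine, inputs, labels);
      smo.Complexity = 1.0;

      double error = smo.Run(); // misclassification rate on the training set
      Console.WriteLine("training error: " + error);

      // Decide the class of the first input by the sign of the machine output
      return Math.Sign(machine.Compute(inputs[0]));
    }
  }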
}