Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineModel.cs @ 6604

Last change on this file since 6604 was 6604, checked in by mkommend, 13 years ago

#1600: Added possibility to create classification solutions from classification models.

File size: 9.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.IO;
25using System.Linq;
26using System.Text;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis;
31using SVM;
32
33namespace HeuristicLab.Algorithms.DataAnalysis {
34  /// <summary>
35  /// Represents a support vector machine model.
36  /// </summary>
37  [StorableClass]
38  [Item("SupportVectorMachineModel", "Represents a support vector machine model.")]
39  public sealed class SupportVectorMachineModel : NamedItem, ISupportVectorMachineModel {
40
41    private SVM.Model model;
42    /// <summary>
43    /// Gets or sets the SVM model.
44    /// </summary>
45    public SVM.Model Model {
46      get { return model; }
47      set {
48        if (value != model) {
49          if (value == null) throw new ArgumentNullException();
50          model = value;
51          OnChanged(EventArgs.Empty);
52        }
53      }
54    }
55
56    /// <summary>
57    /// Gets or sets the range transformation for the model.
58    /// </summary>
59    private SVM.RangeTransform rangeTransform;
60    public SVM.RangeTransform RangeTransform {
61      get { return rangeTransform; }
62      set {
63        if (value != rangeTransform) {
64          if (value == null) throw new ArgumentNullException();
65          rangeTransform = value;
66          OnChanged(EventArgs.Empty);
67        }
68      }
69    }
70
71    public Dataset SupportVectors {
72      get {
73        var data = new double[Model.SupportVectorCount, allowedInputVariables.Count()];
74        for (int i = 0; i < Model.SupportVectorCount; i++) {
75          var sv = Model.SupportVectors[i];
76          for (int j = 0; j < sv.Length; j++) {
77            data[i, j] = sv[j].Value;
78          }
79        }
80        return new Dataset(allowedInputVariables, data);
81      }
82    }
83
84    [Storable]
85    private string targetVariable;
86    [Storable]
87    private string[] allowedInputVariables;
88    [Storable]
89    private double[] classValues; // only for SVM classification models
90
91    [StorableConstructor]
92    private SupportVectorMachineModel(bool deserializing) : base(deserializing) { }
93    private SupportVectorMachineModel(SupportVectorMachineModel original, Cloner cloner)
94      : base(original, cloner) {
95      // only using a shallow copy here! (gkronber)
96      this.model = original.model;
97      this.rangeTransform = original.rangeTransform;
98      this.targetVariable = original.targetVariable;
99      this.allowedInputVariables = (string[])original.allowedInputVariables.Clone();
100      foreach (var dataset in original.cachedPredictions.Keys) {
101        this.cachedPredictions.Add(cloner.Clone(dataset), (double[])original.cachedPredictions[dataset].Clone());
102      }
103      if (original.classValues != null)
104        this.classValues = (double[])original.classValues.Clone();
105    }
106    public SupportVectorMachineModel(SVM.Model model, SVM.RangeTransform rangeTransform, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> classValues)
107      : this(model, rangeTransform, targetVariable, allowedInputVariables) {
108      this.classValues = classValues.ToArray();
109    }
110    public SupportVectorMachineModel(SVM.Model model, SVM.RangeTransform rangeTransform, string targetVariable, IEnumerable<string> allowedInputVariables)
111      : base() {
112      this.name = ItemName;
113      this.description = ItemDescription;
114      this.model = model;
115      this.rangeTransform = rangeTransform;
116      this.targetVariable = targetVariable;
117      this.allowedInputVariables = allowedInputVariables.ToArray();
118    }
119
120    public override IDeepCloneable Clone(Cloner cloner) {
121      return new SupportVectorMachineModel(this, cloner);
122    }
123
124    #region IRegressionModel Members
125    public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
126      return GetEstimatedValuesHelper(dataset, rows);
127    }
128    public SupportVectorRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
129      return new SupportVectorRegressionSolution(this, problemData);
130    }
131    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
132      return CreateRegressionSolution(problemData);
133    }
134    #endregion
135
136    #region IClassificationModel Members
137    public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
138      if (classValues == null) throw new NotSupportedException();
139      // return the original class value instead of the predicted value of the model
140      // svm classification only works for integer classes
141      foreach (var estimated in GetEstimatedValuesHelper(dataset, rows)) {
142        // find closest class
143        double bestDist = double.MaxValue;
144        double bestClass = -1;
145        for (int i = 0; i < classValues.Length; i++) {
146          double d = Math.Abs(estimated - classValues[i]);
147          if (d < bestDist) {
148            bestDist = d;
149            bestClass = classValues[i];
150            if (d.IsAlmost(0.0)) break; // exact match no need to look further
151          }
152        }
153        yield return bestClass;
154      }
155    }
156
157    public SupportVectorClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
158      return new SupportVectorClassificationSolution(this, problemData);
159    }
160    IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
161      return CreateClassificationSolution(problemData);
162    }
163    #endregion
164    // cache for predictions, which is cloned but not persisted, must be cleared when the model is changed
165    private Dictionary<Dataset, double[]> cachedPredictions = new Dictionary<Dataset, double[]>();
166    private IEnumerable<double> GetEstimatedValuesHelper(Dataset dataset, IEnumerable<int> rows) {
167      if (!cachedPredictions.ContainsKey(dataset)) {
168        // create an array of cached predictions which is initially filled with NaNs
169        double[] predictions = Enumerable.Repeat(double.NaN, dataset.Rows).ToArray();
170        CalculatePredictions(dataset, rows, predictions);
171        cachedPredictions.Add(dataset, predictions);
172      }
173      // get the array of predictions and select the subset of requested rows
174      double[] p = cachedPredictions[dataset];
175      var requestedPredictions = from r in rows
176                                 select p[r];
177      // check if the requested predictions contain NaNs
178      // (this means for the request rows some predictions have not been cached)
179      if (requestedPredictions.Any(x => double.IsNaN(x))) {
180        // updated the predictions for currently requested rows
181        CalculatePredictions(dataset, rows, p);
182        cachedPredictions[dataset] = p;
183        // now we can be sure that for the current rows all predictions are available
184        return from r in rows
185               select p[r];
186      } else {
187        // there were no NaNs => just return the cached predictions
188        return requestedPredictions;
189      }
190    }
191
192    private void CalculatePredictions(Dataset dataset, IEnumerable<int> rows, double[] predictions) {
193      // calculate and cache predictions for the currently requested rows
194      SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, targetVariable, allowedInputVariables, rows);
195      SVM.Problem scaledProblem = Scaling.Scale(RangeTransform, problem);
196
197      // row is the index in the original dataset,
198      // i is the index in the scaled dataset (containing only the necessary rows)
199      int i = 0;
200      foreach (var row in rows) {
201        predictions[row] = SVM.Prediction.Predict(Model, scaledProblem.X[i]);
202        i++;
203      }
204    }
205
206    #region events
207    public event EventHandler Changed;
208    private void OnChanged(EventArgs e) {
209      cachedPredictions.Clear();
210      var handlers = Changed;
211      if (handlers != null)
212        handlers(this, e);
213    }
214    #endregion
215
216    #region persistence
217    [Storable]
218    private string ModelAsString {
219      get {
220        using (MemoryStream stream = new MemoryStream()) {
221          SVM.Model.Write(stream, Model);
222          stream.Seek(0, System.IO.SeekOrigin.Begin);
223          StreamReader reader = new StreamReader(stream);
224          return reader.ReadToEnd();
225        }
226      }
227      set {
228        using (MemoryStream stream = new MemoryStream(Encoding.ASCII.GetBytes(value))) {
229          model = SVM.Model.Read(stream);
230        }
231      }
232    }
233    [Storable]
234    private string RangeTransformAsString {
235      get {
236        using (MemoryStream stream = new MemoryStream()) {
237          SVM.RangeTransform.Write(stream, RangeTransform);
238          stream.Seek(0, System.IO.SeekOrigin.Begin);
239          StreamReader reader = new StreamReader(stream);
240          return reader.ReadToEnd();
241        }
242      }
243      set {
244        using (MemoryStream stream = new MemoryStream(Encoding.ASCII.GetBytes(value))) {
245          RangeTransform = SVM.RangeTransform.Read(stream);
246        }
247      }
248    }
249    #endregion
250  }
251}
Note: See TracBrowser for help on using the repository browser.