Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineModel.cs @ 6603

Last change on this file since 6603 was 6603, checked in by mkommend, 13 years ago

#1600: Added possibility to create regression solutions from regression models.

File size: 9.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.IO;
25using System.Linq;
26using System.Text;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis;
31using SVM;
32
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Represents a support vector machine model.
  /// </summary>
  /// <remarks>
  /// Bundles an <see cref="SVM.Model"/> with the <see cref="SVM.RangeTransform"/> that is applied
  /// to the input data before prediction, the name of the target variable and the names of the
  /// input variables. Predictions are cached per dataset; the cache is cleared whenever
  /// <see cref="Model"/> or <see cref="RangeTransform"/> is replaced.
  /// </remarks>
  [StorableClass]
  [Item("SupportVectorMachineModel", "Represents a support vector machine model.")]
  public sealed class SupportVectorMachineModel : NamedItem, ISupportVectorMachineModel {

    private SVM.Model model;
    /// <summary>
    /// Gets or sets the SVM model.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when a different value that is <c>null</c> is assigned.</exception>
    public SVM.Model Model {
      get { return model; }
      set {
        if (value != model) {
          if (value == null) throw new ArgumentNullException("value");
          model = value;
          // raises Changed and drops all cached predictions
          OnChanged(EventArgs.Empty);
        }
      }
    }

    private SVM.RangeTransform rangeTransform;
    /// <summary>
    /// Gets or sets the range transformation for the model.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when a different value that is <c>null</c> is assigned.</exception>
    public SVM.RangeTransform RangeTransform {
      get { return rangeTransform; }
      set {
        if (value != rangeTransform) {
          if (value == null) throw new ArgumentNullException("value");
          rangeTransform = value;
          // raises Changed and drops all cached predictions
          OnChanged(EventArgs.Empty);
        }
      }
    }

    /// <summary>
    /// Gets a newly created dataset with one row per support vector of <see cref="Model"/>.
    /// The columns are named after the allowed input variables.
    /// </summary>
    /// <remarks>
    /// Values are copied by node position: the value of the j-th node of a support vector is
    /// written to column j. NOTE(review): this presumably expects every support vector to hold
    /// at most one node per input variable, in the order of the input variables - confirm.
    /// </remarks>
    public Dataset SupportVectors {
      get {
        int supportVectorCount = Model.SupportVectorCount;
        var data = new double[supportVectorCount, allowedInputVariables.Length];
        for (int i = 0; i < supportVectorCount; i++) {
          var sv = Model.SupportVectors[i];
          for (int j = 0; j < sv.Length; j++) {
            data[i, j] = sv[j].Value;
          }
        }
        return new Dataset(allowedInputVariables, data);
      }
    }

    [Storable]
    private string targetVariable;
    [Storable]
    private string[] allowedInputVariables;
    [Storable]
    private double[] classValues; // only for SVM classification models

    // cache for predictions, which is cloned but not persisted, must be cleared when the model is changed
    // (one array per dataset, indexed by dataset row; NaN marks a row without a cached prediction)
    private readonly Dictionary<Dataset, double[]> cachedPredictions = new Dictionary<Dataset, double[]>();

    [StorableConstructor]
    private SupportVectorMachineModel(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Cloning constructor. The SVM model and the range transformation are shared with
    /// <paramref name="original"/>; variable names, class values and cached predictions are copied.
    /// </summary>
    private SupportVectorMachineModel(SupportVectorMachineModel original, Cloner cloner)
      : base(original, cloner) {
      // only using a shallow copy here! (gkronber)
      this.model = original.model;
      this.rangeTransform = original.rangeTransform;
      this.targetVariable = original.targetVariable;
      this.allowedInputVariables = (string[])original.allowedInputVariables.Clone();
      foreach (var entry in original.cachedPredictions) {
        this.cachedPredictions.Add(cloner.Clone(entry.Key), (double[])entry.Value.Clone());
      }
      if (original.classValues != null)
        this.classValues = (double[])original.classValues.Clone();
    }

    /// <summary>
    /// Creates a model for classification; <paramref name="classValues"/> are the class values
    /// that estimated values are mapped to in <see cref="GetEstimatedClassValues"/>.
    /// </summary>
    public SupportVectorMachineModel(SVM.Model model, SVM.RangeTransform rangeTransform, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> classValues)
      : this(model, rangeTransform, targetVariable, allowedInputVariables) {
      this.classValues = classValues.ToArray();
    }

    /// <summary>
    /// Creates a model without class values. The input variable names are copied into an array.
    /// </summary>
    public SupportVectorMachineModel(SVM.Model model, SVM.RangeTransform rangeTransform, string targetVariable, IEnumerable<string> allowedInputVariables)
      : base() {
      this.name = ItemName;
      this.description = ItemDescription;
      this.model = model;
      this.rangeTransform = rangeTransform;
      this.targetVariable = targetVariable;
      this.allowedInputVariables = allowedInputVariables.ToArray();
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SupportVectorMachineModel(this, cloner);
    }

    #region IRegressionModel Members
    /// <summary>
    /// Returns the (cached) model predictions for the given rows of the dataset,
    /// in the order in which the rows are supplied.
    /// </summary>
    public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
      return GetEstimatedValuesHelper(dataset, rows);
    }
    /// <summary>
    /// Creates a regression solution from this model and the given problem data.
    /// </summary>
    public SupportVectorRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
      return new SupportVectorRegressionSolution(this, problemData);
    }
    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
      return CreateRegressionSolution(problemData);
    }
    #endregion

    #region IClassificationModel Members
    /// <summary>
    /// Returns for each requested row the class value that is closest to the value estimated by the model.
    /// </summary>
    /// <exception cref="NotSupportedException">Thrown when the model was created without class values.</exception>
    public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
      // The check lives outside the iterator so that it fails at the call,
      // not at the first enumeration of the result.
      if (classValues == null) throw new NotSupportedException();
      return GetEstimatedClassValuesIterator(dataset, rows);
    }

    // Lazily maps every estimated value to the closest entry of classValues.
    private IEnumerable<double> GetEstimatedClassValuesIterator(Dataset dataset, IEnumerable<int> rows) {
      // return the original class value instead of the predicted value of the model
      // svm classification only works for integer classes
      foreach (var estimated in GetEstimatedValuesHelper(dataset, rows)) {
        // find closest class
        double bestDist = double.MaxValue;
        double bestClass = -1; // result when there are no class values at all
        for (int i = 0; i < classValues.Length; i++) {
          double d = Math.Abs(estimated - classValues[i]);
          if (d < bestDist) {
            bestDist = d;
            bestClass = classValues[i];
            if (d.IsAlmost(0.0)) break; // exact match no need to look further
          }
        }
        yield return bestClass;
      }
    }
    #endregion

    // Returns the predictions for the requested rows and computes the ones that are not cached yet.
    private IEnumerable<double> GetEstimatedValuesHelper(Dataset dataset, IEnumerable<int> rows) {
      // Snapshot the rows once: they are needed for the cache check, the calculation and the
      // result, and a lazily evaluated sequence is not guaranteed to be repeatable.
      int[] requestedRows = rows.ToArray();

      double[] predictions;
      if (!cachedPredictions.TryGetValue(dataset, out predictions)) {
        // create an array of cached predictions which is initially filled with NaNs
        predictions = Enumerable.Repeat(double.NaN, dataset.Rows).ToArray();
        cachedPredictions.Add(dataset, predictions);
      }

      // a NaN among the requested rows means that some predictions have not been cached yet
      // => update the predictions for the currently requested rows
      if (requestedRows.Any(r => double.IsNaN(predictions[r]))) {
        CalculatePredictions(dataset, requestedRows, predictions);
      }

      // now all predictions for the requested rows are available
      return requestedRows.Select(r => predictions[r]);
    }

    // Calculates the predictions for the given rows and stores them in predictions,
    // which is indexed by the row index of the original dataset.
    private void CalculatePredictions(Dataset dataset, IEnumerable<int> rows, double[] predictions) {
      // calculate and cache predictions for the currently requested rows
      SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, targetVariable, allowedInputVariables, rows);
      SVM.Problem scaledProblem = Scaling.Scale(RangeTransform, problem);

      // row is the index in the original dataset,
      // i is the index in the scaled dataset (containing only the necessary rows)
      int i = 0;
      foreach (var row in rows) {
        predictions[row] = SVM.Prediction.Predict(Model, scaledProblem.X[i]);
        i++;
      }
    }

    #region events
    /// <summary>
    /// Raised after <see cref="Model"/> or <see cref="RangeTransform"/> has been replaced.
    /// </summary>
    public event EventHandler Changed;
    private void OnChanged(EventArgs e) {
      // cached predictions belong to the previous model / range transformation
      cachedPredictions.Clear();
      var handlers = Changed;
      if (handlers != null)
        handlers(this, e);
    }
    #endregion

    #region persistence
    // Text representation of the SVM model that is stored instead of the model object:
    // written with SVM.Model.Write and read back with SVM.Model.Read.
    [Storable]
    private string ModelAsString {
      get {
        using (MemoryStream stream = new MemoryStream()) {
          SVM.Model.Write(stream, Model);
          stream.Seek(0, System.IO.SeekOrigin.Begin);
          using (StreamReader reader = new StreamReader(stream)) {
            return reader.ReadToEnd();
          }
        }
      }
      set {
        using (MemoryStream stream = new MemoryStream(Encoding.ASCII.GetBytes(value))) {
          // assigns the field directly, so no Changed event is raised here
          model = SVM.Model.Read(stream);
        }
      }
    }
    // Text representation of the range transformation that is stored instead of the object:
    // written with SVM.RangeTransform.Write and read back with SVM.RangeTransform.Read.
    [Storable]
    private string RangeTransformAsString {
      get {
        using (MemoryStream stream = new MemoryStream()) {
          SVM.RangeTransform.Write(stream, RangeTransform);
          stream.Seek(0, System.IO.SeekOrigin.Begin);
          using (StreamReader reader = new StreamReader(stream)) {
            return reader.ReadToEnd();
          }
        }
      }
      set {
        using (MemoryStream stream = new MemoryStream(Encoding.ASCII.GetBytes(value))) {
          // goes through the property, i.e. includes the null check and the Changed event
          RangeTransform = SVM.RangeTransform.Read(stream);
        }
      }
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.