Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineModel.cs @ 6566

Last change on this file since 6566 was 6566, checked in by gkronber, 13 years ago

#1581: implemented caching for SVM models.

File size: 9.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.IO;
25using System.Linq;
26using System.Text;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis;
31using SVM;
32
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Represents a support vector machine model.
  /// </summary>
  /// <remarks>
  /// Wraps an <see cref="SVM.Model"/> together with the <see cref="SVM.RangeTransform"/> that is applied
  /// to the inputs before prediction. Predictions are cached per dataset; the cache is copied when the
  /// model is cloned, is not persisted, and is cleared whenever <see cref="Model"/> or
  /// <see cref="RangeTransform"/> is replaced.
  /// </remarks>
  [StorableClass]
  [Item("SupportVectorMachineModel", "Represents a support vector machine model.")]
  public sealed class SupportVectorMachineModel : NamedItem, ISupportVectorMachineModel {

    private SVM.Model model;
    /// <summary>
    /// Gets or sets the SVM model.
    /// Assigning a different instance clears the prediction cache and raises <see cref="Changed"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null (and differs from the current model).</exception>
    public SVM.Model Model {
      get { return model; }
      set {
        if (value != model) {
          if (value == null) throw new ArgumentNullException("value");
          model = value;
          OnChanged(EventArgs.Empty);
        }
      }
    }

    private SVM.RangeTransform rangeTransform;
    /// <summary>
    /// Gets or sets the range transformation for the model.
    /// Assigning a different instance clears the prediction cache and raises <see cref="Changed"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null (and differs from the current transformation).</exception>
    public SVM.RangeTransform RangeTransform {
      get { return rangeTransform; }
      set {
        if (value != rangeTransform) {
          if (value == null) throw new ArgumentNullException("value");
          rangeTransform = value;
          OnChanged(EventArgs.Empty);
        }
      }
    }

    /// <summary>
    /// Gets the support vectors of the model as a new dataset with one row per support vector and
    /// one column per allowed input variable.
    /// </summary>
    public Dataset SupportVectors {
      get {
        int supportVectorCount = Model.SupportVectorCount;
        var data = new double[supportVectorCount, allowedInputVariables.Length];
        for (int i = 0; i < supportVectorCount; i++) {
          var sv = Model.SupportVectors[i];
          // NOTE(review): the j-th node is written to column j. If support vectors are stored
          // sparsely (node position != variable position) values would end up in the wrong
          // column - confirm against SupportVectorMachineUtil.CreateSvmProblem.
          for (int j = 0; j < sv.Length; j++) {
            data[i, j] = sv[j].Value;
          }
        }
        return new Dataset(allowedInputVariables, data);
      }
    }

    [Storable]
    private string targetVariable;
    [Storable]
    private string[] allowedInputVariables;
    [Storable]
    private double[] classValues; // only for SVM classification models

    // cache for predictions, which is cloned but not persisted, must be cleared when the model is changed
    // key: dataset, value: one slot per dataset row, NaN = no prediction cached for that row yet
    private readonly Dictionary<Dataset, double[]> cachedPredictions = new Dictionary<Dataset, double[]>();

    /// <summary>
    /// Constructor used by the persistence framework.
    /// </summary>
    [StorableConstructor]
    private SupportVectorMachineModel(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Cloning constructor. The SVM model and the range transformation are shared with
    /// <paramref name="original"/>; the variable names, class values and the prediction cache are copied.
    /// </summary>
    private SupportVectorMachineModel(SupportVectorMachineModel original, Cloner cloner)
      : base(original, cloner) {
      // only using a shallow copy here! (gkronber)
      this.model = original.model;
      this.rangeTransform = original.rangeTransform;
      this.targetVariable = original.targetVariable;
      this.allowedInputVariables = (string[])original.allowedInputVariables.Clone();
      // iterate the pairs directly instead of looking every key up a second time
      foreach (var entry in original.cachedPredictions) {
        this.cachedPredictions.Add(cloner.Clone(entry.Key), (double[])entry.Value.Clone());
      }
      if (original.classValues != null)
        this.classValues = (double[])original.classValues.Clone();
    }

    /// <summary>
    /// Creates a classification model; <paramref name="classValues"/> are the original class values
    /// that predictions are mapped to by <see cref="GetEstimatedClassValues"/>.
    /// </summary>
    public SupportVectorMachineModel(SVM.Model model, SVM.RangeTransform rangeTransform, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> classValues)
      : this(model, rangeTransform, targetVariable, allowedInputVariables) {
      this.classValues = classValues.ToArray();
    }

    /// <summary>
    /// Creates a model without class values. <see cref="GetEstimatedClassValues"/> is not supported
    /// for models created with this constructor.
    /// </summary>
    public SupportVectorMachineModel(SVM.Model model, SVM.RangeTransform rangeTransform, string targetVariable, IEnumerable<string> allowedInputVariables)
      : base() {
      this.name = ItemName;
      this.description = ItemDescription;
      this.model = model;
      this.rangeTransform = rangeTransform;
      this.targetVariable = targetVariable;
      this.allowedInputVariables = allowedInputVariables.ToArray();
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SupportVectorMachineModel(this, cloner);
    }

    #region IRegressionModel Members
    /// <summary>
    /// Returns the predictions of the SVM for the given rows of the dataset, in the order of <paramref name="rows"/>.
    /// </summary>
    public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
      return GetEstimatedValuesHelper(dataset, rows);
    }
    #endregion
    #region IClassificationModel Members
    /// <summary>
    /// Returns, for each requested row, the known class value that is closest to the SVM prediction.
    /// </summary>
    /// <exception cref="NotSupportedException">The model was created without class values.</exception>
    public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
      // Checked here and not inside the iterator: an iterator body does not run before the first
      // MoveNext(), so the exception would otherwise surface far away from the faulty call.
      if (classValues == null) throw new NotSupportedException();
      return EstimateClassValuesIterator(dataset, rows);
    }

    // Lazily maps every prediction to the closest entry of classValues.
    private IEnumerable<double> EstimateClassValuesIterator(Dataset dataset, IEnumerable<int> rows) {
      // return the original class value instead of the predicted value of the model
      // svm classification only works for integer classes
      foreach (var estimated in GetEstimatedValuesHelper(dataset, rows)) {
        // find closest class; stays at -1 when no distance compares smaller (e.g. NaN prediction)
        double bestDist = double.MaxValue;
        double bestClass = -1;
        for (int i = 0; i < classValues.Length; i++) {
          double d = Math.Abs(estimated - classValues[i]);
          if (d < bestDist) {
            bestDist = d;
            bestClass = classValues[i];
            if (d.IsAlmost(0.0)) break; // exact match no need to look further
          }
        }
        yield return bestClass;
      }
    }
    #endregion

    /// <summary>
    /// Returns the predictions for the requested rows, calculating and caching those that are not cached yet.
    /// </summary>
    private IEnumerable<double> GetEstimatedValuesHelper(Dataset dataset, IEnumerable<int> rows) {
      // rows is needed several times (cache check, prediction, result); enumerate the source only once
      int[] requestedRows = rows.ToArray();

      double[] predictions;
      if (!cachedPredictions.TryGetValue(dataset, out predictions)) {
        // create an array of cached predictions which is initially filled with NaNs
        predictions = Enumerable.Repeat(double.NaN, dataset.Rows).ToArray();
        cachedPredictions.Add(dataset, predictions);
      }

      // NaN means that no prediction has been cached for the row. Only those rows are predicted;
      // the per-row cache already relies on a prediction being independent of the other requested rows.
      int[] missingRows = requestedRows.Where(r => double.IsNaN(predictions[r])).Distinct().ToArray();
      if (missingRows.Length > 0)
        CalculatePredictions(dataset, missingRows, predictions);

      // now all predictions for the requested rows are available
      return requestedRows.Select(r => predictions[r]);
    }

    /// <summary>
    /// Predicts the given rows and stores each result in <paramref name="predictions"/> at the index of the row.
    /// </summary>
    private void CalculatePredictions(Dataset dataset, IEnumerable<int> rows, double[] predictions) {
      // calculate and cache predictions for the currently requested rows
      SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, targetVariable, allowedInputVariables, rows);
      SVM.Problem scaledProblem = Scaling.Scale(RangeTransform, problem);

      // row is the index in the original dataset,
      // i is the index in the scaled dataset (containing only the necessary rows)
      int i = 0;
      foreach (var row in rows) {
        predictions[row] = SVM.Prediction.Predict(Model, scaledProblem.X[i]);
        i++;
      }
    }

    #region events
    /// <summary>
    /// Raised after <see cref="Model"/> or <see cref="RangeTransform"/> has been replaced.
    /// </summary>
    public event EventHandler Changed;
    private void OnChanged(EventArgs e) {
      // cached predictions belong to the previous model / transformation
      cachedPredictions.Clear();
      var handlers = Changed;
      if (handlers != null)
        handlers(this, e);
    }
    #endregion

    #region persistence
    // The SVM model is persisted in the textual format written by SVM.Model.Write.
    [Storable]
    private string ModelAsString {
      get {
        using (MemoryStream stream = new MemoryStream()) {
          SVM.Model.Write(stream, Model);
          stream.Seek(0, System.IO.SeekOrigin.Begin);
          // the reader is not disposed separately; the stream is owned by the enclosing using
          StreamReader reader = new StreamReader(stream);
          return reader.ReadToEnd();
        }
      }
      set {
        using (MemoryStream stream = new MemoryStream(Encoding.ASCII.GetBytes(value))) {
          model = SVM.Model.Read(stream);
        }
      }
    }
    // The range transformation is persisted in the textual format written by SVM.RangeTransform.Write.
    [Storable]
    private string RangeTransformAsString {
      get {
        using (MemoryStream stream = new MemoryStream()) {
          SVM.RangeTransform.Write(stream, RangeTransform);
          stream.Seek(0, System.IO.SeekOrigin.Begin);
          // the reader is not disposed separately; the stream is owned by the enclosing using
          StreamReader reader = new StreamReader(stream);
          return reader.ReadToEnd();
        }
      }
      set {
        using (MemoryStream stream = new MemoryStream(Encoding.ASCII.GetBytes(value))) {
          // assign the field like ModelAsString does: restoring state must not raise Changed
          rangeTransform = SVM.RangeTransform.Read(stream);
        }
      }
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.