Free cookie consent management tool by TermsFeed Policy Generator

Ticket #1581: svm-performance.patch

File svm-performance.patch, 4.8 KB (added by gkronber, 13 years ago)

Patch to add caching of predicted values to SVMs

  • HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs

     
    362362    }
    363363
    /// <summary>
    /// Adds every entry of a cloned copy of this instance's result collection to
    /// <paramref name="results"/>, using the result name as key.
    /// </summary>
    /// <param name="results">Target dictionary that receives one entry per result.</param>
    public void CollectResultValues(IDictionary<string, IItem> results) {
      // The parameter shadows the field of the same name, so the field needs the explicit "this.".
      var snapshot = (ResultCollection)this.results.Clone();
      foreach (var entry in snapshot) {
        // IDictionary.Add throws if the name is already present in the target dictionary.
        results.Add(entry.Name, entry.Value);
      }
    }
     370
     371    private void AggregateResultValues(IDictionary<string, IItem> results) {
    365372      Dictionary<string, List<double>> resultValues = new Dictionary<string, List<double>>();
    366373      IEnumerable<IRun> runs = clonedAlgorithms.Select(alg => alg.Runs.FirstOrDefault()).Where(run => run != null);
    367374      IEnumerable<KeyValuePair<string, IItem>> resultCollections = runs.Where(x => x != null).SelectMany(x => x.Results).ToList();
     
    701708    private void OnStopped() {
    702709      stopPending = false;
    703710      Dictionary<string, IItem> collectedResults = new Dictionary<string, IItem>();
    704       CollectResultValues(collectedResults);
     711      AggregateResultValues(collectedResults);
    705712      results.AddRange(collectedResults.Select(x => new Result(x.Key, x.Value)).Cast<IResult>().ToArray());
    706713      runsCounter++;
    707714      runs.Add(new Run(string.Format("{0} Run {1}", Name, runsCounter), this));
  • HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineModel.cs

     
    9797      this.rangeTransform = original.rangeTransform;
    9898      this.targetVariable = original.targetVariable;
    9999      this.allowedInputVariables = (string[])original.allowedInputVariables.Clone();
     100      foreach (var dataset in original.cachedPredictions.Keys) {
     101        this.cachedPredictions.Add(cloner.Clone(dataset), (double[])original.cachedPredictions[dataset].Clone());
     102      }
    100103      if (original.classValues != null)
    101104        this.classValues = (double[])original.classValues.Clone();
    102105    }
     
    144147      }
    145148    }
    146149    #endregion
     150    // cache for predictions, which is cloned but not persisted, must be cleared when the model is changed
     151    private Dictionary<Dataset, double[]> cachedPredictions = new Dictionary<Dataset, double[]>();
    /// <summary>
    /// Returns the model's predictions for the requested rows of <paramref name="dataset"/>.
    /// Predictions are cached per dataset in an array indexed by dataset row; slots that
    /// have not been computed yet hold <see cref="double.NaN"/>.
    /// </summary>
    /// <param name="dataset">Dataset to predict on; also the key of the prediction cache.</param>
    /// <param name="rows">Dataset row indices for which predictions are requested.</param>
    /// <returns>One prediction per requested row, in the order of <paramref name="rows"/>.</returns>
    private IEnumerable<double> GetEstimatedValuesHelper(Dataset dataset, IEnumerable<int> rows) {
      // Materialize once: the rows are needed for the cache check, the calculation and the
      // result, and the caller may have passed a lazily evaluated query.
      List<int> requestedRows = rows.ToList();

      double[] predictions;
      if (!cachedPredictions.TryGetValue(dataset, out predictions)) {
        // First request for this dataset: start with an all-NaN array and fill the requested rows.
        predictions = Enumerable.Repeat(double.NaN, dataset.Rows).ToArray();
        CalculatePredictions(dataset, requestedRows, predictions);
        // Only register the array after the calculation succeeded.
        cachedPredictions.Add(dataset, predictions);
      } else if (requestedRows.Any(r => double.IsNaN(predictions[r]))) {
        // At least one requested row has not been cached yet => update the requested rows.
        // The array is the same instance that is stored in the cache, so no write-back is needed.
        CalculatePredictions(dataset, requestedRows, predictions);
      }

      // All requested rows are available now; select them in request order.
      return requestedRows.Select(r => predictions[r]);
    }

    /// <summary>
    /// Calculates predictions for the given dataset rows and stores each one in
    /// <paramref name="predictions"/> at the index of its dataset row.
    /// </summary>
    /// <param name="dataset">Dataset to predict on.</param>
    /// <param name="rows">Dataset row indices to calculate predictions for.</param>
    /// <param name="predictions">Cache array indexed by dataset row; updated in place.</param>
    private void CalculatePredictions(Dataset dataset, IEnumerable<int> rows, double[] predictions) {
      List<int> rowIndices = rows.ToList();
      SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, targetVariable, allowedInputVariables, rowIndices);
      SVM.Problem scaledProblem = Scaling.Scale(RangeTransform, problem);

      // The i-th entry of the problem belongs to the i-th requested row, so the prediction
      // must be stored at that row's dataset index, not at the position i within the problem.
      // NOTE(review): assumes CreateSvmProblem emits exactly one entry per requested row,
      // in request order - confirm against SupportVectorMachineUtil.
      for (int i = 0; i < scaledProblem.Count; i++) {
        predictions[rowIndices[i]] = SVM.Prediction.Predict(Model, scaledProblem.X[i]);
      }
    }
     187
    155188    #region events
    156189    public event EventHandler Changed;
    157190    private void OnChanged(EventArgs e) {
     191      cachedPredictions.Clear();
    158192      var handlers = Changed;
    159193      if (handlers != null)
    160194        handlers(this, e);