Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/01/11 17:48:53 (13 years ago)
Author:
mkommend
Message:

#1479: Integrated trunk changes.

Location:
branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
1 added
12 edited
29 copied

Legend:

Unmodified
Added
Removed
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs

    r6250 r6618  
    3434
    3535namespace HeuristicLab.Algorithms.DataAnalysis {
    36   [Item("Cross Validation", "Cross Validation wrapper for data analysis algorithms.")]
     36  [Item("Cross Validation", "Cross-validation wrapper for data analysis algorithms.")]
    3737  [Creatable("Data Analysis")]
    3838  [StorableClass]
     
    363363
    364364    public void CollectResultValues(IDictionary<string, IItem> results) {
     365      var clonedResults = (ResultCollection)this.results.Clone();
     366      foreach (var result in clonedResults) {
     367        results.Add(result.Name, result.Value);
     368      }
     369    }
     370
     371    private void AggregateResultValues(IDictionary<string, IItem> results) {
    365372      Dictionary<string, List<double>> resultValues = new Dictionary<string, List<double>>();
    366373      IEnumerable<IRun> runs = clonedAlgorithms.Select(alg => alg.Runs.FirstOrDefault()).Where(run => run != null);
     
    397404      List<IResult> aggregatedResults = new List<IResult>();
    398405      foreach (KeyValuePair<string, List<IRegressionSolution>> solutions in resultSolutions) {
    399         var problemDataClone = (IRegressionProblemData)Problem.ProblemData.Clone();
     406        // clone manually to correctly clone references between cloned root objects
     407        Cloner cloner = new Cloner();
     408        var problemDataClone = (IRegressionProblemData)cloner.Clone(Problem.ProblemData);
     409        // set partitions of problem data clone correctly
    400410        problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value;
    401411        problemDataClone.TestPartition.Start = SamplesStart.Value; problemDataClone.TestPartition.End = SamplesEnd.Value;
    402         var ensembleSolution = new RegressionEnsembleSolution(solutions.Value.Select(x => x.Model), problemDataClone,
    403           solutions.Value.Select(x => x.ProblemData.TrainingPartition),
    404           solutions.Value.Select(x => x.ProblemData.TestPartition));
     412        // clone models
     413        var ensembleSolution = new RegressionEnsembleSolution(
     414          solutions.Value.Select(x => cloner.Clone(x.Model)),
     415          problemDataClone,
     416          solutions.Value.Select(x => cloner.Clone(x.ProblemData.TrainingPartition)),
     417          solutions.Value.Select(x => cloner.Clone(x.ProblemData.TestPartition)));
    405418
    406419        aggregatedResults.Add(new Result(solutions.Key + " (ensemble)", ensembleSolution));
     
    425438      var aggregatedResults = new List<IResult>();
    426439      foreach (KeyValuePair<string, List<IClassificationSolution>> solutions in resultSolutions) {
    427         var problemDataClone = (IClassificationProblemData)Problem.ProblemData.Clone();
     440        // clone manually to correctly clone references between cloned root objects
     441        Cloner cloner = new Cloner();
     442        var problemDataClone = (IClassificationProblemData)cloner.Clone(Problem.ProblemData);
     443        // set partitions of problem data clone correctly
    428444        problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value;
    429445        problemDataClone.TestPartition.Start = SamplesStart.Value; problemDataClone.TestPartition.End = SamplesEnd.Value;
    430         var ensembleSolution = new ClassificationEnsembleSolution(solutions.Value.Select(x => x.Model), problemDataClone,
    431           solutions.Value.Select(x => x.ProblemData.TrainingPartition),
    432           solutions.Value.Select(x => x.ProblemData.TestPartition));
     446        // clone models
     447        var ensembleSolution = new ClassificationEnsembleSolution(
     448          solutions.Value.Select(x => cloner.Clone(x.Model)),
     449          problemDataClone,
     450          solutions.Value.Select(x => cloner.Clone(x.ProblemData.TrainingPartition)),
     451          solutions.Value.Select(x => cloner.Clone(x.ProblemData.TestPartition)));
    433452
    434453        aggregatedResults.Add(new Result(solutions.Key + " (ensemble)", ensembleSolution));
     
    553572      } else
    554573        SamplesEnd.Value = 0;
     574
     575      SamplesStart_ValueChanged(this, EventArgs.Empty);
     576      SamplesEnd_ValueChanged(this, EventArgs.Empty);
    555577    }
    556578
     
    699721      stopPending = false;
    700722      Dictionary<string, IItem> collectedResults = new Dictionary<string, IItem>();
    701       CollectResultValues(collectedResults);
     723      AggregateResultValues(collectedResults);
    702724      results.AddRange(collectedResults.Select(x => new Result(x.Key, x.Value)).Cast<IResult>().ToArray());
    703725      runsCounter++;
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/FixedDataAnalysisAlgorithm.cs

    r5809 r6618  
    2121
    2222using System;
    23 using System.Linq;
     23using System.Threading;
     24using System.Threading.Tasks;
    2425using HeuristicLab.Common;
    25 using HeuristicLab.Core;
    26 using HeuristicLab.Data;
    2726using HeuristicLab.Optimization;
    28 using HeuristicLab.Parameters;
    2927using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3028using HeuristicLab.Problems.DataAnalysis;
    31 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    32 using System.Collections.Generic;
    33 using HeuristicLab.Problems.DataAnalysis.Symbolic;
    34 using System.Threading.Tasks;
    35 using System.Threading;
    3629
    3730namespace HeuristicLab.Algorithms.DataAnalysis {
     
    5750    }
    5851    #endregion
    59    
     52
    6053    private DateTime lastUpdateTime;
    6154
     
    10396    private void Run(object state) {
    10497      CancellationToken cancellationToken = (CancellationToken)state;
    105       OnStarted();
    10698      lastUpdateTime = DateTime.Now;
    10799      System.Timers.Timer timer = new System.Timers.Timer(250);
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r6241 r6618  
    107107  </ItemGroup>
    108108  <ItemGroup>
     109    <Compile Include="RegressionWorkbench.cs" />
    109110    <Compile Include="CrossValidation.cs">
    110111      <SubType>Code</SubType>
     
    112113    <Compile Include="HeuristicLabAlgorithmsDataAnalysisPlugin.cs" />
    113114    <Compile Include="FixedDataAnalysisAlgorithm.cs" />
     115    <Compile Include="Interfaces\INearestNeighbourClassificationSolution.cs" />
     116    <Compile Include="Interfaces\INearestNeighbourRegressionSolution.cs" />
     117    <Compile Include="Interfaces\INearestNeighbourModel.cs" />
     118    <Compile Include="Interfaces\INeuralNetworkEnsembleClassificationSolution.cs" />
     119    <Compile Include="Interfaces\INeuralNetworkEnsembleRegressionSolution.cs" />
     120    <Compile Include="Interfaces\INeuralNetworkEnsembleModel.cs" />
     121    <Compile Include="Interfaces\INeuralNetworkClassificationSolution.cs" />
     122    <Compile Include="Interfaces\INeuralNetworkRegressionSolution.cs" />
     123    <Compile Include="Interfaces\INeuralNetworkModel.cs" />
    114124    <Compile Include="Interfaces\IRandomForestClassificationSolution.cs" />
    115125    <Compile Include="Interfaces\IRandomForestModel.cs" />
     
    129139      <SubType>Code</SubType>
    130140    </Compile>
     141    <Compile Include="Linear\MultinomialLogitClassification.cs" />
     142    <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
     143    <Compile Include="Linear\MultinomialLogitModel.cs" />
     144    <Compile Include="NearestNeighbour\NearestNeighbourClassification.cs" />
     145    <Compile Include="NearestNeighbour\NearestNeighbourClassificationSolution.cs" />
     146    <Compile Include="NearestNeighbour\NearestNeighbourModel.cs" />
     147    <Compile Include="NearestNeighbour\NearestNeighbourRegression.cs" />
     148    <Compile Include="NearestNeighbour\NearestNeighbourRegressionSolution.cs" />
     149    <Compile Include="NeuralNetwork\NeuralNetworkEnsembleClassification.cs" />
     150    <Compile Include="NeuralNetwork\NeuralNetworkEnsembleClassificationSolution.cs" />
     151    <Compile Include="NeuralNetwork\NeuralNetworkEnsembleModel.cs" />
     152    <Compile Include="NeuralNetwork\NeuralNetworkEnsembleRegressionSolution.cs" />
     153    <Compile Include="NeuralNetwork\NeuralNetworkEnsembleRegression.cs" />
     154    <Compile Include="NeuralNetwork\NeuralNetworkClassification.cs" />
     155    <Compile Include="NeuralNetwork\NeuralNetworkClassificationSolution.cs" />
     156    <Compile Include="NeuralNetwork\NeuralNetworkModel.cs" />
     157    <Compile Include="NeuralNetwork\NeuralNetworkRegression.cs" />
     158    <Compile Include="NeuralNetwork\NeuralNetworkRegressionSolution.cs" />
    131159    <Compile Include="Properties\AssemblyInfo.cs" />
    132160    <Compile Include="RandomForest\RandomForestClassificationSolution.cs" />
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLabAlgorithmsDataAnalysisPlugin.cs.frame

    r5869 r6618  
    2626  /// Plugin class for HeuristicLab.Algorithms.DataAnalysis plugin.
    2727  /// </summary>
    28   [Plugin("HeuristicLab.Algorithms.DataAnalysis", "Provides wrappers for data analysis algorithms implemented in external libraries (linear regression, linear discriminant analysis, k-means clustering, support vector classification and regression)", "3.4.0.$WCREV$")]
     28  [Plugin("HeuristicLab.Algorithms.DataAnalysis", "Provides wrappers for data analysis algorithms implemented in external libraries (linear regression, linear discriminant analysis, k-means clustering, support vector classification and regression)", "3.4.1.$WCREV$")]
    2929  [PluginFile("HeuristicLab.Algorithms.DataAnalysis-3.4.dll", PluginFileType.Assembly)]
    3030  [PluginDependency("HeuristicLab.ALGLIB", "3.1.0")]
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r6240 r6618  
    111111
    112112      SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), problemData);
     113      solution.Model.Name = "Linear Regression Model";
    113114      return solution;
    114115    }
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/Properties/AssemblyInfo.frame

    r5869 r6618  
    5353// by using the '*' as shown below:
    5454[assembly: AssemblyVersion("3.4.0.0")]
    55 [assembly: AssemblyFileVersion("3.4.0.$WCREV$")]
     55[assembly: AssemblyFileVersion("3.4.1.$WCREV$")]
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassificationSolution.cs

    r6241 r6618  
    2020#endregion
    2121
    22 using System;
    23 using System.Collections.Generic;
    24 using System.Drawing;
    25 using System.Linq;
    2622using HeuristicLab.Common;
    2723using HeuristicLab.Core;
     
    4945    public RandomForestClassificationSolution(IClassificationProblemData problemData, IRandomForestModel randomForestModel)
    5046      : base(randomForestModel, problemData) {
     47      RecalculateResults();
    5148    }
    5249
     
    5451      return new RandomForestClassificationSolution(this, cloner);
    5552    }
     53
     54    protected override void RecalculateResults() {
     55      CalculateResults();
     56    }
    5657  }
    5758}
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs

    r6241 r6618  
    2222using System;
    2323using System.Collections.Generic;
    24 using System.IO;
    2524using System.Linq;
    26 using System.Text;
    2725using HeuristicLab.Common;
    2826using HeuristicLab.Core;
    2927using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3028using HeuristicLab.Problems.DataAnalysis;
    31 using SVM;
    3229
    3330namespace HeuristicLab.Algorithms.DataAnalysis {
     
    134131    }
    135132
     133    public IRandomForestRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
     134      return new RandomForestRegressionSolution(problemData, this);
     135    }
     136    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
     137      return CreateRegressionSolution(problemData);
     138    }
     139    public IRandomForestClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
     140      return new RandomForestClassificationSolution(problemData, this);
     141    }
     142    IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
     143      return CreateClassificationSolution(problemData);
     144    }
     145
    136146    #region events
    137147    public event EventHandler Changed;
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegressionSolution.cs

    r6241 r6618  
    2020#endregion
    2121
    22 using System;
    23 using System.Collections.Generic;
    24 using System.Drawing;
    25 using System.Linq;
    2622using HeuristicLab.Common;
    2723using HeuristicLab.Core;
     
    4945    public RandomForestRegressionSolution(IRegressionProblemData problemData, IRandomForestModel randomForestModel)
    5046      : base(randomForestModel, problemData) {
     47      RecalculateResults();
    5148    }
    5249
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorClassificationSolution.cs

    r5809 r6618  
    2020#endregion
    2121
    22 using System;
    23 using System.Collections.Generic;
    24 using System.Drawing;
    25 using System.Linq;
    2622using HeuristicLab.Common;
    2723using HeuristicLab.Core;
     
    4945    public SupportVectorClassificationSolution(SupportVectorMachineModel model, IClassificationProblemData problemData)
    5046      : base(model, problemData) {
     47      RecalculateResults();
    5148    }
    5249
     
    5451      return new SupportVectorClassificationSolution(this, cloner);
    5552    }
     53
     54    protected override void RecalculateResults() {
     55      CalculateResults();
     56    }
    5657  }
    5758}
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineModel.cs

    r5861 r6618  
    9898      this.targetVariable = original.targetVariable;
    9999      this.allowedInputVariables = (string[])original.allowedInputVariables.Clone();
     100      foreach (var dataset in original.cachedPredictions.Keys) {
     101        this.cachedPredictions.Add(cloner.Clone(dataset), (double[])original.cachedPredictions[dataset].Clone());
     102      }
    100103      if (original.classValues != null)
    101104        this.classValues = (double[])original.classValues.Clone();
     
    123126      return GetEstimatedValuesHelper(dataset, rows);
    124127    }
    125     #endregion
     128    public SupportVectorRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
     129      return new SupportVectorRegressionSolution(this, problemData);
     130    }
     131    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
     132      return CreateRegressionSolution(problemData);
     133    }
     134    #endregion
     135
    126136    #region IClassificationModel Members
    127137    public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
     
    144154      }
    145155    }
    146     #endregion
     156
     157    public SupportVectorClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
     158      return new SupportVectorClassificationSolution(this, problemData);
     159    }
     160    IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
     161      return CreateClassificationSolution(problemData);
     162    }
     163    #endregion
     164    // cache for predictions, which is cloned but not persisted, must be cleared when the model is changed
     165    private Dictionary<Dataset, double[]> cachedPredictions = new Dictionary<Dataset, double[]>();
    147166    private IEnumerable<double> GetEstimatedValuesHelper(Dataset dataset, IEnumerable<int> rows) {
     167      if (!cachedPredictions.ContainsKey(dataset)) {
     168        // create an array of cached predictions which is initially filled with NaNs
     169        double[] predictions = Enumerable.Repeat(double.NaN, dataset.Rows).ToArray();
     170        CalculatePredictions(dataset, rows, predictions);
     171        cachedPredictions.Add(dataset, predictions);
     172      }
     173      // get the array of predictions and select the subset of requested rows
     174      double[] p = cachedPredictions[dataset];
     175      var requestedPredictions = from r in rows
     176                                 select p[r];
     177      // check if the requested predictions contain NaNs
     178      // (this means for the request rows some predictions have not been cached)
     179      if (requestedPredictions.Any(x => double.IsNaN(x))) {
     180        // updated the predictions for currently requested rows
     181        CalculatePredictions(dataset, rows, p);
     182        cachedPredictions[dataset] = p;
     183        // now we can be sure that for the current rows all predictions are available
     184        return from r in rows
     185               select p[r];
     186      } else {
     187        // there were no NaNs => just return the cached predictions
     188        return requestedPredictions;
     189      }
     190    }
     191
     192    private void CalculatePredictions(Dataset dataset, IEnumerable<int> rows, double[] predictions) {
     193      // calculate and cache predictions for the currently requested rows
    148194      SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, targetVariable, allowedInputVariables, rows);
    149195      SVM.Problem scaledProblem = Scaling.Scale(RangeTransform, problem);
    150196
    151       foreach (var row in Enumerable.Range(0, scaledProblem.Count)) {
    152         yield return SVM.Prediction.Predict(Model, scaledProblem.X[row]);
    153       }
    154     }
     197      // row is the index in the original dataset,
     198      // i is the index in the scaled dataset (containing only the necessary rows)
     199      int i = 0;
     200      foreach (var row in rows) {
     201        predictions[row] = SVM.Prediction.Predict(Model, scaledProblem.X[i]);
     202        i++;
     203      }
     204    }
     205
    155206    #region events
    156207    public event EventHandler Changed;
    157208    private void OnChanged(EventArgs e) {
     209      cachedPredictions.Clear();
    158210      var handlers = Changed;
    159211      if (handlers != null)
  • branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorRegressionSolution.cs

    r5809 r6618  
    2020#endregion
    2121
    22 using System;
    23 using System.Collections.Generic;
    24 using System.Drawing;
    25 using System.Linq;
    2622using HeuristicLab.Common;
    2723using HeuristicLab.Core;
     
    4945    public SupportVectorRegressionSolution(SupportVectorMachineModel model, IRegressionProblemData problemData)
    5046      : base(model, problemData) {
     47      RecalculateResults();
    5148    }
    5249
Note: See TracChangeset for help on using the changeset viewer.