Changeset 14904


Ignore:
Timestamp:
05/02/17 17:41:44 (5 months ago)
Author:
bburlacu
Message:

#2760: Reuse the shuffled data when creating the solution ensemble.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs

    r14864 r14904  
    40 40  [StorableClass]
    41 41  public sealed class CrossValidation : ParameterizedNamedItem, IAlgorithm, IStorableContent {
     42    private IDataAnalysisProblemData shuffledProblemData;
     43
    42 44    public CrossValidation()
    43 45      : base() {
     
    315 317            clonedAlgorithms.Add(clonedAlgorithm);
    316 318          }
     319          // save the shuffled problem data because it is necessary when creating the ensemble solution
     320          if (shuffledProblemData == null && shuffledDataset != null) {
     321            var dataAnalysisProblem = (IDataAnalysisProblem)algorithm.Problem;
     322            var dataset = (Dataset)dataAnalysisProblem.ProblemData.Dataset;
     323            var cloner = new Cloner();
     324            cloner.RegisterClonedObject(dataset, shuffledDataset);
     325            shuffledProblemData = cloner.Clone(dataAnalysisProblem.ProblemData);
     326          }
    317 327        }
    318 328
     
    436 446        // clone manually to correctly clone references between cloned root objects
    437 447        Cloner cloner = new Cloner();
    438         var problemDataClone = (IRegressionProblemData)cloner.Clone(Problem.ProblemData);
     448        var problemDataClone = ShuffleSamples.Value
     449          ? (IRegressionProblemData)cloner.Clone(shuffledProblemData)
     450          : (IRegressionProblemData)cloner.Clone(Problem.ProblemData);
    439 451        // set partitions of problem data clone correctly
    440 452        problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value;
     
    467 479        // at least one algorithm (GBT with logistic regression loss) produces a classification solution even though the original problem is a regression problem.
    468 480        var targetVariable = solutions.Value.First().ProblemData.TargetVariable;
    469         var problemDataClone = new ClassificationProblemData(Problem.ProblemData.Dataset,
    470           Problem.ProblemData.AllowedInputVariables, targetVariable);
     481        var problemDataClone = ShuffleSamples.Value
     482          ? new ClassificationProblemData(shuffledProblemData.Dataset, shuffledProblemData.AllowedInputVariables, targetVariable)
     483          : new ClassificationProblemData(Problem.ProblemData.Dataset, Problem.ProblemData.AllowedInputVariables, targetVariable);
    471 484        // set partitions of problem data clone correctly
    472 485        problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value;
     
    551 564      algorithm.ProblemChanged += new EventHandler(Algorithm_ProblemChanged);
    552 565      algorithm.ExecutionStateChanged += new EventHandler(Algorithm_ExecutionStateChanged);
    553       if (Problem != null) Problem.Reset += new EventHandler(Problem_Reset);
     566      if (Problem != null) {
     567        Problem.Reset += new EventHandler(Problem_Reset);
     568        Problem.ProblemDataChanged += Problem_ProblemDataChanged;
     569      }
    554 570    }
    555 571    private void DeregisterAlgorithmEvents() {
    556 572      algorithm.ProblemChanged -= new EventHandler(Algorithm_ProblemChanged);
    557 573      algorithm.ExecutionStateChanged -= new EventHandler(Algorithm_ExecutionStateChanged);
    558       if (Problem != null) Problem.Reset -= new EventHandler(Problem_Reset);
     574      if (Problem != null) {
     575        Problem.Reset -= new EventHandler(Problem_Reset);
     576        Problem.ProblemDataChanged -= Problem_ProblemDataChanged;
     577      }
    559 578    }
    560 579    private void Algorithm_ProblemChanged(object sender, EventArgs e) {
     
    574 593      ConfigureProblem();
    575 594    }
    576 
     595    public event EventHandler ProblemDataChanged;
     596    private void OnProblemDataChanged() {
     597      var handler = ProblemDataChanged;
     598      if (handler != null) handler(this, EventArgs.Empty);
     599      shuffledProblemData = null;
     600    }
     601    private void Problem_ProblemDataChanged(object sender, EventArgs e) {
     602      OnProblemDataChanged();
     603    }
    577 604    private void Problem_Reset(object sender, EventArgs e) {
    578 605      ConfigureProblem();
    579 606    }
    580 
    581 607    private void ConfigureProblem() {
    582 608      SamplesStart.Value = 0;
Note: See TracChangeset for help on using the changeset viewer.