Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
05/30/17 15:03:51 (8 years ago)
Author:
bburlacu
Message:

#2760: Removed the shuffledProblemData field. A shared seed is now used for all folds, so that the dataset for each fold is shuffled in exactly the same way. Backwards compatibility should be restored.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs

    r14904 r15002  
    4040  [StorableClass]
    4141  public sealed class CrossValidation : ParameterizedNamedItem, IAlgorithm, IStorableContent {
    42     private IDataAnalysisProblemData shuffledProblemData;
     42    [Storable]
     43    private int seed;
    4344
    4445    public CrossValidation()
     
    9495      samplesEnd = cloner.Clone(original.samplesEnd);
    9596      shuffleSamples = cloner.Clone(original.shuffleSamples);
     97      seed = original.seed;
     98
    9699      RegisterEvents();
    97100      if (Algorithm != null) RegisterAlgorithmEvents();
     
    279282        throw new InvalidOperationException(string.Format("Start not allowed in execution state \"{0}\".", ExecutionState));
    280283
     284      seed = new FastRandom().NextInt();
     285
    281286      if (Algorithm != null) {
    282287        //create cloned algorithms
     
    287292            var cloner = new Cloner();
    288293            if (ShuffleSamples.Value) {
     294              var random = new FastRandom(seed);
    289295              var dataAnalysisProblem = (IDataAnalysisProblem)algorithm.Problem;
    290296              var dataset = (Dataset)dataAnalysisProblem.ProblemData.Dataset;
    291               shuffledDataset = shuffledDataset ?? dataset.Shuffle(new FastRandom());
     297              shuffledDataset = shuffledDataset ?? dataset.Shuffle(random);
    292298              cloner.RegisterClonedObject(dataset, shuffledDataset);
    293299            }
     
    316322            clonedAlgorithm.Prepare();
    317323            clonedAlgorithms.Add(clonedAlgorithm);
    318           }
    319           // save the shuffled problem data because it is necessary when creating the ensemble solution
    320           if (shuffledProblemData == null && shuffledDataset != null) {
    321             var dataAnalysisProblem = (IDataAnalysisProblem)algorithm.Problem;
    322             var dataset = (Dataset)dataAnalysisProblem.ProblemData.Dataset;
    323             var cloner = new Cloner();
    324             cloner.RegisterClonedObject(dataset, shuffledDataset);
    325             shuffledProblemData = cloner.Clone(dataAnalysisProblem.ProblemData);
    326324          }
    327325        }
     
    446444        // clone manually to correctly clone references between cloned root objects
    447445        Cloner cloner = new Cloner();
    448         var problemDataClone = ShuffleSamples.Value
    449           ? (IRegressionProblemData)cloner.Clone(shuffledProblemData)
    450           : (IRegressionProblemData)cloner.Clone(Problem.ProblemData);
     446        if (ShuffleSamples.Value) {
     447          var dataset = (Dataset)Problem.ProblemData.Dataset;
     448          var random = new FastRandom(seed);
     449          var shuffledDataset = dataset.Shuffle(random);
     450          cloner.RegisterClonedObject(dataset, shuffledDataset);
     451        }
     452        var problemDataClone = (IRegressionProblemData)cloner.Clone(Problem.ProblemData);
    451453        // set partitions of problem data clone correctly
    452454        problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value;
     
    479481        // at least one algorithm (GBT with logistic regression loss) produces a classification solution even though the original problem is a regression problem.
    480482        var targetVariable = solutions.Value.First().ProblemData.TargetVariable;
    481         var problemDataClone = ShuffleSamples.Value
    482           ? new ClassificationProblemData(shuffledProblemData.Dataset, shuffledProblemData.AllowedInputVariables, targetVariable)
    483           : new ClassificationProblemData(Problem.ProblemData.Dataset, Problem.ProblemData.AllowedInputVariables, targetVariable);
     483        var dataset = (Dataset)Problem.ProblemData.Dataset;
     484        if (ShuffleSamples.Value) {
     485          var random = new FastRandom(seed);
     486          dataset = dataset.Shuffle(random);
     487        }
     488        var problemDataClone = new ClassificationProblemData(dataset, Problem.ProblemData.AllowedInputVariables, targetVariable);
    484489        // set partitions of problem data clone correctly
    485490        problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value;
     
    566571      if (Problem != null) {
    567572        Problem.Reset += new EventHandler(Problem_Reset);
    568         Problem.ProblemDataChanged += Problem_ProblemDataChanged;
    569573      }
    570574    }
     
    574578      if (Problem != null) {
    575579        Problem.Reset -= new EventHandler(Problem_Reset);
    576         Problem.ProblemDataChanged -= Problem_ProblemDataChanged;
    577580      }
    578581    }
     
    592595      if (handler != null) handler(this, EventArgs.Empty);
    593596      ConfigureProblem();
    594     }
    595     public event EventHandler ProblemDataChanged;
    596     private void OnProblemDataChanged() {
    597       var handler = ProblemDataChanged;
    598       if (handler != null) handler(this, EventArgs.Empty);
    599       shuffledProblemData = null;
    600     }
    601     private void Problem_ProblemDataChanged(object sender, EventArgs e) {
    602       OnProblemDataChanged();
    603597    }
    604598    private void Problem_Reset(object sender, EventArgs e) {
Note: See TracChangeset for help on using the changeset viewer.