Changeset 4468


Ignore:
Timestamp:
09/22/10 11:22:49 (9 years ago)
Author:
mkommend
Message:

Preparation for cross validation - removed the test samples from the training samples and added ValidationPercentage parameter (ticket #1199).

Location:
trunk/sources
Files:
16 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Analyzers/RegressionSolutionAnalyzer.cs

    r4068 r4468  
    121121      var solution = bestSolution;
    122122      #region update R2,MSE, Rel Error
    123       IEnumerable<double> trainingValues = problemData.Dataset.GetEnumeratedVariableValues(
    124         problemData.TargetVariable.Value,
    125         problemData.TrainingSamplesStart.Value,
    126         problemData.TrainingSamplesEnd.Value);
    127       IEnumerable<double> testValues = problemData.Dataset.GetEnumeratedVariableValues(
    128         problemData.TargetVariable.Value,
    129         problemData.TestSamplesStart.Value,
    130         problemData.TestSamplesEnd.Value);
     123      IEnumerable<double> trainingValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, problemData.TrainingIndizes);
     124      IEnumerable<double> testValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, problemData.TestIndizes);
    131125      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
    132126      OnlineMeanAbsolutePercentageErrorEvaluator relErrorEvaluator = new OnlineMeanAbsolutePercentageErrorEvaluator();
    133127      OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator();
     128
    134129      #region training
    135130      var originalEnumerator = trainingValues.GetEnumerator();
     
    144139      double trainingRelError = relErrorEvaluator.MeanAbsolutePercentageError;
    145140      #endregion
     141
    146142      mseEvaluator.Reset();
    147143      relErrorEvaluator.Reset();
    148144      r2Evaluator.Reset();
     145
    149146      #region test
    150147      originalEnumerator = testValues.GetEnumerator();
     
    159156      double testRelError = relErrorEvaluator.MeanAbsolutePercentageError;
    160157      #endregion
     158
    161159      if (results.ContainsKey(BestSolutionResultName)) {
    162160        results[BestSolutionResultName].Value = solution;
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/BestSymbolicRegressionSolutionAnalyzer.cs

    r4125 r4468  
    9191        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
    9292          SymbolicExpressionTree[i]);
    93         var solution = new SymbolicRegressionSolution(ProblemData, model, lowerEstimationLimit, upperEstimationLimit);
     93        DataAnalysisProblemData problemDataClone = (DataAnalysisProblemData)ProblemData.Clone();
     94        var solution = new SymbolicRegressionSolution(problemDataClone, model, lowerEstimationLimit, upperEstimationLimit);
    9495        solution.Name = BestSolutionParameterName;
    9596        solution.Description = "Best solution on validation partition found over the whole run.";
    9697        BestSolutionParameter.ActualValue = solution;
    9798        BestSolutionQualityParameter.ActualValue = Quality[i];
    98         BestSymbolicRegressionSolutionAnalyzer.UpdateSymbolicRegressionBestSolutionResults(solution, ProblemData, Results, VariableFrequencies);
     99        BestSymbolicRegressionSolutionAnalyzer.UpdateSymbolicRegressionBestSolutionResults(solution, problemDataClone, Results, VariableFrequencies);
    99100      }
    100101      return BestSolutionParameter.ActualValue;
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs

    r4415 r4468  
    212212      int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
    213213      if (count == 0) count = 1;
    214       IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
     214      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count)
     215        .Where(row => row < ProblemData.TestSamplesStart.Value || ProblemData.TestSamplesEnd.Value <= row);
    215216
    216217      double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
     
    241242        // calculate scaling parameters and only for the best tree using the full training set
    242243        double alpha, beta;
    243         int trainingStart = ProblemData.TrainingSamplesStart.Value;
    244         int trainingEnd = ProblemData.TrainingSamplesEnd.Value;
    245         IEnumerable<int> trainingRows = Enumerable.Range(trainingStart, trainingEnd - trainingStart);
    246244        SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree,
    247245          lowerEstimationLimit, upperEstimationLimit,
    248246          ProblemData.Dataset, targetVariable,
    249           trainingRows, out beta, out alpha);
     247          ProblemData.TrainingIndizes, out beta, out alpha);
    250248
    251249        // scale tree for solution
     
    253251        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
    254252          scaledTree);
    255         var solution = new SymbolicRegressionSolution(ProblemData, model, lowerEstimationLimit, upperEstimationLimit);
     253        var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)ProblemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
    256254        solution.Name = BestSolutionParameterName;
    257255        solution.Description = "Best solution on validation partition found over the whole run.";
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionModelQualityAnalyzer.cs

    r4068 r4468  
    137137      Analyze(SymbolicExpressionTreeParameter.ActualValue, SymbolicExpressionTreeInterpreterParameter.ActualValue,
    138138        UpperEstimationLimit.Value, LowerEstimationLimit.Value, ProblemDataParameter.ActualValue,
    139         ProblemDataParameter.ActualValue.TrainingSamplesStart.Value, ProblemDataParameter.ActualValue.TrainingSamplesEnd.Value,
    140         ProblemDataParameter.ActualValue.TestSamplesStart.Value, ProblemDataParameter.ActualValue.TestSamplesEnd.Value,
    141139        ResultsParameter.ActualValue);
    142140      return base.Apply();
     
    145143    public static void Analyze(IEnumerable<SymbolicExpressionTree> trees, ISymbolicExpressionTreeInterpreter interpreter,
    146144      double upperEstimationLimit, double lowerEstimationLimit,
    147       DataAnalysisProblemData problemData, int trainingStart, int trainingEnd, int testStart, int testEnd, ResultCollection results) {
     145      DataAnalysisProblemData problemData, ResultCollection results) {
    148146      int targetVariableIndex = problemData.Dataset.GetVariableIndex(problemData.TargetVariable.Value);
    149       IEnumerable<double> originalTrainingValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariableIndex, trainingStart, trainingEnd);
    150       IEnumerable<double> originalTestValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariableIndex, testStart, testEnd);
     147      IEnumerable<double> originalTrainingValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariableIndex, problemData.TrainingIndizes);
     148      IEnumerable<double> originalTestValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariableIndex, problemData.TestIndizes);
    151149      List<double> trainingMse = new List<double>();
    152150      List<double> trainingR2 = new List<double>();
     
    162160      foreach (var tree in trees) {
    163161        #region training
    164         var estimatedTrainingValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, Enumerable.Range(trainingStart, trainingEnd - trainingStart));
     162        var estimatedTrainingValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndizes);
    165163        mseEvaluator.Reset();
    166164        r2Evaluator.Reset();
     
    184182        #endregion
    185183        #region test
    186         var estimatedTestValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, Enumerable.Range(testStart, testEnd - testStart));
     184        var estimatedTestValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TestIndizes);
    187185
    188186        mseEvaluator.Reset();
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionModelQualityCalculator.cs

    r4068 r4468  
    2020#endregion
    2121
     22using System;
    2223using HeuristicLab.Core;
    2324using HeuristicLab.Data;
     
    3536  [Item("SymbolicRegressionModelQualityCalculator", "An operator to calculate the quality values of a symbolic regression solution symbolic expression tree encoding.")]
    3637  [StorableClass]
     38  [Obsolete("This class should not be used anymore because of performance reasons and will therefore not be updated.")]
    3739  public sealed class SymbolicRegressionModelQualityCalculator : AlgorithmOperator {
    3840    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs

    r4191 r4468  
    2828using HeuristicLab.Optimization;
    2929using HeuristicLab.Parameters;
     30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3031using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3132using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
    32 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3333
    3434namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
     
    241241      double lowerEstimationLimit, double upperEstimationLimit,
    242242      double maxPruningRatio, double qualityGainWeight) {
    243       IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart);
     243        IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart)
     244          .Where(i => i < problemData.TestSamplesStart.Value || problemData.TestSamplesEnd.Value <= i);
    244245      int originalSize = tree.Size;
    245246      double originalQuality = evaluator.Evaluate(interpreter, tree,
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/ValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs

    r4068 r4468  
    3939  [Item("ValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
    4040  [StorableClass]
     41  [Obsolete("This class should not be used anymore because of performance reasons and will therefore not be updated.")]
    4142  public sealed class ValidationBestScaledSymbolicRegressionSolutionAnalyzer : AlgorithmOperator, ISymbolicRegressionAnalyzer {
    4243    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Evaluators/MultiObjectiveSymbolicRegressionEvaluator.cs

    r4246 r4468  
    2020#endregion
    2121
    22 using System;
    2322using System.Collections.Generic;
     23using System.Linq;
    2424using HeuristicLab.Core;
    2525using HeuristicLab.Data;
     
    122122    public override IOperation Apply() {
    123123      int seed = Random.Next();
    124       IEnumerable<int> rows = SingleObjectiveSymbolicRegressionEvaluator.GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value);
     124      IEnumerable<int> rows = SingleObjectiveSymbolicRegressionEvaluator.GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value)
     125         .Where(i => i < RegressionProblemData.TestSamplesStart.Value || RegressionProblemData.TestSamplesEnd.Value <= i);
    125126      double[] qualities = Evaluate(SymbolicExpressionTreeInterpreter, SymbolicExpressionTree, RegressionProblemData.Dataset,
    126127        RegressionProblemData.TargetVariable, rows);
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Evaluators/SingleObjectiveSymbolicRegressionEvaluator.cs

    r4246 r4468  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Linq;
    2425using HeuristicLab.Core;
    2526using HeuristicLab.Data;
     
    140141    public override IOperation Apply() {
    141142      int seed = Random.Next();
    142       IEnumerable<int> rows = GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value);
     143      IEnumerable<int> rows = GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value)
     144          .Where(i => i < RegressionProblemData.TestSamplesStart.Value || RegressionProblemData.TestSamplesEnd.Value <= i);
    143145      double quality = Evaluate(SymbolicExpressionTreeInterpreter, SymbolicExpressionTree, LowerEstimationLimit.Value, UpperEstimationLimit.Value,
    144146        RegressionProblemData.Dataset,
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionModel.cs

    r4415 r4468  
    6868
    6969    public IEnumerable<double> GetEstimatedValues(DataAnalysisProblemData problemData, int start, int end) {
    70       return interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, Enumerable.Range(start, end - start));
     70      return GetEstimatedValues(problemData, Enumerable.Range(start, end - start));
     71    }
     72    public IEnumerable<double> GetEstimatedValues(DataAnalysisProblemData problemData, IEnumerable<int> rows) {
     73      return interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, rows);
    7174    }
    7275
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs

    r4250 r4468  
    170170          fixedBestValidationSolutionAnalyzer.BestKnownQualityParameter.ActualName = BestKnownQualityParameter.Name;
    171171        }
     172
    172173        var bestValidationSolutionAnalyzer = analyzer as ValidationBestScaledSymbolicRegressionSolutionAnalyzer;
    173174        if (bestValidationSolutionAnalyzer != null) {
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblemBase.cs

    r4251 r4468  
    125125    }
    126126    public IntValue TrainingSamplesStart {
    127       get { return new IntValue(DataAnalysisProblemData.TrainingSamplesStart.Value); }
     127      get { return new IntValue(DataAnalysisProblemData.TrainingIndizes.First()); }
    128128    }
    129129    public IntValue TrainingSamplesEnd {
    130130      get {
    131         return new IntValue((DataAnalysisProblemData.TrainingSamplesStart.Value +
    132           DataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);
     131        int endIndex = (int)(DataAnalysisProblemData.TrainingIndizes.Count() * (1.0 - DataAnalysisProblemData.ValidationPercentage.Value));
     132        return new IntValue(DataAnalysisProblemData.TrainingIndizes.ElementAt(endIndex));
    133133      }
    134134    }
     
    137137    }
    138138    public IntValue ValidationSamplesEnd {
    139       get { return new IntValue(DataAnalysisProblemData.TrainingSamplesEnd.Value); }
     139      get { return new IntValue(DataAnalysisProblemData.TrainingIndizes.Last() + 1); }
    140140    }
    141141    public IntValue TestSamplesStart {
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionSolution.cs

    r4415 r4468  
    6767      get {
    6868        if (estimatedValues == null) RecalculateEstimatedValues();
    69         return estimatedValues.AsEnumerable();
     69        return estimatedValues;
    7070      }
    7171    }
    7272
    7373    public override IEnumerable<double> EstimatedTrainingValues {
    74       get {
    75         if (estimatedValues == null) RecalculateEstimatedValues();
    76         int start = ProblemData.TrainingSamplesStart.Value;
    77         int n = ProblemData.TrainingSamplesEnd.Value - start;
    78         return estimatedValues.Skip(start).Take(n).ToList();
    79       }
     74      get { return GetEstimatedValues(ProblemData.TrainingIndizes); }
    8075    }
    8176
    8277    public override IEnumerable<double> EstimatedTestValues {
    83       get {
    84         if (estimatedValues == null) RecalculateEstimatedValues();
    85         int start = ProblemData.TestSamplesStart.Value;
    86         int n = ProblemData.TestSamplesEnd.Value - start;
    87         return estimatedValues.Skip(start).Take(n).ToList();
    88       }
     78      get { return GetEstimatedValues(ProblemData.TestIndizes); }
     79    }
     80
     81    public virtual IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows) {
     82      if (estimatedValues == null) RecalculateEstimatedValues();
     83      foreach (int row in rows)
     84        yield return estimatedValues[row];
    8985    }
    9086  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.3/ResultsView.cs

    r4068 r4468  
    7777        matrix.SortableView = false;
    7878
    79         IEnumerable<double> originalTrainingValues = Content.ProblemData.Dataset.GetVariableValues(Content.ProblemData.TargetVariable.Value, Content.ProblemData.TrainingSamplesStart.Value, Content.ProblemData.TrainingSamplesEnd.Value);
    80         IEnumerable<double> originalTestValues = Content.ProblemData.Dataset.GetVariableValues(Content.ProblemData.TargetVariable.Value, Content.ProblemData.TestSamplesStart.Value, Content.ProblemData.TestSamplesEnd.Value);
     79        IEnumerable<double> originalTrainingValues = Content.ProblemData.Dataset.GetEnumeratedVariableValues(Content.ProblemData.TargetVariable.Value, Content.ProblemData.TrainingIndizes);
     80        IEnumerable<double> originalTestValues = Content.ProblemData.Dataset.GetEnumeratedVariableValues(Content.ProblemData.TargetVariable.Value, Content.ProblemData.TestIndizes);
    8181        matrix[0, 0] = SimpleMSEEvaluator.Calculate(originalTrainingValues, Content.EstimatedTrainingValues);
    8282        matrix[0, 1] = SimpleMSEEvaluator.Calculate(originalTestValues, Content.EstimatedTestValues);
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.3/ScatterPlotView.cs

    r4068 r4468  
    127127        string targetVariableName = Content.ProblemData.TargetVariable.Value;
    128128        Dataset dataset = Content.ProblemData.Dataset;
    129         int trainingStart = Content.ProblemData.TrainingSamplesStart.Value;
    130         int trainingEnd = Content.ProblemData.TrainingSamplesEnd.Value;
    131         int testStart = Content.ProblemData.TestSamplesStart.Value;
    132         int testEnd = Content.ProblemData.TestSamplesEnd.Value;
    133129        if (this.chart.Series[ALL_SERIES].Points.Count > 0)
    134130          this.chart.Series[ALL_SERIES].Points.DataBindXY(Content.EstimatedValues.ToArray(), "",
     
    136132        if (this.chart.Series[TRAINING_SERIES].Points.Count > 0)
    137133          this.chart.Series[TRAINING_SERIES].Points.DataBindXY(Content.EstimatedTrainingValues.ToArray(), "",
    138             dataset.GetVariableValues(targetVariableName, trainingStart, trainingEnd), "");
     134            dataset.GetEnumeratedVariableValues(targetVariableName, Content.ProblemData.TrainingIndizes).ToArray(), "");
    139135        if (this.chart.Series[TEST_SERIES].Points.Count > 0)
    140136          this.chart.Series[TEST_SERIES].Points.DataBindXY(Content.EstimatedTestValues.ToArray(), "",
    141             dataset.GetVariableValues(targetVariableName, testStart, testEnd), "");
     137           dataset.GetEnumeratedVariableValues(targetVariableName, Content.ProblemData.TestIndizes).ToArray(), "");
    142138
    143139        double max = Math.Max(Content.EstimatedValues.Max(), dataset.GetVariableValues(targetVariableName).Max());
     
    168164      } else if (Content != null) {
    169165        string targetVariableName = Content.ProblemData.TargetVariable.Value;
    170         Dataset dataset = Content.ProblemData.Dataset;
    171         int trainingStart = Content.ProblemData.TrainingSamplesStart.Value;
    172         int trainingEnd = Content.ProblemData.TrainingSamplesEnd.Value;
    173         int testStart = Content.ProblemData.TestSamplesStart.Value;
    174         int testEnd = Content.ProblemData.TestSamplesEnd.Value;
    175166
    176167        IEnumerable<double> predictedValues = null;
     
    178169        switch (series.Name) {
    179170          case ALL_SERIES:
    180             predictedValues = Content.EstimatedValues;
    181             targetValues = dataset.GetVariableValues(targetVariableName);
     171            predictedValues = Content.EstimatedValues.ToArray();
     172            targetValues = Content.ProblemData.Dataset.GetVariableValues(targetVariableName);
    182173            break;
    183174          case TRAINING_SERIES:
    184             predictedValues = Content.EstimatedTrainingValues;
    185             targetValues = dataset.GetVariableValues(targetVariableName, trainingStart, trainingEnd);
     175            predictedValues = Content.EstimatedTrainingValues.ToArray();
     176            targetValues = Content.ProblemData.Dataset.GetEnumeratedVariableValues(targetVariableName, Content.ProblemData.TrainingIndizes).ToArray();
    186177            break;
    187178          case TEST_SERIES:
    188             predictedValues = Content.EstimatedTestValues;
    189             targetValues = dataset.GetVariableValues(targetVariableName, testStart, testEnd);
     179            predictedValues = Content.EstimatedTestValues.ToArray();
     180            targetValues = Content.ProblemData.Dataset.GetEnumeratedVariableValues(targetVariableName, Content.ProblemData.TestIndizes).ToArray();
    190181            break;
    191182        }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/DataAnalysisProblemData.cs

    r4451 r4468  
    3535  public class DataAnalysisProblemData : ParameterizedNamedItem, IStorableContent {
    3636    protected bool suppressEvents = false;
     37    #region IStorableContent Members
     38    public string Filename { get; set; }
     39    #endregion
    3740    #region default data
    3841    // y = x^4 + x^3 + x^2 + x
     
    8790      get { return (IValueParameter<IntValue>)Parameters["TestSamplesEnd"]; }
    8891    }
     92    public IValueParameter<PercentValue> ValidationPercentageParameter {
     93      get { return (IValueParameter<PercentValue>)Parameters["ValidationPercentage"]; }
     94    }
    8995    #endregion
    9096
    9197    #region properties
    9298    public Dataset Dataset {
    93       get { return (Dataset)DatasetParameter.Value; }
     99      get { return DatasetParameter.Value; }
    94100      set {
    95101        if (value != Dataset) {
     
    100106    }
    101107    public StringValue TargetVariable {
    102       get { return (StringValue)TargetVariableParameter.Value; }
     108      get { return TargetVariableParameter.Value; }
    103109      set {
    104110        if (value != TargetVariableParameter.Value) {
     
    110116    }
    111117    public ICheckedItemList<StringValue> InputVariables {
    112       get { return (ICheckedItemList<StringValue>)InputVariablesParameter.Value; }
     118      get { return InputVariablesParameter.Value; }
    113119      set {
    114120        if (value != InputVariables) {
     
    120126    }
    121127    public IntValue TrainingSamplesStart {
    122       get { return (IntValue)TrainingSamplesStartParameter.Value; }
     128      get { return TrainingSamplesStartParameter.Value; }
    123129      set {
    124130        if (value != TrainingSamplesStart) {
     
    130136    }
    131137    public IntValue TrainingSamplesEnd {
    132       get { return (IntValue)TrainingSamplesEndParameter.Value; }
     138      get { return TrainingSamplesEndParameter.Value; }
    133139      set {
    134140        if (value != TrainingSamplesEnd) {
     
    140146    }
    141147    public IntValue TestSamplesStart {
    142       get { return (IntValue)TestSamplesStartParameter.Value; }
     148      get { return TestSamplesStartParameter.Value; }
    143149      set {
    144150        if (value != TestSamplesStart) {
     
    150156    }
    151157    public IntValue TestSamplesEnd {
    152       get { return (IntValue)TestSamplesEndParameter.Value; }
     158      get { return TestSamplesEndParameter.Value; }
    153159      set {
    154160        if (value != TestSamplesEnd) {
     
    159165      }
    160166    }
    161     #endregion
    162 
    163     #region IStorableContent Members
    164     public string Filename { get; set; }
    165     #endregion
     167    public PercentValue ValidationPercentage {
     168      get { return ValidationPercentageParameter.Value; }
     169      set {
     170        if (value != ValidationPercentage) {
     171          if (value == null) throw new ArgumentNullException();
     172          if (value.Value < 0 || value.Value > 1) throw new ArgumentException("ValidationPercentage must be between 0 and 1.");
     173          if (ValidationPercentage != null) DeregisterValueTypeEventHandlers(ValidationPercentage);
     174          ValidationPercentageParameter.Value = value;
     175        }
     176      }
     177    }
     178
     179    public IEnumerable<int> TrainingIndizes {
     180      get {
     181        return Enumerable.Range(TrainingSamplesStart.Value, TrainingSamplesEnd.Value - TrainingSamplesStart.Value)
     182                         .Where(i => i > 0 && i < Dataset.Rows && (i < TestSamplesStart.Value || TestSamplesEnd.Value <= i));
     183      }
     184    }
     185    public IEnumerable<int> TestIndizes {
     186      get {
     187        return Enumerable.Range(TestSamplesStart.Value, TestSamplesEnd.Value - TestSamplesStart.Value)
     188           .Where(i => i > 0 && i < Dataset.Rows);
     189      }
     190    }
     191    #endregion
     192
    166193
    167194    public DataAnalysisProblemData()
     
    180207      Parameters.Add(new ValueParameter<IntValue>("TestSamplesStart", new IntValue(15)));
    181208      Parameters.Add(new ValueParameter<IntValue>("TestSamplesEnd", new IntValue(25)));
     209      Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));
    182210      RegisterParameterEventHandlers();
    183211      RegisterParameterValueEventHandlers();
     
    200228      Parameters.Add(new ValueParameter<IntValue>("TestSamplesStart", new IntValue(testSamplesStart)));
    201229      Parameters.Add(new ValueParameter<IntValue>("TestSamplesEnd", new IntValue(testSamplesEnd)));
     230      Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));
    202231      RegisterParameterEventHandlers();
    203232      RegisterParameterValueEventHandlers();
     
    209238    [StorableHook(HookType.AfterDeserialization)]
    210239    private void AfterDeserializationHook() {
     240      if (!Parameters.ContainsKey("ValidationPercentage"))
     241        Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));
     242
    211243      RegisterParameterEventHandlers();
    212244      RegisterParameterValueEventHandlers();
     
    230262      TestSamplesStartParameter.ValueChanged += new EventHandler(TestSamplesStartParameter_ValueChanged);
    231263      TestSamplesEndParameter.ValueChanged += new EventHandler(TestSamplesEndParameter_ValueChanged);
     264      ValidationPercentageParameter.ValueChanged += new EventHandler(ValidationPercentageParameter_ValueChanged);
    232265    }
    233266
     
    239272      RegisterValueTypeEventHandlers(TestSamplesStart);
    240273      RegisterValueTypeEventHandlers(TestSamplesEnd);
     274      RegisterValueTypeEventHandlers(ValidationPercentage);
    241275    }
    242276
     
    270304    private void TestSamplesEndParameter_ValueChanged(object sender, EventArgs e) {
    271305      RegisterValueTypeEventHandlers(TestSamplesEnd);
     306      OnProblemDataChanged(EventArgs.Empty);
     307    }
     308    private void ValidationPercentageParameter_ValueChanged(object sender, EventArgs e) {
     309      RegisterValueTypeEventHandlers(ValidationPercentage);
    272310      OnProblemDataChanged(EventArgs.Empty);
    273311    }
Note: See TracChangeset for help on using the changeset viewer.