Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/20/10 20:31:23 (15 years ago)
Author:
gkronber
Message:

Added tracking of the best-of-run solution (selected on the validation set) and calculation of MSE, R², and mean relative error on the training and test sets. #938 (Data types and operators for regression problems)

Location:
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/BestValidationSymbolicRegressionSolutionVisualizer.cs

    r3442 r3452  
    2929using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3030using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     31using HeuristicLab.Problems.DataAnalysis.Evaluators;
     32using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3133
    3234namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
     
    3739  [StorableClass]
    3840  public sealed class BestValidationSymbolicRegressionSolutionVisualizer : SingleSuccessorOperator, ISingleObjectiveSolutionsVisualizer, ISolutionsVisualizer {
     41    private const string EvaluatorParameterName = "Evaluator";
    3942    private const string SymbolicRegressionModelParameterName = "SymbolicRegressionModel";
    4043    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    4144    private const string BestValidationSolutionParameterName = "BestValidationSolution";
     45    private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
     46    private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
    4247    private const string QualityParameterName = "Quality";
     48    private const string ResultsParameterName = "Results";
     49
     50    #region parameter properties
     51    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
     52      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
     53    }
     54    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
     55      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
     56    }
     57    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
     58      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
     59    }
     60
    4361    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
    4462      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicRegressionModelParameterName]; }
     
    5876    }
    5977
     78    public ILookupParameter<ResultCollection> ResultParameter {
     79      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
     80    }
     81    #endregion
     82
     83    #region properties
     84    public ISymbolicRegressionEvaluator Evaluator {
     85      get { return EvaluatorParameter.ActualValue; }
     86    }
     87    public IntValue ValidationSamplesStart {
     88      get { return ValidationSamplesStartParameter.ActualValue; }
     89    }
     90    public IntValue ValidationSamplesEnd {
     91      get { return ValidationSamplesEndParameter.ActualValue; }
     92    }
     93    #endregion
     94
    6095    public BestValidationSymbolicRegressionSolutionVisualizer()
    6196      : base() {
     
    6398      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic regression solutions."));
    6499      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The symbolic regression problme data on which the best solution should be evaluated."));
     100      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The start index of the validation partition (part of the training partition)."));
     101      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
    65102      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestValidationSolutionParameterName, "The best symbolic expression tree based on the validation data for the symbolic regression problem."));
    66       Parameters.Add(new LookupParameter<ResultCollection>("Results"));
     103      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection of the algorithm."));
    67104    }
    68105
     
    70107      ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
    71108      DataAnalysisProblemData problemData = DataAnalysisProblemDataParameter.ActualValue;
    72       ItemArray<DoubleValue> qualities = QualityParameter.ActualValue;
    73109
    74       var bestExpressionIndex = (from index in Enumerable.Range(0, qualities.Count())
    75                                  select new { Index = index, Quality = qualities[index] }).OrderBy(x => x.Quality).Select(x => x.Index).First();
     110      int validationSamplesStart = ValidationSamplesStart.Value;
     111      int validationSamplesEnd = ValidationSamplesEnd.Value;
     112      var validationValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, validationSamplesStart, validationSamplesEnd);
    76113
    77       var bestExpression = expressions[bestExpressionIndex];
    78       SymbolicRegressionSolution bestSolution = BestValidationSolutionParameter.ActualValue;
    79       if (bestSolution == null) BestValidationSolutionParameter.ActualValue = CreateDataAnalysisSolution(problemData, bestExpression);
    80       else {
    81         bestSolution.Model = CreateModel(problemData, bestExpression);
     114      var currentBestExpression = (from expression in expressions
     115                                   let validationQuality = SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(expression, problemData.Dataset, problemData.TargetVariable.Value, validationSamplesStart, validationSamplesEnd)
     116                                   select new { Expression = expression, ValidationQuality = validationQuality })
     117                                   .OrderBy(x => x.ValidationQuality)
     118                                   .First();
     119
     120      SymbolicRegressionSolution bestOfRunSolution = BestValidationSolutionParameter.ActualValue;
     121      if (bestOfRunSolution == null) {
     122        // no best of run solution yet -> make a solution from the currentBestExpression
     123        UpdateBestOfRunSolution(problemData, currentBestExpression.Expression);
     124      } else {
     125        // compare quality of current best with best of run solution
     126        var estimatedValidationValues = bestOfRunSolution.EstimatedValues.Skip(validationSamplesStart).Take(validationSamplesEnd - validationSamplesStart);
     127        var bestOfRunValidationQuality = SimpleMSEEvaluator.Calculate(validationValues, estimatedValidationValues);
     128        if (bestOfRunValidationQuality > currentBestExpression.ValidationQuality) {
     129          UpdateBestOfRunSolution(problemData, currentBestExpression.Expression);
     130        }
    82131      }
    83       // ((ResultCollection)Parameters["Results"].ActualValue).Add(new Result("ValidationMSE", new DoubleValue(3.15)));
     132
     133
    84134      return base.Apply();
     135    }
     136
     137    private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree) {
     138      var newBestSolution = CreateDataAnalysisSolution(problemData, tree);
     139      BestValidationSolutionParameter.ActualValue = newBestSolution;
     140
     141      var trainingValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TrainingSamplesStart.Value, problemData.TrainingSamplesEnd.Value);
     142      var testValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TestSamplesStart.Value, problemData.TestSamplesEnd.Value);
     143
     144      AddResult("MeanSquaredError (Training)", new DoubleValue(SimpleMSEEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
     145      AddResult("MeanRelativeError (Training)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
     146      AddResult("RSquared (Training)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
     147
     148      AddResult("MeanSquaredError (Test)", new DoubleValue(SimpleMSEEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
     149      AddResult("MeanRelativeError (Test)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
     150      AddResult("RSquared (Test)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
     151    }
     152
     153    private void AddResult(string resultName, IItem value) {
     154      var resultCollection = ResultParameter.ActualValue;
     155      if (resultCollection.ContainsKey(resultName)) {
     156        resultCollection[resultName].Value = value;
     157      } else {
     158        resultCollection.Add(new Result(resultName, value));
     159      }
    85160    }
    86161
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/ISymbolicRegressionEvaluator.cs

    r3373 r3452  
    3636namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
    3737  public interface ISymbolicRegressionEvaluator : ISingleObjectiveEvaluator {
    38     ILookupParameter<SymbolicExpressionTree> FunctionTreeParameter { get; }
     38    ILookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter { get; }
    3939    ILookupParameter<DataAnalysisProblemData> RegressionProblemDataParameter { get; }
     40    IValueLookupParameter<IntValue> SamplesStartParameter { get; }
     41    IValueLookupParameter<IntValue> SamplesEndParameter { get; }
    4042    ILookupParameter<DoubleValue> NumberOfEvaluatedNodesParameter { get; }
    4143  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionEvaluator.cs

    r3374 r3452  
    4242    private const string FunctionTreeParameterName = "FunctionTree";
    4343    private const string RegressionProblemDataParameterName = "RegressionProblemData";
     44    private const string SamplesStartParameterName = "SamplesStart";
     45    private const string SamplesEndParameterName = "SamplesEnd";
    4446    private const string NumberOfEvaluatedNodexParameterName = "NumberOfEvaluatedNodes";
    4547    #region ISymbolicRegressionEvaluator Members
     
    4951    }
    5052
    51     public ILookupParameter<SymbolicExpressionTree> FunctionTreeParameter {
     53    public ILookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
    5254      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[FunctionTreeParameterName]; }
    5355    }
     
    5759    }
    5860
    59     //public ILookupParameter<IntValue> SamplesStartParameter {
    60     //  get { return (ILookupParameter<IntValue>)Parameters["SamplesStart"]; }
    61     //}
     61    public IValueLookupParameter<IntValue> SamplesStartParameter {
     62      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesStartParameterName]; }
     63    }
    6264
    63     //public ILookupParameter<IntValue> SamplesEndParameter {
    64     //  get { return (ILookupParameter<IntValue>)Parameters["SamplesEnd"]; }
    65     //}
     65    public IValueLookupParameter<IntValue> SamplesEndParameter {
     66      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
     67    }
    6668
    6769    public ILookupParameter<DoubleValue> NumberOfEvaluatedNodesParameter {
    6870      get { return (ILookupParameter<DoubleValue>)Parameters[NumberOfEvaluatedNodexParameterName]; }
    6971    }
    70 
     72    #endregion
     73    #region properties
     74    public SymbolicExpressionTree SymbolicExpressionTree {
     75      get { return SymbolicExpressionTreeParameter.ActualValue; }
     76    }
     77    public DataAnalysisProblemData RegressionProblemData {
     78      get { return RegressionProblemDataParameter.ActualValue; }
     79    }
     80    public IntValue SamplesStart {
     81      get { return SamplesStartParameter.ActualValue; }
     82    }
     83    public IntValue SamplesEnd {
     84      get { return SamplesEndParameter.ActualValue; }
     85    }
    7186    #endregion
    7287
     
    7590      Parameters.Add(new LookupParameter<DoubleValue>(QualityParameterName, "The quality of the evaluated symbolic regression solution."));
    7691      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(FunctionTreeParameterName, "The symbolic regression solution encoded as a symbolic expression tree."));
    77       Parameters.Add(new LookupParameter<DataAnalysisProblemData>(RegressionProblemDataParameterName, "The data set on which the symbolic regression solution should be evaluated."));
     92      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(RegressionProblemDataParameterName, "The problem data on which the symbolic regression solution should be evaluated."));
     93      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The start index of the dataset partition on which the symbolic regression solution should be evaluated."));
     94      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The end index of the dataset partition on which the symbolic regression solution should be evaluated."));
    7895      Parameters.Add(new LookupParameter<DoubleValue>(NumberOfEvaluatedNodexParameterName, "The number of evaluated nodes so far (for performance measurements.)"));
    7996    }
    8097
    8198    public override IOperation Apply() {
    82       SymbolicExpressionTree solution = FunctionTreeParameter.ActualValue;
    83       DataAnalysisProblemData regressionProblemData = RegressionProblemDataParameter.ActualValue;
    8499      DoubleValue numberOfEvaluatedNodes = NumberOfEvaluatedNodesParameter.ActualValue;
    85      
    86       QualityParameter.ActualValue = new DoubleValue(Evaluate(solution, regressionProblemData.Dataset, regressionProblemData.TargetVariable, regressionProblemData.TrainingSamplesStart, regressionProblemData.TrainingSamplesEnd, numberOfEvaluatedNodes));
     100      QualityParameter.ActualValue = new DoubleValue(Evaluate(SymbolicExpressionTree, RegressionProblemData.Dataset,
     101        RegressionProblemData.TargetVariable, SamplesStart, SamplesEnd, numberOfEvaluatedNodes));
    87102      return null;
    88103    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs

    r3442 r3452  
    8484      get { return VisualizerParameter; }
    8585    }
    86     public ValueParameter<DoubleValue> BestKnownQualityParameter {
    87       get { return (ValueParameter<DoubleValue>)Parameters["BestKnownQuality"]; }
     86    public OptionalValueParameter<DoubleValue> BestKnownQualityParameter {
     87      get { return (OptionalValueParameter<DoubleValue>)Parameters["BestKnownQuality"]; }
    8888    }
    8989    IParameter ISingleObjectiveProblem.BestKnownQualityParameter {
     
    147147      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
    148148      Parameters.Add(new ValueParameter<ISymbolicRegressionEvaluator>("Evaluator", "The operator which should be used to evaluate symbolic regression solutions.", evaluator));
    149       Parameters.Add(new ValueParameter<DoubleValue>("BestKnownQuality", "The minimal error value that can be reached by symbolic regression models.", new DoubleValue(0)));
     149      Parameters.Add(new OptionalValueParameter<DoubleValue>("BestKnownQuality", "The minimal error value that reached by symbolic regression solutions for the problem."));
    150150      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
    151151      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
     
    159159      creator.MaxFunctionArgumentsParameter.ActualName = "MaxFunctionArguments";
    160160      creator.MaxFunctionDefinitionsParameter.ActualName = "MaxFunctionDefiningBranches";
    161       evaluator.QualityParameter.ActualName = "TrainingMeanSquaredError";
    162161      DataAnalysisProblemDataParameter.ValueChanged += new EventHandler(DataAnalysisProblemDataParameter_ValueChanged);
    163162      DataAnalysisProblemData.ProblemDataChanged += new EventHandler(DataAnalysisProblemData_Changed);
     
    169168    }
    170169
    171     void DataAnalysisProblemDataParameter_ValueChanged(object sender, EventArgs e) {
    172       DataAnalysisProblemData.ProblemDataChanged += new EventHandler(DataAnalysisProblemData_Changed);
    173     }
    174 
    175     void DataAnalysisProblemData_Changed(object sender, EventArgs e) {
    176       foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
    177         varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.Select(x => x.Value);
    178       }
    179     }
    180170
    181171    [StorableConstructor]
     
    189179
    190180    #region Events
     181    void DataAnalysisProblemDataParameter_ValueChanged(object sender, EventArgs e) {
     182      DataAnalysisProblemData.ProblemDataChanged += new EventHandler(DataAnalysisProblemData_Changed);
     183    }
     184
     185    void DataAnalysisProblemData_Changed(object sender, EventArgs e) {
     186      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
     187        varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.Select(x => x.Value);
     188      }
     189      UpdatePartitioningParameters();
     190    }
     191
     192    private void UpdatePartitioningParameters() {
     193      int trainingStart = DataAnalysisProblemData.TrainingSamplesStart.Value;
     194      int validationEnd = DataAnalysisProblemData.TrainingSamplesEnd.Value;
     195      int trainingEnd = trainingStart + (validationEnd - trainingStart) / 2;
     196      int validationStart = trainingEnd;
     197      var solutionVisualizer = Visualizer as BestValidationSymbolicRegressionSolutionVisualizer;
     198      if (solutionVisualizer != null) {
     199        solutionVisualizer.ValidationSamplesStartParameter.Value = new IntValue(validationStart);
     200        solutionVisualizer.ValidationSamplesEndParameter.Value = new IntValue(validationEnd);
     201      }
     202      Evaluator.SamplesStartParameter.Value = new IntValue(trainingStart);
     203      Evaluator.SamplesEndParameter.Value = new IntValue(trainingEnd);
     204    }
     205
    191206    public event EventHandler SolutionCreatorChanged;
    192207    private void OnSolutionCreatorChanged() {
     
    269284    }
    270285    private void ParameterizeEvaluator() {
    271       Evaluator.FunctionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
     286      Evaluator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    272287      Evaluator.RegressionProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
     288      Evaluator.QualityParameter.ActualName = "TrainingMeanSquaredError";
     289      Evaluator.SamplesStartParameter.Value = new IntValue(DataAnalysisProblemData.TrainingSamplesStart.Value);
     290      Evaluator.SamplesEndParameter.Value = new IntValue((DataAnalysisProblemData.TrainingSamplesStart.Value + DataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);
    273291    }
    274292    private void ParameterizeVisualizer() {
     
    278296          solutionVisualizer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    279297          solutionVisualizer.DataAnalysisProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
     298          solutionVisualizer.ValidationSamplesStartParameter.Value = new IntValue((DataAnalysisProblemData.TrainingSamplesStart.Value + DataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);
     299          solutionVisualizer.ValidationSamplesEndParameter.Value = new IntValue(DataAnalysisProblemData.TrainingSamplesEnd.Value);
    280300        }
    281301      }
     
    289309      }
    290310      foreach (ISymbolicRegressionEvaluator op in Operators.OfType<ISymbolicRegressionEvaluator>()) {
    291         op.FunctionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
     311        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    292312        op.RegressionProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
    293313        op.NumberOfEvaluatedNodesParameter.ActualName = NumberOfEvaluatedNodesParameter.Name;
Note: See TracChangeset for help on using the changeset viewer.