Free cookie consent management tool by TermsFeed Policy Generator

Changeset 3513 for trunk


Ignore:
Timestamp:
04/23/10 14:28:07 (14 years ago)
Author:
gkronber
Message:

Added upper and lower estimation limits (ticket #938: Data types and operators for regression problems).

Location:
trunk/sources
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/BestValidationSymbolicRegressionSolutionVisualizer.cs

    r3462 r3513  
    4040  public sealed class BestValidationSymbolicRegressionSolutionVisualizer : SingleSuccessorOperator, ISingleObjectiveSolutionsVisualizer, ISolutionsVisualizer {
    4141    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
     42    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
     43    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    4244    private const string SymbolicRegressionModelParameterName = "SymbolicRegressionModel";
    4345    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
     
    5153    public ILookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
    5254      get { return (ILookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
     55    }
     56    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
     57      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
     58    }
     59    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
     60      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    5361    }
    5462    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
     
    8593      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    8694    }
     95    public DoubleValue UpperEstimationLimit {
     96      get { return UpperEstimationLimitParameter.ActualValue; }
     97    }
     98    public DoubleValue LowerEstimationLimit {
     99      get { return LowerEstimationLimitParameter.ActualValue; }
     100    }
    87101    public IntValue ValidationSamplesStart {
    88102      get { return ValidationSamplesStartParameter.ActualValue; }
     
    99113      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The symbolic regression problme data on which the best solution should be evaluated."));
    100114      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of symbolic expression trees."));
     115      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper limit that should be used as cut off value for the output values of symbolic expression trees."));
     116      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower limit that should be used as cut off value for the output values of symbolic expression trees."));
    101117      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The start index of the validation partition (part of the training partition)."));
    102118      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
     
    112128      int validationSamplesEnd = ValidationSamplesEnd.Value;
    113129      var validationValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, validationSamplesStart, validationSamplesEnd);
    114 
     130      double upperEstimationLimit = UpperEstimationLimit.Value;
     131      double lowerEstimationLimit = LowerEstimationLimit.Value;
    115132      var currentBestExpression = (from expression in expressions
    116133                                   let validationQuality =
    117134                                     SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(
    118135                                       SymbolicExpressionTreeInterpreter, expression,
     136                                       lowerEstimationLimit, upperEstimationLimit,
    119137                                       problemData.Dataset, problemData.TargetVariable.Value,
    120138                                       validationSamplesStart, validationSamplesEnd)
     
    126144      if (bestOfRunSolution == null) {
    127145        // no best of run solution yet -> make a solution from the currentBestExpression
    128         UpdateBestOfRunSolution(problemData, currentBestExpression.Expression, SymbolicExpressionTreeInterpreter);
     146        UpdateBestOfRunSolution(problemData, currentBestExpression.Expression, SymbolicExpressionTreeInterpreter, lowerEstimationLimit, upperEstimationLimit);
    129147      } else {
    130148        // compare quality of current best with best of run solution
     
    132150        var bestOfRunValidationQuality = SimpleMSEEvaluator.Calculate(validationValues, estimatedValidationValues);
    133151        if (bestOfRunValidationQuality > currentBestExpression.ValidationQuality) {
    134           UpdateBestOfRunSolution(problemData, currentBestExpression.Expression, SymbolicExpressionTreeInterpreter);
     152          UpdateBestOfRunSolution(problemData, currentBestExpression.Expression, SymbolicExpressionTreeInterpreter, lowerEstimationLimit, upperEstimationLimit);
    135153        }
    136154      }
     
    140158    }
    141159
    142     private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree, ISymbolicExpressionTreeInterpreter interpreter) {
    143       var newBestSolution = CreateDataAnalysisSolution(problemData, tree, interpreter);
     160    private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree, ISymbolicExpressionTreeInterpreter interpreter,
     161      double lowerEstimationLimit, double upperEstimationLimit) {
     162      var newBestSolution = CreateDataAnalysisSolution(problemData, tree, interpreter, lowerEstimationLimit, upperEstimationLimit);
    144163      if (BestValidationSolutionParameter.ActualValue == null)
    145164        BestValidationSolutionParameter.ActualValue = newBestSolution;
     
    169188    }
    170189
    171     private SymbolicRegressionSolution CreateDataAnalysisSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree expression, ISymbolicExpressionTreeInterpreter interpreter) {
     190    private SymbolicRegressionSolution CreateDataAnalysisSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree expression, ISymbolicExpressionTreeInterpreter interpreter,
     191      double lowerEstimationLimit, double upperEstimationLimit) {
    172192      var model = new SymbolicRegressionModel(interpreter, expression, problemData.InputVariables.Select(s => s.Value));
    173       return new SymbolicRegressionSolution(problemData, model);
     193      return new SymbolicRegressionSolution(problemData, model, lowerEstimationLimit, upperEstimationLimit);
    174194    }
    175195  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionMeanSquaredErrorEvaluator.cs

    r3491 r3513  
    4141  [StorableClass]
    4242  public class SymbolicRegressionMeanSquaredErrorEvaluator : SymbolicRegressionEvaluator {
     43    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
     44    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
     45
     46    #region parameter properties
     47    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
     48      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
     49    }
     50    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
     51      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
     52    }
     53    #endregion
     54    #region properties
     55    public DoubleValue UpperEstimationLimit {
     56      get { return UpperEstimationLimitParameter.ActualValue; }
     57    }
     58    public DoubleValue LowerEstimationLimit {
     59      get { return LowerEstimationLimitParameter.ActualValue; }
     60    }
     61    #endregion
     62    public SymbolicRegressionMeanSquaredErrorEvaluator()
     63      : base() {
     64      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper limit that should be used as cut off value for the output values of symbolic expression trees."));
     65      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower limit that should be used as cut off value for the output values of symbolic expression trees."));
     66    }
     67
    4368    protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IntValue samplesStart, IntValue samplesEnd) {
    44       double mse = Calculate(interpreter, solution, dataset, targetVariable.Value, samplesStart.Value, samplesEnd.Value);
     69      double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, samplesStart.Value, samplesEnd.Value);
    4570      return mse;
    4671    }
    4772
    48     public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, string targetVariable, int start, int end) {
     73    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end) {
    4974      int targetVariableIndex = dataset.GetVariableIndex(targetVariable);
    50       var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
     75      var estimatedValues = from x in interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start))
     76                            let boundedX = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, x))
     77                            select double.IsNaN(boundedX) ? upperEstimationLimit : boundedX;
    5178      var originalValues = from row in Enumerable.Range(start, end - start) select dataset[row, targetVariableIndex];
    5279      return SimpleMSEEvaluator.Calculate(originalValues, estimatedValues);
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionModel.cs

    r3493 r3513  
    5555      get { return inputVariables.AsEnumerable(); }
    5656    }
    57 
    5857    public SymbolicRegressionModel() : base() { } // for cloning
    5958
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs

    r3491 r3513  
    5858      get { return SolutionCreatorParameter; }
    5959    }
     60    public ValueParameter<DoubleValue> LowerEstimationLimitParameter {
     61      get { return (ValueParameter<DoubleValue>)Parameters["LowerEstimationLimit"]; }
     62    }
     63    public ValueParameter<DoubleValue> UpperEstimationLimitParameter {
     64      get { return (ValueParameter<DoubleValue>)Parameters["UpperEstimationLimit"]; }
     65    }
     66    public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
     67      get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
     68    }
    6069    public ValueParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
    6170      get { return (ValueParameter<ISymbolicRegressionEvaluator>)Parameters["Evaluator"]; }
     
    109118      get { return SolutionCreatorParameter.Value; }
    110119    }
     120    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
     121      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
     122      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
     123    }
     124    public DoubleValue LowerEstimationLimit {
     125      get { return LowerEstimationLimitParameter.Value; }
     126      set { LowerEstimationLimitParameter.Value = value; }
     127    }
     128    public DoubleValue UpperEstimationLimit {
     129      get { return UpperEstimationLimitParameter.Value; }
     130      set { UpperEstimationLimitParameter.Value = value; }
     131    }
     132
    111133    public ISymbolicRegressionEvaluator Evaluator {
    112134      get { return EvaluatorParameter.Value; }
     
    135157    public IEnumerable<IOperator> Operators {
    136158      get { return operators.Cast<IOperator>(); }
     159    }
     160    public DoubleValue PunishmentFactor {
     161      get { return new DoubleValue(10.0); }
    137162    }
    138163    #endregion
     
    150175      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
    151176      Parameters.Add(new ValueParameter<ISymbolicRegressionEvaluator>("Evaluator", "The operator which should be used to evaluate symbolic regression solutions.", evaluator));
     177      Parameters.Add(new ValueParameter<DoubleValue>("LowerEstimationLimit", "The lower limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.NegativeInfinity)));
     178      Parameters.Add(new ValueParameter<DoubleValue>("UpperEstimationLimit", "The upper limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.PositiveInfinity)));
    152179      Parameters.Add(new OptionalValueParameter<DoubleValue>("BestKnownQuality", "The minimal error value that reached by symbolic regression solutions for the problem."));
    153180      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
     
    169196      Initialize();
    170197    }
    171 
    172198
    173199    [StorableConstructor]
     
    208234      Evaluator.SamplesStartParameter.Value = new IntValue(trainingStart);
    209235      Evaluator.SamplesEndParameter.Value = new IntValue(trainingEnd);
     236
     237      if (trainingEnd - trainingStart > 0 && DataAnalysisProblemData.TargetVariable.Value != string.Empty) {
     238        var targetValues = DataAnalysisProblemData.Dataset.GetVariableValues(DataAnalysisProblemData.TargetVariable.Value, trainingStart, trainingEnd);
     239        var mean = targetValues.Average();
     240        var range = targetValues.Max() - targetValues.Min();
     241        UpperEstimationLimit = new DoubleValue(mean + PunishmentFactor.Value * range);
     242        LowerEstimationLimit = new DoubleValue(mean - PunishmentFactor.Value * range);
     243      }
    210244    }
    211245
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionSolution.cs

    r3493 r3513  
    5050
    5151    public SymbolicRegressionSolution() : base() { }
    52     public SymbolicRegressionSolution(DataAnalysisProblemData problemData, SymbolicRegressionModel model)
    53       : base(problemData) {
     52    public SymbolicRegressionSolution(DataAnalysisProblemData problemData, SymbolicRegressionModel model, double lowerEstimationLimit, double upperEstimationLimit)
     53      : base(problemData, lowerEstimationLimit, upperEstimationLimit) {
    5454      this.model = model;
    5555    }
     
    6868
    6969    private void RecalculateEstimatedValues() {
    70       estimatedValues = model.GetEstimatedValues(ProblemData.Dataset, 0, ProblemData.Dataset.Rows).ToList();
     70      estimatedValues = (from x in model.GetEstimatedValues(ProblemData.Dataset, 0, ProblemData.Dataset.Rows)
     71                         let boundedX = Math.Min(UpperEstimationLimit, Math.Max(LowerEstimationLimit, x))
     72                         select double.IsNaN(boundedX) ? UpperEstimationLimit : boundedX).ToList();
    7173      OnEstimatedValuesChanged(EventArgs.Empty);
    7274    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/DataAnalysisSolution.cs

    r3462 r3513  
    5050      }
    5151    }
     52    [Storable]
     53    private double lowerEstimationLimit;
     54    public double LowerEstimationLimit {
     55      get { return lowerEstimationLimit; }
     56      set {
     57        if (lowerEstimationLimit != value) {
     58          lowerEstimationLimit = value;
     59          OnEstimatedValuesChanged(EventArgs.Empty);
     60        }
     61      }
     62    }
     63
     64    [Storable]
     65    private double upperEstimationLimit;
     66    public double UpperEstimationLimit {
     67      get { return upperEstimationLimit; }
     68      set {
     69        if (upperEstimationLimit != value) {
     70          upperEstimationLimit = value;
     71          OnEstimatedValuesChanged(EventArgs.Empty);
     72        }
     73      }
     74    }
    5275
    5376    public abstract IEnumerable<double> EstimatedValues { get; }
     
    5679
    5780    protected DataAnalysisSolution() : base() { }
    58     protected DataAnalysisSolution(DataAnalysisProblemData problemData)
     81    protected DataAnalysisSolution(DataAnalysisProblemData problemData) : this(problemData, double.NegativeInfinity, double.PositiveInfinity) { }
     82    protected DataAnalysisSolution(DataAnalysisProblemData problemData, double lowerEstimationLimit, double upperEstimationLimit)
    5983      : this() {
    6084      this.problemData = problemData;
     85      this.lowerEstimationLimit = lowerEstimationLimit;
     86      this.upperEstimationLimit = upperEstimationLimit;
    6187      Initialize();
    6288    }
     
    74100      // don't clone the problem data!
    75101      clone.problemData = problemData;
     102      clone.lowerEstimationLimit = lowerEstimationLimit;
     103      clone.upperEstimationLimit = upperEstimationLimit;
    76104      clone.Initialize();
    77105      return clone;
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Symbolic/SimpleArithmeticExpressionInterpreter.cs

    r3491 r3513  
    3434  [Item("SimpleArithmeticExpressionInterpreter", "Interpreter for arithmetic symbolic expression trees including function calls.")]
    3535  // not thread safe!
    36   public class SimpleArithmeticExpressionInterpreter : Item, ISymbolicExpressionTreeInterpreter {
     36  public class SimpleArithmeticExpressionInterpreter : NamedItem, ISymbolicExpressionTreeInterpreter {
    3737    private class OpCodes {
    3838      public const byte Add = 1;
     
    4747
    4848    private const int ARGUMENT_STACK_SIZE = 1024;
     49
    4950    private Dataset dataset;
    5051    private int row;
    5152    private Instruction[] code;
    5253    private int pc;
     54
     55    public SimpleArithmeticExpressionInterpreter()
     56      : base() {
     57    }
    5358
    5459    public IEnumerable<double> GetSymbolicExpressionTreeValues(SymbolicExpressionTree tree, Dataset dataset, IEnumerable<int> rows) {
     
    6166        pc = 0;
    6267        argStackPointer = 0;
    63         var estimatedValue = Evaluate();
    64         if (double.IsNaN(estimatedValue) || double.IsInfinity(estimatedValue)) yield return 0.0;
    65         else yield return estimatedValue;
     68        yield return Evaluate();
    6669      }
    6770    }
Note: See TracChangeset for help on using the changeset viewer.