Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/17/12 11:18:40 (12 years ago)
Author:
mkommend
Message:

#1951:

  • Added linear scaling parameter to data analysis problems.
  • Adapted interfaces, evaluators and analyzers accordingly.
  • Added OnlineBoundedMeanSquaredErrorCalculator.
  • Adapted symbolic regression sample unit test.
Location:
trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisMultiObjectiveAnalyzer.cs

    r7259 r8664  
    2020#endregion
    2121
    22 using System.Collections.Generic;
    23 using System.Linq;
    2422using HeuristicLab.Common;
    2523using HeuristicLab.Core;
    2624using HeuristicLab.Data;
    27 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    28 using HeuristicLab.Operators;
    2925using HeuristicLab.Parameters;
    3026using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    31 using HeuristicLab.Optimization;
    3227
    3328namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
     
    3934    private const string QualitiesParameterName = "Qualities";
    4035    private const string MaximizationParameterName = "Maximization";
     36    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
    4137    #region parameter properties
    4238    public IScopeTreeLookupParameter<DoubleArray> QualitiesParameter {
     
    4541    public ILookupParameter<BoolArray> MaximizationParameter {
    4642      get { return (ILookupParameter<BoolArray>)Parameters[MaximizationParameterName]; }
     43    }
     44    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
     45      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
    4746    }
    4847    #endregion
     
    6463      Parameters.Add(new ScopeTreeLookupParameter<DoubleArray>(QualitiesParameterName, "The qualities of the trees that should be analyzed."));
    6564      Parameters.Add(new LookupParameter<BoolArray>(MaximizationParameterName, "The directions of optimization for each dimension."));
     65      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating."));
     66    }
     67
     68    [StorableHook(HookType.AfterDeserialization)]
     69    private void AfterDeserialization() {
     70      if (Parameters.ContainsKey(ApplyLinearScalingParameterName) && Parameters[ApplyLinearScalingParameterName] is LookupParameter<BoolValue>)
     71        Parameters.Remove(ApplyLinearScalingParameterName);
     72      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName))
     73        Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating."));
    6674    }
    6775  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisSingleObjectiveAnalyzer.cs

    r7259 r8664  
    2020#endregion
    2121
    22 using System.Collections.Generic;
    23 using System.Linq;
    2422using HeuristicLab.Common;
    2523using HeuristicLab.Core;
    2624using HeuristicLab.Data;
    27 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    28 using HeuristicLab.Operators;
    2925using HeuristicLab.Parameters;
    3026using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    31 using HeuristicLab.Optimization;
    3227
    3328namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
     
    3934    private const string QualityParameterName = "Quality";
    4035    private const string MaximizationParameterName = "Maximization";
     36    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
     37
    4138    #region parameter properties
    4239    public IScopeTreeLookupParameter<DoubleValue> QualityParameter {
     
    4542    public ILookupParameter<BoolValue> MaximizationParameter {
    4643      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
     44    }
     45    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
     46      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
    4747    }
    4848    #endregion
     
    6464      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The qualities of the trees that should be analyzed."));
    6565      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
     66      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating."));
     67    }
     68
     69    [StorableHook(HookType.AfterDeserialization)]
     70    private void AfterDeserialization() {
     71      if (Parameters.ContainsKey(ApplyLinearScalingParameterName) && !(Parameters[ApplyLinearScalingParameterName] is LookupParameter<BoolValue>))
     72        Parameters.Remove(ApplyLinearScalingParameterName);
     73      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName))
     74        Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating."));
    6675    }
    6776  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Evaluators/SymbolicDataAnalysisEvaluator.cs

    r7259 r8664  
    4444    private const string EvaluationPartitionParameterName = "EvaluationPartition";
    4545    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
     46    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
    4647
    4748    public override bool CanChangeName { get { return false; } }
     
    7071      get { return (IValueLookupParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    7172    }
     73    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
     74      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
     75    }
    7276    #endregion
    7377
     
    8791      Parameters.Add(new ValueLookupParameter<DoubleLimit>(EstimationLimitsParameterName, "The upper and lower limit that should be used as cut off value for the output values of symbolic data analysis trees."));
    8892      Parameters.Add(new ValueLookupParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index."));
     93      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating."));
     94    }
     95
     96    [StorableHook(HookType.AfterDeserialization)]
     97    private void AfterDeserialization() {
     98      if (Parameters.ContainsKey(ApplyLinearScalingParameterName) && !(Parameters[ApplyLinearScalingParameterName] is LookupParameter<BoolValue>))
     99        Parameters.Remove(ApplyLinearScalingParameterName);
     100      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName))
     101        Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating."));
    89102    }
    90103
     
    94107
    95108    protected IEnumerable<int> GenerateRowsToEvaluate(double percentageOfRows) {
    96 
    97 
    98109      IEnumerable<int> rows;
    99110      int samplesStart = EvaluationPartitionParameter.ActualValue.Start;
     
    115126      return rows.Where(i => i < testPartitionStart || testPartitionEnd <= i);
    116127    }
     128
     129    [ThreadStatic]
     130    private static double[] cache;
     131    protected static void CalculateWithScaling(IEnumerable<double> targetValues, IEnumerable<double> estimatedValues,
     132      double lowerEstimationLimit, double upperEstimationLimit,
     133      IOnlineCalculator calculator, int maxRows) {
     134      if (cache == null || cache.GetLength(0) < maxRows) {
     135        cache = new double[maxRows];
     136      }
     137
     138      //calculate linear scaling
     139      //the static methods of the calculator could not be used as it performs a check if the enumerators have an equal amount of elements
     140      //this is not true if the cache is used
     141      int i = 0;
     142      var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
     143      var targetValuesEnumerator = targetValues.GetEnumerator();
     144      var estimatedValuesEnumerator = estimatedValues.GetEnumerator();
     145      while (targetValuesEnumerator.MoveNext() & estimatedValuesEnumerator.MoveNext()) {
     146        double target = targetValuesEnumerator.Current;
     147        double estimated = estimatedValuesEnumerator.Current;
     148        cache[i] = estimated;
     149        if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
     150          linearScalingCalculator.Add(estimated, target);
     151        i++;
     152      }
     153      if (linearScalingCalculator.ErrorState == OnlineCalculatorError.None && (targetValuesEnumerator.MoveNext() || estimatedValuesEnumerator.MoveNext()))
     154        throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");
     155
     156      double alpha = linearScalingCalculator.Alpha;
     157      double beta = linearScalingCalculator.Beta;
     158      if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) {
     159        alpha = 0.0;
     160        beta = 1.0;
     161      }
     162
     163      //calculate the quality by using the passed online calculator
     164      targetValuesEnumerator = targetValues.GetEnumerator();
     165      var scaledBoundedEstimatedValuesEnumerator = Enumerable.Range(0, i).Select(x => cache[x] * beta + alpha)
     166        .LimitToRange(lowerEstimationLimit, upperEstimationLimit).GetEnumerator();
     167
     168      while (targetValuesEnumerator.MoveNext() & scaledBoundedEstimatedValuesEnumerator.MoveNext()) {
     169        calculator.Add(targetValuesEnumerator.Current, scaledBoundedEstimatedValuesEnumerator.Current);
     170      }
     171    }
    117172  }
    118173}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interfaces/ISymbolicDataAnalysisEvaluator.cs

    r7259 r8664  
    2929    IValueLookupParameter<IntRange> EvaluationPartitionParameter { get; }
    3030    IValueLookupParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter { get; }
     31    ILookupParameter<BoolValue> ApplyLinearScalingParameter { get; }
    3132
    3233    IValueLookupParameter<T> ProblemDataParameter { get; }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interfaces/ISymbolicDataAnalysisMultiObjectiveAnalyzer.cs

    r7259 r8664  
    2121using HeuristicLab.Core;
    2222using HeuristicLab.Data;
    23 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    24 using HeuristicLab.Optimization;
    25 using HeuristicLab.Parameters;
    2623namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    2724  public interface ISymbolicDataAnalysisMultiObjectiveAnalyzer : ISymbolicDataAnalysisAnalyzer {
    2825    IScopeTreeLookupParameter<DoubleArray> QualitiesParameter { get; }
    2926    ILookupParameter<BoolArray> MaximizationParameter { get; }
     27    ILookupParameter<BoolValue> ApplyLinearScalingParameter { get; }
    3028
    3129  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interfaces/ISymbolicDataAnalysisSingleObjectiveAnalyzer.cs

    r7259 r8664  
    2828    IScopeTreeLookupParameter<DoubleValue> QualityParameter { get; }
    2929    ILookupParameter<BoolValue> MaximizationParameter { get; }
     30    ILookupParameter<BoolValue> ApplyLinearScalingParameter { get; }
    3031  }
    3132}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisModel.cs

    r7259 r8664  
    2020#endregion
    2121
     22using System;
    2223using System.Drawing;
    2324using HeuristicLab.Common;
     
    6667      this.interpreter = interpreter;
    6768    }
     69
     70    #region Scaling
     71    public static void Scale(ISymbolicDataAnalysisModel model, IDataAnalysisProblemData problemData, string targetVariable) {
     72      var dataset = problemData.Dataset;
     73      var rows = problemData.TrainingIndices;
     74      var estimatedValues = model.Interpreter.GetSymbolicExpressionTreeValues(model.SymbolicExpressionTree, dataset, rows);
     75      var targetValues = dataset.GetDoubleValues(targetVariable, rows);
     76
     77      var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
     78      var targetValuesEnumerator = targetValues.GetEnumerator();
     79      var estimatedValuesEnumerator = estimatedValues.GetEnumerator();
     80      while (targetValuesEnumerator.MoveNext() & estimatedValuesEnumerator.MoveNext()) {
     81        double target = targetValuesEnumerator.Current;
     82        double estimated = estimatedValuesEnumerator.Current;
     83        if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
     84          linearScalingCalculator.Add(estimated, target);
     85      }
     86      if (linearScalingCalculator.ErrorState == OnlineCalculatorError.None && (targetValuesEnumerator.MoveNext() || estimatedValuesEnumerator.MoveNext()))
     87        throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");
     88
     89      double alpha = linearScalingCalculator.Alpha;
     90      double beta = linearScalingCalculator.Beta;
     91      if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) return;
     92
     93      ConstantTreeNode alphaTreeNode = null;
     94      ConstantTreeNode betaTreeNode = null;
     95      // check if model has been scaled previously by analyzing the structure of the tree
     96      var startNode = model.SymbolicExpressionTree.Root.GetSubtree(0);
     97      if (startNode.GetSubtree(0).Symbol is Addition) {
     98        var addNode = startNode.GetSubtree(0);
     99        if (addNode.SubtreeCount == 2 && addNode.GetSubtree(0).Symbol is Multiplication && addNode.GetSubtree(1).Symbol is Constant) {
     100          alphaTreeNode = addNode.GetSubtree(1) as ConstantTreeNode;
     101          var mulNode = addNode.GetSubtree(0);
     102          if (mulNode.SubtreeCount == 2 && mulNode.GetSubtree(1).Symbol is Constant) {
     103            betaTreeNode = mulNode.GetSubtree(1) as ConstantTreeNode;
     104          }
     105        }
     106      }
     107      // if tree structure matches the structure necessary for linear scaling then reuse the existing tree nodes
     108      if (alphaTreeNode != null && betaTreeNode != null) {
     109        betaTreeNode.Value *= beta;
     110        alphaTreeNode.Value *= beta;
     111        alphaTreeNode.Value += alpha;
     112      } else {
     113        var mainBranch = startNode.GetSubtree(0);
     114        startNode.RemoveSubtree(0);
     115        var scaledMainBranch = MakeSum(MakeProduct(mainBranch, beta), alpha);
     116        startNode.AddSubtree(scaledMainBranch);
     117      }
     118    }
     119
     120    private static ISymbolicExpressionTreeNode MakeSum(ISymbolicExpressionTreeNode treeNode, double alpha) {
     121      if (alpha.IsAlmost(0.0)) {
     122        return treeNode;
     123      } else {
     124        var addition = new Addition();
     125        var node = addition.CreateTreeNode();
     126        var alphaConst = MakeConstant(alpha);
     127        node.AddSubtree(treeNode);
     128        node.AddSubtree(alphaConst);
     129        return node;
     130      }
     131    }
     132
     133    private static ISymbolicExpressionTreeNode MakeProduct(ISymbolicExpressionTreeNode treeNode, double beta) {
     134      if (beta.IsAlmost(1.0)) {
     135        return treeNode;
     136      } else {
     137        var multipliciation = new Multiplication();
     138        var node = multipliciation.CreateTreeNode();
     139        var betaConst = MakeConstant(beta);
     140        node.AddSubtree(treeNode);
     141        node.AddSubtree(betaConst);
     142        return node;
     143      }
     144    }
     145
     146    private static ISymbolicExpressionTreeNode MakeConstant(double c) {
     147      var node = (ConstantTreeNode)(new Constant()).CreateTreeNode();
     148      node.Value = c;
     149      return node;
     150    }
     151    #endregion
    68152  }
    69153}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisProblem.cs

    r8203 r8664  
    5353    private const string FitnessCalculationPartitionParameterName = "FitnessCalculationPartition";
    5454    private const string ValidationPartitionParameterName = "ValidationPartition";
     55    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
    5556
    5657    private const string ProblemDataParameterDescription = "";
     
    6465    private const string FitnessCalculationPartitionParameterDescription = "The partition of the problem data training partition, that should be used to calculate the fitness of an individual.";
    6566    private const string ValidationPartitionParameterDescription = "The partition of the problem data training partition, that should be used to select the best model from (optional).";
     67    private const string ApplyLinearScalingParameterDescription = "Flag that indicates if the individual should be linearly scaled before evaluating.";
    6668    #endregion
    6769
     
    100102      get { return (IFixedValueParameter<IntRange>)Parameters[ValidationPartitionParameterName]; }
    101103    }
     104    public IFixedValueParameter<BoolValue> ApplyLinearScalingParameter {
     105      get { return (IFixedValueParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
     106    }
    102107    #endregion
    103108
     
    144149    public IntRange ValidationPartition {
    145150      get { return ValidationPartitionParameter.Value; }
     151    }
     152    public BoolValue ApplyLinearScaling {
     153      get { return ApplyLinearScalingParameter.Value; }
    146154    }
    147155    #endregion
     
    151159    [StorableHook(HookType.AfterDeserialization)]
    152160    private void AfterDeserialization() {
     161      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName)) {
     162        Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, new BoolValue(false)));
     163        ApplyLinearScalingParameter.Hidden = true;
     164      }
     165
    153166      RegisterEventHandlers();
    154167    }
     
    170183      Parameters.Add(new FixedValueParameter<IntRange>(ValidationPartitionParameterName, ValidationPartitionParameterDescription));
    171184      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, RelativeNumberOfEvaluatedSamplesParameterDescription, new PercentValue(1)));
     185      Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, new BoolValue(false)));
    172186
    173187      SymbolicExpressionTreeInterpreterParameter.Hidden = true;
    174188      MaximumFunctionArgumentsParameter.Hidden = true;
    175189      MaximumFunctionDefinitionsParameter.Hidden = true;
     190      ApplyLinearScalingParameter.Hidden = true;
    176191
    177192      SymbolicExpressionTreeGrammar = new TypeCoherentExpressionGrammar();
     
    274289
    275290      foreach (var op in operators.OfType<ISymbolicExpressionTreeGrammarBasedOperator>()) {
    276         op.SymbolicExpressionTreeGrammarParameter.ActualName = SymbolicExpressionTreeGrammarParameterName;
     291        op.SymbolicExpressionTreeGrammarParameter.ActualName = SymbolicExpressionTreeGrammarParameter.Name;
    277292      }
    278293      foreach (var op in operators.OfType<ISymbolicExpressionTreeSizeConstraintOperator>()) {
    279         op.MaximumSymbolicExpressionTreeDepthParameter.ActualName = MaximumSymbolicExpressionTreeDepthParameterName;
    280         op.MaximumSymbolicExpressionTreeLengthParameter.ActualName = MaximumSymbolicExpressionTreeLengthParameterName;
     294        op.MaximumSymbolicExpressionTreeDepthParameter.ActualName = MaximumSymbolicExpressionTreeDepthParameter.Name;
     295        op.MaximumSymbolicExpressionTreeLengthParameter.ActualName = MaximumSymbolicExpressionTreeLengthParameter.Name;
    281296      }
    282297      foreach (var op in operators.OfType<ISymbolicExpressionTreeArchitectureAlteringOperator>()) {
    283         op.MaximumFunctionArgumentsParameter.ActualName = MaximumFunctionArgumentsParameterName;
    284         op.MaximumFunctionDefinitionsParameter.ActualName = MaximumFunctionDefinitionsParameterName;
     298        op.MaximumFunctionArgumentsParameter.ActualName = MaximumFunctionArgumentsParameter.Name;
     299        op.MaximumFunctionDefinitionsParameter.ActualName = MaximumFunctionDefinitionsParameter.Name;
    285300      }
    286301      foreach (var op in operators.OfType<ISymbolicDataAnalysisEvaluator<T>>()) {
     
    289304        op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name;
    290305        op.RelativeNumberOfEvaluatedSamplesParameter.ActualName = RelativeNumberOfEvaluatedSamplesParameter.Name;
     306        op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name;
    291307      }
    292308      foreach (var op in operators.OfType<ISymbolicExpressionTreeCrossover>()) {
     
    300316        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    301317      }
     318      foreach (var op in operators.OfType<ISymbolicDataAnalysisSingleObjectiveAnalyzer>()) {
     319        op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name;
     320      }
     321      foreach (var op in operators.OfType<ISymbolicDataAnalysisMultiObjectiveAnalyzer>()) {
     322        op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name;
     323      }
    302324      foreach (var op in operators.OfType<ISymbolicDataAnalysisAnalyzer>()) {
    303325        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
     
    308330      }
    309331      foreach (var op in operators.OfType<ISymbolicDataAnalysisInterpreterOperator>()) {
    310         op.SymbolicDataAnalysisTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameterName;
     332        op.SymbolicDataAnalysisTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
    311333      }
    312334      foreach (var op in operators.OfType<ISymbolicDataAnalysisExpressionCrossover<T>>()) {
    313         op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameterName;
     335        op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name;
    314336        op.ProblemDataParameter.ActualName = ProblemDataParameter.Name;
    315337        op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name;
Note: See TracChangeset for help on using the changeset viewer.