Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
02/19/14 14:04:03 (10 years ago)
Author:
bburlacu
Message:

#2143: Refactored pruning analyzer and operators as per review.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisSingleObjectivePruningAnalyzer.cs

    r10428 r10469  
    55using HeuristicLab.Core;
    66using HeuristicLab.Data;
    7 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    8 using HeuristicLab.Optimization;
     7using HeuristicLab.Operators;
     8using HeuristicLab.Optimization.Operators;
    99using HeuristicLab.Parameters;
    1010using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     
    1414  [Item("SymbolicDataAnalysisSingleObjectivePruningAnalyzer", "An analyzer that prunes introns from trees in single objective symbolic data analysis problems.")]
    1515  public abstract class SymbolicDataAnalysisSingleObjectivePruningAnalyzer : SymbolicDataAnalysisSingleObjectiveAnalyzer {
     16    #region parameter names
    1617    private const string ProblemDataParameterName = "ProblemData";
    17     private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    18 
    1918    private const string UpdateIntervalParameterName = "UpdateInverval";
    2019    private const string UpdateCounterParameterName = "UpdateCounter";
    21 
    2220    private const string PopulationSliceParameterName = "PopulationSlice";
    2321    private const string PruningProbabilityParameterName = "PruningProbability";
    24 
    25     private const string NumberOfPrunedSubtreesParameterName = "PrunedSubtrees";
    26     private const string NumberOfPrunedTreesParameterName = "PrunedTrees";
    27 
     22    private const string TotalNumberOfPrunedSubtreesParameterName = "Number of pruned subtrees";
     23    private const string TotalNumberOfPrunedTreesParameterName = "Number of pruned trees";
    2824    private const string RandomParameterName = "Random";
    29     private const string EstimationLimitsParameterName = "EstimationLimits";
    30 
    3125    private const string PruneOnlyZeroImpactNodesParameterName = "PruneOnlyZeroImpactNodes";
    3226    private const string NodeImpactThresholdParameterName = "ImpactThreshold";
    33 
    34     private const string FitnessCalculationPartitionParameterName = "FitnessCalculationPartition";
    35 
    36     private bool reentry;
    37     [Storable]
    38     protected ISymbolicDataAnalysisSolutionImpactValuesCalculator impactValuesCalculator;
    39 
     27    private const string PruningOperatorParameterName = "PruningOperator";
     28    private const string ResultsParameterName = "Results";
     29    #endregion
     30    #region private members
     31    private DataReducer prunedSubtreesReducer;
     32    private DataReducer prunedTreesReducer;
     33    private DataTableValuesCollector valuesCollector;
     34    private ResultsCollector resultsCollector;
     35    private EmptyOperator emptyOp;
     36    #endregion
    4037    #region parameter properties
     38    public IValueParameter<SymbolicDataAnalysisExpressionPruningOperator> PruningOperatorParameter {
     39      get { return (IValueParameter<SymbolicDataAnalysisExpressionPruningOperator>)Parameters[PruningOperatorParameterName]; }
     40    }
    4141    public IFixedValueParameter<BoolValue> PruneOnlyZeroImpactNodesParameter {
    4242      get { return (IFixedValueParameter<BoolValue>)Parameters[PruneOnlyZeroImpactNodesParameterName]; }
     
    4545      get { return (IFixedValueParameter<DoubleValue>)Parameters[NodeImpactThresholdParameterName]; }
    4646    }
    47     public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
    48       get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
    49     }
    5047    public ILookupParameter<IRandom> RandomParameter {
    5148      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
     
    5350    private ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
    5451      get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    55     }
    56     public ILookupParameter<IntRange> FitnessCalculationPartitionParameter {
    57       get { return (ILookupParameter<IntRange>)Parameters[FitnessCalculationPartitionParameterName]; }
    58     }
    59     private ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
    60       get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
    6152    }
    6253    public IValueParameter<IntValue> UpdateIntervalParameter {
     
    7263      get { return (IValueParameter<DoubleValue>)Parameters[PruningProbabilityParameterName]; }
    7364    }
    74     public IFixedValueParameter<DoubleValue> NumberOfPrunedSubtreesParameter {
    75       get { return (IFixedValueParameter<DoubleValue>)Parameters[NumberOfPrunedSubtreesParameterName]; }
    76     }
    77     public IFixedValueParameter<DoubleValue> NumberOfPrunedTreesParameter {
    78       get { return (IFixedValueParameter<DoubleValue>)Parameters[NumberOfPrunedTreesParameterName]; }
    79     }
    8065    #endregion
    8166    #region properties
     67    protected SymbolicDataAnalysisExpressionPruningOperator PruningOperator { get { return PruningOperatorParameter.Value; } }
    8268    protected IDataAnalysisProblemData ProblemData { get { return ProblemDataParameter.ActualValue; } }
    83     protected IntRange FitnessCalculationPartition { get { return FitnessCalculationPartitionParameter.ActualValue; } }
    84     protected ISymbolicDataAnalysisExpressionTreeInterpreter Interpreter { get { return InterpreterParameter.ActualValue; } }
    8569    protected IntValue UpdateInterval { get { return UpdateIntervalParameter.Value; } }
    8670    protected IntValue UpdateCounter { get { return UpdateCounterParameter.Value; } }
    8771    protected DoubleRange PopulationSlice { get { return PopulationSliceParameter.Value; } }
    8872    protected DoubleValue PruningProbability { get { return PruningProbabilityParameter.Value; } }
    89     protected DoubleValue PrunedSubtrees { get { return NumberOfPrunedSubtreesParameter.Value; } }
    90     protected DoubleValue PrunedTrees { get { return NumberOfPrunedTreesParameter.Value; } }
    91     protected DoubleLimit EstimationLimits { get { return EstimationLimitsParameter.ActualValue; } }
    9273    protected IRandom Random { get { return RandomParameter.ActualValue; } }
    9374    protected DoubleValue NodeImpactThreshold { get { return NodeImpactThresholdParameter.Value; } }
    9475    protected BoolValue PruneOnlyZeroImpactNodes { get { return PruneOnlyZeroImpactNodesParameter.Value; } }
    9576    #endregion
    96 
    9777    #region IStatefulItem members
    9878    public override void InitializeState() {
    9979      base.InitializeState();
    10080      UpdateCounter.Value = 0;
    101       PrunedSubtrees.Value = 0;
    102       PrunedTrees.Value = 0;
    10381    }
    10482    public override void ClearState() {
    10583      base.ClearState();
    10684      UpdateCounter.Value = 0;
    107       PrunedSubtrees.Value = 0;
    108       PrunedTrees.Value = 0;
    10985    }
    11086    #endregion
     
    11288    [StorableConstructor]
    11389    protected SymbolicDataAnalysisSingleObjectivePruningAnalyzer(bool deserializing) : base(deserializing) { }
    114     [StorableHook(HookType.AfterDeserialization)]
    115     private void AfterDeserialization() {
    116       if (!Parameters.ContainsKey(FitnessCalculationPartitionParameterName))
    117         Parameters.Add(new LookupParameter<IntRange>(FitnessCalculationPartitionParameterName, ""));
    118     }
    11990    protected SymbolicDataAnalysisSingleObjectivePruningAnalyzer(SymbolicDataAnalysisSingleObjectivePruningAnalyzer original, Cloner cloner)
    12091      : base(original, cloner) {
    121       impactValuesCalculator = original.impactValuesCalculator;
     92      this.prunedSubtreesReducer = (DataReducer)original.prunedSubtreesReducer.Clone();
     93      this.prunedTreesReducer = (DataReducer)original.prunedTreesReducer.Clone();
     94      this.valuesCollector = (DataTableValuesCollector)original.valuesCollector.Clone();
     95      this.resultsCollector = (ResultsCollector)original.resultsCollector.Clone();
    12296    }
    12397    protected SymbolicDataAnalysisSingleObjectivePruningAnalyzer() {
     98      #region add parameters
    12499      Parameters.Add(new ValueParameter<DoubleRange>(PopulationSliceParameterName, new DoubleRange(0.75, 1)));
    125100      Parameters.Add(new ValueParameter<DoubleValue>(PruningProbabilityParameterName, new DoubleValue(0.5)));
    126       // analyzer parameters
    127101      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
    128102      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called", new IntValue(0)));
    129103      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName));
    130104      Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName));
    131       Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
    132 
    133       Parameters.Add(new FixedValueParameter<DoubleValue>(NumberOfPrunedSubtreesParameterName, new DoubleValue(0)));
    134       Parameters.Add(new FixedValueParameter<DoubleValue>(NumberOfPrunedTreesParameterName, new DoubleValue(0)));
    135       Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
    136105      Parameters.Add(new FixedValueParameter<DoubleValue>(NodeImpactThresholdParameterName, new DoubleValue(0.0)));
    137106      Parameters.Add(new FixedValueParameter<BoolValue>(PruneOnlyZeroImpactNodesParameterName, new BoolValue(false)));
    138       Parameters.Add(new LookupParameter<IntRange>(FitnessCalculationPartitionParameterName, ""));
     107      #endregion
     108    }
     109
     110    private void InitializeOperators() {
     111      prunedSubtreesReducer = new DataReducer();
     112      prunedSubtreesReducer.ParameterToReduce.ActualName = PruningOperator.PrunedSubtreesParameter.ActualName;
     113      prunedSubtreesReducer.ReductionOperation.Value = new ReductionOperation(ReductionOperations.Sum); // sum all the pruned subtrees parameter values
     114      prunedSubtreesReducer.TargetOperation.Value = new ReductionOperation(ReductionOperations.Assign); // assign the sum to the target parameter
     115      prunedSubtreesReducer.TargetParameter.ActualName = TotalNumberOfPrunedSubtreesParameterName;
     116
     117      prunedTreesReducer = new DataReducer();
     118      prunedTreesReducer.ParameterToReduce.ActualName = PruningOperator.PrunedTreesParameter.ActualName;
     119      prunedTreesReducer.ReductionOperation.Value = new ReductionOperation(ReductionOperations.Sum);
     120      prunedTreesReducer.TargetOperation.Value = new ReductionOperation(ReductionOperations.Assign);
     121      prunedTreesReducer.TargetParameter.ActualName = TotalNumberOfPrunedTreesParameterName;
     122
     123      valuesCollector = new DataTableValuesCollector();
     124      valuesCollector.CollectedValues.Add(new LookupParameter<IntValue>(TotalNumberOfPrunedSubtreesParameterName));
     125      valuesCollector.CollectedValues.Add(new LookupParameter<IntValue>(TotalNumberOfPrunedTreesParameterName));
     126      valuesCollector.DataTableParameter.ActualName = "Population pruning";
     127
     128      resultsCollector = new ResultsCollector();
     129      resultsCollector.CollectedValues.Add(new LookupParameter<DataTable>("Population pruning"));
     130      resultsCollector.ResultsParameter.ActualName = ResultsParameterName;
     131
     132      emptyOp = new EmptyOperator();
     133    }
     134
     135    //
     136    /// <summary>
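     137    /// Computes the half-open index range [start, end) bounding the portion of the population that is to be pruned.
     138    /// </summary>
     139    /// <returns>Returns an int range whose Start is inclusive and whose End is exclusive (End is capped at the number of sub-scopes).</returns>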
     140    private IntRange GetSliceBounds() {
     141      var count = ExecutionContext.Scope.SubScopes.Count;
     142      var start = (int)Math.Round(PopulationSlice.Start * count);
     143      var end = (int)Math.Round(PopulationSlice.End * count);
     144      if (end > count) end = count;
     145
     146      if (start >= end) throw new ArgumentOutOfRangeException("Invalid PopulationSlice bounds.");
     147      return new IntRange(start, end);
     148    }
     149
     150    private IOperation CreatePruningOperation() {
     151      var oc = new OperationCollection { Parallel = true };
     152      var range = GetSliceBounds();
     153      var qualities = Quality.Select(x => x.Value).ToArray();
     154      var indices = Enumerable.Range(0, qualities.Length).ToArray();
     155      Array.Sort(qualities, indices);
     156      if (!Maximization.Value) Array.Reverse(indices);
     157
     158      var subscopes = ExecutionContext.Scope.SubScopes;
     159
     160      for (int i = 0; i < subscopes.Count; ++i) {
     161        IOperator op;
     162        if (range.Start <= i && i < range.End && Random.NextDouble() <= PruningProbability.Value)
     163          op = PruningOperator;
     164        else op = emptyOp;
     165        var index = indices[i];
     166        var subscope = subscopes[index];
     167        oc.Add(ExecutionContext.CreateChildOperation(op, subscope));
     168      }
     169      return oc;
    139170    }
    140171
    141172    public override IOperation Apply() {
    142       if (reentry) {
    143         UpdateCounter.Value++;
     173      UpdateCounter.Value++;
     174      if (UpdateCounter.Value != UpdateInterval.Value) return base.Apply();
     175      UpdateCounter.Value = 0;
    144176
    145         if (UpdateCounter.Value != UpdateInterval.Value) return base.Apply();
    146         UpdateCounter.Value = 0;
     177      if (prunedSubtreesReducer == null || prunedTreesReducer == null || valuesCollector == null || resultsCollector == null) { InitializeOperators(); }
    147178
    148         var trees = SymbolicExpressionTreeParameter.ActualValue.ToList();
    149         var qualities = QualityParameter.ActualValue.ToList();
     179      var prune = CreatePruningOperation();
     180      var reducePrunedSubtrees = ExecutionContext.CreateChildOperation(prunedSubtreesReducer);
     181      var reducePrunedTrees = ExecutionContext.CreateChildOperation(prunedTreesReducer);
     182      var collectValues = ExecutionContext.CreateChildOperation(valuesCollector);
     183      var collectResults = ExecutionContext.CreateChildOperation(resultsCollector);
    150184
    151         var population = trees.Zip(qualities, (tree, quality) => new { Tree = tree, Quality = quality }).ToList();
    152         Func<double, double, int> compare = (a, b) => Maximization.Value ? a.CompareTo(b) : b.CompareTo(a);
    153         population.Sort((a, b) => compare(a.Quality.Value, b.Quality.Value));
    154 
    155         var start = (int)Math.Round(PopulationSlice.Start * trees.Count);
    156         var end = (int)Math.Round(PopulationSlice.End * trees.Count);
    157 
    158         if (end == population.Count) end--;
    159 
    160         if (start >= end || end >= population.Count) throw new Exception("Invalid PopulationSlice bounds.");
    161 
    162         PrunedSubtrees.Value = 0;
    163         PrunedTrees.Value = 0;
    164 
    165         reentry = false;
    166 
    167         var operations = new OperationCollection { Parallel = true };
    168         foreach (var p in population.Skip(start).Take(end)) {
    169           if (Random.NextDouble() > PruningProbability.Value) continue;
    170           var op = new SymbolicDataAnalysisExpressionPruningOperator {
    171             Model = CreateModel(p.Tree, Interpreter, EstimationLimits.Lower, EstimationLimits.Upper),
    172             ImpactsCalculator = impactValuesCalculator,
    173             ProblemData = ProblemData,
    174             Random = Random,
    175             PruneOnlyZeroImpactNodes = PruneOnlyZeroImpactNodes.Value,
    176             NodeImpactThreshold = NodeImpactThreshold.Value,
    177             FitnessCalculationPartition = FitnessCalculationPartition
    178           };
    179           operations.Add(ExecutionContext.CreateChildOperation(op, ExecutionContext.Scope));
    180         }
    181         return new OperationCollection { operations, ExecutionContext.CreateOperation(this) };
    182       }
    183 
    184       DataTable table;
    185 
    186       if (ResultCollection.ContainsKey("Population Pruning")) {
    187         table = (DataTable)ResultCollection["Population Pruning"].Value;
    188       } else {
    189         table = new DataTable("Population Pruning");
    190         table.Rows.Add(new DataRow("Pruned Trees") { VisualProperties = { StartIndexZero = true } });
    191         table.Rows.Add(new DataRow("Pruned Subtrees") { VisualProperties = { StartIndexZero = true } });
    192         ResultCollection.Add(new Result("Population Pruning", table));
    193       }
    194 
    195       table.Rows["Pruned Trees"].Values.Add(PrunedTrees.Value);
    196       table.Rows["Pruned Subtrees"].Values.Add(PrunedSubtrees.Value);
    197 
    198       reentry = true;
    199 
    200       return base.Apply();
     185      return new OperationCollection { prune, reducePrunedSubtrees, reducePrunedTrees, collectValues, collectResults, base.Apply() };
    201186    }
    202 
    203     protected abstract ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
    204       double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue);
    205187  }
    206188}
Note: See TracChangeset for help on using the changeset viewer.