Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisSingleObjectivePruningAnalyzer.cs @ 10414

Last change on this file since 10414 was 10414, checked in by bburlacu, 10 years ago

#2143: Modified the pruning operator and analyzer to use the FitnessCalculationPartition for impact and replacement values calculation, instead of the whole training data partition.

File size: 10.9 KB
Line 
1using System;
2using System.Linq;
3using HeuristicLab.Analysis;
4using HeuristicLab.Common;
5using HeuristicLab.Core;
6using HeuristicLab.Data;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Optimization;
9using HeuristicLab.Parameters;
10using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
11
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Base class for single-objective analyzers that periodically schedule pruning operators
  /// for a slice of the quality-sorted population and record the pruning counters in a
  /// "Population Pruning" data table.
  /// </summary>
  /// <remarks>
  /// Derived classes provide the model used for pruning through <c>CreateModel</c>.
  /// The <c>impactValuesCalculator</c> field is only assigned here by the cloning constructor;
  /// presumably derived classes initialize it (not visible in this file).
  /// </remarks>
  [StorableClass]
  [Item("SymbolicDataAnalysisSingleObjectivePruningAnalyzer", "An analyzer that prunes introns from trees in single objective symbolic data analysis problems.")]
  public abstract class SymbolicDataAnalysisSingleObjectivePruningAnalyzer : SymbolicDataAnalysisSingleObjectiveAnalyzer {
    private const string ProblemDataParameterName = "ProblemData";
    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";

    // NOTE: "UpdateInverval" is misspelled, but it is the key under which the parameter is
    // registered and looked up. It is left untouched because renaming it would change that key
    // (and presumably break previously persisted instances).
    private const string UpdateIntervalParameterName = "UpdateInverval";
    private const string UpdateCounterParameterName = "UpdateCounter";

    private const string PopulationSliceParameterName = "PopulationSlice";
    private const string PruningProbabilityParameterName = "PruningProbability";

    private const string NumberOfPrunedSubtreesParameterName = "PrunedSubtrees";
    private const string NumberOfPrunedTreesParameterName = "PrunedTrees";

    private const string RandomParameterName = "Random";
    private const string EstimationLimitsParameterName = "EstimationLimits";

    private const string PruneOnlyZeroImpactNodesParameterName = "PruneOnlyZeroImpactNodes";
    private const string NodeImpactThresholdParameterName = "ImpactThreshold";

    private const string FitnessCalculationPartitionParameterName = "FitnessCalculationPartition";

    // Toggles between the two phases of Apply(): false = record statistics, true = count the
    // call and possibly schedule pruning. It is neither [Storable] nor copied by the cloning
    // constructor, so a clone or a deserialized instance starts with false.
    private bool reentry;

    /// <summary>Calculator handed to every pruning operator created by <see cref="Apply"/>.</summary>
    [Storable]
    protected ISymbolicDataAnalysisSolutionImpactValuesCalculator impactValuesCalculator;

    #region parameter properties
    public IFixedValueParameter<BoolValue> PruneOnlyZeroImpactNodesParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[PruneOnlyZeroImpactNodesParameterName]; }
    }
    public IFixedValueParameter<DoubleValue> NodeImpactThresholdParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[NodeImpactThresholdParameterName]; }
    }
    public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
      get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
    }
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }
    private ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
      get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public ILookupParameter<IntRange> FitnessCalculationPartitionParameter {
      get { return (ILookupParameter<IntRange>)Parameters[FitnessCalculationPartitionParameterName]; }
    }
    private ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
    }
    public IValueParameter<IntValue> UpdateIntervalParameter {
      get { return (IValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    }
    public IValueParameter<IntValue> UpdateCounterParameter {
      get { return (IValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    }
    public IValueParameter<DoubleRange> PopulationSliceParameter {
      get { return (IValueParameter<DoubleRange>)Parameters[PopulationSliceParameterName]; }
    }
    public IValueParameter<DoubleValue> PruningProbabilityParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[PruningProbabilityParameterName]; }
    }
    public IFixedValueParameter<DoubleValue> NumberOfPrunedSubtreesParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[NumberOfPrunedSubtreesParameterName]; }
    }
    public IFixedValueParameter<DoubleValue> NumberOfPrunedTreesParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[NumberOfPrunedTreesParameterName]; }
    }
    #endregion
    #region properties
    // Convenience accessors: lookup parameters expose ActualValue, value parameters expose Value.
    protected IDataAnalysisProblemData ProblemData { get { return ProblemDataParameter.ActualValue; } }
    protected IntRange FitnessCalculationPartition { get { return FitnessCalculationPartitionParameter.ActualValue; } }
    protected ISymbolicDataAnalysisExpressionTreeInterpreter Interpreter { get { return InterpreterParameter.ActualValue; } }
    protected IntValue UpdateInterval { get { return UpdateIntervalParameter.Value; } }
    protected IntValue UpdateCounter { get { return UpdateCounterParameter.Value; } }
    protected DoubleRange PopulationSlice { get { return PopulationSliceParameter.Value; } }
    protected DoubleValue PruningProbability { get { return PruningProbabilityParameter.Value; } }
    protected DoubleValue PrunedSubtrees { get { return NumberOfPrunedSubtreesParameter.Value; } }
    protected DoubleValue PrunedTrees { get { return NumberOfPrunedTreesParameter.Value; } }
    protected DoubleLimit EstimationLimits { get { return EstimationLimitsParameter.ActualValue; } }
    protected IRandom Random { get { return RandomParameter.ActualValue; } }
    protected DoubleValue NodeImpactThreshold { get { return NodeImpactThresholdParameter.Value; } }
    protected BoolValue PruneOnlyZeroImpactNodes { get { return PruneOnlyZeroImpactNodesParameter.Value; } }
    #endregion

    #region IStatefulItem members
    /// <summary>Calls the base implementation and resets the update counter to 0.</summary>
    public override void InitializeState() {
      base.InitializeState();
      UpdateCounter.Value = 0;
    }
    /// <summary>Calls the base implementation and resets the update counter to 0.</summary>
    public override void ClearState() {
      base.ClearState();
      UpdateCounter.Value = 0;
    }
    #endregion

    [StorableConstructor]
    protected SymbolicDataAnalysisSingleObjectivePruningAnalyzer(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Adds the FitnessCalculationPartition lookup parameter when a deserialized instance
    /// does not contain it yet.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(FitnessCalculationPartitionParameterName))
        Parameters.Add(new LookupParameter<IntRange>(FitnessCalculationPartitionParameterName, ""));
    }

    /// <summary>Cloning constructor.</summary>
    /// <param name="original">The analyzer being cloned.</param>
    /// <param name="cloner">The cloner passed on to the base class.</param>
    protected SymbolicDataAnalysisSingleObjectivePruningAnalyzer(SymbolicDataAnalysisSingleObjectivePruningAnalyzer original, Cloner cloner)
      : base(original, cloner) {
      // NOTE(review): the calculator reference is shared with the original instead of being
      // cloned through the cloner - confirm that sharing is intended.
      impactValuesCalculator = original.impactValuesCalculator;
    }

    /// <summary>Registers all parameters with their default values.</summary>
    protected SymbolicDataAnalysisSingleObjectivePruningAnalyzer() {
      Parameters.Add(new ValueParameter<DoubleRange>(PopulationSliceParameterName, new DoubleRange(0.75, 1)));
      Parameters.Add(new ValueParameter<DoubleValue>(PruningProbabilityParameterName, new DoubleValue(0.5)));
      // analyzer parameters
      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called", new IntValue(0)));
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName));
      Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));

      Parameters.Add(new FixedValueParameter<DoubleValue>(NumberOfPrunedSubtreesParameterName, new DoubleValue(0)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(NumberOfPrunedTreesParameterName, new DoubleValue(0)));
      Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
      Parameters.Add(new FixedValueParameter<DoubleValue>(NodeImpactThresholdParameterName, new DoubleValue(0.0)));
      Parameters.Add(new FixedValueParameter<BoolValue>(PruneOnlyZeroImpactNodesParameterName, new BoolValue(false)));
      Parameters.Add(new LookupParameter<IntRange>(FitnessCalculationPartitionParameterName, ""));
    }

    /// <summary>
    /// Runs one of two alternating phases, selected by the private <c>reentry</c> flag.
    /// </summary>
    /// <remarks>
    /// While the flag is false, the current PrunedTrees / PrunedSubtrees values are appended to
    /// the "Population Pruning" table, the flag is set and <c>base.Apply()</c> is returned.
    /// While the flag is true, the update counter is incremented; if it differs from the update
    /// interval, <c>base.Apply()</c> is returned and the flag stays set (so no table row is added
    /// on that call). Otherwise the counter is reset, pruning operators are scheduled for the
    /// configured population slice and this analyzer is queued again so that the next execution
    /// records the statistics.
    /// </remarks>
    /// <returns>
    /// Either the result of <c>base.Apply()</c> or a collection containing the pruning
    /// operations followed by an operation that re-executes this analyzer.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The population slice does not describe a valid, non-empty index range.
    /// </exception>
    public override IOperation Apply() {
      if (reentry) {
        UpdateCounter.Value++;

        // Equality (not >=) is kept on purpose: it preserves the existing behavior for
        // intervals that the counter can never reach.
        if (UpdateCounter.Value != UpdateInterval.Value) return base.Apply();
        UpdateCounter.Value = 0;

        return SchedulePruning();
      }

      RecordPruningStatistics();
      reentry = true;

      return base.Apply();
    }

    // Builds the pruning operations for the selected slice of the sorted population and
    // re-queues this analyzer after them.
    private IOperation SchedulePruning() {
      var trees = SymbolicExpressionTreeParameter.ActualValue.ToList();
      var qualities = QualityParameter.ActualValue.ToList();

      // Pair every tree with its quality and sort so that better qualities come last:
      // ascending when maximizing, descending when minimizing.
      var population = trees.Zip(qualities, (tree, quality) => new { Tree = tree, Quality = quality }).ToList();
      Func<double, double, int> compare = (a, b) => Maximization.Value ? a.CompareTo(b) : b.CompareTo(a);
      population.Sort((a, b) => compare(a.Quality.Value, b.Quality.Value));

      // The slice holds relative positions; convert them to indices into the sorted population.
      var start = (int)Math.Round(PopulationSlice.Start * trees.Count);
      var end = (int)Math.Round(PopulationSlice.End * trees.Count);

      // 'end' is used as an inclusive index, so clamp a slice that ends at 1.0 to the last element.
      if (end == population.Count) end--;

      if (start >= end || end >= population.Count) throw new InvalidOperationException("Invalid PopulationSlice bounds.");

      PrunedSubtrees.Value = 0;
      PrunedTrees.Value = 0;

      // The next execution of this analyzer records the statistics instead of pruning again.
      reentry = false;

      var operations = new OperationCollection { Parallel = true };
      // Take exactly the individuals at indices start..end (inclusive). The former
      // Take(end) used 'end' as a count and overshot the slice whenever the slice did
      // not extend to the end of the population.
      foreach (var p in population.Skip(start).Take(end - start + 1)) {
        // Each selected individual is pruned with probability PruningProbability.
        if (Random.NextDouble() > PruningProbability.Value) continue;
        var op = new SymbolicDataAnalysisExpressionPruningOperator {
          Model = CreateModel(p.Tree, Interpreter, EstimationLimits.Lower, EstimationLimits.Upper),
          ImpactsCalculator = impactValuesCalculator,
          ProblemData = ProblemData,
          Random = Random,
          PruneOnlyZeroImpactNodes = PruneOnlyZeroImpactNodes.Value,
          NodeImpactThreshold = NodeImpactThreshold.Value,
          FitnessCalculationPartition = FitnessCalculationPartition
        };
        operations.Add(ExecutionContext.CreateChildOperation(op, ExecutionContext.Scope));
      }
      return new OperationCollection { operations, ExecutionContext.CreateOperation(this) };
    }

    // Appends the current pruning counters to the "Population Pruning" table, creating the
    // table (and its two rows) in the result collection on first use.
    // NOTE(review): this class only resets the counters to 0; whatever increments them is not
    // visible in this file - confirm where they are updated.
    private void RecordPruningStatistics() {
      DataTable table;

      if (ResultCollection.ContainsKey("Population Pruning")) {
        table = (DataTable)ResultCollection["Population Pruning"].Value;
      } else {
        table = new DataTable("Population Pruning");
        table.Rows.Add(new DataRow("Pruned Trees") { VisualProperties = { StartIndexZero = true } });
        table.Rows.Add(new DataRow("Pruned Subtrees") { VisualProperties = { StartIndexZero = true } });
        ResultCollection.Add(new Result("Population Pruning", table));
      }

      table.Rows["Pruned Trees"].Values.Add(PrunedTrees.Value);
      table.Rows["Pruned Subtrees"].Values.Add(PrunedSubtrees.Value);
    }

    /// <summary>
    /// Creates the model that is assigned to a pruning operator for the given tree.
    /// </summary>
    /// <param name="tree">The symbolic expression tree of the individual to prune.</param>
    /// <param name="interpreter">The interpreter looked up from the interpreter parameter.</param>
    /// <param name="lowerEstimationLimit">Lower estimation limit; <see cref="Apply"/> passes <c>EstimationLimits.Lower</c>.</param>
    /// <param name="upperEstimationLimit">Upper estimation limit; <see cref="Apply"/> passes <c>EstimationLimits.Upper</c>.</param>
    /// <returns>The model wrapping <paramref name="tree"/>.</returns>
    protected abstract ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
      double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue);
  }
}
Note: See TracBrowser for help on using the repository browser.