source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Analyzers/SymbolicExpressionTreePruningAnalyzer.cs @ 9963

Last change on this file since 9963 was 9963, checked in by bburlacu, 8 years ago

#1772: Merged changes from the trunk and other branches. Added new ExtendedSymbolicExpressionTreeCanvas control for the visual exploration of tree genealogies. Reorganized some files and folders.

File size: 11.6 KB
Line 
1using System.Linq;
2using HeuristicLab.Analysis;
3using HeuristicLab.Common;
4using HeuristicLab.Core;
5using HeuristicLab.Data;
6using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
7using HeuristicLab.Operators;
8using HeuristicLab.Optimization;
9using HeuristicLab.Parameters;
10using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
11using HeuristicLab.Problems.DataAnalysis;
12using HeuristicLab.Problems.DataAnalysis.Symbolic;
13using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
14
namespace HeuristicLab.EvolutionaryTracking {
  /// <summary>
  /// Analyzer that walks over all symbolic expression trees found in the scope tree, looks for
  /// subtrees whose impact value (as reported by the impact values calculator) is below
  /// <see cref="ImpactThreshold"/> and replaces each such subtree by a single constant node.
  /// Optionally a subtree is only replaced when the fraction of trees that contain it reaches
  /// the configured frequency threshold. The number of pruned nodes and pruned trees is appended
  /// to the "Pruning" data table in the results collection on every analysis run.
  /// </summary>
  [Item("SymbolicExpressionTreePruningAnalyzer", "An analyzer that identifies introns in the population and optionally removes them.")]
  [StorableClass]
  public sealed class SymbolicExpressionTreePruningAnalyzer : SingleSuccessorOperator, IAnalyzer {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeQualityParameterName = "Quality";
    private const string UpdateIntervalParameterName = "UpdateInterval";
    private const string UpdateCounterParameterName = "UpdateCounter";
    private const string MatchVariablesParameterName = "MatchVariableNames";
    private const string MatchVariableWeightsParameterName = "MatchVariableWeights";
    private const string MatchConstantValuesParameterName = "MatchConstantValues";
    private const string ResultsParameterName = "Results";
    private const string SymbolicExpressionInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string SymbolicRegressionProblemDataParameterName = "ProblemData";
    private const string FrequencyThresholdParameterName = "FrequencyThreshold";
    private const string IgnoreSubtreeFrequenciesParameterName = "IgnoreFrequencies";

    // names of the result table and of its two rows
    private const string PruningResultName = "Pruning";
    private const string PrunedNodesRowName = "Pruned Nodes";
    private const string PrunedTreesRowName = "Pruned Trees";

    // a subtree whose impact value is below this limit is a candidate for pruning
    private const double ImpactThreshold = 0.001;

    [Storable]
    private SymbolicRegressionSolutionImpactValuesCalculator calculator;
    [Storable]
    private SymbolicExpressionTreeNodeSimilarityComparer comparer;

    #region Parameters
    public ValueParameter<IntValue> UpdateIntervalParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    }
    public ValueParameter<IntValue> UpdateCounterParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    }
    public ValueParameter<BoolValue> MatchVariableNamesParameter {
      get { return (ValueParameter<BoolValue>)Parameters[MatchVariablesParameterName]; }
    }
    public ValueParameter<BoolValue> MatchVariableWeightsParameter {
      get { return (ValueParameter<BoolValue>)Parameters[MatchVariableWeightsParameterName]; }
    }
    public ValueParameter<BoolValue> MatchConstantValuesParameter {
      get { return (ValueParameter<BoolValue>)Parameters[MatchConstantValuesParameterName]; }
    }
    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public IScopeTreeLookupParameter<DoubleValue> SymbolicExpressionTreeQualityParameter {
      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[SymbolicExpressionTreeQualityParameterName]; }
    }
    public LookupParameter<ResultCollection> ResultsParameter {
      get { return (LookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    public ILookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ILookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicExpressionInterpreterParameterName]; }
    }
    public ILookupParameter<RegressionProblemData> SymbolicRegressionProblemDataParameter {
      get { return (ILookupParameter<RegressionProblemData>)Parameters[SymbolicRegressionProblemDataParameterName]; }
    }
    public ValueParameter<DoubleValue> FrequencyThresholdParameter {
      get { return (ValueParameter<DoubleValue>)Parameters[FrequencyThresholdParameterName]; }
    }
    public ValueParameter<BoolValue> IgnoreSubtreeFrequenciesParameter {
      get { return (ValueParameter<BoolValue>)Parameters[IgnoreSubtreeFrequenciesParameterName]; }
    }
    #endregion

    #region Parameter properties
    public bool MatchValues {
      get { return MatchConstantValuesParameter.Value.Value; }
    }
    public bool MatchWeights {
      get { return MatchVariableWeightsParameter.Value.Value; }
    }
    public bool MatchVariables {
      get { return MatchVariableNamesParameter.Value.Value; }
    }
    public IntValue UpdateCounter {
      get { return UpdateCounterParameter.Value; }
    }
    public IntValue UpdateInterval {
      get { return UpdateIntervalParameter.Value; }
    }
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    public SymbolicDataAnalysisExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public RegressionProblemData ProblemData {
      get { return SymbolicRegressionProblemDataParameter.ActualValue; }
    }
    public DoubleValue FrequencyThreshold {
      get { return FrequencyThresholdParameter.Value; }
    }
    public bool IgnoreSubtreesFrequencies {
      get { return IgnoreSubtreeFrequenciesParameter.Value.Value; }
    }
    #endregion

    [StorableConstructor]
    private SymbolicExpressionTreePruningAnalyzer(bool deserializing) : base(deserializing) { }
    private SymbolicExpressionTreePruningAnalyzer(SymbolicExpressionTreePruningAnalyzer original, Cloner cloner)
      : base(original, cloner) {
      // NOTE(review): both helper objects are shared with the original instead of being cloned.
      // The comparer is reconfigured on every analysis run (see Apply), so each run uses the
      // settings of the instance that executes it; confirm that sharing is acceptable when
      // clones are executed concurrently.
      calculator = original.calculator;
      comparer = original.comparer;
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicExpressionTreePruningAnalyzer(this, cloner);
    }

    /// <summary>
    /// Creates the analyzer with all of its parameters. Every value parameter receives a default
    /// value so that <see cref="Apply"/> can be executed without further configuration.
    /// </summary>
    public SymbolicExpressionTreePruningAnalyzer() {
      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(SymbolicExpressionTreeQualityParameterName, "The qualities of the symbolic expression trees"));
      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the pruning analysis should be applied.", new IntValue(1)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
      Parameters.Add(new ValueParameter<BoolValue>(MatchVariablesParameterName, "Specify if the symbolic expression tree comparer should match variable names.", new BoolValue(true)));
      Parameters.Add(new ValueParameter<BoolValue>(MatchVariableWeightsParameterName, "Specify if the symbolic expression tree comparer should match variable weights.", new BoolValue(true)));
      Parameters.Add(new ValueParameter<BoolValue>(MatchConstantValuesParameterName, "Specify if the symbolic expression tree comparer should match constant values.", new BoolValue(true)));
      Parameters.Add(new LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicExpressionInterpreterParameterName, "Interpreter for symbolic expression trees"));
      Parameters.Add(new LookupParameter<RegressionProblemData>(SymbolicRegressionProblemDataParameterName, "The symbolic data analysis problem."));
      Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
      // Without default values both parameters below hold null and Apply() fails with a
      // NullReferenceException as soon as they are read. By default frequencies are ignored,
      // i.e. a subtree is pruned based on its impact value alone.
      Parameters.Add(new ValueParameter<DoubleValue>(FrequencyThresholdParameterName, "The frequency threshold a fragment has to exceed before being removed.", new DoubleValue(0.0)));
      Parameters.Add(new ValueParameter<BoolValue>(IgnoreSubtreeFrequenciesParameterName, "Specifies whether or not subtree frequencies should be taken into account.", new BoolValue(true)));

      calculator = new SymbolicRegressionSolutionImpactValuesCalculator();
      comparer = new SymbolicExpressionTreeNodeSimilarityComparer { MatchConstantValues = MatchValues, MatchVariableWeights = MatchWeights, MatchVariableNames = MatchVariables };

      UpdateCounterParameter.Hidden = true;
      UpdateIntervalParameter.Hidden = true;
    }

    /// <summary>
    /// Increments the update counter and, once it has reached the update interval, prunes all
    /// trees and records the number of pruned nodes and pruned trees in the "Pruning" table.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      UpdateCounter.Value++;
      // '<' instead of '!=': the analysis still fires when the interval has been lowered below
      // the current counter value (with '!=' it would never fire again in that situation)
      if (UpdateCounter.Value < UpdateInterval.Value) return base.Apply();
      UpdateCounter.Value = 0;

      // the comparer is created once in the constructor; synchronize it with the current
      // parameter values so that changes made after construction take effect
      comparer.MatchConstantValues = MatchValues;
      comparer.MatchVariableWeights = MatchWeights;
      comparer.MatchVariableNames = MatchVariables;

      int prunedNodes = 0;
      int prunedTrees = 0;
      DataTable prunedSubtreesTable = GetOrCreatePruningTable();

      var trees = SymbolicExpressionTreeParameter.ActualValue.ToList();
      var qualities = SymbolicExpressionTreeQualityParameter.ActualValue.ToList();
      var prunedTreesMap = new bool[trees.Count];

      for (int i = 0; i != trees.Count; ++i) {
        var model = new SymbolicRegressionModel(trees[i], SymbolicExpressionTreeInterpreter);
        var root = trees[i].Root.GetSubtree(0).GetSubtree(0); // skip the program root symbol and the start symbol
        // the node list is materialized up front because the tree is modified while iterating
        var nodes = root.IterateNodesPrefix().Select((node, index) => new { Node = node, Index = index }).ToList();
        for (int j = 0; j < nodes.Count; ++j) {
          var n = nodes[j];
          if (n.Node is ConstantTreeNode) continue;

          // nodes without a parent or directly below the start symbol are never replaced,
          // so there is no point in calculating their impact value
          var parent = n.Node.Parent;
          if (parent == null || parent.Symbol is StartSymbol) continue;

          // NOTE(review): the quality passed here is the one looked up before any pruning of
          // this tree took place - confirm this is intended when several subtrees are pruned.
          double impact = calculator.CalculateImpactValue(model, n.Node, ProblemData, ProblemData.TrainingIndices, qualities[i].Value);
          if (!(impact < ImpactThreshold)) continue;

          if (!IgnoreSubtreesFrequencies) {
            double frequency = (double)trees.Count(t => t.Root.ContainsFragment(new Fragment { Root = n.Node, Index = n.Index }, comparer)) / trees.Count;
            // only remove the node if its frequency reaches the threshold
            if (frequency < FrequencyThreshold.Value) continue;
          }

          double replacementValue = calculator.CalculateReplacementValue(model, n.Node, ProblemData, ProblemData.TrainingIndices);
          var constantNode = new ConstantTreeNode(new Constant()) { Value = replacementValue };
          var index = parent.IndexOfSubtree(n.Node);
          parent.RemoveSubtree(index);
          parent.InsertSubtree(index, constantNode);

          // Skip the descendants of the removed subtree. In prefix order they occupy the next
          // (length - 1) list entries; the loop increment then moves on to the first node after
          // the subtree. (Advancing by 'length' would silently skip that node as well.)
          int length = n.Node.GetLength();
          j += length - 1;

          if (!prunedTreesMap[i]) {
            prunedTrees++;
            prunedTreesMap[i] = true;
          }
          // 'length' nodes were replaced by a single constant node
          prunedNodes += (length - 1);
        }
      }

      prunedSubtreesTable.Rows[PrunedNodesRowName].Values.Add(prunedNodes);
      prunedSubtreesTable.Rows[PrunedTreesRowName].Values.Add(prunedTrees);

      return base.Apply();
    }

    // Returns the "Pruning" data table from the results collection; creates the table with its
    // two rows and registers it in the results collection if it does not exist yet.
    private DataTable GetOrCreatePruningTable() {
      if (Results.ContainsKey(PruningResultName)) {
        return (DataTable)Results[PruningResultName].Value;
      }
      var table = new DataTable(PruningResultName);
      table.Rows.Add(new DataRow(PrunedNodesRowName) { VisualProperties = { StartIndexZero = true } });
      table.Rows.Add(new DataRow(PrunedTreesRowName) { VisualProperties = { StartIndexZero = true } });
      Results.Add(new Result(PruningResultName, table));
      return table;
    }

    public bool EnabledByDefault { get { return true; } }

  }
}
Note: See TracBrowser for help on using the repository browser.