Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Operators/SymbolicExpressionTreeFPBuilder.cs @ 9419

Last change on this file since 9419 was 9419, checked in by bburlacu, 11 years ago

#1772: Refactoring of directed graph components, added code for correctly serializing vertices and edges. Added specific building blocks analyzers and new population diversity analyzer which correctly integrates with the parallel engine.

File size: 10.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Drawing;
4using System.IO;
5using System.Linq;
6using System.Text;
7using HeuristicLab.Common;
8using HeuristicLab.Core;
9using HeuristicLab.Data;
10using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
11using HeuristicLab.Operators;
12using HeuristicLab.Optimization;
13using HeuristicLab.Parameters;
14using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
15using HeuristicLab.Problems.DataAnalysis;
16using HeuristicLab.Problems.DataAnalysis.Symbolic;
17using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
18
19namespace HeuristicLab.EvolutionaryTracking {
20  /// <summary>
21  /// Operator which builds a so-called frequenct pattern tree
22  /// (a compact representation of the symbol relationships in the population)
23  /// </summary>
24  [Item("SymbolicExpressionTreeFPBuilder", "Builds a frequent-pattern tree out of the GP population.")]
25  [StorableClass]
26  public class SymbolicExpressionTreeFPBuilder : SingleSuccessorOperator {
27    private const string ResultsParameterName = "Results";
28    private const string SymbolicExpressionInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
29    private const string SymbolicRegressionProblemDataParameterName = "ProblemData";
30    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
31    private const string SymbolicExpressionTreeQualityParameterName = "Quality";
32
33    private SymbolicRegressionSolutionImpactValuesCalculator calculator;
34    //    private Dictionary<SymbolNode, double> averageNodeImpacts;
35
36    #region Parameter properties
37
38    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
39      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
40    }
41
42    public IScopeTreeLookupParameter<DoubleValue> SymbolicExpressionTreeQualityParameter {
43      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[SymbolicExpressionTreeQualityParameterName]; }
44    }
45
46    public ILookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
47      get {
48        return
49          (ILookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>)
50          Parameters[SymbolicExpressionInterpreterParameterName];
51      }
52    }
53
54    public ILookupParameter<RegressionProblemData> SymbolicRegressionProblemDataParameter {
55      get { return (ILookupParameter<RegressionProblemData>)Parameters[SymbolicRegressionProblemDataParameterName]; }
56    }
57
58    #endregion
59
60    [StorableConstructor]
61    private SymbolicExpressionTreeFPBuilder(bool deserializing)
62      : base(deserializing) {
63    }
64
65    private SymbolicExpressionTreeFPBuilder(SymbolicExpressionTreeFPBuilder original, Cloner cloner)
66      : base(original, cloner) {
67    }
68
69    public override IDeepCloneable Clone(Cloner cloner) {
70      return new SymbolicExpressionTreeFPBuilder(this, cloner);
71    }
72
73    public SymbolicExpressionTreeFPBuilder()
74      : base() {
75      Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
76      Parameters.Add(new LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicExpressionInterpreterParameterName, "Interpreter for symbolic expression trees"));
77      Parameters.Add(new LookupParameter<RegressionProblemData>(SymbolicRegressionProblemDataParameterName, "The symbolic data analysis problem."));
78      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze"));
79      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(SymbolicExpressionTreeQualityParameterName, "The qualities of the symbolic expression trees"));
80
81      calculator = new SymbolicRegressionSolutionImpactValuesCalculator();
82    }
83
84    [StorableHook(HookType.AfterDeserialization)]
85    private void AfterDeserialization() {
86    }
87
88    #region IStatefulItem members
89
90    public override void InitializeState() {
91      base.InitializeState();
92    }
93
94    public override void ClearState() {
95      base.ClearState();
96    }
97
98    #endregion
99
100    public override IOperation Apply() {
101      var trees = SymbolicExpressionTreeParameter.ActualValue;
102      var qualities = SymbolicExpressionTreeQualityParameter.ActualValue;
103
104      if (trees == null || qualities == null) return base.Apply();
105
106      if (trees.Length != qualities.Length)
107        throw new Exception("Error: trees and qualities array sizes do not match!");
108
109      var graph = new FPGraph();
110      var canonicalSorter = new SymbolicExpressionTreeCanonicalSorter();
111
112      var nodeImpacts = new Dictionary<SymbolNode, IList<double>>();
113      var impactsCalculator = new SymbolicRegressionSolutionImpactValuesCalculator();
114
115      for (int i = 0; i != trees.Length; ++i) {
116        var model = new SymbolicRegressionModel(trees[i], SymbolicExpressionTreeInterpreterParameter.ActualValue);
117
118        var root = trees[i].Root;
119        var originalQuality = qualities[i].Value;
120
121        //        canonicalSorter.SortSubtrees(root);
122        foreach (var node in root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix())
123          MergeNodesAndImpacts(graph, model, node, originalQuality, nodeImpacts, impactsCalculator, 0);
124      }
125
126      WriteGraphAndImpacts(graph, nodeImpacts, Environment.GetFolderPath(Environment.SpecialFolder.UserProfile) + "\\fpgraph.dot", 0.0);
127      WriteGraphAndImpacts(graph, nodeImpacts, Environment.GetFolderPath(Environment.SpecialFolder.UserProfile) + "\\prunedfpgraph.dot", 0.1);
128      return base.Apply();
129    }
130
131    private void MergeNodesAndImpacts(FPGraph graph, ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node,
132                                      double originalQuality, IDictionary<SymbolNode, IList<double>> nodeImpacts,
133                                      ISymbolicDataAnalysisSolutionImpactValuesCalculator impactValuesCalculator, int level = 0) {
134      var label = node.Label();
135      var symbolNode = graph.GetNode(label, level);
136      if (symbolNode == null) {
137        symbolNode = new SymbolNode { Label = label, Symbol = node.Symbol, Weight = 1 };
138        graph.AddNode(symbolNode, level);
139      } else {
140        symbolNode.Weight += 1;
141      }
142      var problemData = SymbolicRegressionProblemDataParameter.ActualValue;
143
144      var impact = impactValuesCalculator.CalculateImpactValue(model, node, problemData, problemData.TrainingIndices, originalQuality);
145      if (nodeImpacts.ContainsKey(symbolNode)) {
146        nodeImpacts[symbolNode].Add(impact);
147      } else {
148        nodeImpacts.Add(symbolNode, new List<double> { impact });
149      }
150      foreach (var s in node.Subtrees) {
151        MergeNodesAndImpacts(graph, model, s, originalQuality, nodeImpacts, impactValuesCalculator, level + 1);
152
153        var childLabel = s.Label();
154        var childSymbolNode = graph.GetNode(childLabel, level + 1);
155        if (childSymbolNode == null)
156          throw new ArgumentException("SymbolNode cannot be null!");
157
158        if (symbolNode.OutEdges == null || symbolNode.OutEdges.Count == 0)
159          symbolNode.AddForwardArc(childSymbolNode);
160        else {
161          var arc = symbolNode.OutEdges.Find(e => e.Target == childSymbolNode);
162          if (arc == null)
163            symbolNode.AddForwardArc(childSymbolNode);
164          else arc.Weight += 1;
165        }
166      }
167    }
168
169    private static void WriteGraphAndImpacts(FPGraph graph, IDictionary<SymbolNode, IList<double>> nodeImpacts, string path, double pruneFactor = 0.1) {
170      var nl = Environment.NewLine;
171      const string shape = "rectangle";
172
173      var avgImpacts = nodeImpacts.ToDictionary(n => n.Key, n => n.Value.Average());
174
175      double max = avgImpacts.Values.Max();
176      double min = avgImpacts.Values.Min();
177
178      long total = (long)graph.Nodes.Sum(node => node.Weight);
179
180      var tuples = (from l in graph.Levels.ToList()
181                    let sum = l.Value.Sum(n => n.Weight)
182                    from node in l.Value
183                    where node.Weight / sum > pruneFactor
184                    select new Tuple<SymbolNode, int, double>(node, l.Key, node.Weight / sum)
185                   ).ToList();
186
187      using (var file = new StreamWriter(path)) {
188        var sb = new StringBuilder();
189        file.WriteLine("digraph {");
190
191        var hashset = new HashSet<SymbolNode>(tuples.Select(t => t.Item1));
192        foreach (var tuple in tuples) {
193          var node = tuple.Item1;
194          var level = tuple.Item2;
195          node.Label = node.Label + ":" + level;
196          var relativeWeight = tuple.Item3;
197          var impact = avgImpacts[node];
198          if (double.IsInfinity(impact)) {
199            throw new ArgumentOutOfRangeException("Impact value cannot be infinite!");
200          }
201          Color color;
202          if (impact.IsAlmost(0.0)) {
203            color = Color.White;
204          } else if (impact < 0.0) {
205            color = Color.FromArgb((int)(impact / min * 255), Color.Red);
206          } else {
207            color = Color.FromArgb((int)(impact / max * 255), Color.Green);
208          }
209          file.WriteLine("\t\"" + node.Label + "\" [shape=" + shape + ", style=filled, fillcolor=\"" + color.ToHex() + "\""
210                         + ", label=\"" + node.Label + "\\n" + string.Format("{0:0.00%}", node.Weight / total)
211                         + "\\n" + string.Format("{0:0.00%}", relativeWeight) + "\", rank=same]");
212        }
213        foreach (var node in tuples.Select(t => t.Item1)) {
214          if (node.OutEdges == null || node.OutEdges.Count == 0) continue;
215          foreach (var arc in node.OutEdges) {
216            if (!hashset.Contains(arc.Target)) continue;
217            //            double relativeWeight = arc.Weight / node.OutEdges.Sum(a => a.Weight);
218            sb.Clear();
219            sb.Append("\t\"" + node.Label + "\" -> \"" + arc.Target.Label + "\""
220              //                        + "[label=\"" + string.Format("{0:0.00%}", relativeWeight) + "\"]"
221                      );
222            file.WriteLine(sb);
223          }
224        }
225        file.WriteLine("}");
226      }
227    }
228  }
229}
Note: See TracBrowser for help on using the repository browser.