Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/Analyzers/SymbolicDataAnalysisSchemaFrequencyAnalyzer.cs @ 13624

Last change on this file since 13624 was 13624, checked in by bburlacu, 8 years ago

#1772: Improve the SchemaFrequencyAnalyzer and add calculation of schema genotype and phenotype diversities.

File size: 17.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using System.Threading.Tasks;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.EvolutionTracking;
32using HeuristicLab.Optimization;
33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35
36namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Tracking.Analyzers {
37  [Item("SymbolicDataAnalysisSchemaFrequencyAnalyzer", "An analyzer which counts schema frequencies in the population.")]
38  [StorableClass]
39  public class SymbolicDataAnalysisSchemaFrequencyAnalyzer : EvolutionTrackingAnalyzer<ISymbolicExpressionTree> {
40    private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
41    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
42    private const string ProblemDataParameterName = "ProblemData";
43    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
44    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
45    private const string MaximumDegreeOfParallelismParameterName = "MaximumDegreeOfParallelism";
46
47    private static readonly Dictionary<string, string> ShortNames = new Dictionary<string, string> {
48    { "Addition", "+" },
49    { "Subtraction", "-" },
50    { "Multiplication", "*" },
51    { "Division", "/" },
52    { "Exponential", "exp" },
53    { "Logarithm", "log" }
54  };
55
56    [Storable]
57    private readonly SymbolicExpressionTreePhenotypicSimilarityCalculator phenotypicSimilarityCalculator;
58
59    [Storable]
60    private readonly SymbolicExpressionTreeBottomUpSimilarityCalculator genotypicSimilarityCalculator;
61
62    [Storable]
63    private readonly ISymbolicExpressionTreeNodeEqualityComparer comparer;
64    private QueryMatch qm;
65
66    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
67      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
68    }
69
70    public IFixedValueParameter<IntValue> MaximumDegreeOfParallelismParameter {
71      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumDegreeOfParallelismParameterName]; }
72    }
73
74    public IFixedValueParameter<IntValue> MinimumSchemaLengthParameter {
75      get { return (IFixedValueParameter<IntValue>)Parameters[MinimumSchemaLengthParameterName]; }
76    }
77
78    public IFixedValueParameter<BoolValue> StrictSchemaMatchingParameter {
79      get { return (IFixedValueParameter<BoolValue>)Parameters[StrictSchemaMatchingParameterName]; }
80    }
81
82    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
83      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
84    }
85
86    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
87      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
88    }
89
90    public bool ExecuteInParallel {
91      get { return ExecuteInParallelParameter.Value.Value; }
92      set { ExecuteInParallelParameter.Value.Value = value; }
93    }
94
95    public int MaximumDegreeOfParallelism {
96      get { return MaximumDegreeOfParallelismParameter.Value.Value; }
97      set { MaximumDegreeOfParallelismParameter.Value.Value = value; }
98    }
99
100    public int MinimumSchemaLength {
101      get { return MinimumSchemaLengthParameter.Value.Value; }
102      set { MinimumSchemaLengthParameter.Value.Value = value; }
103    }
104
105    public bool StrictSchemaMatching {
106      get { return StrictSchemaMatchingParameter.Value.Value; }
107      set { StrictSchemaMatchingParameter.Value.Value = value; }
108    }
109
110    public SymbolicDataAnalysisSchemaFrequencyAnalyzer() {
111      comparer = new SymbolicExpressionTreeNodeEqualityComparer {
112        MatchConstantValues = false,
113        MatchVariableNames = true,
114        MatchVariableWeights = false
115      };
116      qm = new QueryMatch(comparer) { MatchParents = true };
117      phenotypicSimilarityCalculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
118      genotypicSimilarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { SolutionVariableName = "SymbolicExpressionTree" };
119      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
120      Parameters.Add(new FixedValueParameter<IntValue>(MaximumDegreeOfParallelismParameterName, new IntValue(4)));
121      Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(true)));
122      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(true)));
123      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
124      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
125    }
126
127    protected SymbolicDataAnalysisSchemaFrequencyAnalyzer(SymbolicDataAnalysisSchemaFrequencyAnalyzer original,
128      Cloner cloner) : base(original, cloner) {
129      comparer = cloner.Clone(original.comparer);
130      phenotypicSimilarityCalculator = cloner.Clone(original.phenotypicSimilarityCalculator);
131      genotypicSimilarityCalculator = cloner.Clone(original.genotypicSimilarityCalculator);
132      MinimumSchemaLength = original.MinimumSchemaLength;
133      StrictSchemaMatching = original.StrictSchemaMatching;
134      qm = new QueryMatch(comparer) { MatchParents = true };
135    }
136
137    public override IDeepCloneable Clone(Cloner cloner) {
138      return new SymbolicDataAnalysisSchemaFrequencyAnalyzer(this, cloner);
139    }
140
141    [StorableConstructor]
142    protected SymbolicDataAnalysisSchemaFrequencyAnalyzer(bool deserializing) : base(deserializing) { }
143
144    [StorableHook(HookType.AfterDeserialization)]
145    private void AfterDeserialization() {
146      qm = new QueryMatch(comparer) { MatchParents = true };
147    }
148
149    public override IOperation Apply() {
150      if (PopulationGraph == null || Generation.Value == 0 ||
151          (Generation.Value > 1 && Generation.Value % UpdateInterval.Value != 0))
152        return base.Apply();
153
154      comparer.MatchVariableWeights = comparer.MatchConstantValues = StrictSchemaMatching;
155      phenotypicSimilarityCalculator.Interpreter = InterpreterParameter.ActualValue;
156      phenotypicSimilarityCalculator.ProblemData = ProblemDataParameter.ActualValue;
157      var generation = Generation.Value;
158
159      var population = PopulationGraph.GetByRank(generation).Cast<IGenealogyGraphNode<ISymbolicExpressionTree>>().ToList();
160      var vertices = population.Where(x => x.InDegree == 2).OrderByDescending(x => x.Quality).ToList();
161      ResultCollection resultCollection;
162      if (Results.ContainsKey("Schema Frequencies")) {
163        resultCollection = (ResultCollection)Results["Schema Frequencies"].Value;
164      } else {
165        resultCollection = new ResultCollection();
166        var result = new Result("Schema Frequencies", resultCollection);
167        Results.Add(result);
168      }
169
170      var mostFrequentPerGeneration = new List<Tuple<string, double[]>>();
171
172      var trees = population.Select(x => x.Data).ToList();
173      var qualities = population.Select(x => x.Quality).ToList();
174      var genSimMatrix = new double[trees.Count, trees.Count];
175      var phenSimMatrix = new double[trees.Count, trees.Count];
176
177      for (int i = 0; i < trees.Count - 1; ++i) {
178        for (int j = i + 1; j < trees.Count; ++j) {
179          genSimMatrix[i, j] = double.NaN;
180          phenSimMatrix[i, j] = double.NaN;
181        }
182      }
183      var schemas = SchemaCreator.GenerateSchemas(vertices, MinimumSchemaLength, StrictSchemaMatching).ToList();
184      var schemaStrings = schemas.Select(x => SubtreeToString(x.Root.GetSubtree(0).GetSubtree(0), StrictSchemaMatching)).ToList();
185      int[][] matchingIndices;
186      if (ExecuteInParallel) {
187        matchingIndices = new int[schemas.Count][];
188        Parallel.For(0, schemas.Count, new ParallelOptions { MaxDegreeOfParallelism = MaximumDegreeOfParallelism }, i => {
189          var schema = schemas[i];
190          matchingIndices[i] = Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v].Root, schema.Root)).ToArray();
191        });
192      } else {
193        matchingIndices = schemas.Select(x => Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v].Root, x.Root)).ToArray()).ToArray();
194      }
195
196      var schemaStatistics = new List<Tuple<string, double[]>>();
197      var avgPopQuality = qualities.Average();
198
199      if (ExecuteInParallel) {
200        var locker = new object();
201        Parallel.For(0, schemas.Count, new ParallelOptions { MaxDegreeOfParallelism = MaximumDegreeOfParallelism }, i => {
202          var indices = matchingIndices[i];
203          if (indices.Length > 1) {
204            var avgSchemaQuality = indices.Average(x => qualities[x]);
205            var avgLength = indices.Average(x => trees[x].Length);
206            var avgGenSim = AverageSimilarity(indices, trees, genSimMatrix, genotypicSimilarityCalculator.CalculateSimilarity);
207            var avgPhenSim = AverageSimilarity(indices, trees, phenSimMatrix, phenotypicSimilarityCalculator.CalculateSimilarity);
208            var array = new[] { indices.Length, avgSchemaQuality, avgLength, avgGenSim, avgPhenSim, avgPopQuality };
209            var t = new Tuple<string, double[]>(schemaStrings[i], array);
210            lock (locker) {
211              schemaStatistics.Add(t);
212            }
213          }
214        });
215      } else {
216        for (int i = 0; i < schemas.Count; ++i) {
217          var indices = matchingIndices[i];
218          if (indices.Length > 1) {
219            var avgSchemaQuality = indices.Average(x => qualities[x]);
220            var avgLength = indices.Average(x => trees[x].Length);
221            var avgGenSim = AverageSimilarity(indices, trees, genSimMatrix, genotypicSimilarityCalculator.CalculateSimilarity);
222            var avgPhenSim = AverageSimilarity(indices, trees, phenSimMatrix, phenotypicSimilarityCalculator.CalculateSimilarity);
223            var array = new[] { indices.Length, avgSchemaQuality, avgLength, avgGenSim, avgPhenSim, avgPopQuality };
224            var t = new Tuple<string, double[]>(schemaStrings[i], array);
225            schemaStatistics.Add(t);
226          }
227        }
228      }
229
230      if (!schemaStatistics.Any()) return base.Apply(); // shouldn't ever happen
231      var columnNames = new[] { "Count", "Avg Quality", "Avg Length", "Avg Genotype Similarity", "Avg Phenotype Similarity", "Avg Population Quality" };
232      var mostFrequent = new DoubleMatrix(schemaStatistics.Count, schemaStatistics[0].Item2.Length);
233      mostFrequent.SortableView = true;
234      schemaStatistics.Sort((a, b) => { if (a.Item2[0].Equals(b.Item2[0])) return b.Item2[1].CompareTo(a.Item2[1]); return b.Item2[0].CompareTo(a.Item2[0]); });
235      mostFrequentPerGeneration.Add(new Tuple<string, double[]>(schemaStatistics[0].Item1, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray()));
236      mostFrequent.RowNames = schemaStatistics.Select(x => x.Item1);
237      mostFrequent.ColumnNames = columnNames;
238      for (int i = 0; i < schemaStatistics.Count; ++i) {
239        var values = schemaStatistics[i].Item2;
240        for (int j = 0; j < values.Length; ++j) {
241          mostFrequent[i, j] = values[j];
242        }
243      }
244      resultCollection.Add(new Result("Generation " + generation + " Most Frequent Schemas", mostFrequent));
245
246      columnNames = new[] { "Generation", "Count", "Avg Quality", "Avg Length", "Avg Genotype Similarity", "Avg Phenotype Similarity", "Avg Population Quality" };
247      // sort according to quality, then count
248      schemaStatistics.Sort((a, b) => { if (a.Item2[1].Equals(b.Item2[1])) return b.Item2[0].CompareTo(a.Item2[0]); return b.Item2[1].CompareTo(a.Item2[1]); });
249      DoubleMatrix bestSchemasMatrix;
250      if (!resultCollection.ContainsKey("Best Schemas")) {
251        bestSchemasMatrix = new DoubleMatrix(1, 7);
252        bestSchemasMatrix.RowNames = new[] { schemaStatistics[0].Item1 };
253        bestSchemasMatrix.ColumnNames = columnNames;
254        var values = new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray();
255        for (int i = 0; i < values.Length; ++i)
256          bestSchemasMatrix[0, i] = values[i];
257        resultCollection.Add(new Result("Best Schemas", bestSchemasMatrix));
258      } else {
259        bestSchemasMatrix = (DoubleMatrix)resultCollection["Best Schemas"].Value;
260        resultCollection["Best Schemas"].Value = AddRow(bestSchemasMatrix, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray(), schemaStatistics[0].Item1);
261      }
262
263      // sort according to count, then quality
264      schemaStatistics.Sort((a, b) => { if (a.Item2[0].Equals(b.Item2[0])) return b.Item2[1].CompareTo(a.Item2[1]); return b.Item2[0].CompareTo(a.Item2[0]); });
265      DoubleMatrix frequentSchemasMatrix;
266      if (!resultCollection.ContainsKey("Most Frequent Schemas")) {
267        frequentSchemasMatrix = new DoubleMatrix(1, 7);
268        frequentSchemasMatrix.RowNames = new[] { schemaStatistics[0].Item1 };
269        frequentSchemasMatrix.ColumnNames = columnNames;
270        var values = new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray();
271        for (int i = 0; i < values.Length; ++i)
272          frequentSchemasMatrix[0, i] = values[i];
273        resultCollection.Add(new Result("Most Frequent Schemas", frequentSchemasMatrix));
274      } else {
275        frequentSchemasMatrix = (DoubleMatrix)resultCollection["Most Frequent Schemas"].Value;
276        resultCollection["Most Frequent Schemas"].Value = AddRow(frequentSchemasMatrix, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray(), schemaStatistics[0].Item1);
277      }
278      return base.Apply();
279    }
280
281    private static DoubleMatrix AddRow(DoubleMatrix m, double[] row, string rowName) {
282      if (row.Length != m.Columns)
283        throw new Exception("Row value count must match matrix column count.");
284      var x = new DoubleMatrix(m.Rows + 1, m.Columns);
285      x.RowNames = m.RowNames.Concat(new[] { rowName });
286      x.ColumnNames = m.ColumnNames;
287      for (int i = 0; i < m.Rows; ++i)
288        for (int j = 0; j < m.Columns; ++j)
289          x[i, j] = m[i, j];
290      for (int j = 0; j < m.Columns; ++j)
291        x[m.Rows, j] = row[j];
292      return x;
293    }
294
295    private static double AverageSimilarity(int[] indices, List<ISymbolicExpressionTree> trees, double[,] similarityMatrix, Func<ISymbolicExpressionTree, ISymbolicExpressionTree, double> similarityFunction) {
296      var agg = 0d;
297      int len = indices.Length;
298      var count = len * (len - 1) / 2d;
299      for (int i = 0; i < indices.Length - 1; ++i) {
300        var a = indices[i];
301        for (int j = i + 1; j < indices.Length; ++j) {
302          var b = indices[j];
303          if (double.IsNaN(similarityMatrix[a, b])) similarityMatrix[a, b] = similarityFunction(trees[a], trees[b]);
304          agg += similarityMatrix[a, b];
305        }
306      }
307      return agg / count;
308    }
309
310    private static string SubtreeToString(ISymbolicExpressionTreeNode node, bool strict = false) {
311      StringBuilder strBuilder = new StringBuilder();
312      // internal nodes or leaf nodes?
313      if (node is AnySubtree)
314        return "# ";
315
316      if (node.SubtreeCount > 0) {
317        strBuilder.Append("(");
318        // symbol on same line as '('
319        string label = string.Empty;
320        if (node is AnyNode)
321          label = "=";
322        else {
323          var name = node.Symbol.Name;
324          label = ShortNames.ContainsKey(name) ? ShortNames[name] : name;
325        }
326        strBuilder.Append(label + " ");
327        // each subtree expression on a new line
328        // and closing ')' also on new line
329        foreach (var subtree in node.Subtrees) {
330          strBuilder.Append(SubtreeToString(subtree, strict));
331        }
332        strBuilder.Append(") ");
333      } else {
334        // symbol in the same line with as '(' and ')'
335        var v = node as VariableTreeNode;
336        var c = node as ConstantTreeNode;
337        var w = node as AnyNode; // wildcard
338        string label = string.Empty;
339        if (w != null)
340          label = "=";
341        else if (v != null)
342          label = strict ? string.Format("{0:0.00}_{1}", v.Weight, v.VariableName) : string.Format("{0}", v.VariableName);
343        else if (c != null)
344          label = strict ? string.Format("{0:0.00}", c.Value) : "C";
345        strBuilder.Append(label);
346        if (node.Parent != null && node != node.Parent.Subtrees.Last())
347          strBuilder.Append(" ");
348      }
349      return strBuilder.ToString();
350    }
351  }
352}
Note: See TracBrowser for help on using the repository browser.