Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/Analyzers/SymbolicDataAnalysisSchemaFrequencyAnalyzer.cs @ 14536

Last change on this file since 14536 was 14427, checked in by bburlacu, 8 years ago

#1772: Extract common methods (used by the schema creator and the schema frequency analyzer) in static SchemaUtil class. Make AnyNode constructor public.

File size: 15.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading.Tasks;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.EvolutionTracking;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34
35namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
36  [Item("SymbolicDataAnalysisSchemaFrequencyAnalyzer", "An analyzer which counts schema frequencies in the population.")]
37  [StorableClass]
38  public class SymbolicDataAnalysisSchemaFrequencyAnalyzer : EvolutionTrackingAnalyzer<ISymbolicExpressionTree> {
39    private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
40    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
41    private const string ProblemDataParameterName = "ProblemData";
42    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
43    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
44    private const string MaximumDegreeOfParallelismParameterName = "MaximumDegreeOfParallelism";
45
46    private static readonly Dictionary<string, string> ShortNames = new Dictionary<string, string> {
47    { "Addition", "+" },
48    { "Subtraction", "-" },
49    { "Multiplication", "*" },
50    { "Division", "/" },
51    { "Exponential", "exp" },
52    { "Logarithm", "log" }
53  };
54
55    [Storable]
56    private readonly SymbolicExpressionTreePhenotypicSimilarityCalculator phenotypicSimilarityCalculator;
57
58    [Storable]
59    private readonly SymbolicExpressionTreeBottomUpSimilarityCalculator genotypicSimilarityCalculator;
60
61    [Storable]
62    private readonly ISymbolicExpressionTreeNodeEqualityComparer comparer;
63    private QueryMatch qm;
64
65    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
66      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
67    }
68
69    public IFixedValueParameter<IntValue> MaximumDegreeOfParallelismParameter {
70      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumDegreeOfParallelismParameterName]; }
71    }
72
73    public IFixedValueParameter<IntValue> MinimumSchemaLengthParameter {
74      get { return (IFixedValueParameter<IntValue>)Parameters[MinimumSchemaLengthParameterName]; }
75    }
76
77    public IFixedValueParameter<BoolValue> StrictSchemaMatchingParameter {
78      get { return (IFixedValueParameter<BoolValue>)Parameters[StrictSchemaMatchingParameterName]; }
79    }
80
81    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
82      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
83    }
84
85    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
86      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
87    }
88
89    public bool ExecuteInParallel {
90      get { return ExecuteInParallelParameter.Value.Value; }
91      set { ExecuteInParallelParameter.Value.Value = value; }
92    }
93
94    public int MaximumDegreeOfParallelism {
95      get { return MaximumDegreeOfParallelismParameter.Value.Value; }
96      set { MaximumDegreeOfParallelismParameter.Value.Value = value; }
97    }
98
99    public int MinimumSchemaLength {
100      get { return MinimumSchemaLengthParameter.Value.Value; }
101      set { MinimumSchemaLengthParameter.Value.Value = value; }
102    }
103
104    public bool StrictSchemaMatching {
105      get { return StrictSchemaMatchingParameter.Value.Value; }
106      set { StrictSchemaMatchingParameter.Value.Value = value; }
107    }
108
109    public SymbolicDataAnalysisSchemaFrequencyAnalyzer() {
110      comparer = new SymbolicExpressionTreeNodeEqualityComparer {
111        MatchConstantValues = false,
112        MatchVariableNames = true,
113        MatchVariableWeights = false
114      };
115      qm = new QueryMatch(comparer) { MatchParents = true };
116      phenotypicSimilarityCalculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
117      genotypicSimilarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { SolutionVariableName = "SymbolicExpressionTree" };
118      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
119      Parameters.Add(new FixedValueParameter<IntValue>(MaximumDegreeOfParallelismParameterName, new IntValue(4)));
120      Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(true)));
121      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(true)));
122      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
123      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
124    }
125
126    protected SymbolicDataAnalysisSchemaFrequencyAnalyzer(SymbolicDataAnalysisSchemaFrequencyAnalyzer original,
127      Cloner cloner) : base(original, cloner) {
128      comparer = cloner.Clone(original.comparer);
129      phenotypicSimilarityCalculator = cloner.Clone(original.phenotypicSimilarityCalculator);
130      genotypicSimilarityCalculator = cloner.Clone(original.genotypicSimilarityCalculator);
131      MinimumSchemaLength = original.MinimumSchemaLength;
132      StrictSchemaMatching = original.StrictSchemaMatching;
133      qm = new QueryMatch(comparer) { MatchParents = true };
134    }
135
136    public override IDeepCloneable Clone(Cloner cloner) {
137      return new SymbolicDataAnalysisSchemaFrequencyAnalyzer(this, cloner);
138    }
139
140    [StorableConstructor]
141    protected SymbolicDataAnalysisSchemaFrequencyAnalyzer(bool deserializing) : base(deserializing) { }
142
143    [StorableHook(HookType.AfterDeserialization)]
144    private void AfterDeserialization() {
145      qm = new QueryMatch(comparer) { MatchParents = true };
146    }
147
148    public override IOperation Apply() {
149      if (PopulationGraph == null || Generation.Value == 0 ||
150          (Generation.Value > 1 && Generation.Value % UpdateInterval.Value != 0))
151        return base.Apply();
152
153      comparer.MatchVariableWeights = comparer.MatchConstantValues = StrictSchemaMatching;
154      phenotypicSimilarityCalculator.Interpreter = InterpreterParameter.ActualValue;
155      phenotypicSimilarityCalculator.ProblemData = ProblemDataParameter.ActualValue;
156      var generation = Generation.Value;
157
158      var population = PopulationGraph.Vertices.Where(x => x.InDegree == 2 && x.Rank > generation - 1).ToList();
159      var vertices = population.Where(x => x.InDegree == 2).OrderByDescending(x => x.Quality).ToList();
160      ResultCollection resultCollection;
161      if (Results.ContainsKey("Schema Frequencies")) {
162        resultCollection = (ResultCollection)Results["Schema Frequencies"].Value;
163      } else {
164        resultCollection = new ResultCollection();
165        var result = new Result("Schema Frequencies", resultCollection);
166        Results.Add(result);
167      }
168
169      var mostFrequentPerGeneration = new List<Tuple<string, double[]>>();
170
171      var trees = population.Select(x => x.Data).ToList();
172      var qualities = population.Select(x => x.Quality).ToList();
173      var genSimMatrix = new double[trees.Count, trees.Count];
174      var phenSimMatrix = new double[trees.Count, trees.Count];
175
176      for (int i = 0; i < trees.Count - 1; ++i) {
177        for (int j = i + 1; j < trees.Count; ++j) {
178          genSimMatrix[i, j] = double.NaN;
179          phenSimMatrix[i, j] = double.NaN;
180        }
181      }
182      var schemas = SchemaCreator.GenerateSchemas(vertices, MinimumSchemaLength, StrictSchemaMatching).ToList();
183      var schemaStrings = schemas.Select(x => x.Root.GetSubtree(0).GetSubtree(0).FormatToString(StrictSchemaMatching)).ToList();
184      int[][] matchingIndices;
185      if (ExecuteInParallel) {
186        matchingIndices = new int[schemas.Count][];
187        Parallel.For(0, schemas.Count, new ParallelOptions { MaxDegreeOfParallelism = MaximumDegreeOfParallelism }, i => {
188          var schema = schemas[i];
189          matchingIndices[i] = Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v], schema)).ToArray();
190        });
191      } else {
192        matchingIndices = schemas.Select(x => Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v], x)).ToArray()).ToArray();
193      }
194
195      var schemaStatistics = new List<Tuple<string, double[]>>();
196      var avgPopQuality = qualities.Average();
197
198      if (ExecuteInParallel) {
199        var locker = new object();
200        Parallel.For(0, schemas.Count, new ParallelOptions { MaxDegreeOfParallelism = MaximumDegreeOfParallelism }, i => {
201          var indices = matchingIndices[i];
202          if (indices.Length > 1) {
203            var avgSchemaQuality = indices.Average(x => qualities[x]);
204            var avgLength = indices.Average(x => trees[x].Length);
205            var avgGenSim = AverageSimilarity(indices, trees, genSimMatrix, genotypicSimilarityCalculator.CalculateSimilarity);
206            var avgPhenSim = AverageSimilarity(indices, trees, phenSimMatrix, phenotypicSimilarityCalculator.CalculateSimilarity);
207            var array = new[] { indices.Length, avgSchemaQuality, avgLength, avgGenSim, avgPhenSim, avgPopQuality };
208            var t = new Tuple<string, double[]>(schemaStrings[i], array);
209            lock (locker) {
210              schemaStatistics.Add(t);
211            }
212          }
213        });
214      } else {
215        for (int i = 0; i < schemas.Count; ++i) {
216          var indices = matchingIndices[i];
217          if (indices.Length > 1) {
218            var avgSchemaQuality = indices.Average(x => qualities[x]);
219            var avgLength = indices.Average(x => trees[x].Length);
220            var avgGenSim = AverageSimilarity(indices, trees, genSimMatrix, genotypicSimilarityCalculator.CalculateSimilarity);
221            var avgPhenSim = AverageSimilarity(indices, trees, phenSimMatrix, phenotypicSimilarityCalculator.CalculateSimilarity);
222            var array = new[] { indices.Length, avgSchemaQuality, avgLength, avgGenSim, avgPhenSim, avgPopQuality };
223            var t = new Tuple<string, double[]>(schemaStrings[i], array);
224            schemaStatistics.Add(t);
225          }
226        }
227      }
228
229      if (!schemaStatistics.Any()) return base.Apply(); // shouldn't ever happen
230      var columnNames = new[] { "Count", "Avg Quality", "Avg Length", "Avg Genotype Similarity", "Avg Phenotype Similarity", "Avg Population Quality" };
231      var mostFrequent = new DoubleMatrix(schemaStatistics.Count, schemaStatistics[0].Item2.Length) {
232        SortableView = true
233      };
234      schemaStatistics.Sort((a, b) => { if (a.Item2[0].Equals(b.Item2[0])) return b.Item2[1].CompareTo(a.Item2[1]); return b.Item2[0].CompareTo(a.Item2[0]); });
235      mostFrequentPerGeneration.Add(Tuple.Create(schemaStatistics[0].Item1, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray()));
236      mostFrequent.RowNames = schemaStatistics.Select(x => x.Item1);
237      mostFrequent.ColumnNames = columnNames;
238      for (int i = 0; i < schemaStatistics.Count; ++i) {
239        var values = schemaStatistics[i].Item2;
240        for (int j = 0; j < values.Length; ++j) {
241          mostFrequent[i, j] = values[j];
242        }
243      }
244      resultCollection.Add(new Result("Generation " + generation + " Most Frequent Schemas", mostFrequent));
245
246      columnNames = new[] { "Generation", "Count", "Avg Quality", "Avg Length", "Avg Genotype Similarity", "Avg Phenotype Similarity", "Avg Population Quality" };
247      // sort according to quality, then count
248      schemaStatistics.Sort((a, b) => { if (a.Item2[1].Equals(b.Item2[1])) return b.Item2[0].CompareTo(a.Item2[0]); return b.Item2[1].CompareTo(a.Item2[1]); });
249      DoubleMatrix bestSchemasMatrix;
250      if (!resultCollection.ContainsKey("Best Schemas")) {
251        bestSchemasMatrix = new DoubleMatrix(1, 7);
252        bestSchemasMatrix.RowNames = new[] { schemaStatistics[0].Item1 };
253        bestSchemasMatrix.ColumnNames = columnNames;
254        var values = new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray();
255        for (int i = 0; i < values.Length; ++i)
256          bestSchemasMatrix[0, i] = values[i];
257        resultCollection.Add(new Result("Best Schemas", bestSchemasMatrix));
258      } else {
259        bestSchemasMatrix = (DoubleMatrix)resultCollection["Best Schemas"].Value;
260        resultCollection["Best Schemas"].Value = AddRow(bestSchemasMatrix, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray(), schemaStatistics[0].Item1);
261      }
262
263      // sort according to count, then quality
264      schemaStatistics.Sort((a, b) => { if (a.Item2[0].Equals(b.Item2[0])) return b.Item2[1].CompareTo(a.Item2[1]); return b.Item2[0].CompareTo(a.Item2[0]); });
265      DoubleMatrix frequentSchemasMatrix;
266      if (!resultCollection.ContainsKey("Most Frequent Schemas")) {
267        frequentSchemasMatrix = new DoubleMatrix(1, 7);
268        frequentSchemasMatrix.RowNames = new[] { schemaStatistics[0].Item1 };
269        frequentSchemasMatrix.ColumnNames = columnNames;
270        var values = new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray();
271        for (int i = 0; i < values.Length; ++i)
272          frequentSchemasMatrix[0, i] = values[i];
273        resultCollection.Add(new Result("Most Frequent Schemas", frequentSchemasMatrix));
274      } else {
275        frequentSchemasMatrix = (DoubleMatrix)resultCollection["Most Frequent Schemas"].Value;
276        resultCollection["Most Frequent Schemas"].Value = AddRow(frequentSchemasMatrix, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray(), schemaStatistics[0].Item1);
277      }
278      return base.Apply();
279    }
280
281    private static DoubleMatrix AddRow(DoubleMatrix m, double[] row, string rowName) {
282      if (row.Length != m.Columns)
283        throw new Exception("Row value count must match matrix column count.");
284      var x = new DoubleMatrix(m.Rows + 1, m.Columns);
285      x.RowNames = m.RowNames.Concat(new[] { rowName });
286      x.ColumnNames = m.ColumnNames;
287      for (int i = 0; i < m.Rows; ++i)
288        for (int j = 0; j < m.Columns; ++j)
289          x[i, j] = m[i, j];
290      for (int j = 0; j < m.Columns; ++j)
291        x[m.Rows, j] = row[j];
292      return x;
293    }
294
295    private static double AverageSimilarity(int[] indices, List<ISymbolicExpressionTree> trees, double[,] similarityMatrix, Func<ISymbolicExpressionTree, ISymbolicExpressionTree, double> similarityFunction) {
296      var agg = 0d;
297      int len = indices.Length;
298      var count = len * (len - 1) / 2d;
299      for (int i = 0; i < indices.Length - 1; ++i) {
300        var a = indices[i];
301        for (int j = i + 1; j < indices.Length; ++j) {
302          var b = indices[j];
303          if (double.IsNaN(similarityMatrix[a, b]))
304            similarityMatrix[a, b] = similarityFunction(trees[a], trees[b]);
305          agg += similarityMatrix[a, b];
306        }
307      }
308      return agg / count;
309    }
310  }
311}
Note: See TracBrowser for help on using the repository browser.