Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/BuildingBlockAnalyzers/SymbolicDataAnalysisPoly10Analyzer.cs @ 11493

Last change on this file since 11493 was 11493, checked in by bburlacu, 8 years ago

#1772: Improved the way the TraceCalculator handles mutations, worked on the SymbolicDataAnalysisPoly10Analyzer (analyzer that tries to identify building blocks for the Poly-10 problem by doing a semantic comparison).

File size: 10.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using DataRow = HeuristicLab.Analysis.DataRow;
33using DataTable = HeuristicLab.Analysis.DataTable;
34
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Analyzers.BuidingBlocks {
  /// <summary>
  /// Analyzer that counts how often subtrees of the current population are phenotypically
  /// similar to parts ("building blocks") of the Poly-10 formula
  /// x1x2 + x3x4 + x5x6 + x1x7x9 + x3x6x10.
  /// Similarity is the squared Pearson correlation between the values of a subtree and the
  /// values of a building block on the training rows; a subtree counts as an occurrence when
  /// that value reaches <see cref="PhenotypicSimilarityThreshold"/>.
  /// The counts are appended to the "Building blocks frequencies" data table in the results.
  /// </summary>
  [Item("Poly-10 building blocks analyzer", "An analyzer which attempts to identify parts of the Poly-10 formula")]
  [StorableClass]
  public class SymbolicDataAnalysisPoly10Analyzer : SymbolicDataAnalysisAnalyzer {
    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string GenerationsParameterName = "Generations";
    private const string PhenotypicSimilarityThresholdParameterName = "PhenotypicSimilarityThreshold";
    private const string UpdateCounterParameterName = "UpdateCounter";
    private const string UpdateIntervalParameterName = "UpdateInterval";
    private const string BuildingBlocksFrequenciesTableName = "Building blocks frequencies";

    // The three maps below are keyed by the s-expression string of a building block.
    // They are neither storable nor copied by the cloning constructor, so they start out
    // empty in a clone; Apply() rebuilds them on demand.

    // evaluations of the building blocks on the training rows (used for phenotypic matching)
    private readonly Dictionary<string, List<double>> evaluationMap = new Dictionary<string, List<double>>();
    // root node of the imported tree of each building block
    private readonly Dictionary<string, ISymbolicExpressionTreeNode> fragmentMap = new Dictionary<string, ISymbolicExpressionTreeNode>();
    // human readable label of each building block (also used as data row name)
    private readonly Dictionary<string, string> prettyLabels = new Dictionary<string, string>();
    private readonly SymbolicExpressionImporter importer = new SymbolicExpressionImporter();

    #region Parameters
    public IValueParameter<DoubleValue> PhenotypicSimilarityThresholdParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[PhenotypicSimilarityThresholdParameterName]; }
    }

    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
    }

    public ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
      get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }

    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }

    public ValueParameter<IntValue> UpdateCounterParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    }

    public ValueParameter<IntValue> UpdateIntervalParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    }
    #endregion

    #region Parameter properties
    /// <summary>Minimum squared correlation for a subtree to count as an occurrence of a building block (default 0.9).</summary>
    public double PhenotypicSimilarityThreshold {
      get { return PhenotypicSimilarityThresholdParameter.Value.Value; }
      set { PhenotypicSimilarityThresholdParameter.Value.Value = value; }
    }

    /// <summary>Number of Apply() calls since the last analysis.</summary>
    public int UpdateCounter {
      get { return UpdateCounterParameter.Value.Value; }
      set { UpdateCounterParameter.Value.Value = value; }
    }

    /// <summary>The analysis is carried out on every UpdateInterval-th call of Apply() (default 1).</summary>
    public int UpdateInterval {
      get { return UpdateIntervalParameter.Value.Value; }
      set { UpdateIntervalParameter.Value.Value = value; }
    }
    #endregion

    public SymbolicDataAnalysisPoly10Analyzer() {
      #region Add parameters
      Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName));
      Parameters.Add(new ValueParameter<DoubleValue>(PhenotypicSimilarityThresholdParameterName, "The phenotypic similarity threshold", new DoubleValue(0.9)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
      #endregion
    }

    [StorableConstructor]
    protected SymbolicDataAnalysisPoly10Analyzer(bool deserializing)
      : base(deserializing) {
    }

    protected SymbolicDataAnalysisPoly10Analyzer(SymbolicDataAnalysisPoly10Analyzer original, Cloner cloner)
      : base(original, cloner) {
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicDataAnalysisPoly10Analyzer(this, cloner);
    }

    // NOTE(review): "new" hides the inherited member instead of overriding it, so code that
    // accesses EnabledByDefault through the base type does not see this value. Confirm whether
    // the base member is virtual and this should be an override.
    public new bool EnabledByDefault {
      get { return false; }
    }

    /// <summary>
    /// Counts, for every building block, the subtrees of the current trees whose values on the
    /// training rows are sufficiently correlated with the building block, and appends the counts
    /// to the frequencies table. Only every <see cref="UpdateInterval"/>-th call does the work.
    /// </summary>
    /// <returns>The operation returned by the base class.</returns>
    /// <exception cref="InvalidCastException">
    /// The configured interpreter is not a SymbolicDataAnalysisExpressionTreeLinearInterpreter.
    /// </exception>
    public override IOperation Apply() {
      #region Update counter & update interval
      UpdateCounter++;
      // "<" instead of "!=": the analysis still fires when the interval is lowered below the current counter
      if (UpdateCounter < UpdateInterval) {
        return base.Apply();
      }
      UpdateCounter = 0;
      #endregion

      var results = ResultCollectionParameter.ActualValue;
      bool firstGeneration = GenerationsParameter.ActualValue.Value == 0;
      // Generation 0 starts a fresh collection. The other two conditions cover the cases in which
      // the generation 0 call did not reach this point (UpdateInterval > 1) or the maps are empty
      // because this instance was cloned or restored.
      if (firstGeneration || evaluationMap.Count == 0 || !results.ContainsKey(BuildingBlocksFrequenciesTableName))
        InitializeBuildingBlockCollection(firstGeneration);

      var trees = SymbolicExpressionTreeParameter.ActualValue;
      // evaluating a single subtree node (GetValues) is called on the linear interpreter type, hence the cast
      var interpreter = (SymbolicDataAnalysisExpressionTreeLinearInterpreter)SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var dataset = ProblemDataParameter.ActualValue.Dataset;
      var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();
      Dictionary<string, int> bbFrequencies = evaluationMap.Keys.ToDictionary(x => x, x => 0);
      Dictionary<string, int> bbLengths = fragmentMap.ToDictionary(x => x.Key, x => x.Value.GetLength());

      foreach (var t in trees) {
        var root = t.Root.GetSubtree(0).GetSubtree(0);
        // The complete prefix list is required for the skip below.
        // Assumes GetLength() is the number of nodes of a subtree (the node itself included), so that
        // the descendants of nodes[i] are the next GetLength() - 1 entries - TODO confirm.
        var nodes = root.IterateNodesPrefix().ToList();
        // subtree evaluations are shared by all building blocks, evaluate every node at most once per tree
        var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, List<double>>();

        foreach (var bb in evaluationMap) {
          int len = bbLengths[bb.Key];

          for (int i = 0; i < nodes.Count; ++i) {
            var s = nodes[i];
            int subtreeLength = s.GetLength();
            // a subtree of the same size as the building block is a valid candidate as well
            if (subtreeLength < len) continue;

            List<double> values;
            if (!nodeValues.TryGetValue(s, out values)) {
              values = interpreter.GetValues(s, dataset, rows).ToList();
              nodeValues.Add(s, values);
            }

            OnlineCalculatorError error;
            var r2 = OnlinePearsonsRSquaredCalculator.Calculate(values, bb.Value, out error);
            if (error == OnlineCalculatorError.None && r2 >= PhenotypicSimilarityThreshold) {
              bbFrequencies[bb.Key]++;
              // do not count the descendants of a matching subtree again (++i moves past the subtree)
              i += subtreeLength - 1;
            }
          }
        }
      }

      var table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
      foreach (var pair in bbFrequencies) {
        var label = prettyLabels[pair.Key];
        table.Rows[label].Values.Add(pair.Value);
      }

      return base.Apply();
    }

    /// <summary>
    /// Builds the Poly-10 building blocks, evaluates them on the training rows and makes sure
    /// that the frequencies table contains one data row per building block.
    /// </summary>
    /// <param name="reset">
    /// When true, cached evaluations and existing data rows are discarded first (start of a run);
    /// when false, only missing evaluations and rows are added.
    /// </param>
    private void InitializeBuildingBlockCollection(bool reset) {
      #region Add building blocks
      // building blocks
      const string x1 = "(variable 1 X1)";
      const string x2 = "(variable 1 X2)";
      const string x3 = "(variable 1 X3)";
      const string x4 = "(variable 1 X4)";
      const string x5 = "(variable 1 X5)";
      const string x6 = "(variable 1 X6)";
      const string x7 = "(variable 1 X7)";
      // x8 is never used in the formula
      const string x9 = "(variable 1 X9)";
      const string x10 = "(variable 1 X10)";
      string s1 = String.Format("(* {0} {1})", x1, x2);
      string s2 = String.Format("(* {0} {1})", x3, x4);
      string s3 = String.Format("(* {0} {1})", x5, x6);
      string s4 = String.Format("(* (* {0} {1}) {2})", x1, x7, x9);
      string s5 = String.Format("(* (* {0} {1}) {2})", x3, x6, x10);
      string s6 = String.Format("(+ {0} {1})", s1, s2); // x1x2 + x3x4
      string s7 = String.Format("(+ {0} {1})", s1, s3); // x1x2 + x5x6
      string s8 = String.Format("(+ {0} {1})", s2, s3); // x3x4 + x5x6
      string s9 = String.Format("(+ (+ {0} {1}) {2})", s1, s2, s3); // x1x2 + x3x4 + x5x6
      string s10 = String.Format("(+ (+ {0} {1}) {2})", s4, s5, s9); // x1x2 + x3x4 + x5x6 + x1x7x9 + x3x6x10
      prettyLabels[s1] = "X1*X2";
      prettyLabels[s2] = "X3*X4";
      prettyLabels[s3] = "X5*X6";
      prettyLabels[s4] = "X1*X7*X9";
      prettyLabels[s5] = "X3*X6*X10";
      prettyLabels[s6] = prettyLabels[s1] + " + " + prettyLabels[s2];
      prettyLabels[s7] = prettyLabels[s1] + " + " + prettyLabels[s3];
      prettyLabels[s8] = prettyLabels[s2] + " + " + prettyLabels[s3];
      prettyLabels[s9] = prettyLabels[s1] + " + " + prettyLabels[s2] + " + " + prettyLabels[s3];
      prettyLabels[s10] = prettyLabels[s9] + " + " + prettyLabels[s4] + " + " + prettyLabels[s5];
      #endregion
      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var dataset = ProblemDataParameter.ActualValue.Dataset;
      var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();

      if (reset) {
        // evaluations cached by a previous run may belong to different training rows
        evaluationMap.Clear();
        fragmentMap.Clear();
      }

      // every building block exactly once (s3 was missing from this list, s2 was listed twice)
      foreach (var s in new[] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 }) {
        if (evaluationMap.ContainsKey(s)) continue;
        var t = importer.Import(s);
        evaluationMap.Add(s, interpreter.GetSymbolicExpressionTreeValues(t, dataset, rows).ToList());
        fragmentMap[s] = t.Root.GetSubtree(0).GetSubtree(0);
      }

      var results = ResultCollectionParameter.ActualValue;
      DataTable table;
      if (!results.ContainsKey(BuildingBlocksFrequenciesTableName)) {
        table = new DataTable(BuildingBlocksFrequenciesTableName);
        results.Add(new Result(BuildingBlocksFrequenciesTableName, table));
      } else {
        table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
      }
      if (reset)
        table.Rows.Clear();
      foreach (var key in evaluationMap.Keys) {
        var label = prettyLabels[key];
        // keep rows that were recorded before a late (non-reset) initialization
        if (!table.Rows.ContainsKey(label))
          table.Rows.Add(new DataRow(label));
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.