Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/BuildingBlockAnalyzers/SymbolicDataAnalysisPoly10Analyzer.cs @ 16130

Last change on this file since 16130 was 12891, checked in by bburlacu, 9 years ago

#1772: Merge trunk changes. Remove dead code from the genealogy analyzer.

File size: 10.9 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using DataRow = HeuristicLab.Analysis.DataRow;
33using DataTable = HeuristicLab.Analysis.DataTable;
34
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Analyzers.BuidingBlocks {
  /// <summary>
  /// Analyzer that counts, per update, how many subtrees of the current trees are
  /// phenotypically similar to hand-written fragments of the Poly-10 formula
  /// (x1x2 + x3x4 + x5x6 + x1x7x9 + x3x6x10) and appends the counts to a data table
  /// stored in the result collection.
  /// </summary>
  /// <remarks>
  /// Similarity is the squared Pearson correlation between a subtree's outputs and the
  /// reference fragment's outputs on the training rows, compared against
  /// <see cref="PhenotypicSimilarityThreshold"/>.
  /// </remarks>
  [Item("Poly-10 building blocks analyzer", "An analyzer which attempts to identify parts of the Poly-10 formula")]
  [StorableClass]
  public class SymbolicDataAnalysisPoly10Analyzer : SymbolicDataAnalysisAnalyzer {
    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string GenerationsParameterName = "Generations";
    private const string PhenotypicSimilarityThresholdParameterName = "PhenotypicSimilarityThreshold";
    private const string UpdateCounterParameterName = "UpdateCounter";
    private const string UpdateIntervalParameterName = "UpdateInterval";
    private const string BuildingBlocksFrequenciesTableName = "Building blocks frequencies";

    // The caches below are keyed by the s-expression string of each building block.
    // They are neither marked storable nor copied by the cloning constructor, so a
    // cloned or deserialized analyzer starts with empty caches; Apply() rebuilds them on demand.

    // reference outputs of each building block on the training rows (for phenotypic matching)
    private readonly Dictionary<string, List<double>> evaluationMap = new Dictionary<string, List<double>>();
    // root node of each imported building block (used for its length)
    private readonly Dictionary<string, ISymbolicExpressionTreeNode> fragmentMap = new Dictionary<string, ISymbolicExpressionTreeNode>();
    // human-readable row names used in the results table
    private readonly Dictionary<string, string> prettyLabels = new Dictionary<string, string>();
    private readonly SymbolicExpressionImporter importer = new SymbolicExpressionImporter();

    #region Parameters
    public IValueParameter<DoubleValue> PhenotypicSimilarityThresholdParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[PhenotypicSimilarityThresholdParameterName]; }
    }

    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
    }

    public ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
      get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }

    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }

    public ValueParameter<IntValue> UpdateCounterParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    }

    public ValueParameter<IntValue> UpdateIntervalParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    }
    #endregion

    #region Parameter properties
    /// <summary>Minimum squared correlation for a subtree to count as a building-block match.</summary>
    public double PhenotypicSimilarityThreshold {
      get { return PhenotypicSimilarityThresholdParameter.Value.Value; }
      set { PhenotypicSimilarityThresholdParameter.Value.Value = value; }
    }

    /// <summary>Number of Apply() calls since the last analysis was performed.</summary>
    public int UpdateCounter {
      get { return UpdateCounterParameter.Value.Value; }
      set { UpdateCounterParameter.Value.Value = value; }
    }

    /// <summary>Number of Apply() calls between two analyses.</summary>
    public int UpdateInterval {
      get { return UpdateIntervalParameter.Value.Value; }
      set { UpdateIntervalParameter.Value.Value = value; }
    }
    #endregion

    /// <summary>Creates the analyzer and registers its parameters with their default values.</summary>
    public SymbolicDataAnalysisPoly10Analyzer() {
      #region Add parameters
      Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName));
      Parameters.Add(new ValueParameter<DoubleValue>(PhenotypicSimilarityThresholdParameterName, "The phenotypic similarity threshold", new DoubleValue(0.9)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
      #endregion
    }

    [StorableConstructor]
    protected SymbolicDataAnalysisPoly10Analyzer(bool deserializing)
      : base(deserializing) {
    }

    protected SymbolicDataAnalysisPoly10Analyzer(SymbolicDataAnalysisPoly10Analyzer original, Cloner cloner)
      : base(original, cloner) {
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicDataAnalysisPoly10Analyzer(this, cloner);
    }

    // NOTE(review): 'new' hides the inherited member instead of overriding it, so callers
    // that go through the base type will not see this value - confirm whether the base
    // declaration is virtual and 'override' was intended.
    public new bool EnabledByDefault {
      get { return false; }
    }

    /// <summary>
    /// Every <see cref="UpdateInterval"/> calls, counts the phenotypic matches of each
    /// building block in the current trees and appends one value per building block to
    /// the frequencies table.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      #region Update counter & update interval
      UpdateCounter++;
      // '<' rather than '!=' so that lowering the interval below the current counter
      // value cannot disable the analyzer permanently
      if (UpdateCounter < UpdateInterval) {
        return base.Apply();
      }
      UpdateCounter = 0;
      #endregion

      var results = ResultCollectionParameter.ActualValue;
      bool firstGeneration = GenerationsParameter.ActualValue.Value == 0;

      // Generation 0 may have been skipped by the update interval, and the caches are
      // empty after cloning/deserialization, so also initialize whenever they are missing.
      if (firstGeneration || evaluationMap.Count == 0 || !results.ContainsKey(BuildingBlocksFrequenciesTableName)) {
        InitializeBuildingBlockCollection(firstGeneration);
      }

      var trees = SymbolicExpressionTreeParameter.ActualValue;
      // GetValues(node, dataset, rows) is called on the linear interpreter type, hence the cast
      var interpreter = (SymbolicDataAnalysisExpressionTreeLinearInterpreter)SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var problemData = ProblemDataParameter.ActualValue;
      var dataset = problemData.Dataset;
      var rows = problemData.TrainingIndices.ToList();
      double threshold = PhenotypicSimilarityThreshold;

      // The prefix node lists do not depend on the building block; build them once.
      // The first two GetSubtree(0) calls presumably skip the program-root and start nodes.
      var nodeLists = trees.Select(t => t.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToList()).ToList();

      var bbFrequencies = new Dictionary<string, int>();
      foreach (var entry in evaluationMap) {
        var referenceValues = entry.Value;
        int len = fragmentMap[entry.Key].GetLength();
        int matches = 0;

        foreach (var nodes in nodeLists) {
          int i = 0;
          while (i < nodes.Count) {
            var s = nodes[i];
            int subtreeLength = s.GetLength();
            bool isMatch = false;

            // only subtrees strictly larger than the building block are candidates
            if (subtreeLength > len) {
              var values = interpreter.GetValues(s, dataset, rows);
              OnlineCalculatorError error;
              var r = OnlinePearsonsRCalculator.Calculate(values, referenceValues, out error);
              var r2 = error == OnlineCalculatorError.None ? r * r : double.NaN;
              isMatch = !double.IsNaN(r2) && r2 >= threshold;
            }

            if (isMatch) {
              matches++;
              // Skip the nodes nested inside the matched subtree so it is counted once.
              // Assumes prefix order lists a node immediately followed by all of its
              // descendants and that GetLength() includes the node itself.
              i += subtreeLength;
            } else {
              i++;
            }
          }
        }
        bbFrequencies[entry.Key] = matches;
      }

      var table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
      foreach (var pair in bbFrequencies) {
        var label = prettyLabels[pair.Key];
        if (table.Rows.ContainsKey(label)) {
          table.Rows[label].Values.Add(pair.Value);
        }
      }

      return base.Apply();
    }

    /// <summary>
    /// Builds the Poly-10 building blocks, evaluates them on the current training rows
    /// and makes sure the frequencies table contains one row per building block.
    /// </summary>
    /// <param name="resetTable">
    /// When true (start of a run) all existing rows of the table are discarded; when false
    /// existing rows are kept and only missing rows are added.
    /// </param>
    private void InitializeBuildingBlockCollection(bool resetTable) {
      #region Add building blocks
      // building blocks
      const string x1 = "(variable 1 X1)";
      const string x2 = "(variable 1 X2)";
      const string x3 = "(variable 1 X3)";
      const string x4 = "(variable 1 X4)";
      const string x5 = "(variable 1 X5)";
      const string x6 = "(variable 1 X6)";
      const string x7 = "(variable 1 X7)";
      // x8 is never used in the formula
      const string x9 = "(variable 1 X9)";
      const string x10 = "(variable 1 X10)";
      string s1 = String.Format("(* {0} {1})", x1, x2);
      string s2 = String.Format("(* {0} {1})", x3, x4);
      string s3 = String.Format("(* {0} {1})", x5, x6);
      string s4 = String.Format("(* (* {0} {1}) {2})", x1, x7, x9);
      string s5 = String.Format("(* (* {0} {1}) {2})", x3, x6, x10);
      string s6 = String.Format("(+ {0} {1})", s1, s2); // x1x2 + x3x4
      string s7 = String.Format("(+ {0} {1})", s1, s3); // x1x2 + x5x6
      string s8 = String.Format("(+ {0} {1})", s2, s3); // x3x4 + x5x6
      string s9 = String.Format("(+ (+ {0} {1}) {2})", s1, s2, s3); // x1x2 + x3x4 + x5x6
      string s10 = String.Format("(+ (+ {0} {1}) {2})", s4, s5, s9); // x1x2 + x3x4 + x5x6 + x1x7x9 + x3x6x10
      prettyLabels[s1] = "X1*X2";
      prettyLabels[s2] = "X3*X4";
      prettyLabels[s3] = "X5*X6";
      prettyLabels[s4] = "X1*X7*X9";
      prettyLabels[s5] = "X3*X6*X10";
      prettyLabels[s6] = prettyLabels[s1] + " + " + prettyLabels[s2];
      prettyLabels[s7] = prettyLabels[s1] + " + " + prettyLabels[s3];
      prettyLabels[s8] = prettyLabels[s2] + " + " + prettyLabels[s3];
      prettyLabels[s9] = prettyLabels[s1] + " + " + prettyLabels[s2] + " + " + prettyLabels[s3];
      prettyLabels[s10] = prettyLabels[s9] + " + " + prettyLabels[s4] + " + " + prettyLabels[s5];
      #endregion
      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var dataset = ProblemDataParameter.ActualValue.Dataset;
      var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();

      foreach (var s in new[] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 }) {
        // Always overwrite: values cached from an earlier run may belong to a different
        // dataset or training partition and would corrupt the correlation.
        var t = importer.Import(s);
        evaluationMap[s] = interpreter.GetSymbolicExpressionTreeValues(t, dataset, rows).ToList();
        fragmentMap[s] = t.Root.GetSubtree(0).GetSubtree(0);
      }

      var results = ResultCollectionParameter.ActualValue;
      DataTable table;
      if (!results.ContainsKey(BuildingBlocksFrequenciesTableName)) {
        table = new DataTable(BuildingBlocksFrequenciesTableName);
        results.Add(new Result(BuildingBlocksFrequenciesTableName, table));
      } else {
        table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
      }
      if (resetTable) {
        table.Rows.Clear();
      }
      foreach (var key in evaluationMap.Keys) {
        var label = prettyLabels[key];
        if (!table.Rows.ContainsKey(label)) {
          table.Rows.Add(new DataRow(label) { VisualProperties = { StartIndexZero = true } });
        }
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.