Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/BuildingBlockAnalyzers/SymbolicDataAnalysisPoly10Analyzer.cs @ 12318

Last change on this file since 12318 was 12318, checked in by bburlacu, 9 years ago

#1772: Fixed small bug/typo in the SymbolicDataAnalysisPoly10Analyzer

File size: 11.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using DataRow = HeuristicLab.Analysis.DataRow;
33using DataTable = HeuristicLab.Analysis.DataTable;
34
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Analyzers.BuidingBlocks {
  /// <summary>
  /// Analyzer that counts how often subtrees of the current trees behave like one of a fixed set
  /// of Poly-10 building blocks and appends the normalized counts to a results data table.
  /// </summary>
  /// <remarks>
  /// A subtree is counted as an occurrence of a building block when the squared Pearson correlation
  /// between the subtree's output and the building block's output on the training rows is at least
  /// <see cref="PhenotypicSimilarityThreshold"/>.
  /// </remarks>
  [Item("Poly-10 building blocks analyzer", "An analyzer which attempts to identify parts of the Poly-10 formula")]
  [StorableClass]
  public class SymbolicDataAnalysisPoly10Analyzer : SymbolicDataAnalysisAnalyzer {
    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string GenerationsParameterName = "Generations";
    private const string PhenotypicSimilarityThresholdParameterName = "PhenotypicSimilarityThreshold";
    private const string UpdateCounterParameterName = "UpdateCounter";
    private const string UpdateIntervalParameterName = "UpdateInterval";
    private const string BuildingBlocksFrequenciesTableName = "Building blocks frequencies";

    // Caches keyed by the s-expression string of each building block. They are neither marked
    // [Storable] nor copied by the cloning constructor, so Apply() rebuilds them when they are empty.
    // evaluationMap: output values of the building block on the training rows (for phenotypic matching)
    private readonly Dictionary<string, List<double>> evaluationMap = new Dictionary<string, List<double>>();
    // fragmentMap: root node of the imported building block expression
    private readonly Dictionary<string, ISymbolicExpressionTreeNode> fragmentMap = new Dictionary<string, ISymbolicExpressionTreeNode>();
    // prettyLabels: human readable label, also used as the data row name
    private readonly Dictionary<string, string> prettyLabels = new Dictionary<string, string>();
    private readonly SymbolicExpressionImporter importer = new SymbolicExpressionImporter();

    #region Parameters
    /// <summary>Minimum squared correlation for a subtree to count as a building block occurrence.</summary>
    public IValueParameter<DoubleValue> PhenotypicSimilarityThresholdParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[PhenotypicSimilarityThresholdParameterName]; }
    }

    /// <summary>Lookup of the interpreter used to evaluate building blocks and subtrees.</summary>
    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
    }

    /// <summary>Lookup of the problem data providing the dataset and the training indices.</summary>
    public ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
      get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }

    /// <summary>Lookup of the current generation number.</summary>
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }

    /// <summary>Number of Apply() calls since the last analysis was performed.</summary>
    public ValueParameter<IntValue> UpdateCounterParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    }

    /// <summary>Number of Apply() calls between two analyses.</summary>
    public ValueParameter<IntValue> UpdateIntervalParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    }
    #endregion

    #region Parameter properties
    public double PhenotypicSimilarityThreshold {
      get { return PhenotypicSimilarityThresholdParameter.Value.Value; }
      set { PhenotypicSimilarityThresholdParameter.Value.Value = value; }
    }

    public int UpdateCounter {
      get { return UpdateCounterParameter.Value.Value; }
      set { UpdateCounterParameter.Value.Value = value; }
    }

    public int UpdateInterval {
      get { return UpdateIntervalParameter.Value.Value; }
      set { UpdateIntervalParameter.Value.Value = value; }
    }
    #endregion

    /// <summary>
    /// Creates the analyzer with a similarity threshold of 0.9, an update counter of 0
    /// and an update interval of 1.
    /// </summary>
    public SymbolicDataAnalysisPoly10Analyzer() {
      #region Add parameters
      Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName));
      Parameters.Add(new ValueParameter<DoubleValue>(PhenotypicSimilarityThresholdParameterName, "The phenotypic similarity threshold", new DoubleValue(0.9)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
      #endregion
    }

    [StorableConstructor]
    protected SymbolicDataAnalysisPoly10Analyzer(bool deserializing)
      : base(deserializing) {
    }

    // The building block caches are intentionally not copied here; a clone starts with empty
    // caches and rebuilds them on its first analysis.
    protected SymbolicDataAnalysisPoly10Analyzer(SymbolicDataAnalysisPoly10Analyzer original, Cloner cloner)
      : base(original, cloner) {
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicDataAnalysisPoly10Analyzer(this, cloner);
    }

    // NOTE(review): this hides the inherited member instead of overriding it, so callers that access
    // EnabledByDefault through a base type or interface will not see this value - confirm whether
    // the base member is virtual and should be overridden instead.
    public new bool EnabledByDefault {
      get { return false; }
    }

    /// <summary>
    /// Every <see cref="UpdateInterval"/> calls, counts the occurrences of each building block in the
    /// current trees and appends the counts, divided by the largest count, to the frequency table.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      #region Update counter & update interval
      UpdateCounter++;
      // '<' instead of '!=' so the analysis still fires if the interval is lowered below the counter
      if (UpdateCounter < UpdateInterval) {
        return base.Apply();
      }
      UpdateCounter = 0;
      #endregion

      var results = ResultCollectionParameter.ActualValue;
      bool isFirstGeneration = GenerationsParameter.ActualValue.Value == 0;
      // Generation 0 is not necessarily the first analyzed generation (update interval > 1) and the
      // caches are empty on a fresh or cloned instance, so initialize whenever something is missing.
      if (isFirstGeneration || evaluationMap.Count == 0 || !results.ContainsKey(BuildingBlocksFrequenciesTableName)) {
        InitializeBuildingBlockCollection(isFirstGeneration);
      }

      var trees = SymbolicExpressionTreeParameter.ActualValue;
      // NOTE(review): hard cast - fails with an InvalidCastException for any other interpreter type
      var interpreter = (SymbolicDataAnalysisExpressionTreeLinearInterpreter)SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var dataset = ProblemDataParameter.ActualValue.Dataset;
      var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();
      var bbFrequencies = evaluationMap.Keys.ToDictionary(x => x, x => 0);

      foreach (var key in evaluationMap.Keys) {
        var targetValues = evaluationMap[key];
        int len = fragmentMap[key].GetLength();
        int occurrences = 0;

        foreach (var t in trees) {
          var root = t.Root.GetSubtree(0).GetSubtree(0);
          var nodes = root.IterateNodesPrefix().ToList();

          for (int i = 0; i < nodes.Count; ++i) {
            var s = nodes[i];
            int subtreeLength = s.GetLength();
            // only subtrees strictly larger than the building block are considered
            if (subtreeLength <= len) {
              continue;
            }

            var values = interpreter.GetValues(s, dataset, rows);
            OnlineCalculatorError error;
            var r2 = OnlinePearsonsRSquaredCalculator.Calculate(values, targetValues, out error);
            if (error == OnlineCalculatorError.None && r2 >= PhenotypicSimilarityThreshold) {
              occurrences++;
              // Skip the descendants of the matched subtree so nested matches are not counted again.
              // Assumes prefix order lists a node directly followed by its whole subtree and that
              // GetLength() includes the node itself; the loop increment moves past the last descendant.
              i += subtreeLength - 1;
            }
          }
        }
        bbFrequencies[key] = occurrences;
      }

      var table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
      double max = bbFrequencies.Max(x => x.Value);
      foreach (var pair in bbFrequencies) {
        var label = prettyLabels[pair.Key];
        if (table.Rows.ContainsKey(label)) {
          // frequencies are reported relative to the most frequent building block of this update
          table.Rows[label].Values.Add(max > 0 ? pair.Value / max : 0);
        }
      }

      return base.Apply();
    }

    /// <summary>
    /// Builds the building block expressions, caches their training outputs and root nodes, and
    /// makes sure the frequency table exists in the results with one row per building block.
    /// </summary>
    /// <param name="resetTable">
    /// When true all existing rows of the frequency table are removed before the rows are added.
    /// </param>
    private void InitializeBuildingBlockCollection(bool resetTable) {
      #region Add building blocks
      // building blocks
      const string x1 = "(variable 1 X1)";
      const string x2 = "(variable 1 X2)";
      const string x3 = "(variable 1 X3)";
      const string x4 = "(variable 1 X4)";
      const string x5 = "(variable 1 X5)";
      const string x6 = "(variable 1 X6)";
      const string x7 = "(variable 1 X7)";
      // x8 is never used in the formula
      const string x9 = "(variable 1 X9)";
      const string x10 = "(variable 1 X10)";
      string s1 = String.Format("(* {0} {1})", x1, x2);
      string s2 = String.Format("(* {0} {1})", x3, x4);
      string s3 = String.Format("(* {0} {1})", x5, x6);
      string s4 = String.Format("(* (* {0} {1}) {2})", x1, x7, x9);
      string s5 = String.Format("(* (* {0} {1}) {2})", x3, x6, x10);
      string s6 = String.Format("(+ {0} {1})", s1, s2); // x1x2 + x3x4
      string s7 = String.Format("(+ {0} {1})", s1, s3); // x1x2 + x5x6
      string s8 = String.Format("(+ {0} {1})", s2, s3); // x3x4 + x5x6
      string s9 = String.Format("(+ (+ {0} {1}) {2})", s1, s2, s3); // x1x2 + x3x4 + x5x6
      string s10 = String.Format("(+ (+ {0} {1}) {2})", s4, s5, s9); // x1x2 + x3x4 + x5x6 + x1x7x9 + x3x6x10
      prettyLabels[s1] = "X1*X2";
      prettyLabels[s2] = "X3*X4";
      prettyLabels[s3] = "X5*X6";
      prettyLabels[s4] = "X1*X7*X9";
      prettyLabels[s5] = "X3*X6*X10";
      prettyLabels[s6] = prettyLabels[s1] + " + " + prettyLabels[s2];
      prettyLabels[s7] = prettyLabels[s1] + " + " + prettyLabels[s3];
      prettyLabels[s8] = prettyLabels[s2] + " + " + prettyLabels[s3];
      prettyLabels[s9] = prettyLabels[s1] + " + " + prettyLabels[s2] + " + " + prettyLabels[s3];
      prettyLabels[s10] = prettyLabels[s9] + " + " + prettyLabels[s4] + " + " + prettyLabels[s5];
      #endregion

      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var dataset = ProblemDataParameter.ActualValue.Dataset;
      var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();

      foreach (var s in new[] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 }) {
        // already evaluated building blocks are kept as they are
        if (evaluationMap.ContainsKey(s)) {
          continue;
        }
        var t = importer.Import(s);
        evaluationMap.Add(s, interpreter.GetSymbolicExpressionTreeValues(t, dataset, rows).ToList());
        fragmentMap.Add(s, t.Root.GetSubtree(0).GetSubtree(0));
      }

      var results = ResultCollectionParameter.ActualValue;
      DataTable table;
      if (results.ContainsKey(BuildingBlocksFrequenciesTableName)) {
        table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
      } else {
        table = new DataTable(BuildingBlocksFrequenciesTableName);
        results.Add(new Result(BuildingBlocksFrequenciesTableName, table));
      }

      if (resetTable) {
        table.Rows.Clear();
      }
      foreach (var key in evaluationMap.Keys) {
        var label = prettyLabels[key];
        // rows that survived (no reset) keep their recorded values
        if (!table.Rows.ContainsKey(label)) {
          table.Rows.Add(new DataRow(label) { VisualProperties = { StartIndexZero = true } });
        }
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.