Free cookie consent management tool by TermsFeed Policy Generator

source: branches/1772_HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/BuildingBlockAnalyzers/SymbolicDataAnalysisPoly10Analyzer.cs

Last change on this file was 17434, checked in by bburlacu, 5 years ago

#1772: Merge trunk changes and fix all errors and compilation warnings.

File size: 11.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HEAL.Attic;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using System;
30using System.Collections.Generic;
31using System.Linq;
32using DataRow = HeuristicLab.Analysis.DataRow;
33using DataTable = HeuristicLab.Analysis.DataTable;
34
35namespace HeuristicLab.Problems.DataAnalysis.Symbolic
36{
37  [Item("Poly-10 building blocks analyzer", "An analyzer which attempts to identify parts of the Poly-10 formula")]
38  [StorableType("FA93D06D-B7CE-428A-8B22-ACB9A2BCE3CB")]
39  public class SymbolicDataAnalysisPoly10Analyzer : SymbolicDataAnalysisAnalyzer
40  {
41    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
42    private const string ProblemDataParameterName = "ProblemData";
43    private const string GenerationsParameterName = "Generations";
44    private const string PhenotypicSimilarityThresholdParameterName = "PhenotypicSimilarityThreshold";
45    private const string UpdateCounterParameterName = "UpdateCounter";
46    private const string UpdateIntervalParameterName = "UpdateInterval";
47    private const string BuildingBlocksFrequenciesTableName = "Building blocks frequencies";
48
49    // store evaluations of building blocks for phenotypic matching
50    private readonly Dictionary<string, List<double>> evaluationMap = new Dictionary<string, List<double>>();
51    private readonly Dictionary<string, ISymbolicExpressionTreeNode> fragmentMap = new Dictionary<string, ISymbolicExpressionTreeNode>();
52    private readonly Dictionary<string, string> prettyLabels = new Dictionary<string, string>();
53    private readonly SymbolicExpressionImporter importer = new SymbolicExpressionImporter();
54
55    #region Parameters
56    public IValueParameter<DoubleValue> PhenotypicSimilarityThresholdParameter {
57      get { return (IValueParameter<DoubleValue>)Parameters[PhenotypicSimilarityThresholdParameterName]; }
58    }
59
60    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
61      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
62    }
63
64    public ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
65      get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
66    }
67
68    public ILookupParameter<IntValue> GenerationsParameter {
69      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
70    }
71
72    public ValueParameter<IntValue> UpdateCounterParameter {
73      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
74    }
75
76    public ValueParameter<IntValue> UpdateIntervalParameter {
77      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
78    }
79    #endregion
80
81    #region Parameter properties
82    public double PhenotypicSimilarityThreshold {
83      get { return PhenotypicSimilarityThresholdParameter.Value.Value; }
84      set { PhenotypicSimilarityThresholdParameter.Value.Value = value; }
85    }
86
87    public int UpdateCounter {
88      get { return UpdateCounterParameter.Value.Value; }
89      set { UpdateCounterParameter.Value.Value = value; }
90    }
91
92    public int UpdateInterval {
93      get { return UpdateIntervalParameter.Value.Value; }
94      set { UpdateIntervalParameter.Value.Value = value; }
95    }
96    #endregion
97
98    public SymbolicDataAnalysisPoly10Analyzer()
99    {
100      #region Add parameters
101      Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName));
102      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName));
103      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName));
104      Parameters.Add(new ValueParameter<DoubleValue>(PhenotypicSimilarityThresholdParameterName, "The phenotypic similarity threshold", new DoubleValue(0.9)));
105      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
106      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
107      #endregion
108    }
109
110    [StorableConstructor]
111    protected SymbolicDataAnalysisPoly10Analyzer(StorableConstructorFlag _) : base(_) { }
112
113    protected SymbolicDataAnalysisPoly10Analyzer(SymbolicDataAnalysisPoly10Analyzer original, Cloner cloner)
114      : base(original, cloner)
115    {
116    }
117
118    public override IDeepCloneable Clone(Cloner cloner)
119    {
120      return new SymbolicDataAnalysisPoly10Analyzer(this, cloner);
121    }
122
123    new public bool EnabledByDefault {
124      get { return false; }
125    }
126
127    public override IOperation Apply()
128    {
129      #region Update counter & update interval
130      UpdateCounter++;
131      if (UpdateCounter != UpdateInterval)
132      {
133        return base.Apply();
134      }
135      UpdateCounter = 0;
136      #endregion
137
138      int generations = GenerationsParameter.ActualValue.Value;
139      if (generations == 0)
140        InitializeBuildingBlockCollection();
141
142      var results = ResultCollectionParameter.ActualValue;
143      var trees = SymbolicExpressionTreeParameter.ActualValue;
144      var interpreter = (SymbolicDataAnalysisExpressionTreeLinearInterpreter)SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
145      var dataset = ProblemDataParameter.ActualValue.Dataset;
146      var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();
147      var bbFrequencies = evaluationMap.Keys.ToDictionary(x => x, x => 0);
148
149      foreach (var key in evaluationMap.Keys)
150      {
151        var bb = fragmentMap[key];
152        int len = bb.GetLength();
153        foreach (var t in trees)
154        {
155          var root = t.Root.GetSubtree(0).GetSubtree(0);
156          var nodes = root.IterateNodesPrefix().Where(x => x.GetLength() > len).ToList();
157
158          for (int i = 0; i < nodes.Count; ++i)
159          {
160            var s = nodes[i];
161            var values = interpreter.GetValues(s, dataset, rows);
162            OnlineCalculatorError error;
163            var r = OnlinePearsonsRCalculator.Calculate(values, evaluationMap[key], out error);
164            var r2 = error == OnlineCalculatorError.None ? r * r : double.NaN;
165            if (!double.IsNaN(r2) && r2 >= PhenotypicSimilarityThreshold)
166            {
167              bbFrequencies[key]++;
168              i += s.GetLength();
169            }
170          }
171        }
172      }
173      var table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
174      foreach (var pair in bbFrequencies)
175      {
176        var formatter = new SymbolicExpressionTreeStringFormatter();
177        //        var label = formatter.Format(fragmentMap[pair.Key]) + "(" + prettyLabels[pair.Key] + ")";
178        var label = prettyLabels[pair.Key];
179        if (table.Rows.ContainsKey(label))
180        {
181          var row = table.Rows[label];
182          row.Values.Add(pair.Value);
183        }
184      }
185
186      return base.Apply();
187    }
188
189    private void InitializeBuildingBlockCollection()
190    {
191      #region Add building blocks
192      // building blocks
193      const string x1 = "(variable 1 X1)";
194      const string x2 = "(variable 1 X2)";
195      const string x3 = "(variable 1 X3)";
196      const string x4 = "(variable 1 X4)";
197      const string x5 = "(variable 1 X5)";
198      const string x6 = "(variable 1 X6)";
199      const string x7 = "(variable 1 X7)";
200      // x8 is never used in the formula
201      // const string x8 = "(variable 1 X8)";
202      const string x9 = "(variable 1 X9)";
203      const string x10 = "(variable 1 X10)";
204      string s1 = String.Format("(* {0} {1})", x1, x2);
205      string s2 = String.Format("(* {0} {1})", x3, x4);
206      string s3 = String.Format("(* {0} {1})", x5, x6);
207      string s4 = String.Format("(* (* {0} {1}) {2})", x1, x7, x9);
208      string s5 = String.Format("(* (* {0} {1}) {2})", x3, x6, x10);
209      string s6 = String.Format("(+ {0} {1})", s1, s2); // x1x2 + x3x4
210      string s7 = String.Format("(+ {0} {1})", s1, s3); // x1x2 + x5x6
211      string s8 = String.Format("(+ {0} {1})", s2, s3); // x3x4 + x5x6
212      string s9 = String.Format("(+ (+ {0} {1}) {2})", s1, s2, s3); // x1x2 + x3x4 + x5x6
213      string s10 = String.Format("(+ (+ {0} {1}) {2})", s4, s5, s9); // x1x2 + x3x4 + x5x6 + x1x7x9 + x3x6x10
214      prettyLabels[s1] = "X1*X2";
215      prettyLabels[s2] = "X3*X4";
216      prettyLabels[s3] = "X5*X6";
217      prettyLabels[s4] = "X1*X7*X9";
218      prettyLabels[s5] = "X3*X6*X10";
219      prettyLabels[s6] = prettyLabels[s1] + " + " + prettyLabels[s2];
220      prettyLabels[s7] = prettyLabels[s1] + " + " + prettyLabels[s3];
221      prettyLabels[s8] = prettyLabels[s2] + " + " + prettyLabels[s3];
222      prettyLabels[s9] = prettyLabels[s1] + " + " + prettyLabels[s2] + " + " + prettyLabels[s3];
223      prettyLabels[s10] = prettyLabels[s9] + " + " + prettyLabels[s4] + " + " + prettyLabels[s5];
224      #endregion
225      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
226      var dataset = ProblemDataParameter.ActualValue.Dataset;
227      var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();
228
229      foreach (var s in new[] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 })
230      {
231        if (evaluationMap.ContainsKey(s)) continue;
232        var t = importer.Import(s);
233        evaluationMap.Add(s, interpreter.GetSymbolicExpressionTreeValues(t, dataset, rows).ToList());
234        fragmentMap.Add(s, t.Root.GetSubtree(0).GetSubtree(0));
235      }
236
237      var results = ResultCollectionParameter.ActualValue;
238      DataTable table;
239      if (!results.ContainsKey(BuildingBlocksFrequenciesTableName))
240      {
241        table = new DataTable(BuildingBlocksFrequenciesTableName);
242        results.Add(new Result(BuildingBlocksFrequenciesTableName, table));
243      }
244      else
245      {
246        table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
247      }
248      table.Rows.Clear();
249      foreach (var key in evaluationMap.Keys)
250      {
251        table.Rows.Add(new DataRow(prettyLabels[key]) { VisualProperties = { StartIndexZero = true } });
252      }
253    }
254  }
255}
Note: See TracBrowser for help on using the repository browser.