Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/GeneticInformationItem.cs @ 13806

Last change on this file since 13806 was 5024, checked in by swinkler, 14 years ago

Worked on population diversity for GP (#1278): Moved GeneticInformationItem class into separate file.

File size: 16.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Core;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26using HeuristicLab.Optimization;
27using HeuristicLab.Parameters;
28using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
30
31namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
32
33  public class GeneticInformationItem {
34
35    private Type myAncestorDefinition;
36    public Type AncestorDefinition {
37      get { return myAncestorDefinition; }
38    }
39
40    private int myAncestorIndex;
41    public int AncestorIndex {
42      get { return myAncestorIndex; }
43    }
44
45    private Type myDescendantDefinition;
46    public Type DescendantDefinition {
47      get { return myDescendantDefinition; }
48    }
49
50    private int myAncestorLevel;
51    public int AncestorLevel {
52      get { return myAncestorLevel; }
53    }
54
55    private double myDescendantCoefficient;
56    public double DescendantCoefficient {
57      get { return myDescendantCoefficient; }
58    }
59
60    private double myDescendantVariableIndex;
61    public double DescendantVariableIndex {
62      get { return myDescendantVariableIndex; }
63    }
64
65    private int myDescendantTimeOffset;
66    public int DescendantTimeOffset {
67      get { return myDescendantTimeOffset; }
68    }
69
70    private SymbolicExpressionTreeNode myDescendantTreeNode;
71    public SymbolicExpressionTreeNode DescendantTreeNode {
72      get { return myDescendantTreeNode; }
73    }
74
75    private int myDescendantLevel;
76    public int DescendantLevel {
77      get { return myDescendantLevel; }
78    }
79
80    public int LevelDelta {
81      get { return (myDescendantLevel - myAncestorLevel); }
82    }
83
84    public GeneticInformationItem(SymbolicExpressionTreeNode parentNode, GeneticInformationItem descendantGeneticInformationItem,
85        int ancestorIndex, int parentNodeLevel) {
86      myAncestorIndex = ancestorIndex;
87      myAncestorLevel = parentNodeLevel;
88      myAncestorDefinition = parentNode.Symbol.GetType();
89      myDescendantCoefficient = descendantGeneticInformationItem.DescendantCoefficient;
90      myDescendantDefinition = descendantGeneticInformationItem.DescendantDefinition;
91      myDescendantTimeOffset = descendantGeneticInformationItem.DescendantTimeOffset;
92      myDescendantVariableIndex = descendantGeneticInformationItem.DescendantVariableIndex;
93      myDescendantLevel = descendantGeneticInformationItem.DescendantLevel;
94      myDescendantTreeNode = descendantGeneticInformationItem.DescendantTreeNode;
95    }
96
97    public static IList<GeneticInformationItem> CopyList(IList<GeneticInformationItem> geneticInformationItemsList) {
98      List<GeneticInformationItem> list = new List<GeneticInformationItem>(geneticInformationItemsList.Count);
99      list.AddRange(geneticInformationItemsList);
100      return list;
101    }
102
103    public static string GetKey(GeneticInformationItem item) {
104      return item.AncestorDefinition.Name.ToString() + "," + item.DescendantDefinition.Name.ToString();
105    }
106
107    public static IDictionary<string, IList<GeneticInformationItem>> GetDictionary(IList<GeneticInformationItem> geneticInformationItemsList) {
108      IDictionary<string, IList<GeneticInformationItem>> dictionary = new Dictionary<string, IList<GeneticInformationItem>>();
109      foreach (GeneticInformationItem item in geneticInformationItemsList) {
110        string key = GetKey(item);
111        if (!dictionary.ContainsKey(key))
112          dictionary.Add(key, new List<GeneticInformationItem>());
113        dictionary[key].Add(item);
114      }
115      return dictionary;
116    }
117
118    public static IDictionary<string, IList<GeneticInformationItem>> CopyDictionary(IDictionary<string, IList<GeneticInformationItem>> dictionary) {
119      IDictionary<string, IList<GeneticInformationItem>> copy = new Dictionary<string, IList<GeneticInformationItem>>();
120      foreach (KeyValuePair<string, IList<GeneticInformationItem>> pair in dictionary) {
121        copy.Add(pair.Key, CopyList(pair.Value));
122      }
123      return copy;
124    }
125
126    public static GeneticInformationItem FindBestPendant(GeneticInformationItem item, IList<GeneticInformationItem> comparisonItems,
127        double constantMinimum, double constantMaximum, double variableWeightSigma,
128        int maximumTreeHeight, int minimumTimeOffset, int maximumTimeOffset,
129        double levelDifferenceCoefficient, double ancestorIndexCoefficient,
130        double constantValueCoefficient, double variableWeightCoefficient, double timeOffsetCoefficient, double variableIndexCoefficient,
131        bool additiveSimilarityCalculation,
132        out double bestPendantSimilarity) {
133      int maxSimilarityIndex = -1;
134      double similarity, maxSimilarity = -double.MaxValue;
135      for (int i = 0; i < comparisonItems.Count; i++) {
136        similarity = Similarity(item, comparisonItems[i], constantMinimum, constantMaximum, variableWeightSigma, maximumTreeHeight, minimumTimeOffset, maximumTimeOffset,
137          levelDifferenceCoefficient, ancestorIndexCoefficient, constantValueCoefficient, variableWeightSigma, timeOffsetCoefficient,
138          variableWeightCoefficient, additiveSimilarityCalculation);
139        if (!double.IsNaN(similarity) && similarity > maxSimilarity) {
140          maxSimilarity = similarity;
141          maxSimilarityIndex = i;
142        }
143      }
144      bestPendantSimilarity = maxSimilarity;
145      if (maxSimilarityIndex >= 0)
146        return comparisonItems[maxSimilarityIndex];
147      else
148        return null;
149    }
150
151    public static double Similarity(GeneticInformationItem item1, GeneticInformationItem item2,
152        double constantMinimum, double constantMaximum, double variableWeightSigma,
153        int maximumTreeHeight, int minimumTimeOffset, int maximumTimeOffset,
154        double levelDifferenceCoefficient, double ancestorIndexCoefficient,
155        double constantValueCoefficient, double variableWeightCoefficient, double timeOffsetCoefficient, double variableIndexCoefficient,
156        bool additiveSimilarityCalculation) {
157
158      if (item1.AncestorDefinition != item2.AncestorDefinition || item1.DescendantDefinition != item2.DescendantDefinition)
159        return double.NaN;
160
161      // the two items for sure have the same behavior definitions
162
163      #region initialize punishments
164
165      double punishmentContributionSum = 0;
166      double punishmentCoefficientsProduct = 1;
167
168      double ancestorIndexDifferencePunishment = 0;
169      double levelDifferencePunishment = 0;
170
171      double descendantConstantValueDifferencePunishment = 0;
172      double descendantVariableWeightDifferencePunishment = 0;
173      double descendantTimeOffsetDifferencePunishment = 0;
174      double descendantVariableIndexDifferencePunishment = 0;
175
176      #endregion
177
178      if (levelDifferenceCoefficient > 0) {
179        levelDifferencePunishment = item1.LevelDelta - item2.LevelDelta;
180        if (levelDifferencePunishment < 0)
181          levelDifferencePunishment = -levelDifferencePunishment;
182        levelDifferencePunishment /= maximumTreeHeight;
183        if (levelDifferencePunishment > 1)
184          levelDifferencePunishment = 1;
185        levelDifferencePunishment *= levelDifferenceCoefficient;
186        punishmentContributionSum += levelDifferenceCoefficient;
187        punishmentCoefficientsProduct *= (1 - levelDifferenceCoefficient);
188      }
189      if (ancestorIndexCoefficient > 0) {
190        if (item1.AncestorIndex != item2.AncestorIndex)
191          ancestorIndexDifferencePunishment = 1;
192        else
193          ancestorIndexDifferencePunishment = 0;
194        ancestorIndexDifferencePunishment *= ancestorIndexCoefficient;
195        punishmentContributionSum += ancestorIndexCoefficient;
196        punishmentCoefficientsProduct *= (1 - ancestorIndexCoefficient);
197      }
198
199      if (item1.DescendantTreeNode is ConstantTreeNode) {
200        if (constantValueCoefficient > 0) {
201          double constantValueCoefficientDifference = Math.Abs(item1.DescendantCoefficient - item2.DescendantCoefficient);
202          // assume uniform distribution within given minimum and maximum
203          descendantConstantValueDifferencePunishment = (constantValueCoefficientDifference / (constantMaximum - constantMinimum));
204          if (descendantConstantValueDifferencePunishment > 1)
205            descendantConstantValueDifferencePunishment = 1;
206          descendantConstantValueDifferencePunishment *= constantValueCoefficient;
207          punishmentContributionSum += constantValueCoefficient;
208          punishmentCoefficientsProduct *= (1 - constantValueCoefficient);
209        }
210      }
211      if (item1.DescendantTreeNode is VariableTreeNode) {
212        if (variableWeightCoefficient > 0) {
213          double variableWeightDifference = Math.Abs(item1.DescendantCoefficient - item2.DescendantCoefficient);
214          // assume normal distribution within given sigma
215          descendantVariableWeightDifferencePunishment = variableWeightDifference / (variableWeightSigma * 4);
216          if (descendantVariableWeightDifferencePunishment > 1)
217            descendantVariableWeightDifferencePunishment = 1;
218          descendantVariableWeightDifferencePunishment *= variableWeightCoefficient;
219          punishmentContributionSum += variableWeightCoefficient;
220          punishmentCoefficientsProduct *= (1 - variableWeightCoefficient);
221        }
222        if (timeOffsetCoefficient > 0) {
223          double timeOffsetDifference = Math.Abs(item1.DescendantTimeOffset - item2.DescendantTimeOffset);
224          if (maximumTimeOffset > 0)
225            descendantTimeOffsetDifferencePunishment = timeOffsetDifference / (maximumTimeOffset - minimumTimeOffset);
226          descendantTimeOffsetDifferencePunishment *= timeOffsetCoefficient;
227          punishmentContributionSum += timeOffsetCoefficient;
228          punishmentCoefficientsProduct *= (1 - timeOffsetCoefficient);
229        }
230        if (variableIndexCoefficient > 0) {
231          if (item1.DescendantVariableIndex != item2.DescendantVariableIndex)
232            descendantVariableIndexDifferencePunishment = 1;
233          else
234            descendantVariableIndexDifferencePunishment = 0;
235          descendantVariableIndexDifferencePunishment *= variableIndexCoefficient;
236          punishmentContributionSum += variableIndexCoefficient;
237          punishmentCoefficientsProduct *= (1 - variableIndexCoefficient);
238        }
239      }
240
241      double result;
242
243      if (additiveSimilarityCalculation) {
244        double punishmentsSum = ancestorIndexDifferencePunishment + levelDifferencePunishment +
245          descendantConstantValueDifferencePunishment + descendantVariableWeightDifferencePunishment +
246          descendantTimeOffsetDifferencePunishment + descendantVariableIndexDifferencePunishment;
247        result = (1 - punishmentsSum / punishmentContributionSum);
248      } else {
249        result =
250          (1 - ancestorIndexDifferencePunishment) *
251          (1 - levelDifferencePunishment) *
252          (1 - descendantConstantValueDifferencePunishment) *
253          (1 - descendantVariableWeightDifferencePunishment) *
254          (1 - descendantTimeOffsetDifferencePunishment) *
255          (1 - descendantVariableIndexDifferencePunishment);
256        // worst possible result is (1-punishmentCoefficientsProduct), so scale linearly to [0;1]:
257        result = (result - punishmentCoefficientsProduct) / (1 - punishmentCoefficientsProduct);
258      }
259
260      if (result < 0 || result > 1)
261        throw new InvalidOperationException("ERROR in GeneticInformationItem.Similarity: An invalid similarity value (" + result.ToString() + ") has been calculated.");
262
263      return result;
264
265    }
266
267    public static IList<GeneticInformationItem> GetGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames,
268        int MinimumLevelDelta, int MaximumLevelDelta) {
269      // first we have to collect all items, then we filter; it is not possible to filter while collecting because the items are
270      // collected recursively and used for collecting the parents' items.
271      if (MinimumLevelDelta > MaximumLevelDelta)
272        return new List<GeneticInformationItem>();
273      IList<GeneticInformationItem> list = GetGeneticInformationItems(node, variableNames, 0);
274      List<GeneticInformationItem> resultList = new List<GeneticInformationItem>();
275      foreach (GeneticInformationItem item in list)
276        if (item.LevelDelta >= MinimumLevelDelta && item.LevelDelta <= MaximumLevelDelta)
277          resultList.Add(item);
278      return resultList;
279    }
280
281    private static IList<GeneticInformationItem> GetGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames, int level) {
282      // Idea: collect all descendants' lists and then add new items using the retrieved ones.
283      // This should save lots of time and reduce complexity of the items retrieval process.
284      // Program roots are not considered, neither are start symbol nodes
285      if (node.Symbol is ProgramRootSymbol)
286        return GetGeneticInformationItems(node.SubTrees[0], variableNames, level + 1);
287      List<GeneticInformationItem> list = new List<GeneticInformationItem>();
288      // add item for this node:
289      if (!(node.Symbol is StartSymbol)) {
290        list.Add(new GeneticInformationItem(node, variableNames, level));
291      }
292      // add items for the descendants, but prevent multiple references to descendant nodes:
293      List<SymbolicExpressionTreeNode> descendantNodes = new List<SymbolicExpressionTreeNode>();
294      for (int i = 0; i < node.SubTrees.Count; i++) {
295        IList<GeneticInformationItem> descendantItems = GetGeneticInformationItems(node.SubTrees[i], variableNames, level + 1);
296        list.AddRange(descendantItems);
297        if (!(node.Symbol is StartSymbol))
298          foreach (GeneticInformationItem item in descendantItems) {
299            if (!descendantNodes.Contains(item.DescendantTreeNode)) {
300              list.Add(new GeneticInformationItem(node, item, i, level));
301              descendantNodes.Add(item.DescendantTreeNode);
302            }
303          }
304      }
305      return list;
306    }
307
308    private GeneticInformationItem(SymbolicExpressionTreeNode node, List<string> variableNames, int level) {
309      myAncestorIndex = -1;
310      VariableTreeNode variableTreeNode = node as VariableTreeNode;
311      LaggedVariableTreeNode laggedVariableTreeNode = node as LaggedVariableTreeNode;
312      ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
313      myAncestorDefinition = node.Symbol.GetType();
314      myDescendantDefinition = myAncestorDefinition;
315      if (variableTreeNode != null)
316        myDescendantCoefficient = variableTreeNode.Weight;
317      else if (constantTreeNode != null)
318        myDescendantCoefficient = constantTreeNode.Value;
319      else
320        myDescendantCoefficient = double.NaN;
321      if (laggedVariableTreeNode != null)
322        myDescendantTimeOffset = laggedVariableTreeNode.Lag;
323      else
324        myDescendantTimeOffset = 0;
325      if (variableTreeNode != null)
326        myDescendantVariableIndex = variableNames.IndexOf(variableTreeNode.VariableName);
327      else
328        myDescendantVariableIndex = -1;
329      myAncestorLevel = level;
330      myDescendantLevel = level;
331      myDescendantTreeNode = node;
332    }
333
334    public override string ToString() {
335      return "ancestor: " + AncestorDefinition.Name.ToString() + ", [" + AncestorIndex + "]; "
336        + "descendant: " + DescendantDefinition.Name.ToString() + " (varIndex " + DescendantVariableIndex + ", "
337        + DescendantCoefficient + ", t-" + DescendantTimeOffset + ");"
338        + " level delta = " + DescendantLevel + "-" + AncestorLevel + " = " + LevelDelta;
339    }
340
341  }
342
343}
Note: See TracBrowser for help on using the repository browser.