source: branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FineGrainedStructuralPopulationDiversityAnalyzer.cs @ 5024

Last change on this file since 5024 was 5024, checked in by swinkler, 12 years ago

Worked on population diversity for GP (#1278): Moved GeneticInformationItem class into separate file.

File size: 13.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using HeuristicLab.Analysis;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Optimization.Operators;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34using System.Collections.Generic;
35using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
36using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
37
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the population diversity using fine grained structural tree similarity estimation.
  /// </summary>
  /// <remarks>
  /// Every solution is decomposed into a list of <see cref="GeneticInformationItem"/>s. The similarity of
  /// solution i to solution j is the sum, over all items of i, of the similarity to the best matching item
  /// ("pendant") found in j, divided by the number of items of i. The resulting matrix is therefore not
  /// necessarily symmetric.
  /// </remarks>
  [Item("FineGrainedStructuralPopulationDiversityAnalyzer", "An operator that analyzes the population diversity using fine grained structural tree similarity estimation.")]
  [StorableClass]
  public sealed class FineGrainedStructuralPopulationDiversityAnalyzer : SymbolicRegressionPopulationDiversityAnalyzer {

    #region Properties and Parameters

    private const string FunctionTreeGrammarParameterName = "FunctionTreeGrammar";
    private const string MinimumLevelDeltaParameterName = "MinimumLevelDelta";
    private const string MaximumLevelDeltaParameterName = "MaximumLevelDelta";
    private const string PreventMultipleComparisonContributionParameterName = "PreventMultipleComparisonContribution";
    private const string MaximumExpressionDepthParameterName = "MaxExpressionDepth";
    private const string LevelDifferenceCoefficientParameterName = "LevelDifferenceCoefficient";
    private const string AncestorIndexCoefficientParameterName = "AncestorIndexCoefficient";
    private const string ConstantValueCoefficientParameterName = "ConstantValueCoefficient";
    private const string VariableWeightCoefficientParameterName = "VariableWeightCoefficient";
    private const string TimeOffsetCoefficientParameterName = "TimeOffsetCoefficient";
    private const string VariableIndexCoefficientParameterName = "VariableIndexCoefficient";
    private const string AdditiveSimilarityCalculationParameterName = "AdditiveSimilarityCalculation";

    /// <summary>The grammar that is used for symbolic regression models.</summary>
    public IValueLookupParameter<GlobalSymbolicExpressionGrammar> FunctionTreeGrammarParameter {
      get { return (IValueLookupParameter<GlobalSymbolicExpressionGrammar>)Parameters[FunctionTreeGrammarParameterName]; }
    }
    public GlobalSymbolicExpressionGrammar FunctionTreeGrammar {
      get { return FunctionTreeGrammarParameter.ActualValue; }
    }
    /// <summary>Maximal depth of the analyzed symbolic expressions.</summary>
    public IValueLookupParameter<IntValue> MaximumExpressionDepthParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[MaximumExpressionDepthParameterName]; }
    }
    public int MaximumExpressionDepth {
      get { return MaximumExpressionDepthParameter.ActualValue.Value; }
    }

    /// <summary>Minimum value for the level delta of the analyzed genetic information items.</summary>
    public IValueParameter<IntValue> MinimumLevelDeltaParameter {
      get { return (IValueParameter<IntValue>)Parameters[MinimumLevelDeltaParameterName]; }
    }
    public int MinimumLevelDelta {
      get { return MinimumLevelDeltaParameter.Value.Value; }
    }
    /// <summary>Maximum value for the level delta of the analyzed genetic information items.</summary>
    public IValueParameter<IntValue> MaximumLevelDeltaParameter {
      get { return (IValueParameter<IntValue>)Parameters[MaximumLevelDeltaParameterName]; }
    }
    public int MaximumLevelDelta {
      get { return MaximumLevelDeltaParameter.Value.Value; }
    }
    /// <summary>
    /// Flag that denotes whether genetic information items are hindered from contributing to the
    /// similarity function multiple times.
    /// </summary>
    public IValueParameter<BoolValue> PreventMultipleComparisonContributionParameter {
      get { return (IValueParameter<BoolValue>)Parameters[PreventMultipleComparisonContributionParameterName]; }
    }
    public bool PreventMultipleComparisonContribution {
      get { return PreventMultipleComparisonContributionParameter.Value.Value; }
    }

    /// <summary>Weighting coefficient for level differences.</summary>
    public IValueParameter<DoubleValue> LevelDifferenceCoefficientParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[LevelDifferenceCoefficientParameterName]; }
    }
    public double LevelDifferenceCoefficient {
      get { return LevelDifferenceCoefficientParameter.Value.Value; }
    }
    /// <summary>Weighting coefficient for ancestor index differences.</summary>
    public IValueParameter<DoubleValue> AncestorIndexCoefficientParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[AncestorIndexCoefficientParameterName]; }
    }
    public double AncestorIndexCoefficient {
      get { return AncestorIndexCoefficientParameter.Value.Value; }
    }
    /// <summary>Weighting coefficient for constant value differences.</summary>
    public IValueParameter<DoubleValue> ConstantValueCoefficientParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[ConstantValueCoefficientParameterName]; }
    }
    public double ConstantValueCoefficient {
      get { return ConstantValueCoefficientParameter.Value.Value; }
    }
    /// <summary>Weighting coefficient for variable weight differences.</summary>
    public IValueParameter<DoubleValue> VariableWeightCoefficientParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[VariableWeightCoefficientParameterName]; }
    }
    public double VariableWeightCoefficient {
      get { return VariableWeightCoefficientParameter.Value.Value; }
    }
    /// <summary>Weighting coefficient for time lag differences.</summary>
    public IValueParameter<DoubleValue> TimeOffsetCoefficientParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[TimeOffsetCoefficientParameterName]; }
    }
    public double TimeOffsetCoefficient {
      get { return TimeOffsetCoefficientParameter.Value.Value; }
    }
    /// <summary>
    /// Alias of <see cref="TimeOffsetCoefficient"/>; the duplicated "Coefficient" suffix is a historical
    /// misnomer that is kept so that existing callers continue to compile.
    /// </summary>
    public double TimeOffsetCoefficientCoefficient {
      get { return TimeOffsetCoefficient; }
    }
    /// <summary>Weighting coefficient for variable index differences.</summary>
    public IValueParameter<DoubleValue> VariableIndexCoefficientParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[VariableIndexCoefficientParameterName]; }
    }
    public double VariableIndexCoefficient {
      get { return VariableIndexCoefficientParameter.Value.Value; }
    }
    /// <summary>
    /// Flag that denotes whether the similarity of genetic information items shall be calculated using
    /// additive calculation.
    /// </summary>
    public IValueParameter<BoolValue> AdditiveSimilarityCalculationParameter {
      get { return (IValueParameter<BoolValue>)Parameters[AdditiveSimilarityCalculationParameterName]; }
    }
    public bool AdditiveSimilarityCalculation {
      get { return AdditiveSimilarityCalculationParameter.Value.Value; }
    }

    #endregion

    [StorableConstructor]
    private FineGrainedStructuralPopulationDiversityAnalyzer(bool deserializing) : base(deserializing) { }
    private FineGrainedStructuralPopulationDiversityAnalyzer(FineGrainedStructuralPopulationDiversityAnalyzer original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates a new analyzer and registers all of its parameters with their default values.
    /// </summary>
    public FineGrainedStructuralPopulationDiversityAnalyzer() : base() {
      Parameters.Add(new ValueLookupParameter<GlobalSymbolicExpressionGrammar>(FunctionTreeGrammarParameterName, "The grammar that is used for symbolic regression models."));
      Parameters.Add(new ValueLookupParameter<IntValue>(MaximumExpressionDepthParameterName, "Maximal depth of the analyzed symbolic expressions."));
      Parameters.Add(new ValueParameter<IntValue>(MinimumLevelDeltaParameterName, "Minimum value for the level delta of the analyzed genetic information items.", new IntValue(0)));
      Parameters.Add(new ValueParameter<IntValue>(MaximumLevelDeltaParameterName, "Maximum value for the level delta of the analyzed genetic information items.", new IntValue(int.MaxValue)));
      Parameters.Add(new ValueParameter<BoolValue>(PreventMultipleComparisonContributionParameterName, "Flag that denotes whether genetic information items are hindered from contributing to the similarity function multiple times.", new BoolValue(false)));
      Parameters.Add(new ValueParameter<DoubleValue>(LevelDifferenceCoefficientParameterName, "Weighting coefficient for level differences.", new DoubleValue(0.2)));
      Parameters.Add(new ValueParameter<DoubleValue>(AncestorIndexCoefficientParameterName, "Weighting coefficient for ancestor index differences.", new DoubleValue(0.2)));
      Parameters.Add(new ValueParameter<DoubleValue>(ConstantValueCoefficientParameterName, "Weighting coefficient for constant value differences.", new DoubleValue(0.2)));
      Parameters.Add(new ValueParameter<DoubleValue>(VariableWeightCoefficientParameterName, "Weighting coefficient for variable weight differences.", new DoubleValue(0.2)));
      Parameters.Add(new ValueParameter<DoubleValue>(TimeOffsetCoefficientParameterName, "Weighting coefficient for time lag differences.", new DoubleValue(0.2)));
      Parameters.Add(new ValueParameter<DoubleValue>(VariableIndexCoefficientParameterName, "Weighting coefficient for variable index differences.", new DoubleValue(0.2)));
      Parameters.Add(new ValueParameter<BoolValue>(AdditiveSimilarityCalculationParameterName, "Flag that denotes whether the similarity of genetic information items shall be calculated using additive calculation.", new BoolValue(true)));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new FineGrainedStructuralPopulationDiversityAnalyzer(this, cloner);
    }

    /// <summary>
    /// Calculates the pairwise structural similarities of the given solutions.
    /// </summary>
    /// <param name="solutions">The symbolic expression trees to compare.</param>
    /// <returns>
    /// A square matrix with one row and column per solution. Diagonal entries are 1. Entry [i, j] is the
    /// summed best-pendant similarity of the genetic information items of solution i with respect to
    /// solution j, divided by the number of items of solution i (0 if solution i yields no items).
    /// </returns>
    protected override double[,] CalculateSimilarities(SymbolicExpressionTree[] solutions) {
      // collect information stored in the problem's parameters
      double variableWeightSigma = 0;
      double constantMinimumValue = 0;
      double constantMaximumValue = 0;
      int minimumTimeOffset = 0;
      int maximumTimeOffset = 0;
      foreach (Symbol symbol in FunctionTreeGrammar.Symbols) {
        Constant constant = symbol as Constant;
        if (constant != null) {
          constantMinimumValue = constant.MinValue;
          constantMaximumValue = constant.MaxValue;
        }
        DataAnalysis.Symbolic.Symbols.Variable variable = symbol as DataAnalysis.Symbolic.Symbols.Variable;
        if (variable != null)
          variableWeightSigma = variable.WeightSigma;
        LaggedVariable laggedVariable = symbol as LaggedVariable;
        if (laggedVariable != null) {
          minimumTimeOffset = laggedVariable.MinLag;
          maximumTimeOffset = laggedVariable.MaxLag;
        }
      }

      // read all parameter values once; they do not change during this call, and the original code
      // re-resolved them through the parameter collection for every single compared item
      int minimumLevelDelta = MinimumLevelDelta;
      int maximumLevelDelta = MaximumLevelDelta;
      int maximumExpressionDepth = MaximumExpressionDepth;
      double levelDifferenceCoefficient = LevelDifferenceCoefficient;
      double ancestorIndexCoefficient = AncestorIndexCoefficient;
      double constantValueCoefficient = ConstantValueCoefficient;
      double variableWeightCoefficient = VariableWeightCoefficient;
      double timeOffsetCoefficient = TimeOffsetCoefficient;
      double variableIndexCoefficient = VariableIndexCoefficient;
      bool additiveSimilarityCalculation = AdditiveSimilarityCalculation;
      bool preventMultipleComparisonContribution = PreventMultipleComparisonContribution;

      int n = solutions.Length;
      List<string> variableNames = new List<string>();
      foreach (StringValue variableName in ProblemData.InputVariables) {
        variableNames.Add(variableName.Value);
      }
      variableNames.Add(ProblemData.TargetVariable.Value);

      // collect genetic information item lists and store them also in dictionaries
      // (the array is allocated with the interface element type so that any IList implementation
      // returned by the helper can be stored without an array covariance failure)
      IList<GeneticInformationItem>[] geneticInformationItemsLists = new IList<GeneticInformationItem>[n];
      IDictionary<string, IList<GeneticInformationItem>>[] geneticInformationItemsListsDictionaries = new IDictionary<string, IList<GeneticInformationItem>>[n];
      for (int i = 0; i < n; i++) {
        geneticInformationItemsLists[i] = GeneticInformationItem.GetGeneticInformationItems(solutions[i].Root, variableNames, minimumLevelDelta, maximumLevelDelta);
        geneticInformationItemsListsDictionaries[i] = GeneticInformationItem.GetDictionary(geneticInformationItemsLists[i]);
      }

      // calculate solution similarities
      double[,] similarities = new double[n, n];
      for (int i = 0; i < n; i++) {
        IList<GeneticInformationItem> solution1GeneticItems = geneticInformationItemsLists[i];
        int solution1ItemCount = solution1GeneticItems.Count;
        for (int j = 0; j < n; j++) {
          if (i == j) {
            similarities[i, j] = 1;
            continue;
          }
          // work on a copy because matched pendants may be removed from the candidate lists below
          IDictionary<string, IList<GeneticInformationItem>> solution2GeneticItemsDictionary = GeneticInformationItem.CopyDictionary(geneticInformationItemsListsDictionaries[j]);
          double similarity = 0;
          for (int k = 0; k < solution1ItemCount; k++) {
            GeneticInformationItem item = solution1GeneticItems[k];
            IList<GeneticInformationItem> candidates;
            // items without any candidate sharing their key contribute nothing
            if (!solution2GeneticItemsDictionary.TryGetValue(GeneticInformationItem.GetKey(item), out candidates))
              continue;
            double bestPendantSimilarity;
            GeneticInformationItem bestPendant = GeneticInformationItem.FindBestPendant(item, candidates,
              constantMinimumValue, constantMaximumValue, variableWeightSigma,
              maximumExpressionDepth, minimumTimeOffset, maximumTimeOffset,
              levelDifferenceCoefficient, ancestorIndexCoefficient, constantValueCoefficient, variableWeightCoefficient,
              timeOffsetCoefficient, variableIndexCoefficient, additiveSimilarityCalculation,
              out bestPendantSimilarity);
            if (bestPendant == null)
              continue;
            similarity += bestPendantSimilarity;
            if (preventMultipleComparisonContribution)
              candidates.Remove(bestPendant);
          }
          // a solution without genetic information items would otherwise produce 0.0 / 0 = NaN
          similarities[i, j] = solution1ItemCount > 0 ? similarity / solution1ItemCount : 0;
        }
      }
      return similarities;
    }

  }

}
Note: See TracBrowser for help on using the repository browser.