Changeset 5024


Ignore:
Timestamp:
12/05/10 22:10:49 (11 years ago)
Author:
swinkler
Message:

Worked on population diversity for GP (#1278): Moved GeneticInformationItem class into separate file.

Location:
branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/HeuristicLab.Problems.DataAnalysis.Regression-3.3.csproj

    r4886 r5024  
    182182    <Compile Include="SupportVectorRegression\SupportVectorRegressionSolution.cs" />
    183183    <Compile Include="Symbolic\Analyzers\BestSymbolicRegressionSolutionAnalyzer.cs" />
     184    <Compile Include="Symbolic\Analyzers\GeneticInformationItem.cs" />
    184185    <Compile Include="Symbolic\Analyzers\FineGrainedStructuralPopulationDiversityAnalyzer.cs" />
    185186    <Compile Include="Symbolic\Analyzers\SymbolicRegressionPopulationDiversityAnalyzer.cs" />
  • branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FineGrainedStructuralPopulationDiversityAnalyzer.cs

    r4949 r5024  
    4343  [StorableClass]
    4444  public sealed class FineGrainedStructuralPopulationDiversityAnalyzer : SymbolicRegressionPopulationDiversityAnalyzer {
    45 
    46     // properties: min level delts, max level delta, etc.
    4745
    4846    #region Properties and Parameters
     
    192190      IDictionary<string, IList<GeneticInformationItem>>[] geneticInformationItemsListsDictionaries = new IDictionary<string, IList<GeneticInformationItem>>[n];
    193191      for (int i = 0; i < n; i++) {
    194         geneticInformationItemsLists[i] = GeneticInformationItem.getGeneticInformationItems(solutions[i].Root, variableNames, MinimumLevelDelta, MaximumLevelDelta);
     192        geneticInformationItemsLists[i] = GeneticInformationItem.GetGeneticInformationItems(solutions[i].Root, variableNames, MinimumLevelDelta, MaximumLevelDelta);
    195193        geneticInformationItemsListsDictionaries[i] = GeneticInformationItem.GetDictionary(geneticInformationItemsLists[i]);
    196194      }
     
    233231    }
    234232
    235     #region private class GeneticInformationItem
    236 
    237     private class GeneticInformationItem {
    238 
    239       private Type myAncestorDefinition;
    240       public Type AncestorDefinition {
    241         get { return myAncestorDefinition; }
    242       }
    243 
    244       private int myAncestorIndex;
    245       public int AncestorIndex {
    246         get { return myAncestorIndex; }
    247       }
    248 
    249       private Type myDescendantDefinition;
    250       public Type DescendantDefinition {
    251         get { return myDescendantDefinition; }
    252       }
    253 
    254       private int myAncestorLevel;
    255       public int AncestorLevel {
    256         get { return myAncestorLevel; }
    257       }
    258 
    259       private double myDescendantCoefficient;
    260       public double DescendantCoefficient {
    261         get { return myDescendantCoefficient; }
    262       }
    263 
    264       private double myDescendantVariableIndex;
    265       public double DescendantVariableIndex {
    266         get { return myDescendantVariableIndex; }
    267       }
    268 
    269       private int myDescendantTimeOffset;
    270       public int DescendantTimeOffset {
    271         get { return myDescendantTimeOffset; }
    272       }
    273 
    274       private SymbolicExpressionTreeNode myDescendantTreeNode;
    275       public SymbolicExpressionTreeNode DescendantTreeNode {
    276         get { return myDescendantTreeNode; }
    277       }
    278 
    279       private int myDescendantLevel;
    280       public int DescendantLevel {
    281         get { return myDescendantLevel; }
    282       }
    283 
    284       public int LevelDelta {
    285         get { return (myDescendantLevel - myAncestorLevel); }
    286       }
    287 
    288       public static IList<GeneticInformationItem> CopyList(IList<GeneticInformationItem> GeneticInformationItemsList) {
    289         List<GeneticInformationItem> list = new List<GeneticInformationItem>(GeneticInformationItemsList.Count);
    290         list.AddRange(GeneticInformationItemsList);
    291         return list;
    292       }
    293 
    294       public static string GetKey(GeneticInformationItem item) {
    295         return item.AncestorDefinition.Name.ToString() + "," + item.DescendantDefinition.Name.ToString();
    296       }
    297 
    298       public static IDictionary<string, IList<GeneticInformationItem>> GetDictionary(IList<GeneticInformationItem> GeneticInformationItemsList) {
    299         IDictionary<string, IList<GeneticInformationItem>> dictionary = new Dictionary<string, IList<GeneticInformationItem>>();
    300         foreach (GeneticInformationItem item in GeneticInformationItemsList) {
    301           string key = GetKey(item);
    302           if (!dictionary.ContainsKey(key))
    303             dictionary.Add(key, new List<GeneticInformationItem>());
    304           dictionary[key].Add(item);
    305         }
    306         return dictionary;
    307       }
    308 
    309       public static IDictionary<string, IList<GeneticInformationItem>> CopyDictionary(IDictionary<string, IList<GeneticInformationItem>> Dictionary) {
    310         IDictionary<string, IList<GeneticInformationItem>> copy = new Dictionary<string, IList<GeneticInformationItem>>();
    311         foreach (KeyValuePair<string, IList<GeneticInformationItem>> pair in Dictionary) {
    312           copy.Add(pair.Key, CopyList(pair.Value));
    313         }
    314         return copy;
    315       }
    316 
    317       public static GeneticInformationItem FindBestPendant(GeneticInformationItem Item, IList<GeneticInformationItem> ComparisonItems,
    318           double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
    319           int MaximumTreeHeight, int MinimumTimeOffset, int MaximumTimeOffset,
    320           double LevelDifferenceCoefficient, double AncestorIndexCoefficient,
    321           double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient,
    322           bool AdditiveSimilarityCalculation,
    323           out double BestPendantSimilarity) {
    324         int maxSimilarityIndex = -1;
    325         double similarity, maxSimilarity = -double.MaxValue;
    326         for (int i = 0; i < ComparisonItems.Count; i++) {
    327           similarity = Similarity(Item, ComparisonItems[i], ConstantMinimum, ConstantMaximum, VariableWeightSigma, MaximumTreeHeight, MinimumTimeOffset, MaximumTimeOffset,
    328             LevelDifferenceCoefficient, AncestorIndexCoefficient, ConstantValueCoefficient, VariableWeightSigma, TimeOffsetCoefficient,
    329             VariableWeightCoefficient, AdditiveSimilarityCalculation);
    330           if (!double.IsNaN(similarity) && similarity > maxSimilarity) {
    331             maxSimilarity = similarity;
    332             maxSimilarityIndex = i;
    333           }
    334         }
    335         BestPendantSimilarity = maxSimilarity;
    336         if (maxSimilarityIndex >= 0)
    337           return ComparisonItems[maxSimilarityIndex];
    338         else
    339           return null;
    340       }
    341 
    342       public static double Similarity(GeneticInformationItem Item1, GeneticInformationItem Item2,
    343           double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
    344           int MaximumTreeHeight, int MinimumTimeOffset, int MaximumTimeOffset,
    345           double LevelDifferenceCoefficient, double AncestorIndexCoefficient,
    346           double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient,
    347           bool AdditiveSimilarityCalculation) {
    348 
    349         if (Item1.AncestorDefinition != Item2.AncestorDefinition || Item1.DescendantDefinition != Item2.DescendantDefinition)
    350           return double.NaN;
    351 
    352         // the two items for sure have the same behavior definitions
    353 
    354         #region initialize punishments
    355 
    356         double punishmentContributionSum = 0;
    357         double punishmentCoefficientsProduct = 1;
    358 
    359         double ancestorIndexDifferencePunishment = 0;
    360         double levelDifferencePunishment = 0;
    361 
    362         double descendantConstantValueDifferencePunishment = 0;
    363         double descendantVariableWeightDifferencePunishment = 0;
    364         double descendantTimeOffsetDifferencePunishment = 0;
    365         double descendantVariableIndexDifferencePunishment = 0;
    366 
    367         #endregion
    368 
    369         if (LevelDifferenceCoefficient > 0) {
    370           levelDifferencePunishment = Item1.LevelDelta - Item2.LevelDelta;
    371           if (levelDifferencePunishment < 0)
    372             levelDifferencePunishment = -levelDifferencePunishment;
    373           levelDifferencePunishment /= MaximumTreeHeight;
    374           if (levelDifferencePunishment > 1)
    375             levelDifferencePunishment = 1;
    376           levelDifferencePunishment *= LevelDifferenceCoefficient;
    377           punishmentContributionSum += LevelDifferenceCoefficient;
    378           punishmentCoefficientsProduct *= (1 - LevelDifferenceCoefficient);
    379         }
    380         if (AncestorIndexCoefficient > 0) {
    381           if (Item1.AncestorIndex != Item2.AncestorIndex)
    382             ancestorIndexDifferencePunishment = 1;
    383           else
    384             ancestorIndexDifferencePunishment = 0;
    385           ancestorIndexDifferencePunishment *= AncestorIndexCoefficient;
    386           punishmentContributionSum += AncestorIndexCoefficient;
    387           punishmentCoefficientsProduct *= (1 - AncestorIndexCoefficient);
    388         }
    389 
    390         if (Item1.DescendantTreeNode is ConstantTreeNode) {
    391           if (ConstantValueCoefficient > 0) {
    392             double constantValueCoefficientDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
    393             // assume uniform distribution within given minimum and maximum
    394             descendantConstantValueDifferencePunishment = (constantValueCoefficientDifference / (ConstantMaximum - ConstantMinimum));
    395             if (descendantConstantValueDifferencePunishment > 1)
    396               descendantConstantValueDifferencePunishment = 1;
    397             descendantConstantValueDifferencePunishment *= ConstantValueCoefficient;
    398             punishmentContributionSum += ConstantValueCoefficient;
    399             punishmentCoefficientsProduct *= (1 - ConstantValueCoefficient);
    400           }
    401         }
    402         if(Item1.DescendantTreeNode is VariableTreeNode) {
    403           if (VariableWeightCoefficient > 0) {
    404             double variableWeightDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
    405             // assume normal distribution within given sigma
    406             descendantVariableWeightDifferencePunishment = variableWeightDifference / (VariableWeightSigma * 4);
    407             if (descendantVariableWeightDifferencePunishment > 1)
    408               descendantVariableWeightDifferencePunishment = 1;
    409             descendantVariableWeightDifferencePunishment *= VariableWeightCoefficient;
    410             punishmentContributionSum += VariableWeightCoefficient;
    411             punishmentCoefficientsProduct *= (1 - VariableWeightCoefficient);
    412           }
    413           if (TimeOffsetCoefficient > 0) {
    414             double timeOffsetDifference = Math.Abs(Item1.DescendantTimeOffset - Item2.DescendantTimeOffset);
    415             if (MaximumTimeOffset > 0)
    416               descendantTimeOffsetDifferencePunishment = timeOffsetDifference / (MaximumTimeOffset - MinimumTimeOffset);
    417             descendantTimeOffsetDifferencePunishment *= TimeOffsetCoefficient;
    418             punishmentContributionSum += TimeOffsetCoefficient;
    419             punishmentCoefficientsProduct *= (1 - TimeOffsetCoefficient);
    420           }
    421           if (VariableIndexCoefficient > 0) {
    422             if (Item1.DescendantVariableIndex != Item2.DescendantVariableIndex)
    423               descendantVariableIndexDifferencePunishment = 1;
    424             else
    425               descendantVariableIndexDifferencePunishment = 0;
    426             descendantVariableIndexDifferencePunishment *= VariableIndexCoefficient;
    427             punishmentContributionSum += VariableIndexCoefficient;
    428             punishmentCoefficientsProduct *= (1 - VariableIndexCoefficient);
    429           }
    430         }
    431 
    432         double result;
    433 
    434         if (AdditiveSimilarityCalculation) {
    435           double punishmentsSum = ancestorIndexDifferencePunishment + levelDifferencePunishment +
    436             descendantConstantValueDifferencePunishment + descendantVariableWeightDifferencePunishment +
    437             descendantTimeOffsetDifferencePunishment + descendantVariableIndexDifferencePunishment;
    438           result = (1 - punishmentsSum / punishmentContributionSum);
    439         } else {
    440           result =
    441             (1 - ancestorIndexDifferencePunishment) *
    442             (1 - levelDifferencePunishment) *
    443             (1 - descendantConstantValueDifferencePunishment) *
    444             (1 - descendantVariableWeightDifferencePunishment) *
    445             (1 - descendantTimeOffsetDifferencePunishment) *
    446             (1 - descendantVariableIndexDifferencePunishment);
    447           // worst possible result is (1-punishmentCoefficientsProduct), so scale linearly to [0;1]:
    448           result = (result - punishmentCoefficientsProduct) / (1 - punishmentCoefficientsProduct);
    449         }
    450 
    451         if (result < 0 || result > 1)
    452           throw new InvalidOperationException("ERROR in GeneticInformationItem.Similarity: An invalid similarity value (" + result.ToString() + ") has been calculated.");
    453 
    454         return result;
    455 
    456       }
    457 
    458       public static IList<GeneticInformationItem> getGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames,
    459           int MinimumLevelDelta, int MaximumLevelDelta) {
    460         // first we have to collect all items, then we filter; it is not possible to filter while collecting because the items are
    461         // collected recursively and used for collecting the parents' items.
    462         if (MinimumLevelDelta > MaximumLevelDelta)
    463           return new List<GeneticInformationItem>();
    464         IList<GeneticInformationItem> list = getGeneticInformationItems(node, variableNames, 0);
    465         List<GeneticInformationItem> resultList = new List<GeneticInformationItem>();
    466         foreach (GeneticInformationItem item in list)
    467           if (item.LevelDelta >= MinimumLevelDelta && item.LevelDelta <= MaximumLevelDelta)
    468             resultList.Add(item);
    469         return resultList;
    470       }
    471 
    472       private static IList<GeneticInformationItem> getGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames, int level) {
    473         // Idea: collect all descendants' lists and then add new items using the retrieved ones.
    474         // This should save lots of time and reduce complexity of the items retrieval process.
    475         // Program roots are not considered, neither are start symbol nodes
    476         if (node.Symbol is ProgramRootSymbol)
    477           return getGeneticInformationItems(node.SubTrees[0], variableNames, level + 1);
    478         List<GeneticInformationItem> list = new List<GeneticInformationItem>();
    479         // add item for this node:
    480         if (!(node.Symbol is StartSymbol)) {
    481           list.Add(new GeneticInformationItem(node, variableNames, level));
    482         }
    483         // add items for the descendants, but prevent multiple references to descendant nodes:
    484         List<SymbolicExpressionTreeNode> descendantNodes = new List<SymbolicExpressionTreeNode>();
    485         for (int i = 0; i < node.SubTrees.Count; i++) {
    486           IList<GeneticInformationItem> descendantItems = getGeneticInformationItems(node.SubTrees[i], variableNames, level + 1);
    487           list.AddRange(descendantItems);
    488           if (!(node.Symbol is StartSymbol))
    489             foreach (GeneticInformationItem item in descendantItems) {
    490               if (!descendantNodes.Contains(item.DescendantTreeNode)) {
    491                 list.Add(new GeneticInformationItem(node, item, i, level));
    492                 descendantNodes.Add(item.DescendantTreeNode);
    493               }
    494             }
    495         }
    496         return list;
    497       }
    498 
    499       private GeneticInformationItem (SymbolicExpressionTreeNode node, List<string> variableNames, int level) {
    500         myAncestorIndex = -1;
    501         VariableTreeNode variableTreeNode = node as VariableTreeNode;
    502         LaggedVariableTreeNode laggedVariableTreeNode = node as LaggedVariableTreeNode;
    503         ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
    504         myAncestorDefinition = node.Symbol.GetType();
    505         myDescendantDefinition = myAncestorDefinition;
    506         if (variableTreeNode != null)
    507           myDescendantCoefficient = variableTreeNode.Weight;
    508         else if (constantTreeNode != null)
    509           myDescendantCoefficient = constantTreeNode.Value;
    510         else
    511           myDescendantCoefficient = double.NaN;
    512         if (laggedVariableTreeNode != null)
    513           myDescendantTimeOffset = laggedVariableTreeNode.Lag;
    514         else
    515           myDescendantTimeOffset = 0;
    516         if (variableTreeNode != null)
    517           myDescendantVariableIndex = variableNames.IndexOf(variableTreeNode.VariableName);
    518         else
    519           myDescendantVariableIndex = -1;
    520         myAncestorLevel = level;
    521         myDescendantLevel = level;
    522         myDescendantTreeNode = node;
    523       }
    524 
    525       private GeneticInformationItem(SymbolicExpressionTreeNode parentNode, GeneticInformationItem descendantGeneticInformationItem,
    526           int ancestorIndex, int parentNodeLevel) {
    527         myAncestorIndex = ancestorIndex;
    528         myAncestorLevel = parentNodeLevel;
    529         myAncestorDefinition = parentNode.Symbol.GetType();
    530         myDescendantCoefficient = descendantGeneticInformationItem.DescendantCoefficient;
    531         myDescendantDefinition = descendantGeneticInformationItem.DescendantDefinition;
    532         myDescendantTimeOffset = descendantGeneticInformationItem.DescendantTimeOffset;
    533         myDescendantVariableIndex = descendantGeneticInformationItem.DescendantVariableIndex;
    534         myDescendantLevel = descendantGeneticInformationItem.DescendantLevel;
    535         myDescendantTreeNode = descendantGeneticInformationItem.DescendantTreeNode;
    536       }
    537 
    538       public override string ToString() {
    539         return "ancestor: " + AncestorDefinition.Name.ToString() + ", [" + AncestorIndex + "]; "
    540           + "descendant: " + DescendantDefinition.Name.ToString() + " (varIndex " + DescendantVariableIndex + ", "
    541           + DescendantCoefficient + ", t-" + DescendantTimeOffset + ");"
    542           + " level delta = " + DescendantLevel + "-" + AncestorLevel + " = " + LevelDelta;
    543       }
    544 
    545     }
    546 
    547     #endregion
    548 
    549233  }
     234
    550235}
Note: See TracChangeset for help on using the changeset viewer.