Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/25/10 21:41:07 (14 years ago)
Author:
swinkler
Message:

Worked on structural population diversity analysis (#1278): Improved and pruned similarity function for genetic information items.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FineGrainedStructuralPopulationDiversityAnalyzer.cs

    r4934 r4938  
    4444  public sealed class FineGrainedStructuralPopulationDiversityAnalyzer : SymbolicRegressionPopulationDiversityAnalyzer {
    4545
     46    // properties: min level delts, max level delta, etc.
     47
     48    private const string FunctionTreeGrammarParameterName = "FunctionTreeGrammar";
     49
     50    public IValueLookupParameter<GlobalSymbolicExpressionGrammar> FunctionTreeGrammarParameter {
     51      get { return (IValueLookupParameter<GlobalSymbolicExpressionGrammar>)Parameters[FunctionTreeGrammarParameterName]; }
     52    }
     53    public GlobalSymbolicExpressionGrammar FunctionTreeGrammar {
     54      get { return FunctionTreeGrammarParameter.ActualValue; }
     55    }
     56
     57
    4658    [StorableConstructor]
    4759    private FineGrainedStructuralPopulationDiversityAnalyzer(bool deserializing) : base(deserializing) { }
    4860    private FineGrainedStructuralPopulationDiversityAnalyzer(FineGrainedStructuralPopulationDiversityAnalyzer original, Cloner cloner) : base(original, cloner) { }
    49     public FineGrainedStructuralPopulationDiversityAnalyzer() : base() { }
     61    public FineGrainedStructuralPopulationDiversityAnalyzer() : base() {
     62      Parameters.Add(new ValueLookupParameter<GlobalSymbolicExpressionGrammar>(FunctionTreeGrammarParameterName, "The grammar that is used for symbolic regression models."));
     63    }
    5064
    5165    public override IDeepCloneable Clone(Cloner cloner) {
     
    5468
    5569    protected override double[,] CalculateSimilarities(SymbolicExpressionTree[] solutions) {
     70      Constant c = null;
     71      foreach (Symbol symbol in FunctionTreeGrammar.Symbols) {
     72        if (symbol is Constant) {
     73          if (c != null) throw new InvalidProgramException("There are more and one constnat definitions in the given function tree grammar.");
     74          c = (Constant)symbol;
     75        }
     76      }
    5677      int n = solutions.Length;
    5778      List<string> variableNames = new List<string>();
     
    6283      IList<GeneticInformationItem>[] geneticInformationItemsList = new List<GeneticInformationItem>[n];
    6384      for (int i = 0; i < n; i++) {
    64         geneticInformationItemsList[i] = GeneticInformationItem.getGeneticInformationItems(solutions[i].Root, variableNames, 1, 5);
     85        geneticInformationItemsList[i] = GeneticInformationItem.getGeneticInformationItems(solutions[i].Root, variableNames, 0, int.MaxValue);
    6586      }
    6687      double[,] result = new double[n, n];
     
    101122      }
    102123
    103       // not used in HL 3.3
    104       // private double myAncestorCoefficient;
    105       // public double AncestorCoefficient {
    106       //   get { return myAncestorCoefficient; }
    107       // }
    108 
    109       // not used in HL 3.3
    110       // private double myAncestorVariableIndex;
    111       // public double AncestorVariableIndex {
    112       //   get { return myAncestorVariableIndex; }
    113       // }
    114 
    115       // not used in HL 3.3
    116       // private int myAncestorTimeOffset;
    117       // public int AncestorTimeOffset {
    118       //   get { return myAncestorTimeOffset; }
    119       // }
    120 
    121       // not used in HL 3.3
    122       // private int myAncestorVariant;
    123       // public int AncestorVariant {
    124       //   get { return myAncestorVariant; }
    125       // }
    126 
    127124      private double myDescendantCoefficient;
    128125      public double DescendantCoefficient {
     
    153150        get { return (myDescendantLevel - myAncestorLevel); }
    154151      }
    155 
    156       // not used in HL 3.3
    157       //private int myDescendantVariant;
    158       //public int DescendantVariant {
    159       //  get { return myDescendantVariant; }
    160       //}
    161152
    162153      /*
     
    181172      }*/
    182173
    183       /*
    184174      public static double Similarity(GeneticInformationItem Item1, GeneticInformationItem Item2,
    185           StructuralSimilarityAnalysisParameters Parameters,
    186           int MaxTreeHeight, int MaxTimeOffset) {
    187 
    188         if (Item1.AncestorDefinition != Item2.AncestorDefinition ||
    189           Item1.DescendantDefinition != Item2.DescendantDefinition)
     175          GlobalSymbolicExpressionGrammar ExpressionGrammar, double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
     176          int MaximumTreeHeight, int MaximumTimeOffset,
     177          // StructuralSimilarityAnalysisParameters Parameters,
     178          double LevelDifferenceCoefficient,
     179          double AncestorIndexCoefficient,
     180          double ConstantValueCoefficient,
     181          double VariableWeightCoefficient,
     182          double TimeOffsetCoefficient,
     183          double VariableIndexCoefficient,
     184          bool AdditiveSimilarityCalculation
     185        ) {
     186
     187        if (Item1.AncestorDefinition != Item2.AncestorDefinition || Item1.DescendantDefinition != Item2.DescendantDefinition)
    190188          return double.NaN;
    191189
    192190        // the two items for sure have the same behavior definitions
    193         #region init
     191
     192        #region initialize punishments
     193
    194194        double punishmentContributionSum = 0;
    195195        double punishmentCoefficientsProduct = 1;
    196         double ancestorCoefficientDifferencePunishment = 0;
    197         double ancestorTimeOffsetDifferencePunishment = 0;
    198         double ancestorVariantDifferencePunishment = 0;
    199         double ancestorVariableIndexDifferencePunishment = 0;
    200         double descendantCoefficientDifferencePunishment = 0;
    201         double descendantTimeOffsetDifferencePunishment = 0;
    202         double descendantVariantDifferencePunishment = 0;
    203         double descendantVariableIndexDifferencePunishment = 0;
     196
    204197        double ancestorIndexDifferencePunishment = 0;
    205198        double levelDifferencePunishment = 0;
     199
     200        double descendantConstantValueDifferencePunishment = 0;
     201        double descendantVariableWeightDifferencePunishment = 0;
     202        double descendantTimeOffsetDifferencePunishment = 0;
     203        double descendantVariableIndexDifferencePunishment = 0;
     204
    206205        #endregion
    207206
    208         ITerminalDefinition ancestorTerminal = Item1.AncestorDefinition as ITerminalDefinition;
    209         ITerminalDefinition descendantTerminal = Item1.DescendantDefinition as ITerminalDefinition;
    210         IFunctionDefinition ancestorFunction = Item1.AncestorDefinition as IFunctionDefinition;
    211         IFunctionDefinition descendantFunction = Item1.DescendantDefinition as IFunctionDefinition;
    212 
    213         if (Parameters.ConsiderLevelDifference) {
     207        if (LevelDifferenceCoefficient > 0) {
    214208          levelDifferencePunishment = Item1.LevelDelta - Item2.LevelDelta;
    215209          if (levelDifferencePunishment < 0)
    216210            levelDifferencePunishment = -levelDifferencePunishment;
    217           levelDifferencePunishment /= MaxTreeHeight;
     211          levelDifferencePunishment /= MaximumTreeHeight;
    218212          if (levelDifferencePunishment > 1)
    219213            levelDifferencePunishment = 1;
    220           levelDifferencePunishment *= Parameters.LevelDifferenceCoefficient;
    221           punishmentContributionSum += Parameters.LevelDifferenceCoefficient;
    222           punishmentCoefficientsProduct *= (1 - Parameters.LevelDifferenceCoefficient);
    223         }
    224         if (Parameters.ConsiderAncestorIndex) {
     214          levelDifferencePunishment *= LevelDifferenceCoefficient;
     215          punishmentContributionSum += LevelDifferenceCoefficient;
     216          punishmentCoefficientsProduct *= (1 - LevelDifferenceCoefficient);
     217        }
     218        if (AncestorIndexCoefficient > 0) {
    225219          if (Item1.AncestorIndex != Item2.AncestorIndex)
    226220            ancestorIndexDifferencePunishment = 1;
    227221          else
    228222            ancestorIndexDifferencePunishment = 0;
    229           ancestorIndexDifferencePunishment *= Parameters.AncestorIndexCoefficient;
    230           punishmentContributionSum += Parameters.AncestorIndexCoefficient;
    231           punishmentCoefficientsProduct *= (1 - Parameters.AncestorIndexCoefficient);
    232         }
    233 
    234         if (Item1.AncestorDefinition is ITerminalDefinition) {
    235           if (Parameters.ConsiderCoefficient) {
    236             double coefficientDifference = Math.Abs(Item1.myAncestorCoefficient - Item2.myAncestorCoefficient);
    237             if (ancestorTerminal.Parametrization.CoefficientIsGaussian)
    238               ancestorCoefficientDifferencePunishment = (coefficientDifference / (ancestorTerminal.Parametrization.CoefficientSigma * 4));
    239             else
    240               ancestorCoefficientDifferencePunishment = (coefficientDifference / (ancestorTerminal.Parametrization.CoefficientMaxValue - ancestorTerminal.Parametrization.CoefficientMinValue));
    241             if (ancestorCoefficientDifferencePunishment > 1)
    242               ancestorCoefficientDifferencePunishment = 1;
    243             ancestorCoefficientDifferencePunishment *= Parameters.CoefficientCoefficient;
    244             punishmentContributionSum += Parameters.CoefficientCoefficient;
    245             punishmentCoefficientsProduct *= (1 - Parameters.CoefficientCoefficient);
    246           }
    247           if (Parameters.ConsiderTimeOffset) {
    248             double timeOffsetDifference = Math.Abs(Item1.AncestorTimeOffset - Item2.AncestorTimeOffset);
    249             if (MaxTimeOffset > 0)
    250               ancestorTimeOffsetDifferencePunishment = timeOffsetDifference / MaxTimeOffset;
    251             ancestorTimeOffsetDifferencePunishment *= Parameters.TimeOffsetCoefficient;
    252             punishmentContributionSum += Parameters.TimeOffsetCoefficient;
    253             punishmentCoefficientsProduct *= (1 - Parameters.TimeOffsetCoefficient);
    254           }
    255           if (Parameters.ConsiderVariableIndex) {
    256             if (Item1.AncestorVariableIndex != Item2.AncestorVariableIndex)
    257               ancestorVariableIndexDifferencePunishment = 1;
    258             else
    259               ancestorVariableIndexDifferencePunishment = 0;
    260             ancestorVariableIndexDifferencePunishment *= Parameters.VariableIndexCoefficient;
    261             punishmentContributionSum += Parameters.VariableIndexCoefficient;
    262             punishmentCoefficientsProduct *= (1 - Parameters.VariableIndexCoefficient);
    263           }
    264         } else {
    265           if (Parameters.ConsiderVariant) {
    266             if (Item1.AncestorVariant != Item2.AncestorVariant)
    267               ancestorVariantDifferencePunishment = 1;
    268             else
    269               ancestorVariantDifferencePunishment = 0;
    270             ancestorVariantDifferencePunishment *= Parameters.VariantCoefficient;
    271             punishmentContributionSum += Parameters.VariantCoefficient;
    272             punishmentCoefficientsProduct *= (1 - Parameters.VariantCoefficient);
    273           }
    274         }
    275 
    276         if (Item1.DescendantDefinition is ITerminalDefinition) {
    277           if (Parameters.ConsiderCoefficient) {
    278             double coefficientDifference = Math.Abs(Item1.myDescendantCoefficient - Item2.myDescendantCoefficient);
    279             if (descendantTerminal.Parametrization.CoefficientIsGaussian)
    280               descendantCoefficientDifferencePunishment = (coefficientDifference / (descendantTerminal.Parametrization.CoefficientSigma * 4));
    281             else
    282               descendantCoefficientDifferencePunishment = (coefficientDifference / (descendantTerminal.Parametrization.CoefficientMaxValue - descendantTerminal.Parametrization.CoefficientMinValue));
    283             if (descendantCoefficientDifferencePunishment > 1)
    284               descendantCoefficientDifferencePunishment = 1;
    285             descendantCoefficientDifferencePunishment *= Parameters.CoefficientCoefficient;
    286             punishmentContributionSum += Parameters.CoefficientCoefficient;
    287             punishmentCoefficientsProduct *= (1 - Parameters.CoefficientCoefficient);
    288           }
    289           if (Parameters.ConsiderTimeOffset) {
     223          ancestorIndexDifferencePunishment *= AncestorIndexCoefficient;
     224          punishmentContributionSum += AncestorIndexCoefficient;
     225          punishmentCoefficientsProduct *= (1 - AncestorIndexCoefficient);
     226        }
     227
     228        if (Item1.DescendantTreeNode is ConstantTreeNode) {
     229          if (ConstantValueCoefficient > 0) {
     230            double constantValueCoefficientDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
     231            // assume uniform distribution within given minimum and maximum
     232            descendantConstantValueDifferencePunishment = (constantValueCoefficientDifference / (ConstantMaximum - ConstantMinimum));
     233            if (descendantConstantValueDifferencePunishment > 1)
     234              descendantConstantValueDifferencePunishment = 1;
     235            descendantConstantValueDifferencePunishment *= ConstantValueCoefficient;
     236            punishmentContributionSum += ConstantValueCoefficient;
     237            punishmentCoefficientsProduct *= (1 - ConstantValueCoefficient);
     238          }
     239        }
     240        if(Item1.DescendantTreeNode is VariableTreeNode) {
     241          if (VariableWeightCoefficient > 0) {
     242            double variableWeightDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
     243            // assume normal distribution within given sigma
     244            descendantVariableWeightDifferencePunishment = variableWeightDifference / (VariableWeightSigma * 4);
     245            if (descendantVariableWeightDifferencePunishment > 1)
     246              descendantVariableWeightDifferencePunishment = 1;
     247            descendantVariableWeightDifferencePunishment *= VariableWeightCoefficient;
     248            punishmentContributionSum += VariableWeightCoefficient;
     249            punishmentCoefficientsProduct *= (1 - VariableWeightCoefficient);
     250          }
     251          if (TimeOffsetCoefficient > 0) {
    290252            double timeOffsetDifference = Math.Abs(Item1.DescendantTimeOffset - Item2.DescendantTimeOffset);
    291             if (MaxTimeOffset > 0)
    292               descendantTimeOffsetDifferencePunishment = timeOffsetDifference / MaxTimeOffset;
    293             descendantTimeOffsetDifferencePunishment *= Parameters.TimeOffsetCoefficient;
    294             punishmentContributionSum += Parameters.TimeOffsetCoefficient;
    295             punishmentCoefficientsProduct *= (1 - Parameters.TimeOffsetCoefficient);
    296           }
    297           if (Parameters.ConsiderVariableIndex) {
     253            if (MaximumTimeOffset > 0)
     254              descendantTimeOffsetDifferencePunishment = timeOffsetDifference / MaximumTimeOffset;
     255            descendantTimeOffsetDifferencePunishment *= TimeOffsetCoefficient;
     256            punishmentContributionSum += TimeOffsetCoefficient;
     257            punishmentCoefficientsProduct *= (1 - TimeOffsetCoefficient);
     258          }
     259          if (VariableIndexCoefficient > 0) {
    298260            if (Item1.DescendantVariableIndex != Item2.DescendantVariableIndex)
    299261              descendantVariableIndexDifferencePunishment = 1;
    300262            else
    301263              descendantVariableIndexDifferencePunishment = 0;
    302             descendantVariableIndexDifferencePunishment *= Parameters.VariableIndexCoefficient;
    303             punishmentContributionSum += Parameters.VariableIndexCoefficient;
    304             punishmentCoefficientsProduct *= (1 - Parameters.VariableIndexCoefficient);
    305           }
    306         } else {
    307           if (Parameters.ConsiderVariant) {
    308             if (Item1.DescendantVariant != Item2.DescendantVariant)
    309               descendantVariantDifferencePunishment = 1;
    310             else
    311               descendantVariantDifferencePunishment = 0;
    312             descendantVariantDifferencePunishment *= Parameters.VariantCoefficient;
    313             punishmentContributionSum += Parameters.VariantCoefficient;
    314             punishmentCoefficientsProduct *= (1 - Parameters.VariantCoefficient);
     264            descendantVariableIndexDifferencePunishment *= VariableIndexCoefficient;
     265            punishmentContributionSum += VariableIndexCoefficient;
     266            punishmentCoefficientsProduct *= (1 - VariableIndexCoefficient);
    315267          }
    316268        }
     
    318270        double result;
    319271
    320         if (Parameters.AdditiveSimilarityCalculation) {
    321           double punishmentsSum =
    322             ancestorCoefficientDifferencePunishment + ancestorTimeOffsetDifferencePunishment +
    323             ancestorVariantDifferencePunishment + ancestorVariableIndexDifferencePunishment +
    324             descendantCoefficientDifferencePunishment + descendantTimeOffsetDifferencePunishment +
    325             descendantVariantDifferencePunishment + descendantVariableIndexDifferencePunishment +
    326             ancestorIndexDifferencePunishment + levelDifferencePunishment;
     272        if (AdditiveSimilarityCalculation) {
     273          double punishmentsSum = ancestorIndexDifferencePunishment + levelDifferencePunishment +
     274            descendantConstantValueDifferencePunishment + descendantVariableWeightDifferencePunishment +
     275            descendantTimeOffsetDifferencePunishment + descendantVariableIndexDifferencePunishment;
    327276          result = (1 - punishmentsSum / punishmentContributionSum);
    328277        } else {
    329278          result =
    330             (1 - ancestorCoefficientDifferencePunishment) *
    331             (1 - ancestorTimeOffsetDifferencePunishment) *
    332             (1 - ancestorVariantDifferencePunishment) *
    333             (1 - ancestorVariableIndexDifferencePunishment) *
    334             (1 - descendantCoefficientDifferencePunishment) *
     279            (1 - ancestorIndexDifferencePunishment) *
     280            (1 - levelDifferencePunishment) *
     281            (1 - descendantConstantValueDifferencePunishment) *
     282            (1 - descendantVariableWeightDifferencePunishment) *
    335283            (1 - descendantTimeOffsetDifferencePunishment) *
    336             (1 - descendantVariantDifferencePunishment) *
    337             (1 - descendantVariableIndexDifferencePunishment) *
    338             (1 - ancestorIndexDifferencePunishment) *
    339             (1 - levelDifferencePunishment);
     284            (1 - descendantVariableIndexDifferencePunishment);
    340285          // worst possible result is (1-punishmentCoefficientsProduct), so scale linearly to [0;1]:
    341286          result = (result - punishmentCoefficientsProduct) / (1 - punishmentCoefficientsProduct);
     
    347292        return result;
    348293
    349       }*/
    350 
    351       /*
    352       public static List<GeneticInformationItem> GetGeneticInformationItems(SymbolicExpressionTreeNode Formula, int MinLevelDifference, int MaxLevelDifference) {
    353         List<GeneticInformationItem> result = new List<GeneticInformationItem>();
    354         List<SymbolicExpressionTreeNode> allNodes = new List<SymbolicExpressionTreeNode>();
    355         allNodes.Add(Formula);
    356         allNodes.AddRange(getDescendants(Formula));
    357         foreach (SymbolicExpressionTreeNode node in allNodes) {
    358           List<SymbolicExpressionTreeNode> allDescendants = new List<SymbolicExpressionTreeNode>();
    359           allDescendants.Add(node);
    360           allDescendants.AddRange(getDescendants(node));
    361           foreach (SymbolicExpressionTreeNode descendant in allDescendants) {
    362             GeneticInformationItem item = new GeneticInformationItem(node, descendant);
    363             if (item.LevelDelta >= MinLevelDifference && item.LevelDelta <= MaxLevelDifference)
    364               result.Add(item);
    365           }
    366         }
    367         return result;
    368       }*/
    369 
    370       /*
    371       public static List<SymbolicExpressionTreeNode> GetDescendants(SymbolicExpressionTreeNode node) {
    372         List<SymbolicExpressionTreeNode> descendants = new List<SymbolicExpressionTreeNode>();
    373         foreach (SymbolicExpressionTreeNode subTreeNode in node.SubTrees) {
    374           AddDescendants(descendants, subTreeNode);
    375         }
    376         return descendants;
    377       }
    378       private static void AddDescendants(List<SymbolicExpressionTreeNode> list, SymbolicExpressionTreeNode node) {
    379         list.Add(node);
    380         foreach (SymbolicExpressionTreeNode subTreeNode in node.SubTrees) {
    381           AddDescendants(list, subTreeNode);
    382         }
    383       }*/
     294      }
    384295
    385296      public static IList<GeneticInformationItem> getGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames,
     
    415326          if (!(node.Symbol is StartSymbol))
    416327            foreach (GeneticInformationItem item in descendantItems) {
    417               if (!descendantNodes.Contains(item.DescendantTreeNode))
     328              if (!descendantNodes.Contains(item.DescendantTreeNode)) {
    418329                list.Add(new GeneticInformationItem(node, item, i, level));
    419               descendantNodes.Add(item.DescendantTreeNode);
     330                descendantNodes.Add(item.DescendantTreeNode);
     331              }
    420332            }
    421333        }
Note: See TracChangeset for help on using the changeset viewer.