Changeset 14427


Ignore:
Timestamp:
11/28/16 17:55:52 (3 years ago)
Author:
bburlacu
Message:

#1772: Extract common methods (used by the schema creator and the schema frequency analyzer) into a static SchemaUtil class. Make the parameterless AnyNodeSymbol constructor public.

Location:
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4
Files:
1 added
5 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj

    r14312 r14427  
    317317    <Compile Include="Tracking\SchemaDiversification\SchemaEvaluator.cs" />
    318318    <Compile Include="Tracking\SchemaDiversification\SchemaCreator.cs" />
     319    <Compile Include="Tracking\SchemaDiversification\SchemaUtil.cs" />
    319320    <Compile Include="Tracking\SchemaDiversification\UpdateQualityOperator.cs" />
    320321    <Compile Include="Tracking\SymbolicDataAnalysisExpressionAfterCrossoverOperator.cs" />
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Symbols/Wildcards/AnyNodeSymbol.cs

    r12951 r14427  
    4141    }
    4242
    43     private AnyNodeSymbol() : base("=", "A wildcard symbol that can match any node of the same type (function or leaf node)") { }
     43    public AnyNodeSymbol() : base("=", "A wildcard symbol that can match any node of the same type (function or leaf node)") { }
    4444
    4545    public AnyNodeSymbol(int minimumArity, int maximumArity)
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/Analyzers/SymbolicDataAnalysisSchemaFrequencyAnalyzer.cs

    r13624 r14427  
    2323using System.Collections.Generic;
    2424using System.Linq;
    25 using System.Text;
    2625using System.Threading.Tasks;
    2726using HeuristicLab.Common;
     
    3433using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3534
    36 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Tracking.Analyzers {
     35namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    3736  [Item("SymbolicDataAnalysisSchemaFrequencyAnalyzer", "An analyzer which counts schema frequencies in the population.")]
    3837  [StorableClass]
     
    157156      var generation = Generation.Value;
    158157
    159       var population = PopulationGraph.GetByRank(generation).Cast<IGenealogyGraphNode<ISymbolicExpressionTree>>().ToList();
     158      var population = PopulationGraph.Vertices.Where(x => x.InDegree == 2 && x.Rank > generation - 1).ToList();
    160159      var vertices = population.Where(x => x.InDegree == 2).OrderByDescending(x => x.Quality).ToList();
    161160      ResultCollection resultCollection;
     
    182181      }
    183182      var schemas = SchemaCreator.GenerateSchemas(vertices, MinimumSchemaLength, StrictSchemaMatching).ToList();
    184       var schemaStrings = schemas.Select(x => SubtreeToString(x.Root.GetSubtree(0).GetSubtree(0), StrictSchemaMatching)).ToList();
     183      var schemaStrings = schemas.Select(x => x.Root.GetSubtree(0).GetSubtree(0).FormatToString(StrictSchemaMatching)).ToList();
    185184      int[][] matchingIndices;
    186185      if (ExecuteInParallel) {
     
    188187        Parallel.For(0, schemas.Count, new ParallelOptions { MaxDegreeOfParallelism = MaximumDegreeOfParallelism }, i => {
    189188          var schema = schemas[i];
    190           matchingIndices[i] = Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v].Root, schema.Root)).ToArray();
     189          matchingIndices[i] = Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v], schema)).ToArray();
    191190        });
    192191      } else {
    193         matchingIndices = schemas.Select(x => Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v].Root, x.Root)).ToArray()).ToArray();
     192        matchingIndices = schemas.Select(x => Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v], x)).ToArray()).ToArray();
    194193      }
    195194
     
    230229      if (!schemaStatistics.Any()) return base.Apply(); // shouldn't ever happen
    231230      var columnNames = new[] { "Count", "Avg Quality", "Avg Length", "Avg Genotype Similarity", "Avg Phenotype Similarity", "Avg Population Quality" };
    232       var mostFrequent = new DoubleMatrix(schemaStatistics.Count, schemaStatistics[0].Item2.Length);
    233       mostFrequent.SortableView = true;
     231      var mostFrequent = new DoubleMatrix(schemaStatistics.Count, schemaStatistics[0].Item2.Length) {
     232        SortableView = true
     233      };
    234234      schemaStatistics.Sort((a, b) => { if (a.Item2[0].Equals(b.Item2[0])) return b.Item2[1].CompareTo(a.Item2[1]); return b.Item2[0].CompareTo(a.Item2[0]); });
    235       mostFrequentPerGeneration.Add(new Tuple<string, double[]>(schemaStatistics[0].Item1, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray()));
     235      mostFrequentPerGeneration.Add(Tuple.Create(schemaStatistics[0].Item1, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray()));
    236236      mostFrequent.RowNames = schemaStatistics.Select(x => x.Item1);
    237237      mostFrequent.ColumnNames = columnNames;
     
    301301        for (int j = i + 1; j < indices.Length; ++j) {
    302302          var b = indices[j];
    303           if (double.IsNaN(similarityMatrix[a, b])) similarityMatrix[a, b] = similarityFunction(trees[a], trees[b]);
     303          if (double.IsNaN(similarityMatrix[a, b]))
     304            similarityMatrix[a, b] = similarityFunction(trees[a], trees[b]);
    304305          agg += similarityMatrix[a, b];
    305306        }
     
    307308      return agg / count;
    308309    }
    309 
    310     private static string SubtreeToString(ISymbolicExpressionTreeNode node, bool strict = false) {
    311       StringBuilder strBuilder = new StringBuilder();
    312       // internal nodes or leaf nodes?
    313       if (node is AnySubtree)
    314         return "# ";
    315 
    316       if (node.SubtreeCount > 0) {
    317         strBuilder.Append("(");
    318         // symbol on same line as '('
    319         string label = string.Empty;
    320         if (node is AnyNode)
    321           label = "=";
    322         else {
    323           var name = node.Symbol.Name;
    324           label = ShortNames.ContainsKey(name) ? ShortNames[name] : name;
    325         }
    326         strBuilder.Append(label + " ");
    327         // each subtree expression on a new line
    328         // and closing ')' also on new line
    329         foreach (var subtree in node.Subtrees) {
    330           strBuilder.Append(SubtreeToString(subtree, strict));
    331         }
    332         strBuilder.Append(") ");
    333       } else {
    334         // symbol in the same line with as '(' and ')'
    335         var v = node as VariableTreeNode;
    336         var c = node as ConstantTreeNode;
    337         var w = node as AnyNode; // wildcard
    338         string label = string.Empty;
    339         if (w != null)
    340           label = "=";
    341         else if (v != null)
    342           label = strict ? string.Format("{0:0.00}_{1}", v.Weight, v.VariableName) : string.Format("{0}", v.VariableName);
    343         else if (c != null)
    344           label = strict ? string.Format("{0:0.00}", c.Value) : "C";
    345         strBuilder.Append(label);
    346         if (node.Parent != null && node != node.Parent.Subtrees.Last())
    347           strBuilder.Append(" ");
    348       }
    349       return strBuilder.ToString();
    350     }
    351310  }
    352311}
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs

    r13876 r14427  
    2323using System.Collections.Generic;
    2424using System.Linq;
    25 using System.Text;
    2625using HeuristicLab.Common;
    2726using HeuristicLab.Core;
     
    202201                     select v;
    203202
    204       var schemas = new List<ISymbolicExpressionTree>(GenerateSchemas(vertices, MinimumSchemaLength, StrictSchemaMatching));
     203      var schemas = GenerateSchemas(vertices, MinimumSchemaLength, StrictSchemaMatching).ToList();
    205204
    206205      #region create schemas and add subscopes representing the individuals
     
    226225    public static IEnumerable<ISymbolicExpressionTree> GenerateSchemas(IEnumerable<IGenealogyGraphNode<ISymbolicExpressionTree>> vertices, int minimumSchemaLength, bool strict = true) {
    227226      var anySubtreeSymbol = new AnySubtreeSymbol();
    228       //            var anyNodeSymbol = new AnyNodeSymbol();
    229       var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList();
     227      var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count());
    230228      var hash = new HashSet<string>();
    231       //      var formatter = new SymbolicExpressionTreeStringFormatter { Indent = false, AppendNewLines = false };
    232229      foreach (var g in groups) {
    233230        var parent = g.Key;
     
    237234        var schema = (ISymbolicExpressionTree)parent.Data.Clone();
    238235        var nodes = schema.IterateNodesPrefix().ToList();
    239         var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null);
    240         var indices = (from arc in arcs
    241                        let fragment = (IFragment<ISymbolicExpressionTreeNode>)arc.Data
    242                        select fragment.Index1).Distinct().ToArray();
    243         var levels = indices.Select(x => schema.Root.GetBranchLevel(nodes[x])).ToArray();
    244         Array.Sort(levels, indices);
    245         // order nodes by their depth so that cutpoints are replaced with wildcards from the bottom up
    246         var nodesToReplace = indices.Select(x => nodes[x]).ToList();
    247         for (int i = nodesToReplace.Count - 1; i >= 0; --i) {
    248           var node = nodesToReplace[i];
    249 
     236        var fragments = g.Select(x => x.InArcs.Last().Data).Where(x => x != null).Cast<IFragment<ISymbolicExpressionTreeNode>>();
     237        var indices = fragments.Select(x => x.Index1).Distinct().OrderByDescending(x => schema.Root.GetBranchLevel(nodes[x]));
     238        foreach (var i in indices) {
     239          var node = nodes[i];
    250240          // do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength
    251           if (schema.Length - node.GetLength() + 1 < minimumSchemaLength)
    252             continue;
    253 
    254           var replacement = anySubtreeSymbol.CreateTreeNode();
    255           ReplaceSubtree(node, replacement, false);
     241          //          if (schema.Length - node.GetLength() + 1 < minimumSchemaLength)
     242          //            continue;
     243          ISymbolicExpressionTreeNode replacement;
     244          if (node.SubtreeCount > 0) {
     245            var anyNodeSymbol = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MaximumArity);
     246            replacement = anyNodeSymbol.CreateTreeNode();
     247          } else {
     248            replacement = anySubtreeSymbol.CreateTreeNode();
     249          }
     250          SchemaUtil.ReplaceSubtree(node, replacement, true);
    256251          //          var replacement = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MinimumArity).CreateTreeNode();
    257252          //          ReplaceSubtree(node, replacement, true);
     
    259254        }
    260255        if (replaced) {
    261           //          var str = formatter.Format(schema.Root.GetSubtree(0).GetSubtree(0));
    262           var str = SubtreeToString(schema.Root.GetSubtree(0).GetSubtree(0), strict);
    263           if (hash.Contains(str)) continue;
    264           yield return schema;
    265           hash.Add(str);
     256          var str = schema.Root.GetSubtree(0).GetSubtree(0).FormatToString(strict);
     257          if (hash.Add(str))
     258            yield return schema;
    266259        }
    267260      }
    268261    }
    269 
    270     private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, bool preserveChildren = true) {
    271       var parent = original.Parent;
    272       if (parent == null)
    273         throw new ArgumentException("Parent cannot be null for node " + original);
    274       var index = parent.IndexOfSubtree(original);
    275       parent.RemoveSubtree(index);
    276       parent.InsertSubtree(index, replacement);
    277 
    278       if (preserveChildren) {
    279         var subtrees = original.Subtrees.ToList();
    280 
    281         for (int i = subtrees.Count - 1; i >= 0; --i)
    282           original.RemoveSubtree(i);
    283 
    284         for (int i = 0; i < subtrees.Count; ++i) {
    285           replacement.AddSubtree(subtrees[i]);
    286         }
    287       }
    288     }
    289 
    290     private static string SubtreeToString(ISymbolicExpressionTreeNode node, bool strict = false) {
    291       StringBuilder strBuilder = new StringBuilder();
    292       // internal nodes or leaf nodes?
    293       if (node is AnySubtree)
    294         return "# ";
    295 
    296       if (node.SubtreeCount > 0) {
    297         strBuilder.Append("(");
    298         // symbol on same line as '('
    299         string label = string.Empty;
    300         if (node is AnyNode)
    301           label = "=";
    302         else {
    303           label = node.Symbol.Name;
    304         }
    305         strBuilder.Append(label + " ");
    306         // each subtree expression on a new line
    307         // and closing ')' also on new line
    308         foreach (var subtree in node.Subtrees) {
    309           strBuilder.Append(SubtreeToString(subtree, strict));
    310         }
    311         strBuilder.Append(") ");
    312       } else {
    313         // symbol in the same line with as '(' and ')'
    314         var v = node as VariableTreeNode;
    315         var c = node as ConstantTreeNode;
    316         var w = node as AnyNode; // wildcard
    317         string label = string.Empty;
    318         if (w != null)
    319           label = "=";
    320         else if (v != null)
    321           label = strict ? string.Format("{0:0.00}_{1}", v.Weight, v.VariableName) : string.Format("{0}", v.VariableName);
    322         else if (c != null)
    323           label = strict ? string.Format("{0:0.00}", c.Value) : "C";
    324         strBuilder.Append(label);
    325         if (node.Parent != null && node != node.Parent.Subtrees.Last())
    326           strBuilder.Append(" ");
    327         //strBuilder.Append(")");
    328       }
    329       return strBuilder.ToString();
    330     }
    331262  }
    332263}
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs

    r13565 r14427  
    234234        var t = trees[i];
    235235        var tRoot = t.Root.GetSubtree(0).GetSubtree(0);
    236         if (t.Length < s.Length || !qm.Comparer.Equals(tRoot, sRoot)) continue;
     236        if (t.Length < s.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue;
    237237        filtered.Add(i);
    238238      }
Note: See TracChangeset for help on using the changeset viewer.