Changeset 12979


Ignore:
Timestamp:
10/02/15 01:08:30 (6 years ago)
Author:
bburlacu
Message:

#1772:

  • added parameters to the SchemaCreator for calculating things in parallel
  • added parallel code to the SchemaEvaluator
  • implemented quality calculation and saving of estimated values in the scope in the UpdateEstimatedValuesOperator
  • small refactoring of QueryMatch (added some useful methods and made NodeInfo internal)
  • updated query match unit test
Location:
branches/HeuristicLab.EvolutionTracking
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.EvolutionTracking.Tests/QueryMatchPerformanceTest.cs

    r12951 r12979  
    11using System;
     2using System.Diagnostics;
    23using System.Linq;
    34using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     
    1112    private const int MaxDepth = 12;
    1213    private const int MaxLength = 50;
    13     private const int N = 100000;
    14 
     14    private const int N = 50000;
     15    private const int Repetitions = 5;
    1516
    1617    [TestMethod]
     
    2829      };
    2930      var qm = new QueryMatch(comparer) { MatchParents = true };
    30 
     31      // warmup
    3132      int count = trees.Skip(1).Count(x => qm.Match(x, trees[0]));
    3233
     34      var sw = new Stopwatch();
     35      sw.Start();
     36      for (int i = 0; i < Repetitions; ++i)
     37        count = trees.Skip(1).Count(x => x.Length >= trees[0].Length && qm.Match(x, trees[0]));
     38      sw.Stop();
     39
    3340      Console.WriteLine("Count: " + count);
     41      Console.WriteLine("Query matches per second: " + (N - 1) / (sw.ElapsedMilliseconds / 1000.0) * Repetitions);
     42      Console.WriteLine("Total time: " + sw.ElapsedMilliseconds / 1000.0 + "s");
     43
     44      sw.Restart();
     45      for (int i = 0; i < Repetitions; ++i)
     46        count = qm.GetMatchingTrees(trees.Skip(1), trees[0]).Count();
     47      sw.Stop();
     48
     49      Console.WriteLine("Count: " + count);
     50      Console.WriteLine("Optimized query matches per second: " + (N - 1) / (sw.ElapsedMilliseconds / 1000.0) * Repetitions);
     51      Console.WriteLine("Total time: " + sw.ElapsedMilliseconds / 1000.0 + "s");
    3452    }
    3553  }
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs

    r12958 r12979  
    4242    private const string ReplacementRatioParameterName = "ReplacementRatio";
    4343    private const string RandomReplacementParameterName = "RandomReplacement";
     44    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
     45    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
    4446
    4547    #region parameters
     
    4850    }
    4951
     52    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
     53      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
     54    }
     55
     56    public IFixedValueParameter<IntValue> MaxDegreeOfParallelismParameter {
     57      get { return (IFixedValueParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
     58    }
     59
    5060    public IFixedValueParameter<PercentValue> MinimumSchemaFrequencyParameter {
    5161      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
     
    6272
    6373    #region parameter properties
    64     public int MinimumSchemaLength {
    65       get { return MinimumSchemaLengthParameter.Value.Value; }
    66     }
     74    public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
     75    public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
     76    public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
    6777    #endregion
    6878
     
    7282    private DataTableValuesCollector valuesCollector;
    7383    private ResultsCollector resultsCollector;
     84    private UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
    7485
    7586    private void ParameterizeOperators() {
     
    89100      resultsCollector = new ResultsCollector();
    90101      resultsCollector.CollectedValues.Add(new LookupParameter<DataTable>("Diversification"));
     102
     103      updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    91104    }
    92105
    93106    public SchemaCreator() {
    94107      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
    95       Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.05)));
    96       Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.8)));
    97       Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.2)));
     108      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
     109      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
     110      Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.9)));
    98111      Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(false)));
     112      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(false)));
     113      Parameters.Add(new FixedValueParameter<IntValue>(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
     114
     115      ExecuteInParallelParameter.Hidden = true;
     116      MaxDegreeOfParallelismParameter.Hidden = true;
    99117
    100118      ParameterizeOperators();
     
    116134        return base.Apply();
    117135      // for now, only consider crossover offspring
     136
     137      var scopes = new ScopeList(this.ExecutionContext.Scope.SubScopes);
    118138      var vertices = GenealogyGraph.GetByRank(gen).Where(x => x.InDegree == 2).Cast<IGenealogyGraphNode<ISymbolicExpressionTree>>();
    119139      var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList();
    120140      var anySubtreeSymbol = new AnySubtreeSymbol();
    121141
    122       var scopes = new ScopeList(this.ExecutionContext.Scope.SubScopes);
    123 
    124142      if (schemaEvaluator == null || schemaCleanupOperator == null || changedTreesReducer == null || valuesCollector == null || resultsCollector == null)
    125143        ParameterizeOperators();
     
    129147      var collectResults = ExecutionContext.CreateChildOperation(resultsCollector);
    130148
    131       var oc = new OperationCollection();
     149      var updateEstimatedValues = new OperationCollection { Parallel = true };
     150      foreach (var s in scopes) {
     151        if (!s.Variables.ContainsKey("EstimatedValues"))
     152          s.Variables.Add(new Core.Variable("EstimatedValues"));
     153        updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s));
     154      }
     155      var oc = new OperationCollection { updateEstimatedValues };
    132156
    133157      #region create schemas and add subscopes representing the individuals
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs

    r12970 r12979  
    2222using System;
    2323using System.Linq;
     24using System.Threading.Tasks;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
     
    5051    private const string RandomReplacementParameterName = "RandomReplacement";
    5152    private const string ChangedTreesParameterName = "ChangedTrees";
     53    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
     54    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
    5255    #endregion
    5356
    5457    #region parameters
     58    public ILookupParameter<BoolValue> ExecuteInParallelParameter {
     59      get { return (ILookupParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
     60    }
     61    public ILookupParameter<IntValue> MaxDegreeOfParallelismParameter {
     62      get { return (ILookupParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
     63    }
    5564    public ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>> EvaluatorParameter {
    5665      get { return (ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>)Parameters[EvaluatorParameterName]; }
     
    98107
    99108    #region parameter properties
    100     public PercentValue MinimumSchemaFrequency {
    101       get { return MinimumSchemaFrequencyParameter.ActualValue; }
    102     }
    103 
    104     public PercentValue ReplacementRatio {
    105       get { return ReplacementRatioParameter.ActualValue; }
    106     }
    107 
    108     public PercentValue MinimumPhenotypicSimilarity {
    109       get { return MinimumPhenotypicSimilarityParameter.ActualValue; }
    110     }
    111 
    112     public BoolValue RandomReplacement {
    113       get { return RandomReplacementParameter.ActualValue; }
    114     }
     109    public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
     110    public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } }
     111    public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
     112    public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } }
    115113    #endregion
    116114
     
    124122    };
    125123
    126 
    127     [StorableHook(HookType.AfterDeserialization)]
    128     private void AfterDeserialization() {
    129       if (!Parameters.ContainsKey(ChangedTreesParameterName))
    130         Parameters.Add(new LookupParameter<IntValue>(ChangedTreesParameterName));
    131     }
     124    [Storable]
     125    private readonly UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
    132126
    133127    public SchemaEvaluator() {
    134128      qm = new QueryMatch(comp) { MatchParents = true };
     129      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    135130
    136131      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated"));
     
    148143      Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName));
    149144      Parameters.Add(new LookupParameter<IntValue>(ChangedTreesParameterName));
    150     }
    151 
     145      Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName));
     146      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
     147    }
    152148
    153149    [StorableConstructor]
     
    155151
    156152    protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) {
    157       this.comp = (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone();
    158       this.qm = new QueryMatch(comp) { MatchParents = original.qm.MatchParents };
     153      this.comp = original.comp == null ? new SymbolicExpressionTreeNodeEqualityComparer {
     154        MatchConstantValues = false,
     155        MatchVariableWeights = false,
     156        MatchVariableNames = true
     157      } : (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone();
     158      this.qm = new QueryMatch(comp) { MatchParents = original.qm?.MatchParents ?? true };
     159      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    159160    }
    160161
     
    163164    }
    164165
    165     private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator) {
    166       double similarity = 0;
    167       int count = individuals.Count;
    168       for (int i = 0; i < count - 1; ++i) {
    169         for (int j = i + 1; j < count; ++j) {
    170           similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
    171         }
    172       }
    173       return similarity / (count * (count - 1) / 2.0);
    174     }
    175 
    176166    public override IOperation Apply() {
    177167      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
     
    179169      var random = RandomParameter.ActualValue;
    180170      var mutator = MutatorParameter.ActualValue;
    181       var evaluator = EvaluatorParameter.ActualValue;
    182       var updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    183171
    184172      var s = SchemaParameter.ActualValue;
    185       var matchingIndividuals = new ScopeList();
    186       foreach (var ind in individuals) {
    187         var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;
    188         if (t.Length >= s.Length && qm.Match(t, s))
    189           matchingIndividuals.Add(ind);
    190       }
    191 
    192       if (matchingIndividuals.Count < (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count))) {
     173      var sRoot = s.Root.GetSubtree(0).GetSubtree(0);
     174      int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
     175
     176      // first apply the length and root equality checks in order to filter the individuals
     177      var filtered = (from ind in individuals
     178                      let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value
     179                      where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot)
     180                      select ind).ToList();
     181
     182      // if we don't have enough filtered individuals, then we are done
     183      if (filtered.Count < countThreshold) {
    193184        ChangedTreesParameter.ActualValue = new IntValue(0);
    194185        return base.Apply();
    195186      }
    196187
    197       var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator);
     188      // check if the filtered individuals match the schema
     189      var sNodes = QueryMatch.InitializePostOrder(sRoot);
     190      var matchingIndividuals = new ScopeList();
     191      bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
     192      int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
     193
     194      if (executeInParallel) {
     195        var matching = new bool[filtered.Count];
     196
     197        Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
     198          i => {
     199            var t = (ISymbolicExpressionTree)filtered[i].Variables["SymbolicExpressionTree"].Value;
     200            var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
     201            if (qm.Match(tNodes, sNodes)) {
     202              matching[i] = true;
     203            }
     204          });
     205
     206        for (int i = 0; i < matching.Length; ++i) {
     207          if (matching[i])
     208            matchingIndividuals.Add(filtered[i]);
     209        }
     210      } else {
     211        for (int i = 0; i < filtered.Count; ++i) {
     212
     213          // break early if it becomes impossible to reach the minimum threshold
     214          if (matchingIndividuals.Count + filtered.Count - i < countThreshold)
     215            break;
     216
     217          var ind = filtered[i];
     218          var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;
     219          var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
     220          if (qm.Match(tNodes, sNodes))
     221            matchingIndividuals.Add(ind);
     222        }
     223      }
     224
     225      if (matchingIndividuals.Count < countThreshold) {
     226        ChangedTreesParameter.ActualValue = new IntValue(0);
     227        return base.Apply();
     228      }
     229
     230      var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
    198231      if (similarity < MinimumPhenotypicSimilarity.Value) {
    199232        ChangedTreesParameter.ActualValue = new IntValue(0);
     
    201234      }
    202235
    203       var oc = new OperationCollection();
    204236      int n = (int)Math.Floor(matchingIndividuals.Count * ReplacementRatio.Value);
    205237      var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
    206238                                                         : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
     239      var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
     240      var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
    207241      foreach (var ind in individualsToReplace) {
    208242        var mutatorOp = ExecutionContext.CreateChildOperation(mutator, ind);
    209         var evaluatorOp = ExecutionContext.CreateChildOperation(evaluator, ind);
    210         var updateEstimatedValuesOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind);
    211         oc.Add(mutatorOp);
    212         oc.Add(evaluatorOp);
    213         oc.Add(updateEstimatedValuesOp);
     243        var updateOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind);
     244        mutationOc.Add(mutatorOp);
     245        updateEstimatedValues.Add(updateOp);
    214246      }
    215247      ChangedTreesParameter.ActualValue = new IntValue(individualsToReplace.Count);
    216       return new OperationCollection(oc, base.Apply());
     248      return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply());
     249    }
     250
     251    private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
     252      double similarity = 0;
     253      int count = individuals.Count;
     254      if (parallel) {
     255        var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
     256        var simArray = new double[count - 1];
     257        Parallel.For(0, count - 1, parallelOptions, i => {
     258          double innerSim = 0;
     259          for (int j = i + 1; j < count; ++j) {
     260            innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
     261          }
     262          simArray[i] = innerSim;
     263        });
     264        similarity = simArray.Sum();
     265      } else {
     266        for (int i = 0; i < count - 1; ++i) {
     267          for (int j = i + 1; j < count; ++j) {
     268            similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
     269          }
     270        }
     271      }
     272      return similarity / (count * (count - 1) / 2.0);
    217273    }
    218274  }
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/UpdateEstimatedValuesOperator.cs

    r12958 r12979  
    2020#endregion
    2121
     22using System;
    2223using System.Linq;
    2324using HeuristicLab.Common;
     
    5152    }
    5253
     54
    5355    public UpdateEstimatedValuesOperator() {
    5456      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
     
    7476      var interpreter = InterpreterParameter.ActualValue;
    7577
    76       var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices)
    77                                        .LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray();
     78      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices).ToArray();
     79      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
     80
     81      if (estimatedValues.Length != targetValues.Length)
     82        throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");
     83
     84      var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
     85
     86      for (int i = 0; i < estimatedValues.Length; ++i) {
     87        var estimated = estimatedValues[i];
     88        var target = targetValues[i];
     89        if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
     90          linearScalingCalculator.Add(estimated, target);
     91      }
     92      double alpha = linearScalingCalculator.Alpha;
     93      double beta = linearScalingCalculator.Beta;
     94      if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) {
     95        alpha = 0.0;
     96        beta = 1.0;
     97      }
     98
     99      var scaled = estimatedValues.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray();
     100      OnlineCalculatorError error;
     101      var r = OnlinePearsonsRCalculator.Calculate(targetValues, scaled, out error);
     102      if (error != OnlineCalculatorError.None) r = 0;
     103
     104      var r2 = r * r;
     105      if (r2 > 1.0) r2 = 1.0;
    78106
    79107      var variables = ExecutionContext.Scope.Variables;
    80       if (variables.ContainsKey("EstimatedValues"))
    81         variables["EstimatedValues"].Value = new DoubleArray(estimatedValues);
    82       else
    83         variables.Add(new Core.Variable("EstimatedValues", new DoubleArray(estimatedValues)));
     108      ((DoubleValue)variables["Quality"].Value).Value = r2;
     109
     110      if (variables.ContainsKey("EstimatedValues")) {
     111        variables["EstimatedValues"].Value = new DoubleArray(scaled);
     112      } else {
     113        variables.Add(new Core.Variable("EstimatedValues", new DoubleArray(scaled)));
     114      }
    84115      return base.Apply();
    85116    }
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/QueryMatch.cs

    r12951 r12979  
    2828  // by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440
    2929  public class QueryMatch {
    30     private List<NodeInfo> dNodes;
    31     private List<NodeInfo> qNodes;
    32     private readonly ISymbolicExpressionTreeNodeEqualityComparer comparer;
     30    public ISymbolicExpressionTreeNodeEqualityComparer Comparer { get; }
    3331
    3432    private QueryMatch() { }
     
    4139
    4240    public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer comparer) {
    43       this.comparer = comparer;
     41      this.Comparer = comparer;
     42    }
     43
     44    internal bool Match(List<NodeInfo> data, List<NodeInfo> query) {
     45      var dRoot = data.Last();
     46      var qRoot = query.Last();
     47
     48      var result = Tmatch(dRoot, query.First(), qRoot);
     49      return result == qRoot;
    4450    }
    4551
     
    4955
    5056    public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) {
    51       if (!comparer.Equals(data, query))
     57      if (!Comparer.Equals(data, query))
    5258        return false;
    5359
    54       dNodes = InitializePostOrder(data);
    55       qNodes = InitializePostOrder(query);
     60      var dNodes = InitializePostOrder(data);
     61      var qNodes = InitializePostOrder(query);
    5662
    5763      var dRoot = dNodes.Last();
    5864      var qRoot = qNodes.Last();
    59       var result = Tmatch(dRoot, qNodes.First(), qNodes.Last());
     65      var result = Tmatch(dRoot, qNodes.First(), qRoot);
    6066      return result == qRoot;
     67    }
     68
     69    public IEnumerable<ISymbolicExpressionTree> GetMatchingTrees(IEnumerable<ISymbolicExpressionTree> data, ISymbolicExpressionTree query) {
     70      var qRoot = query.Root.GetSubtree(0).GetSubtree(0);
     71      var filtered = data.Where(x => x.Length >= query.Length && Comparer.Equals(x.Root.GetSubtree(0).GetSubtree(0), qRoot));
     72      var qNodes = InitializePostOrder(query.Root.GetSubtree(0).GetSubtree(0));
     73      return from d in filtered let dNodes = InitializePostOrder(d.Root.GetSubtree(0).GetSubtree(0)) where Match(dNodes, qNodes) select d;
    6174    }
    6275
     
    6679        return false;
    6780
    68       bool equals = comparer.Equals(a.Node, b.Node);
     81      bool equals = Comparer.Equals(a.Node, b.Node);
    6982
    7083      if (equals && MatchParents) {
     
    7487          return true;
    7588        if (pa != null && pb != null)
    76           return pa.Level == pb.Level && comparer.Equals(pa.Node, pb.Node);
     89          return pa.Level == pb.Level && Comparer.Equals(pa.Node, pb.Node);
    7790        return false;
    7891      }
     
    145158    }
    146159
    147     private List<NodeInfo> InitializePostOrder(ISymbolicExpressionTreeNode node) {
     160    internal static List<NodeInfo> InitializePostOrder(ISymbolicExpressionTreeNode node) {
    148161      var list = node.IterateNodesPostfix().ToList();
    149162      var nodes = list.Select((x, i) => new NodeInfo { Node = x, Index = i, Level = node.GetBranchLevel(x) }).ToList();
     
    171184      return nodes;
    172185    }
    173 
    174     private class NodeInfo {
    175       public ISymbolicExpressionTreeNode Node { get; set; }
    176       public int Index { get; set; }
    177       public int Level { get; set; }
    178       public NodeInfo Parent { get; set; }
    179       public NodeInfo Previous { get; set; }
    180       public NodeInfo PreviousSibling { get; set; }
    181       public NodeInfo Next { get; set; }
    182       public NodeInfo NextSibling { get; set; }
    183       public NodeInfo LastSibling { get; set; }
    184       public NodeInfo LastChild { get; set; }
    185 
    186       public bool IsLeaf {
    187         get { return Node.SubtreeCount == 0; }
    188       }
    189 
    190       public bool IsFirstSibling {
    191         get { return Node.Parent != null && Node == Node.Parent.Subtrees.First(); }
    192       }
    193 
    194       public bool IsLastSibling {
    195         get { return Node.Parent != null && Node == Node.Parent.Subtrees.Last(); }
    196       }
    197 
    198       public IEnumerable<NodeInfo> Ancestors {
    199         get {
    200           var p = Parent;
    201           while (p != null) {
    202             yield return p;
    203             p = p.Parent;
    204           }
     186  }
     187
     188  internal class NodeInfo {
     189    public ISymbolicExpressionTreeNode Node { get; set; }
     190    public int Index { get; set; }
     191    public int Level { get; set; }
     192    public NodeInfo Parent { get; set; }
     193    public NodeInfo Previous { get; set; }
     194    public NodeInfo PreviousSibling { get; set; }
     195    public NodeInfo Next { get; set; }
     196    public NodeInfo NextSibling { get; set; }
     197    public NodeInfo LastSibling { get; set; }
     198    public NodeInfo LastChild { get; set; }
     199
     200    public bool IsLeaf {
     201      get { return Node.SubtreeCount == 0; }
     202    }
     203
     204    public bool IsFirstSibling {
     205      get { return Node.Parent != null && Node == Node.Parent.Subtrees.First(); }
     206    }
     207
     208    public bool IsLastSibling {
     209      get { return Node.Parent != null && Node == Node.Parent.Subtrees.Last(); }
     210    }
     211
     212    public IEnumerable<NodeInfo> Ancestors {
     213      get {
     214        var p = Parent;
     215        while (p != null) {
     216          yield return p;
     217          p = p.Parent;
    205218        }
    206219      }
Note: See TracChangeset for help on using the changeset viewer.