Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
10/02/15 01:08:30 (9 years ago)
Author:
bburlacu
Message:

#1772:

  • added parameters to the SchemaCreator for calculating things in parallel
  • added parallel code to the SchemaEvaluator
  • implemented quality calculation and saving of estimated values in the scope in the UpdateEstimatedValuesOperator
  • small refactoring of QueryMatch (added some useful methods and made NodeInfo internal)
  • updated query match unit test
File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs

    r12970 r12979  
    2222using System;
    2323using System.Linq;
     24using System.Threading.Tasks;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
     
    5051    private const string RandomReplacementParameterName = "RandomReplacement";
    5152    private const string ChangedTreesParameterName = "ChangedTrees";
     53    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
     54    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
    5255    #endregion
    5356
    5457    #region parameters
     58    public ILookupParameter<BoolValue> ExecuteInParallelParameter {
     59      get { return (ILookupParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
     60    }
     61    public ILookupParameter<IntValue> MaxDegreeOfParallelismParameter {
     62      get { return (ILookupParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
     63    }
    5564    public ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>> EvaluatorParameter {
    5665      get { return (ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>)Parameters[EvaluatorParameterName]; }
     
    98107
    99108    #region parameter properties
    100     public PercentValue MinimumSchemaFrequency {
    101       get { return MinimumSchemaFrequencyParameter.ActualValue; }
    102     }
    103 
    104     public PercentValue ReplacementRatio {
    105       get { return ReplacementRatioParameter.ActualValue; }
    106     }
    107 
    108     public PercentValue MinimumPhenotypicSimilarity {
    109       get { return MinimumPhenotypicSimilarityParameter.ActualValue; }
    110     }
    111 
    112     public BoolValue RandomReplacement {
    113       get { return RandomReplacementParameter.ActualValue; }
    114     }
     109    public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
     110    public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } }
     111    public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
     112    public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } }
    115113    #endregion
    116114
     
    124122    };
    125123
    126 
    127     [StorableHook(HookType.AfterDeserialization)]
    128     private void AfterDeserialization() {
    129       if (!Parameters.ContainsKey(ChangedTreesParameterName))
    130         Parameters.Add(new LookupParameter<IntValue>(ChangedTreesParameterName));
    131     }
     124    [Storable]
     125    private readonly UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
    132126
    133127    public SchemaEvaluator() {
    134128      qm = new QueryMatch(comp) { MatchParents = true };
     129      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    135130
    136131      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated"));
     
    148143      Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName));
    149144      Parameters.Add(new LookupParameter<IntValue>(ChangedTreesParameterName));
    150     }
    151 
     145      Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName));
     146      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
     147    }
    152148
    153149    [StorableConstructor]
     
    155151
    156152    protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) {
    157       this.comp = (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone();
    158       this.qm = new QueryMatch(comp) { MatchParents = original.qm.MatchParents };
     153      this.comp = original.comp == null ? new SymbolicExpressionTreeNodeEqualityComparer {
     154        MatchConstantValues = false,
     155        MatchVariableWeights = false,
     156        MatchVariableNames = true
     157      } : (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone();
     158      this.qm = new QueryMatch(comp) { MatchParents = original.qm?.MatchParents ?? true };
     159      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    159160    }
    160161
     
    163164    }
    164165
    165     private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator) {
    166       double similarity = 0;
    167       int count = individuals.Count;
    168       for (int i = 0; i < count - 1; ++i) {
    169         for (int j = i + 1; j < count; ++j) {
    170           similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
    171         }
    172       }
    173       return similarity / (count * (count - 1) / 2.0);
    174     }
    175 
    176166    public override IOperation Apply() {
    177167      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
     
    179169      var random = RandomParameter.ActualValue;
    180170      var mutator = MutatorParameter.ActualValue;
    181       var evaluator = EvaluatorParameter.ActualValue;
    182       var updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    183171
    184172      var s = SchemaParameter.ActualValue;
    185       var matchingIndividuals = new ScopeList();
    186       foreach (var ind in individuals) {
    187         var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;
    188         if (t.Length >= s.Length && qm.Match(t, s))
    189           matchingIndividuals.Add(ind);
    190       }
    191 
    192       if (matchingIndividuals.Count < (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count))) {
     173      var sRoot = s.Root.GetSubtree(0).GetSubtree(0);
     174      int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
     175
     176      // first apply the length and root equality checks in order to filter the individuals
     177      var filtered = (from ind in individuals
     178                      let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value
     179                      where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot)
     180                      select ind).ToList();
     181
     182      // if we don't have enough filtered individuals, then we are done
     183      if (filtered.Count < countThreshold) {
    193184        ChangedTreesParameter.ActualValue = new IntValue(0);
    194185        return base.Apply();
    195186      }
    196187
    197       var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator);
     188      // check if the filtered individuals match the schema
     189      var sNodes = QueryMatch.InitializePostOrder(sRoot);
     190      var matchingIndividuals = new ScopeList();
     191      bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
     192      int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
     193
     194      if (executeInParallel) {
     195        var matching = new bool[filtered.Count];
     196
     197        Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
     198          i => {
     199            var t = (ISymbolicExpressionTree)filtered[i].Variables["SymbolicExpressionTree"].Value;
     200            var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
     201            if (qm.Match(tNodes, sNodes)) {
     202              matching[i] = true;
     203            }
     204          });
     205
     206        for (int i = 0; i < matching.Length; ++i) {
     207          if (matching[i])
     208            matchingIndividuals.Add(filtered[i]);
     209        }
     210      } else {
     211        for (int i = 0; i < filtered.Count; ++i) {
     212
     213          // break early if it becomes impossible to reach the minimum threshold
     214          if (matchingIndividuals.Count + filtered.Count - i < countThreshold)
     215            break;
     216
     217          var ind = filtered[i];
     218          var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;
     219          var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
     220          if (qm.Match(tNodes, sNodes))
     221            matchingIndividuals.Add(ind);
     222        }
     223      }
     224
     225      if (matchingIndividuals.Count < countThreshold) {
     226        ChangedTreesParameter.ActualValue = new IntValue(0);
     227        return base.Apply();
     228      }
     229
     230      var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
    198231      if (similarity < MinimumPhenotypicSimilarity.Value) {
    199232        ChangedTreesParameter.ActualValue = new IntValue(0);
     
    201234      }
    202235
    203       var oc = new OperationCollection();
    204236      int n = (int)Math.Floor(matchingIndividuals.Count * ReplacementRatio.Value);
    205237      var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
    206238                                                         : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
     239      var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
     240      var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
    207241      foreach (var ind in individualsToReplace) {
    208242        var mutatorOp = ExecutionContext.CreateChildOperation(mutator, ind);
    209         var evaluatorOp = ExecutionContext.CreateChildOperation(evaluator, ind);
    210         var updateEstimatedValuesOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind);
    211         oc.Add(mutatorOp);
    212         oc.Add(evaluatorOp);
    213         oc.Add(updateEstimatedValuesOp);
     243        var updateOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind);
     244        mutationOc.Add(mutatorOp);
     245        updateEstimatedValues.Add(updateOp);
    214246      }
    215247      ChangedTreesParameter.ActualValue = new IntValue(individualsToReplace.Count);
    216       return new OperationCollection(oc, base.Apply());
     248      return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply());
     249    }
     250
     251    private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
     252      double similarity = 0;
     253      int count = individuals.Count;
     254      if (parallel) {
     255        var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
     256        var simArray = new double[count - 1];
     257        Parallel.For(0, count - 1, parallelOptions, i => {
     258          double innerSim = 0;
     259          for (int j = i + 1; j < count; ++j) {
     260            innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
     261          }
     262          simArray[i] = innerSim;
     263        });
     264        similarity = simArray.Sum();
     265      } else {
     266        for (int i = 0; i < count - 1; ++i) {
     267          for (int j = i + 1; j < count; ++j) {
     268            similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
     269          }
     270        }
     271      }
     272      return similarity / (count * (count - 1) / 2.0);
    217273    }
    218274  }
Note: See TracChangeset for help on using the changeset viewer.