Ignore:
Timestamp:
04/16/18 08:35:59 (4 years ago)
Author:
bburlacu
Message:

#1772: Refactoring and speed optimization of diversification operators

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs

    r15482 r15906  
    2121
    2222using System;
     23using System.Collections.Concurrent;
    2324using System.Collections.Generic;
    2425using System.Linq;
     
    4142    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
    4243    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
    43     private const string ReplacementRatioParameterName = "ReplacementRatio";
     44    private const string MutationRateParameterName = "MutationRate";
    4445    private const string SchemaParameterName = "Schema";
    4546    private const string PopulationSizeParameterName = "PopulationSize";
     
    5253    private const string MutatorParameterName = "Mutator";
    5354    private const string CrossoverParameterName = "Crossover";
    54     private const string RandomReplacementParameterName = "RandomReplacement";
    5555    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
    5656    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
    5757    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
    5858    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
    59     private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
     59    private const string UseAdaptiveMutationRateParameterName = "UseAdaptiveMutationRate";
    6060    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
    6161    #endregion
    6262
    6363    #region parameters
    64     public ILookupParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
    65       get { return (ILookupParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
     64    public ILookupParameter<BoolValue> UseAdaptiveMutationRateParameter {
     65      get { return (ILookupParameter<BoolValue>)Parameters[UseAdaptiveMutationRateParameterName]; }
    6666    }
    6767    public ILookupParameter<BoolValue> ExclusiveMatchingParameter {
     
    8989      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
    9090    }
    91     public ILookupParameter<BoolValue> RandomReplacementParameter {
    92       get { return (ILookupParameter<BoolValue>)Parameters[RandomReplacementParameterName]; }
    93     }
    9491    public ILookupParameter<ISymbolicExpressionTreeCrossover> CrossoverParameter {
    9592      get { return (ILookupParameter<ISymbolicExpressionTreeCrossover>)Parameters[CrossoverParameterName]; }
     
    107104      get { return (ILookupParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
    108105    }
    109     public ILookupParameter<PercentValue> ReplacementRatioParameter {
    110       get { return (ILookupParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
     106    public ILookupParameter<PercentValue> MutationRateParameter {
     107      get { return (ILookupParameter<PercentValue>)Parameters[MutationRateParameterName]; }
    111108    }
    112109    public ILookupParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
     
    123120    #region parameter properties
    124121    public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
    125     public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } }
     122    public PercentValue MutationRate { get { return MutationRateParameter.ActualValue; } }
    126123    public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
    127     public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } }
    128124    public IntValue NumberOfChangedTrees { get { return NumberOfChangedTreesParameter.ActualValue; } }
    129125    #endregion
     
    135131
    136132    [Storable]
    137     public string ReplacementRule { get; set; }
     133    public string MutationRateUpdateRule { get; set; }
    138134
    139135    [Storable]
     
    171167      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated"));
    172168      Parameters.Add(new LookupParameter<PercentValue>(MinimumSchemaFrequencyParameterName));
    173       Parameters.Add(new LookupParameter<PercentValue>(ReplacementRatioParameterName));
     169      Parameters.Add(new LookupParameter<PercentValue>(MutationRateParameterName));
    174170      Parameters.Add(new LookupParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName));
    175171      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(PopulationSizeParameterName));
     
    183179      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeManipulator>(MutatorParameterName));
    184180      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeCrossover>(CrossoverParameterName));
    185       Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName));
    186181      Parameters.Add(new LookupParameter<IntValue>(NumberOfChangedTreesParameterName));
    187182      Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName));
    188183      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
    189184      Parameters.Add(new LookupParameter<BoolValue>(ExclusiveMatchingParameterName));
    190       Parameters.Add(new LookupParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName));
     185      Parameters.Add(new LookupParameter<BoolValue>(UseAdaptiveMutationRateParameterName));
    191186      #endregion
    192187    }
     
    218213
    219214      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
    220       var trees = individuals.Select(x => (ISymbolicExpressionTree)x.Variables["SymbolicExpressionTree"].Value).ToList();
     215      var trees = new ISymbolicExpressionTree[individuals.Count];
     216      var qualities = new double[individuals.Count];
     217
     218      for (int i = 0; i < individuals.Count; ++i) {
     219        trees[i] = (ISymbolicExpressionTree)individuals[i].Variables["SymbolicExpressionTree"].Value;
     220        qualities[i] = ((DoubleValue)individuals[i].Variables["Quality"].Value).Value;
     221      }
    221222
    222223      var random = RandomParameter.ActualValue;
    223 
    224       var s = Schema;
    225       var sRoot = s.Root.GetSubtree(0).GetSubtree(0);
    226       int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
     224      var sRoot = Schema.Root.GetSubtree(0).GetSubtree(0);
    227225
    228226      // first apply the length and root equality checks in order to filter the individuals
    229227      var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value;
    230228      var filtered = new List<int>();
    231       for (int i = 0; i < individuals.Count; ++i) {
     229
     230      for (int i = 0; i < trees.Length; ++i) {
    232231        if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue;
    233232        var t = trees[i];
    234233        var tRoot = t.Root.GetSubtree(0).GetSubtree(0);
    235         if (t.Length < s.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue;
     234        if (t.Length < Schema.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue;
    236235        filtered.Add(i);
    237236      }
     
    239238      // if we don't have enough filtered individuals, then we are done
    240239      // if the schema exceeds the minimum frequency, it gets processed further
     240      int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
    241241      if (filtered.Count < countThreshold) {
    242242        return base.Apply();
     
    244244
    245245      // check if the filtered individuals match the schema
     246      var matching = new List<int>();
    246247      var sNodes = QueryMatch.InitializePostOrder(sRoot);
    247       var matchingIndividuals = new ScopeList();
     248
    248249      bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
    249250      int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
    250251
    251252      if (executeInParallel) {
    252         var matching = new bool[filtered.Count];
    253 
    254         Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
    255           i => {
    256             var index = filtered[i];
    257             var t = trees[index];
    258             var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
    259             if (qm.Match(tNodes, sNodes)) {
    260               matching[i] = true;
    261             }
    262           });
    263 
    264         matchingIndividuals.AddRange(filtered.Where((x, i) => matching[i]).Select(x => individuals[x]));
     253        var partitioner = Partitioner.Create(0, filtered.Count);
     254        var po = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };
     255
     256        Parallel.ForEach(partitioner, po, (range, loop) => {
     257          var partial = new List<int>();
     258          for (int i = range.Item1; i < range.Item2; ++i) {
     259            var idx = filtered[i];
     260            var tRoot = trees[idx].Root.GetSubtree(0).GetSubtree(0);
     261            var tNodes = QueryMatch.InitializePostOrder(tRoot);
     262
     263            if (qm.Match(tNodes, sNodes)) { partial.Add(idx); }
     264          }
     265          lock (matching) { matching.AddRange(partial); }
     266        });
    265267      } else {
    266268        for (int i = 0; i < filtered.Count; ++i) {
    267           // break early if it becomes impossible to reach the minimum threshold
    268           if (matchingIndividuals.Count + filtered.Count - i < countThreshold)
    269             break;
    270 
    271269          var index = filtered[i];
    272270          var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0);
    273271          var tNodes = QueryMatch.InitializePostOrder(tRoot);
    274           if (qm.Match(tNodes, sNodes))
    275             matchingIndividuals.Add(individuals[index]);
     272          if (qm.Match(tNodes, sNodes)) {
     273            matching.Add(index);
     274          }
    276275        }
    277276      }
    278 
    279       if (matchingIndividuals.Count < countThreshold) {
     277      if (matching.Count < countThreshold) {
    280278        return base.Apply();
    281279      }
    282280
     281      matching.Sort((a, b) => qualities[a].CompareTo(qualities[b])); // sort by ascending quality
     282      var matchingIndividuals = matching.Select(x => individuals[x]).ToArray(); // fittest individual will be last in the array
    283283      var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
    284284      if (similarity < MinimumPhenotypicSimilarity.Value) {
     
    286286      }
    287287
    288       double replacementRatio;
    289       var adaptiveReplacementRatio = UseAdaptiveReplacementRatioParameter.ActualValue.Value;
    290 
    291       if (adaptiveReplacementRatio) {
    292         var r = (double)matchingIndividuals.Count / individuals.Count;
    293         replacementRatio = CalculateReplacementRatio(r);
     288      double mutationRate;
     289      var useAdaptiveMutationRate = UseAdaptiveMutationRateParameter.ActualValue.Value;
     290
     291      if (useAdaptiveMutationRate) {
     292        var r = (double)matchingIndividuals.Length / individuals.Count;
     293        mutationRate = CalculateMutationRate(r);
    294294      } else {
    295         replacementRatio = ReplacementRatio.Value;
    296       }
    297 
    298       int n = (int)Math.Floor(matchingIndividuals.Count * replacementRatio);
    299       var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
    300                                                          : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
    301       var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
    302       var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
    303       foreach (var ind in individualsToReplace) {
    304         var mutatorOp = ExecutionContext.CreateChildOperation(SchemaManipulator, ind);
    305         var updateOp = ExecutionContext.CreateChildOperation(updateQualityOperator, ind);
    306         mutationOc.Add(mutatorOp);
    307         updateEstimatedValues.Add(updateOp);
     295        mutationRate = MutationRate.Value;
     296      }
     297
     298      var mutations = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
     299      var updates = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
     300
     301      // use length - 1 because we don't want to mutate the best individual in each schema group (which could also be the overall elite)
     302      for (int i = 0; i < matchingIndividuals.Length - 1; ++i) {
     303        if (random.NextDouble() > mutationRate) continue;
     304
     305        var ind = matchingIndividuals[i];
     306
     307        var mutate = ExecutionContext.CreateChildOperation(SchemaManipulator, ind);
     308        var update = ExecutionContext.CreateChildOperation(updateQualityOperator, ind);
     309
     310        mutations.Add(mutate);
     311        updates.Add(update);
     312
    308313        if (exclusiveMatching)
    309314          ind.Variables.Add(new Core.Variable("AlreadyMatched"));
    310315      }
    311 
    312       NumberOfChangedTrees.Value += individualsToReplace.Count; // a lock is not necessary here because the SchemaEvaluators cannot be executed in parallel
    313 
    314       return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply());
    315     }
    316 
    317     private double CalculateReplacementRatio(double r) {
    318       switch (ReplacementRule) {
     316      NumberOfChangedTrees.Value += mutations.Count;
     317
     318      return new OperationCollection(mutations, updates, base.Apply());
     319    }
     320
     321    private double CalculateMutationRate(double r) {
     322      switch (MutationRateUpdateRule) {
    319323        case "f(x) = x": {
    320324            return r;
     
    340344    }
    341345
    342     public static double CalculateSimilarity(ScopeList individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
     346    public static double CalculateSimilarity(IScope[] individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
     347      if (individuals.Length < 2)
     348        return double.NaN;
     349
    343350      double similarity = 0;
    344       int count = individuals.Count;
    345       if (count < 2) return double.NaN;
     351      int count = individuals.Length;
     352      int n = count * (count - 1) / 2;
     353
    346354      if (parallel) {
    347         var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
    348         var simArray = new double[count - 1];
    349         Parallel.For(0, count - 1, parallelOptions, i => {
    350           double innerSim = 0;
     355        var ii = new int[n];
     356        var jj = new int[n];
     357        int k = 0;
     358        for (int i = 0; i < count - 1; ++i)
    351359          for (int j = i + 1; j < count; ++j) {
    352             innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
    353           }
    354           simArray[i] = innerSim;
     360            ii[k] = i;
     361            jj[k] = j;
     362            ++k;
     363          }
     364        var po = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
     365        var partitioner = Partitioner.Create(0, n);
     366        var locker = new object();
     367        Parallel.ForEach(partitioner, new ParallelOptions { MaxDegreeOfParallelism = 4 }, (range, loop) => {
     368          var partial = 0d;
     369          for (int idx = range.Item1; idx < range.Item2; ++idx) {
     370            int i = ii[idx];
     371            int j = jj[idx];
     372            partial += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
     373          }
     374          lock (locker) { similarity += partial; }
    355375        });
    356         similarity = simArray.Sum();
    357376      } else {
    358377        for (int i = 0; i < count - 1; ++i) {
     
    362381        }
    363382      }
    364       return similarity / (count * (count - 1) / 2.0);
     383      return similarity / n;
    365384    }
    366385  }
Note: See TracChangeset for help on using the changeset viewer.