Changeset 15906


Ignore:
Timestamp:
04/16/18 08:35:59 (16 months ago)
Author:
bburlacu
Message:

#1772: Refactoring and speed optimization of diversification operators

Location:
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/Analyzers/SymbolicDataAnalysisSchemaFrequencyAnalyzer.cs

    r15351 r15906  
    4343    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
    4444    private const string MaximumDegreeOfParallelismParameterName = "MaximumDegreeOfParallelism";
     45    private const string SchemaDefinitionParameterName = "SchemaDefinition";
    4546
    4647    private static readonly Dictionary<string, string> ShortNames = new Dictionary<string, string> {
    47     { "Addition", "+" },
    48     { "Subtraction", "-" },
    49     { "Multiplication", "*" },
    50     { "Division", "/" },
    51     { "Exponential", "exp" },
    52     { "Logarithm", "log" }
    53   };
     48      { "Addition", "+" },
     49      { "Subtraction", "-" },
     50      { "Multiplication", "*" },
     51      { "Division", "/" },
     52      { "Exponential", "exp" },
     53      { "Logarithm", "log" }
     54    };
    5455
    5556    [Storable]
     
    6364    private QueryMatch qm;
    6465
     66
     67    public IConstrainedValueParameter<StringValue> SchemaDefinitionParameter {
     68      get { return (IConstrainedValueParameter<StringValue>)Parameters[SchemaDefinitionParameterName]; }
     69    }
    6570    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
    6671      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
     
    122127      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
    123128      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
     129
     130      var schemaDefinitions = new ItemSet<StringValue>(new[] {
     131         new StringValue("="),
     132         new StringValue("#"),
     133         new StringValue("=,#")
     134      });
     135      var schemaDefinitionParameter = new ConstrainedValueParameter<StringValue>(SchemaDefinitionParameterName, schemaDefinitions);
     136      schemaDefinitionParameter.Value = schemaDefinitions.First();
     137      Parameters.Add(schemaDefinitionParameter);
    124138    }
    125139
     
    185199      }
    186200
    187       var schemas = SchemaCreator.GenerateCombinedSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
     201      List<ISymbolicExpressionTree> schemas;
     202      var def = SchemaDefinitionParameter.Value.Value;
     203      switch (def) {
     204        case "=":
     205          schemas = SchemaCreator.GenerateAnyNodeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
     206          break;
     207        case "#":
     208          schemas = SchemaCreator.GenerateAnySubtreeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
     209          break;
     210        case "=,#":
     211          schemas = SchemaCreator.GenerateCombinedSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
     212          break;
     213        default:
     214          return base.Apply();
     215      }
    188216      var schemaStrings = schemas.Select(x => x.Root.GetSubtree(0).GetSubtree(0).FormatToString(StrictSchemaMatching)).ToList();
    189       int[][] matchingIndices;
     217      int[][] matchingIndices = new int[schemas.Count][];
     218
     219      var tNodes = trees.Select(x => QueryMatch.InitializePostOrder(x.Root.GetSubtree(0).GetSubtree(0))).ToList();
    190220      if (ExecuteInParallel) {
    191         matchingIndices = new int[schemas.Count][];
    192221        Parallel.For(0, schemas.Count, new ParallelOptions { MaxDegreeOfParallelism = MaximumDegreeOfParallelism }, i => {
    193222          var schema = schemas[i];
    194           matchingIndices[i] = Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v], schema)).ToArray();
     223          var sNodes = QueryMatch.InitializePostOrder(schema.Root.GetSubtree(0).GetSubtree(0));
     224          matchingIndices[i] = Enumerable.Range(0, trees.Count).Where(idx => qm.Match(tNodes[idx], sNodes)).ToArray();
    195225        });
    196226      } else {
    197         matchingIndices = schemas.Select(x => Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v], x)).ToArray()).ToArray();
     227        for (int i = 0; i < schemas.Count; ++i) {
     228          var schema = schemas[i];
     229          var sNodes = QueryMatch.InitializePostOrder(schema.Root.GetSubtree(0).GetSubtree(0));
     230          matchingIndices[i] = Enumerable.Range(0, trees.Count).Where(idx => qm.Match(tNodes[idx], sNodes)).ToArray();
     231        }
    198232      }
    199233
     
    203237      if (ExecuteInParallel) {
    204238        var locker = new object();
    205         Parallel.For(0, schemas.Count, new ParallelOptions { MaxDegreeOfParallelism = MaximumDegreeOfParallelism }, i => {
     239        Parallel.For(0, schemas.Count, new ParallelOptions {
     240          MaxDegreeOfParallelism = MaximumDegreeOfParallelism
     241        }, i => {
    206242          var indices = matchingIndices[i];
    207243          if (indices.Length > 1) {
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs

    r15482 r15906  
    3939  public class SchemaCreator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
    4040    #region parameter names
     41    // criteria to trigger schema-based diversification
    4142    private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
    4243    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
    4344    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
    44     private const string ReplacementRatioParameterName = "ReplacementRatio";
    45     private const string RandomReplacementParameterName = "RandomReplacement";
     45    // parameters controlling the diversification strategy
     46    private const string UseAdaptiveMutationRateParameterName = "UseAdaptiveMutationRate"; // dynamically control mutation rate
     47    private const string MutationRateUpdateRuleParameterName = "ReplacementRatioUpdateRule";
     48    private const string MutationRateParameterName = "MutationRate"; // fixed mutation rate when not using adaptive update
     49    private const string ExclusiveMatchingParameterName = "ExclusiveMatching"; // an individual can belong to only 1 schema
     50    private const string ParentsRatio = "ParentsRatio"; // use best % parents to generate schemas from
     51    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching"; // use strict node comparison (for constant values and variable weights)
     52    private const string SchemaDefinitionParameterName = "SchemaDefinition"; // which schema definition to use: {=}, {#} or {=,#}
     53    private const string SchemaManipulatorParameterName = "SchemaManipulator"; // mutation operator to apply within schemas
     54
     55    // control parallel behaviour
    4656    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
    4757    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
    48     private const string PercentageOfPopulationParameterName = "PercentageOfPopulationToDiversify";
    4958    private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
    50     private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
     59
     60    private const string UpdateCounterParameterName = "UpdateCounter";
     61    private const string UpdateIntervalParameterName = "UpdateInterval";
     62
     63    #region information parameters
    5164    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
    5265    private const string NumberOfSchemasParameterName = "NumberOfSchemas";
    5366    private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
    54     private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
    55     private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule";
    56     private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
    57     private const string SchemaDefinitionParameterName = "SchemaDefinition";
    58     private const string SchemaManipulatorParameterName = "SchemaManipulator";
     67    #endregion
    5968    #endregion
    6069
     
    6675      get { return (IConstrainedValueParameter<StringValue>)Parameters[SchemaDefinitionParameterName]; }
    6776    }
    68     public IConstrainedValueParameter<StringValue> ReplacementRatioUpdateRuleParameter {
    69       get { return (IConstrainedValueParameter<StringValue>)Parameters[ReplacementRatioUpdateRuleParameterName]; }
    70     }
    71     public IFixedValueParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
    72       get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
     77    public IConstrainedValueParameter<StringValue> MutationRateUpdateRuleParameter {
     78      get { return (IConstrainedValueParameter<StringValue>)Parameters[MutationRateUpdateRuleParameterName]; }
     79    }
     80    public IFixedValueParameter<BoolValue> UseAdaptiveMutationRateParameter {
     81      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveMutationRateParameterName]; }
    7382    }
    7483    public IFixedValueParameter<BoolValue> StrictSchemaMatchingParameter {
     
    8190      get { return (IFixedValueParameter<BoolValue>)Parameters[ScaleEstimatedValuesParameterName]; }
    8291    }
    83     public IFixedValueParameter<PercentValue> PercentageOfPopulationParameter {
    84       get { return (IFixedValueParameter<PercentValue>)Parameters[PercentageOfPopulationParameterName]; }
     92    public IFixedValueParameter<PercentValue> ParentsRatioParameter {
     93      get { return (IFixedValueParameter<PercentValue>)Parameters[ParentsRatio]; }
    8594    }
    8695    public IFixedValueParameter<IntValue> MinimumSchemaLengthParameter {
     
    99108      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
    100109    }
    101     public IFixedValueParameter<PercentValue> ReplacementRatioParameter {
    102       get { return (IFixedValueParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
     110    public IFixedValueParameter<PercentValue> MutationRateParameter {
     111      get { return (IFixedValueParameter<PercentValue>)Parameters[MutationRateParameterName]; }
    103112    }
    104113    public IValueParameter<IntValue> NumberOfSchemasParameter {
     
    110119    public IValueParameter<IntValue> NumberOfChangedTreesParameter {
    111120      get { return (IValueParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
     121    }
     122    public IFixedValueParameter<IntValue> UpdateCounterParameter {
     123      get { return (IFixedValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
     124    }
     125    public IFixedValueParameter<IntValue> UpdateIntervalParameter {
     126      get { return (IFixedValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    112127    }
    113128    #endregion
     
    117132    public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
    118133    public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
    119     public double PercentageOfPopulation { get { return PercentageOfPopulationParameter.Value.Value; } }
     134    public double PercentageOfPopulation { get { return ParentsRatioParameter.Value.Value; } }
    120135    public bool StrictSchemaMatching { get { return StrictSchemaMatchingParameter.Value.Value; } }
     136    public IntValue UpdateCounter { get { return UpdateCounterParameter.Value; } }
     137    public IntValue UpdateInterval { get { return UpdateIntervalParameter.Value; } }
     138
    121139    #endregion
    122140
    123141    private UpdateQualityOperator updateQualityOperator;
    124142    private DiversificationStatisticsOperator diversificationStatisticsOperator;
     143
     144    public override void InitializeState() {
     145      base.InitializeState();
     146      NumberOfChangedTreesParameter.Value.Value = 0;
     147      NumberOfChangedTreesParameter.Value.Value = 0;
     148      AverageSchemaLengthParameter.Value.Value = 0;
     149      UpdateCounter.Value = 0;
     150    }
    125151
    126152    public override void ClearState() {
     
    128154      NumberOfChangedTreesParameter.Value.Value = 0;
    129155      AverageSchemaLengthParameter.Value.Value = 0;
     156      UpdateCounter.Value = 0;
    130157      base.ClearState();
    131158    }
     
    136163      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
    137164      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
    138       Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.9)));
    139       Parameters.Add(new FixedValueParameter<PercentValue>(PercentageOfPopulationParameterName, new PercentValue(1)));
    140       Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(false)));
     165      Parameters.Add(new FixedValueParameter<PercentValue>(MutationRateParameterName, new PercentValue(0.9)));
     166      Parameters.Add(new FixedValueParameter<PercentValue>(ParentsRatio, new PercentValue(1)));
    141167      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(false)));
    142168      Parameters.Add(new FixedValueParameter<IntValue>(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
     
    147173      Parameters.Add(new ValueParameter<IntValue>(NumberOfSchemasParameterName, new IntValue(0)));
    148174      Parameters.Add(new ValueParameter<DoubleValue>(AverageSchemaLengthParameterName, new DoubleValue(0)));
    149       Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName, new BoolValue(true)));
     175      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveMutationRateParameterName, new BoolValue(true)));
     176      Parameters.Add(new FixedValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
     177      Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
    150178
    151179      // add update rules
    152       var replacementRatioUpdateRules = new ItemSet<StringValue>(new[] {
     180      var mutationRateUpdateRules = new ItemSet<StringValue>(new[] {
    153181        new StringValue("f(x) = x"),
    154182        new StringValue("f(x) = tanh(x)"),
     
    158186        new StringValue("f(x) = 1-sqrt(1-x)")
    159187      });
    160       var replacementRatioUpdateRuleParameter = new ConstrainedValueParameter<StringValue>(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules);
    161       replacementRatioUpdateRuleParameter.Value = replacementRatioUpdateRules.First();
    162       Parameters.Add(replacementRatioUpdateRuleParameter);
     188      var mutationRateUpdateRuleParameter = new ConstrainedValueParameter<StringValue>(MutationRateUpdateRuleParameterName, mutationRateUpdateRules);
     189      mutationRateUpdateRuleParameter.Value = mutationRateUpdateRules.First();
     190      Parameters.Add(mutationRateUpdateRuleParameter);
    163191
    164192      // add schema definitions
     
    208236
    209237    public override IOperation Apply() {
     238      UpdateCounter.Value++;
     239      if (UpdateCounter.Value != UpdateInterval.Value)
     240        return base.Apply();
     241      UpdateCounter.Value = 0;
     242
    210243      // apply only when at least one generation has passed
    211244      var gen = Generations.Value;
     
    213246        return base.Apply();
    214247
    215       var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation);
    216248
    217249      var updateEstimatedValues = new OperationCollection { Parallel = true };
     
    219251        updateQualityOperator = new UpdateQualityOperator();
    220252
    221       foreach (var s in ExecutionContext.Scope.SubScopes.Where(s => !s.Variables.ContainsKey("EstimatedValues"))) {
     253      var scope = ExecutionContext.Scope;
     254
     255      foreach (var s in scope.SubScopes.Where(s => !s.Variables.ContainsKey("EstimatedValues"))) {
    222256        updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateQualityOperator, s));
    223257      }
    224258
    225       var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value;
     259      var updateRule = MutationRateUpdateRuleParameter.Value.Value;
    226260      var schemaManipulator = SchemaManipulatorParameter.Value;
    227261
    228262      var evaluateSchemas = new OperationCollection();
    229263
     264      Func<IScope, double> getQuality = s => ((DoubleValue)s.Variables["Quality"].Value).Value;
     265
     266      var bestN = (int)Math.Round(scope.SubScopes.Count * PercentageOfPopulation);
     267      var scopes = new ScopeList(scope.SubScopes.OrderByDescending(getQuality).Take(bestN));
    230268      // for now, only consider crossover offspring
    231       var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.OrderByDescending(x => ((DoubleValue)x.Variables["Quality"].Value).Value).Take(n));
    232269      var vertices = from s in scopes
    233270                     let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
     
    236273                     select v;
    237274
    238       List<ISymbolicExpressionTree> schemas;
     275      IEnumerable<ISymbolicExpressionTree> schemas;
    239276      switch (SchemaDefinitionParameter.Value.Value) {
    240277        case "=":
    241           schemas = GenerateAnyNodeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
     278          schemas = GenerateAnyNodeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching);
    242279          break;
    243280        case "#":
    244           schemas = GenerateAnySubtreeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
     281          schemas = GenerateAnySubtreeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching);
    245282          break;
    246283        case "=,#":
    247           schemas = GenerateCombinedSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
     284          schemas = GenerateCombinedSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching);
    248285          break;
    249286        default:
     
    255292
    256293      #region create schemas and add subscopes representing the individuals
     294      double avgLength = 0;
     295      int count = 0;
    257296      foreach (var schema in schemas) {
    258         evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = replacementRule, SchemaManipulator = schemaManipulator }, ExecutionContext.Scope));
     297        avgLength += schema.Length;
     298        ++count;
     299        evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, MutationRateUpdateRule = updateRule, SchemaManipulator = schemaManipulator }, scope));
    259300      }
     301      avgLength /= count;
    260302      #endregion
     303
     304      // set parameters for statistics
     305      AverageSchemaLengthParameter.Value = new DoubleValue(avgLength);
     306      NumberOfSchemasParameter.Value = new IntValue(count);
     307      NumberOfChangedTreesParameter.Value = new IntValue(0);
    261308
    262309      if (diversificationStatisticsOperator == null)
     
    264311
    265312      var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
    266 
    267       // set parameters for statistics
    268       AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
    269       NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
    270       NumberOfChangedTreesParameter.Value = new IntValue(0);
    271313
    272314      // return an operation collection containing all the scope operations + base.Apply()
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs

    r15482 r15906  
    2121
    2222using System;
     23using System.Collections.Concurrent;
    2324using System.Collections.Generic;
    2425using System.Linq;
     
    4142    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
    4243    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
    43     private const string ReplacementRatioParameterName = "ReplacementRatio";
     44    private const string MutationRateParameterName = "MutationRate";
    4445    private const string SchemaParameterName = "Schema";
    4546    private const string PopulationSizeParameterName = "PopulationSize";
     
    5253    private const string MutatorParameterName = "Mutator";
    5354    private const string CrossoverParameterName = "Crossover";
    54     private const string RandomReplacementParameterName = "RandomReplacement";
    5555    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
    5656    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
    5757    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
    5858    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
    59     private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
     59    private const string UseAdaptiveMutationRateParameterName = "UseAdaptiveMutationRate";
    6060    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
    6161    #endregion
    6262
    6363    #region parameters
    64     public ILookupParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
    65       get { return (ILookupParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
     64    public ILookupParameter<BoolValue> UseAdaptiveMutationRateParameter {
     65      get { return (ILookupParameter<BoolValue>)Parameters[UseAdaptiveMutationRateParameterName]; }
    6666    }
    6767    public ILookupParameter<BoolValue> ExclusiveMatchingParameter {
     
    8989      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
    9090    }
    91     public ILookupParameter<BoolValue> RandomReplacementParameter {
    92       get { return (ILookupParameter<BoolValue>)Parameters[RandomReplacementParameterName]; }
    93     }
    9491    public ILookupParameter<ISymbolicExpressionTreeCrossover> CrossoverParameter {
    9592      get { return (ILookupParameter<ISymbolicExpressionTreeCrossover>)Parameters[CrossoverParameterName]; }
     
    107104      get { return (ILookupParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
    108105    }
    109     public ILookupParameter<PercentValue> ReplacementRatioParameter {
    110       get { return (ILookupParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
     106    public ILookupParameter<PercentValue> MutationRateParameter {
     107      get { return (ILookupParameter<PercentValue>)Parameters[MutationRateParameterName]; }
    111108    }
    112109    public ILookupParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
     
    123120    #region parameter properties
    124121    public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
    125     public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } }
     122    public PercentValue MutationRate { get { return MutationRateParameter.ActualValue; } }
    126123    public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
    127     public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } }
    128124    public IntValue NumberOfChangedTrees { get { return NumberOfChangedTreesParameter.ActualValue; } }
    129125    #endregion
     
    135131
    136132    [Storable]
    137     public string ReplacementRule { get; set; }
     133    public string MutationRateUpdateRule { get; set; }
    138134
    139135    [Storable]
     
    171167      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated"));
    172168      Parameters.Add(new LookupParameter<PercentValue>(MinimumSchemaFrequencyParameterName));
    173       Parameters.Add(new LookupParameter<PercentValue>(ReplacementRatioParameterName));
     169      Parameters.Add(new LookupParameter<PercentValue>(MutationRateParameterName));
    174170      Parameters.Add(new LookupParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName));
    175171      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(PopulationSizeParameterName));
     
    183179      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeManipulator>(MutatorParameterName));
    184180      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeCrossover>(CrossoverParameterName));
    185       Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName));
    186181      Parameters.Add(new LookupParameter<IntValue>(NumberOfChangedTreesParameterName));
    187182      Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName));
    188183      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
    189184      Parameters.Add(new LookupParameter<BoolValue>(ExclusiveMatchingParameterName));
    190       Parameters.Add(new LookupParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName));
     185      Parameters.Add(new LookupParameter<BoolValue>(UseAdaptiveMutationRateParameterName));
    191186      #endregion
    192187    }
     
    218213
    219214      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
    220       var trees = individuals.Select(x => (ISymbolicExpressionTree)x.Variables["SymbolicExpressionTree"].Value).ToList();
     215      var trees = new ISymbolicExpressionTree[individuals.Count];
     216      var qualities = new double[individuals.Count];
     217
     218      for (int i = 0; i < individuals.Count; ++i) {
     219        trees[i] = (ISymbolicExpressionTree)individuals[i].Variables["SymbolicExpressionTree"].Value;
     220        qualities[i] = ((DoubleValue)individuals[i].Variables["Quality"].Value).Value;
     221      }
    221222
    222223      var random = RandomParameter.ActualValue;
    223 
    224       var s = Schema;
    225       var sRoot = s.Root.GetSubtree(0).GetSubtree(0);
    226       int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
     224      var sRoot = Schema.Root.GetSubtree(0).GetSubtree(0);
    227225
    228226      // first apply the length and root equality checks in order to filter the individuals
    229227      var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value;
    230228      var filtered = new List<int>();
    231       for (int i = 0; i < individuals.Count; ++i) {
     229
     230      for (int i = 0; i < trees.Length; ++i) {
    232231        if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue;
    233232        var t = trees[i];
    234233        var tRoot = t.Root.GetSubtree(0).GetSubtree(0);
    235         if (t.Length < s.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue;
     234        if (t.Length < Schema.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue;
    236235        filtered.Add(i);
    237236      }
     
    239238      // if we don't have enough filtered individuals, then we are done
    240239      // if the schema exceeds the minimum frequency, it gets processed further
     240      int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
    241241      if (filtered.Count < countThreshold) {
    242242        return base.Apply();
     
    244244
    245245      // check if the filtered individuals match the schema
     246      var matching = new List<int>();
    246247      var sNodes = QueryMatch.InitializePostOrder(sRoot);
    247       var matchingIndividuals = new ScopeList();
     248
    248249      bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
    249250      int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
    250251
    251252      if (executeInParallel) {
    252         var matching = new bool[filtered.Count];
    253 
    254         Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
    255           i => {
    256             var index = filtered[i];
    257             var t = trees[index];
    258             var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
    259             if (qm.Match(tNodes, sNodes)) {
    260               matching[i] = true;
    261             }
    262           });
    263 
    264         matchingIndividuals.AddRange(filtered.Where((x, i) => matching[i]).Select(x => individuals[x]));
     253        var partitioner = Partitioner.Create(0, filtered.Count);
     254        var po = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };
     255
     256        Parallel.ForEach(partitioner, po, (range, loop) => {
     257          var partial = new List<int>();
     258          for (int i = range.Item1; i < range.Item2; ++i) {
     259            var idx = filtered[i];
     260            var tRoot = trees[idx].Root.GetSubtree(0).GetSubtree(0);
     261            var tNodes = QueryMatch.InitializePostOrder(tRoot);
     262
     263            if (qm.Match(tNodes, sNodes)) { partial.Add(idx); }
     264          }
     265          lock (matching) { matching.AddRange(partial); }
     266        });
    265267      } else {
    266268        for (int i = 0; i < filtered.Count; ++i) {
    267           // break early if it becomes impossible to reach the minimum threshold
    268           if (matchingIndividuals.Count + filtered.Count - i < countThreshold)
    269             break;
    270 
    271269          var index = filtered[i];
    272270          var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0);
    273271          var tNodes = QueryMatch.InitializePostOrder(tRoot);
    274           if (qm.Match(tNodes, sNodes))
    275             matchingIndividuals.Add(individuals[index]);
     272          if (qm.Match(tNodes, sNodes)) {
     273            matching.Add(index);
     274          }
    276275        }
    277276      }
    278 
    279       if (matchingIndividuals.Count < countThreshold) {
     277      if (matching.Count < countThreshold) {
    280278        return base.Apply();
    281279      }
    282280
     281      matching.Sort((a, b) => qualities[a].CompareTo(qualities[b])); // sort by ascending quality
     282      var matchingIndividuals = matching.Select(x => individuals[x]).ToArray(); // fittest individual will be last in the array
    283283      var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
    284284      if (similarity < MinimumPhenotypicSimilarity.Value) {
     
    286286      }
    287287
    288       double replacementRatio;
    289       var adaptiveReplacementRatio = UseAdaptiveReplacementRatioParameter.ActualValue.Value;
    290 
    291       if (adaptiveReplacementRatio) {
    292         var r = (double)matchingIndividuals.Count / individuals.Count;
    293         replacementRatio = CalculateReplacementRatio(r);
     288      double mutationRate;
     289      var useAdaptiveMutationRate = UseAdaptiveMutationRateParameter.ActualValue.Value;
     290
     291      if (useAdaptiveMutationRate) {
     292        var r = (double)matchingIndividuals.Length / individuals.Count;
     293        mutationRate = CalculateMutationRate(r);
    294294      } else {
    295         replacementRatio = ReplacementRatio.Value;
    296       }
    297 
    298       int n = (int)Math.Floor(matchingIndividuals.Count * replacementRatio);
    299       var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
    300                                                          : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
    301       var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
    302       var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
    303       foreach (var ind in individualsToReplace) {
    304         var mutatorOp = ExecutionContext.CreateChildOperation(SchemaManipulator, ind);
    305         var updateOp = ExecutionContext.CreateChildOperation(updateQualityOperator, ind);
    306         mutationOc.Add(mutatorOp);
    307         updateEstimatedValues.Add(updateOp);
     295        mutationRate = MutationRate.Value;
     296      }
     297
     298      var mutations = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
     299      var updates = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
     300
     301      // use length - 1 because we don't want to mutate the best individual in each schema group (which could also be the overall elite)
     302      for (int i = 0; i < matchingIndividuals.Length - 1; ++i) {
     303        if (random.NextDouble() > mutationRate) continue;
     304
     305        var ind = matchingIndividuals[i];
     306
     307        var mutate = ExecutionContext.CreateChildOperation(SchemaManipulator, ind);
     308        var update = ExecutionContext.CreateChildOperation(updateQualityOperator, ind);
     309
     310        mutations.Add(mutate);
     311        updates.Add(update);
     312
    308313        if (exclusiveMatching)
    309314          ind.Variables.Add(new Core.Variable("AlreadyMatched"));
    310315      }
    311 
    312       NumberOfChangedTrees.Value += individualsToReplace.Count; // a lock is not necessary here because the SchemaEvaluators cannot be executed in parallel
    313 
    314       return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply());
    315     }
    316 
    317     private double CalculateReplacementRatio(double r) {
    318       switch (ReplacementRule) {
     316      NumberOfChangedTrees.Value += mutations.Count;
     317
     318      return new OperationCollection(mutations, updates, base.Apply());
     319    }
     320
     321    private double CalculateMutationRate(double r) {
     322      switch (MutationRateUpdateRule) {
    319323        case "f(x) = x": {
    320324            return r;
     
    340344    }
    341345
    342     public static double CalculateSimilarity(ScopeList individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
     346    public static double CalculateSimilarity(IScope[] individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
     347      if (individuals.Length < 2)
     348        return double.NaN;
     349
    343350      double similarity = 0;
    344       int count = individuals.Count;
    345       if (count < 2) return double.NaN;
     351      int count = individuals.Length;
     352      int n = count * (count - 1) / 2;
     353
    346354      if (parallel) {
    347         var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
    348         var simArray = new double[count - 1];
    349         Parallel.For(0, count - 1, parallelOptions, i => {
    350           double innerSim = 0;
     355        var ii = new int[n];
     356        var jj = new int[n];
     357        int k = 0;
     358        for (int i = 0; i < count - 1; ++i)
    351359          for (int j = i + 1; j < count; ++j) {
    352             innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
    353           }
    354           simArray[i] = innerSim;
     360            ii[k] = i;
     361            jj[k] = j;
     362            ++k;
     363          }
     364        var po = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
     365        var partitioner = Partitioner.Create(0, n);
     366        var locker = new object();
     367        Parallel.ForEach(partitioner, new ParallelOptions { MaxDegreeOfParallelism = 4 }, (range, loop) => {
     368          var partial = 0d;
     369          for (int idx = range.Item1; idx < range.Item2; ++idx) {
     370            int i = ii[idx];
     371            int j = jj[idx];
     372            partial += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
     373          }
     374          lock (locker) { similarity += partial; }
    355375        });
    356         similarity = simArray.Sum();
    357376      } else {
    358377        for (int i = 0; i < count - 1; ++i) {
     
    362381        }
    363382      }
    364       return similarity / (count * (count - 1) / 2.0);
     383      return similarity / n;
    365384    }
    366385  }
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/UpdateQualityOperator.cs

    r13876 r15906  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2121
    2222using System;
     23using System.Collections;
     24using System.Collections.Generic;
    2325using System.Linq;
    2426using HeuristicLab.Common;
     
    4042    private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
    4143
     44    #region parameters
    4245    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
    4346      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
     
    5558      get { return (ILookupParameter<BoolValue>)Parameters[ScaleEstimatedValuesParameterName]; }
    5659    }
     60    #endregion
    5761
    5862    public UpdateQualityOperator() {
     
    7680    public override IOperation Apply() {
    7781      var tree = SymbolicExpressionTreeParameter.ActualValue;
    78       FixParentLinks(tree);
    7982      var problemData = ProblemDataParameter.ActualValue;
    8083      var estimationLimits = EstimationLimitsParameter.ActualValue;
    8184      var interpreter = InterpreterParameter.ActualValue;
    8285
    83       var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices).ToArray();
    84       var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
     86      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices);
     87      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
    8588
    86       if (estimatedValues.Length != targetValues.Length)
    87         throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");
     89      var scaleEstimatedValues = ScaleEstimatedValuesParameter.ActualValue.Value;
    8890
    89       var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
     91      IEnumerable<double> scaled;
     92      if (scaleEstimatedValues) {
     93        var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
    9094
    91       for (int i = 0; i < estimatedValues.Length; ++i) {
    92         var estimated = estimatedValues[i];
    93         var target = targetValues[i];
    94         if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
    95           linearScalingCalculator.Add(estimated, target);
     95        var e1 = estimatedValues.GetEnumerator();
     96        var e2 = targetValues.GetEnumerator();
     97
     98        int count = 0;
     99        while (e1.MoveNext() && e2.MoveNext()) {
     100          var estimated = e1.Current;
     101          var target = e2.Current;
     102          if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
     103            linearScalingCalculator.Add(estimated, target);
     104          ++count;
     105        }
     106        if (e1.MoveNext() || e2.MoveNext())
     107          throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");
     108
     109        double alpha = linearScalingCalculator.Alpha;
     110        double beta = linearScalingCalculator.Beta;
     111        if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) {
     112          alpha = 0.0;
     113          beta = 1.0;
     114        }
     115        scaled = estimatedValues.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
     116      } else {
     117        scaled = estimatedValues.LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
    96118      }
    97       double alpha = linearScalingCalculator.Alpha;
    98       double beta = linearScalingCalculator.Beta;
    99       if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) {
    100         alpha = 0.0;
    101         beta = 1.0;
    102       }
    103       var scaled = estimatedValues.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray();
     119
    104120      OnlineCalculatorError error;
    105121      var r = OnlinePearsonsRCalculator.Calculate(targetValues, scaled, out error);
     
    111127
    112128      ((DoubleValue)variables["Quality"].Value).Value = r2;
    113       GenealogyGraph.GetByContent(tree).Quality = r2;
    114 
    115       var scaleEstimatedValues = ScaleEstimatedValuesParameter.ActualValue;
    116       if (!scaleEstimatedValues.Value)
    117         scaled = estimatedValues.LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray();
     129      //GenealogyGraph.GetByContent(tree).Quality = r2;
    118130
    119131      if (variables.ContainsKey("EstimatedValues")) {
    120         variables["EstimatedValues"].Value = new DoubleArray(scaled);
     132        variables["EstimatedValues"].Value = new DoubleArray(scaled.ToArray());
    121133      } else {
    122         variables.Add(new Core.Variable("EstimatedValues", new DoubleArray(scaled)));
     134        variables.Add(new Core.Variable("EstimatedValues", new DoubleArray(scaled.ToArray())));
    123135      }
    124136      return base.Apply();
    125137    }
    126 
    127     private static void FixParentLinks(ISymbolicExpressionTree tree) {
    128       foreach (var node in tree.IterateNodesPrefix().Where(x => x.SubtreeCount > 0)) {
    129         foreach (var s in node.Subtrees) {
    130           if (s.Parent != node)
    131             s.Parent = node;
    132         }
    133       }
    134     }
    135138  }
    136139}
Note: See TracChangeset for help on using the changeset viewer.