Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
10/02/15 01:08:30 (9 years ago)
Author:
bburlacu
Message:

#1772:

  • added parameters to the SchemaCreator for calculating things in parallel
  • added parallel code to the SchemaEvaluator
  • implemented quality calculation and saving of estimated values in the scope in the UpdateEstimatedValuesOperator
  • small refactoring of QueryMatch (added some useful methods and made NodeInfo internal)
  • updated query match unit test
Location:
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs

    r12958 r12979  
    4242    private const string ReplacementRatioParameterName = "ReplacementRatio";
    4343    private const string RandomReplacementParameterName = "RandomReplacement";
     44    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
     45    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
    4446
    4547    #region parameters
     
    4850    }
    4951
     52    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
     53      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
     54    }
     55
     56    public IFixedValueParameter<IntValue> MaxDegreeOfParallelismParameter {
     57      get { return (IFixedValueParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
     58    }
     59
    5060    public IFixedValueParameter<PercentValue> MinimumSchemaFrequencyParameter {
    5161      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
     
    6272
    6373    #region parameter properties
    64     public int MinimumSchemaLength {
    65       get { return MinimumSchemaLengthParameter.Value.Value; }
    66     }
     74    public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
     75    public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
     76    public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
    6777    #endregion
    6878
     
    7282    private DataTableValuesCollector valuesCollector;
    7383    private ResultsCollector resultsCollector;
     84    private UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
    7485
    7586    private void ParameterizeOperators() {
     
    89100      resultsCollector = new ResultsCollector();
    90101      resultsCollector.CollectedValues.Add(new LookupParameter<DataTable>("Diversification"));
     102
     103      updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    91104    }
    92105
    93106    public SchemaCreator() {
    94107      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
    95       Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.05)));
    96       Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.8)));
    97       Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.2)));
     108      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
     109      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
     110      Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.9)));
    98111      Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(false)));
     112      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(false)));
     113      Parameters.Add(new FixedValueParameter<IntValue>(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
     114
     115      ExecuteInParallelParameter.Hidden = true;
     116      MaxDegreeOfParallelismParameter.Hidden = true;
    99117
    100118      ParameterizeOperators();
     
    116134        return base.Apply();
    117135      // for now, only consider crossover offspring
     136
     137      var scopes = new ScopeList(this.ExecutionContext.Scope.SubScopes);
    118138      var vertices = GenealogyGraph.GetByRank(gen).Where(x => x.InDegree == 2).Cast<IGenealogyGraphNode<ISymbolicExpressionTree>>();
    119139      var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList();
    120140      var anySubtreeSymbol = new AnySubtreeSymbol();
    121141
    122       var scopes = new ScopeList(this.ExecutionContext.Scope.SubScopes);
    123 
    124142      if (schemaEvaluator == null || schemaCleanupOperator == null || changedTreesReducer == null || valuesCollector == null || resultsCollector == null)
    125143        ParameterizeOperators();
     
    129147      var collectResults = ExecutionContext.CreateChildOperation(resultsCollector);
    130148
    131       var oc = new OperationCollection();
     149      var updateEstimatedValues = new OperationCollection { Parallel = true };
     150      foreach (var s in scopes) {
     151        if (!s.Variables.ContainsKey("EstimatedValues"))
     152          s.Variables.Add(new Core.Variable("EstimatedValues"));
     153        updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s));
     154      }
     155      var oc = new OperationCollection { updateEstimatedValues };
    132156
    133157      #region create schemas and add subscopes representing the individuals
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs

    r12970 r12979  
    2222using System;
    2323using System.Linq;
     24using System.Threading.Tasks;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
     
    5051    private const string RandomReplacementParameterName = "RandomReplacement";
    5152    private const string ChangedTreesParameterName = "ChangedTrees";
     53    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
     54    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
    5255    #endregion
    5356
    5457    #region parameters
     58    public ILookupParameter<BoolValue> ExecuteInParallelParameter {
     59      get { return (ILookupParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
     60    }
     61    public ILookupParameter<IntValue> MaxDegreeOfParallelismParameter {
     62      get { return (ILookupParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
     63    }
    5564    public ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>> EvaluatorParameter {
    5665      get { return (ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>)Parameters[EvaluatorParameterName]; }
     
    98107
    99108    #region parameter properties
    100     public PercentValue MinimumSchemaFrequency {
    101       get { return MinimumSchemaFrequencyParameter.ActualValue; }
    102     }
    103 
    104     public PercentValue ReplacementRatio {
    105       get { return ReplacementRatioParameter.ActualValue; }
    106     }
    107 
    108     public PercentValue MinimumPhenotypicSimilarity {
    109       get { return MinimumPhenotypicSimilarityParameter.ActualValue; }
    110     }
    111 
    112     public BoolValue RandomReplacement {
    113       get { return RandomReplacementParameter.ActualValue; }
    114     }
     109    public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
     110    public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } }
     111    public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
     112    public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } }
    115113    #endregion
    116114
     
    124122    };
    125123
    126 
    127     [StorableHook(HookType.AfterDeserialization)]
    128     private void AfterDeserialization() {
    129       if (!Parameters.ContainsKey(ChangedTreesParameterName))
    130         Parameters.Add(new LookupParameter<IntValue>(ChangedTreesParameterName));
    131     }
     124    [Storable]
     125    private readonly UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
    132126
    133127    public SchemaEvaluator() {
    134128      qm = new QueryMatch(comp) { MatchParents = true };
     129      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    135130
    136131      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated"));
     
    148143      Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName));
    149144      Parameters.Add(new LookupParameter<IntValue>(ChangedTreesParameterName));
    150     }
    151 
     145      Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName));
     146      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
     147    }
    152148
    153149    [StorableConstructor]
     
    155151
    156152    protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) {
    157       this.comp = (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone();
    158       this.qm = new QueryMatch(comp) { MatchParents = original.qm.MatchParents };
     153      this.comp = original.comp == null ? new SymbolicExpressionTreeNodeEqualityComparer {
     154        MatchConstantValues = false,
     155        MatchVariableWeights = false,
     156        MatchVariableNames = true
     157      } : (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone();
     158      this.qm = new QueryMatch(comp) { MatchParents = original.qm?.MatchParents ?? true };
     159      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    159160    }
    160161
     
    163164    }
    164165
    165     private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator) {
    166       double similarity = 0;
    167       int count = individuals.Count;
    168       for (int i = 0; i < count - 1; ++i) {
    169         for (int j = i + 1; j < count; ++j) {
    170           similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
    171         }
    172       }
    173       return similarity / (count * (count - 1) / 2.0);
    174     }
    175 
    176166    public override IOperation Apply() {
    177167      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
     
    179169      var random = RandomParameter.ActualValue;
    180170      var mutator = MutatorParameter.ActualValue;
    181       var evaluator = EvaluatorParameter.ActualValue;
    182       var updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    183171
    184172      var s = SchemaParameter.ActualValue;
    185       var matchingIndividuals = new ScopeList();
    186       foreach (var ind in individuals) {
    187         var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;
    188         if (t.Length >= s.Length && qm.Match(t, s))
    189           matchingIndividuals.Add(ind);
    190       }
    191 
    192       if (matchingIndividuals.Count < (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count))) {
     173      var sRoot = s.Root.GetSubtree(0).GetSubtree(0);
     174      int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
     175
     176      // first apply the length and root equality checks in order to filter the individuals
     177      var filtered = (from ind in individuals
     178                      let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value
     179                      where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot)
     180                      select ind).ToList();
     181
     182      // if we don't have enough filtered individuals, then we are done
     183      if (filtered.Count < countThreshold) {
    193184        ChangedTreesParameter.ActualValue = new IntValue(0);
    194185        return base.Apply();
    195186      }
    196187
    197       var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator);
     188      // check if the filtered individuals match the schema
     189      var sNodes = QueryMatch.InitializePostOrder(sRoot);
     190      var matchingIndividuals = new ScopeList();
     191      bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
     192      int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
     193
     194      if (executeInParallel) {
     195        var matching = new bool[filtered.Count];
     196
     197        Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
     198          i => {
     199            var t = (ISymbolicExpressionTree)filtered[i].Variables["SymbolicExpressionTree"].Value;
     200            var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
     201            if (qm.Match(tNodes, sNodes)) {
     202              matching[i] = true;
     203            }
     204          });
     205
     206        for (int i = 0; i < matching.Length; ++i) {
     207          if (matching[i])
     208            matchingIndividuals.Add(filtered[i]);
     209        }
     210      } else {
     211        for (int i = 0; i < filtered.Count; ++i) {
     212
     213          // break early if it becomes impossible to reach the minimum threshold
     214          if (matchingIndividuals.Count + filtered.Count - i < countThreshold)
     215            break;
     216
     217          var ind = filtered[i];
     218          var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;
     219          var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
     220          if (qm.Match(tNodes, sNodes))
     221            matchingIndividuals.Add(ind);
     222        }
     223      }
     224
     225      if (matchingIndividuals.Count < countThreshold) {
     226        ChangedTreesParameter.ActualValue = new IntValue(0);
     227        return base.Apply();
     228      }
     229
     230      var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
    198231      if (similarity < MinimumPhenotypicSimilarity.Value) {
    199232        ChangedTreesParameter.ActualValue = new IntValue(0);
     
    201234      }
    202235
    203       var oc = new OperationCollection();
    204236      int n = (int)Math.Floor(matchingIndividuals.Count * ReplacementRatio.Value);
    205237      var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
    206238                                                         : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
     239      var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
     240      var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
    207241      foreach (var ind in individualsToReplace) {
    208242        var mutatorOp = ExecutionContext.CreateChildOperation(mutator, ind);
    209         var evaluatorOp = ExecutionContext.CreateChildOperation(evaluator, ind);
    210         var updateEstimatedValuesOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind);
    211         oc.Add(mutatorOp);
    212         oc.Add(evaluatorOp);
    213         oc.Add(updateEstimatedValuesOp);
     243        var updateOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind);
     244        mutationOc.Add(mutatorOp);
     245        updateEstimatedValues.Add(updateOp);
    214246      }
    215247      ChangedTreesParameter.ActualValue = new IntValue(individualsToReplace.Count);
    216       return new OperationCollection(oc, base.Apply());
     248      return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply());
     249    }
     250
     251    private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
     252      double similarity = 0;
     253      int count = individuals.Count;
     254      if (parallel) {
     255        var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
     256        var simArray = new double[count - 1];
     257        Parallel.For(0, count - 1, parallelOptions, i => {
     258          double innerSim = 0;
     259          for (int j = i + 1; j < count; ++j) {
     260            innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
     261          }
     262          simArray[i] = innerSim;
     263        });
     264        similarity = simArray.Sum();
     265      } else {
     266        for (int i = 0; i < count - 1; ++i) {
     267          for (int j = i + 1; j < count; ++j) {
     268            similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
     269          }
     270        }
     271      }
     272      return similarity / (count * (count - 1) / 2.0);
    217273    }
    218274  }
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/UpdateEstimatedValuesOperator.cs

    r12958 r12979  
    2020#endregion
    2121
     22using System;
    2223using System.Linq;
    2324using HeuristicLab.Common;
     
    5152    }
    5253
     54
    5355    public UpdateEstimatedValuesOperator() {
    5456      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
     
    7476      var interpreter = InterpreterParameter.ActualValue;
    7577
    76       var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices)
    77                                        .LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray();
     78      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices).ToArray();
     79      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
     80
     81      if (estimatedValues.Length != targetValues.Length)
     82        throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");
     83
     84      var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
     85
     86      for (int i = 0; i < estimatedValues.Length; ++i) {
     87        var estimated = estimatedValues[i];
     88        var target = targetValues[i];
     89        if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
     90          linearScalingCalculator.Add(estimated, target);
     91      }
     92      double alpha = linearScalingCalculator.Alpha;
     93      double beta = linearScalingCalculator.Beta;
     94      if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) {
     95        alpha = 0.0;
     96        beta = 1.0;
     97      }
     98
     99      var scaled = estimatedValues.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray();
     100      OnlineCalculatorError error;
     101      var r = OnlinePearsonsRCalculator.Calculate(targetValues, scaled, out error);
     102      if (error != OnlineCalculatorError.None) r = 0;
     103
     104      var r2 = r * r;
     105      if (r2 > 1.0) r2 = 1.0;
    78106
    79107      var variables = ExecutionContext.Scope.Variables;
    80       if (variables.ContainsKey("EstimatedValues"))
    81         variables["EstimatedValues"].Value = new DoubleArray(estimatedValues);
    82       else
    83         variables.Add(new Core.Variable("EstimatedValues", new DoubleArray(estimatedValues)));
     108      ((DoubleValue)variables["Quality"].Value).Value = r2;
     109
     110      if (variables.ContainsKey("EstimatedValues")) {
     111        variables["EstimatedValues"].Value = new DoubleArray(scaled);
     112      } else {
     113        variables.Add(new Core.Variable("EstimatedValues", new DoubleArray(scaled)));
     114      }
    84115      return base.Apply();
    85116    }
Note: See TracChangeset for help on using the changeset viewer.