#region License Information
/* HeuristicLab
* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.EvolutionTracking;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Random;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[Item("SchemaEvaluator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
[StorableClass]
public class SchemaEvaluator : EvolutionTrackingOperator {
#region parameter names
private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
private const string MutationRateParameterName = "MutationRate";
private const string SchemaParameterName = "Schema";
private const string PopulationSizeParameterName = "PopulationSize";
private const string RandomParameterName = "Random";
private const string EvaluatorParameterName = "Evaluator";
private const string ProblemDataParameterName = "ProblemData";
private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
private const string EstimationLimitsParameterName = "EstimationLimits";
private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
private const string MutatorParameterName = "Mutator";
private const string CrossoverParameterName = "Crossover";
private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
private const string ExecuteInParallelParameterName = "ExecuteInParallel";
private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
private const string UseAdaptiveMutationRateParameterName = "UseAdaptiveMutationRate";
private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
#endregion
#region parameters
public ILookupParameter UseAdaptiveMutationRateParameter {
get { return (ILookupParameter)Parameters[UseAdaptiveMutationRateParameterName]; }
}
public ILookupParameter ExclusiveMatchingParameter {
get { return (ILookupParameter)Parameters[ExclusiveMatchingParameterName]; }
}
public ILookupParameter ExecuteInParallelParameter {
get { return (ILookupParameter)Parameters[ExecuteInParallelParameterName]; }
}
public ILookupParameter MaxDegreeOfParallelismParameter {
get { return (ILookupParameter)Parameters[MaxDegreeOfParalellismParameterName]; }
}
public ILookupParameter> EvaluatorParameter {
get { return (ILookupParameter>)Parameters[EvaluatorParameterName]; }
}
public ILookupParameter ProblemDataParameter {
get { return (ILookupParameter)Parameters[ProblemDataParameterName]; }
}
public ILookupParameter InterpreterParameter {
get { return (ILookupParameter)Parameters[InterpreterParameterName]; }
}
public ILookupParameter EstimationLimitsParameter {
get { return (ILookupParameter)Parameters[EstimationLimitsParameterName]; }
}
public ILookupParameter ApplyLinearScalingParameter {
get { return (ILookupParameter)Parameters[ApplyLinearScalingParameterName]; }
}
public ILookupParameter CrossoverParameter {
get { return (ILookupParameter)Parameters[CrossoverParameterName]; }
}
public ILookupParameter RandomParameter {
get { return (ILookupParameter)Parameters[RandomParameterName]; }
}
public ILookupParameter PopulationSizeParameter {
get { return (ILookupParameter)Parameters[PopulationSizeParameterName]; }
}
public ILookupParameter SchemaParameter {
get { return (ILookupParameter)Parameters[SchemaParameterName]; }
}
public ILookupParameter MinimumSchemaFrequencyParameter {
get { return (ILookupParameter)Parameters[MinimumSchemaFrequencyParameterName]; }
}
public ILookupParameter MutationRateParameter {
get { return (ILookupParameter)Parameters[MutationRateParameterName]; }
}
public ILookupParameter MinimumPhenotypicSimilarityParameter {
get { return (ILookupParameter)Parameters[MinimumPhenotypicSimilarityParameterName]; }
}
public LookupParameter NumberOfChangedTreesParameter {
get { return (LookupParameter)Parameters[NumberOfChangedTreesParameterName]; }
}
public LookupParameter StrictSchemaMatchingParameter {
get { return (LookupParameter)Parameters[StrictSchemaMatchingParameterName]; }
}
#endregion
#region parameter properties
public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
public PercentValue MutationRate { get { return MutationRateParameter.ActualValue; } }
public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
public IntValue NumberOfChangedTrees { get { return NumberOfChangedTreesParameter.ActualValue; } }
#endregion
private QueryMatch qm;
[Storable]
private SymbolicExpressionTreePhenotypicSimilarityCalculator calculator;
[Storable]
public string MutationRateUpdateRule { get; set; }
[Storable]
private ISymbolicExpressionTreeNodeEqualityComparer comparer;
[Storable]
public ISymbolicExpressionTree Schema { get; set; }
[Storable]
private readonly UpdateQualityOperator updateQualityOperator;
[Storable]
public ISymbolicExpressionTreeManipulator SchemaManipulator { get; set; }
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName))
Parameters.Add(new LookupParameter(StrictSchemaMatchingParameterName));
if (calculator == null)
calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
if (comparer == null)
comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false };
qm = new QueryMatch(comparer) { MatchParents = true };
}
public SchemaEvaluator() {
calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false };
qm = new QueryMatch(comparer) { MatchParents = true };
updateQualityOperator = new UpdateQualityOperator();
#region add parameters
Parameters.Add(new LookupParameter(SchemaParameterName, "The current schema to be evaluated"));
Parameters.Add(new LookupParameter(MinimumSchemaFrequencyParameterName));
Parameters.Add(new LookupParameter(MutationRateParameterName));
Parameters.Add(new LookupParameter(MinimumPhenotypicSimilarityParameterName));
Parameters.Add(new LookupParameter(PopulationSizeParameterName));
Parameters.Add(new LookupParameter(RandomParameterName));
Parameters.Add(new LookupParameter>(EvaluatorParameterName));
Parameters.Add(new LookupParameter(ProblemDataParameterName));
Parameters.Add(new LookupParameter(InterpreterParameterName));
Parameters.Add(new LookupParameter(EstimationLimitsParameterName));
Parameters.Add(new LookupParameter(ApplyLinearScalingParameterName));
Parameters.Add(new LookupParameter(StrictSchemaMatchingParameterName));
Parameters.Add(new LookupParameter(MutatorParameterName));
Parameters.Add(new LookupParameter(CrossoverParameterName));
Parameters.Add(new LookupParameter(NumberOfChangedTreesParameterName));
Parameters.Add(new LookupParameter(ExecuteInParallelParameterName));
Parameters.Add(new LookupParameter(MaxDegreeOfParalellismParameterName));
Parameters.Add(new LookupParameter(ExclusiveMatchingParameterName));
Parameters.Add(new LookupParameter(UseAdaptiveMutationRateParameterName));
#endregion
}
[StorableConstructor]
protected SchemaEvaluator(bool deserializing) : base(deserializing) { }
protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) {
calculator = cloner.Clone(original.calculator);
comparer = cloner.Clone(original.comparer);
qm = new QueryMatch(comparer) { MatchParents = true };
updateQualityOperator = new UpdateQualityOperator();
Schema = original.Schema;
}
public override IDeepCloneable Clone(Cloner cloner) {
return new SchemaEvaluator(this, cloner);
}
public override IOperation Apply() {
var strictSchemaMatching = StrictSchemaMatchingParameter.ActualValue.Value;
if (strictSchemaMatching) {
comparer.MatchVariableWeights = true;
comparer.MatchConstantValues = true;
} else {
comparer.MatchVariableWeights = false;
comparer.MatchConstantValues = false;
}
var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
var trees = new ISymbolicExpressionTree[individuals.Count];
var qualities = new double[individuals.Count];
for (int i = 0; i < individuals.Count; ++i) {
trees[i] = (ISymbolicExpressionTree)individuals[i].Variables["SymbolicExpressionTree"].Value;
qualities[i] = ((DoubleValue)individuals[i].Variables["Quality"].Value).Value;
}
var random = RandomParameter.ActualValue;
var sRoot = Schema.Root.GetSubtree(0).GetSubtree(0);
// first apply the length and root equality checks in order to filter the individuals
var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value;
var filtered = new List();
for (int i = 0; i < trees.Length; ++i) {
if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue;
var t = trees[i];
var tRoot = t.Root.GetSubtree(0).GetSubtree(0);
if (t.Length < Schema.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue;
filtered.Add(i);
}
// if we don't have enough filtered individuals, then we are done
// if the schema exceeds the minimum frequency, it gets processed further
int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
if (filtered.Count < countThreshold) {
return base.Apply();
}
// check if the filtered individuals match the schema
var matching = new List();
var sNodes = QueryMatch.InitializePostOrder(sRoot);
bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
if (executeInParallel) {
var partitioner = Partitioner.Create(0, filtered.Count);
var po = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };
Parallel.ForEach(partitioner, po, (range, loop) => {
var partial = new List();
for (int i = range.Item1; i < range.Item2; ++i) {
var idx = filtered[i];
var tRoot = trees[idx].Root.GetSubtree(0).GetSubtree(0);
var tNodes = QueryMatch.InitializePostOrder(tRoot);
if (qm.Match(tNodes, sNodes)) { partial.Add(idx); }
}
lock (matching) { matching.AddRange(partial); }
});
} else {
for (int i = 0; i < filtered.Count; ++i) {
var index = filtered[i];
var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0);
var tNodes = QueryMatch.InitializePostOrder(tRoot);
if (qm.Match(tNodes, sNodes)) {
matching.Add(index);
}
}
}
if (matching.Count < countThreshold) {
return base.Apply();
}
matching.Sort((a, b) => qualities[a].CompareTo(qualities[b])); // sort by ascending quality
var matchingIndividuals = matching.Select(x => individuals[x]).ToArray(); // fittest individual will be last in the array
var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
if (similarity < MinimumPhenotypicSimilarity.Value) {
return base.Apply();
}
double mutationRate;
var useAdaptiveMutationRate = UseAdaptiveMutationRateParameter.ActualValue.Value;
if (useAdaptiveMutationRate) {
var r = (double)matchingIndividuals.Length / individuals.Count;
mutationRate = CalculateMutationRate(r);
} else {
mutationRate = MutationRate.Value;
}
var mutations = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
var updates = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
// use length - 1 because we don't want to mutate the best individual in each schema group (which could also be the overall elite)
for (int i = 0; i < matchingIndividuals.Length - 1; ++i) {
if (random.NextDouble() > mutationRate) continue;
var ind = matchingIndividuals[i];
var mutate = ExecutionContext.CreateChildOperation(SchemaManipulator, ind);
var update = ExecutionContext.CreateChildOperation(updateQualityOperator, ind);
mutations.Add(mutate);
updates.Add(update);
if (exclusiveMatching)
ind.Variables.Add(new Core.Variable("AlreadyMatched"));
}
NumberOfChangedTrees.Value += mutations.Count;
return new OperationCollection(mutations, updates, base.Apply());
}
private double CalculateMutationRate(double r) {
switch (MutationRateUpdateRule) {
case "f(x) = x": {
return r;
}
case "f(x) = tanh(x)": {
return Math.Tanh(r);
}
case "f(x) = tanh(2x)": {
return Math.Tanh(2 * r);
}
case "f(x) = tanh(3x)": {
return Math.Tanh(3 * r);
}
case "f(x) = tanh(4x)": {
return Math.Tanh(4 * r);
}
case "f(x) = 1-sqrt(1-x)": {
return 1 - Math.Sqrt(1 - r);
}
default:
throw new ArgumentException("Unknown replacement rule");
}
}
public static double CalculateSimilarity(IScope[] individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
if (individuals.Length < 2)
return double.NaN;
double similarity = 0;
int count = individuals.Length;
int n = count * (count - 1) / 2;
if (parallel) {
var ii = new int[n];
var jj = new int[n];
int k = 0;
for (int i = 0; i < count - 1; ++i)
for (int j = i + 1; j < count; ++j) {
ii[k] = i;
jj[k] = j;
++k;
}
var po = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
var partitioner = Partitioner.Create(0, n);
var locker = new object();
Parallel.ForEach(partitioner, new ParallelOptions { MaxDegreeOfParallelism = 4 }, (range, loop) => {
var partial = 0d;
for (int idx = range.Item1; idx < range.Item2; ++idx) {
int i = ii[idx];
int j = jj[idx];
partial += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
}
lock (locker) { similarity += partial; }
});
} else {
for (int i = 0; i < count - 1; ++i) {
for (int j = i + 1; j < count; ++j) {
similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
}
}
}
return similarity / n;
}
}
}