#region License Information
/* HeuristicLab
* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.EvolutionTracking;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Random;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[Item("SchemaEvaluator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
[StorableClass]
public class SchemaEvaluator : EvolutionTrackingOperator {
#region parameter names
private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
private const string ReplacementRatioParameterName = "ReplacementRatio";
private const string SchemaParameterName = "Schema";
private const string PopulationSizeParameterName = "PopulationSize";
private const string RandomParameterName = "Random";
private const string EvaluatorParameterName = "Evaluator";
private const string ProblemDataParameterName = "ProblemData";
private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
private const string EstimationLimitsParameterName = "EstimationLimits";
private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
private const string MutatorParameterName = "Mutator";
private const string CrossoverParameterName = "Crossover";
private const string RandomReplacementParameterName = "RandomReplacement";
private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
private const string ExecuteInParallelParameterName = "ExecuteInParallel";
private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
#endregion
#region parameters
public ILookupParameter UseAdaptiveReplacementRatioParameter {
get { return (ILookupParameter)Parameters[UseAdaptiveReplacementRatioParameterName]; }
}
public ILookupParameter ExclusiveMatchingParameter {
get { return (ILookupParameter)Parameters[ExclusiveMatchingParameterName]; }
}
public ILookupParameter ExecuteInParallelParameter {
get { return (ILookupParameter)Parameters[ExecuteInParallelParameterName]; }
}
public ILookupParameter MaxDegreeOfParallelismParameter {
get { return (ILookupParameter)Parameters[MaxDegreeOfParalellismParameterName]; }
}
public ILookupParameter> EvaluatorParameter {
get { return (ILookupParameter>)Parameters[EvaluatorParameterName]; }
}
public ILookupParameter ProblemDataParameter {
get { return (ILookupParameter)Parameters[ProblemDataParameterName]; }
}
public ILookupParameter InterpreterParameter {
get { return (ILookupParameter)Parameters[InterpreterParameterName]; }
}
public ILookupParameter EstimationLimitsParameter {
get { return (ILookupParameter)Parameters[EstimationLimitsParameterName]; }
}
public ILookupParameter ApplyLinearScalingParameter {
get { return (ILookupParameter)Parameters[ApplyLinearScalingParameterName]; }
}
public ILookupParameter RandomReplacementParameter {
get { return (ILookupParameter)Parameters[RandomReplacementParameterName]; }
}
public ILookupParameter MutatorParameter {
get { return (ILookupParameter)Parameters[MutatorParameterName]; }
}
public ILookupParameter CrossoverParameter {
get { return (ILookupParameter)Parameters[CrossoverParameterName]; }
}
public ILookupParameter RandomParameter {
get { return (ILookupParameter)Parameters[RandomParameterName]; }
}
public ILookupParameter PopulationSizeParameter {
get { return (ILookupParameter)Parameters[PopulationSizeParameterName]; }
}
public ILookupParameter SchemaParameter {
get { return (ILookupParameter)Parameters[SchemaParameterName]; }
}
public ILookupParameter MinimumSchemaFrequencyParameter {
get { return (ILookupParameter)Parameters[MinimumSchemaFrequencyParameterName]; }
}
public ILookupParameter ReplacementRatioParameter {
get { return (ILookupParameter)Parameters[ReplacementRatioParameterName]; }
}
public ILookupParameter MinimumPhenotypicSimilarityParameter {
get { return (ILookupParameter)Parameters[MinimumPhenotypicSimilarityParameterName]; }
}
public LookupParameter NumberOfChangedTreesParameter {
get { return (LookupParameter)Parameters[NumberOfChangedTreesParameterName]; }
}
public LookupParameter StrictSchemaMatchingParameter {
get { return (LookupParameter)Parameters[StrictSchemaMatchingParameterName]; }
}
#endregion
#region parameter properties
public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } }
public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } }
public IntValue NumberOfChangedTrees { get { return NumberOfChangedTreesParameter.ActualValue; } }
#endregion
private QueryMatch qm;
[Storable]
private SymbolicExpressionTreePhenotypicSimilarityCalculator calculator;
[Storable]
public string ReplacementRule { get; set; }
[Storable]
private ISymbolicExpressionTreeNodeEqualityComparer comparer;
[Storable]
public ISymbolicExpressionTree Schema { get; set; }
[Storable]
private readonly UpdateQualityOperator updateQualityOperator;
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName))
Parameters.Add(new LookupParameter(StrictSchemaMatchingParameterName));
if (calculator == null)
calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
if (comparer == null)
comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false };
qm = new QueryMatch(comparer) { MatchParents = true };
}
public SchemaEvaluator() {
calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false };
qm = new QueryMatch(comparer) { MatchParents = true };
updateQualityOperator = new UpdateQualityOperator();
#region add parameters
Parameters.Add(new LookupParameter(SchemaParameterName, "The current schema to be evaluated"));
Parameters.Add(new LookupParameter(MinimumSchemaFrequencyParameterName));
Parameters.Add(new LookupParameter(ReplacementRatioParameterName));
Parameters.Add(new LookupParameter(MinimumPhenotypicSimilarityParameterName));
Parameters.Add(new LookupParameter(PopulationSizeParameterName));
Parameters.Add(new LookupParameter(RandomParameterName));
Parameters.Add(new LookupParameter>(EvaluatorParameterName));
Parameters.Add(new LookupParameter(ProblemDataParameterName));
Parameters.Add(new LookupParameter(InterpreterParameterName));
Parameters.Add(new LookupParameter(EstimationLimitsParameterName));
Parameters.Add(new LookupParameter(ApplyLinearScalingParameterName));
Parameters.Add(new LookupParameter(StrictSchemaMatchingParameterName));
Parameters.Add(new LookupParameter(MutatorParameterName));
Parameters.Add(new LookupParameter(CrossoverParameterName));
Parameters.Add(new LookupParameter(RandomReplacementParameterName));
Parameters.Add(new LookupParameter(NumberOfChangedTreesParameterName));
Parameters.Add(new LookupParameter(ExecuteInParallelParameterName));
Parameters.Add(new LookupParameter(MaxDegreeOfParalellismParameterName));
Parameters.Add(new LookupParameter(ExclusiveMatchingParameterName));
Parameters.Add(new LookupParameter(UseAdaptiveReplacementRatioParameterName));
#endregion
}
[StorableConstructor]
protected SchemaEvaluator(bool deserializing) : base(deserializing) { }
protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) {
calculator = cloner.Clone(original.calculator);
comparer = cloner.Clone(original.comparer);
qm = new QueryMatch(comparer) { MatchParents = true };
updateQualityOperator = new UpdateQualityOperator();
Schema = original.Schema;
}
public override IDeepCloneable Clone(Cloner cloner) {
return new SchemaEvaluator(this, cloner);
}
public override IOperation Apply() {
var strictSchemaMatching = StrictSchemaMatchingParameter.ActualValue.Value;
if (strictSchemaMatching) {
comparer.MatchVariableWeights = true;
comparer.MatchConstantValues = true;
} else {
comparer.MatchVariableWeights = false;
comparer.MatchConstantValues = false;
}
var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
var trees = individuals.Select(x => (ISymbolicExpressionTree)x.Variables["SymbolicExpressionTree"].Value).ToList();
var random = RandomParameter.ActualValue;
var mutator = MutatorParameter.ActualValue;
var s = Schema;
var sRoot = s.Root.GetSubtree(0).GetSubtree(0);
int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
// first apply the length and root equality checks in order to filter the individuals
var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value;
var filtered = new List();
for (int i = 0; i < individuals.Count; ++i) {
if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue;
var t = trees[i];
var tRoot = t.Root.GetSubtree(0).GetSubtree(0);
if (t.Length < s.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue;
filtered.Add(i);
}
// if we don't have enough filtered individuals, then we are done
// if the schema exceeds the minimum frequency, it gets processed further
if (filtered.Count < countThreshold) {
return base.Apply();
}
// check if the filtered individuals match the schema
var sNodes = QueryMatch.InitializePostOrder(sRoot);
var matchingIndividuals = new ScopeList();
bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
if (executeInParallel) {
var matching = new bool[filtered.Count];
Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
i => {
var index = filtered[i];
var t = trees[index];
var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
if (qm.Match(tNodes, sNodes)) {
matching[i] = true;
}
});
matchingIndividuals.AddRange(filtered.Where((x, i) => matching[i]).Select(x => individuals[x]));
} else {
for (int i = 0; i < filtered.Count; ++i) {
// break early if it becomes impossible to reach the minimum threshold
if (matchingIndividuals.Count + filtered.Count - i < countThreshold)
break;
var index = filtered[i];
var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0);
var tNodes = QueryMatch.InitializePostOrder(tRoot);
if (qm.Match(tNodes, sNodes))
matchingIndividuals.Add(individuals[index]);
}
}
if (matchingIndividuals.Count < countThreshold) {
return base.Apply();
}
var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
if (similarity < MinimumPhenotypicSimilarity.Value) {
return base.Apply();
}
double replacementRatio;
var adaptiveReplacementRatio = UseAdaptiveReplacementRatioParameter.ActualValue.Value;
if (adaptiveReplacementRatio) {
var r = (double)matchingIndividuals.Count / individuals.Count;
replacementRatio = CalculateReplacementRatio(r);
} else {
replacementRatio = ReplacementRatio.Value;
}
int n = (int)Math.Floor(matchingIndividuals.Count * replacementRatio);
var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
: matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
foreach (var ind in individualsToReplace) {
var mutatorOp = ExecutionContext.CreateChildOperation(mutator, ind);
var updateOp = ExecutionContext.CreateChildOperation(updateQualityOperator, ind);
mutationOc.Add(mutatorOp);
updateEstimatedValues.Add(updateOp);
if (exclusiveMatching)
ind.Variables.Add(new Core.Variable("AlreadyMatched"));
}
NumberOfChangedTrees.Value += individualsToReplace.Count; // a lock is not necessary here because the SchemaEvaluators cannot be executed in parallel
return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply());
}
private double CalculateReplacementRatio(double r) {
switch (ReplacementRule) {
case "f(x) = x": {
return r;
}
case "f(x) = tanh(x)": {
return Math.Tanh(r);
}
case "f(x) = tanh(2x)": {
return Math.Tanh(2 * r);
}
case "f(x) = tanh(3x)": {
return Math.Tanh(3 * r);
}
case "f(x) = tanh(4x)": {
return Math.Tanh(4 * r);
}
case "f(x) = 1-sqrt(1-x)": {
return 1 - Math.Sqrt(1 - r);
}
default:
throw new ArgumentException("Unknown replacement rule");
}
}
public static double CalculateSimilarity(ScopeList individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
double similarity = 0;
int count = individuals.Count;
if (count < 2) return double.NaN;
if (parallel) {
var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
var simArray = new double[count - 1];
Parallel.For(0, count - 1, parallelOptions, i => {
double innerSim = 0;
for (int j = i + 1; j < count; ++j) {
innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
}
simArray[i] = innerSim;
});
similarity = simArray.Sum();
} else {
for (int i = 0; i < count - 1; ++i) {
for (int j = i + 1; j < count; ++j) {
similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
}
}
}
return similarity / (count * (count - 1) / 2.0);
}
}
}