#region License Information /* HeuristicLab * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.EvolutionTracking; using HeuristicLab.Optimization; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Random; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { [Item("SchemaEvaluator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")] [StorableClass] public class SchemaEvaluator : EvolutionTrackingOperator { #region parameter names private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency"; private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity"; private const string MutationRateParameterName = "MutationRate"; private const string SchemaParameterName = "Schema"; private const string PopulationSizeParameterName = "PopulationSize"; private const string RandomParameterName = "Random"; private const string EvaluatorParameterName = "Evaluator"; private const string ProblemDataParameterName = "ProblemData"; private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter"; private const string EstimationLimitsParameterName = "EstimationLimits"; private const string ApplyLinearScalingParameterName = "ApplyLinearScaling"; private const string MutatorParameterName = "Mutator"; private const string CrossoverParameterName = "Crossover"; private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees"; private const string ExecuteInParallelParameterName = "ExecuteInParallel"; private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism"; private const string ExclusiveMatchingParameterName = "ExclusiveMatching"; private const string UseAdaptiveMutationRateParameterName = "UseAdaptiveMutationRate"; private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching"; #endregion #region parameters public ILookupParameter UseAdaptiveMutationRateParameter { get { return (ILookupParameter)Parameters[UseAdaptiveMutationRateParameterName]; } } public ILookupParameter ExclusiveMatchingParameter { get { return (ILookupParameter)Parameters[ExclusiveMatchingParameterName]; } } public ILookupParameter ExecuteInParallelParameter { get { return (ILookupParameter)Parameters[ExecuteInParallelParameterName]; } } public ILookupParameter MaxDegreeOfParallelismParameter { get { return (ILookupParameter)Parameters[MaxDegreeOfParalellismParameterName]; } } public ILookupParameter> EvaluatorParameter { get { return (ILookupParameter>)Parameters[EvaluatorParameterName]; } } public ILookupParameter ProblemDataParameter { get { return (ILookupParameter)Parameters[ProblemDataParameterName]; } } public ILookupParameter InterpreterParameter { get { return (ILookupParameter)Parameters[InterpreterParameterName]; } } public ILookupParameter EstimationLimitsParameter { get { return (ILookupParameter)Parameters[EstimationLimitsParameterName]; } } public ILookupParameter ApplyLinearScalingParameter { get { return (ILookupParameter)Parameters[ApplyLinearScalingParameterName]; } } public ILookupParameter CrossoverParameter { get { return (ILookupParameter)Parameters[CrossoverParameterName]; } } public ILookupParameter RandomParameter { get { return (ILookupParameter)Parameters[RandomParameterName]; } } public ILookupParameter PopulationSizeParameter { get { return (ILookupParameter)Parameters[PopulationSizeParameterName]; } } public ILookupParameter SchemaParameter { get { return (ILookupParameter)Parameters[SchemaParameterName]; } } public ILookupParameter MinimumSchemaFrequencyParameter { get { return (ILookupParameter)Parameters[MinimumSchemaFrequencyParameterName]; } } public ILookupParameter MutationRateParameter { get { return (ILookupParameter)Parameters[MutationRateParameterName]; } } public ILookupParameter MinimumPhenotypicSimilarityParameter { get { return (ILookupParameter)Parameters[MinimumPhenotypicSimilarityParameterName]; } } public LookupParameter NumberOfChangedTreesParameter { get { return (LookupParameter)Parameters[NumberOfChangedTreesParameterName]; } } public LookupParameter StrictSchemaMatchingParameter { get { return (LookupParameter)Parameters[StrictSchemaMatchingParameterName]; } } #endregion #region parameter properties public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } } public PercentValue MutationRate { get { return MutationRateParameter.ActualValue; } } public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } } public IntValue NumberOfChangedTrees { get { return NumberOfChangedTreesParameter.ActualValue; } } #endregion private QueryMatch qm; [Storable] private SymbolicExpressionTreePhenotypicSimilarityCalculator calculator; [Storable] public string MutationRateUpdateRule { get; set; } [Storable] private ISymbolicExpressionTreeNodeEqualityComparer comparer; [Storable] public ISymbolicExpressionTree Schema { get; set; } [Storable] private readonly UpdateQualityOperator updateQualityOperator; [Storable] public ISymbolicExpressionTreeManipulator SchemaManipulator { get; set; } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName)) Parameters.Add(new LookupParameter(StrictSchemaMatchingParameterName)); if (calculator == null) calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator(); if (comparer == null) comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false }; qm = new QueryMatch(comparer) { MatchParents = true }; } public SchemaEvaluator() { calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator(); comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false }; qm = new QueryMatch(comparer) { MatchParents = true }; updateQualityOperator = new UpdateQualityOperator(); #region add parameters Parameters.Add(new LookupParameter(SchemaParameterName, "The current schema to be evaluated")); Parameters.Add(new LookupParameter(MinimumSchemaFrequencyParameterName)); Parameters.Add(new LookupParameter(MutationRateParameterName)); Parameters.Add(new LookupParameter(MinimumPhenotypicSimilarityParameterName)); Parameters.Add(new LookupParameter(PopulationSizeParameterName)); Parameters.Add(new LookupParameter(RandomParameterName)); Parameters.Add(new LookupParameter>(EvaluatorParameterName)); Parameters.Add(new LookupParameter(ProblemDataParameterName)); Parameters.Add(new LookupParameter(InterpreterParameterName)); Parameters.Add(new LookupParameter(EstimationLimitsParameterName)); Parameters.Add(new LookupParameter(ApplyLinearScalingParameterName)); Parameters.Add(new LookupParameter(StrictSchemaMatchingParameterName)); Parameters.Add(new LookupParameter(MutatorParameterName)); Parameters.Add(new LookupParameter(CrossoverParameterName)); Parameters.Add(new LookupParameter(NumberOfChangedTreesParameterName)); Parameters.Add(new LookupParameter(ExecuteInParallelParameterName)); Parameters.Add(new LookupParameter(MaxDegreeOfParalellismParameterName)); Parameters.Add(new LookupParameter(ExclusiveMatchingParameterName)); Parameters.Add(new LookupParameter(UseAdaptiveMutationRateParameterName)); #endregion } [StorableConstructor] protected SchemaEvaluator(bool deserializing) : base(deserializing) { } protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) { calculator = cloner.Clone(original.calculator); comparer = cloner.Clone(original.comparer); qm = new QueryMatch(comparer) { MatchParents = true }; updateQualityOperator = new UpdateQualityOperator(); Schema = original.Schema; } public override IDeepCloneable Clone(Cloner cloner) { return new SchemaEvaluator(this, cloner); } public override IOperation Apply() { var strictSchemaMatching = StrictSchemaMatchingParameter.ActualValue.Value; if (strictSchemaMatching) { comparer.MatchVariableWeights = true; comparer.MatchConstantValues = true; } else { comparer.MatchVariableWeights = false; comparer.MatchConstantValues = false; } var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals var trees = new ISymbolicExpressionTree[individuals.Count]; var qualities = new double[individuals.Count]; for (int i = 0; i < individuals.Count; ++i) { trees[i] = (ISymbolicExpressionTree)individuals[i].Variables["SymbolicExpressionTree"].Value; qualities[i] = ((DoubleValue)individuals[i].Variables["Quality"].Value).Value; } var random = RandomParameter.ActualValue; var sRoot = Schema.Root.GetSubtree(0).GetSubtree(0); // first apply the length and root equality checks in order to filter the individuals var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value; var filtered = new List(); for (int i = 0; i < trees.Length; ++i) { if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue; var t = trees[i]; var tRoot = t.Root.GetSubtree(0).GetSubtree(0); if (t.Length < Schema.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue; filtered.Add(i); } // if we don't have enough filtered individuals, then we are done // if the schema exceeds the minimum frequency, it gets processed further int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count)); if (filtered.Count < countThreshold) { return base.Apply(); } // check if the filtered individuals match the schema var matching = new List(); var sNodes = QueryMatch.InitializePostOrder(sRoot); bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value; int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value; if (executeInParallel) { var partitioner = Partitioner.Create(0, filtered.Count); var po = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }; Parallel.ForEach(partitioner, po, (range, loop) => { var partial = new List(); for (int i = range.Item1; i < range.Item2; ++i) { var idx = filtered[i]; var tRoot = trees[idx].Root.GetSubtree(0).GetSubtree(0); var tNodes = QueryMatch.InitializePostOrder(tRoot); if (qm.Match(tNodes, sNodes)) { partial.Add(idx); } } lock (matching) { matching.AddRange(partial); } }); } else { for (int i = 0; i < filtered.Count; ++i) { var index = filtered[i]; var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0); var tNodes = QueryMatch.InitializePostOrder(tRoot); if (qm.Match(tNodes, sNodes)) { matching.Add(index); } } } if (matching.Count < countThreshold) { return base.Apply(); } matching.Sort((a, b) => qualities[a].CompareTo(qualities[b])); // sort by ascending quality var matchingIndividuals = matching.Select(x => individuals[x]).ToArray(); // fittest individual will be last in the array var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism); if (similarity < MinimumPhenotypicSimilarity.Value) { return base.Apply(); } double mutationRate; var useAdaptiveMutationRate = UseAdaptiveMutationRateParameter.ActualValue.Value; if (useAdaptiveMutationRate) { var r = (double)matchingIndividuals.Length / individuals.Count; mutationRate = CalculateMutationRate(r); } else { mutationRate = MutationRate.Value; } var mutations = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph var updates = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible // use length - 1 because we don't want to mutate the best individual in each schema group (which could also be the overall elite) for (int i = 0; i < matchingIndividuals.Length - 1; ++i) { if (random.NextDouble() > mutationRate) continue; var ind = matchingIndividuals[i]; var mutate = ExecutionContext.CreateChildOperation(SchemaManipulator, ind); var update = ExecutionContext.CreateChildOperation(updateQualityOperator, ind); mutations.Add(mutate); updates.Add(update); if (exclusiveMatching) ind.Variables.Add(new Core.Variable("AlreadyMatched")); } NumberOfChangedTrees.Value += mutations.Count; return new OperationCollection(mutations, updates, base.Apply()); } private double CalculateMutationRate(double r) { switch (MutationRateUpdateRule) { case "f(x) = x": { return r; } case "f(x) = tanh(x)": { return Math.Tanh(r); } case "f(x) = tanh(2x)": { return Math.Tanh(2 * r); } case "f(x) = tanh(3x)": { return Math.Tanh(3 * r); } case "f(x) = tanh(4x)": { return Math.Tanh(4 * r); } case "f(x) = 1-sqrt(1-x)": { return 1 - Math.Sqrt(1 - r); } default: throw new ArgumentException("Unknown replacement rule"); } } public static double CalculateSimilarity(IScope[] individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) { if (individuals.Length < 2) return double.NaN; double similarity = 0; int count = individuals.Length; int n = count * (count - 1) / 2; if (parallel) { var ii = new int[n]; var jj = new int[n]; int k = 0; for (int i = 0; i < count - 1; ++i) for (int j = i + 1; j < count; ++j) { ii[k] = i; jj[k] = j; ++k; } var po = new ParallelOptions { MaxDegreeOfParallelism = nThreads }; var partitioner = Partitioner.Create(0, n); var locker = new object(); Parallel.ForEach(partitioner, new ParallelOptions { MaxDegreeOfParallelism = 4 }, (range, loop) => { var partial = 0d; for (int idx = range.Item1; idx < range.Item2; ++idx) { int i = ii[idx]; int j = jj[idx]; partial += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]); } lock (locker) { similarity += partial; } }); } else { for (int i = 0; i < count - 1; ++i) { for (int j = i + 1; j < count; ++j) { similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]); } } } return similarity / n; } } }