#region License Information /* HeuristicLab * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.EvolutionTracking; using HeuristicLab.Optimization; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Random; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { [Item("SchemaEvaluator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")] [StorableClass] public class SchemaEvaluator : EvolutionTrackingOperator { #region parameter names private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency"; private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity"; private const string ReplacementRatioParameterName = "ReplacementRatio"; private const string SchemaParameterName = "Schema"; private const string PopulationSizeParameterName = "PopulationSize"; private const string RandomParameterName = "Random"; private const string EvaluatorParameterName = "Evaluator"; private const string ProblemDataParameterName = "ProblemData"; private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter"; private const string EstimationLimitsParameterName = "EstimationLimits"; private const string ApplyLinearScalingParameterName = "ApplyLinearScaling"; private const string MutatorParameterName = "Mutator"; private const string CrossoverParameterName = "Crossover"; private const string RandomReplacementParameterName = "RandomReplacement"; private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees"; private const string ExecuteInParallelParameterName = "ExecuteInParallel"; private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism"; private const string ExclusiveMatchingParameterName = "ExclusiveMatching"; private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio"; private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching"; #endregion #region parameters public ILookupParameter UseAdaptiveReplacementRatioParameter { get { return (ILookupParameter)Parameters[UseAdaptiveReplacementRatioParameterName]; } } public ILookupParameter ExclusiveMatchingParameter { get { return (ILookupParameter)Parameters[ExclusiveMatchingParameterName]; } } public ILookupParameter ExecuteInParallelParameter { get { return (ILookupParameter)Parameters[ExecuteInParallelParameterName]; } } public ILookupParameter MaxDegreeOfParallelismParameter { get { return (ILookupParameter)Parameters[MaxDegreeOfParalellismParameterName]; } } public ILookupParameter> EvaluatorParameter { get { return (ILookupParameter>)Parameters[EvaluatorParameterName]; } } public ILookupParameter ProblemDataParameter { get { return (ILookupParameter)Parameters[ProblemDataParameterName]; } } public ILookupParameter InterpreterParameter { get { return (ILookupParameter)Parameters[InterpreterParameterName]; } } public ILookupParameter EstimationLimitsParameter { get { return (ILookupParameter)Parameters[EstimationLimitsParameterName]; } } public ILookupParameter ApplyLinearScalingParameter { get { return (ILookupParameter)Parameters[ApplyLinearScalingParameterName]; } } public ILookupParameter RandomReplacementParameter { get { return (ILookupParameter)Parameters[RandomReplacementParameterName]; } } public ILookupParameter CrossoverParameter { get { return (ILookupParameter)Parameters[CrossoverParameterName]; } } public ILookupParameter RandomParameter { get { return (ILookupParameter)Parameters[RandomParameterName]; } } public ILookupParameter PopulationSizeParameter { get { return (ILookupParameter)Parameters[PopulationSizeParameterName]; } } public ILookupParameter SchemaParameter { get { return (ILookupParameter)Parameters[SchemaParameterName]; } } public ILookupParameter MinimumSchemaFrequencyParameter { get { return (ILookupParameter)Parameters[MinimumSchemaFrequencyParameterName]; } } public ILookupParameter ReplacementRatioParameter { get { return (ILookupParameter)Parameters[ReplacementRatioParameterName]; } } public ILookupParameter MinimumPhenotypicSimilarityParameter { get { return (ILookupParameter)Parameters[MinimumPhenotypicSimilarityParameterName]; } } public LookupParameter NumberOfChangedTreesParameter { get { return (LookupParameter)Parameters[NumberOfChangedTreesParameterName]; } } public LookupParameter StrictSchemaMatchingParameter { get { return (LookupParameter)Parameters[StrictSchemaMatchingParameterName]; } } #endregion #region parameter properties public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } } public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } } public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } } public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } } public IntValue NumberOfChangedTrees { get { return NumberOfChangedTreesParameter.ActualValue; } } #endregion private QueryMatch qm; [Storable] private SymbolicExpressionTreePhenotypicSimilarityCalculator calculator; [Storable] public string ReplacementRule { get; set; } [Storable] private ISymbolicExpressionTreeNodeEqualityComparer comparer; [Storable] public ISymbolicExpressionTree Schema { get; set; } [Storable] private readonly UpdateQualityOperator updateQualityOperator; [Storable] public ISymbolicExpressionTreeManipulator SchemaManipulator { get; set; } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName)) Parameters.Add(new LookupParameter(StrictSchemaMatchingParameterName)); if (calculator == null) calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator(); if (comparer == null) comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false }; qm = new QueryMatch(comparer) { MatchParents = true }; } public SchemaEvaluator() { calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator(); comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false }; qm = new QueryMatch(comparer) { MatchParents = true }; updateQualityOperator = new UpdateQualityOperator(); #region add parameters Parameters.Add(new LookupParameter(SchemaParameterName, "The current schema to be evaluated")); Parameters.Add(new LookupParameter(MinimumSchemaFrequencyParameterName)); Parameters.Add(new LookupParameter(ReplacementRatioParameterName)); Parameters.Add(new LookupParameter(MinimumPhenotypicSimilarityParameterName)); Parameters.Add(new LookupParameter(PopulationSizeParameterName)); Parameters.Add(new LookupParameter(RandomParameterName)); Parameters.Add(new LookupParameter>(EvaluatorParameterName)); Parameters.Add(new LookupParameter(ProblemDataParameterName)); Parameters.Add(new LookupParameter(InterpreterParameterName)); Parameters.Add(new LookupParameter(EstimationLimitsParameterName)); Parameters.Add(new LookupParameter(ApplyLinearScalingParameterName)); Parameters.Add(new LookupParameter(StrictSchemaMatchingParameterName)); Parameters.Add(new LookupParameter(MutatorParameterName)); Parameters.Add(new LookupParameter(CrossoverParameterName)); Parameters.Add(new LookupParameter(RandomReplacementParameterName)); Parameters.Add(new LookupParameter(NumberOfChangedTreesParameterName)); Parameters.Add(new LookupParameter(ExecuteInParallelParameterName)); Parameters.Add(new LookupParameter(MaxDegreeOfParalellismParameterName)); Parameters.Add(new LookupParameter(ExclusiveMatchingParameterName)); Parameters.Add(new LookupParameter(UseAdaptiveReplacementRatioParameterName)); #endregion } [StorableConstructor] protected SchemaEvaluator(bool deserializing) : base(deserializing) { } protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) { calculator = cloner.Clone(original.calculator); comparer = cloner.Clone(original.comparer); qm = new QueryMatch(comparer) { MatchParents = true }; updateQualityOperator = new UpdateQualityOperator(); Schema = original.Schema; } public override IDeepCloneable Clone(Cloner cloner) { return new SchemaEvaluator(this, cloner); } public override IOperation Apply() { var strictSchemaMatching = StrictSchemaMatchingParameter.ActualValue.Value; if (strictSchemaMatching) { comparer.MatchVariableWeights = true; comparer.MatchConstantValues = true; } else { comparer.MatchVariableWeights = false; comparer.MatchConstantValues = false; } var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals var trees = individuals.Select(x => (ISymbolicExpressionTree)x.Variables["SymbolicExpressionTree"].Value).ToList(); var random = RandomParameter.ActualValue; var s = Schema; var sRoot = s.Root.GetSubtree(0).GetSubtree(0); int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count)); // first apply the length and root equality checks in order to filter the individuals var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value; var filtered = new List(); for (int i = 0; i < individuals.Count; ++i) { if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue; var t = trees[i]; var tRoot = t.Root.GetSubtree(0).GetSubtree(0); if (t.Length < s.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue; filtered.Add(i); } // if we don't have enough filtered individuals, then we are done // if the schema exceeds the minimum frequency, it gets processed further if (filtered.Count < countThreshold) { return base.Apply(); } // check if the filtered individuals match the schema var sNodes = QueryMatch.InitializePostOrder(sRoot); var matchingIndividuals = new ScopeList(); bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value; int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value; if (executeInParallel) { var matching = new bool[filtered.Count]; Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, i => { var index = filtered[i]; var t = trees[index]; var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0)); if (qm.Match(tNodes, sNodes)) { matching[i] = true; } }); matchingIndividuals.AddRange(filtered.Where((x, i) => matching[i]).Select(x => individuals[x])); } else { for (int i = 0; i < filtered.Count; ++i) { // break early if it becomes impossible to reach the minimum threshold if (matchingIndividuals.Count + filtered.Count - i < countThreshold) break; var index = filtered[i]; var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0); var tNodes = QueryMatch.InitializePostOrder(tRoot); if (qm.Match(tNodes, sNodes)) matchingIndividuals.Add(individuals[index]); } } if (matchingIndividuals.Count < countThreshold) { return base.Apply(); } var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism); if (similarity < MinimumPhenotypicSimilarity.Value) { return base.Apply(); } double replacementRatio; var adaptiveReplacementRatio = UseAdaptiveReplacementRatioParameter.ActualValue.Value; if (adaptiveReplacementRatio) { var r = (double)matchingIndividuals.Count / individuals.Count; replacementRatio = CalculateReplacementRatio(r); } else { replacementRatio = ReplacementRatio.Value; } int n = (int)Math.Floor(matchingIndividuals.Count * replacementRatio); var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList() : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList(); var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible foreach (var ind in individualsToReplace) { var mutatorOp = ExecutionContext.CreateChildOperation(SchemaManipulator, ind); var updateOp = ExecutionContext.CreateChildOperation(updateQualityOperator, ind); mutationOc.Add(mutatorOp); updateEstimatedValues.Add(updateOp); if (exclusiveMatching) ind.Variables.Add(new Core.Variable("AlreadyMatched")); } NumberOfChangedTrees.Value += individualsToReplace.Count; // a lock is not necessary here because the SchemaEvaluators cannot be executed in parallel return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply()); } private double CalculateReplacementRatio(double r) { switch (ReplacementRule) { case "f(x) = x": { return r; } case "f(x) = tanh(x)": { return Math.Tanh(r); } case "f(x) = tanh(2x)": { return Math.Tanh(2 * r); } case "f(x) = tanh(3x)": { return Math.Tanh(3 * r); } case "f(x) = tanh(4x)": { return Math.Tanh(4 * r); } case "f(x) = 1-sqrt(1-x)": { return 1 - Math.Sqrt(1 - r); } default: throw new ArgumentException("Unknown replacement rule"); } } public static double CalculateSimilarity(ScopeList individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) { double similarity = 0; int count = individuals.Count; if (count < 2) return double.NaN; if (parallel) { var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads }; var simArray = new double[count - 1]; Parallel.For(0, count - 1, parallelOptions, i => { double innerSim = 0; for (int j = i + 1; j < count; ++j) { innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]); } simArray[i] = innerSim; }); similarity = simArray.Sum(); } else { for (int i = 0; i < count - 1; ++i) { for (int j = i + 1; j < count; ++j) { similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]); } } } return similarity / (count * (count - 1) / 2.0); } } }