#region License Information /* HeuristicLab * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.EvolutionTracking; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.PluginInfrastructure; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { using Vertices = IEnumerable>; [Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")] [StorableClass] public class SchemaCreator : EvolutionTrackingOperator { #region parameter names // criteria to trigger schema-based diversification private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength"; private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency"; private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity"; // parameters controlling the diversification strategy private const string UseAdaptiveMutationRateParameterName = "UseAdaptiveMutationRate"; // dynamically control mutation rate private const string MutationRateUpdateRuleParameterName = "ReplacementRatioUpdateRule"; private const string MutationRateParameterName = "MutationRate"; // fixed mutation rate when not using adaptive update private const string ExclusiveMatchingParameterName = "ExclusiveMatching"; // an individual can belong to only 1 schema private const string ParentsRatio = "ParentsRatio"; // use best % parents to generate schemas from private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching"; // use strict node comparison (for constant values and variable weights) private const string SchemaDefinitionParameterName = "SchemaDefinition"; // which schema definition to use: {=}, {#} or {=,#} private const string SchemaManipulatorParameterName = "SchemaManipulator"; // mutation operator to apply within schemas // control parallel behaviour private const string ExecuteInParallelParameterName = "ExecuteInParallel"; private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism"; private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues"; private const string UpdateCounterParameterName = "UpdateCounter"; private const string UpdateIntervalParameterName = "UpdateInterval"; #region information parameters private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees"; private const string NumberOfSchemasParameterName = "NumberOfSchemas"; private const string AverageSchemaLengthParameterName = "AverageSchemaLength"; #endregion #endregion #region parameters public IConstrainedValueParameter SchemaManipulatorParameter { get { return (IConstrainedValueParameter)Parameters[SchemaManipulatorParameterName]; } } public IConstrainedValueParameter SchemaDefinitionParameter { get { return (IConstrainedValueParameter)Parameters[SchemaDefinitionParameterName]; } } public IConstrainedValueParameter MutationRateUpdateRuleParameter { get { return (IConstrainedValueParameter)Parameters[MutationRateUpdateRuleParameterName]; } } public IFixedValueParameter UseAdaptiveMutationRateParameter { get { return (IFixedValueParameter)Parameters[UseAdaptiveMutationRateParameterName]; } } public IFixedValueParameter StrictSchemaMatchingParameter { get { return (IFixedValueParameter)Parameters[StrictSchemaMatchingParameterName]; } } public IFixedValueParameter ExclusiveMatchingParameter { get { return (IFixedValueParameter)Parameters[ExclusiveMatchingParameterName]; } } public IFixedValueParameter ScaleEstimatedValuesParameter { get { return (IFixedValueParameter)Parameters[ScaleEstimatedValuesParameterName]; } } public IFixedValueParameter ParentsRatioParameter { get { return (IFixedValueParameter)Parameters[ParentsRatio]; } } public IFixedValueParameter MinimumSchemaLengthParameter { get { return (IFixedValueParameter)Parameters[MinimumSchemaLengthParameterName]; } } public IFixedValueParameter ExecuteInParallelParameter { get { return (IFixedValueParameter)Parameters[ExecuteInParallelParameterName]; } } public IFixedValueParameter MaxDegreeOfParallelismParameter { get { return (IFixedValueParameter)Parameters[MaxDegreeOfParalellismParameterName]; } } public IFixedValueParameter MinimumSchemaFrequencyParameter { get { return (IFixedValueParameter)Parameters[MinimumSchemaFrequencyParameterName]; } } public IFixedValueParameter MinimumPhenotypicSimilarityParameter { get { return (IFixedValueParameter)Parameters[MinimumPhenotypicSimilarityParameterName]; } } public IFixedValueParameter MutationRateParameter { get { return (IFixedValueParameter)Parameters[MutationRateParameterName]; } } public IValueParameter NumberOfSchemasParameter { get { return (IValueParameter)Parameters[NumberOfSchemasParameterName]; } } public IValueParameter AverageSchemaLengthParameter { get { return (IValueParameter)Parameters[AverageSchemaLengthParameterName]; } } public IValueParameter NumberOfChangedTreesParameter { get { return (IValueParameter)Parameters[NumberOfChangedTreesParameterName]; } } public IFixedValueParameter UpdateCounterParameter { get { return (IFixedValueParameter)Parameters[UpdateCounterParameterName]; } } public IFixedValueParameter UpdateIntervalParameter { get { return (IFixedValueParameter)Parameters[UpdateIntervalParameterName]; } } #endregion #region parameter properties public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } } public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } } public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } } public double PercentageOfPopulation { get { return ParentsRatioParameter.Value.Value; } } public bool StrictSchemaMatching { get { return StrictSchemaMatchingParameter.Value.Value; } } public IntValue UpdateCounter { get { return UpdateCounterParameter.Value; } } public IntValue UpdateInterval { get { return UpdateIntervalParameter.Value; } } #endregion private UpdateQualityOperator updateQualityOperator; private DiversificationStatisticsOperator diversificationStatisticsOperator; public override void InitializeState() { base.InitializeState(); NumberOfChangedTreesParameter.Value.Value = 0; NumberOfChangedTreesParameter.Value.Value = 0; AverageSchemaLengthParameter.Value.Value = 0; UpdateCounter.Value = 0; } public override void ClearState() { NumberOfChangedTreesParameter.Value.Value = 0; NumberOfChangedTreesParameter.Value.Value = 0; AverageSchemaLengthParameter.Value.Value = 0; UpdateCounter.Value = 0; base.ClearState(); } public SchemaCreator() { #region add parameters Parameters.Add(new FixedValueParameter(MinimumSchemaLengthParameterName, new IntValue(10))); Parameters.Add(new FixedValueParameter(MinimumSchemaFrequencyParameterName, new PercentValue(0.01))); Parameters.Add(new FixedValueParameter(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9))); Parameters.Add(new FixedValueParameter(MutationRateParameterName, new PercentValue(0.9))); Parameters.Add(new FixedValueParameter(ParentsRatio, new PercentValue(1))); Parameters.Add(new FixedValueParameter(ExecuteInParallelParameterName, new BoolValue(false))); Parameters.Add(new FixedValueParameter(MaxDegreeOfParalellismParameterName, new IntValue(-1))); Parameters.Add(new FixedValueParameter(ScaleEstimatedValuesParameterName, new BoolValue(true))); Parameters.Add(new FixedValueParameter(ExclusiveMatchingParameterName, new BoolValue(false))); Parameters.Add(new FixedValueParameter(StrictSchemaMatchingParameterName, new BoolValue(false))); Parameters.Add(new ValueParameter(NumberOfChangedTreesParameterName, new IntValue(0))); Parameters.Add(new ValueParameter(NumberOfSchemasParameterName, new IntValue(0))); Parameters.Add(new ValueParameter(AverageSchemaLengthParameterName, new DoubleValue(0))); Parameters.Add(new FixedValueParameter(UseAdaptiveMutationRateParameterName, new BoolValue(true))); Parameters.Add(new FixedValueParameter(UpdateCounterParameterName, new IntValue(0))); Parameters.Add(new FixedValueParameter(UpdateIntervalParameterName, new IntValue(1))); // add update rules var mutationRateUpdateRules = new ItemSet(new[] { new StringValue("f(x) = x"), new StringValue("f(x) = tanh(x)"), new StringValue("f(x) = tanh(2x)"), new StringValue("f(x) = tanh(3x)"), new StringValue("f(x) = tanh(4x)"), new StringValue("f(x) = 1-sqrt(1-x)") }); var mutationRateUpdateRuleParameter = new ConstrainedValueParameter(MutationRateUpdateRuleParameterName, mutationRateUpdateRules); mutationRateUpdateRuleParameter.Value = mutationRateUpdateRules.First(); Parameters.Add(mutationRateUpdateRuleParameter); // add schema definitions var schemaDefinitions = new ItemSet(new[] { "=", "#", "=,#" }.Select(x => new StringValue(x))); var schemaDefinitionParameter = new ConstrainedValueParameter(SchemaDefinitionParameterName, schemaDefinitions); schemaDefinitionParameter.Value = schemaDefinitions.First(); Parameters.Add(schemaDefinitionParameter); // use a separate set of manipulators in order to allow the user to specify different mutations for schemas // and not be limited to the manipulator parameter for the whole algorithm var manipulators = ApplicationManager.Manager.GetTypes(typeof(ISymbolicExpressionTreeManipulator)) .Where(x => !typeof(IMultiOperator).IsAssignableFrom(x) && !typeof(ISymbolicExpressionTreeArchitectureAlteringOperator).IsAssignableFrom(x)) .Select(x => (ISymbolicExpressionTreeManipulator)Activator.CreateInstance(x)).ToList(); manipulators.Add(new MultiSymbolicExpressionTreeManipulator()); // add individual manipulators var manipulatorParameter = new ConstrainedValueParameter(SchemaManipulatorParameterName, new ItemSet(manipulators)); // add a multi manipulator as well manipulatorParameter.Value = manipulators.First(); Parameters.Add(manipulatorParameter); #endregion NumberOfChangedTreesParameter.Hidden = true; NumberOfSchemasParameter.Hidden = true; AverageSchemaLengthParameter.Hidden = true; ExecuteInParallelParameter.Hidden = true; MaxDegreeOfParallelismParameter.Hidden = true; } protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new SchemaCreator(this, cloner); } [StorableConstructor] protected SchemaCreator(bool deserializing) : base(deserializing) { } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName)) Parameters.Add(new FixedValueParameter(StrictSchemaMatchingParameterName, new BoolValue(false))); } public override IOperation Apply() { UpdateCounter.Value++; if (UpdateCounter.Value != UpdateInterval.Value) return base.Apply(); UpdateCounter.Value = 0; // apply only when at least one generation has passed var gen = Generations.Value; if (gen < 1 || GenealogyGraph == null) return base.Apply(); var updateEstimatedValues = new OperationCollection { Parallel = true }; if (updateQualityOperator == null) updateQualityOperator = new UpdateQualityOperator(); var scope = ExecutionContext.Scope; foreach (var s in scope.SubScopes.Where(s => !s.Variables.ContainsKey("EstimatedValues"))) { updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateQualityOperator, s)); } var updateRule = MutationRateUpdateRuleParameter.Value.Value; var schemaManipulator = SchemaManipulatorParameter.Value; var evaluateSchemas = new OperationCollection(); Func getQuality = s => ((DoubleValue)s.Variables["Quality"].Value).Value; var bestN = (int)Math.Round(scope.SubScopes.Count * PercentageOfPopulation); var scopes = new ScopeList(scope.SubScopes.OrderByDescending(getQuality).Take(bestN)); // for now, only consider crossover offspring var vertices = from s in scopes let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value let v = GenealogyGraph.GetByContent(t) where v.InDegree == 2 select v; IEnumerable schemas; switch (SchemaDefinitionParameter.Value.Value) { case "=": schemas = GenerateAnyNodeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching); break; case "#": schemas = GenerateAnySubtreeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching); break; case "=,#": schemas = GenerateCombinedSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching); break; default: return base.Apply(); } if (!schemas.Any()) return base.Apply(); #region create schemas and add subscopes representing the individuals double avgLength = 0; int count = 0; foreach (var schema in schemas) { avgLength += schema.Length; ++count; evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, MutationRateUpdateRule = updateRule, SchemaManipulator = schemaManipulator }, scope)); } avgLength /= count; #endregion // set parameters for statistics AverageSchemaLengthParameter.Value = new DoubleValue(avgLength); NumberOfSchemasParameter.Value = new IntValue(count); NumberOfChangedTreesParameter.Value = new IntValue(0); if (diversificationStatisticsOperator == null) diversificationStatisticsOperator = new DiversificationStatisticsOperator(); var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator); // return an operation collection containing all the scope operations + base.Apply() return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() }; } #region schema generation public static IEnumerable GenerateAnyNodeSchemas(Vertices vertices, int minimumSchemaLength, int minOffspringCount = 1, bool strict = true) { return GenerateSchemas(vertices, ReplaceAnyNode, minimumSchemaLength, minOffspringCount, strict); } public static IEnumerable GenerateAnySubtreeSchemas(Vertices vertices, int minimumSchemaLength, int minOffspringCount = 1, bool strict = true) { return GenerateSchemas(vertices, ReplaceAnySubtree, minimumSchemaLength, minOffspringCount, strict); } public static IEnumerable GenerateCombinedSchemas(Vertices vertices, int minimumSchemaLength, int minOffspringCount = 1, bool strict = true) { return GenerateSchemas(vertices, ReplaceCombined, minimumSchemaLength, minOffspringCount, strict); } public static IEnumerable GenerateSchemas(Vertices vertices, Func replaceFunc, int minimumSchemaLength, int minOffspringCount, bool strict = true) { var anySubtreeSymbol = new AnySubtreeSymbol(); var groups = vertices.GroupBy(x => x.Parents.First()).Where(g => g.Skip(minOffspringCount - 1).Any()).OrderByDescending(g => g.Count()); var hash = new HashSet(); foreach (var g in groups) { var parent = g.Key; if (parent.Data.Length < minimumSchemaLength) continue; bool replaced = false; var schema = (ISymbolicExpressionTree)parent.Data.Clone(); var nodes = schema.IterateNodesPrefix().ToList(); var fragments = g.Select(x => x.InArcs.Last().Data).Where(x => x != null).Cast>(); var indices = fragments.Select(x => x.Index1).Distinct().OrderByDescending(x => schema.Root.GetBranchLevel(nodes[x])); foreach (var i in indices) { replaced |= replaceFunc(schema, nodes[i], minimumSchemaLength); } if (replaced) { var str = schema.Root.GetSubtree(0).GetSubtree(0).FormatToString(strict); if (hash.Add(str)) yield return schema; } } } // node replacement rules private static bool ReplaceAnyNode(ISymbolicExpressionTree schema, ISymbolicExpressionTreeNode node, int minSchemaLength) { var anyNodeSymbol = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MaximumArity); var replacement = anyNodeSymbol.CreateTreeNode(); SchemaUtil.ReplaceSubtree(node, replacement, true); return true; } private static bool ReplaceAnySubtree(ISymbolicExpressionTree schema, ISymbolicExpressionTreeNode node, int minSchemaLength) { if (schema.Length - node.GetLength() + 1 < minSchemaLength) return false; var anySubtreeSymbol = new AnySubtreeSymbol(); var replacement = anySubtreeSymbol.CreateTreeNode(); SchemaUtil.ReplaceSubtree(node, replacement, false); return true; } private static bool ReplaceCombined(ISymbolicExpressionTree schema, ISymbolicExpressionTreeNode node, int minSchemaLength) { ISymbol wildcard; if (node.SubtreeCount > 0) wildcard = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MaximumArity); else wildcard = new AnySubtreeSymbol(); SchemaUtil.ReplaceSubtree(node, wildcard.CreateTreeNode(), node.SubtreeCount > 0); return true; } #endregion } }