#region License Information /* HeuristicLab * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.EvolutionTracking; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { [Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")] [StorableClass] public class SchemaCreator : EvolutionTrackingOperator { #region parameter names private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength"; private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency"; private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity"; private const string ReplacementRatioParameterName = "ReplacementRatio"; private const string RandomReplacementParameterName = "RandomReplacement"; private const string ExecuteInParallelParameterName = "ExecuteInParallel"; private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism"; private const string PercentageOfPopulationParameterName = "PercentageOfPopulationToDiversify"; private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues"; private const string ExclusiveMatchingParameterName = "ExclusiveMatching"; private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees"; private const string NumberOfSchemasParameterName = "NumberOfSchemas"; private const string AverageSchemaLengthParameterName = "AverageSchemaLength"; private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio"; private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule"; #endregion #region parameters public IConstrainedValueParameter ReplacementRatioUpdateRuleParameter { get { return (IConstrainedValueParameter)Parameters[ReplacementRatioUpdateRuleParameterName]; } } public IFixedValueParameter UseAdaptiveReplacementRatioParameter { get { return (IFixedValueParameter)Parameters[UseAdaptiveReplacementRatioParameterName]; } } public IFixedValueParameter ExclusiveMatchingParameter { get { return (IFixedValueParameter)Parameters[ExclusiveMatchingParameterName]; } } public IFixedValueParameter ScaleEstimatedValuesParameter { get { return (IFixedValueParameter)Parameters[ScaleEstimatedValuesParameterName]; } } public IFixedValueParameter PercentageOfPopulationParameter { get { return (IFixedValueParameter)Parameters[PercentageOfPopulationParameterName]; } } public IFixedValueParameter MinimumSchemaLengthParameter { get { return (IFixedValueParameter)Parameters[MinimumSchemaLengthParameterName]; } } public IFixedValueParameter ExecuteInParallelParameter { get { return (IFixedValueParameter)Parameters[ExecuteInParallelParameterName]; } } public IFixedValueParameter MaxDegreeOfParallelismParameter { get { return (IFixedValueParameter)Parameters[MaxDegreeOfParalellismParameterName]; } } public IFixedValueParameter MinimumSchemaFrequencyParameter { get { return (IFixedValueParameter)Parameters[MinimumSchemaFrequencyParameterName]; } } public IFixedValueParameter MinimumPhenotypicSimilarityParameter { get { return (IFixedValueParameter)Parameters[MinimumPhenotypicSimilarityParameterName]; } } public IFixedValueParameter ReplacementRatioParameter { get { return (IFixedValueParameter)Parameters[ReplacementRatioParameterName]; } } public IValueParameter NumberOfSchemasParameter { get { return (IValueParameter)Parameters[NumberOfSchemasParameterName]; } } public IValueParameter AverageSchemaLengthParameter { get { return (IValueParameter)Parameters[AverageSchemaLengthParameterName]; } } public IValueParameter NumberOfChangedTreesParameter { get { return (IValueParameter)Parameters[NumberOfChangedTreesParameterName]; } } #endregion #region parameter properties public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } } public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } } public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } } public double PercentageOfPopulation { get { return PercentageOfPopulationParameter.Value.Value; } } #endregion private UpdateEstimatedValuesOperator updateEstimatedValuesOperator; private DiversificationStatisticsOperator diversificationStatisticsOperator; public SchemaCreator() { #region add parameters Parameters.Add(new FixedValueParameter(MinimumSchemaLengthParameterName, new IntValue(10))); Parameters.Add(new FixedValueParameter(MinimumSchemaFrequencyParameterName, new PercentValue(0.01))); Parameters.Add(new FixedValueParameter(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9))); Parameters.Add(new FixedValueParameter(ReplacementRatioParameterName, new PercentValue(0.9))); Parameters.Add(new FixedValueParameter(PercentageOfPopulationParameterName, new PercentValue(1))); Parameters.Add(new FixedValueParameter(RandomReplacementParameterName, new BoolValue(false))); Parameters.Add(new FixedValueParameter(ExecuteInParallelParameterName, new BoolValue(false))); Parameters.Add(new FixedValueParameter(MaxDegreeOfParalellismParameterName, new IntValue(-1))); Parameters.Add(new FixedValueParameter(ScaleEstimatedValuesParameterName, new BoolValue(true))); Parameters.Add(new FixedValueParameter(ExclusiveMatchingParameterName, new BoolValue(false))); Parameters.Add(new ValueParameter(NumberOfChangedTreesParameterName, new IntValue(0))); Parameters.Add(new ValueParameter(NumberOfSchemasParameterName, new IntValue(0))); Parameters.Add(new ValueParameter(AverageSchemaLengthParameterName, new DoubleValue(0))); Parameters.Add(new FixedValueParameter(UseAdaptiveReplacementRatioParameterName, new BoolValue(true))); var replacementRatioUpdateRules = new ItemSet(new[] { new StringValue("f(x) = x"), new StringValue("f(x) = tanh(x)"), new StringValue("f(x) = tanh(2x)"), new StringValue("f(x) = tanh(3x)") }); Parameters.Add(new ConstrainedValueParameter(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules)); #endregion NumberOfChangedTreesParameter.Hidden = true; NumberOfSchemasParameter.Hidden = true; AverageSchemaLengthParameter.Hidden = true; ExecuteInParallelParameter.Hidden = true; MaxDegreeOfParallelismParameter.Hidden = true; } protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new SchemaCreator(this, cloner); } [StorableConstructor] protected SchemaCreator(bool deserializing) : base(deserializing) { } public override IOperation Apply() { // apply only when at least one generation has passed var gen = Generations.Value; if (gen < 1 || GenealogyGraph == null) return base.Apply(); var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation); var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.Take(n)); var updateEstimatedValues = new OperationCollection { Parallel = true }; if (updateEstimatedValuesOperator == null) updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator(); foreach (var s in scopes) { if (!s.Variables.ContainsKey("EstimatedValues")) s.Variables.Add(new Core.Variable("EstimatedValues")); updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s)); } Func rule; var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value; switch (replacementRule) { case "f(x) = x": { rule = x => x; break; } case "f(x) = tanh(x)": { rule = x => Math.Tanh(x); break; } case "f(x) = tanh(2x)": { rule = x => Math.Tanh(2 * x); break; } case "f(x) = tanh(3x)": { rule = x => Math.Tanh(3 * x); break; } default: throw new ArgumentException("Unknown replacement rule"); } var evaluateSchemas = new OperationCollection(); // for now, only consider crossover offspring var vertices = from s in scopes let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value let v = GenealogyGraph.GetByContent(t) where v.InDegree == 2 select v; var schemas = new List(GenerateSchemas(vertices, MinimumSchemaLength)); #region create schemas and add subscopes representing the individuals foreach (var schema in schemas) { evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = rule }, ExecutionContext.Scope)); } #endregion if (diversificationStatisticsOperator == null) diversificationStatisticsOperator = new DiversificationStatisticsOperator(); var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator); // set parameters for statistics AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length)); NumberOfSchemasParameter.Value = new IntValue(schemas.Count); NumberOfChangedTreesParameter.Value = new IntValue(0); // return an operation collection containing all the scope operations + base.Apply() return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() }; } public static IEnumerable GenerateSchemas(IEnumerable> vertices, int minimumSchemaLength) { var anySubtreeSymbol = new AnySubtreeSymbol(); // var anyNodeSymbol = new AnyNodeSymbol(); var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList(); var hash = new HashSet(); var formatter = new SymbolicExpressionTreeStringFormatter { Indent = false, AppendNewLines = false }; foreach (var g in groups) { var parent = g.Key; if (parent.Data.Length < minimumSchemaLength) continue; bool replaced = false; var schema = (ISymbolicExpressionTree)parent.Data.Clone(); var nodes = schema.IterateNodesPrefix().ToList(); var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null); var indices = (from arc in arcs let fragment = (IFragment)arc.Data select fragment.Index1).Distinct().ToArray(); Array.Sort(indices); var nodesToReplace = indices.Select(x => nodes[x]).ToList(); for (int i = nodesToReplace.Count - 1; i >= 0; --i) { var node = nodesToReplace[i]; // do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength if (schema.Length - node.GetLength() + 1 < minimumSchemaLength) continue; // var replacement = anySubtreeSymbol.CreateTreeNode(); // ReplaceSubtree(node, replacement, false); var replacement = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MinimumArity).CreateTreeNode(); ReplaceSubtree(node, replacement, true); replaced = true; } if (replaced) { var str = formatter.Format(schema.Root.GetSubtree(0).GetSubtree(0)); if (hash.Contains(str)) continue; yield return schema; hash.Add(str); } } } private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, bool preserveChildren = true) { var parent = original.Parent; if (parent == null) throw new ArgumentException("Parent cannot be null for node " + original); var index = parent.IndexOfSubtree(original); parent.RemoveSubtree(index); parent.InsertSubtree(index, replacement); if (preserveChildren) { var subtrees = original.Subtrees.ToList(); for (int i = subtrees.Count - 1; i >= 0; --i) original.RemoveSubtree(i); for (int i = 0; i < subtrees.Count; ++i) { replacement.AddSubtree(subtrees[i]); } } } } }