#region License Information
/* HeuristicLab
* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.EvolutionTracking;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
[StorableClass]
public class SchemaCreator : EvolutionTrackingOperator {
#region parameter names
private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
private const string ReplacementRatioParameterName = "ReplacementRatio";
private const string RandomReplacementParameterName = "RandomReplacement";
private const string ExecuteInParallelParameterName = "ExecuteInParallel";
private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
private const string PercentageOfPopulationParameterName = "PercentageOfPopulationToDiversify";
private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
private const string NumberOfSchemasParameterName = "NumberOfSchemas";
private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule";
#endregion
#region parameters
public IConstrainedValueParameter ReplacementRatioUpdateRuleParameter {
get { return (IConstrainedValueParameter)Parameters[ReplacementRatioUpdateRuleParameterName]; }
}
public IFixedValueParameter UseAdaptiveReplacementRatioParameter {
get { return (IFixedValueParameter)Parameters[UseAdaptiveReplacementRatioParameterName]; }
}
public IFixedValueParameter ExclusiveMatchingParameter {
get { return (IFixedValueParameter)Parameters[ExclusiveMatchingParameterName]; }
}
public IFixedValueParameter ScaleEstimatedValuesParameter {
get { return (IFixedValueParameter)Parameters[ScaleEstimatedValuesParameterName]; }
}
public IFixedValueParameter PercentageOfPopulationParameter {
get { return (IFixedValueParameter)Parameters[PercentageOfPopulationParameterName]; }
}
public IFixedValueParameter MinimumSchemaLengthParameter {
get { return (IFixedValueParameter)Parameters[MinimumSchemaLengthParameterName]; }
}
public IFixedValueParameter ExecuteInParallelParameter {
get { return (IFixedValueParameter)Parameters[ExecuteInParallelParameterName]; }
}
public IFixedValueParameter MaxDegreeOfParallelismParameter {
get { return (IFixedValueParameter)Parameters[MaxDegreeOfParalellismParameterName]; }
}
public IFixedValueParameter MinimumSchemaFrequencyParameter {
get { return (IFixedValueParameter)Parameters[MinimumSchemaFrequencyParameterName]; }
}
public IFixedValueParameter MinimumPhenotypicSimilarityParameter {
get { return (IFixedValueParameter)Parameters[MinimumPhenotypicSimilarityParameterName]; }
}
public IFixedValueParameter ReplacementRatioParameter {
get { return (IFixedValueParameter)Parameters[ReplacementRatioParameterName]; }
}
public IValueParameter NumberOfSchemasParameter {
get { return (IValueParameter)Parameters[NumberOfSchemasParameterName]; }
}
public IValueParameter AverageSchemaLengthParameter {
get { return (IValueParameter)Parameters[AverageSchemaLengthParameterName]; }
}
public IValueParameter NumberOfChangedTreesParameter {
get { return (IValueParameter)Parameters[NumberOfChangedTreesParameterName]; }
}
#endregion
#region parameter properties
public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
public double PercentageOfPopulation { get { return PercentageOfPopulationParameter.Value.Value; } }
#endregion
private UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
private DiversificationStatisticsOperator diversificationStatisticsOperator;
public SchemaCreator() {
#region add parameters
Parameters.Add(new FixedValueParameter(MinimumSchemaLengthParameterName, new IntValue(10)));
Parameters.Add(new FixedValueParameter(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
Parameters.Add(new FixedValueParameter(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
Parameters.Add(new FixedValueParameter(ReplacementRatioParameterName, new PercentValue(0.9)));
Parameters.Add(new FixedValueParameter(PercentageOfPopulationParameterName, new PercentValue(1)));
Parameters.Add(new FixedValueParameter(RandomReplacementParameterName, new BoolValue(false)));
Parameters.Add(new FixedValueParameter(ExecuteInParallelParameterName, new BoolValue(false)));
Parameters.Add(new FixedValueParameter(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
Parameters.Add(new FixedValueParameter(ScaleEstimatedValuesParameterName, new BoolValue(true)));
Parameters.Add(new FixedValueParameter(ExclusiveMatchingParameterName, new BoolValue(false)));
Parameters.Add(new ValueParameter(NumberOfChangedTreesParameterName, new IntValue(0)));
Parameters.Add(new ValueParameter(NumberOfSchemasParameterName, new IntValue(0)));
Parameters.Add(new ValueParameter(AverageSchemaLengthParameterName, new DoubleValue(0)));
Parameters.Add(new FixedValueParameter(UseAdaptiveReplacementRatioParameterName, new BoolValue(true)));
var replacementRatioUpdateRules = new ItemSet(new[] { new StringValue("f(x) = x"), new StringValue("f(x) = tanh(x)"), new StringValue("f(x) = tanh(2x)"), new StringValue("f(x) = tanh(3x)") });
Parameters.Add(new ConstrainedValueParameter(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules));
#endregion
NumberOfChangedTreesParameter.Hidden = true;
NumberOfSchemasParameter.Hidden = true;
AverageSchemaLengthParameter.Hidden = true;
ExecuteInParallelParameter.Hidden = true;
MaxDegreeOfParallelismParameter.Hidden = true;
}
protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { }
public override IDeepCloneable Clone(Cloner cloner) {
return new SchemaCreator(this, cloner);
}
[StorableConstructor]
protected SchemaCreator(bool deserializing) : base(deserializing) { }
public override IOperation Apply() {
// apply only when at least one generation has passed
var gen = Generations.Value;
if (gen < 1 || GenealogyGraph == null)
return base.Apply();
var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation);
var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.Take(n));
var updateEstimatedValues = new OperationCollection { Parallel = true };
if (updateEstimatedValuesOperator == null)
updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
foreach (var s in scopes) {
if (!s.Variables.ContainsKey("EstimatedValues"))
s.Variables.Add(new Core.Variable("EstimatedValues"));
updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s));
}
Func rule;
var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value;
switch (replacementRule) {
case "f(x) = x": {
rule = x => x;
break;
}
case "f(x) = tanh(x)": {
rule = x => Math.Tanh(x);
break;
}
case "f(x) = tanh(2x)": {
rule = x => Math.Tanh(2 * x);
break;
}
case "f(x) = tanh(3x)": {
rule = x => Math.Tanh(3 * x);
break;
}
default:
throw new ArgumentException("Unknown replacement rule");
}
var evaluateSchemas = new OperationCollection();
// for now, only consider crossover offspring
var vertices = from s in scopes
let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
let v = GenealogyGraph.GetByContent(t)
where v.InDegree == 2
select v;
var schemas = new List(GenerateSchemas(vertices, MinimumSchemaLength));
#region create schemas and add subscopes representing the individuals
foreach (var schema in schemas) {
evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = rule }, ExecutionContext.Scope));
}
#endregion
if (diversificationStatisticsOperator == null)
diversificationStatisticsOperator = new DiversificationStatisticsOperator();
var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
// set parameters for statistics
AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
NumberOfChangedTreesParameter.Value = new IntValue(0);
// return an operation collection containing all the scope operations + base.Apply()
return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
}
public static IEnumerable GenerateSchemas(IEnumerable> vertices, int minimumSchemaLength) {
var anySubtreeSymbol = new AnySubtreeSymbol();
// var anyNodeSymbol = new AnyNodeSymbol();
var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList();
var hash = new HashSet();
var formatter = new SymbolicExpressionTreeStringFormatter { Indent = false, AppendNewLines = false };
foreach (var g in groups) {
var parent = g.Key;
if (parent.Data.Length < minimumSchemaLength)
continue;
bool replaced = false;
var schema = (ISymbolicExpressionTree)parent.Data.Clone();
var nodes = schema.IterateNodesPrefix().ToList();
var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null);
var indices = (from arc in arcs
let fragment = (IFragment)arc.Data
select fragment.Index1).Distinct().ToArray();
Array.Sort(indices);
var nodesToReplace = indices.Select(x => nodes[x]).ToList();
for (int i = nodesToReplace.Count - 1; i >= 0; --i) {
var node = nodesToReplace[i];
// do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength
if (schema.Length - node.GetLength() + 1 < minimumSchemaLength)
continue;
// var replacement = anySubtreeSymbol.CreateTreeNode();
// ReplaceSubtree(node, replacement, false);
var replacement = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MinimumArity).CreateTreeNode();
ReplaceSubtree(node, replacement, true);
replaced = true;
}
if (replaced) {
var str = formatter.Format(schema.Root.GetSubtree(0).GetSubtree(0));
if (hash.Contains(str)) continue;
yield return schema;
hash.Add(str);
}
}
}
private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, bool preserveChildren = true) {
var parent = original.Parent;
if (parent == null)
throw new ArgumentException("Parent cannot be null for node " + original);
var index = parent.IndexOfSubtree(original);
parent.RemoveSubtree(index);
parent.InsertSubtree(index, replacement);
if (preserveChildren) {
var subtrees = original.Subtrees.ToList();
for (int i = subtrees.Count - 1; i >= 0; --i)
original.RemoveSubtree(i);
for (int i = 0; i < subtrees.Count; ++i) {
replacement.AddSubtree(subtrees[i]);
}
}
}
}
}