#region License Information
/* HeuristicLab
* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.EvolutionTracking;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
[StorableClass]
public class SchemaCreator : EvolutionTrackingOperator {
#region parameter names
private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
private const string ReplacementRatioParameterName = "ReplacementRatio";
private const string RandomReplacementParameterName = "RandomReplacement";
private const string ExecuteInParallelParameterName = "ExecuteInParallel";
private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
private const string PercentageOfPopulationParameterName = "PercentageOfPopulationToDiversify";
private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
private const string NumberOfSchemasParameterName = "NumberOfSchemas";
private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule";
private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
#endregion
#region parameters
public IConstrainedValueParameter ReplacementRatioUpdateRuleParameter {
get { return (IConstrainedValueParameter)Parameters[ReplacementRatioUpdateRuleParameterName]; }
}
public IFixedValueParameter UseAdaptiveReplacementRatioParameter {
get { return (IFixedValueParameter)Parameters[UseAdaptiveReplacementRatioParameterName]; }
}
public IFixedValueParameter StrictSchemaMatchingParameter {
get { return (IFixedValueParameter)Parameters[StrictSchemaMatchingParameterName]; }
}
public IFixedValueParameter ExclusiveMatchingParameter {
get { return (IFixedValueParameter)Parameters[ExclusiveMatchingParameterName]; }
}
public IFixedValueParameter ScaleEstimatedValuesParameter {
get { return (IFixedValueParameter)Parameters[ScaleEstimatedValuesParameterName]; }
}
public IFixedValueParameter PercentageOfPopulationParameter {
get { return (IFixedValueParameter)Parameters[PercentageOfPopulationParameterName]; }
}
public IFixedValueParameter MinimumSchemaLengthParameter {
get { return (IFixedValueParameter)Parameters[MinimumSchemaLengthParameterName]; }
}
public IFixedValueParameter ExecuteInParallelParameter {
get { return (IFixedValueParameter)Parameters[ExecuteInParallelParameterName]; }
}
public IFixedValueParameter MaxDegreeOfParallelismParameter {
get { return (IFixedValueParameter)Parameters[MaxDegreeOfParalellismParameterName]; }
}
public IFixedValueParameter MinimumSchemaFrequencyParameter {
get { return (IFixedValueParameter)Parameters[MinimumSchemaFrequencyParameterName]; }
}
public IFixedValueParameter MinimumPhenotypicSimilarityParameter {
get { return (IFixedValueParameter)Parameters[MinimumPhenotypicSimilarityParameterName]; }
}
public IFixedValueParameter ReplacementRatioParameter {
get { return (IFixedValueParameter)Parameters[ReplacementRatioParameterName]; }
}
public IValueParameter NumberOfSchemasParameter {
get { return (IValueParameter)Parameters[NumberOfSchemasParameterName]; }
}
public IValueParameter AverageSchemaLengthParameter {
get { return (IValueParameter)Parameters[AverageSchemaLengthParameterName]; }
}
public IValueParameter NumberOfChangedTreesParameter {
get { return (IValueParameter)Parameters[NumberOfChangedTreesParameterName]; }
}
#endregion
#region parameter properties
public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
public double PercentageOfPopulation { get { return PercentageOfPopulationParameter.Value.Value; } }
public bool StrictSchemaMatching { get { return StrictSchemaMatchingParameter.Value.Value; } }
#endregion
private UpdateQualityOperator updateQualityOperator;
private DiversificationStatisticsOperator diversificationStatisticsOperator;
public override void ClearState() {
NumberOfChangedTreesParameter.Value.Value = 0;
NumberOfChangedTreesParameter.Value.Value = 0;
AverageSchemaLengthParameter.Value.Value = 0;
base.ClearState();
}
public SchemaCreator() {
#region add parameters
Parameters.Add(new FixedValueParameter(MinimumSchemaLengthParameterName, new IntValue(10)));
Parameters.Add(new FixedValueParameter(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
Parameters.Add(new FixedValueParameter(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
Parameters.Add(new FixedValueParameter(ReplacementRatioParameterName, new PercentValue(0.9)));
Parameters.Add(new FixedValueParameter(PercentageOfPopulationParameterName, new PercentValue(1)));
Parameters.Add(new FixedValueParameter(RandomReplacementParameterName, new BoolValue(false)));
Parameters.Add(new FixedValueParameter(ExecuteInParallelParameterName, new BoolValue(false)));
Parameters.Add(new FixedValueParameter(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
Parameters.Add(new FixedValueParameter(ScaleEstimatedValuesParameterName, new BoolValue(true)));
Parameters.Add(new FixedValueParameter(ExclusiveMatchingParameterName, new BoolValue(false)));
Parameters.Add(new FixedValueParameter(StrictSchemaMatchingParameterName, new BoolValue(false)));
Parameters.Add(new ValueParameter(NumberOfChangedTreesParameterName, new IntValue(0)));
Parameters.Add(new ValueParameter(NumberOfSchemasParameterName, new IntValue(0)));
Parameters.Add(new ValueParameter(AverageSchemaLengthParameterName, new DoubleValue(0)));
Parameters.Add(new FixedValueParameter(UseAdaptiveReplacementRatioParameterName, new BoolValue(true)));
var replacementRatioUpdateRules = new ItemSet(new[] {
new StringValue("f(x) = x"),
new StringValue("f(x) = tanh(x)"),
new StringValue("f(x) = tanh(2x)"),
new StringValue("f(x) = tanh(3x)"),
new StringValue("f(x) = tanh(4x)"),
new StringValue("f(x) = 1-sqrt(1-x)")
});
Parameters.Add(new ConstrainedValueParameter(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules));
#endregion
NumberOfChangedTreesParameter.Hidden = true;
NumberOfSchemasParameter.Hidden = true;
AverageSchemaLengthParameter.Hidden = true;
ExecuteInParallelParameter.Hidden = true;
MaxDegreeOfParallelismParameter.Hidden = true;
}
protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { }
public override IDeepCloneable Clone(Cloner cloner) {
return new SchemaCreator(this, cloner);
}
[StorableConstructor]
protected SchemaCreator(bool deserializing) : base(deserializing) { }
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName))
Parameters.Add(new FixedValueParameter(StrictSchemaMatchingParameterName, new BoolValue(false)));
}
public override IOperation Apply() {
// apply only when at least one generation has passed
var gen = Generations.Value;
if (gen < 1 || GenealogyGraph == null)
return base.Apply();
var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation);
var updateEstimatedValues = new OperationCollection { Parallel = true };
if (updateQualityOperator == null)
updateQualityOperator = new UpdateQualityOperator();
foreach (var s in ExecutionContext.Scope.SubScopes.Where(s => !s.Variables.ContainsKey("EstimatedValues"))) {
updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateQualityOperator, s));
}
var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value;
var evaluateSchemas = new OperationCollection();
// for now, only consider crossover offspring
var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.OrderByDescending(x => ((DoubleValue)x.Variables["Quality"].Value).Value).Take(n));
var vertices = from s in scopes
let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
let v = GenealogyGraph.GetByContent(t)
where v.InDegree == 2
select v;
var schemas = new List(GenerateSchemas(vertices, MinimumSchemaLength, StrictSchemaMatching));
#region create schemas and add subscopes representing the individuals
foreach (var schema in schemas) {
evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = replacementRule }, ExecutionContext.Scope));
}
#endregion
if (diversificationStatisticsOperator == null)
diversificationStatisticsOperator = new DiversificationStatisticsOperator();
var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
// set parameters for statistics
AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
NumberOfChangedTreesParameter.Value = new IntValue(0);
// return an operation collection containing all the scope operations + base.Apply()
return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
}
public static IEnumerable GenerateSchemas(IEnumerable> vertices, int minimumSchemaLength, bool strict = true) {
var anySubtreeSymbol = new AnySubtreeSymbol();
// var anyNodeSymbol = new AnyNodeSymbol();
var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList();
var hash = new HashSet();
// var formatter = new SymbolicExpressionTreeStringFormatter { Indent = false, AppendNewLines = false };
foreach (var g in groups) {
var parent = g.Key;
if (parent.Data.Length < minimumSchemaLength)
continue;
bool replaced = false;
var schema = (ISymbolicExpressionTree)parent.Data.Clone();
var nodes = schema.IterateNodesPrefix().ToList();
var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null);
var indices = (from arc in arcs
let fragment = (IFragment)arc.Data
select fragment.Index1).Distinct().ToArray();
Array.Sort(indices);
var nodesToReplace = indices.Select(x => nodes[x]).ToList();
for (int i = nodesToReplace.Count - 1; i >= 0; --i) {
var node = nodesToReplace[i];
// do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength
if (schema.Length - node.GetLength() + 1 < minimumSchemaLength)
continue;
var replacement = anySubtreeSymbol.CreateTreeNode();
ReplaceSubtree(node, replacement, false);
// var replacement = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MinimumArity).CreateTreeNode();
// ReplaceSubtree(node, replacement, true);
replaced = true;
}
if (replaced) {
// var str = formatter.Format(schema.Root.GetSubtree(0).GetSubtree(0));
var str = SubtreeToString(schema.Root.GetSubtree(0).GetSubtree(0), strict);
if (hash.Contains(str)) continue;
yield return schema;
hash.Add(str);
}
}
}
private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, bool preserveChildren = true) {
var parent = original.Parent;
if (parent == null)
throw new ArgumentException("Parent cannot be null for node " + original);
var index = parent.IndexOfSubtree(original);
parent.RemoveSubtree(index);
parent.InsertSubtree(index, replacement);
if (preserveChildren) {
var subtrees = original.Subtrees.ToList();
for (int i = subtrees.Count - 1; i >= 0; --i)
original.RemoveSubtree(i);
for (int i = 0; i < subtrees.Count; ++i) {
replacement.AddSubtree(subtrees[i]);
}
}
}
private static string SubtreeToString(ISymbolicExpressionTreeNode node, bool strict = false) {
StringBuilder strBuilder = new StringBuilder();
// internal nodes or leaf nodes?
if (node is AnySubtree)
return "# ";
if (node.SubtreeCount > 0) {
strBuilder.Append("(");
// symbol on same line as '('
string label = string.Empty;
if (node is AnyNode)
label = "=";
else {
label = node.Symbol.Name;
}
strBuilder.Append(label + " ");
// each subtree expression on a new line
// and closing ')' also on new line
foreach (var subtree in node.Subtrees) {
strBuilder.Append(SubtreeToString(subtree, strict));
}
strBuilder.Append(") ");
} else {
// symbol in the same line with as '(' and ')'
var v = node as VariableTreeNode;
var c = node as ConstantTreeNode;
var w = node as AnyNode; // wildcard
string label = string.Empty;
if (w != null)
label = "=";
else if (v != null)
label = strict ? string.Format("{0:0.00}_{1}", v.Weight, v.VariableName) : string.Format("{0}", v.VariableName);
else if (c != null)
label = strict ? string.Format("{0:0.00}", c.Value) : "C";
strBuilder.Append(label);
if (node.Parent != null && node != node.Parent.Subtrees.Last())
strBuilder.Append(" ");
//strBuilder.Append(")");
}
return strBuilder.ToString();
}
}
}