#region License Information
/* HeuristicLab
* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.EvolutionTracking;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
using Vertices = IEnumerable>;
[Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
[StorableClass]
public class SchemaCreator : EvolutionTrackingOperator {
#region parameter names
// criteria to trigger schema-based diversification
private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
// parameters controlling the diversification strategy
private const string UseAdaptiveMutationRateParameterName = "UseAdaptiveMutationRate"; // dynamically control mutation rate
private const string MutationRateUpdateRuleParameterName = "ReplacementRatioUpdateRule";
private const string MutationRateParameterName = "MutationRate"; // fixed mutation rate when not using adaptive update
private const string ExclusiveMatchingParameterName = "ExclusiveMatching"; // an individual can belong to only 1 schema
private const string ParentsRatio = "ParentsRatio"; // use best % parents to generate schemas from
private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching"; // use strict node comparison (for constant values and variable weights)
private const string SchemaDefinitionParameterName = "SchemaDefinition"; // which schema definition to use: {=}, {#} or {=,#}
private const string SchemaManipulatorParameterName = "SchemaManipulator"; // mutation operator to apply within schemas
// control parallel behaviour
private const string ExecuteInParallelParameterName = "ExecuteInParallel";
private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
private const string UpdateCounterParameterName = "UpdateCounter";
private const string UpdateIntervalParameterName = "UpdateInterval";
#region information parameters
private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
private const string NumberOfSchemasParameterName = "NumberOfSchemas";
private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
#endregion
#endregion
#region parameters
public IConstrainedValueParameter SchemaManipulatorParameter {
get { return (IConstrainedValueParameter)Parameters[SchemaManipulatorParameterName]; }
}
public IConstrainedValueParameter SchemaDefinitionParameter {
get { return (IConstrainedValueParameter)Parameters[SchemaDefinitionParameterName]; }
}
public IConstrainedValueParameter MutationRateUpdateRuleParameter {
get { return (IConstrainedValueParameter)Parameters[MutationRateUpdateRuleParameterName]; }
}
public IFixedValueParameter UseAdaptiveMutationRateParameter {
get { return (IFixedValueParameter)Parameters[UseAdaptiveMutationRateParameterName]; }
}
public IFixedValueParameter StrictSchemaMatchingParameter {
get { return (IFixedValueParameter)Parameters[StrictSchemaMatchingParameterName]; }
}
public IFixedValueParameter ExclusiveMatchingParameter {
get { return (IFixedValueParameter)Parameters[ExclusiveMatchingParameterName]; }
}
public IFixedValueParameter ScaleEstimatedValuesParameter {
get { return (IFixedValueParameter)Parameters[ScaleEstimatedValuesParameterName]; }
}
public IFixedValueParameter ParentsRatioParameter {
get { return (IFixedValueParameter)Parameters[ParentsRatio]; }
}
public IFixedValueParameter MinimumSchemaLengthParameter {
get { return (IFixedValueParameter)Parameters[MinimumSchemaLengthParameterName]; }
}
public IFixedValueParameter ExecuteInParallelParameter {
get { return (IFixedValueParameter)Parameters[ExecuteInParallelParameterName]; }
}
public IFixedValueParameter MaxDegreeOfParallelismParameter {
get { return (IFixedValueParameter)Parameters[MaxDegreeOfParalellismParameterName]; }
}
public IFixedValueParameter MinimumSchemaFrequencyParameter {
get { return (IFixedValueParameter)Parameters[MinimumSchemaFrequencyParameterName]; }
}
public IFixedValueParameter MinimumPhenotypicSimilarityParameter {
get { return (IFixedValueParameter)Parameters[MinimumPhenotypicSimilarityParameterName]; }
}
public IFixedValueParameter MutationRateParameter {
get { return (IFixedValueParameter)Parameters[MutationRateParameterName]; }
}
public IValueParameter NumberOfSchemasParameter {
get { return (IValueParameter)Parameters[NumberOfSchemasParameterName]; }
}
public IValueParameter AverageSchemaLengthParameter {
get { return (IValueParameter)Parameters[AverageSchemaLengthParameterName]; }
}
public IValueParameter NumberOfChangedTreesParameter {
get { return (IValueParameter)Parameters[NumberOfChangedTreesParameterName]; }
}
public IFixedValueParameter UpdateCounterParameter {
get { return (IFixedValueParameter)Parameters[UpdateCounterParameterName]; }
}
public IFixedValueParameter UpdateIntervalParameter {
get { return (IFixedValueParameter)Parameters[UpdateIntervalParameterName]; }
}
#endregion
#region parameter properties
public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
public double PercentageOfPopulation { get { return ParentsRatioParameter.Value.Value; } }
public bool StrictSchemaMatching { get { return StrictSchemaMatchingParameter.Value.Value; } }
public IntValue UpdateCounter { get { return UpdateCounterParameter.Value; } }
public IntValue UpdateInterval { get { return UpdateIntervalParameter.Value; } }
#endregion
private UpdateQualityOperator updateQualityOperator;
private DiversificationStatisticsOperator diversificationStatisticsOperator;
public override void InitializeState() {
base.InitializeState();
NumberOfChangedTreesParameter.Value.Value = 0;
NumberOfChangedTreesParameter.Value.Value = 0;
AverageSchemaLengthParameter.Value.Value = 0;
UpdateCounter.Value = 0;
}
public override void ClearState() {
NumberOfChangedTreesParameter.Value.Value = 0;
NumberOfChangedTreesParameter.Value.Value = 0;
AverageSchemaLengthParameter.Value.Value = 0;
UpdateCounter.Value = 0;
base.ClearState();
}
public SchemaCreator() {
#region add parameters
Parameters.Add(new FixedValueParameter(MinimumSchemaLengthParameterName, new IntValue(10)));
Parameters.Add(new FixedValueParameter(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
Parameters.Add(new FixedValueParameter(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
Parameters.Add(new FixedValueParameter(MutationRateParameterName, new PercentValue(0.9)));
Parameters.Add(new FixedValueParameter(ParentsRatio, new PercentValue(1)));
Parameters.Add(new FixedValueParameter(ExecuteInParallelParameterName, new BoolValue(false)));
Parameters.Add(new FixedValueParameter(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
Parameters.Add(new FixedValueParameter(ScaleEstimatedValuesParameterName, new BoolValue(true)));
Parameters.Add(new FixedValueParameter(ExclusiveMatchingParameterName, new BoolValue(false)));
Parameters.Add(new FixedValueParameter(StrictSchemaMatchingParameterName, new BoolValue(false)));
Parameters.Add(new ValueParameter(NumberOfChangedTreesParameterName, new IntValue(0)));
Parameters.Add(new ValueParameter(NumberOfSchemasParameterName, new IntValue(0)));
Parameters.Add(new ValueParameter(AverageSchemaLengthParameterName, new DoubleValue(0)));
Parameters.Add(new FixedValueParameter(UseAdaptiveMutationRateParameterName, new BoolValue(true)));
Parameters.Add(new FixedValueParameter(UpdateCounterParameterName, new IntValue(0)));
Parameters.Add(new FixedValueParameter(UpdateIntervalParameterName, new IntValue(1)));
// add update rules
var mutationRateUpdateRules = new ItemSet(new[] {
new StringValue("f(x) = x"),
new StringValue("f(x) = tanh(x)"),
new StringValue("f(x) = tanh(2x)"),
new StringValue("f(x) = tanh(3x)"),
new StringValue("f(x) = tanh(4x)"),
new StringValue("f(x) = 1-sqrt(1-x)")
});
var mutationRateUpdateRuleParameter = new ConstrainedValueParameter(MutationRateUpdateRuleParameterName, mutationRateUpdateRules);
mutationRateUpdateRuleParameter.Value = mutationRateUpdateRules.First();
Parameters.Add(mutationRateUpdateRuleParameter);
// add schema definitions
var schemaDefinitions = new ItemSet(new[] { "=", "#", "=,#" }.Select(x => new StringValue(x)));
var schemaDefinitionParameter = new ConstrainedValueParameter(SchemaDefinitionParameterName, schemaDefinitions);
schemaDefinitionParameter.Value = schemaDefinitions.First();
Parameters.Add(schemaDefinitionParameter);
// use a separate set of manipulators in order to allow the user to specify different mutations for schemas
// and not be limited to the manipulator parameter for the whole algorithm
var manipulators = ApplicationManager.Manager.GetTypes(typeof(ISymbolicExpressionTreeManipulator))
.Where(x => !typeof(IMultiOperator).IsAssignableFrom(x)
&& !typeof(ISymbolicExpressionTreeArchitectureAlteringOperator).IsAssignableFrom(x))
.Select(x => (ISymbolicExpressionTreeManipulator)Activator.CreateInstance(x)).ToList();
manipulators.Add(new MultiSymbolicExpressionTreeManipulator());
// add individual manipulators
var manipulatorParameter = new ConstrainedValueParameter(SchemaManipulatorParameterName, new ItemSet(manipulators));
// add a multi manipulator as well
manipulatorParameter.Value = manipulators.First();
Parameters.Add(manipulatorParameter);
#endregion
NumberOfChangedTreesParameter.Hidden = true;
NumberOfSchemasParameter.Hidden = true;
AverageSchemaLengthParameter.Hidden = true;
ExecuteInParallelParameter.Hidden = true;
MaxDegreeOfParallelismParameter.Hidden = true;
}
protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { }
public override IDeepCloneable Clone(Cloner cloner) {
return new SchemaCreator(this, cloner);
}
[StorableConstructor]
protected SchemaCreator(bool deserializing) : base(deserializing) { }
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName))
Parameters.Add(new FixedValueParameter(StrictSchemaMatchingParameterName, new BoolValue(false)));
}
public override IOperation Apply() {
UpdateCounter.Value++;
if (UpdateCounter.Value != UpdateInterval.Value)
return base.Apply();
UpdateCounter.Value = 0;
// apply only when at least one generation has passed
var gen = Generations.Value;
if (gen < 1 || GenealogyGraph == null)
return base.Apply();
var updateEstimatedValues = new OperationCollection { Parallel = true };
if (updateQualityOperator == null)
updateQualityOperator = new UpdateQualityOperator();
var scope = ExecutionContext.Scope;
foreach (var s in scope.SubScopes.Where(s => !s.Variables.ContainsKey("EstimatedValues"))) {
updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateQualityOperator, s));
}
var updateRule = MutationRateUpdateRuleParameter.Value.Value;
var schemaManipulator = SchemaManipulatorParameter.Value;
var evaluateSchemas = new OperationCollection();
Func getQuality = s => ((DoubleValue)s.Variables["Quality"].Value).Value;
var bestN = (int)Math.Round(scope.SubScopes.Count * PercentageOfPopulation);
var scopes = new ScopeList(scope.SubScopes.OrderByDescending(getQuality).Take(bestN));
// for now, only consider crossover offspring
var vertices = from s in scopes
let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
let v = GenealogyGraph.GetByContent(t)
where v.InDegree == 2
select v;
IEnumerable schemas;
switch (SchemaDefinitionParameter.Value.Value) {
case "=":
schemas = GenerateAnyNodeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching);
break;
case "#":
schemas = GenerateAnySubtreeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching);
break;
case "=,#":
schemas = GenerateCombinedSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching);
break;
default:
return base.Apply();
}
if (!schemas.Any())
return base.Apply();
#region create schemas and add subscopes representing the individuals
double avgLength = 0;
int count = 0;
foreach (var schema in schemas) {
avgLength += schema.Length;
++count;
evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, MutationRateUpdateRule = updateRule, SchemaManipulator = schemaManipulator }, scope));
}
avgLength /= count;
#endregion
// set parameters for statistics
AverageSchemaLengthParameter.Value = new DoubleValue(avgLength);
NumberOfSchemasParameter.Value = new IntValue(count);
NumberOfChangedTreesParameter.Value = new IntValue(0);
if (diversificationStatisticsOperator == null)
diversificationStatisticsOperator = new DiversificationStatisticsOperator();
var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
// return an operation collection containing all the scope operations + base.Apply()
return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
}
#region schema generation
public static IEnumerable GenerateAnyNodeSchemas(Vertices vertices, int minimumSchemaLength, int minOffspringCount = 1, bool strict = true) {
return GenerateSchemas(vertices, ReplaceAnyNode, minimumSchemaLength, minOffspringCount, strict);
}
public static IEnumerable GenerateAnySubtreeSchemas(Vertices vertices, int minimumSchemaLength, int minOffspringCount = 1, bool strict = true) {
return GenerateSchemas(vertices, ReplaceAnySubtree, minimumSchemaLength, minOffspringCount, strict);
}
public static IEnumerable GenerateCombinedSchemas(Vertices vertices, int minimumSchemaLength, int minOffspringCount = 1, bool strict = true) {
return GenerateSchemas(vertices, ReplaceCombined, minimumSchemaLength, minOffspringCount, strict);
}
public static IEnumerable GenerateSchemas(Vertices vertices, Func replaceFunc, int minimumSchemaLength, int minOffspringCount, bool strict = true) {
var anySubtreeSymbol = new AnySubtreeSymbol();
var groups = vertices.GroupBy(x => x.Parents.First()).Where(g => g.Skip(minOffspringCount - 1).Any()).OrderByDescending(g => g.Count());
var hash = new HashSet();
foreach (var g in groups) {
var parent = g.Key;
if (parent.Data.Length < minimumSchemaLength)
continue;
bool replaced = false;
var schema = (ISymbolicExpressionTree)parent.Data.Clone();
var nodes = schema.IterateNodesPrefix().ToList();
var fragments = g.Select(x => x.InArcs.Last().Data).Where(x => x != null).Cast>();
var indices = fragments.Select(x => x.Index1).Distinct().OrderByDescending(x => schema.Root.GetBranchLevel(nodes[x]));
foreach (var i in indices) {
replaced |= replaceFunc(schema, nodes[i], minimumSchemaLength);
}
if (replaced) {
var str = schema.Root.GetSubtree(0).GetSubtree(0).FormatToString(strict);
if (hash.Add(str))
yield return schema;
}
}
}
// node replacement rules
private static bool ReplaceAnyNode(ISymbolicExpressionTree schema, ISymbolicExpressionTreeNode node, int minSchemaLength) {
var anyNodeSymbol = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MaximumArity);
var replacement = anyNodeSymbol.CreateTreeNode();
SchemaUtil.ReplaceSubtree(node, replacement, true);
return true;
}
private static bool ReplaceAnySubtree(ISymbolicExpressionTree schema, ISymbolicExpressionTreeNode node, int minSchemaLength) {
if (schema.Length - node.GetLength() + 1 < minSchemaLength)
return false;
var anySubtreeSymbol = new AnySubtreeSymbol();
var replacement = anySubtreeSymbol.CreateTreeNode();
SchemaUtil.ReplaceSubtree(node, replacement, false);
return true;
}
private static bool ReplaceCombined(ISymbolicExpressionTree schema, ISymbolicExpressionTreeNode node, int minSchemaLength) {
ISymbol wildcard;
if (node.SubtreeCount > 0)
wildcard = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MaximumArity);
else
wildcard = new AnySubtreeSymbol();
SchemaUtil.ReplaceSubtree(node, wildcard.CreateTreeNode(), node.SubtreeCount > 0);
return true;
}
#endregion
}
}