#region License Information
/* HeuristicLab
* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Operators;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[StorableType("A26ECCD9-A802-4F17-907B-7278D2B0A0C7")]
[Item("SymbolicExpressionTreePruningOperator", "An operator that replaces introns with constant values in a symbolic expression tree.")]
public abstract class SymbolicDataAnalysisExpressionPruningOperator : SingleSuccessorOperator, ISymbolicExpressionTreeOperator {
#region parameter names
private const string ProblemDataParameterName = "ProblemData";
private const string SymbolicDataAnalysisModelParameterName = "SymbolicDataAnalysisModel";
private const string ImpactValuesCalculatorParameterName = "ImpactValuesCalculator";
private const string PrunedSubtreesParameterName = "PrunedSubtrees";
private const string PrunedTreesParameterName = "PrunedTrees";
private const string PrunedNodesParameterName = "PrunedNodes";
private const string FitnessCalculationPartitionParameterName = "FitnessCalculationPartition";
private const string NodeImpactThresholdParameterName = "ImpactThreshold";
private const string PruneOnlyZeroImpactNodesParameterName = "PruneOnlyZeroImpactNodes";
private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree"; // the tree to be pruned
private const string QualityParameterName = "Quality"; // the quality
private const string EstimationLimitsParameterName = "EstimationLimits";
private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
#endregion
#region parameter properties
public ILookupParameter SymbolicExpressionTreeParameter {
get { return (ILookupParameter)Parameters[SymbolicExpressionTreeParameterName]; }
}
public ILookupParameter QualityParameter {
get { return (ILookupParameter)Parameters[QualityParameterName]; }
}
public ILookupParameter ProblemDataParameter {
get { return (ILookupParameter)Parameters[ProblemDataParameterName]; }
}
public IValueParameter ImpactValuesCalculatorParameter {
get { return (IValueParameter)Parameters[ImpactValuesCalculatorParameterName]; }
}
public ILookupParameter FitnessCalculationPartitionParameter {
get { return (ILookupParameter)Parameters[FitnessCalculationPartitionParameterName]; }
}
public ILookupParameter PrunedSubtreesParameter {
get { return (ILookupParameter)Parameters[PrunedSubtreesParameterName]; }
}
public ILookupParameter PrunedTreesParameter {
get { return (ILookupParameter)Parameters[PrunedTreesParameterName]; }
}
public ILookupParameter PrunedNodesParameter {
get { return (ILookupParameter)Parameters[PrunedNodesParameterName]; }
}
public IFixedValueParameter NodeImpactThresholdParameter {
get { return (IFixedValueParameter)Parameters[NodeImpactThresholdParameterName]; }
}
public IFixedValueParameter PruneOnlyZeroImpactNodesParameter {
get { return (IFixedValueParameter)Parameters[PruneOnlyZeroImpactNodesParameterName]; }
}
public ILookupParameter EstimationLimitsParameter {
get { return (ILookupParameter)Parameters[EstimationLimitsParameterName]; }
}
public ILookupParameter InterpreterParameter {
get { return (ILookupParameter)Parameters[InterpreterParameterName]; }
}
public ILookupParameter ApplyLinearScalingParameter {
get { return (ILookupParameter)Parameters[ApplyLinearScalingParameterName]; }
}
#endregion
#region properties
public ISymbolicDataAnalysisSolutionImpactValuesCalculator ImpactValuesCalculator {
get { return ImpactValuesCalculatorParameter.Value; }
set { ImpactValuesCalculatorParameter.Value = value; }
}
public bool PruneOnlyZeroImpactNodes {
get { return PruneOnlyZeroImpactNodesParameter.Value.Value; }
set { PruneOnlyZeroImpactNodesParameter.Value.Value = value; }
}
public double NodeImpactThreshold {
get { return NodeImpactThresholdParameter.Value.Value; }
set { NodeImpactThresholdParameter.Value.Value = value; }
}
#endregion
[StorableConstructor]
protected SymbolicDataAnalysisExpressionPruningOperator(bool deserializing) : base(deserializing) { }
protected SymbolicDataAnalysisExpressionPruningOperator(SymbolicDataAnalysisExpressionPruningOperator original, Cloner cloner)
: base(original, cloner) { }
protected SymbolicDataAnalysisExpressionPruningOperator(ISymbolicDataAnalysisSolutionImpactValuesCalculator impactValuesCalculator) {
#region add parameters
Parameters.Add(new LookupParameter(ProblemDataParameterName));
Parameters.Add(new LookupParameter(SymbolicDataAnalysisModelParameterName));
Parameters.Add(new LookupParameter(FitnessCalculationPartitionParameterName));
Parameters.Add(new LookupParameter(PrunedNodesParameterName, "A counter of how many nodes were pruned."));
Parameters.Add(new LookupParameter(PrunedSubtreesParameterName, "A counter of how many subtrees were replaced."));
Parameters.Add(new LookupParameter(PrunedTreesParameterName, "A counter of how many trees were pruned."));
Parameters.Add(new FixedValueParameter(PruneOnlyZeroImpactNodesParameterName, "Specify whether or not only zero impact nodes should be pruned."));
Parameters.Add(new FixedValueParameter(NodeImpactThresholdParameterName, "Specifies an impact value threshold below which nodes should be pruned."));
Parameters.Add(new LookupParameter(EstimationLimitsParameterName));
Parameters.Add(new LookupParameter(InterpreterParameterName));
Parameters.Add(new LookupParameter(SymbolicExpressionTreeParameterName));
Parameters.Add(new LookupParameter(QualityParameterName));
Parameters.Add(new LookupParameter(ApplyLinearScalingParameterName));
Parameters.Add(new ValueParameter(ImpactValuesCalculatorParameterName, impactValuesCalculator));
#endregion
}
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
// BackwardsCompatibility3.3
#region Backwards compatible code, remove with 3.4
if (!Parameters.ContainsKey(PrunedNodesParameterName)) {
Parameters.Add(new LookupParameter(PrunedNodesParameterName, "A counter of how many nodes were pruned."));
}
if (!Parameters.ContainsKey(ApplyLinearScalingParameterName)) {
Parameters.Add(new LookupParameter(ApplyLinearScalingParameterName));
}
if (!Parameters.ContainsKey(ImpactValuesCalculatorParameterName)) {
// value must be set by derived operators (regression/classification)
Parameters.Add(new ValueParameter(ImpactValuesCalculatorParameterName));
}
#endregion
}
protected abstract ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataAnalysisProblemData problemData, DoubleLimit estimationLimits);
protected abstract double Evaluate(IDataAnalysisModel model);
public override IOperation Apply() {
var tree = SymbolicExpressionTreeParameter.ActualValue;
var problemData = ProblemDataParameter.ActualValue;
var fitnessCalculationPartition = FitnessCalculationPartitionParameter.ActualValue;
var estimationLimits = EstimationLimitsParameter.ActualValue;
var interpreter = InterpreterParameter.ActualValue;
var model = CreateModel(tree, interpreter, problemData, estimationLimits);
var nodes = tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToList();
var rows = Enumerable.Range(fitnessCalculationPartition.Start, fitnessCalculationPartition.Size).ToList();
var prunedSubtrees = 0;
var prunedTrees = 0;
var prunedNodes = 0;
double qualityForImpactsCalculation = double.NaN;
for (int i = 0; i < nodes.Count; ++i) {
var node = nodes[i];
if (node is ConstantTreeNode) continue;
double impactValue, replacementValue;
double newQualityForImpacts;
ImpactValuesCalculator.CalculateImpactAndReplacementValues(model, node, problemData, rows, out impactValue, out replacementValue, out newQualityForImpacts, qualityForImpactsCalculation);
if (PruneOnlyZeroImpactNodes && !impactValue.IsAlmost(0.0)) continue;
if (!PruneOnlyZeroImpactNodes && impactValue > NodeImpactThreshold) continue;
var constantNode = (ConstantTreeNode)node.Grammar.GetSymbol("Constant").CreateTreeNode();
constantNode.Value = replacementValue;
var length = node.GetLength();
ReplaceWithConstant(node, constantNode);
i += length - 1; // skip subtrees under the node that was folded
prunedSubtrees++;
prunedNodes += length;
qualityForImpactsCalculation = newQualityForImpacts;
}
if (prunedSubtrees > 0) prunedTrees = 1;
PrunedSubtreesParameter.ActualValue = new IntValue(prunedSubtrees);
PrunedTreesParameter.ActualValue = new IntValue(prunedTrees);
PrunedNodesParameter.ActualValue = new IntValue(prunedNodes);
if (prunedSubtrees > 0) // if nothing was pruned then there's no need to re-evaluate the tree
QualityParameter.ActualValue.Value = Evaluate(model);
return base.Apply();
}
protected static void ReplaceWithConstant(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement) {
var parent = original.Parent;
var i = parent.IndexOfSubtree(original);
parent.RemoveSubtree(i);
parent.InsertSubtree(i, replacement);
}
}
}