Free cookie consent management tool by TermsFeed Policy Generator

source: stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionPruningOperator.cs @ 17741

Last change on this file since 17741 was 17181, checked in by swagner, 5 years ago

#2875: Merged r17180 from trunk to stable

File size: 11.5 KB
RevLine 
[10368]1#region License Information
2
3/* HeuristicLab
[17181]4 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[10368]5 *
6 * This file is part of HeuristicLab.
7 *
8 * HeuristicLab is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * HeuristicLab is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#endregion
23
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Parameters;
[17097]31using HEAL.Attic;
[10368]32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[17097]34  [StorableType("DBE27385-2E8C-4314-88B4-F96A5240FC9D")]
[10368]35  [Item("SymbolicExpressionTreePruningOperator", "An operator that replaces introns with constant values in a symbolic expression tree.")]
[12745]36  public abstract class SymbolicDataAnalysisExpressionPruningOperator : SingleSuccessorOperator, ISymbolicExpressionTreeOperator {
[11145]37    #region parameter names
38    private const string ProblemDataParameterName = "ProblemData";
39    private const string SymbolicDataAnalysisModelParameterName = "SymbolicDataAnalysisModel";
40    private const string ImpactValuesCalculatorParameterName = "ImpactValuesCalculator";
41    private const string PrunedSubtreesParameterName = "PrunedSubtrees";
42    private const string PrunedTreesParameterName = "PrunedTrees";
[12745]43    private const string PrunedNodesParameterName = "PrunedNodes";
[11145]44    private const string FitnessCalculationPartitionParameterName = "FitnessCalculationPartition";
45    private const string NodeImpactThresholdParameterName = "ImpactThreshold";
46    private const string PruneOnlyZeroImpactNodesParameterName = "PruneOnlyZeroImpactNodes";
47    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree"; // the tree to be pruned
48    private const string QualityParameterName = "Quality"; // the quality
49    private const string EstimationLimitsParameterName = "EstimationLimits";
50    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
[12745]51    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
[11145]52    #endregion
53
[10368]54    #region parameter properties
[11145]55    public ILookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
56      get { return (ILookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
[10368]57    }
[11145]58    public ILookupParameter<DoubleValue> QualityParameter {
59      get { return (ILookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
[10368]60    }
[11145]61    public ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
62      get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
63    }
64    public IValueParameter<ISymbolicDataAnalysisSolutionImpactValuesCalculator> ImpactValuesCalculatorParameter {
65      get { return (IValueParameter<ISymbolicDataAnalysisSolutionImpactValuesCalculator>)Parameters[ImpactValuesCalculatorParameterName]; }
66    }
67    public ILookupParameter<IntRange> FitnessCalculationPartitionParameter {
68      get { return (ILookupParameter<IntRange>)Parameters[FitnessCalculationPartitionParameterName]; }
69    }
70    public ILookupParameter<IntValue> PrunedSubtreesParameter {
71      get { return (ILookupParameter<IntValue>)Parameters[PrunedSubtreesParameterName]; }
72    }
73    public ILookupParameter<IntValue> PrunedTreesParameter {
74      get { return (ILookupParameter<IntValue>)Parameters[PrunedTreesParameterName]; }
75    }
[12745]76    public ILookupParameter<IntValue> PrunedNodesParameter {
77      get { return (ILookupParameter<IntValue>)Parameters[PrunedNodesParameterName]; }
78    }
[11145]79    public IFixedValueParameter<DoubleValue> NodeImpactThresholdParameter {
80      get { return (IFixedValueParameter<DoubleValue>)Parameters[NodeImpactThresholdParameterName]; }
81    }
82    public IFixedValueParameter<BoolValue> PruneOnlyZeroImpactNodesParameter {
83      get { return (IFixedValueParameter<BoolValue>)Parameters[PruneOnlyZeroImpactNodesParameterName]; }
84    }
85    public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
86      get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
87    }
88    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
89      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
90    }
[12745]91    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
92      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
93    }
[10368]94    #endregion
[11145]95
[10368]96    #region properties
[12745]97    public ISymbolicDataAnalysisSolutionImpactValuesCalculator ImpactValuesCalculator {
98      get { return ImpactValuesCalculatorParameter.Value; }
99      set { ImpactValuesCalculatorParameter.Value = value; }
100    }
101    public bool PruneOnlyZeroImpactNodes {
[11145]102      get { return PruneOnlyZeroImpactNodesParameter.Value.Value; }
103      set { PruneOnlyZeroImpactNodesParameter.Value.Value = value; }
[10368]104    }
[12745]105    public double NodeImpactThreshold {
[11145]106      get { return NodeImpactThresholdParameter.Value.Value; }
107      set { NodeImpactThresholdParameter.Value.Value = value; }
[10368]108    }
[11145]109    #endregion
[10368]110
[11145]111    [StorableConstructor]
[17097]112    protected SymbolicDataAnalysisExpressionPruningOperator(StorableConstructorFlag _) : base(_) { }
[11145]113    protected SymbolicDataAnalysisExpressionPruningOperator(SymbolicDataAnalysisExpressionPruningOperator original, Cloner cloner)
114      : base(original, cloner) { }
115
[12745]116    protected SymbolicDataAnalysisExpressionPruningOperator(ISymbolicDataAnalysisSolutionImpactValuesCalculator impactValuesCalculator) {
[11145]117      #region add parameters
118      Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName));
119      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisModel>(SymbolicDataAnalysisModelParameterName));
120      Parameters.Add(new LookupParameter<IntRange>(FitnessCalculationPartitionParameterName));
[12745]121      Parameters.Add(new LookupParameter<IntValue>(PrunedNodesParameterName, "A counter of how many nodes were pruned."));
[11145]122      Parameters.Add(new LookupParameter<IntValue>(PrunedSubtreesParameterName, "A counter of how many subtrees were replaced."));
123      Parameters.Add(new LookupParameter<IntValue>(PrunedTreesParameterName, "A counter of how many trees were pruned."));
124      Parameters.Add(new FixedValueParameter<BoolValue>(PruneOnlyZeroImpactNodesParameterName, "Specify whether or not only zero impact nodes should be pruned."));
125      Parameters.Add(new FixedValueParameter<DoubleValue>(NodeImpactThresholdParameterName, "Specifies an impact value threshold below which nodes should be pruned."));
126      Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
127      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
128      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName));
129      Parameters.Add(new LookupParameter<DoubleValue>(QualityParameterName));
[12745]130      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
131      Parameters.Add(new ValueParameter<ISymbolicDataAnalysisSolutionImpactValuesCalculator>(ImpactValuesCalculatorParameterName, impactValuesCalculator));
[11145]132      #endregion
[10368]133    }
134
[12745]135    [StorableHook(HookType.AfterDeserialization)]
136    private void AfterDeserialization() {
137      // BackwardsCompatibility3.3
138      #region Backwards compatible code, remove with 3.4
139      if (!Parameters.ContainsKey(PrunedNodesParameterName)) {
140        Parameters.Add(new LookupParameter<IntValue>(PrunedNodesParameterName, "A counter of how many nodes were pruned."));
141      }
142      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName)) {
143        Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
144      }
145      if (!Parameters.ContainsKey(ImpactValuesCalculatorParameterName)) {
146        // value must be set by derived operators (regression/classification)
147        Parameters.Add(new ValueParameter<ISymbolicDataAnalysisSolutionImpactValuesCalculator>(ImpactValuesCalculatorParameterName));
148      }
149      #endregion
150    }
[10368]151
[12745]152    protected abstract ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataAnalysisProblemData problemData, DoubleLimit estimationLimits);
153
[11145]154    protected abstract double Evaluate(IDataAnalysisModel model);
[10368]155
156    public override IOperation Apply() {
[12745]157      var tree = SymbolicExpressionTreeParameter.ActualValue;
158      var problemData = ProblemDataParameter.ActualValue;
159      var fitnessCalculationPartition = FitnessCalculationPartitionParameter.ActualValue;
160      var estimationLimits = EstimationLimitsParameter.ActualValue;
161      var interpreter = InterpreterParameter.ActualValue;
162
163      var model = CreateModel(tree, interpreter, problemData, estimationLimits);
164      var nodes = tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToList();
165      var rows = Enumerable.Range(fitnessCalculationPartition.Start, fitnessCalculationPartition.Size).ToList();
[11145]166      var prunedSubtrees = 0;
167      var prunedTrees = 0;
[12745]168      var prunedNodes = 0;
[10368]169
[12745]170      double qualityForImpactsCalculation = double.NaN;
[10368]171
[11145]172      for (int i = 0; i < nodes.Count; ++i) {
173        var node = nodes[i];
[10368]174        if (node is ConstantTreeNode) continue;
175
[11145]176        double impactValue, replacementValue;
[12745]177        double newQualityForImpacts;
178        ImpactValuesCalculator.CalculateImpactAndReplacementValues(model, node, problemData, rows, out impactValue, out replacementValue, out newQualityForImpacts, qualityForImpactsCalculation);
[10368]179
[12745]180        if (PruneOnlyZeroImpactNodes && !impactValue.IsAlmost(0.0)) continue;
181        if (!PruneOnlyZeroImpactNodes && impactValue > NodeImpactThreshold) continue;
[10368]182
[11145]183        var constantNode = (ConstantTreeNode)node.Grammar.GetSymbol("Constant").CreateTreeNode();
184        constantNode.Value = replacementValue;
185
[12745]186        var length = node.GetLength();
[10368]187        ReplaceWithConstant(node, constantNode);
[12745]188        i += length - 1; // skip subtrees under the node that was folded
[10368]189
[12745]190        prunedSubtrees++;
191        prunedNodes += length;
[11145]192
[12745]193        qualityForImpactsCalculation = newQualityForImpacts;
[10368]194      }
195
[11145]196      if (prunedSubtrees > 0) prunedTrees = 1;
197      PrunedSubtreesParameter.ActualValue = new IntValue(prunedSubtrees);
198      PrunedTreesParameter.ActualValue = new IntValue(prunedTrees);
[12745]199      PrunedNodesParameter.ActualValue = new IntValue(prunedNodes);
[11145]200
[12745]201      if (prunedSubtrees > 0) // if nothing was pruned then there's no need to re-evaluate the tree
202        QualityParameter.ActualValue.Value = Evaluate(model);
203
[10368]204      return base.Apply();
205    }
[11145]206
[12745]207    protected static void ReplaceWithConstant(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement) {
[10368]208      var parent = original.Parent;
209      var i = parent.IndexOfSubtree(original);
210      parent.RemoveSubtree(i);
211      parent.InsertSubtree(i, replacement);
212    }
213  }
214}
Note: See TracBrowser for help on using the repository browser.