Changeset 13665
- Timestamp:
- 03/08/16 11:33:21 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
TabularUnified branches/HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks/3.3/SymbolicDataAnalysisVariableImpactsAnalyzer.cs ¶
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Linq;
using HeuristicLab.Analysis;
// NOTE(review): the next using is hidden ("…") in the changeset view; inferred from the use of Cloner/IDeepCloneable/Median() - confirm.
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
// NOTE(review): the next two usings are hidden ("…") in the changeset view; inferred from the use of [StorableClass] and IRegressionProblemData - confirm.
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;

namespace HeuristicLab.VariableInteractionNetworks {
  /// <summary>
  /// Analyzer that records, per allowed input variable, the average impact of that variable
  /// over the best trees of the current population.
  /// The impact of a variable on a tree is the tree's quality on the original training data
  /// minus its quality after the variable's training values were replaced by their median.
  /// One value per variable is appended to the "Average variable impacts" data table on every
  /// analysis step that passes the update-interval and last-generations gates.
  /// </summary>
  [Item("SymbolicRegressionVariableImpactsAnalyzer", "An analyzer which calculates variable impacts based on the average node impacts from the tree")]
  [StorableClass]
  public class SymbolicRegressionVariableImpactsAnalyzer : SymbolicDataAnalysisAnalyzer {
    private const string UpdateCounterParameterName = "UpdateCounter";
    private const string UpdateIntervalParameterName = "UpdateInterval";
    public const string QualityParameterName = "Quality";
    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
    private const string MaxCOIterationsParameterName = "MaxCOIterations";
    private const string EstimationLimitsParameterName = "EstimationLimits";
    private const string EvaluatorParameterName = "Evaluator";
    private const string VariableImpactsParameterName = "AverageVariableImpacts";
    private const string PercentageBestParameterName = "PercentageBest";
    private const string LastGenerationsParameterName = "LastGenerations";
    private const string MaximumGenerationsParameterName = "MaximumGenerations";
    private const string OptimizeConstantsParameterName = "OptimizeConstants";
    private const string PruneTreesParameterName = "PruneTrees";

    // Name under which the data table is published in the result collection.
    private const string VariableImpactsResultName = "Average variable impacts";

    // Helper objects; they carry no storable attribute in this file and are therefore
    // re-created by every constructor and by the after-deserialization hook.
    private SymbolicDataAnalysisExpressionTreeSimplifier simplifier;
    private SymbolicRegressionSolutionImpactValuesCalculator impactsCalculator;

    #region parameters
    public ValueParameter<IntValue> UpdateCounterParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    }
    public ValueParameter<IntValue> UpdateIntervalParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    }
    public IScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }
    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
    }
    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaxCOIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaxCOIterationsParameterName]; }
    }
    public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
      get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
    }
    public ILookupParameter<ISymbolicRegressionSingleObjectiveEvaluator> EvaluatorParameter {
      get { return (ILookupParameter<ISymbolicRegressionSingleObjectiveEvaluator>)Parameters[EvaluatorParameterName]; }
    }
    public ILookupParameter<DataTable> VariableImpactsParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableImpactsParameterName]; }
    }
    public IFixedValueParameter<PercentValue> PercentageBestParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[PercentageBestParameterName]; }
    }
    public IFixedValueParameter<IntValue> LastGenerationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[LastGenerationsParameterName]; }
    }
    public IFixedValueParameter<BoolValue> OptimizeConstantsParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeConstantsParameterName]; }
    }
    public IFixedValueParameter<BoolValue> PruneTreesParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[PruneTreesParameterName]; }
    }
    private ILookupParameter<IntValue> MaximumGenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[MaximumGenerationsParameterName]; }
    }
    #endregion

    #region parameter properties
    /// <summary>Number of Apply() calls since the analysis last ran.</summary>
    public int UpdateCounter {
      get { return UpdateCounterParameter.Value.Value; }
      set { UpdateCounterParameter.Value.Value = value; }
    }
    /// <summary>The analysis runs when <see cref="UpdateCounter"/> reaches this value.</summary>
    public int UpdateInterval {
      get { return UpdateIntervalParameter.Value.Value; }
      set { UpdateIntervalParameter.Value.Value = value; }
    }
    #endregion

    /// <summary>Creates the analyzer with all of its parameters and their default values.</summary>
    public SymbolicRegressionVariableImpactsAnalyzer() {
      #region add parameters
      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The individual qualities."));
      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
      Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
      Parameters.Add(new FixedValueParameter<IntValue>(MaxCOIterationsParameterName, new IntValue(3)));
      Parameters.Add(new LookupParameter<DataTable>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run."));
      // Apply() multiplies the population size by this value, so it is a fraction: 1.0 selects
      // the whole population. (The former default of 100 asked for 100x the population and only
      // worked because Take() stops at the end of the sequence.)
      Parameters.Add(new FixedValueParameter<PercentValue>(PercentageBestParameterName, new PercentValue(1.0)));
      Parameters.Add(new FixedValueParameter<IntValue>(LastGenerationsParameterName, new IntValue(10)));
      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeConstantsParameterName, new BoolValue(false)));
      Parameters.Add(new FixedValueParameter<BoolValue>(PruneTreesParameterName, new BoolValue(false)));
      Parameters.Add(new LookupParameter<IntValue>(MaximumGenerationsParameterName, "The maximum number of generations which should be processed."));
      Parameters.Add(new LookupParameter<ISymbolicRegressionSingleObjectiveEvaluator>(EvaluatorParameterName));
      #endregion

      impactsCalculator = new SymbolicRegressionSolutionImpactValuesCalculator();
      simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
    }

    [StorableConstructor]
    protected SymbolicRegressionVariableImpactsAnalyzer(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Re-creates the helper objects and adds the "Evaluator" parameter when it is missing,
    /// which is the case for instances stored by the previous revision (its constructor did
    /// not add that parameter).
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      impactsCalculator = new SymbolicRegressionSolutionImpactValuesCalculator();
      simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();

      if (!Parameters.ContainsKey(EvaluatorParameterName))
        Parameters.Add(new LookupParameter<ISymbolicRegressionSingleObjectiveEvaluator>(EvaluatorParameterName));
    }

    /// <summary>Cloning constructor; the helper objects are created fresh instead of being cloned.</summary>
    protected SymbolicRegressionVariableImpactsAnalyzer(SymbolicRegressionVariableImpactsAnalyzer original, Cloner cloner)
      : base(original, cloner) {
      impactsCalculator = new SymbolicRegressionSolutionImpactValuesCalculator();
      simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionVariableImpactsAnalyzer(this, cloner);
    }

    /// <summary>
    /// Runs one analysis step: selects the best trees, optionally optimizes their constants and
    /// prunes them, and appends the average impact of every allowed input variable to the
    /// "Average variable impacts" data table.
    /// </summary>
    /// <returns>The operation returned by the base analyzer.</returns>
    public override IOperation Apply() {
      #region Update counter & update interval
      UpdateCounter++;
      if (UpdateCounter != UpdateInterval) {
        return base.Apply();
      }
      UpdateCounter = 0;
      #endregion

      var results = ResultCollectionParameter.ActualValue;

      // Restrict the analysis to the last `lastGen` generations. A non-positive value disables
      // the gate, and in that case neither MaximumGenerations nor the "Generations" result is
      // required to exist.
      int lastGen = LastGenerationsParameter.Value.Value;
      if (lastGen > 0) {
        int maxGen = MaximumGenerationsParameter.ActualValue.Value;
        int gen = ((IntValue)results["Generations"].Value).Value;
        if (gen < maxGen - lastGen) {
          return base.Apply();
        }
      }

      var trees = SymbolicExpressionTree.ToArray();
      var qualities = QualityParameter.ActualValue.ToArray();

      // Sort ascending by quality, then reverse so that the largest quality values come first.
      // NOTE(review): this treats a larger quality as better - confirm the evaluator maximizes.
      Array.Sort(qualities, trees);
      Array.Reverse(qualities);
      Array.Reverse(trees);

      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var problemData = ProblemDataParameter.ActualValue;
      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
      var constantOptimizationIterations = MaxCOIterationsParameter.Value.Value; // fixed value parameter => Value
      var estimationLimits = EstimationLimitsParameter.ActualValue; // lookup parameter => ActualValue
      var percentageBest = PercentageBestParameter.Value.Value;
      var optimizeConstants = OptimizeConstantsParameter.Value.Value;
      var pruneTrees = PruneTreesParameter.Value.Value;

      var allowedInputVariables = problemData.AllowedInputVariables.ToList();

      // Fetch the data table from the scope, or create and publish it on the first run.
      var dataTable = VariableImpactsParameter.ActualValue;
      if (dataTable == null) {
        dataTable = new DataTable("Variable impacts", "Average impact of variables over the population");
        dataTable.VisualProperties.XAxisTitle = "Generation";
        dataTable.VisualProperties.YAxisTitle = "Average variable impact";
        foreach (var v in allowedInputVariables) {
          dataTable.Rows.Add(new DataRow(v) { VisualProperties = { StartIndexZero = true } });
        }
        VariableImpactsParameter.ActualValue = dataTable;
        results.Add(new Result(VariableImpactsResultName, "The relative variable relevance calculated as the average relative variable frequency over the whole run.", dataTable));
      }

      // Work on simplified clones of the best trees; the trees in the scope stay untouched.
      int nTrees = (int)Math.Round(trees.Length * percentageBest);
      var simplifiedTrees = trees.Take(nTrees)
                                 .Select(x => simplifier.Simplify((ISymbolicExpressionTree)x.Clone()))
                                 .ToList();

      // Baseline quality per selected tree, copied into plain doubles so that updating a
      // baseline below never writes into the DoubleValue objects stored in the scope.
      var baselineQualities = qualities.Take(simplifiedTrees.Count).Select(q => q.Value).ToArray();

      if (optimizeConstants) {
        for (int i = 0; i < simplifiedTrees.Count; ++i) {
          baselineQualities[i] = SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, simplifiedTrees[i], problemData, problemData.TrainingIndices, applyLinearScaling, constantOptimizationIterations, estimationLimits.Upper, estimationLimits.Lower);
        }
      }

      if (pruneTrees) {
        // NOTE(review): the baseline quality is not re-evaluated after pruning; if pruning
        // changes a tree's output the baseline no longer belongs to exactly this tree.
        for (int i = 0; i < simplifiedTrees.Count; ++i) {
          simplifiedTrees[i] = SymbolicRegressionPruningOperator.Prune(simplifiedTrees[i], impactsCalculator, interpreter, problemData, estimationLimits, problemData.TrainingIndices);
        }
      }

      // The evaluator is only needed (and only looked up) when constants are not optimized.
      var evaluator = optimizeConstants ? null : EvaluatorParameter.ActualValue;

      foreach (var variableName in allowedInputVariables) {
        // indices of the selected trees that contain the variable
        var treeIndices = Enumerable.Range(0, simplifiedTrees.Count)
                                    .Where(i => ContainsVariable(simplifiedTrees[i], variableName))
                                    .ToList();

        // A variable that occurs in none of the selected trees has no measurable impact;
        // report 0 instead of dividing by zero (which would produce NaN).
        if (treeIndices.Count == 0) {
          dataTable.Rows[variableName].Values.Add(0d);
          continue;
        }

        var pd = CreateMedianProblemData(problemData, variableName);

        var impactSum = 0d;
        foreach (var treeIndex in treeIndices) {
          var tree = simplifiedTrees[treeIndex];
          double newQuality;
          if (optimizeConstants) {
            // NOTE(review): OptimizeConstants presumably writes the optimized constants back
            // into the tree it receives; use a clone so that tuning against the perturbed data
            // cannot leak into the evaluation of the remaining variables.
            var treeCopy = (ISymbolicExpressionTree)tree.Clone();
            newQuality = SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, treeCopy, pd, pd.TrainingIndices, applyLinearScaling, constantOptimizationIterations, estimationLimits.Upper, estimationLimits.Lower);
          } else {
            newQuality = evaluator.Evaluate(this.ExecutionContext, tree, pd, pd.TrainingIndices);
          }
          impactSum += baselineQualities[treeIndex] - newQuality; // impact calculated this way may be negative
        }
        dataTable.Rows[variableName].Values.Add(impactSum / treeIndices.Count);
      }

      results[VariableImpactsResultName].Value = dataTable;
      return base.Apply();
    }

    /// <summary>
    /// Builds a copy of <paramref name="problemData"/> in which every training row of
    /// <paramref name="variableName"/> holds the median of that variable's training values.
    /// Training and test partitions are copied from the original problem data.
    /// </summary>
    private static IRegressionProblemData CreateMedianProblemData(IRegressionProblemData problemData, string variableName) {
      var median = problemData.Dataset.GetDoubleValues(variableName, problemData.TrainingIndices).Median();
      var ds = new ModifiableDataset(problemData.Dataset.DoubleVariables, problemData.Dataset.DoubleVariables.Select(x => problemData.Dataset.GetReadOnlyDoubleValues(x).ToList()));
      foreach (var row in problemData.TrainingIndices) {
        ds.SetVariableValue(median, variableName, row);
      }

      var pd = new RegressionProblemData(ds, problemData.AllowedInputVariables, problemData.TargetVariable);
      pd.TrainingPartition.Start = problemData.TrainingPartition.Start;
      pd.TrainingPartition.End = problemData.TrainingPartition.End;
      pd.TestPartition.Start = problemData.TestPartition.Start;
      pd.TestPartition.End = problemData.TestPartition.End;
      return pd;
    }

    /// <summary>Returns true if any variable node of <paramref name="tree"/> refers to <paramref name="variableName"/>.</summary>
    private static bool ContainsVariable(ISymbolicExpressionTree tree, string variableName) {
      return tree.IterateNodesPrefix().OfType<VariableTreeNode>().Any(x => x.VariableName == variableName);
    }
  }
}
Note: See TracChangeset
for help on using the changeset viewer.