Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks/3.3/SymbolicDataAnalysisVariableImpactsAnalyzer.cs @ 13321

Last change on this file since 13321 was 12568, checked in by arapeanu, 9 years ago

#2288: Fixed bug in SymbolicDataAnalysisVariableImpactsAnalyzer (simplification before optimization) and in VariableInteractionNetworkView for computing the adjacency matrix and sorting the variable impacts matrix

File size: 13.6 KB
Line 
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
14
namespace HeuristicLab.VariableInteractionNetworks
{
    /// <summary>
    /// Analyzer that, once the run has reached its last generations, estimates an impact value for each
    /// allowed input variable from the best trees of the current population and appends the values to a
    /// data table stored in the result collection.
    /// </summary>
    /// <remarks>
    /// For every selected tree the impact of a variable is the mean impact value of all nodes that
    /// reference the variable. These per-tree values are summed up and divided by the number of selected
    /// (simplified) trees that contain the variable.
    /// </remarks>
    [Item("SymbolicRegressionVariableImpactsAnalyzer", "An analyzer which calculates variable impacts based on the average node impacts from the tree")]
    [StorableClass]
    public class SymbolicRegressionVariableImpactsAnalyzer : SymbolicDataAnalysisAnalyzer
    {
        private const string UpdateCounterParameterName = "UpdateCounter";
        private const string UpdateIntervalParameterName = "UpdateInterval";
        public const string QualityParameterName = "Quality";
        private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
        private const string ProblemDataParameterName = "ProblemData";
        private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
        private const string MaxCOIterationsParameterName = "MaxCOIterations";
        private const string EstimationLimitsParameterName = "EstimationLimits";
        private const string EvaluatorParameterName = "Evaluator";

        private const string VariableImpactsParameterName = "AverageVariableImpacts";
        private const string PercentageBestParameterName = "PercentageBest";
        private const string LastGenerationsParameterName = "LastGenerations";
        private const string MaximumGenerationsParameterName = "MaximumGenerations";
        private const string OptimizeConstantsParameterName = "OptimizeConstants";
        private const string PruneTreesParameterName = "PruneTrees";

        // Names of the entries this analyzer reads from / writes to the result collection.
        private const string GenerationsResultName = "Generations";
        private const string VariableImpactsResultName = "Average variable impacts";

        // Stateless helpers; they are not persisted and therefore re-created after construction,
        // cloning and deserialization (see CreateHelpers).
        private SymbolicDataAnalysisExpressionTreeSimplifier simplifier;
        private SymbolicRegressionSolutionImpactValuesCalculator impactsCalculator;

        #region parameters
        /// <summary>Number of calls to <see cref="Apply"/> since the analysis was last attempted.</summary>
        public ValueParameter<IntValue> UpdateCounterParameter
        {
            get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
        }
        /// <summary>Number of calls to <see cref="Apply"/> between two analysis attempts.</summary>
        public ValueParameter<IntValue> UpdateIntervalParameter
        {
            get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
        }
        /// <summary>The individual qualities; used to rank the trees.</summary>
        public IScopeTreeLookupParameter<DoubleValue> QualityParameter
        {
            get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
        }
        /// <summary>Interpreter used to evaluate the trees.</summary>
        public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter
        {
            get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
        }
        /// <summary>Regression problem data providing the allowed input variables and the training rows.</summary>
        public ILookupParameter<IRegressionProblemData> ProblemDataParameter
        {
            get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
        }
        /// <summary>Flag passed on to constant optimization and quality calculation.</summary>
        public ILookupParameter<BoolValue> ApplyLinearScalingParameter
        {
            get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
        }
        /// <summary>Iteration limit passed to the constant optimization.</summary>
        public IFixedValueParameter<IntValue> MaxCOIterationsParameter
        {
            get { return (IFixedValueParameter<IntValue>)Parameters[MaxCOIterationsParameterName]; }
        }
        /// <summary>Lower and upper limits for the estimated values.</summary>
        public ILookupParameter<DoubleLimit> EstimationLimitsParameter
        {
            get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
        }
        /// <summary>Lookup for the evaluator.</summary>
        /// <remarks>
        /// NOTE(review): the constructor in this class does not register a parameter named "Evaluator",
        /// so this getter presumably fails unless the parameter is added elsewhere - confirm before use.
        /// </remarks>
        public ILookupParameter<ISymbolicRegressionSingleObjectiveEvaluator> EvaluatorParameter
        {
            get { return (ILookupParameter<ISymbolicRegressionSingleObjectiveEvaluator>)Parameters[EvaluatorParameterName]; }
        }
        /// <summary>The data table holding one row of impact values per allowed input variable.</summary>
        public ILookupParameter<DataTable> VariableImpactsParameter
        {
            get { return (ILookupParameter<DataTable>)Parameters[VariableImpactsParameterName]; }
        }
        /// <summary>Fraction of the population (best trees first) that is analyzed.</summary>
        public IFixedValueParameter<PercentValue> PercentageBestParameter
        {
            get { return (IFixedValueParameter<PercentValue>)Parameters[PercentageBestParameterName]; }
        }
        /// <summary>Number of generations before the maximum generation from which on the analysis runs.</summary>
        public IFixedValueParameter<IntValue> LastGenerationsParameter
        {
            get { return (IFixedValueParameter<IntValue>)Parameters[LastGenerationsParameterName]; }
        }
        /// <summary>Whether the constants of the selected trees are optimized before simplification.</summary>
        public IFixedValueParameter<BoolValue> OptimizeConstantsParameter
        {
            get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeConstantsParameterName]; }
        }
        /// <summary>Whether the simplified trees are pruned before their node impacts are calculated.</summary>
        public IFixedValueParameter<BoolValue> PruneTreesParameter
        {
            get { return (IFixedValueParameter<BoolValue>)Parameters[PruneTreesParameterName]; }
        }
        private ILookupParameter<IntValue> MaximumGenerationsParameter
        {
            get { return (ILookupParameter<IntValue>)Parameters[MaximumGenerationsParameterName]; }
        }
        #endregion

        #region parameter properties
        /// <summary>Convenience accessor for the value of <see cref="UpdateCounterParameter"/>.</summary>
        public int UpdateCounter
        {
            get { return UpdateCounterParameter.Value.Value; }
            set { UpdateCounterParameter.Value.Value = value; }
        }
        /// <summary>Convenience accessor for the value of <see cref="UpdateIntervalParameter"/>.</summary>
        public int UpdateInterval
        {
            get { return UpdateIntervalParameter.Value.Value; }
            set { UpdateIntervalParameter.Value.Value = value; }
        }
        #endregion

        /// <summary>
        /// Creates the analyzer and registers its parameters. The update counter starts at 0, the update
        /// interval at 1, and constant optimization and pruning are switched off.
        /// </summary>
        /// <remarks>
        /// NOTE(review): MaxCOIterations, PercentageBest and LastGenerations are created without an explicit
        /// value, so their initial values are whatever FixedValueParameter supplies - confirm these are
        /// sensible (a PercentageBest of 0 would select no trees at all).
        /// </remarks>
        public SymbolicRegressionVariableImpactsAnalyzer()
        {
            Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
            Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
            Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
            Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName));
            Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The individual qualities."));
            Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
            Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
            Parameters.Add(new FixedValueParameter<IntValue>(MaxCOIterationsParameterName));
            Parameters.Add(new LookupParameter<DataTable>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run."));
            Parameters.Add(new FixedValueParameter<PercentValue>(PercentageBestParameterName));
            Parameters.Add(new FixedValueParameter<IntValue>(LastGenerationsParameterName));
            Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeConstantsParameterName, new BoolValue(false)));
            Parameters.Add(new FixedValueParameter<BoolValue>(PruneTreesParameterName, new BoolValue(false)));
            Parameters.Add(new LookupParameter<IntValue>(MaximumGenerationsParameterName, "The maximum number of generations which should be processed."));

            CreateHelpers();
        }

        /// <summary>Constructor used by the persistence framework; the helpers are created in the deserialization hook.</summary>
        [StorableConstructor]
        protected SymbolicRegressionVariableImpactsAnalyzer(bool deserializing) : base(deserializing) { }

        /// <summary>Re-creates the non-persisted helper objects after deserialization.</summary>
        [StorableHook(HookType.AfterDeserialization)]
        private void AfterDeserialization()
        {
            CreateHelpers();
        }

        /// <summary>Cloning constructor; the clone gets its own helper objects.</summary>
        protected SymbolicRegressionVariableImpactsAnalyzer(SymbolicRegressionVariableImpactsAnalyzer original, Cloner cloner)
            : base(original, cloner)
        {
            CreateHelpers();
        }

        /// <summary>Creates a deep clone of this analyzer.</summary>
        public override IDeepCloneable Clone(Cloner cloner)
        {
            return new SymbolicRegressionVariableImpactsAnalyzer(this, cloner);
        }

        /// <summary>
        /// Updates the variable impacts table if the update interval has elapsed and the run is within the
        /// last <c>LastGenerations</c> generations before <c>MaximumGenerations</c>.
        /// </summary>
        /// <remarks>
        /// Expects the result collection to contain an <see cref="IntValue"/> result named "Generations".
        /// </remarks>
        /// <returns>The operation returned by the base implementation.</returns>
        public override IOperation Apply()
        {
            #region Update counter & update interval
            UpdateCounter++;
            // '<' instead of '!=': if the interval is lowered below the current counter value the
            // analyzer must still fire instead of being skipped forever.
            if (UpdateCounter < UpdateInterval)
            {
                return base.Apply();
            }
            UpdateCounter = 0;
            #endregion

            var results = ResultCollectionParameter.ActualValue;
            int maxGen = MaximumGenerationsParameter.ActualValue.Value;
            int gen = ((IntValue)results[GenerationsResultName].Value).Value;
            int lastGen = LastGenerationsParameter.Value.Value;

            // Only the last generations of the run are analyzed.
            if (gen < maxGen - lastGen)
            {
                return base.Apply();
            }

            var trees = SymbolicExpressionTree.ToArray();
            var qualities = QualityParameter.ActualValue.ToArray();

            // Ascending sort by quality followed by a reversal puts the largest quality value first.
            // NOTE(review): this assumes that a larger quality value is better - confirm for the evaluator in use.
            Array.Sort(qualities, trees);
            Array.Reverse(trees);

            var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
            var problemData = ProblemDataParameter.ActualValue;
            var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
            var maxCOIterations = MaxCOIterationsParameter.Value.Value; // fixed value parameter => Value
            var estimationLimits = EstimationLimitsParameter.ActualValue; // lookup parameter => ActualValue
            var percentageBest = PercentageBestParameter.Value.Value;
            var optimizeConstants = OptimizeConstantsParameter.Value.Value;
            var pruneTrees = PruneTreesParameter.Value.Value;

            var allowedInputVariables = problemData.AllowedInputVariables.ToList();
            var variableImpacts = allowedInputVariables.ToDictionary(x => x, x => 0.0);

            var datatable = VariableImpactsParameter.ActualValue;
            if (datatable == null)
            {
                datatable = new DataTable("Variable impacts", "Average impact of variables over the population");
                datatable.VisualProperties.XAxisTitle = "Generation";
                datatable.VisualProperties.YAxisTitle = "Average variable impact";
                VariableImpactsParameter.ActualValue = datatable;
            }
            // Also covers a table that already exists but lacks a row for one of the variables.
            EnsureRows(datatable, allowedInputVariables);

            // Work on clones so that the trees of the population are never modified.
            int nTrees = (int)Math.Round(trees.Length * percentageBest);
            var bestTrees = trees.Take(nTrees).Select(x => (ISymbolicExpressionTree)x.Clone()).ToList();

            if (optimizeConstants)
            {
                // NOTE(review): the limits are passed upper-first here, unlike the lower-first order of the
                // other calls below - verify against the OptimizeConstants signature.
                foreach (var tree in bestTrees)
                {
                    SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, tree, problemData, problemData.TrainingIndices, applyLinearScaling, maxCOIterations, estimationLimits.Upper, estimationLimits.Lower);
                }
            }

            var simplifiedTrees = bestTrees.Select(x => simplifier.Simplify(x)).ToList();
            // The counts are taken from the simplified trees, i.e. before the optional pruning step.
            var variableCounts = CountTreesPerVariable(simplifiedTrees, allowedInputVariables);

            for (int i = 0; i < simplifiedTrees.Count; ++i)
            {
                var simplifiedTree = simplifiedTrees[i];

                if (pruneTrees)
                {
                    simplifiedTree = SymbolicRegressionPruningOperator.Prune(simplifiedTree, impactsCalculator, interpreter, problemData, estimationLimits, problemData.TrainingIndices);
                }

                var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, simplifiedTree, estimationLimits.Lower, estimationLimits.Upper, problemData, problemData.TrainingIndices, applyLinearScaling);
                var model = new SymbolicRegressionModel(simplifiedTree, interpreter, estimationLimits.Lower, estimationLimits.Upper);
                var nodesByVariable = simplifiedTree.IterateNodesPrefix().OfType<VariableTreeNode>().GroupBy(x => x.VariableName);

                foreach (var g in nodesByVariable)
                {
                    // A variable that is not an allowed input has no entry and no table row; skip it
                    // instead of failing on the dictionary lookup.
                    if (!variableImpacts.ContainsKey(g.Key))
                    {
                        continue;
                    }

                    var avgImpact = g.Average(x => impactsCalculator.CalculateImpactValue(model, x, problemData, problemData.TrainingIndices, quality));
                    if (double.IsNaN(avgImpact))
                    {
                        avgImpact = 0;
                    }
                    variableImpacts[g.Key] += avgImpact;
                }
            }

            foreach (var pair in variableImpacts)
            {
                int count = variableCounts[pair.Key];
                // A variable that occurs in none of the analyzed trees is reported with impact 0.
                var v = count > 0 ? pair.Value / count : 0;
                datatable.Rows[pair.Key].Values.Add(v);
            }

            // Add the result on first use, otherwise update it; this neither fails when the entry
            // already exists nor when the table was found but the entry is missing.
            if (results.ContainsKey(VariableImpactsResultName))
            {
                results[VariableImpactsResultName].Value = datatable;
            }
            else
            {
                results.Add(new Result(VariableImpactsResultName, "The relative variable relevance calculated as the average relative variable frequency over the whole run.", datatable));
            }
            return base.Apply();
        }

        /// <summary>Creates the helper objects that are not part of the persisted state.</summary>
        private void CreateHelpers()
        {
            impactsCalculator = new SymbolicRegressionSolutionImpactValuesCalculator();
            simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
        }

        /// <summary>Adds a data row to <paramref name="table"/> for every variable name that has none yet.</summary>
        private static void EnsureRows(DataTable table, IEnumerable<string> variableNames)
        {
            foreach (var name in variableNames)
            {
                if (!table.Rows.ContainsKey(name))
                {
                    table.Rows.Add(new DataRow(name) { VisualProperties = { StartIndexZero = true } });
                }
            }
        }

        /// <summary>
        /// Counts, for each of the given variable names, the number of trees that contain at least one
        /// node referencing that variable. Every tree is traversed only once.
        /// </summary>
        private static Dictionary<string, int> CountTreesPerVariable(IEnumerable<ISymbolicExpressionTree> trees, IEnumerable<string> variableNames)
        {
            var counts = variableNames.ToDictionary(x => x, x => 0);
            foreach (var tree in trees)
            {
                var namesInTree = tree.IterateNodesPrefix().OfType<VariableTreeNode>().Select(x => x.VariableName).Distinct();
                foreach (var name in namesInTree)
                {
                    if (counts.ContainsKey(name))
                    {
                        counts[name]++;
                    }
                }
            }
            return counts;
        }
    }
}
Note: See TracBrowser for help on using the repository browser.