Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks/3.3/SymbolicDataAnalysisVariableImpactsAnalyzer.cs @ 12460

Last change on this file since 12460 was 12460, checked in by arapeanu, 9 years ago

#2288: SymbolicDataAnalysisVariableImpactsAnalyzer for computing the average relative variable frequencies over a GP with SR run

File size: 12.3 KB
Line 
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
13
namespace HeuristicLab.VariableInteractionNetworks
{
    /// <summary>
    /// Analyzer that, every <see cref="UpdateInterval"/> calls, simplifies the best part of the
    /// population, computes the average impact of each variable node per tree and appends the
    /// per-variable average over all trees containing that variable to a data table.
    /// </summary>
    [Item("SymbolicRegressionVariableImpactsAnalyzer", "An analyzer which calculates variable impacts based on the average node impacts from the tree")]
    [StorableClass]
    public class SymbolicRegressionVariableImpactsAnalyzer : SymbolicDataAnalysisAnalyzer
    {
        private const string UpdateCounterParameterName = "UpdateCounter";
        private const string UpdateIntervalParameterName = "UpdateInterval";
        public const string QualityParameterName = "Quality";
        private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
        private const string ProblemDataParameterName = "ProblemData";
        private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
        private const string MaxCOIterationsParameterName = "MaxCOIterations";
        private const string EstimationLimitsParameterName = "EstimationLimits";
        private const string EvaluatorParameterName = "Evaluator";

        private const string VariableFrequenciesParameterName = "VariableFrequencies";
        private const string VariableImpactsParameterName = "AverageVariableImpacts";
        private const string PercentageBestParameterName = "PercentageBest";
        private const string LastGenerationsParameterName = "LastGenerations";
        private const string MaximumGenerationsParameterName = "MaximumGenerations";
        private const string OptimizedConstantsParameterName = "OptimizedConstants";

        // Name and description of the entry written to the result collection.
        private const string VariableImpactsResultName = "Average variable impacts";
        private const string VariableImpactsResultDescription = "The relative variable relevance calculated as the average relative variable frequency over the whole run.";

        // Initialized inline so that they are also available on instances created through the
        // cloning constructor and the storable (deserialization) constructor; previously they were
        // only assigned in the default constructor and were null on every clone.
        private readonly SymbolicDataAnalysisExpressionTreeSimplifier simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
        private readonly SymbolicRegressionSolutionImpactValuesCalculator impactsCalculator = new SymbolicRegressionSolutionImpactValuesCalculator();

        #region parameters
        public ValueParameter<IntValue> UpdateCounterParameter
        {
            get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
        }
        public ValueParameter<IntValue> UpdateIntervalParameter
        {
            get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
        }
        public IScopeTreeLookupParameter<DoubleValue> QualityParameter
        {
            get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
        }
        public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter
        {
            get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
        }
        public ILookupParameter<IRegressionProblemData> ProblemDataParameter
        {
            get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
        }
        public ILookupParameter<BoolValue> ApplyLinearScalingParameter
        {
            get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
        }
        public IFixedValueParameter<IntValue> MaxCOIterationsParameter
        {
            get { return (IFixedValueParameter<IntValue>)Parameters[MaxCOIterationsParameterName]; }
        }
        public ILookupParameter<DoubleLimit> EstimationLimitsParameter
        {
            get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
        }
        public ILookupParameter<ISymbolicRegressionSingleObjectiveEvaluator> EvaluatorParameter
        {
            get { return (ILookupParameter<ISymbolicRegressionSingleObjectiveEvaluator>)Parameters[EvaluatorParameterName]; }
        }
        public ILookupParameter<DataTable> VariableImpactsParameter
        {
            get { return (ILookupParameter<DataTable>)Parameters[VariableImpactsParameterName]; }
        }
        public IFixedValueParameter<PercentValue> PercentageBestParameter
        {
            get { return (IFixedValueParameter<PercentValue>)Parameters[PercentageBestParameterName]; }
        }
        public IFixedValueParameter<IntValue> LastGenerationsParameter
        {
            get { return (IFixedValueParameter<IntValue>)Parameters[LastGenerationsParameterName]; }
        }
        public IFixedValueParameter<BoolValue> OptimizedParameter
        {
            get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizedConstantsParameterName]; }
        }
        private ILookupParameter<IntValue> MaximumGenerationsParameter
        {
            get { return (ILookupParameter<IntValue>)Parameters[MaximumGenerationsParameterName]; }
        }
        #endregion

        #region parameter properties
        /// <summary>Number of <see cref="Apply"/> calls since the last analysis.</summary>
        public int UpdateCounter
        {
            get { return UpdateCounterParameter.Value.Value; }
            set { UpdateCounterParameter.Value.Value = value; }
        }
        /// <summary>The analysis runs when <see cref="UpdateCounter"/> reaches this value.</summary>
        public int UpdateInterval
        {
            get { return UpdateIntervalParameter.Value.Value; }
            set { UpdateIntervalParameter.Value.Value = value; }
        }
        #endregion

        /// <summary>
        /// Creates the analyzer and registers all of its parameters.
        /// </summary>
        public SymbolicRegressionVariableImpactsAnalyzer()
        {
            Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
            Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
            Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
            Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName));
            Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The individual qualities."));
            Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
            Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
            Parameters.Add(new FixedValueParameter<IntValue>(MaxCOIterationsParameterName));
            // Registered so that the EvaluatorParameter accessor no longer fails with a missing key.
            Parameters.Add(new LookupParameter<ISymbolicRegressionSingleObjectiveEvaluator>(EvaluatorParameterName));
            Parameters.Add(new LookupParameter<DataTable>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run."));
            Parameters.Add(new FixedValueParameter<PercentValue>(PercentageBestParameterName));
            Parameters.Add(new FixedValueParameter<IntValue>(LastGenerationsParameterName));
            Parameters.Add(new FixedValueParameter<BoolValue>(OptimizedConstantsParameterName));
            Parameters.Add(new LookupParameter<IntValue>(MaximumGenerationsParameterName, "The maximum number of generations which should be processed."));
        }

        [StorableConstructor]
        protected SymbolicRegressionVariableImpactsAnalyzer(bool deserializing) : base(deserializing) { }

        protected SymbolicRegressionVariableImpactsAnalyzer(SymbolicRegressionVariableImpactsAnalyzer original, Cloner cloner)
            : base(original, cloner)
        {
        }

        public override IDeepCloneable Clone(Cloner cloner)
        {
            return new SymbolicRegressionVariableImpactsAnalyzer(this, cloner);
        }

        /// <summary>
        /// Increments the update counter and, once it equals the update interval, appends one value
        /// per allowed input variable to the variable impacts table: the sum of the per-tree average
        /// node impacts divided by the number of analyzed trees that contain the variable
        /// (0 when no analyzed tree contains it).
        /// </summary>
        /// <returns>The operation returned by the base implementation.</returns>
        public override IOperation Apply()
        {
            #region Update counter & update interval
            UpdateCounter++;
            if (UpdateCounter != UpdateInterval)
            {
                return base.Apply();
            }
            UpdateCounter = 0;
            #endregion

            var results = ResultCollectionParameter.ActualValue;
            int maxGen = MaximumGenerationsParameter.ActualValue.Value;
            int gen = ((IntValue)results["Generations"].Value).Value;
            int lastGen = Math.Min(LastGenerationsParameter.Value.Value, maxGen);

            // NOTE(review): as written this skips the analysis once fewer than lastGen generations
            // remain, i.e. it skips the *final* generations. The parameter name suggests the opposite
            // may have been intended - confirm before changing; behavior is kept as-is here.
            if (maxGen - gen < lastGen)
            {
                return base.Apply();
            }

            var trees = SymbolicExpressionTree.ToArray();
            var qualities = QualityParameter.ActualValue.ToArray();

            // Order trees by descending quality value.
            // NOTE(review): this treats larger quality values as better - confirm for minimization problems.
            Array.Sort(qualities, trees);
            Array.Reverse(qualities);
            Array.Reverse(trees);

            var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
            var problemData = ProblemDataParameter.ActualValue;
            var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
            var maxCOIterations = MaxCOIterationsParameter.Value.Value; // fixed value parameter => Value
            var estimationLimits = EstimationLimitsParameter.ActualValue; // lookup parameter => ActualValue
            var percentageBest = PercentageBestParameter.Value.Value;
            var optimizedConstants = OptimizedParameter.Value.Value;

            var allowedInputVariables = problemData.AllowedInputVariables.ToList();
            var variableImpacts = allowedInputVariables.ToDictionary(x => x, x => 0.0);
            var variableCounts = allowedInputVariables.ToDictionary(x => x, x => 0);

            var datatable = GetOrCreateImpactsTable(results, allowedInputVariables);
            int nTrees = (int)Math.Round(trees.Length * percentageBest);

            // Simplify each selected tree exactly once. The query is materialized because a deferred
            // query would re-run the simplifier on every enumeration.
            var simplifiedTrees = trees.Take(nTrees).Select(x => simplifier.Simplify(x)).ToList();
            foreach (var simplifiedTree in simplifiedTrees)
            {
                if (optimizedConstants)
                {
                    SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, simplifiedTree, problemData, problemData.TrainingIndices, applyLinearScaling, maxCOIterations, estimationLimits.Upper, estimationLimits.Lower);
                }

                var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, simplifiedTree, estimationLimits.Lower, estimationLimits.Upper, problemData, problemData.TrainingIndices, applyLinearScaling);

                var model = new SymbolicRegressionModel(simplifiedTree, interpreter, estimationLimits.Lower, estimationLimits.Upper);
                var variables = simplifiedTree.IterateNodesPrefix().OfType<VariableTreeNode>().GroupBy(x => x.VariableName);

                foreach (var g in variables)
                {
                    var avgImpact = g.Average(x => impactsCalculator.CalculateImpactValue(model, x, problemData, problemData.TrainingIndices, quality));
                    variableImpacts[g.Key] += avgImpact;
                    variableCounts[g.Key]++; // one more analyzed tree that contains this variable
                }
            }

            foreach (var pair in variableImpacts)
            {
                int count = variableCounts[pair.Key];
                // A variable that occurs in none of the analyzed trees gets 0 instead of 0.0 / 0 = NaN.
                datatable.Rows[pair.Key].Values.Add(count > 0 ? pair.Value / count : 0.0);
            }
            results[VariableImpactsResultName].Value = datatable;
            return base.Apply();
        }

        /// <summary>
        /// Returns the data table stored in <see cref="VariableImpactsParameter"/>, creating it with
        /// one row per variable name if it does not exist yet, and makes sure the result collection
        /// has an entry for it.
        /// </summary>
        /// <param name="results">Result collection that receives the table.</param>
        /// <param name="variableNames">Names used for the rows of a newly created table.</param>
        /// <returns>The existing or newly created data table.</returns>
        private DataTable GetOrCreateImpactsTable(ResultCollection results, IEnumerable<string> variableNames)
        {
            var datatable = VariableImpactsParameter.ActualValue;
            if (datatable == null)
            {
                datatable = new DataTable("Variable impacts", "Average impact of variables over the population");
                datatable.VisualProperties.XAxisTitle = "Generation";
                datatable.VisualProperties.YAxisTitle = "Average variable impact";
                foreach (var v in variableNames)
                {
                    datatable.Rows.Add(new DataRow(v) { VisualProperties = { StartIndexZero = true } });
                }
                VariableImpactsParameter.ActualValue = datatable;
            }

            // Guarded so that an already existing result entry does not make Add throw.
            if (!results.ContainsKey(VariableImpactsResultName))
            {
                results.Add(new Result(VariableImpactsResultName, VariableImpactsResultDescription, datatable));
            }
            return datatable;
        }
    }
}
Note: See TracBrowser for help on using the repository browser.