Ignore:
Timestamp:
07/01/15 21:06:50 (7 years ago)
Author:
arapeanu
Message:

#2288: Fixed bugs in SymbolicDataAnalysisVariableImpactsAnalyzer (trees were simplified before constant optimization; constants are now optimized first and the trees simplified afterwards) and in VariableInteractionNetworkView (computation of the adjacency matrix and sorting of the variable impacts matrix)

Location:
branches/HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks/3.3
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks/3.3/Plugin.cs.frame

    r12462 r12568  
    4747  [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic", "3.4")]
    4848  [PluginDependency("HeuristicLab.Problems.DataAnalysis", "3.4")]
    49   [PluginDependency("HeuristicLab.Problems.Instances", "3.4")]
     49  [PluginDependency("HeuristicLab.Problems.Instances", "3.3")]
    5050  [PluginDependency("HeuristicLab.Encodings.SymbolicExpressionTreeEncoding", "3.4")] 
    5151  public class Plugin : PluginBase {
  • branches/HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks/3.3/SymbolicDataAnalysisVariableImpactsAnalyzer.cs

    r12460 r12568  
    1111using HeuristicLab.Problems.DataAnalysis.Symbolic;
    1212using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
     13using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    1314
    1415namespace HeuristicLab.VariableInteractionNetworks
     
    3334        private const string LastGenerationsParameterName = "LastGenerations";
    3435        private const string MaximumGenerationsParameterName = "MaximumGenerations";
    35         private const string OptimizedConstantsParameterName = "OptimizedConstants";
    36        
    37         private readonly SymbolicDataAnalysisExpressionTreeSimplifier simplifier;
    38         private readonly SymbolicRegressionSolutionImpactValuesCalculator impactsCalculator;
     36        private const string OptimizeConstantsParameterName = "OptimizeConstants";
     37        private const string PruneTreesParameterName = "PruneTrees";
     38
     39        private SymbolicDataAnalysisExpressionTreeSimplifier simplifier;
     40        private SymbolicRegressionSolutionImpactValuesCalculator impactsCalculator;
    3941
    4042        #region parameters
     
    8789            get { return (IFixedValueParameter<IntValue>)Parameters[LastGenerationsParameterName]; }
    8890        }
    89         public IFixedValueParameter<BoolValue> OptimizedParameter
    90         {
    91             get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizedConstantsParameterName]; }
     91        public IFixedValueParameter<BoolValue> OptimizeConstantsParameter
     92        {
     93            get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeConstantsParameterName]; }
     94        }
     95        public IFixedValueParameter<BoolValue> PruneTreesParameter
     96        {
     97            get { return (IFixedValueParameter<BoolValue>)Parameters[PruneTreesParameterName]; }
    9298        }
    9399        private ILookupParameter<IntValue> MaximumGenerationsParameter
     
    119125            Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
    120126            Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
    121             Parameters.Add(new FixedValueParameter<IntValue>(MaxCOIterationsParameterName));         
     127            Parameters.Add(new FixedValueParameter<IntValue>(MaxCOIterationsParameterName));
    122128            Parameters.Add(new LookupParameter<DataTable>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run."));
    123129            Parameters.Add(new FixedValueParameter<PercentValue>(PercentageBestParameterName));
    124130            Parameters.Add(new FixedValueParameter<IntValue>(LastGenerationsParameterName));
    125             Parameters.Add(new FixedValueParameter<BoolValue>(OptimizedConstantsParameterName));
     131            Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeConstantsParameterName, new BoolValue(false)));
     132            Parameters.Add(new FixedValueParameter<BoolValue>(PruneTreesParameterName, new BoolValue(false)));
    126133            Parameters.Add(new LookupParameter<IntValue>(MaximumGenerationsParameterName, "The maximum number of generations which should be processed."));
    127134
     135            impactsCalculator = new SymbolicRegressionSolutionImpactValuesCalculator();
    128136            simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
    129             impactsCalculator = new SymbolicRegressionSolutionImpactValuesCalculator();
    130137        }
    131138
     
    133140        protected SymbolicRegressionVariableImpactsAnalyzer(bool deserializing) : base(deserializing) { }
    134141
     142        [StorableHook(HookType.AfterDeserialization)]
     143        private void AfterDeserialization()
     144        {
     145            impactsCalculator = new SymbolicRegressionSolutionImpactValuesCalculator();
     146            simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
     147        }
     148
    135149        protected SymbolicRegressionVariableImpactsAnalyzer(SymbolicRegressionVariableImpactsAnalyzer original, Cloner cloner)
    136150            : base(original, cloner)
    137151        {
     152            impactsCalculator = new SymbolicRegressionSolutionImpactValuesCalculator();
     153            simplifier = new SymbolicDataAnalysisExpressionTreeSimplifier();
    138154        }
    139155
     
    158174            int lastGen = LastGenerationsParameter.Value.Value;
    159175
    160             if (lastGen > maxGen)
    161                 lastGen = maxGen;
    162             if (maxGen - gen < lastGen)
     176            if (gen < maxGen - lastGen)
    163177                return base.Apply();
    164178
     
    176190            var estimationLimits = EstimationLimitsParameter.ActualValue; // lookup parameter => ActualValue
    177191            var percentageBest = PercentageBestParameter.Value.Value;
    178             var optimizedConstants = OptimizedParameter.Value.Value;
     192            var optimizeConstants = OptimizeConstantsParameter.Value.Value;
     193            var pruneTrees = PruneTreesParameter.Value.Value;
    179194
    180195            var allowedInputVariables = problemData.AllowedInputVariables.ToList();
     
    188203                VariableImpactsParameter.ActualValue = datatable;
    189204                results.Add(new Result("Average variable impacts", "The relative variable relevance calculated as the average relative variable frequency over the whole run.", new DataTable()));
    190                
     205
    191206                foreach (var v in allowedInputVariables)
    192207                {
     
    197212            datatable = VariableImpactsParameter.ActualValue;
    198213            int nTrees = (int)Math.Round(trees.Length * percentageBest);
    199 
    200             // simplify trees
    201             var simplifiedTrees = trees.Take(nTrees).Select(x => simplifier.Simplify(x));
     214            var bestTrees = trees.Take(nTrees).Select(x => (ISymbolicExpressionTree)x.Clone()).ToList();
     215
     216            if (optimizeConstants)
     217            {
     218                foreach (var tree in bestTrees)
     219                    SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, tree, problemData, problemData.TrainingIndices, applyLinearScaling, maxCOIterations, estimationLimits.Upper, estimationLimits.Lower);
     220            }
     221            var simplifiedTrees = bestTrees.Select(x => simplifier.Simplify(x)).ToList();
    202222            var variableCounts = problemData.AllowedInputVariables.ToDictionary(x => x, x => simplifiedTrees.Count(t => t.IterateNodesPrefix().Any(n => n is VariableTreeNode && ((VariableTreeNode)n).VariableName == x)));
    203             foreach (var simplifiedTree in simplifiedTrees)
    204             {
    205                 if (optimizedConstants == true)
    206                 {
    207                     SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, simplifiedTree, problemData, problemData.TrainingIndices, applyLinearScaling, maxCOIterations, estimationLimits.Upper, estimationLimits.Lower);
    208                 }
     223            for (int i = 0; i < simplifiedTrees.Count; ++i)
     224            {
     225                var simplifiedTree = simplifiedTrees[i];
     226
     227                if (pruneTrees)
     228                    simplifiedTree = SymbolicRegressionPruningOperator.Prune(simplifiedTree, impactsCalculator, interpreter, problemData, estimationLimits, problemData.TrainingIndices);
    209229
    210230                var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, simplifiedTree, estimationLimits.Lower, estimationLimits.Upper, problemData, problemData.TrainingIndices, applyLinearScaling);
     
    216236                {
    217237                    var avgImpact = g.Average(x => impactsCalculator.CalculateImpactValue(model, x, problemData, problemData.TrainingIndices, quality));
     238                    if (double.IsNaN(avgImpact))
     239                        avgImpact = 0;
    218240                    variableImpacts[g.Key] += avgImpact;
    219241                }
     
    222244            foreach (var pair in variableImpacts)
    223245            {
    224                 datatable.Rows[pair.Key].Values.Add(pair.Value / variableCounts[pair.Key]);
     246                var v = variableCounts[pair.Key] > 0 ? pair.Value / variableCounts[pair.Key] : 0;
     247                datatable.Rows[pair.Key].Values.Add(v);
    225248            }
    226249            results["Average variable impacts"].Value = datatable;
Note: See TracChangeset for help on using the changeset viewer.