Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/03/16 18:54:14 (8 years ago)
Author:
gkronber
Message:

created a feature branch for #2650 (support for categorical variables in symb reg) with a first set of changes

work in progress...

Location:
branches/symbreg-factors-2650
Files:
1 edited
1 copied

Legend:

Unmodified
Added
Removed
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs

    r14185 r14232  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Globalization;
    2425using System.Linq;
    2526using HeuristicLab.Analysis;
     
    4142    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    4243    private const string AggregateLaggedVariablesParameterName = "AggregateLaggedVariables";
     44    private const string AggregateFactorVariablesParameterName = "AggregateFactorVariables";
    4345    private const string VariableImpactsParameterName = "VariableImpacts";
    4446
     
    5254    public IValueLookupParameter<BoolValue> AggregateLaggedVariablesParameter {
    5355      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateLaggedVariablesParameterName]; }
     56    }
     57    public IValueLookupParameter<BoolValue> AggregateFactorVariablesParameter {
     58      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateFactorVariablesParameterName]; }
    5459    }
    5560    #endregion
     
    5964      set { AggregateLaggedVariablesParameter.Value = value; }
    6065    }
     66    public BoolValue AggregateFactorVariables {
     67      get { return AggregateFactorVariablesParameter.ActualValue; }
     68      set { AggregateFactorVariablesParameter.Value = value; }
     69    }
    6170    #endregion
    6271    [StorableConstructor]
     
    7079      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run."));
    7180      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateLaggedVariablesParameterName, "Switch that determines whether all references to a variable should be aggregated regardless of time-offsets. Turn off to analyze all variable references with different time offsets separately.", new BoolValue(true)));
     81      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, "Switch that determines whether all references to factor variables should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.", new BoolValue(true)));
     82    }
     83
     84    [StorableHook(HookType.AfterDeserialization)]
     85    private void AfterDeserialization() {
     86      // BackwardsCompatibility3.3
     87      #region Backwards compatible code, remove with 3.4
     88      if (!Parameters.ContainsKey(AggregateFactorVariablesParameterName)) {
     89        Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, "Switch that determines whether all references to factor variables should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.", new BoolValue(true)));
     90      }
     91      #endregion
    7292    }
    7393
     
    93113      int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();
    94114
    95       foreach (var pair in SymbolicDataAnalysisVariableFrequencyAnalyzer.CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value)) {
     115      foreach (var pair in CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value, AggregateFactorVariables.Value)) {
    96116        if (!datatable.Rows.ContainsKey(pair.Key)) {
    97117          // initialize a new row for the variable and pad with zeros
     
    128148    }
    129149
    130     public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateLaggedVariables = true) {
     150    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees,
     151      bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) {
    131152
    132153      var variableFrequencies = trees
    133         .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables))
     154        .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables, aggregateFactorVariables))
    134155        .GroupBy(pair => pair.Key, pair => pair.Value)
    135156        .ToDictionary(g => g.Key, g => (double)g.Sum());
     
    141162    }
    142163
    143     private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree, bool aggregateLaggedVariables = true) {
     164    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree,
     165      bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) {
    144166      Dictionary<string, int> references = new Dictionary<string, int>();
    145167      if (aggregateLaggedVariables) {
     
    151173            var varCondNode = node as VariableConditionTreeNode;
    152174            IncReferenceCount(references, varCondNode.VariableName);
     175          } else if (node.Symbol is FactorVariable) {
     176            var factorNode = node as FactorVariableTreeNode;
     177            if (aggregateFactorVariables) {
     178              IncReferenceCount(references, factorNode.VariableName);
     179            } else {
     180              IncReferenceCount(references, factorNode.ToString());
     181            }
    153182          }
    154183        });
    155184      } else {
    156         GetVariableReferences(references, tree.Root, 0);
     185        GetVariableReferences(references, tree.Root, 0, aggregateFactorVariables);
    157186      }
    158187      return references;
    159188    }
    160189
    161     private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag) {
     190    private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag, bool aggregateFactorVariables) {
    162191      if (node.Symbol is LaggedVariable) {
    163192        var laggedVarNode = node as LaggedVariableTreeNode;
     
    166195        var varNode = node as VariableTreeNode;
    167196        IncReferenceCount(references, varNode.VariableName, currentLag);
     197      } else if (node.Symbol is FactorVariable) {
     198        var factorNode = node as FactorVariableTreeNode;
     199        if (aggregateFactorVariables) {
     200          IncReferenceCount(references, factorNode.VariableName, currentLag);
     201        } else {
     202          IncReferenceCount(references, factorNode.ToString(), currentLag);
     203        }
    168204      } else if (node.Symbol is VariableCondition) {
    169205        var varCondNode = node as VariableConditionTreeNode;
    170206        IncReferenceCount(references, varCondNode.VariableName, currentLag);
    171         GetVariableReferences(references, node.GetSubtree(0), currentLag);
    172         GetVariableReferences(references, node.GetSubtree(1), currentLag);
     207        GetVariableReferences(references, node.GetSubtree(0), currentLag, aggregateFactorVariables);
     208        GetVariableReferences(references, node.GetSubtree(1), currentLag, aggregateFactorVariables);
    173209      } else if (node.Symbol is Integral) {
    174210        var laggedNode = node as LaggedTreeNode;
    175211        for (int l = laggedNode.Lag; l <= 0; l++) {
    176           GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
     212          GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables);
    177213        }
    178214      } else if (node.Symbol is Derivative) {
    179215        for (int l = -4; l <= 0; l++) {
    180           GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
     216          GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables);
    181217        }
    182218      } else if (node.Symbol is TimeLag) {
    183219        var laggedNode = node as LaggedTreeNode;
    184         GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag);
     220        GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag, aggregateFactorVariables);
    185221      } else {
    186222        foreach (var subtree in node.Subtrees) {
    187           GetVariableReferences(references, subtree, currentLag);
     223          GetVariableReferences(references, subtree, currentLag, aggregateFactorVariables);
    188224        }
    189225      }
Note: See TracChangeset for help on using the changeset viewer.