Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/03/16 18:54:14 (8 years ago)
Author:
gkronber
Message:

Created a feature branch for #2650 (support for categorical variables in symbolic regression) with a first set of changes.

work in progress...

Location:
branches/symbreg-factors-2650
Files:
13 edited
1 copied

Legend:

Unmodified
Added
Removed
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs

    r14185 r14232  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Globalization;
    2425using System.Linq;
    2526using HeuristicLab.Analysis;
     
    4142    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    4243    private const string AggregateLaggedVariablesParameterName = "AggregateLaggedVariables";
     44    private const string AggregateFactorVariablesParameterName = "AggregateFactorVariables";
    4345    private const string VariableImpactsParameterName = "VariableImpacts";
    4446
     
    5254    public IValueLookupParameter<BoolValue> AggregateLaggedVariablesParameter {
    5355      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateLaggedVariablesParameterName]; }
     56    }
     57    public IValueLookupParameter<BoolValue> AggregateFactorVariablesParameter {
     58      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateFactorVariablesParameterName]; }
    5459    }
    5560    #endregion
     
    5964      set { AggregateLaggedVariablesParameter.Value = value; }
    6065    }
     66    public BoolValue AggregateFactorVariables {
     67      get { return AggregateFactorVariablesParameter.ActualValue; }
     68      set { AggregateFactorVariablesParameter.Value = value; }
     69    }
    6170    #endregion
    6271    [StorableConstructor]
     
    7079      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run."));
    7180      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateLaggedVariablesParameterName, "Switch that determines whether all references to a variable should be aggregated regardless of time-offsets. Turn off to analyze all variable references with different time offsets separately.", new BoolValue(true)));
     81      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, "Switch that determines whether all references to factor variables should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.", new BoolValue(true)));
     82    }
     83
     84    [StorableHook(HookType.AfterDeserialization)]
     85    private void AfterDeserialization() {
     86      // BackwardsCompatibility3.3
     87      #region Backwards compatible code, remove with 3.4
     88      if (!Parameters.ContainsKey(AggregateFactorVariablesParameterName)) {
     89        Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, "Switch that determines whether all references to factor variables should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.", new BoolValue(true)));
     90      }
     91      #endregion
    7292    }
    7393
     
    93113      int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();
    94114
    95       foreach (var pair in SymbolicDataAnalysisVariableFrequencyAnalyzer.CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value)) {
     115      foreach (var pair in CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value, AggregateFactorVariables.Value)) {
    96116        if (!datatable.Rows.ContainsKey(pair.Key)) {
    97117          // initialize a new row for the variable and pad with zeros
     
    128148    }
    129149
    130     public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateLaggedVariables = true) {
     150    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees,
     151      bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) {
    131152
    132153      var variableFrequencies = trees
    133         .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables))
     154        .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables, aggregateFactorVariables))
    134155        .GroupBy(pair => pair.Key, pair => pair.Value)
    135156        .ToDictionary(g => g.Key, g => (double)g.Sum());
     
    141162    }
    142163
    143     private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree, bool aggregateLaggedVariables = true) {
     164    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree,
     165      bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) {
    144166      Dictionary<string, int> references = new Dictionary<string, int>();
    145167      if (aggregateLaggedVariables) {
     
    151173            var varCondNode = node as VariableConditionTreeNode;
    152174            IncReferenceCount(references, varCondNode.VariableName);
     175          } else if (node.Symbol is FactorVariable) {
     176            var factorNode = node as FactorVariableTreeNode;
     177            if (aggregateFactorVariables) {
     178              IncReferenceCount(references, factorNode.VariableName);
     179            } else {
     180              IncReferenceCount(references, factorNode.ToString());
     181            }
    153182          }
    154183        });
    155184      } else {
    156         GetVariableReferences(references, tree.Root, 0);
     185        GetVariableReferences(references, tree.Root, 0, aggregateFactorVariables);
    157186      }
    158187      return references;
    159188    }
    160189
    161     private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag) {
     190    private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag, bool aggregateFactorVariables) {
    162191      if (node.Symbol is LaggedVariable) {
    163192        var laggedVarNode = node as LaggedVariableTreeNode;
     
    166195        var varNode = node as VariableTreeNode;
    167196        IncReferenceCount(references, varNode.VariableName, currentLag);
     197      } else if (node.Symbol is FactorVariable) {
     198        var factorNode = node as FactorVariableTreeNode;
     199        if (aggregateFactorVariables) {
     200          IncReferenceCount(references, factorNode.VariableName, currentLag);
     201        } else {
     202          IncReferenceCount(references, factorNode.ToString(), currentLag);
     203        }
    168204      } else if (node.Symbol is VariableCondition) {
    169205        var varCondNode = node as VariableConditionTreeNode;
    170206        IncReferenceCount(references, varCondNode.VariableName, currentLag);
    171         GetVariableReferences(references, node.GetSubtree(0), currentLag);
    172         GetVariableReferences(references, node.GetSubtree(1), currentLag);
     207        GetVariableReferences(references, node.GetSubtree(0), currentLag, aggregateFactorVariables);
     208        GetVariableReferences(references, node.GetSubtree(1), currentLag, aggregateFactorVariables);
    173209      } else if (node.Symbol is Integral) {
    174210        var laggedNode = node as LaggedTreeNode;
    175211        for (int l = laggedNode.Lag; l <= 0; l++) {
    176           GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
     212          GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables);
    177213        }
    178214      } else if (node.Symbol is Derivative) {
    179215        for (int l = -4; l <= 0; l++) {
    180           GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
     216          GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables);
    181217        }
    182218      } else if (node.Symbol is TimeLag) {
    183219        var laggedNode = node as LaggedTreeNode;
    184         GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag);
     220        GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag, aggregateFactorVariables);
    185221      } else {
    186222        foreach (var subtree in node.Subtrees) {
    187           GetVariableReferences(references, subtree, currentLag);
     223          GetVariableReferences(references, subtree, currentLag, aggregateFactorVariables);
    188224        }
    189225      }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/ArithmeticExpressionGrammar.cs

    r14185 r14232  
    5353      constant.MaxValue = 20;
    5454      var variableSymbol = new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable();
     55      var factorVariableSymbol = new FactorVariable();
    5556
    56       var allSymbols = new List<Symbol>() { add, sub, mul, div, constant, variableSymbol };
     57      var allSymbols = new List<Symbol>() { add, sub, mul, div, constant, variableSymbol, factorVariableSymbol };
    5758      var functionSymbols = new List<Symbol>() { add, sub, mul, div };
    5859
     
    6566      SetSubtreeCount(constant, 0, 0);
    6667      SetSubtreeCount(variableSymbol, 0, 0);
     68      SetSubtreeCount(factorVariableSymbol, 0, 0);
    6769
    6870      // allow each symbol as child of the start symbol
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/FullFunctionalExpressionGrammar.cs

    r14185 r14232  
    115115      constant.MaxValue = 20;
    116116      var variableSymbol = new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable();
     117      var factorVariable = new FactorVariable();
    117118      var laggedVariable = new LaggedVariable();
    118119      laggedVariable.InitialFrequency = 0.0;
     
    123124      var allSymbols = new List<Symbol>() { add, sub, mul, div, mean, sin, cos, tan, log, square, pow, sqrt, root, exp,
    124125        airyA, airyB, bessel, cosineIntegral, dawson, erf, expIntegralEi, fresnelCosineIntegral, fresnelSineIntegral, gamma, hypCosineIntegral, hypSineIntegral, norm, psi, sineIntegral,
    125         @if, gt, lt, and, or, not,xor, timeLag, integral, derivative, constant, variableSymbol, laggedVariable,autoregressiveVariable, variableCondition };
     126        @if, gt, lt, and, or, not,xor, timeLag, integral, derivative, constant, variableSymbol, factorVariable, laggedVariable,autoregressiveVariable, variableCondition };
    126127      var unaryFunctionSymbols = new List<Symbol>() { square, sqrt, sin, cos, tan, log, exp, not, timeLag, integral, derivative,
    127128        airyA, airyB, bessel, cosineIntegral, dawson, erf, expIntegralEi, fresnelCosineIntegral, fresnelSineIntegral, gamma, hypCosineIntegral, hypSineIntegral, norm, psi, sineIntegral
     
    130131      var binaryFunctionSymbols = new List<Symbol>() { pow, root, gt, lt, variableCondition };
    131132      var ternarySymbols = new List<Symbol>() { add, sub, mul, div, mean, and, or, xor };
    132       var terminalSymbols = new List<Symbol>() { variableSymbol, constant, laggedVariable, autoregressiveVariable };
     133      var terminalSymbols = new List<Symbol>() { variableSymbol, factorVariable, constant, laggedVariable, autoregressiveVariable };
    133134
    134135      foreach (var symb in allSymbols)
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/TypeCoherentExpressionGrammar.cs

    r14185 r14232  
    104104      constant.MaxValue = 20;
    105105      var variableSymbol = new Variable();
     106      var factorVariable = new FactorVariable();
    106107      var laggedVariable = new LaggedVariable();
    107108      var autoregressiveVariable = new AutoregressiveTargetVariable();
     
    114115      var specialFunctions = new GroupSymbol(SpecialFunctionsName, new List<ISymbol> { airyA, airyB, bessel, cosineIntegral, dawson, erf, expIntegralEi,
    115116        fresnelCosineIntegral,fresnelSineIntegral,gamma,hypCosineIntegral,hypSineIntegral,norm, psi, sineIntegral});
    116       var terminalSymbols = new GroupSymbol(TerminalsName, new List<ISymbol> { constant, variableSymbol });
     117      var terminalSymbols = new GroupSymbol(TerminalsName, new List<ISymbol> { constant, variableSymbol, factorVariable });
    117118      var realValuedSymbols = new GroupSymbol(RealValuedSymbolsName, new List<ISymbol>() { arithmeticSymbols, trigonometricSymbols, exponentialAndLogarithmicSymbols, specialFunctions, terminalSymbols });
    118119
     
    122123      var comparisonSymbols = new GroupSymbol(ComparisonsName, new List<ISymbol> { gt, lt });
    123124      var booleanOperationSymbols = new GroupSymbol(BooleanOperatorsName, new List<ISymbol> { and, or, not, xor });
    124       var conditionalSymbols = new GroupSymbol(ConditionalSymbolsName, new List<ISymbol> { conditionSymbols, comparisonSymbols, booleanOperationSymbols });
     125      var conditionalSymbols = new GroupSymbol(ConditionalSymbolsName, new List<ISymbol> { conditionSymbols, comparisonSymbols, booleanOperationSymbols }); // TODO: factorVariableBool?
    125126
    126127      var timeSeriesSymbols = new GroupSymbol(TimeSeriesSymbolsName, new List<ISymbol> { timeLag, integral, derivative, laggedVariable, autoregressiveVariable });
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj

    r14024 r14232  
    198198    <Compile Include="Symbols\AiryB.cs" />
    199199    <Compile Include="Symbols\Bessel.cs" />
     200    <Compile Include="Symbols\FactorVariable.cs" />
     201    <Compile Include="Symbols\FactorVariableTreeNode.cs" />
    200202    <Compile Include="Symbols\Xor.cs" />
    201203    <Compile Include="Symbols\Erf.cs" />
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interfaces/ISymbolicDataAnalysisImpactValuesCalculator.cs

    r12720 r14232  
    55namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    66  public interface ISymbolicDataAnalysisSolutionImpactValuesCalculator : IItem {
    7     double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows);
    8     double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN);
    97    void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData,
    108      IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, double qualityForImpactsCalculation = double.NaN);
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/OpCodes.cs

    r14185 r14232  
    8383    public const byte Erf = 43;
    8484    public const byte Bessel = 44;
     85    public const byte FactorVariable = 46;
    8586
    8687    private static Dictionary<Type, byte> symbolToOpcode = new Dictionary<Type, byte>() {
     
    130131      { typeof(Norm), OpCodes.Norm},
    131132      { typeof(Erf), OpCodes.Erf},
    132       { typeof(Bessel), OpCodes.Bessel}   
     133      { typeof(Bessel), OpCodes.Bessel},
     134      { typeof(FactorVariable), OpCodes.FactorVariable }
    133135    };
    134136
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeILEmittingInterpreter.cs

    r14185 r14232  
    6666    private static MethodInfo erf = thisType.GetMethod("Erf", new Type[] { typeof(double) });
    6767    private static MethodInfo bessel = thisType.GetMethod("Bessel", new Type[] { typeof(double) });
     68    private static MethodInfo string_eq = typeof(string).GetMethod("Equals", new Type[] {typeof(string)});
    6869    #endregion
    6970
     
    627628            return;
    628629          }
     630        case OpCodes.FactorVariable: {
     631            FactorVariableTreeNode varNode = currentInstr.dynamicNode as FactorVariableTreeNode;
     632            il.Emit(System.Reflection.Emit.OpCodes.Ldarg_1); // load columns array
     633            il.Emit(System.Reflection.Emit.OpCodes.Ldc_I4, (int)currentInstr.data);
     634            // load correct column of the current variable
     635            il.Emit(System.Reflection.Emit.OpCodes.Ldelem_Ref);
     636            il.Emit(System.Reflection.Emit.OpCodes.Ldarg_0); // rowIndex
     637            if (!state.InLaggedContext) {
     638              il.Emit(System.Reflection.Emit.OpCodes.Call, listGetValue);
     639              il.Emit(System.Reflection.Emit.OpCodes.Ldc_R8, varNode.VariableValue);
     640              il.Emit(System.Reflection.Emit.OpCodes.Call, string_eq);
     641              // TODO: convert bool to 1 / 0?
     642            } else {
     643              var nanResult = il.DefineLabel();
     644              var normalResult = il.DefineLabel();
     645              il.Emit(System.Reflection.Emit.OpCodes.Dup);
     646              il.Emit(System.Reflection.Emit.OpCodes.Ldc_I4_0);
     647              il.Emit(System.Reflection.Emit.OpCodes.Blt, nanResult);
     648              il.Emit(System.Reflection.Emit.OpCodes.Dup);
     649              il.Emit(System.Reflection.Emit.OpCodes.Ldc_I4, ds.Rows);
     650              il.Emit(System.Reflection.Emit.OpCodes.Bge, nanResult);
     651              il.Emit(System.Reflection.Emit.OpCodes.Call, listGetValue);
     652              il.Emit(System.Reflection.Emit.OpCodes.Ldc_R8, varNode.VariableValue);
     653              il.Emit(System.Reflection.Emit.OpCodes.Call, string_eq);
     654              // TODO: convert bool to 1 / 0?
     655              il.Emit(System.Reflection.Emit.OpCodes.Br, normalResult);
     656              il.MarkLabel(nanResult);
     657              il.Emit(System.Reflection.Emit.OpCodes.Pop); // rowIndex
     658              il.Emit(System.Reflection.Emit.OpCodes.Pop); // column reference
     659              il.Emit(System.Reflection.Emit.OpCodes.Ldc_R8, double.NaN);
     660              il.MarkLabel(normalResult);
     661            }
     662            return;
     663          }
    629664        case OpCodes.LagVariable: {
    630665            var nanResult = il.DefineLabel();
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeInterpreter.cs

    r14185 r14232  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Linq;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
     
    143144          var variableTreeNode = (VariableTreeNode)instr.dynamicNode;
    144145          instr.data = dataset.GetReadOnlyDoubleValues(variableTreeNode.VariableName);
     146        } else if (instr.opCode == OpCodes.FactorVariable) {
     147          var factorTreeNode = instr.dynamicNode as FactorVariableTreeNode;
     148          instr.data = dataset.GetReadOnlyStringValues(factorTreeNode.VariableName);
    145149        } else if (instr.opCode == OpCodes.LagVariable) {
    146150          var laggedVariableTreeNode = (LaggedVariableTreeNode)instr.dynamicNode;
     
    455459            return ((IList<double>)currentInstr.data)[row] * variableTreeNode.Weight;
    456460          }
     461        case OpCodes.FactorVariable: {
     462            if (row < 0 || row >= dataset.Rows) return double.NaN;
     463            var factorVarTreeNode = currentInstr.dynamicNode as FactorVariableTreeNode;
     464            return ((IList<string>)currentInstr.data)[row] == factorVarTreeNode.VariableValue ? 1 : 0;
     465          }
    457466        case OpCodes.LagVariable: {
    458467            var laggedVariableTreeNode = (LaggedVariableTreeNode)currentInstr.dynamicNode;
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeLinearInterpreter.cs

    r14185 r14232  
    147147            var variableTreeNode = (VariableTreeNode)instr.dynamicNode;
    148148            instr.value = ((IList<double>)instr.data)[row] * variableTreeNode.Weight;
     149          }
     150        } else if (instr.opCode == OpCodes.FactorVariable) {
     151          if (row < 0 || row >= dataset.Rows) instr.value = double.NaN;
     152          else {
     153            var factorTreeNode = instr.dynamicNode as FactorVariableTreeNode;
     154            instr.value = ((IList<string>)instr.data)[row] == factorTreeNode.VariableValue ? 1 : 0;
    149155          }
    150156        } else if (instr.opCode == OpCodes.LagVariable) {
     
    392398            }
    393399            break;
     400          case OpCodes.FactorVariable: {
     401              var factorVariableTreeNode = instr.dynamicNode as FactorVariableTreeNode;
     402              instr.data = dataset.GetReadOnlyStringValues(factorVariableTreeNode.VariableName);
     403            }
     404            break;
    394405          case OpCodes.LagVariable: {
    395406              var laggedVariableTreeNode = (LaggedVariableTreeNode)instr.dynamicNode;
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisModelComplexityCalculator.cs

    r14185 r14232  
    3939            return 1;
    4040          }
    41         case OpCodes.Variable: {
     41        case OpCodes.Variable:
     42        case OpCodes.FactorVariable: {
    4243            return 2;
    4344          }
    44         case OpCodes.Add: 
     45        case OpCodes.Add:
    4546        case OpCodes.Sub: {
    4647            double complexity = 0;
     
    5051            return complexity;
    5152          }
    52         case OpCodes.Mul: 
     53        case OpCodes.Mul:
    5354        case OpCodes.Div: {
    5455            double complexity = 1;
     
    6061          }
    6162        case OpCodes.Sin:
    62         case OpCodes.Cos: 
     63        case OpCodes.Cos:
    6364        case OpCodes.Tan:
    64         case OpCodes.Exp: 
     65        case OpCodes.Exp:
    6566        case OpCodes.Log: {
    6667            double complexity = CalculateComplexity(node.GetSubtree(0));
     
    7576            return complexity * complexity * complexity;
    7677          }
    77         case OpCodes.Power:         
     78        case OpCodes.Power:
    7879        case OpCodes.Root: {
    7980            double complexity = CalculateComplexity(node.GetSubtree(0));
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisProblem.cs

    r14185 r14232  
    208208
    209209    protected virtual void UpdateGrammar() {
    210       SymbolicExpressionTreeGrammar.MaximumFunctionArguments = MaximumFunctionArguments.Value;
    211       SymbolicExpressionTreeGrammar.MaximumFunctionDefinitions = MaximumFunctionDefinitions.Value;
    212       foreach (var varSymbol in SymbolicExpressionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Variable>()) {
     210      var problemData = ProblemData;
     211      var ds = problemData.Dataset;
     212      var grammar = SymbolicExpressionTreeGrammar;
     213      grammar.MaximumFunctionArguments = MaximumFunctionArguments.Value;
     214      grammar.MaximumFunctionDefinitions = MaximumFunctionDefinitions.Value;
     215      foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Variable>()) {
    213216        if (!varSymbol.Fixed) {
    214           varSymbol.AllVariableNames = ProblemData.InputVariables.Select(x => x.Value);
    215           varSymbol.VariableNames = ProblemData.AllowedInputVariables;
     217          varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x));
     218          varSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<double>(x));
    216219        }
    217220      }
    218       foreach (var varSymbol in SymbolicExpressionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableCondition>()) {
     221      foreach (var factorSymbol in grammar.Symbols.OfType<FactorVariable>()) {
     222        if (!factorSymbol.Fixed) {
     223          factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x));
     224          factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x));
     225          factorSymbol.VariableValues = factorSymbol.VariableNames
     226            .ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct().ToList());
     227        }
     228      }
     229      foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableCondition>()) {
    219230        if (!varSymbol.Fixed) {
    220           varSymbol.AllVariableNames = ProblemData.InputVariables.Select(x => x.Value);
    221           varSymbol.VariableNames = ProblemData.AllowedInputVariables;
     231          varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x));
     232          varSymbol.VariableNames = problemData.AllowedInputVariables;
    222233        }
    223234      }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisSolutionImpactValuesCalculator.cs

    r14185 r14232  
    2121
    2222using System.Collections.Generic;
     23using System.Linq;
    2324using HeuristicLab.Common;
    2425using HeuristicLab.Core;
     
    3637    [StorableConstructor]
    3738    protected SymbolicDataAnalysisSolutionImpactValuesCalculator(bool deserializing) : base(deserializing) { }
    38     public abstract double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows);
    39     public abstract double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN);
    4039    public abstract void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, double qualityForImpactsCalculation = double.NaN);
    4140
    42     protected static double CalculateReplacementValue(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
     41    protected IEnumerable<double> CalculateReplacementValues(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
    4342      IDataset dataset, IEnumerable<int> rows) {
    4443      //optimization: constant nodes return always the same value
    4544      ConstantTreeNode constantNode = node as ConstantTreeNode;
    46       if (constantNode != null) return constantNode.Value;
     45      FactorVariableTreeNode factorNode = node as FactorVariableTreeNode;
     46      if (constantNode != null) {
     47        yield return constantNode.Value;
     48      } else if (factorNode != null) {
     49        // valid replacements are either all off or all on
     50        yield return 0;
     51        yield return 1;
     52      } else {
     53        var rootSymbol = new ProgramRootSymbol().CreateTreeNode();
     54        var startSymbol = new StartSymbol().CreateTreeNode();
     55        rootSymbol.AddSubtree(startSymbol);
     56        startSymbol.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());
    4757
    48       var rootSymbol = new ProgramRootSymbol().CreateTreeNode();
    49       var startSymbol = new StartSymbol().CreateTreeNode();
    50       rootSymbol.AddSubtree(startSymbol);
    51       startSymbol.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());
    52 
    53       var tempTree = new SymbolicExpressionTree(rootSymbol);
    54       // clone ADFs of source tree
    55       for (int i = 1; i < sourceTree.Root.SubtreeCount; i++) {
    56         tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone());
     58        var tempTree = new SymbolicExpressionTree(rootSymbol);
     59        // clone ADFs of source tree
     60        for (int i = 1; i < sourceTree.Root.SubtreeCount; i++) {
     61          tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone());
     62        }
     63        yield return interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).Median();
     64        yield return interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).Average(); // TODO perf
    5765      }
    58       return interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).Median();
    5966    }
    6067  }
Note: See TracChangeset for help on using the changeset viewer.