Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/03/16 18:54:14 (8 years ago)
Author:
gkronber
Message:

Created a feature branch for #2650 (support for categorical variables in symbolic regression) with a first set of changes.

work in progress...

Location:
branches/symbreg-factors-2650
Files:
24 edited
1 copied

Legend:

Unmodified
Added
Removed
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views/3.4/InteractiveSymbolicClassificationSolutionSimplifierViewBase.cs

    r14185 r14232  
    5959      return model;
    6060    }
    61     protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateReplacementValues(ISymbolicExpressionTree tree) {
    62       return tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToDictionary(
    63         n => n,
    64         n => calculator.CalculateReplacementValue(Content.Model, n, Content.ProblemData, Content.ProblemData.TrainingIndices)
    65         );
    66     }
    67 
    68     protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateImpactValues(ISymbolicExpressionTree tree) {
    69       var values = CalculateImpactAndReplacementValues(tree);
    70       return values.ToDictionary(x => x.Key, x => x.Value.Item1);
    71     }
    7261
    7362    protected override Dictionary<ISymbolicExpressionTreeNode, Tuple<double, double>> CalculateImpactAndReplacementValues(ISymbolicExpressionTree tree) {
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicClassificationSolutionImpactValuesCalculator.cs

    r14185 r14232  
    4040    protected SymbolicClassificationSolutionImpactValuesCalculator(bool deserializing) : base(deserializing) { }
    4141
    42     public override double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows) {
    43       var classificationModel = (ISymbolicClassificationModel)model;
    44       var classificationProblemData = (IClassificationProblemData)problemData;
    45 
    46       return CalculateReplacementValue(node, classificationModel.SymbolicExpressionTree, classificationModel.Interpreter, classificationProblemData.Dataset, rows);
    47     }
    48 
    49     public override double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN) {
    50       double impactValue, replacementValue;
    51       double newQualityForImpactsCalculation;
    52       CalculateImpactAndReplacementValues(model, node, problemData, rows, out impactValue, out replacementValue, out newQualityForImpactsCalculation, qualityForImpactsCalculation);
    53       return impactValue;
    54     }
    55 
    56     public override void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node,
    57       IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation,
     42    public override void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model,
     43      ISymbolicExpressionTreeNode node,
     44      IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue,
     45      out double newQualityForImpactsCalculation,
    5846      double qualityForImpactsCalculation = Double.NaN) {
    5947      var classificationModel = (ISymbolicClassificationModel)model;
     
    6351        qualityForImpactsCalculation = CalculateQualityForImpacts(classificationModel, classificationProblemData, rows);
    6452
    65       replacementValue = CalculateReplacementValue(classificationModel, node, classificationProblemData, rows);
    66       var constantNode = new ConstantTreeNode(new Constant()) { Value = replacementValue };
    6753
    6854      var cloner = new Cloner();
     
    7258      var tempModelParentNode = tempModelNode.Parent;
    7359      int i = tempModelParentNode.IndexOfSubtree(tempModelNode);
    74       tempModelParentNode.RemoveSubtree(i);
    75       tempModelParentNode.InsertSubtree(i, constantNode);
     60      double bestReplacementValue = 0.0;
     61      double bestImpactValue = double.NegativeInfinity;
     62      newQualityForImpactsCalculation = qualityForImpactsCalculation; // initialize
     63      // try the potentially reasonable replacement values and use the best one
     64      foreach (var repValue in CalculateReplacementValues(node, classificationModel.SymbolicExpressionTree, classificationModel.Interpreter, classificationProblemData.Dataset, classificationProblemData.TrainingIndices)) {
     65        tempModelParentNode.RemoveSubtree(i);
    7666
    77       OnlineCalculatorError errorState;
    78       var dataset = classificationProblemData.Dataset;
    79       var targetClassValues = dataset.GetDoubleValues(classificationProblemData.TargetVariable, rows);
    80       var estimatedClassValues = tempModel.GetEstimatedClassValues(dataset, rows);
    81       newQualityForImpactsCalculation = OnlineAccuracyCalculator.Calculate(targetClassValues, estimatedClassValues, out errorState);
    82       if (errorState != OnlineCalculatorError.None) newQualityForImpactsCalculation = 0.0;
     67        var constantNode = new ConstantTreeNode(new Constant()) { Value = repValue };
     68        tempModelParentNode.InsertSubtree(i, constantNode);
    8369
    84       impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation;
     70        var dataset = classificationProblemData.Dataset;
     71        var targetClassValues = dataset.GetDoubleValues(classificationProblemData.TargetVariable, rows);
     72        var estimatedClassValues = tempModel.GetEstimatedClassValues(dataset, rows);
     73        OnlineCalculatorError errorState;
     74        newQualityForImpactsCalculation = OnlineAccuracyCalculator.Calculate(targetClassValues, estimatedClassValues,
     75          out errorState);
     76        if (errorState != OnlineCalculatorError.None) newQualityForImpactsCalculation = 0.0;
     77
     78        impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation;
     79
     80        if (impactValue > bestImpactValue) {
     81          bestImpactValue = impactValue;
     82          bestReplacementValue = repValue;
     83        }
     84      }
     85      replacementValue = bestReplacementValue;
     86      impactValue = bestImpactValue;
    8587    }
    8688
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/InteractiveSymbolicRegressionSolutionSimplifierView.cs

    r14185 r14232  
    4848    }
    4949
    50     protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateReplacementValues(ISymbolicExpressionTree tree) {
    51       return tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToDictionary(
    52         n => n,
    53         n => calculator.CalculateReplacementValue(Content.Model, n, Content.ProblemData, Content.ProblemData.TrainingIndices)
    54         );
    55     }
    56 
    57     protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateImpactValues(ISymbolicExpressionTree tree) {
    58       var values = CalculateImpactAndReplacementValues(tree);
    59       return values.ToDictionary(x => x.Key, x => x.Value.Item1);
    60     }
    6150
    6251    protected override Dictionary<ISymbolicExpressionTreeNode, Tuple<double, double>> CalculateImpactAndReplacementValues(ISymbolicExpressionTree tree) {
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs

    r14185 r14232  
    181181      List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>();
    182182      List<string> variableNames = new List<string>();
     183      List<string> categoricalVariableValues = new List<string>();
    183184
    184185      AutoDiff.Term func;
    185       if (!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out func))
     186      if (!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out func))
    186187        throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
    187       if (variableNames.Count == 0) return 0.0;
     188      if (variableNames.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0
    188189
    189190      AutoDiff.IParametricCompiledTerm compiledFunc = func.Compile(variables.ToArray(), parameters.ToArray());
    190191
    191       List<SymbolicExpressionTreeTerminalNode> terminalNodes = null;
     192      List<SymbolicExpressionTreeTerminalNode> terminalNodes = null; // gkronber only used for extraction of initial constants
    192193      if (updateVariableWeights)
    193194        terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList();
     
    222223      foreach (var r in rows) {
    223224        for (int col = 0; col < variableNames.Count; col++) {
    224           x[row, col] = ds.GetDoubleValue(variableNames[col], r);
     225          if (ds.VariableHasType<double>(variableNames[col])) {
     226            x[row, col] = ds.GetDoubleValue(variableNames[col], r);
     227          } else if (ds.VariableHasType<string>(variableNames[col])) {
     228            x[row, col] = ds.GetStringValue(variableNames[col], r) == categoricalVariableValues[col] ? 1 : 0;
     229          } else throw new InvalidProgramException("found a variable of unknown type");
    225230        }
    226231        row++;
     
    286291    }
    287292
    288     private static bool TryTransformToAutoDiff(ISymbolicExpressionTreeNode node, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, List<string> variableNames, bool updateVariableWeights, out AutoDiff.Term term) {
     293    private static bool TryTransformToAutoDiff(ISymbolicExpressionTreeNode node, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters,
     294      List<string> variableNames, List<string> categoricalVariableValues, bool updateVariableWeights, out AutoDiff.Term term) {
    289295      if (node.Symbol is Constant) {
    290296        var var = new AutoDiff.Variable();
     
    298304        parameters.Add(par);
    299305        variableNames.Add(varNode.VariableName);
     306        categoricalVariableValues.Add(string.Empty);   // as a value as placeholder (variableNames.Length == catVariableValues.Length)
    300307
    301308        if (updateVariableWeights) {
     
    308315        return true;
    309316      }
     317      if (node.Symbol is FactorVariable) {
     318        // nothing to update in this case (like a variable without a weight)
     319        // values are only 0 or 1 and set in x accordingly
     320        var factorNode = node as FactorVariableTreeNode;
     321        var par = new AutoDiff.Variable();
     322        parameters.Add(par);
     323        variableNames.Add(factorNode.VariableName);
     324        categoricalVariableValues.Add(factorNode.VariableValue);
     325        term = par;
     326        return true;
     327      }
    310328      if (node.Symbol is Addition) {
    311329        List<AutoDiff.Term> terms = new List<Term>();
    312330        foreach (var subTree in node.Subtrees) {
    313331          AutoDiff.Term t;
    314           if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, updateVariableWeights, out t)) {
     332          if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    315333            term = null;
    316334            return false;
     
    325343        for (int i = 0; i < node.SubtreeCount; i++) {
    326344          AutoDiff.Term t;
    327           if (!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, variableNames, updateVariableWeights, out t)) {
     345          if (!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    328346            term = null;
    329347            return false;
     
    340358        foreach (var subTree in node.Subtrees) {
    341359          AutoDiff.Term t;
    342           if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, updateVariableWeights, out t)) {
     360          if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    343361            term = null;
    344362            return false;
     
    355373        foreach (var subTree in node.Subtrees) {
    356374          AutoDiff.Term t;
    357           if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, updateVariableWeights, out t)) {
     375          if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    358376            term = null;
    359377            return false;
     
    367385      if (node.Symbol is Logarithm) {
    368386        AutoDiff.Term t;
    369         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {
     387        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    370388          term = null;
    371389          return false;
     
    377395      if (node.Symbol is Exponential) {
    378396        AutoDiff.Term t;
    379         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {
     397        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    380398          term = null;
    381399          return false;
     
    387405      if (node.Symbol is Square) {
    388406        AutoDiff.Term t;
    389         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {
     407        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    390408          term = null;
    391409          return false;
     
    397415      if (node.Symbol is SquareRoot) {
    398416        AutoDiff.Term t;
    399         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {
     417        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    400418          term = null;
    401419          return false;
     
    407425      if (node.Symbol is Sine) {
    408426        AutoDiff.Term t;
    409         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {
     427        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    410428          term = null;
    411429          return false;
     
    417435      if (node.Symbol is Cosine) {
    418436        AutoDiff.Term t;
    419         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {
     437        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    420438          term = null;
    421439          return false;
     
    427445      if (node.Symbol is Tangent) {
    428446        AutoDiff.Term t;
    429         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {
     447        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    430448          term = null;
    431449          return false;
     
    437455      if (node.Symbol is Erf) {
    438456        AutoDiff.Term t;
    439         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {
     457        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    440458          term = null;
    441459          return false;
     
    447465      if (node.Symbol is Norm) {
    448466        AutoDiff.Term t;
    449         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {
     467        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) {
    450468          term = null;
    451469          return false;
     
    461479        variables.Add(alpha);
    462480        AutoDiff.Term branchTerm;
    463         if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out branchTerm)) {
     481        if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out branchTerm)) {
    464482          term = branchTerm * alpha + beta;
    465483          return true;
     
    478496        where
    479497         !(n.Symbol is Variable) &&
     498         !(n.Symbol is FactorVariable) &&
    480499         !(n.Symbol is Constant) &&
    481500         !(n.Symbol is Addition) &&
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionSolutionImpactValuesCalculator.cs

    r14185 r14232  
    4141    [StorableConstructor]
    4242    protected SymbolicRegressionSolutionImpactValuesCalculator(bool deserializing) : base(deserializing) { }
    43     public override double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows) {
    44       var regressionModel = (ISymbolicRegressionModel)model;
    45       var regressionProblemData = (IRegressionProblemData)problemData;
    46 
    47       return CalculateReplacementValue(node, regressionModel.SymbolicExpressionTree, regressionModel.Interpreter, regressionProblemData.Dataset, rows);
    48     }
    49 
    50     public override double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN) {
    51       double impactValue, replacementValue, newQualityForImpactsCalculation;
    52       CalculateImpactAndReplacementValues(model, node, problemData, rows, out impactValue, out replacementValue, out newQualityForImpactsCalculation, qualityForImpactsCalculation);
    53       return impactValue;
    54     }
    5543
    5644    public override void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node,
    5745      IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation,
    58       double qualityForImpactsCalculation = Double.NaN) {
     46      double qualityForImpactsCalculation = double.NaN) {
    5947      var regressionModel = (ISymbolicRegressionModel)model;
    6048      var regressionProblemData = (IRegressionProblemData)problemData;
     
    6351      var targetValues = dataset.GetDoubleValues(regressionProblemData.TargetVariable, rows);
    6452
    65       OnlineCalculatorError errorState;
    6653      if (double.IsNaN(qualityForImpactsCalculation))
    6754        qualityForImpactsCalculation = CalculateQualityForImpacts(regressionModel, regressionProblemData, rows);
    68 
    69       replacementValue = CalculateReplacementValue(regressionModel, node, regressionProblemData, rows);
    70       var constantNode = new ConstantTreeNode(new Constant()) { Value = replacementValue };
    7155
    7256      var cloner = new Cloner();
     
    7660      var tempModelParentNode = tempModelNode.Parent;
    7761      int i = tempModelParentNode.IndexOfSubtree(tempModelNode);
    78       tempModelParentNode.RemoveSubtree(i);
    79       tempModelParentNode.InsertSubtree(i, constantNode);
    8062
    81       var estimatedValues = tempModel.GetEstimatedValues(dataset, rows);
    82       double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out errorState);
    83       if (errorState != OnlineCalculatorError.None) r = 0.0;
    84       newQualityForImpactsCalculation = r * r;
     63      double bestReplacementValue = 0.0;
     64      double bestImpactValue = double.NegativeInfinity;
     65      newQualityForImpactsCalculation = qualityForImpactsCalculation; // initialize
     66      // try the potentially reasonable replacement values and use the best one
     67      foreach (var repValue in CalculateReplacementValues(node, regressionModel.SymbolicExpressionTree, regressionModel.Interpreter, regressionProblemData.Dataset, regressionProblemData.TrainingIndices)) {
    8568
    86       impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation;
     69        tempModelParentNode.RemoveSubtree(i);
     70
     71        var constantNode = new ConstantTreeNode(new Constant()) { Value = repValue };
     72
     73        tempModelParentNode.InsertSubtree(i, constantNode);
     74
     75        var estimatedValues = tempModel.GetEstimatedValues(dataset, rows);
     76        OnlineCalculatorError errorState;
     77        double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out errorState);
     78        if (errorState != OnlineCalculatorError.None) r = 0.0;
     79        newQualityForImpactsCalculation = r * r;
     80
     81        impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation;
     82        if (impactValue > bestImpactValue) {
     83          bestImpactValue = impactValue;
     84          bestReplacementValue = repValue;
     85        }
     86      }
     87      replacementValue = bestReplacementValue;
     88      impactValue = bestImpactValue;
    8789    }
    8890
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.TimeSeriesPrognosis.Views/3.4/InteractiveSymbolicTimeSeriesPrognosisSolutionSimplifierView.cs

    r14185 r14232  
    8888    }
    8989
    90     protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateReplacementValues(ISymbolicExpressionTree tree) {
    91       var replacementValues = new Dictionary<ISymbolicExpressionTreeNode, double>();
    92       foreach (var componentBranch in tree.Root.GetSubtree(0).Subtrees)
    93         foreach (ISymbolicExpressionTreeNode node in componentBranch.IterateNodesPrefix()) {
    94           replacementValues[node] = CalculateReplacementValue(node, tree);
    95         }
    96       return replacementValues;
    97     }
    98 
    99     protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateImpactValues(ISymbolicExpressionTree tree) {
    100       var impactAndReplacementValues = CalculateImpactAndReplacementValues(tree);
    101       return impactAndReplacementValues.ToDictionary(x => x.Key, x => x.Value.Item1); // item1 of the tuple is the impact value
    102     }
    103 
    10490    private double CalculateReplacementValue(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree) {
    10591      // remove old ADFs
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/InteractiveSymbolicDataAnalysisSolutionSimplifierView.cs

    r14185 r14232  
    174174    }
    175175
    176     protected abstract Dictionary<ISymbolicExpressionTreeNode, double> CalculateReplacementValues(ISymbolicExpressionTree tree);
    177     protected abstract Dictionary<ISymbolicExpressionTreeNode, double> CalculateImpactValues(ISymbolicExpressionTree tree);
    178176    protected abstract Dictionary<ISymbolicExpressionTreeNode, Tuple<double, double>> CalculateImpactAndReplacementValues(ISymbolicExpressionTree tree);
    179177    protected abstract void UpdateModel(ISymbolicExpressionTree tree);
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs

    r14185 r14232  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Globalization;
    2425using System.Linq;
    2526using HeuristicLab.Analysis;
     
    4142    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    4243    private const string AggregateLaggedVariablesParameterName = "AggregateLaggedVariables";
     44    private const string AggregateFactorVariablesParameterName = "AggregateFactorVariables";
    4345    private const string VariableImpactsParameterName = "VariableImpacts";
    4446
     
    5254    public IValueLookupParameter<BoolValue> AggregateLaggedVariablesParameter {
    5355      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateLaggedVariablesParameterName]; }
     56    }
     57    public IValueLookupParameter<BoolValue> AggregateFactorVariablesParameter {
     58      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateFactorVariablesParameterName]; }
    5459    }
    5560    #endregion
     
    5964      set { AggregateLaggedVariablesParameter.Value = value; }
    6065    }
     66    public BoolValue AggregateFactorVariables {
     67      get { return AggregateFactorVariablesParameter.ActualValue; }
     68      set { AggregateFactorVariablesParameter.Value = value; }
     69    }
    6170    #endregion
    6271    [StorableConstructor]
     
    7079      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run."));
    7180      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateLaggedVariablesParameterName, "Switch that determines whether all references to a variable should be aggregated regardless of time-offsets. Turn off to analyze all variable references with different time offsets separately.", new BoolValue(true)));
     81      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, "Switch that determines whether all references to factor variables should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.", new BoolValue(true)));
     82    }
     83
     84    [StorableHook(HookType.AfterDeserialization)]
     85    private void AfterDeserialization() {
     86      // BackwardsCompatibility3.3
     87      #region Backwards compatible code, remove with 3.4
     88      if (!Parameters.ContainsKey(AggregateFactorVariablesParameterName)) {
     89        Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, "Switch that determines whether all references to factor variables should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.", new BoolValue(true)));
     90      }
     91      #endregion
    7292    }
    7393
     
    93113      int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();
    94114
    95       foreach (var pair in SymbolicDataAnalysisVariableFrequencyAnalyzer.CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value)) {
     115      foreach (var pair in CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value, AggregateFactorVariables.Value)) {
    96116        if (!datatable.Rows.ContainsKey(pair.Key)) {
    97117          // initialize a new row for the variable and pad with zeros
     
    128148    }
    129149
    130     public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateLaggedVariables = true) {
     150    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees,
     151      bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) {
    131152
    132153      var variableFrequencies = trees
    133         .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables))
     154        .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables, aggregateFactorVariables))
    134155        .GroupBy(pair => pair.Key, pair => pair.Value)
    135156        .ToDictionary(g => g.Key, g => (double)g.Sum());
     
    141162    }
    142163
    143     private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree, bool aggregateLaggedVariables = true) {
     164    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree,
     165      bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) {
    144166      Dictionary<string, int> references = new Dictionary<string, int>();
    145167      if (aggregateLaggedVariables) {
     
    151173            var varCondNode = node as VariableConditionTreeNode;
    152174            IncReferenceCount(references, varCondNode.VariableName);
     175          } else if (node.Symbol is FactorVariable) {
     176            var factorNode = node as FactorVariableTreeNode;
     177            if (aggregateFactorVariables) {
     178              IncReferenceCount(references, factorNode.VariableName);
     179            } else {
     180              IncReferenceCount(references, factorNode.ToString());
     181            }
    153182          }
    154183        });
    155184      } else {
    156         GetVariableReferences(references, tree.Root, 0);
     185        GetVariableReferences(references, tree.Root, 0, aggregateFactorVariables);
    157186      }
    158187      return references;
    159188    }
    160189
    161     private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag) {
     190    private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag, bool aggregateFactorVariables) {
    162191      if (node.Symbol is LaggedVariable) {
    163192        var laggedVarNode = node as LaggedVariableTreeNode;
     
    166195        var varNode = node as VariableTreeNode;
    167196        IncReferenceCount(references, varNode.VariableName, currentLag);
     197      } else if (node.Symbol is FactorVariable) {
     198        var factorNode = node as FactorVariableTreeNode;
     199        if (aggregateFactorVariables) {
     200          IncReferenceCount(references, factorNode.VariableName, currentLag);
     201        } else {
     202          IncReferenceCount(references, factorNode.ToString(), currentLag);
     203        }
    168204      } else if (node.Symbol is VariableCondition) {
    169205        var varCondNode = node as VariableConditionTreeNode;
    170206        IncReferenceCount(references, varCondNode.VariableName, currentLag);
    171         GetVariableReferences(references, node.GetSubtree(0), currentLag);
    172         GetVariableReferences(references, node.GetSubtree(1), currentLag);
     207        GetVariableReferences(references, node.GetSubtree(0), currentLag, aggregateFactorVariables);
     208        GetVariableReferences(references, node.GetSubtree(1), currentLag, aggregateFactorVariables);
    173209      } else if (node.Symbol is Integral) {
    174210        var laggedNode = node as LaggedTreeNode;
    175211        for (int l = laggedNode.Lag; l <= 0; l++) {
    176           GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
     212          GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables);
    177213        }
    178214      } else if (node.Symbol is Derivative) {
    179215        for (int l = -4; l <= 0; l++) {
    180           GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
     216          GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables);
    181217        }
    182218      } else if (node.Symbol is TimeLag) {
    183219        var laggedNode = node as LaggedTreeNode;
    184         GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag);
     220        GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag, aggregateFactorVariables);
    185221      } else {
    186222        foreach (var subtree in node.Subtrees) {
    187           GetVariableReferences(references, subtree, currentLag);
     223          GetVariableReferences(references, subtree, currentLag, aggregateFactorVariables);
    188224        }
    189225      }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/ArithmeticExpressionGrammar.cs

    r14185 r14232  
    5353      constant.MaxValue = 20;
    5454      var variableSymbol = new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable();
     55      var factorVariableSymbol = new FactorVariable();
    5556
    56       var allSymbols = new List<Symbol>() { add, sub, mul, div, constant, variableSymbol };
     57      var allSymbols = new List<Symbol>() { add, sub, mul, div, constant, variableSymbol, factorVariableSymbol };
    5758      var functionSymbols = new List<Symbol>() { add, sub, mul, div };
    5859
     
    6566      SetSubtreeCount(constant, 0, 0);
    6667      SetSubtreeCount(variableSymbol, 0, 0);
     68      SetSubtreeCount(factorVariableSymbol, 0, 0);
    6769
    6870      // allow each symbol as child of the start symbol
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/FullFunctionalExpressionGrammar.cs

    r14185 r14232  
    115115      constant.MaxValue = 20;
    116116      var variableSymbol = new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable();
     117      var factorVariable = new FactorVariable();
    117118      var laggedVariable = new LaggedVariable();
    118119      laggedVariable.InitialFrequency = 0.0;
     
    123124      var allSymbols = new List<Symbol>() { add, sub, mul, div, mean, sin, cos, tan, log, square, pow, sqrt, root, exp,
    124125        airyA, airyB, bessel, cosineIntegral, dawson, erf, expIntegralEi, fresnelCosineIntegral, fresnelSineIntegral, gamma, hypCosineIntegral, hypSineIntegral, norm, psi, sineIntegral,
    125         @if, gt, lt, and, or, not,xor, timeLag, integral, derivative, constant, variableSymbol, laggedVariable,autoregressiveVariable, variableCondition };
     126        @if, gt, lt, and, or, not,xor, timeLag, integral, derivative, constant, variableSymbol, factorVariable, laggedVariable,autoregressiveVariable, variableCondition };
    126127      var unaryFunctionSymbols = new List<Symbol>() { square, sqrt, sin, cos, tan, log, exp, not, timeLag, integral, derivative,
    127128        airyA, airyB, bessel, cosineIntegral, dawson, erf, expIntegralEi, fresnelCosineIntegral, fresnelSineIntegral, gamma, hypCosineIntegral, hypSineIntegral, norm, psi, sineIntegral
     
    130131      var binaryFunctionSymbols = new List<Symbol>() { pow, root, gt, lt, variableCondition };
    131132      var ternarySymbols = new List<Symbol>() { add, sub, mul, div, mean, and, or, xor };
    132       var terminalSymbols = new List<Symbol>() { variableSymbol, constant, laggedVariable, autoregressiveVariable };
     133      var terminalSymbols = new List<Symbol>() { variableSymbol, factorVariable, constant, laggedVariable, autoregressiveVariable };
    133134
    134135      foreach (var symb in allSymbols)
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/TypeCoherentExpressionGrammar.cs

    r14185 r14232  
    104104      constant.MaxValue = 20;
    105105      var variableSymbol = new Variable();
     106      var factorVariable = new FactorVariable();
    106107      var laggedVariable = new LaggedVariable();
    107108      var autoregressiveVariable = new AutoregressiveTargetVariable();
     
    114115      var specialFunctions = new GroupSymbol(SpecialFunctionsName, new List<ISymbol> { airyA, airyB, bessel, cosineIntegral, dawson, erf, expIntegralEi,
    115116        fresnelCosineIntegral,fresnelSineIntegral,gamma,hypCosineIntegral,hypSineIntegral,norm, psi, sineIntegral});
    116       var terminalSymbols = new GroupSymbol(TerminalsName, new List<ISymbol> { constant, variableSymbol });
     117      var terminalSymbols = new GroupSymbol(TerminalsName, new List<ISymbol> { constant, variableSymbol, factorVariable });
    117118      var realValuedSymbols = new GroupSymbol(RealValuedSymbolsName, new List<ISymbol>() { arithmeticSymbols, trigonometricSymbols, exponentialAndLogarithmicSymbols, specialFunctions, terminalSymbols });
    118119
     
    122123      var comparisonSymbols = new GroupSymbol(ComparisonsName, new List<ISymbol> { gt, lt });
    123124      var booleanOperationSymbols = new GroupSymbol(BooleanOperatorsName, new List<ISymbol> { and, or, not, xor });
    124       var conditionalSymbols = new GroupSymbol(ConditionalSymbolsName, new List<ISymbol> { conditionSymbols, comparisonSymbols, booleanOperationSymbols });
     125      var conditionalSymbols = new GroupSymbol(ConditionalSymbolsName, new List<ISymbol> { conditionSymbols, comparisonSymbols, booleanOperationSymbols }); // TODO: factorVariableBool?
    125126
    126127      var timeSeriesSymbols = new GroupSymbol(TimeSeriesSymbolsName, new List<ISymbol> { timeLag, integral, derivative, laggedVariable, autoregressiveVariable });
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj

    r14024 r14232  
    198198    <Compile Include="Symbols\AiryB.cs" />
    199199    <Compile Include="Symbols\Bessel.cs" />
     200    <Compile Include="Symbols\FactorVariable.cs" />
     201    <Compile Include="Symbols\FactorVariableTreeNode.cs" />
    200202    <Compile Include="Symbols\Xor.cs" />
    201203    <Compile Include="Symbols\Erf.cs" />
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interfaces/ISymbolicDataAnalysisImpactValuesCalculator.cs

    r12720 r14232  
    55namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    66  public interface ISymbolicDataAnalysisSolutionImpactValuesCalculator : IItem {
    7     double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows);
    8     double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN);
    97    void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData,
    108      IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, double qualityForImpactsCalculation = double.NaN);
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/OpCodes.cs

    r14185 r14232  
    8383    public const byte Erf = 43;
    8484    public const byte Bessel = 44;
     85    public const byte FactorVariable = 46;
    8586
    8687    private static Dictionary<Type, byte> symbolToOpcode = new Dictionary<Type, byte>() {
     
    130131      { typeof(Norm), OpCodes.Norm},
    131132      { typeof(Erf), OpCodes.Erf},
    132       { typeof(Bessel), OpCodes.Bessel}   
     133      { typeof(Bessel), OpCodes.Bessel},
     134      { typeof(FactorVariable), OpCodes.FactorVariable }
    133135    };
    134136
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeILEmittingInterpreter.cs

    r14185 r14232  
    6666    private static MethodInfo erf = thisType.GetMethod("Erf", new Type[] { typeof(double) });
    6767    private static MethodInfo bessel = thisType.GetMethod("Bessel", new Type[] { typeof(double) });
     68    private static MethodInfo string_eq = typeof(string).GetMethod("Equals", new Type[] {typeof(string)});
    6869    #endregion
    6970
     
    627628            return;
    628629          }
     630        case OpCodes.FactorVariable: {
     631            FactorVariableTreeNode varNode = currentInstr.dynamicNode as FactorVariableTreeNode;
     632            il.Emit(System.Reflection.Emit.OpCodes.Ldarg_1); // load columns array
     633            il.Emit(System.Reflection.Emit.OpCodes.Ldc_I4, (int)currentInstr.data);
     634            // load correct column of the current variable
     635            il.Emit(System.Reflection.Emit.OpCodes.Ldelem_Ref);
     636            il.Emit(System.Reflection.Emit.OpCodes.Ldarg_0); // rowIndex
     637            if (!state.InLaggedContext) {
     638              il.Emit(System.Reflection.Emit.OpCodes.Call, listGetValue);
     639              il.Emit(System.Reflection.Emit.OpCodes.Ldc_R8, varNode.VariableValue);
     640              il.Emit(System.Reflection.Emit.OpCodes.Call, string_eq);
     641              // TODO: convert bool to 1 / 0?
     642            } else {
     643              var nanResult = il.DefineLabel();
     644              var normalResult = il.DefineLabel();
     645              il.Emit(System.Reflection.Emit.OpCodes.Dup);
     646              il.Emit(System.Reflection.Emit.OpCodes.Ldc_I4_0);
     647              il.Emit(System.Reflection.Emit.OpCodes.Blt, nanResult);
     648              il.Emit(System.Reflection.Emit.OpCodes.Dup);
     649              il.Emit(System.Reflection.Emit.OpCodes.Ldc_I4, ds.Rows);
     650              il.Emit(System.Reflection.Emit.OpCodes.Bge, nanResult);
     651              il.Emit(System.Reflection.Emit.OpCodes.Call, listGetValue);
     652              il.Emit(System.Reflection.Emit.OpCodes.Ldc_R8, varNode.VariableValue);
     653              il.Emit(System.Reflection.Emit.OpCodes.Call, string_eq);
     654              // TODO: convert bool to 1 / 0?
     655              il.Emit(System.Reflection.Emit.OpCodes.Br, normalResult);
     656              il.MarkLabel(nanResult);
     657              il.Emit(System.Reflection.Emit.OpCodes.Pop); // rowIndex
     658              il.Emit(System.Reflection.Emit.OpCodes.Pop); // column reference
     659              il.Emit(System.Reflection.Emit.OpCodes.Ldc_R8, double.NaN);
     660              il.MarkLabel(normalResult);
     661            }
     662            return;
     663          }
    629664        case OpCodes.LagVariable: {
    630665            var nanResult = il.DefineLabel();
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeInterpreter.cs

    r14185 r14232  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Linq;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
     
    143144          var variableTreeNode = (VariableTreeNode)instr.dynamicNode;
    144145          instr.data = dataset.GetReadOnlyDoubleValues(variableTreeNode.VariableName);
     146        } else if (instr.opCode == OpCodes.FactorVariable) {
     147          var factorTreeNode = instr.dynamicNode as FactorVariableTreeNode;
     148          instr.data = dataset.GetReadOnlyStringValues(factorTreeNode.VariableName);
    145149        } else if (instr.opCode == OpCodes.LagVariable) {
    146150          var laggedVariableTreeNode = (LaggedVariableTreeNode)instr.dynamicNode;
     
    455459            return ((IList<double>)currentInstr.data)[row] * variableTreeNode.Weight;
    456460          }
     461        case OpCodes.FactorVariable: {
     462            if (row < 0 || row >= dataset.Rows) return double.NaN;
     463            var factorVarTreeNode = currentInstr.dynamicNode as FactorVariableTreeNode;
     464            return ((IList<string>)currentInstr.data)[row] == factorVarTreeNode.VariableValue ? 1 : 0;
     465          }
    457466        case OpCodes.LagVariable: {
    458467            var laggedVariableTreeNode = (LaggedVariableTreeNode)currentInstr.dynamicNode;
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeLinearInterpreter.cs

    r14185 r14232  
    147147            var variableTreeNode = (VariableTreeNode)instr.dynamicNode;
    148148            instr.value = ((IList<double>)instr.data)[row] * variableTreeNode.Weight;
     149          }
     150        } else if (instr.opCode == OpCodes.FactorVariable) {
     151          if (row < 0 || row >= dataset.Rows) instr.value = double.NaN;
     152          else {
     153            var factorTreeNode = instr.dynamicNode as FactorVariableTreeNode;
     154            instr.value = ((IList<string>)instr.data)[row] == factorTreeNode.VariableValue ? 1 : 0;
    149155          }
    150156        } else if (instr.opCode == OpCodes.LagVariable) {
     
    392398            }
    393399            break;
     400          case OpCodes.FactorVariable: {
     401              var factorVariableTreeNode = instr.dynamicNode as FactorVariableTreeNode;
     402              instr.data = dataset.GetReadOnlyStringValues(factorVariableTreeNode.VariableName);
     403            }
     404            break;
    394405          case OpCodes.LagVariable: {
    395406              var laggedVariableTreeNode = (LaggedVariableTreeNode)instr.dynamicNode;
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisModelComplexityCalculator.cs

    r14185 r14232  
    3939            return 1;
    4040          }
    41         case OpCodes.Variable: {
     41        case OpCodes.Variable:
     42        case OpCodes.FactorVariable: {
    4243            return 2;
    4344          }
    44         case OpCodes.Add: 
     45        case OpCodes.Add:
    4546        case OpCodes.Sub: {
    4647            double complexity = 0;
     
    5051            return complexity;
    5152          }
    52         case OpCodes.Mul: 
     53        case OpCodes.Mul:
    5354        case OpCodes.Div: {
    5455            double complexity = 1;
     
    6061          }
    6162        case OpCodes.Sin:
    62         case OpCodes.Cos: 
     63        case OpCodes.Cos:
    6364        case OpCodes.Tan:
    64         case OpCodes.Exp: 
     65        case OpCodes.Exp:
    6566        case OpCodes.Log: {
    6667            double complexity = CalculateComplexity(node.GetSubtree(0));
     
    7576            return complexity * complexity * complexity;
    7677          }
    77         case OpCodes.Power:         
     78        case OpCodes.Power:
    7879        case OpCodes.Root: {
    7980            double complexity = CalculateComplexity(node.GetSubtree(0));
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisProblem.cs

    r14185 r14232  
    208208
    209209    protected virtual void UpdateGrammar() {
    210       SymbolicExpressionTreeGrammar.MaximumFunctionArguments = MaximumFunctionArguments.Value;
    211       SymbolicExpressionTreeGrammar.MaximumFunctionDefinitions = MaximumFunctionDefinitions.Value;
    212       foreach (var varSymbol in SymbolicExpressionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Variable>()) {
     210      var problemData = ProblemData;
     211      var ds = problemData.Dataset;
     212      var grammar = SymbolicExpressionTreeGrammar;
     213      grammar.MaximumFunctionArguments = MaximumFunctionArguments.Value;
     214      grammar.MaximumFunctionDefinitions = MaximumFunctionDefinitions.Value;
     215      foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Variable>()) {
    213216        if (!varSymbol.Fixed) {
    214           varSymbol.AllVariableNames = ProblemData.InputVariables.Select(x => x.Value);
    215           varSymbol.VariableNames = ProblemData.AllowedInputVariables;
     217          varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x));
     218          varSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<double>(x));
    216219        }
    217220      }
    218       foreach (var varSymbol in SymbolicExpressionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableCondition>()) {
     221      foreach (var factorSymbol in grammar.Symbols.OfType<FactorVariable>()) {
     222        if (!factorSymbol.Fixed) {
     223          factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x));
     224          factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x));
     225          factorSymbol.VariableValues = factorSymbol.VariableNames
     226            .ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct().ToList());
     227        }
     228      }
     229      foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableCondition>()) {
    219230        if (!varSymbol.Fixed) {
    220           varSymbol.AllVariableNames = ProblemData.InputVariables.Select(x => x.Value);
    221           varSymbol.VariableNames = ProblemData.AllowedInputVariables;
     231          varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x));
     232          varSymbol.VariableNames = problemData.AllowedInputVariables;
    222233        }
    223234      }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisSolutionImpactValuesCalculator.cs

    r14185 r14232  
    2121
    2222using System.Collections.Generic;
     23using System.Linq;
    2324using HeuristicLab.Common;
    2425using HeuristicLab.Core;
     
    3637    [StorableConstructor]
    3738    protected SymbolicDataAnalysisSolutionImpactValuesCalculator(bool deserializing) : base(deserializing) { }
    38     public abstract double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows);
    39     public abstract double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN);
    4039    public abstract void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, double qualityForImpactsCalculation = double.NaN);
    4140
    42     protected static double CalculateReplacementValue(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
     41    protected IEnumerable<double> CalculateReplacementValues(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
    4342      IDataset dataset, IEnumerable<int> rows) {
    4443      //optimization: constant nodes return always the same value
    4544      ConstantTreeNode constantNode = node as ConstantTreeNode;
    46       if (constantNode != null) return constantNode.Value;
     45      FactorVariableTreeNode factorNode = node as FactorVariableTreeNode;
     46      if (constantNode != null) {
     47        yield return constantNode.Value;
     48      } else if (factorNode != null) {
     49        // valid replacements are either all off or all on
     50        yield return 0;
     51        yield return 1;
     52      } else {
     53        var rootSymbol = new ProgramRootSymbol().CreateTreeNode();
     54        var startSymbol = new StartSymbol().CreateTreeNode();
     55        rootSymbol.AddSubtree(startSymbol);
     56        startSymbol.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());
    4757
    48       var rootSymbol = new ProgramRootSymbol().CreateTreeNode();
    49       var startSymbol = new StartSymbol().CreateTreeNode();
    50       rootSymbol.AddSubtree(startSymbol);
    51       startSymbol.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());
    52 
    53       var tempTree = new SymbolicExpressionTree(rootSymbol);
    54       // clone ADFs of source tree
    55       for (int i = 1; i < sourceTree.Root.SubtreeCount; i++) {
    56         tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone());
     58        var tempTree = new SymbolicExpressionTree(rootSymbol);
     59        // clone ADFs of source tree
     60        for (int i = 1; i < sourceTree.Root.SubtreeCount; i++) {
     61          tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone());
     62        }
     63        yield return interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).Median();
     64        yield return interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).Average(); // TODO perf
    5765      }
    58       return interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).Median();
    5966    }
    6067  }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs

    r13761 r14232  
    168168      get { return variableValues.Where(p => p.Value is List<double>).Select(p => p.Key); }
    169169    }
     170
     171    public IEnumerable<string> StringVariables {
     172      get { return variableValues.Where(p => p.Value is List<string>).Select(p => p.Key); }
     173    }
     174
    170175    public IEnumerable<double> GetDoubleValues(string variableName) {
    171176      return GetValues<double>(variableName);
     
    189194      return GetValues<double>(variableName, rows);
    190195    }
     196
     197    public string GetStringValue(string variableName, int row) {
     198      var values = GetValues<string>(variableName);
     199      return values[row];
     200    }
     201
     202    public IEnumerable<string> GetStringValues(string variableName, IEnumerable<int> rows) {
     203      return GetValues<string>(variableName, rows);
     204    }
     205    public ReadOnlyCollection<string> GetReadOnlyStringValues(string variableName) {
     206      var values = GetValues<string>(variableName);
     207      return values.AsReadOnly();
     208    }
     209
    191210    private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) {
    192211      var values = GetValues<T>(variableName);
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs

    r14185 r14232  
    131131    protected DataAnalysisProblemData(IDataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<ITransformation> transformations = null) {
    132132      if (dataset == null) throw new ArgumentNullException("The dataset must not be null.");
    133       if (allowedInputVariables == null) throw new ArgumentNullException("The allowedInputVariables must not be null.");
    134 
    135       if (allowedInputVariables.Except(dataset.DoubleVariables).Any())
    136         throw new ArgumentException("All allowed input variables must be present in the dataset and of type double.");
    137 
    138       var inputVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Select(x => new StringValue(x)));
     133      if (allowedInputVariables == null) throw new ArgumentNullException("The allowed input variables must not be null.");
     134
     135      if (allowedInputVariables.Except(dataset.DoubleVariables).Except(dataset.StringVariables).Any())
     136        throw new ArgumentException("All allowed input variables must be present in the dataset and of type double or string.");
     137
     138      var inputVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Concat(dataset.StringVariables).Select(x => new StringValue(x)));
    139139      foreach (StringValue x in inputVariables)
    140140        inputVariables.SetItemCheckedState(x, allowedInputVariables.Contains(x.Value));
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs

    r14185 r14232  
    3030
    3131    IDataset Dataset { get; }
    32     ICheckedItemList<StringValue> InputVariables { get; }
     32    ICheckedItemList<StringValue> InputVariables { get; } // TODO: check all usages of InputVariables (distinguish between doubles and strings)
    3333    IEnumerable<string> AllowedInputVariables { get; }
    3434
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataset.cs

    r14185 r14232  
    3030    IEnumerable<string> VariableNames { get; }
    3131    IEnumerable<string> DoubleVariables { get; }
     32    IEnumerable<string> StringVariables { get; }
     33
     34    bool VariableHasType<T>(string variableName);
    3235
    3336    double GetDoubleValue(string variableName, int row);
     
    3639    ReadOnlyCollection<double> GetReadOnlyDoubleValues(string variableName);
    3740
     41    string GetStringValue(string variableName, int row);
    3842    IEnumerable<string> GetStringValues(string variableName);
     43    IEnumerable<string> GetStringValues(string variableName, IEnumerable<int> rows);
     44    ReadOnlyCollection<string> GetReadOnlyStringValues(string VariableName);
     45
    3946    IEnumerable<DateTime> GetDateTimeValues(string variableName);
    4047  }
Note: See TracChangeset for help on using the changeset viewer.