Changeset 14238


Ignore:
Timestamp:
08/05/16 17:34:16 (3 years ago)
Author:
gkronber
Message:

#2650:

  • added weight for FactorVariable (necessary for linear regression, LR)
  • introduced VariableBase and VariableTreeNodeBase and IVariableSymbol
  • support for factors in LR
  • extended variable impacts in solution view
  • fixed ERC (error characteristics curve) view for regression
  • support for FactorVariable in simplifier
  • improved support for FactorVariable in constants optimizer
  • multiple related changes and small fixes
Location:
branches/symbreg-factors-2650
Files:
1 added
20 edited

Legend:

Unmodified
Added
Removed
  • branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/ModelCreation/NcaModelCreator.cs

    r14237 r14238  
    2020#endregion
    2121
    22 using System;
    2322using System.Linq;
    2423using HeuristicLab.Common;
  • branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r14237 r14238  
    104104      this.allowedInputVariables = allowedInputVariables.ToArray();
    105105
    106       // check input variables. Only double variables are allowed.
    107       var invalidInputs =
    108         allowedInputVariables.Where(name => !dataset.VariableHasType<double>(name));
    109       if (invalidInputs.Any())
    110         throw new NotSupportedException("Gradient tree boosting only supports real-valued variables. Unsupported inputs: " + string.Join(", ", invalidInputs));
    111 
    112 
    113106      var inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,
    114107                                   allowedInputVariables.Concat(new string[] { targetVariable }),
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/SymbolicRegressionSolutionErrorCharacteristicsCurveView.cs

    r14185 r14238  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Diagnostics.Contracts;
    2425using System.Linq;
    2526using HeuristicLab.Algorithms.DataAnalysis;
     
    4647      if (!problemData.TrainingIndices.Any()) return null; // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)
    4748
    48       //clear checked inputVariables
    49       foreach (var inputVariable in problemData.InputVariables.CheckedItems) {
    50         problemData.InputVariables.SetItemCheckedState(inputVariable.Value, false);
    51       }
     49      var usedDoubleVariables =
     50        Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
     51        .OfType<VariableTreeNode>()
     52        .Select(node => node.VariableName)
     53      .Concat(
     54        Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
     55        .OfType<VariableConditionTreeNode>()
     56        .Select(node => node.VariableName)
     57        )
     58      .Distinct();
    5259
    53       //check inputVariables used in the symbolic regression model
    54       var usedVariables =
    55         Content.Model.SymbolicExpressionTree.IterateNodesPostfix().OfType<VariableTreeNode>().Select(
    56           node => node.VariableName).Distinct();
    57       foreach (var variable in usedVariables) {
    58         problemData.InputVariables.SetItemCheckedState(
    59           problemData.InputVariables.First(x => x.Value == variable), true);
    60       }
     60      var usedFactorVariables =
     61        Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
     62        .OfType<FactorVariableTreeNode>()
     63        .Select(node => Tuple.Create(node.VariableName, node.VariableValue))
     64        .Distinct();
    6165
    62       var solution = LinearRegression.CreateLinearRegressionSolution(problemData, out rmse, out cvRmsError);
     66      // create a new problem and dataset
     67      var variableNames =
     68        usedDoubleVariables
     69        .Concat(usedFactorVariables.Select(t => t.Item1 + "=" + t.Item2))
     70        .Concat(new string[] { problemData.TargetVariable })
     71        .ToArray();
     72      var variableValues =
     73        usedDoubleVariables.Select(name => problemData.Dataset.GetDoubleValues(name).ToList())
     74        .Concat(
     75        // create binary variable
     76          usedFactorVariables.Select(t => problemData.Dataset.GetReadOnlyStringValues(t.Item1).Select(val => val == t.Item2 ? 1.0 : 0.0).ToList())
     77        )
     78        .Concat(new[] { problemData.Dataset.GetDoubleValues(problemData.TargetVariable).ToList() });
     79
     80      var newDs = new Dataset(variableNames, variableValues);
     81      var newProblemData = new RegressionProblemData(newDs, variableNames.Take(variableNames.Length - 1), variableNames.Last());
     82      newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
     83      newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
     84      newProblemData.TestPartition.Start = problemData.TestPartition.Start;
     85      newProblemData.TestPartition.End = problemData.TestPartition.End;
     86
     87      var solution = LinearRegression.CreateLinearRegressionSolution(newProblemData, out rmse, out cvRmsError);
    6388      solution.Name = "Baseline (linear subset)";
    6489      return solution;
     
    6893    protected override IEnumerable<IRegressionSolution> CreateBaselineSolutions() {
    6994      foreach (var sol in base.CreateBaselineSolutions()) yield return sol;
     95
     96      // does not support lagged variables
     97      if (Content.Model.SymbolicExpressionTree.IterateNodesPrefix().OfType<LaggedVariableTreeNode>().Any()) yield break;
     98
    7099      yield return CreateLinearRegressionSolution();
    71100    }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/MultiObjective/PearsonRSquaredNumberOfVariablesEvaluator.cs

    r14185 r14238  
    6666      if (decimalPlaces >= 0)
    6767        r2 = Math.Round(r2, decimalPlaces);
    68       return new double[2] { r2, solution.IterateNodesPostfix().OfType<VariableTreeNode>().Count() }; // count the number of variables
     68      return new double[2] { r2, solution.IterateNodesPostfix().OfType<IVariableTreeNode>().Count() }; // count the number of variables
    6969    }
    7070
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/InteractiveSymbolicExpressionTreeChart.cs

    r14237 r14238  
    200200      // check if the copied/cut node (stored in the tempNode) can be inserted as a child of the current selected node
    201201      var node = currSelected.Content;
    202       if (node is ConstantTreeNode || node is VariableTreeNodeBase) return;
     202      if (node is ConstantTreeNode || node is VariableTreeNode) return;
    203203      // check if the currently selected node can accept the copied node as a child
    204204      // no need to check the grammar, an arity check will do just fine here
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/Symbols/VariableView.cs

    r14185 r14238  
    3535  [View("Variable View")]
    3636  [Content(typeof(Variable), true)]
     37  [Content(typeof(FactorVariable), true)]
    3738  public partial class VariableView : SymbolView {
    3839    private CheckedItemCollectionView<StringValue> variableNamesView;
    3940
    40     public new Variable Content {
    41       get { return (Variable)base.Content; }
     41    public new VariableBase Content {
     42      get { return (VariableBase)base.Content; }
    4243      set { base.Content = value; }
    4344    }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/TreeEditDialogs/SymbolicExpressionTreeNodeInsertDialog.cs

    r14185 r14238  
    5555        constantValueLabel.Visible = true;
    5656        constantValueTextBox.Visible = true;
    57       } else if (symbol is Variable) {
    58         var variable = (Variable)symbol;
     57      } else if (symbol is VariableBase) {
     58        var variable = (VariableBase)symbol;
    5959        foreach (var name in variable.VariableNames) variableNamesCombo.Items.Add(name);
    6060        variableNamesCombo.SelectedIndex = 0;
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs

    r14232 r14238  
    167167      if (aggregateLaggedVariables) {
    168168        tree.Root.ForEachNodePrefix(node => {
    169           if (node.Symbol is Variable) {
    170             var varNode = node as VariableTreeNode;
    171             IncReferenceCount(references, varNode.VariableName);
    172           } else if (node.Symbol is VariableCondition) {
    173             var varCondNode = node as VariableConditionTreeNode;
    174             IncReferenceCount(references, varCondNode.VariableName);
    175           } else if (node.Symbol is FactorVariable) {
     169          if (node is IVariableTreeNode) {
    176170            var factorNode = node as FactorVariableTreeNode;
    177             if (aggregateFactorVariables) {
    178               IncReferenceCount(references, factorNode.VariableName);
     171            if (factorNode != null && !aggregateFactorVariables) {
     172              IncReferenceCount(references, factorNode.VariableName + "=" + factorNode.VariableValue);
    179173            } else {
    180               IncReferenceCount(references, factorNode.ToString());
     174              var varNode = node as IVariableTreeNode;
     175              IncReferenceCount(references, varNode.VariableName);
    181176            }
    182177          }
     
    189184
    190185    private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag, bool aggregateFactorVariables) {
    191       if (node.Symbol is LaggedVariable) {
    192         var laggedVarNode = node as LaggedVariableTreeNode;
    193         IncReferenceCount(references, laggedVarNode.VariableName, currentLag + laggedVarNode.Lag);
    194       } else if (node.Symbol is Variable) {
    195         var varNode = node as VariableTreeNode;
    196         IncReferenceCount(references, varNode.VariableName, currentLag);
    197       } else if (node.Symbol is FactorVariable) {
    198         var factorNode = node as FactorVariableTreeNode;
    199         if (aggregateFactorVariables) {
    200           IncReferenceCount(references, factorNode.VariableName, currentLag);
     186      if (node is IVariableTreeNode) {
     187        var laggedVarTreeNode = node as LaggedVariableTreeNode;
     188        var factorVarTreeNode = node as FactorVariableTreeNode;
     189        var varConditionTreeNode = node as VariableConditionTreeNode;
     190        if (laggedVarTreeNode != null) {
     191          IncReferenceCount(references, laggedVarTreeNode.VariableName, currentLag + laggedVarTreeNode.Lag);
     192        } else if (factorVarTreeNode != null) {
     193          if (aggregateFactorVariables) {
     194            IncReferenceCount(references, factorVarTreeNode.VariableName, currentLag);
     195          } else {
     196            IncReferenceCount(references, factorVarTreeNode.VariableName + "=" + factorVarTreeNode.VariableValue, currentLag);
     197          }
     198        } else if (varConditionTreeNode != null) {
     199          IncReferenceCount(references, varConditionTreeNode.VariableName, currentLag);
     200          GetVariableReferences(references, node.GetSubtree(0), currentLag, aggregateFactorVariables);
     201          GetVariableReferences(references, node.GetSubtree(1), currentLag, aggregateFactorVariables);
    201202        } else {
    202           IncReferenceCount(references, factorNode.ToString(), currentLag);
    203         }
    204       } else if (node.Symbol is VariableCondition) {
    205         var varCondNode = node as VariableConditionTreeNode;
    206         IncReferenceCount(references, varCondNode.VariableName, currentLag);
    207         GetVariableReferences(references, node.GetSubtree(0), currentLag, aggregateFactorVariables);
    208         GetVariableReferences(references, node.GetSubtree(1), currentLag, aggregateFactorVariables);
     203          var varNode = node as IVariableTreeNode;
     204          IncReferenceCount(references, varNode.VariableName, currentLag);
     205        }
    209206      } else if (node.Symbol is Integral) {
    210207        var laggedNode = node as LaggedTreeNode;
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj

    r14237 r14238  
    140140    <Compile Include="Importer\Token.cs" />
    141141    <Compile Include="Interfaces\IModelBacktransformator.cs" />
     142    <Compile Include="Interfaces\IVariableSymbol.cs" />
    142143    <Compile Include="Interpreter\SymbolicDataAnalysisExpressionCompiledTreeInterpreter.cs" />
    143144    <Compile Include="SymbolicDataAnalysisExpressionTreeSimplificationOperator.cs" />
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeSimplifier.cs

    r14237 r14238  
    182182      return node.Symbol is Variable;
    183183    }
    184 
     184    private bool IsVariableBase(ISymbolicExpressionTreeNode node) {
     185      return node.Symbol is VariableBase;
     186    }
    185187    private bool IsConstant(ISymbolicExpressionTreeNode node) {
    186188      return node.Symbol is Constant;
     
    203205    /// <returns></returns>
    204206    public ISymbolicExpressionTreeNode GetSimplifiedTree(ISymbolicExpressionTreeNode original) {
    205       if (IsConstant(original) || IsVariable(original)) {
     207      if (IsConstant(original) || IsVariableBase(original)) {
    206208        return (ISymbolicExpressionTreeNode)original.Clone();
    207209      } else if (IsAddition(original)) {
     
    718720      } if (IsConstant(a) && !((ConstantTreeNode)a).Value.IsAlmost(1.0)) {
    719721        return MakeFraction(MakeConstant(1.0), MakeProduct(b, Invert(a)));
    720       } else if (IsVariable(a) && IsConstant(b)) {
     722      } else if (IsVariableBase(a) && IsConstant(b)) {
    721723        // merge constant values into variable weights
    722724        var constB = ((ConstantTreeNode)b).Value;
    723         ((VariableTreeNode)a).Weight /= constB;
     725        ((VariableTreeNodeBase)a).Weight /= constB;
    724726        return a;
    725       } else if (IsVariable(a) && IsVariable(b) && AreSameVariable(a, b)) {
     727      } else if (IsVariableBase(a) && IsVariableBase(b) && AreSameVariable(a, b)) {
    726728        // cancel variables
    727729        var aVar = a as VariableTreeNode;
     
    835837      var groupedVarNodes = from node in subtrees.OfType<VariableTreeNodeBase>()
    836838                            let lag = (node is LaggedVariableTreeNode) ? ((LaggedVariableTreeNode)node).Lag : 0
    837                             group node by node.VariableName + lag into g
     839                            let cat = (node is FactorVariableTreeNode) ? ((FactorVariableTreeNode)node).VariableValue : string.Empty
     840                            group node by node.VariableName + cat + lag into g
    838841                            select g;
    839842      var unchangedSubtrees = subtrees.Where(t => !(t is VariableTreeNodeBase));
     
    861864        // $ * 1.0 => $
    862865        return a;
    863       } else if (IsConstant(b) && IsVariable(a)) {
     866      } else if (IsConstant(b) && IsVariableBase(a)) {
    864867        // multiply constants into variables weights
    865         ((VariableTreeNode)a).Weight *= ((ConstantTreeNode)b).Value;
     868        ((VariableTreeNodeBase)a).Weight *= ((ConstantTreeNode)b).Value;
    866869        return a;
    867870      } else if (IsConstant(b) && IsAddition(a)) {
     
    944947        return aVar.VariableName == bVar.VariableName;
    945948      }
     949      var aFactor = a as FactorVariableTreeNode;
     950      var bFactor = b as FactorVariableTreeNode;
     951      if (aFactor != null && bFactor != null) {
     952        return aFactor.VariableName == bFactor.VariableName &&
     953          aFactor.VariableValue == bFactor.VariableValue;
     954      }
     955
    946956      return false;
    947957    }
     
    951961      var subtrees = new List<ISymbolicExpressionTreeNode>(prod.Subtrees);
    952962      while (prod.Subtrees.Any()) prod.RemoveSubtree(0);
    953       var groupedVarNodes = from node in subtrees.OfType<VariableTreeNode>()
     963      var groupedVarNodes = from node in subtrees.OfType<VariableTreeNodeBase>()
    954964                            let lag = (node is LaggedVariableTreeNode) ? ((LaggedVariableTreeNode)node).Lag : 0
    955965                            group node by node.VariableName + lag into g
    956966                            orderby g.Count()
    957967                            select g;
    958       var constantProduct = (from node in subtrees.OfType<VariableTreeNode>()
     968      var constantProduct = (from node in subtrees.OfType<VariableTreeNodeBase>()
    959969                             select node.Weight)
    960970                            .Concat(from node in subtrees.OfType<ConstantTreeNode>()
     
    964974
    965975      var unchangedSubtrees = from tree in subtrees
    966                               where !(tree is VariableTreeNode)
     976                              where !(tree is VariableTreeNodeBase)
    967977                              where !(tree is ConstantTreeNode)
    968978                              select tree;
     
    10001010      if (IsConstant(x)) {
    10011011        ((ConstantTreeNode)x).Value *= -1;
    1002       } else if (IsVariable(x)) {
    1003         var variableTree = (VariableTreeNode)x;
     1012      } else if (IsVariableBase(x)) {
     1013        var variableTree = (VariableTreeNodeBase)x;
    10041014        variableTree.Weight *= -1.0;
    10051015      } else if (IsAddition(x)) {
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisModel.cs

    r14185 r14238  
    6363        var variables =
    6464          SymbolicExpressionTree.IterateNodesPrefix()
    65             .OfType<VariableTreeNode>()
     65            .OfType<IVariableTreeNode>()
    6666            .Select(x => x.VariableName)
    6767            .Distinct();
    68         var variableConditions = SymbolicExpressionTree.IterateNodesPrefix()
    69           .OfType<VariableConditionTreeNode>().Select(x => x.VariableName).Distinct();
    7068
    71         return variables.Union(variableConditions).OrderBy(x => x);
     69        return variables.OrderBy(x => x);
    7270      }
    7371    }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisProblem.cs

    r14232 r14238  
    213213      grammar.MaximumFunctionArguments = MaximumFunctionArguments.Value;
    214214      grammar.MaximumFunctionDefinitions = MaximumFunctionDefinitions.Value;
    215       foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Variable>()) {
     215      foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableBase>()) {
    216216        if (!varSymbol.Fixed) {
    217217          varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x));
     
    225225          factorSymbol.VariableValues = factorSymbol.VariableNames
    226226            .ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct().ToList());
    227         }
    228       }
    229       foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableCondition>()) {
    230         if (!varSymbol.Fixed) {
    231           varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x));
    232           varSymbol.VariableNames = problemData.AllowedInputVariables;
    233227        }
    234228      }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Symbols/Variable.cs

    r14237 r14238  
    3939    }
    4040    public Variable() : base("Variable", "Represents a variable value.") { }
     41    public Variable(string name, string description) : base(name, description) { }
    4142
    4243    public override ISymbolicExpressionTreeNode CreateTreeNode() {
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Symbols/VariableBase.cs

    r14237 r14238  
    2727namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    2828  [StorableClass]
    29   public abstract class VariableBase : Symbol {
     29  public abstract class VariableBase : Symbol, IVariableSymbol {
    3030    #region Properties
    3131    [Storable]
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Symbols/VariableCondition.cs

    r14185 r14238  
    3030  [StorableClass]
    3131  [Item("Variable Condition", "Represents a condition that tests a given variable against a specified threshold.")]
    32   public sealed class VariableCondition : Symbol {
     32  public sealed class VariableCondition : Symbol, IVariableSymbol {
    3333    #region properties
    3434    [Storable]
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Symbols/VariableConditionTreeNode.cs

    r14185 r14238  
    2929namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    3030  [StorableClass]
    31   public sealed class VariableConditionTreeNode : SymbolicExpressionTreeNode {
     31  public sealed class VariableConditionTreeNode : SymbolicExpressionTreeNode, IVariableTreeNode {
    3232    #region properties
    3333    public new VariableCondition Symbol {
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Symbols/VariableTreeNodeBase.cs

    r14237 r14238  
    2727namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    2828  [StorableClass]
    29   public abstract class VariableTreeNodeBase : SymbolicExpressionTreeTerminalNode {
     29  public abstract class VariableTreeNodeBase : SymbolicExpressionTreeTerminalNode, IVariableTreeNode {
    3030    public new VariableBase Symbol {
    3131      get { return (VariableBase)base.Symbol; }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r13986 r14238  
    9090    public static IEnumerable<Tuple<string, double>> CalculateImpacts(IRegressionSolution solution,
    9191      DataPartitionEnum data = DataPartitionEnum.Training,
    92       ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
     92      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median) {
    9393
    9494      var problemData = solution.ProblemData;
     
    126126      var modifiableDataset = ((Dataset)dataset).ToModifiable();
    127127
    128       foreach (var inputVariable in problemData.AllowedInputVariables) {
    129         var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacement);
     128      // calculate impacts for double variables
     129      foreach (var inputVariable in problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) {
     130        var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod);
    130131        var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
    131132        if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
     
    134135        var impact = originalR2 - newR2;
    135136        impacts[inputVariable] = impact;
     137      }
     138      // calculate impacts for factor variables
     139      foreach (var inputVariable in problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<string>)) {
     140        var smallestImpact = double.PositiveInfinity;
     141        foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) {
     142          var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, Enumerable.Repeat(repl, dataset.Rows));
     143          var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
     144          if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
     145
     146          newR2 = newR2 * newR2;
     147          var impact = originalR2 - newR2;
     148          if (impact < smallestImpact) smallestImpact = impact;
     149        }
     150        impacts[inputVariable] = smallestImpact;
    136151      }
    137152      return impacts.OrderByDescending(i => i.Value).Select(i => Tuple.Create(i.Key, i.Value));
     
    169184      }
    170185
    171       dataset.ReplaceVariable(variable, replacementValues);
     186      return EvaluateModelWithReplacedVariable(model, variable, dataset, rows, replacementValues);
     187    }
     188
     189    private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<double> replacementValues) {
     190      var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
     191      dataset.ReplaceVariable(variable, replacementValues.ToList());
    172192      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
    173193      var estimates = model.GetEstimatedValues(dataset, rows).ToList();
     
    176196      return estimates;
    177197    }
     198    private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<string> replacementValues) {
     199      var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
     200      dataset.ReplaceVariable(variable, replacementValues.ToList());
     201      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
     202      var estimates = model.GetEstimatedValues(dataset, rows).ToList();
     203      dataset.ReplaceVariable(variable, originalValues);
     204
     205      return estimates;
     206    }
    178207  }
    179208}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/ModifiableDataset.cs

    r13761 r14238  
    7777    public void ReplaceVariable(string variableName, IList values) {
    7878      if (!variableValues.ContainsKey(variableName))
    79         throw new ArgumentException(string.Format("Variable {0} is not present in the dataset."), variableName);
     79        throw new ArgumentException(string.Format("Variable {0} is not present in the dataset.", variableName));
    8080      if (values.Count != variableValues[variableName].Count)
    8181        throw new ArgumentException("The number of values must coincide with the number of dataset rows.");
  • branches/symbreg-factors-2650/HeuristicLab.Problems.GrammaticalEvolution/3.4/SymbolicRegression/GESymbolicDataAnalysisProblem.cs

    r14185 r14238  
    177177      ApplyLinearScalingParameter.Hidden = true;
    178178
     179      if(problemData.AllowedInputVariables.Any(name => !problemData.Dataset.VariableHasType<double>(name)))  throw new NotSupportedException("Categorical variables are not supported");
    179180      SymbolicExpressionTreeGrammar = new GESymbolicExpressionGrammar(problemData.AllowedInputVariables, problemData.AllowedInputVariables.Count() * 3);
    180181      SymbolicExpressionTreeInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();
Note: See TracChangeset for help on using the changeset viewer.