Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/05/16 17:34:16 (8 years ago)
Author:
gkronber
Message:

#2650:

  • added weight for FactorVariable (necessary for LR)
  • introduced VariableBase and VariableTreeNodeBase and IVariableSymbol
  • support for factors in LR
  • extended variable impacts in solution view
  • fixed ERC view for regression
  • support for FactorVariable in simplifier
  • improved support for FactorVariable in constants optimizer
  • multiple related changes and small fixes
Location:
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r13986 r14238  
    90 90    public static IEnumerable<Tuple<string, double>> CalculateImpacts(IRegressionSolution solution,
    91 91      DataPartitionEnum data = DataPartitionEnum.Training,
    92       ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
     92      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median) {
    93 93
    94 94      var problemData = solution.ProblemData;
     
    126 126      var modifiableDataset = ((Dataset)dataset).ToModifiable();
    127 127
    128       foreach (var inputVariable in problemData.AllowedInputVariables) {
    129         var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacement);
     128      // calculate impacts for double variables
     129      foreach (var inputVariable in problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) {
     130        var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod);
    130 131        var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
    131 132        if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
     
    134 135        var impact = originalR2 - newR2;
    135 136        impacts[inputVariable] = impact;
     137      }
     138      // calculate impacts for factor variables
     139      foreach (var inputVariable in problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<string>)) {
     140        var smallestImpact = double.PositiveInfinity;
     141        foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) {
     142          var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, Enumerable.Repeat(repl, dataset.Rows));
     143          var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
     144          if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
     145
     146          newR2 = newR2 * newR2;
     147          var impact = originalR2 - newR2;
     148          if (impact < smallestImpact) smallestImpact = impact;
     149        }
     150        impacts[inputVariable] = smallestImpact;
    136 151      }
    137 152      return impacts.OrderByDescending(i => i.Value).Select(i => Tuple.Create(i.Key, i.Value));
     
    169 184      }
    170 185
    171       dataset.ReplaceVariable(variable, replacementValues);
     186      return EvaluateModelWithReplacedVariable(model, variable, dataset, rows, replacementValues);
     187    }
     188
     189    private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<double> replacementValues) {
     190      var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
     191      dataset.ReplaceVariable(variable, replacementValues.ToList());
    172 192      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
    173 193      var estimates = model.GetEstimatedValues(dataset, rows).ToList();
     
    176 196      return estimates;
    177 197    }
     198    private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<string> replacementValues) {
     199      var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
     200      dataset.ReplaceVariable(variable, replacementValues.ToList());
     201      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
     202      var estimates = model.GetEstimatedValues(dataset, rows).ToList();
     203      dataset.ReplaceVariable(variable, originalValues);
     204
     205      return estimates;
     206    }
    178 207  }
    179 208}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/ModifiableDataset.cs

    r13761 r14238  
    77 77    public void ReplaceVariable(string variableName, IList values) {
    78 78      if (!variableValues.ContainsKey(variableName))
    79         throw new ArgumentException(string.Format("Variable {0} is not present in the dataset."), variableName);
     79        throw new ArgumentException(string.Format("Variable {0} is not present in the dataset.", variableName));
    80 80      if (values.Count != variableValues[variableName].Count)
    81 81        throw new ArgumentException("The number of values must coincide with the number of dataset rows.");
Note: See TracChangeset for help on using the changeset viewer.