Changeset 16020


Ignore:
Timestamp:
07/26/18 13:00:41 (13 months ago)
Author:
fholzing
Message:

#2904: Removed the static calculator variable, changed the default ReplacementMethod from Median to Shuffle, adapted the calculation method to adhere to the OnlineCalculators, and re-added the condition for counting the input parameters

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2904_CalculateImpacts/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r16018 r16020  
    3737  [Item("RegressionSolution Impacts Calculator", "Calculation of the impacts of input variables for any regression solution")]
    3838  public sealed class RegressionSolutionVariableImpactsCalculator : ParameterizedNamedItem {
    39     private static IOnlineCalculator calculator = new OnlinePearsonsRSquaredCalculator();
    40 
    4139    public enum ReplacementMethodEnum {
    4240      Median,
     
    112110    public static IEnumerable<Tuple<string, double>> CalculateImpacts(
    113111      IRegressionSolution solution,
    114       ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
     112      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    115113      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
    116114      DataPartitionEnum data = DataPartitionEnum.Training,
     
    123121      IRegressionProblemData problemData,
    124122      IEnumerable<double> estimatedValues,
    125       ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
     123      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    126124      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
    127125      DataPartitionEnum data = DataPartitionEnum.Training,
     
    151149     IEnumerable<double> estimatedValues,
    152150     IEnumerable<int> rows,
    153      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
     151     ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    154152     FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
    155153     Func<double, string, bool> progressCallback = null) {
    156154
    157155      IEnumerable<double> targetValues;
    158       double originalValue = -1;
    159 
    160       PrepareData(rows, problemData, estimatedValues, out targetValues, out originalValue);
     156      double originalCalculatorValue = -1;
     157
     158      PrepareData(rows, problemData, estimatedValues, out targetValues, out originalCalculatorValue);
    161159
    162160      var impacts = new Dictionary<string, double>();
     
    165163
    166164      int curIdx = 0;
    167       int count = allowedInputVariables.Count();
     165      int count = allowedInputVariables.Count(v => problemData.Dataset.VariableHasType<double>(v) || problemData.Dataset.VariableHasType<string>(v));
    168166
    169167      foreach (var inputVariable in allowedInputVariables) {
     
    173171          if (progressCallback((double)curIdx / count, string.Format("Calculating impact for variable {0} ({1} of {2})", inputVariable, curIdx, count))) { return null; }
    174172        }
    175         impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData.Dataset, rows, targetValues, originalValue, replacementMethod, factorReplacementMethod);
     173        impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData.Dataset, rows, targetValues, originalCalculatorValue, replacementMethod, factorReplacementMethod);
    176174      }
    177175
     
    184182      IEnumerable<double> targetValues,
    185183      double originalValue,
    186       ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
     184      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    187185      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
    188186      DataPartitionEnum data = DataPartitionEnum.Training) {
     
    196194      IEnumerable<double> targetValues,
    197195      double originalValue,
    198       ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
     196      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    199197      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) {
    200198
     
    276274    }
    277275
    278     private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
     276    private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Shuffle) {
    279277      var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
    280278      double replacementValue;
     
    369367    }
    370368
    371     private static double CalculateValue(IEnumerable<double> targets, IEnumerable<double> estimates, out OnlineCalculatorError error) {
    372       calculator.Reset();
    373 
    374       var targetsEnumerator = targets.GetEnumerator();
    375       var estimatesEnumerator = estimates.GetEnumerator();
    376 
    377       bool targetsHasNextValue = targetsEnumerator.MoveNext();
    378       bool estimatesHasNextValue = estimatesEnumerator.MoveNext();
    379 
    380       while (targetsHasNextValue && estimatesHasNextValue) {
    381         calculator.Add(targetsEnumerator.Current, estimatesEnumerator.Current);
    382         targetsHasNextValue = targetsEnumerator.MoveNext();
    383         estimatesHasNextValue = estimatesEnumerator.MoveNext();
    384       }
    385 
    386       //Check if there is an equal quantity of targets and estimates
    387       if (targetsHasNextValue != estimatesHasNextValue) {
    388         throw new ArgumentException(string.Format("Targets and Estimates must be of equal length ({0},{1})", targets.Count(), estimates.Count()));
    389 
    390       }
    391 
    392       error = calculator.ErrorState;
    393       return calculator.Value;
     369    private static double CalculateValue(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) {
     370      IEnumerator<double> firstEnumerator = originalValues.GetEnumerator();
     371      IEnumerator<double> secondEnumerator = estimatedValues.GetEnumerator();
     372      var calculator = new OnlinePearsonsRSquaredCalculator();
     373
     374      // always move forward both enumerators (do not use short-circuit evaluation!)
     375      while (firstEnumerator.MoveNext() & secondEnumerator.MoveNext()) {
     376        double original = firstEnumerator.Current;
     377        double estimated = secondEnumerator.Current;
     378        calculator.Add(original, estimated);
     379        if (calculator.ErrorState != OnlineCalculatorError.None) break;
     380      }
     381
     382      // check if both enumerators are at the end to make sure both enumerations have the same length
     383      if (calculator.ErrorState == OnlineCalculatorError.None &&
     384           (secondEnumerator.MoveNext() || firstEnumerator.MoveNext())) {
     385        throw new ArgumentException("Number of elements in first and second enumeration doesn't match.");
     386      } else {
     387        errorState = calculator.ErrorState;
     388        return calculator.Value;
     389      }
    394390    }
    395391  }
Note: See TracChangeset for help on using the changeset viewer.