Changeset 16031


Ignore:
Timestamp:
07/30/18 13:07:55 (13 months ago)
Author:
fholzing
Message:

#2904: Changed formatting (adhering to the HL coding standard) and renamed variables/methods for better readability.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2904_CalculateImpacts/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r16020 r16031  
    5858    private const string DataPartitionParameterName = "DataPartition";
    5959
    60     public IFixedValueParameter<EnumValue<ReplacementMethodEnum>> ReplacementParameter
    61     {
     60    public IFixedValueParameter<EnumValue<ReplacementMethodEnum>> ReplacementParameter {
    6261      get { return (IFixedValueParameter<EnumValue<ReplacementMethodEnum>>)Parameters[ReplacementParameterName]; }
    6362    }
    64     public IFixedValueParameter<EnumValue<FactorReplacementMethodEnum>> FactorReplacementParameter
    65     {
     63    public IFixedValueParameter<EnumValue<FactorReplacementMethodEnum>> FactorReplacementParameter {
    6664      get { return (IFixedValueParameter<EnumValue<FactorReplacementMethodEnum>>)Parameters[FactorReplacementParameterName]; }
    6765    }
    68     public IFixedValueParameter<EnumValue<DataPartitionEnum>> DataPartitionParameter
    69     {
     66    public IFixedValueParameter<EnumValue<DataPartitionEnum>> DataPartitionParameter {
    7067      get { return (IFixedValueParameter<EnumValue<DataPartitionEnum>>)Parameters[DataPartitionParameterName]; }
    7168    }
    7269
    73     public ReplacementMethodEnum ReplacementMethod
    74     {
     70    public ReplacementMethodEnum ReplacementMethod {
    7571      get { return ReplacementParameter.Value.Value; }
    7672      set { ReplacementParameter.Value.Value = value; }
    7773    }
    78     public FactorReplacementMethodEnum FactorReplacementMethod
    79     {
     74    public FactorReplacementMethodEnum FactorReplacementMethod {
    8075      get { return FactorReplacementParameter.Value.Value; }
    8176      set { FactorReplacementParameter.Value.Value = value; }
    8277    }
    83     public DataPartitionEnum DataPartition
    84     {
     78    public DataPartitionEnum DataPartition {
    8579      get { return DataPartitionParameter.Value.Value; }
    8680      set { DataPartitionParameter.Value.Value = value; }
     
    152146     FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
    153147     Func<double, string, bool> progressCallback = null) {
    154 
    155       IEnumerable<double> targetValues;
    156       double originalCalculatorValue = -1;
    157 
    158       PrepareData(rows, problemData, estimatedValues, out targetValues, out originalCalculatorValue);
     148      //Calculate original quality-values (via calculator, default is R²)
     149      OnlineCalculatorError error;
     150      IEnumerable<double> targetValuesPartition = rows.Select(v => problemData.TargetVariableValues.ElementAt(v));
     151      IEnumerable<double> estimatedValuesPartition = rows.Select(v => estimatedValues.ElementAt(v));
     152      var originalCalculatorValue = CalculateVariableImpact(targetValuesPartition, estimatedValuesPartition, out error);
     153      if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation.");
    159154
    160155      var impacts = new Dictionary<string, double>();
     
    171166          if (progressCallback((double)curIdx / count, string.Format("Calculating impact for variable {0} ({1} of {2})", inputVariable, curIdx, count))) { return null; }
    172167        }
    173         impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData.Dataset, rows, targetValues, originalCalculatorValue, replacementMethod, factorReplacementMethod);
     168        impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData.Dataset, rows, targetValuesPartition, originalCalculatorValue, replacementMethod, factorReplacementMethod);
    174169      }
    175170
     
    202197      // calculate impacts for double variables
    203198      if (dataset.VariableHasType<double>(variableName)) {
    204         impact = CalculateImpactForDouble(variableName, model, modifiableDataset, rows, targetValues, originalValue, replacementMethod);
     199        impact = CalculateImpactForNumericalVariables(variableName, model, modifiableDataset, rows, targetValues, originalValue, replacementMethod);
    205200      } else if (dataset.VariableHasType<string>(variableName)) {
    206         impact = CalculateImpactForString(variableName, model, dataset, modifiableDataset, rows, targetValues, originalValue, factorReplacementMethod);
     201        impact = CalculateImpactForFactorVariables(variableName, model, dataset, modifiableDataset, rows, targetValues, originalValue, factorReplacementMethod);
    207202      } else {
    208203        throw new NotSupportedException("Variable not supported");
     
    221216      targetValues = rows.Select(v => targetVariableValueList.ElementAt(v));
    222217      var estimatedValuesPartition = rows.Select(v => estimatedValues.ElementAt(v));
    223       originalValue = CalculateValue(targetValues, estimatedValuesPartition, out error);
     218      originalValue = CalculateVariableImpact(targetValues, estimatedValuesPartition, out error);
    224219
    225220      if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation.");
    226221    }
    227222
    228     private static double CalculateImpactForDouble(string variableName,
     223    private static double CalculateImpactForNumericalVariables(string variableName,
    229224      IRegressionModel model,
    230225      ModifiableDataset modifiableDataset,
     
    234229      ReplacementMethodEnum replacementMethod) {
    235230      OnlineCalculatorError error;
    236       var newEstimates = EvaluateModelWithReplacedVariable(model, variableName, modifiableDataset, rows, replacementMethod);
    237       var newValue = CalculateValue(targetValues, newEstimates, out error);
     231      var newEstimates = GetReplacedValuesForNumericalVariables(model, variableName, modifiableDataset, rows, replacementMethod);
     232      var newValue = CalculateVariableImpact(targetValues, newEstimates, out error);
    238233      if (error != OnlineCalculatorError.None) { throw new InvalidOperationException("Error during calculation with replaced inputs."); }
    239234      return originalValue - newValue;
    240235    }
    241236
    242     private static double CalculateImpactForString(string variableName,
     237    private static double CalculateImpactForFactorVariables(string variableName,
    243238      IRegressionModel model,
    244239      IDataset problemData,
     
    255250        foreach (var repl in problemData.GetStringValues(variableName, rows).Distinct()) {
    256251          var originalValues = modifiableDataset.GetReadOnlyStringValues(variableName).ToList();
    257           var newEstimates = EvaluateModelWithReplacedVariable(originalValues, model, variableName, modifiableDataset, rows, Enumerable.Repeat(repl, problemData.Rows).ToList());
    258           var newValue = CalculateValue(targetValues, newEstimates, out error);
     252          var newEstimates = GetReplacedValues(originalValues, model, variableName, modifiableDataset, rows, Enumerable.Repeat(repl, problemData.Rows).ToList());
     253          var newValue = CalculateVariableImpact(targetValues, newEstimates, out error);
    259254          if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation with replaced inputs.");
    260255
     
    266261        // for replacement methods shuffle and mode
    267262        // calculate impacts for factor variables
    268         var newEstimates = EvaluateModelWithReplacedVariable(model, variableName, modifiableDataset, rows, factorReplacementMethod);
    269         var newValue = CalculateValue(targetValues, newEstimates, out error);
     263        var newEstimates = GetReplacedValuesForFactorVariables(model, variableName, modifiableDataset, rows, factorReplacementMethod);
     264        var newValue = CalculateVariableImpact(targetValues, newEstimates, out error);
    270265        if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation with replaced inputs.");
    271266
     
    274269    }
    275270
    276     private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Shuffle) {
     271    private static IEnumerable<double> GetReplacedValuesForNumericalVariables(
     272      IRegressionModel model,
     273      string variable,
     274      ModifiableDataset dataset,
     275      IEnumerable<int> rows,
     276      ReplacementMethodEnum replacement = ReplacementMethodEnum.Shuffle) {
    277277      var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
    278278      double replacementValue;
     
    318318      }
    319319
    320       return EvaluateModelWithReplacedVariable(originalValues, model, variable, dataset, rows, replacementValues);
    321     }
    322 
    323     private static IEnumerable<double> EvaluateModelWithReplacedVariable(
    324       IRegressionModel model, string variable, ModifiableDataset dataset,
    325       IEnumerable<int> rows,
    326       FactorReplacementMethodEnum replacement = FactorReplacementMethodEnum.Best) {
     320      return GetReplacedValues(originalValues, model, variable, dataset, rows, replacementValues);
     321    }
     322
     323    private static IEnumerable<double> GetReplacedValuesForFactorVariables(
     324      IRegressionModel model,
     325      string variable,
     326      ModifiableDataset dataset,
     327      IEnumerable<int> rows,
     328      FactorReplacementMethodEnum replacement = FactorReplacementMethodEnum.Shuffle) {
    327329      var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
    328330      List<string> replacementValues;
     
    354356      }
    355357
    356       return EvaluateModelWithReplacedVariable(originalValues, model, variable, dataset, rows, replacementValues);
    357     }
    358 
    359     private static IEnumerable<double> EvaluateModelWithReplacedVariable(IList originalValues, IRegressionModel model, string variable,
    360       ModifiableDataset dataset, IEnumerable<int> rows, IList replacementValues) {
     358      return GetReplacedValues(originalValues, model, variable, dataset, rows, replacementValues);
     359    }
     360
     361    private static IEnumerable<double> GetReplacedValues(
     362      IList originalValues,
     363      IRegressionModel model,
     364      string variable,
     365      ModifiableDataset dataset,
     366      IEnumerable<int> rows,
     367      IList replacementValues) {
    361368      dataset.ReplaceVariable(variable, replacementValues);
    362369      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
     
    367374    }
    368375
    369     private static double CalculateValue(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) {
     376    private static double CalculateVariableImpact(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) {
    370377      IEnumerator<double> firstEnumerator = originalValues.GetEnumerator();
    371378      IEnumerator<double> secondEnumerator = estimatedValues.GetEnumerator();
Note: See TracChangeset for help on using the changeset viewer.