Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/13/18 15:22:18 (5 years ago)
Author:
bburlacu
Message:

#2288: Refactor code (use the HeuristicLab (HL) variable impacts calculator instead of calculating impacts manually; various fixes and improvements)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2288_HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks.Views/3.3/RunCollectionVariableInteractionNetworkView.cs

    r15421 r16295  
    111111      var targetImpacts = new Dictionary<string, Tuple<IEnumerable<IRun>, Dictionary<string, double>>>();
    112112
     113      var groups = runs.GroupBy(run => {
     114        var sol = (IRegressionSolution)run.Results.Values.Single(x => x is IRegressionSolution);
     115        return Concatenate(sol.ProblemData.AllowedInputVariables) + sol.ProblemData.TargetVariable;
     116      });
     117
    113118      if (useBest) {
    114119        // build network using only the best run for each target
     120        foreach (var group in groups) {
     121          var solutions = group.Select(run => Tuple.Create(run, (IRegressionSolution)run.Results.Values.Single(sol => sol is IRegressionSolution)));
     122          var best = solutions.OrderBy(x => x.Item2.TrainingRSquared).Last();
     123          var impacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(best.Item2, RegressionSolutionVariableImpactsCalculator.DataPartitionEnum.All, RegressionSolutionVariableImpactsCalculator.ReplacementMethodEnum.Shuffle).ToDictionary(x => x.Item1, x => x.Item2);
     124
     125          targetImpacts[best.Item2.ProblemData.TargetVariable] = Tuple.Create(new[] { best.Item1 }.AsEnumerable(), impacts);
     126        }
    115127      } else {
    116         var groups = runs.GroupBy(run => {
    117           var sol = (IRegressionSolution)run.Results.Values.Single(x => x is IRegressionSolution);
    118           return Concatenate(sol.ProblemData.AllowedInputVariables) + sol.ProblemData.TargetVariable;
    119         });
    120 
    121128        foreach (var group in groups) {
    122129          // calculate average impacts
     
    131138            }
    132139            var md = dataset.ToModifiable();
    133             var impacts = CalculateImpacts(sol, md, originalValues, medians, estimationLimits);
    134             foreach (var pair in impacts) {
    135               if (averageImpacts.ContainsKey(pair.Key))
    136                 averageImpacts[pair.Key] += pair.Value;
     140
     141            var impacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(sol, RegressionSolutionVariableImpactsCalculator.DataPartitionEnum.All, RegressionSolutionVariableImpactsCalculator.ReplacementMethodEnum.Shuffle);
     142            foreach (var t in impacts) {
     143              if (averageImpacts.ContainsKey(t.Item1))
     144                averageImpacts[t.Item1] += t.Item2;
    137145              else {
    138                 averageImpacts[pair.Key] = pair.Value;
     146                averageImpacts[t.Item1] = t.Item2;
    139147              }
    140148            }
    141149          }
     150
    142151          var count = group.Count();
    143           var keys = averageImpacts.Keys.ToList();
    144           foreach (var v in keys) {
     152          foreach (var v in averageImpacts.Keys) {
    145153            averageImpacts[v] /= count;
    146154          }
    147155
    148           targetImpacts[solution.ProblemData.TargetVariable] = new Tuple<IEnumerable<IRun>, Dictionary<string, double>>(group, averageImpacts);
     156          targetImpacts[solution.ProblemData.TargetVariable] = Tuple.Create(group.AsEnumerable(), averageImpacts);
    149157        }
    150158      }
    151159      return targetImpacts;
    152     }
    153 
    154     private static Dictionary<string, double> CalculateImpacts(IRegressionSolution solution, ModifiableDataset dataset,
    155       Dictionary<string, List<double>> originalValues, Dictionary<string, List<double>> medianValues, DoubleLimit estimationLimits = null) {
    156       var impacts = new Dictionary<string, double>();
    157 
    158       var model = solution.Model;
    159       var pd = solution.ProblemData;
    160 
    161       var rows = pd.TrainingIndices.ToList();
    162       var targetValues = pd.Dataset.GetDoubleValues(pd.TargetVariable, rows).ToList();
    163 
    164 
    165       foreach (var v in pd.AllowedInputVariables) {
    166         dataset.ReplaceVariable(v, medianValues[v]);
    167 
    168         var estimatedValues = model.GetEstimatedValues(dataset, rows);
    169         if (estimationLimits != null)
    170           estimatedValues = estimatedValues.LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
    171 
    172         OnlineCalculatorError error;
    173         var r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out error);
    174         var newQuality = error == OnlineCalculatorError.None ? r * r : double.NaN;
    175         var originalQuality = solution.TrainingRSquared;
    176         impacts[v] = originalQuality - newQuality;
    177 
    178         dataset.ReplaceVariable(v, originalValues[v]);
    179       }
    180       return impacts;
    181160    }
    182161
    183162    public static Dictionary<string, Tuple<IEnumerable<IRun>, Dictionary<string, double>>> CalculateVariableImpactsFromRunResults(RunCollection runs,
    184163      string qualityResultName, bool maximization, string impactsResultName, bool useBestRunsPerTarget = false) {
    185       var targets = runs.GroupBy(x => ((IRegressionProblemData)x.Parameters["ProblemData"]).TargetVariable).ToList();
     164
     165      Func<IRun, double> getQuality = run => ((DoubleValue)run.Results[qualityResultName]).Value;
     166      var targetGroups = runs.GroupBy(x => ((IRegressionProblemData)x.Parameters["ProblemData"]).TargetVariable).ToList();
    186167      var targetImpacts = new Dictionary<string, Tuple<IEnumerable<IRun>, Dictionary<string, double>>>();
    187168
    188       Func<IRun, double> getQualityValue = run => ((DoubleValue)run.Results[qualityResultName]).Value;
    189 
    190169      if (useBestRunsPerTarget) {
    191         var bestRunsPerTarget = maximization
    192           ? targets.Select(x => x.OrderBy(getQualityValue).Last())
    193           : targets.Select(x => x.OrderBy(getQualityValue).First());
    194 
    195         foreach (var run in bestRunsPerTarget) {
    196           var pd = (IRegressionProblemData)run.Parameters["ProblemData"];
    197           var target = pd.TargetVariable;
    198           var impacts = (DoubleMatrix)run.Results[impactsResultName];
    199           targetImpacts[target] = new Tuple<IEnumerable<IRun>, Dictionary<string, double>>(new[] { run }, impacts.RowNames.Select((x, i) => new { Name = x, Index = i }).ToDictionary(x => x.Name, x => impacts[x.Index, 0]));
     170        foreach (var group in targetGroups) {
     171          var ordered = group.OrderBy(getQuality);
     172          var best = maximization ? ordered.Last() : ordered.First();
     173          var pd = (IRegressionProblemData)best.Parameters["ProblemData"];
     174          var target = group.Key;
     175          var impacts = (DoubleMatrix)best.Results[impactsResultName];
     176          targetImpacts[target] = Tuple.Create((IEnumerable<IRun>)new[] { best }, impacts.RowNames.Select((x, i) => new { x, i }).ToDictionary(x => x.x, x => impacts[x.i, 0]));
    200177        }
    201178      } else {
    202         foreach (var target in targets) {
     179        foreach (var target in targetGroups) {
    203180          var averageImpacts = CalculateAverageImpacts(new RunCollection(target), impactsResultName);
    204181          targetImpacts[target.Key] = new Tuple<IEnumerable<IRun>, Dictionary<string, double>>(target, averageImpacts);
     
    460437      worker.DoWork += (o, e) => {
    461438        ControlsEnable(false);
    462         var impacts = CalculateVariableImpactsOnline(Content, false);
     439        var impacts = CalculateVariableImpactsOnline(Content, impactAggregationComboBox.SelectedIndex == 0);
    463440        variableInteractionNetwork = CreateNetwork(impacts);
    464441        var threshold = impactThresholdTrackBar.Minimum + (double)impactThresholdTrackBar.Value / impactThresholdTrackBar.Maximum;
Note: See TracChangeset for help on using the changeset viewer.