Changeset 8173


Ignore:
Timestamp:
07/02/12 09:31:33 (10 years ago)
Author:
sforsten
Message:

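#1708: Removed the now-obsolete relativeFrequencies data structure (the histogram bins are now written directly to the chart series' data points) and made some further changes, such as extracting CalculateResidualParameters, to make the code more readable.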

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionResidualHistogram.cs

    r8104 r8173  
    4242    /// </summary>
    4343    protected const double bins = 25;
    44     /// <summary>
    45     ///  keeps for all series a list for every bin with the position of the bin, the relative frequency of the
    46     ///  residuals and the beginning and the end of the interval of the bin
    47     ///  </summary>
    48     protected readonly Dictionary<string, List<List<double>>> relativeFrequencies;
    4944    #endregion
    5045
     
    5752      : base() {
    5853      InitializeComponent();
    59       relativeFrequencies = new Dictionary<string, List<List<double>>>();
    6054      foreach (string series in new List<String>() { ALL_SAMPLES, TRAINING_SAMPLES, TEST_SAMPLES }) {
    6155        chart.Series.Add(series);
     
    6761        chart.Series[series].BorderColor = Color.Black;
    6862        chart.Series[series].ToolTip = series + " Y = #VALY from #CUSTOMPROPERTY(from) to #CUSTOMPROPERTY(to)";
    69         relativeFrequencies[series] = new List<List<double>>();
    7063      }
    7164      //configure axis
     
    8578      foreach (Series series in chart.Series) {
    8679        series.Points.Clear();
    87         relativeFrequencies[series.Name].Clear();
    8880      }
    8981      if (Content != null) {
    90         List<double> residuals = CalculateResiduals();
    91         double realMax = Math.Max(Math.Abs(residuals.Min()), Math.Abs(residuals.Max()));
    92         double roundedMax = HumanRoundMax(realMax);
    93         double intervalWidth = (roundedMax * 2.0) / bins;
    94         intervalWidth = HumanRoundMax(intervalWidth);
    95         // sets roundedMax to a value, so that zero will be in the middle of the x axis
    96         double help = realMax / intervalWidth;
    97         help = help % 1 < 0.5 ? (int)help : (int)help + 1;
    98         roundedMax = help * intervalWidth;
    99 
     82        List<double> residuals = CalculateResiduals(Content);
     83
     84        double max = 0.0;
    10085        foreach (Series series in chart.Series) {
    101           CalculateFrequencies(residuals, series.Name, roundedMax, intervalWidth);
    102           if (!series.Equals(ALL_SAMPLES))
    103             ShowValues(series);
     86          CalculateFrequencies(residuals, series);
     87          double seriesMax = series.Points.Select(p => p.YValues.First()).Max();
     88          max = max < seriesMax ? seriesMax : max;
    10489        }
     90
     91        // ALL_SAMPLES has to be calculated to know its highest frequency, but it is not shown in the beginning
     92        chart.Series.Where(s => s.Name.Equals(ALL_SAMPLES)).First().Points.Clear();
     93
     94        double roundedMax, intervalWidth;
     95        CalculateResidualParameters(residuals, out roundedMax, out intervalWidth);
    10596
    10697        ChartArea chartArea = chart.ChartAreas[0];
     
    10899        chartArea.AxisX.Maximum = roundedMax + intervalWidth;
    109100        // get the highest frequency of a residual of any series
    110         chartArea.AxisY.Maximum = (from series in relativeFrequencies.Values
    111                                    select (from residual in series
    112                                            select residual.ElementAt(1)).Max()).Max();
     101        chartArea.AxisY.Maximum = max;
    113102        if (chartArea.AxisY.Maximum < 0.1) {
    114103          chartArea.AxisY.Interval = 0.01;
     
    128117    }
    129118
    130     private List<double> CalculateResiduals() {
     119    private List<double> CalculateResiduals(IRegressionSolution solution) {
    131120      List<double> residuals = new List<double>();
    132121
    133       IRegressionProblemData problemdata = Content.ProblemData;
     122      IRegressionProblemData problemdata = solution.ProblemData;
    134123      List<double> targetValues = problemdata.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList();
    135       List<double> estimatedValues = Content.EstimatedValues.ToList();
    136 
    137       for (int i = 0; i < Content.ProblemData.Dataset.Rows; i++) {
     124      List<double> estimatedValues = solution.EstimatedValues.ToList();
     125
     126      for (int i = 0; i < solution.ProblemData.Dataset.Rows; i++) {
    138127        double residual = estimatedValues[i] - targetValues[i];
    139128        residuals.Add(residual);
     
    142131    }
    143132
    144     private void CalculateFrequencies(List<double> residualValues, string series, double max, double intervalWidth) {
     133    private void CalculateFrequencies(List<double> residualValues, Series series) {
     134      double roundedMax, intervalWidth;
     135      CalculateResidualParameters(residualValues, out roundedMax, out intervalWidth);
     136
    145137      IEnumerable<double> relevantResiduals = residualValues;
    146138      IRegressionProblemData problemdata = Content.ProblemData;
    147       if (series.Equals(TRAINING_SAMPLES)) {
     139      if (series.Name.Equals(TRAINING_SAMPLES)) {
    148140        relevantResiduals = residualValues.Skip(problemdata.TrainingPartition.Start).Take(problemdata.TrainingPartition.Size);
    149       } else if (series.Equals(TEST_SAMPLES)) {
     141      } else if (series.Name.Equals(TEST_SAMPLES)) {
    150142        relevantResiduals = residualValues.Skip(problemdata.TestPartition.Start).Take(problemdata.TestPartition.Size);
    151143      }
     
    153145      double intervalCenter = intervalWidth / 2.0;
    154146      double sampleCount = relevantResiduals.Count();
    155       double current = -max;
     147      double current = -roundedMax;
     148      DataPointCollection seriesPoints = series.Points;
    156149
    157150      for (int i = 0; i <= bins; i++) {
    158151        IEnumerable<double> help = relevantResiduals.Where(x => x >= (current - intervalCenter) && x < (current + intervalCenter));
    159         relativeFrequencies[series].Add(new List<double>() { current, help.Count() / sampleCount, current - intervalCenter, current + intervalCenter });
     152        seriesPoints.AddXY(current, help.Count() / sampleCount);
     153        seriesPoints[seriesPoints.Count - 1]["from"] = (current - intervalCenter).ToString();
     154        seriesPoints[seriesPoints.Count - 1]["to"] = (current + intervalCenter).ToString();
    160155        current += intervalWidth;
    161       }
    162     }
    163 
    164     private void ShowValues(Series series) {
    165       List<List<double>> relativeSeriesFrequencies = relativeFrequencies[series.Name];
    166       DataPointCollection seriesPoints = series.Points;
    167 
    168       foreach (var valueList in relativeSeriesFrequencies) {
    169         seriesPoints.AddXY(valueList[0], valueList[1]);
    170         seriesPoints[seriesPoints.Count - 1]["from"] = valueList[2].ToString();
    171         seriesPoints[seriesPoints.Count - 1]["to"] = valueList[3].ToString();
    172156      }
    173157    }
     
    179163        }
    180164      } else if (Content != null) {
    181         ShowValues(series);
     165        List<double> residuals = CalculateResiduals(Content);
     166        CalculateFrequencies(residuals, series);
    182167        chart.Legends[series.Legend].ForeColor = Color.Black;
    183168        chart.Refresh();
     
    185170    }
    186171
    187     private double HumanRoundMax(double max) {
     172    private static void CalculateResidualParameters(List<double> residuals, out double roundedMax, out double intervalWidth) {
     173      double realMax = Math.Max(Math.Abs(residuals.Min()), Math.Abs(residuals.Max()));
     174      roundedMax = HumanRoundMax(realMax);
     175      intervalWidth = (roundedMax * 2.0) / bins;
     176      intervalWidth = HumanRoundMax(intervalWidth);
     177      // sets roundedMax to a value, so that zero will be in the middle of the x axis
     178      double help = realMax / intervalWidth;
     179      help = help % 1 < 0.5 ? (int)help : (int)help + 1;
     180      roundedMax = help * intervalWidth;
     181    }
     182
     183    private static double HumanRoundMax(double max) {
    188184      double base10;
    189185      if (max > 0) base10 = Math.Pow(10.0, Math.Floor(Math.Log10(max)));
Note: See TracChangeset for help on using the changeset viewer.