Free cookie consent management tool by TermsFeed Policy Generator

Changeset 8276


Ignore:
Timestamp:
07/10/12 15:26:13 (12 years ago)
Author:
sforsten
Message:

#1292:

  • merged r8034:8179 from trunk
  • added BackgroundWorker
  • added ProgressBar
  • added SpearmansRankCorrelationCoefficientCalculator
  • corrected bug in HoeffdingsDependenceCalculator
  • made some changes in the GUI
Location:
branches/DatasetFeatureCorrelation
Files:
6 added
34 edited

Legend:

Unmodified
Added
Removed
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis

  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views

  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationEnsembleSolutionEstimatedClassValuesView.cs

    r7259 r8276  
    7373      }
    7474
    75       int[] indizes;
     75      int[] indices;
    7676      double[] estimatedClassValues;
    7777
    7878      switch (SamplesComboBox.SelectedItem.ToString()) {
    7979        case SamplesComboBoxAllSamples: {
    80             indizes = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).ToArray();
     80            indices = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).ToArray();
    8181            estimatedClassValues = Content.EstimatedClassValues.ToArray();
    8282            break;
    8383          }
    8484        case SamplesComboBoxTrainingSamples: {
    85             indizes = Content.ProblemData.TrainingIndizes.ToArray();
     85            indices = Content.ProblemData.TrainingIndices.ToArray();
    8686            estimatedClassValues = Content.EstimatedTrainingClassValues.ToArray();
    8787            break;
    8888          }
    8989        case SamplesComboBoxTestSamples: {
    90             indizes = Content.ProblemData.TestIndizes.ToArray();
     90            indices = Content.ProblemData.TestIndices.ToArray();
    9191            estimatedClassValues = Content.EstimatedTestClassValues.ToArray();
    9292            break;
     
    9898      int classValuesCount = Content.ProblemData.ClassValues.Count;
    9999      int solutionsCount = Content.ClassificationSolutions.Count();
    100       string[,] values = new string[indizes.Length, 5 + classValuesCount + solutionsCount];
     100      string[,] values = new string[indices.Length, 5 + classValuesCount + solutionsCount];
    101101      double[] target = Content.ProblemData.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToArray();
    102       List<List<double?>> estimatedValuesVector = GetEstimatedValues(SamplesComboBox.SelectedItem.ToString(), indizes,
     102      List<List<double?>> estimatedValuesVector = GetEstimatedValues(SamplesComboBox.SelectedItem.ToString(), indices,
    103103                                                            Content.ClassificationSolutions);
    104104
    105       for (int i = 0; i < indizes.Length; i++) {
    106         int row = indizes[i];
     105      for (int i = 0; i < indices.Length; i++) {
     106        int row = indices[i];
    107107        values[i, 0] = row.ToString();
    108108        values[i, 1] = target[i].ToString();
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionConfusionMatrixView.cs

    r7259 r8276  
    107107        double[] predictedValues;
    108108        if (cmbSamples.SelectedItem.ToString() == TrainingSamples) {
    109           rows = Content.ProblemData.TrainingIndizes;
     109          rows = Content.ProblemData.TrainingIndices;
    110110          predictedValues = Content.EstimatedTrainingClassValues.ToArray();
    111111        } else if (cmbSamples.SelectedItem.ToString() == TestSamples) {
    112           rows = Content.ProblemData.TestIndizes;
     112          rows = Content.ProblemData.TestIndices;
    113113          predictedValues = Content.EstimatedTestClassValues.ToArray();
    114114        } else throw new InvalidOperationException();
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionEstimatedClassValuesView.cs

    r7259 r8276  
    9696          var estimatedTraining = Content.EstimatedTrainingClassValues.GetEnumerator();
    9797          estimatedTraining.MoveNext();
    98           foreach (var trainingRow in Content.ProblemData.TrainingIndizes) {
     98          foreach (var trainingRow in Content.ProblemData.TrainingIndices) {
    9999            values[trainingRow, 3] = estimatedTraining.Current.ToString();
    100100            estimatedTraining.MoveNext();
     
    102102          var estimatedTest = Content.EstimatedTestClassValues.GetEnumerator();
    103103          estimatedTest.MoveNext();
    104           foreach (var testRow in Content.ProblemData.TestIndizes) {
     104          foreach (var testRow in Content.ProblemData.TestIndices) {
    105105            values[testRow, 4] = estimatedTest.Current.ToString();
    106106            estimatedTest.MoveNext();
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/DiscriminantFunctionClassificationRocCurvesView.cs

    r7259 r8276  
    101101
    102102        if (cmbSamples.SelectedItem.ToString() == TrainingSamples) {
    103           rows = Content.ProblemData.TrainingIndizes;
     103          rows = Content.ProblemData.TrainingIndices;
    104104        } else if (cmbSamples.SelectedItem.ToString() == TestSamples) {
    105           rows = Content.ProblemData.TestIndizes;
     105          rows = Content.ProblemData.TestIndices;
    106106        } else throw new InvalidOperationException();
    107107
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/DiscriminantFunctionClassificationSolutionThresholdView.cs

    r7259 r8276  
    137137      var targetValues = Content.ProblemData.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList();
    138138
    139       foreach (int row in Content.ProblemData.TrainingIndizes) {
     139      foreach (int row in Content.ProblemData.TrainingIndices) {
    140140        double estimatedValue = estimatedValues[row];
    141141        double targetValue = targetValues[row];
     
    150150      }
    151151
    152       foreach (int row in Content.ProblemData.TestIndizes) {
     152      foreach (int row in Content.ProblemData.TestIndices) {
    153153        double estimatedValue = estimatedValues[row];
    154154        double targetValue = targetValues[row];
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/HeuristicLab.Problems.DataAnalysis.Views-3.4.csproj

    r8038 r8276  
    9393  </PropertyGroup>
    9494  <ItemGroup>
     95    <Reference Include="HeuristicLab.Analysis-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL" />
     96    <Reference Include="HeuristicLab.Analysis.Views-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL" />
    9597    <Reference Include="HeuristicLab.Collections-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    9698      <Private>False</Private>
     
    170172      <DependentUpon>ClassificationEnsembleSolutionModelView.cs</DependentUpon>
    171173    </Compile>
     174    <Compile Include="CorrelationHeatMapView.cs">
     175      <SubType>UserControl</SubType>
     176    </Compile>
     177    <Compile Include="CorrelationHeatMapView.Designer.cs">
     178      <DependentUpon>CorrelationHeatMapView.cs</DependentUpon>
     179    </Compile>
    172180    <Compile Include="DataAnalysisSolutionEvaluationView.cs">
    173181      <SubType>UserControl</SubType>
     
    175183    <Compile Include="DataAnalysisSolutionEvaluationView.Designer.cs">
    176184      <DependentUpon>DataAnalysisSolutionEvaluationView.cs</DependentUpon>
     185    </Compile>
     186    <Compile Include="ExtendedHeatMapView.cs">
     187      <SubType>UserControl</SubType>
     188    </Compile>
     189    <Compile Include="ExtendedHeatMapView.Designer.cs">
     190      <DependentUpon>ExtendedHeatMapView.cs</DependentUpon>
    177191    </Compile>
    178192    <Compile Include="Plugin.cs" />
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionErrorCharacteristicsCurveView.Designer.cs

    r7967 r8276  
    3636      //
    3737      this.chart.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
    38             | System.Windows.Forms.AnchorStyles.Left)
    39             | System.Windows.Forms.AnchorStyles.Right)));
     38                  | System.Windows.Forms.AnchorStyles.Left)
     39                  | System.Windows.Forms.AnchorStyles.Right)));
    4040      chartArea1.Name = "ChartArea1";
    4141      this.chart.ChartAreas.Add(chartArea1);
     
    7373      // RegressionSolutionErrorCharacteristicsCurveView
    7474      //
    75       this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
    7675      this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Inherit;
    7776      this.Controls.Add(this.label1);
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionErrorCharacteristicsCurveView.cs

    r7701 r8276  
    2626using System.Windows.Forms.DataVisualization.Charting;
    2727using HeuristicLab.MainForm;
    28 using HeuristicLab.MainForm.WindowsForms;
    2928
    3029namespace HeuristicLab.Problems.DataAnalysis.Views {
     
    3231  [Content(typeof(IRegressionSolution))]
    3332  public partial class RegressionSolutionErrorCharacteristicsCurveView : DataAnalysisSolutionEvaluationView {
    34     private IRegressionSolution constantModel;
    3533    protected const string TrainingSamples = "Training";
    3634    protected const string TestSamples = "Test";
     
    103101      if (Content == null) return;
    104102
     103      var constantModel = CreateConstantModel();
    105104      var originalValues = GetOriginalValues().ToList();
    106       constantModel = CreateConstantModel();
    107105      var baselineEstimatedValues = GetEstimatedValues(constantModel);
    108106      var baselineResiduals = GetResiduals(originalValues, baselineEstimatedValues);
     
    117115      baselineSeries.ToolTip = "Area over Curve: " + CalculateAreaOverCurve(baselineSeries);
    118116      baselineSeries.Tag = constantModel;
     117      baselineSeries.LegendToolTip = "Double-click to open model";
    119118      chart.Series.Add(baselineSeries);
    120119
     
    131130      UpdateSeries(estimatedValues, solutionSeries);
    132131      solutionSeries.ToolTip = "Area over Curve: " + CalculateAreaOverCurve(solutionSeries);
     132      solutionSeries.LegendToolTip = "Double-click to open model";
    133133      chart.Series.Add(solutionSeries);
    134134    }
     
    169169      switch (cmbSamples.SelectedItem.ToString()) {
    170170        case TrainingSamples:
    171           originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
     171          originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
    172172          break;
    173173        case TestSamples:
    174           originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes);
     174          originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices);
    175175          break;
    176176        case AllSamples:
     
    201201    }
    202202
    203     protected IEnumerable<double> GetbaselineEstimatedValues(IEnumerable<double> originalValues) {
    204       double averageTrainingTarget = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).Average();
    205       return Enumerable.Repeat(averageTrainingTarget, originalValues.Count());
    206     }
    207 
    208203    protected virtual List<double> GetResiduals(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues) {
    209204      return originalValues.Zip(estimatedValues, (x, y) => Math.Abs(x - y)).ToList();
     
    239234
    240235    private IRegressionSolution CreateConstantModel() {
    241       double averageTrainingTarget = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).Average();
     236      double averageTrainingTarget = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).Average();
    242237      var solution = new ConstantRegressionModel(averageTrainingTarget).CreateRegressionSolution(ProblemData);
    243238      solution.Name = "Baseline";
     
    248243    private void chart_MouseMove(object sender, MouseEventArgs e) {
    249244      HitTestResult result = chart.HitTest(e.X, e.Y);
    250       if (result.ChartElementType == ChartElementType.LegendItem)
     245      if (result.ChartElementType == ChartElementType.LegendItem) {
    251246        Cursor = Cursors.Hand;
    252       else
     247      } else {
    253248        Cursor = Cursors.Default;
     249      }
    254250    }
    255251  }
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionEstimatedValuesView.cs

    r7259 r8276  
    9393          var estimated_test = Content.EstimatedTestValues.GetEnumerator();
    9494
    95           foreach (var row in Content.ProblemData.TrainingIndizes) {
     95          foreach (var row in Content.ProblemData.TrainingIndices) {
    9696            estimated_training.MoveNext();
    9797            values[row, 3] = estimated_training.Current.ToString();
    9898          }
    9999
    100           foreach (var row in Content.ProblemData.TestIndizes) {
     100          foreach (var row in Content.ProblemData.TestIndices) {
    101101            estimated_test.MoveNext();
    102102            values[row, 4] = estimated_test.Current.ToString();
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionLineChartView.cs

    r7406 r8276  
    7272        this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].ChartType = SeriesChartType.FastLine;
    7373        this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].EmptyPointStyle.Color = this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].Color;
    74         this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].Points.DataBindXY(Content.ProblemData.TrainingIndizes.ToArray(), Content.EstimatedTrainingValues.ToArray());
     74        this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].Points.DataBindXY(Content.ProblemData.TrainingIndices.ToArray(), Content.EstimatedTrainingValues.ToArray());
    7575        this.InsertEmptyPoints(this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME]);
    7676        this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].Tag = Content;
     
    7979        this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME].LegendText = ESTIMATEDVALUES_TEST_SERIES_NAME;
    8080        this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME].ChartType = SeriesChartType.FastLine;
    81         this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME].Points.DataBindXY(Content.ProblemData.TestIndizes.ToArray(), Content.EstimatedTestValues.ToArray());
     81        this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME].Points.DataBindXY(Content.ProblemData.TestIndices.ToArray(), Content.EstimatedTestValues.ToArray());
    8282        this.InsertEmptyPoints(this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME]);
    8383        this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME].Tag = Content;
    8484        // series of remaining points
    85         int[] allIndizes = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).Except(Content.ProblemData.TrainingIndizes).Except(Content.ProblemData.TestIndizes).ToArray();
     85        int[] allIndices = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).Except(Content.ProblemData.TrainingIndices).Except(Content.ProblemData.TestIndices).ToArray();
    8686        var estimatedValues = Content.EstimatedValues.ToArray();
    87         List<double> allEstimatedValues = allIndizes.Select(index => estimatedValues[index]).ToList();
     87        List<double> allEstimatedValues = allIndices.Select(index => estimatedValues[index]).ToList();
    8888        this.chart.Series.Add(ESTIMATEDVALUES_ALL_SERIES_NAME);
    8989        this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME].LegendText = ESTIMATEDVALUES_ALL_SERIES_NAME;
    9090        this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME].ChartType = SeriesChartType.FastLine;
    91         this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME].Points.DataBindXY(allIndizes, allEstimatedValues);
     91        this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME].Points.DataBindXY(allIndices, allEstimatedValues);
    9292        this.InsertEmptyPoints(this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME]);
    9393        this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME].Tag = Content;
     
    170170
    171171      int[] attr = new int[Content.ProblemData.Dataset.Rows + 1]; // add a virtual last row that is again empty to simplify loop further down
    172       foreach (var row in Content.ProblemData.TrainingIndizes) {
     172      foreach (var row in Content.ProblemData.TrainingIndices) {
    173173        attr[row] += 1;
    174174      }
    175       foreach (var row in Content.ProblemData.TestIndizes) {
     175      foreach (var row in Content.ProblemData.TestIndices) {
    176176        attr[row] += 2;
    177177      }
     
    223223        string targetVariableName = Content.ProblemData.TargetVariable;
    224224
    225         IEnumerable<int> indizes = null;
     225        IEnumerable<int> indices = null;
    226226        IEnumerable<double> predictedValues = null;
    227227        switch (series.Name) {
    228228          case ESTIMATEDVALUES_ALL_SERIES_NAME:
    229             indizes = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).Except(Content.ProblemData.TrainingIndizes).Except(Content.ProblemData.TestIndizes).ToArray();
     229            indices = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).Except(Content.ProblemData.TrainingIndices).Except(Content.ProblemData.TestIndices).ToArray();
    230230            var estimatedValues = Content.EstimatedValues.ToArray();
    231             predictedValues = indizes.Select(index => estimatedValues[index]).ToList();
     231            predictedValues = indices.Select(index => estimatedValues[index]).ToList();
    232232            break;
    233233          case ESTIMATEDVALUES_TRAINING_SERIES_NAME:
    234             indizes = Content.ProblemData.TrainingIndizes.ToArray();
     234            indices = Content.ProblemData.TrainingIndices.ToArray();
    235235            predictedValues = Content.EstimatedTrainingValues.ToArray();
    236236            break;
    237237          case ESTIMATEDVALUES_TEST_SERIES_NAME:
    238             indizes = Content.ProblemData.TestIndizes.ToArray();
     238            indices = Content.ProblemData.TestIndices.ToArray();
    239239            predictedValues = Content.EstimatedTestValues.ToArray();
    240240            break;
    241241        }
    242         series.Points.DataBindXY(indizes, predictedValues);
     242        series.Points.DataBindXY(indices, predictedValues);
    243243        this.InsertEmptyPoints(series);
    244244        chart.Legends[series.Legend].ForeColor = Color.Black;
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionResidualHistogram.Designer.cs

    r7967 r8276  
    4141      this.chart.Location = new System.Drawing.Point(0, 0);
    4242      this.chart.Name = "chart";
    43       this.chart.Size = new System.Drawing.Size(358, 225);
     43      this.chart.Size = new System.Drawing.Size(289, 220);
    4444      this.chart.TabIndex = 0;
    4545      this.chart.CustomizeLegend += new System.EventHandler<System.Windows.Forms.DataVisualization.Charting.CustomizeLegendEventArgs>(this.chart_CustomizeLegend);
     
    5050      //
    5151      this.AllowDrop = true;
    52       this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
    5352      this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Inherit;
    5453      this.Controls.Add(this.chart);
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionResidualHistogram.cs

    r7503 r8276  
    3939    protected const string TEST_SAMPLES = "Test samples";
    4040    /// <summary>
    41     /// used to reduce code duplication
    42     /// </summary>
    43     protected static string[] ALL_SERIES = new string[] { ALL_SAMPLES, TRAINING_SAMPLES, TEST_SAMPLES };
    44     /// <summary>
    4541    /// approximate amount of bins
    4642    /// </summary>
    4743    protected const double bins = 25;
    48     /// <summary>
    49     ///  keeps for all series a list for every bin with the position of the bin, the relative frequency of the
    50     ///  residuals and the beginning and the end of the interval of the bin
    51     ///  </summary>
    52     protected Dictionary<string, List<List<double>>> relativeFrequencies;
    5344    #endregion
    5445
     
    6152      : base() {
    6253      InitializeComponent();
    63       relativeFrequencies = new Dictionary<string, List<List<double>>>();
    64       foreach (string series in ALL_SERIES) {
     54      foreach (string series in new List<String>() { ALL_SAMPLES, TRAINING_SAMPLES, TEST_SAMPLES }) {
    6555        chart.Series.Add(series);
    6656        chart.Series[series].LegendText = series;
     
    7161        chart.Series[series].BorderColor = Color.Black;
    7262        chart.Series[series].ToolTip = series + " Y = #VALY from #CUSTOMPROPERTY(from) to #CUSTOMPROPERTY(to)";
    73         relativeFrequencies[series] = new List<List<double>>();
    7463      }
    7564      //configure axis
     
    8776
    8877    private void RedrawChart() {
    89       foreach (string series in ALL_SERIES) {
    90         chart.Series[series].Points.Clear();
    91         relativeFrequencies[series].Clear();
     78      foreach (Series series in chart.Series) {
     79        series.Points.Clear();
    9280      }
    9381      if (Content != null) {
    94         Dictionary<string, List<double>> residuals = CalculateResiduals();
    95         double realMax = Math.Max(Math.Abs(residuals[ALL_SAMPLES].Min()), Math.Abs(residuals[ALL_SAMPLES].Max()));
    96         double roundedMax = HumanRoundMax(realMax);
    97         double intervalWidth = (roundedMax * 2.0) / bins;
    98         intervalWidth = HumanRoundMax(intervalWidth);
    99         // sets roundedMax to a value, so that zero will be in the middle of the x axis
    100         double help = realMax / intervalWidth;
    101         help = help % 1 < 0.5 ? (int)help : (int)help + 1;
    102         roundedMax = help * intervalWidth;
    103 
    104         foreach (string series in ALL_SERIES) {
    105           CalculateFrequencies(residuals[series], series, roundedMax, intervalWidth);
    106           if (!series.Equals(ALL_SAMPLES))
    107             ShowValues(chart.Series[series], relativeFrequencies[series]);
     82        List<double> residuals = CalculateResiduals(Content);
     83
     84        double max = 0.0;
     85        foreach (Series series in chart.Series) {
     86          CalculateFrequencies(residuals, series);
     87          double seriesMax = series.Points.Select(p => p.YValues.First()).Max();
     88          max = max < seriesMax ? seriesMax : max;
    10889        }
     90
     91        // ALL_SAMPLES has to be calculated to know its highest frequency, but it is not shown in the beginning
     92        chart.Series.First(s => s.Name.Equals(ALL_SAMPLES)).Points.Clear();
     93
     94        double roundedMax, intervalWidth;
     95        CalculateResidualParameters(residuals, out roundedMax, out intervalWidth);
    10996
    11097        ChartArea chartArea = chart.ChartAreas[0];
     
    11299        chartArea.AxisX.Maximum = roundedMax + intervalWidth;
    113100        // get the highest frequency of a residual of any series
    114         chartArea.AxisY.Maximum = (from series in relativeFrequencies.Values
    115                                    select (from residual in series
    116                                            select residual.ElementAt(1)).Max()).Max();
     101        chartArea.AxisY.Maximum = max;
    117102        if (chartArea.AxisY.Maximum < 0.1) {
    118103          chartArea.AxisY.Interval = 0.01;
     
    132117    }
    133118
    134     private Dictionary<string, List<double>> CalculateResiduals() {
    135       Dictionary<string, List<double>> residuals = new Dictionary<string, List<double>>();
    136 
    137       foreach (string series in ALL_SERIES) {
    138         residuals[series] = new List<double>();
    139       }
     119    private List<double> CalculateResiduals(IRegressionSolution solution) {
     120      List<double> residuals = new List<double>();
     121
     122      IRegressionProblemData problemdata = solution.ProblemData;
     123      List<double> targetValues = problemdata.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList();
     124      List<double> estimatedValues = solution.EstimatedValues.ToList();
     125
     126      for (int i = 0; i < solution.ProblemData.Dataset.Rows; i++) {
     127        double residual = estimatedValues[i] - targetValues[i];
     128        residuals.Add(residual);
     129      }
     130      return residuals;
     131    }
     132
     133    private void CalculateFrequencies(List<double> residualValues, Series series) {
     134      double roundedMax, intervalWidth;
     135      CalculateResidualParameters(residualValues, out roundedMax, out intervalWidth);
     136
     137      IEnumerable<double> relevantResiduals = residualValues;
    140138      IRegressionProblemData problemdata = Content.ProblemData;
    141       List<double> targetValues = problemdata.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList();
    142       List<double> estimatedValues = Content.EstimatedValues.ToList();
    143 
    144       for (int i = 0; i < Content.ProblemData.Dataset.Rows; i++) {
    145         double residual = estimatedValues[i] - targetValues[i];
    146         residuals[ALL_SAMPLES].Add(residual);
    147         if (i >= problemdata.TrainingPartition.Start && i < problemdata.TrainingPartition.End)
    148           residuals[TRAINING_SAMPLES].Add(residual);
    149         if (i >= problemdata.TestPartition.Start && i < problemdata.TestPartition.End)
    150           residuals[TEST_SAMPLES].Add(residual);
    151       }
    152       return residuals;
    153     }
    154 
    155     private void CalculateFrequencies(List<double> residualValues, string series, double max, double intervalWidth) {
     139      if (series.Name.Equals(TRAINING_SAMPLES)) {
     140        relevantResiduals = residualValues.Skip(problemdata.TrainingPartition.Start).Take(problemdata.TrainingPartition.Size);
     141      } else if (series.Name.Equals(TEST_SAMPLES)) {
     142        relevantResiduals = residualValues.Skip(problemdata.TestPartition.Start).Take(problemdata.TestPartition.Size);
     143      }
     144
    156145      double intervalCenter = intervalWidth / 2.0;
    157       double sampleCount = residualValues.Count();
    158       double current = -max;
     146      double sampleCount = relevantResiduals.Count();
     147      double current = -roundedMax;
     148      DataPointCollection seriesPoints = series.Points;
    159149
    160150      for (int i = 0; i <= bins; i++) {
    161         IEnumerable<double> help = residualValues.Where(x => x >= (current - intervalCenter) && x < (current + intervalCenter));
    162         relativeFrequencies[series].Add(new List<double>() { current, help.Count() / sampleCount, current - intervalCenter, current + intervalCenter });
     151        IEnumerable<double> help = relevantResiduals.Where(x => x >= (current - intervalCenter) && x < (current + intervalCenter));
     152        seriesPoints.AddXY(current, help.Count() / sampleCount);
     153        seriesPoints[seriesPoints.Count - 1]["from"] = (current - intervalCenter).ToString();
     154        seriesPoints[seriesPoints.Count - 1]["to"] = (current + intervalCenter).ToString();
    163155        current += intervalWidth;
    164156      }
    165157    }
    166158
    167     private double HumanRoundMax(double max) {
     159    private void ToggleSeriesData(Series series) {
     160      if (series.Points.Count > 0) {  //checks if series is shown
     161        if (chart.Series.Any(s => s != series && s.Points.Count > 0)) {
     162          series.Points.Clear();
     163        }
     164      } else if (Content != null) {
     165        List<double> residuals = CalculateResiduals(Content);
     166        CalculateFrequencies(residuals, series);
     167        chart.Legends[series.Legend].ForeColor = Color.Black;
     168        chart.Refresh();
     169      }
     170    }
     171
     172    private static void CalculateResidualParameters(List<double> residuals, out double roundedMax, out double intervalWidth) {
     173      double realMax = Math.Max(Math.Abs(residuals.Min()), Math.Abs(residuals.Max()));
     174      roundedMax = HumanRoundMax(realMax);
     175      intervalWidth = (roundedMax * 2.0) / bins;
     176      intervalWidth = HumanRoundMax(intervalWidth);
     177      // sets roundedMax to a value, so that zero will be in the middle of the x axis
     178      double help = realMax / intervalWidth;
     179      help = help % 1 < 0.5 ? (int)help : (int)help + 1;
     180      roundedMax = help * intervalWidth;
     181    }
     182
     183    private static double HumanRoundMax(double max) {
    168184      double base10;
    169185      if (max > 0) base10 = Math.Pow(10.0, Math.Floor(Math.Log10(max)));
     
    216232    }
    217233    #endregion
    218 
    219     private void ToggleSeriesData(Series series) {
    220       if (series.Points.Count > 0) {  //checks if series is shown
    221         if (chart.Series.Any(s => s != series && s.Points.Count > 0)) {
    222           series.Points.Clear();
    223         }
    224       } else if (Content != null) {
    225         ShowValues(series, relativeFrequencies[series.Name]);
    226         chart.Legends[series.Legend].ForeColor = Color.Black;
    227         chart.Refresh();
    228       }
    229     }
    230     private void ShowValues(Series series, List<List<double>> relativeSeriesFrequencies) {
    231       DataPointCollection seriesPoints = series.Points;
    232 
    233       foreach (var valueList in relativeSeriesFrequencies) {
    234         seriesPoints.AddXY(valueList[0], valueList[1]);
    235         seriesPoints[seriesPoints.Count - 1]["from"] = valueList[2].ToString();
    236         seriesPoints[seriesPoints.Count - 1]["to"] = valueList[3].ToString();
    237       }
    238     }
    239234  }
    240235}
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionScatterPlotView.cs

    r7990 r8276  
    148148        if (this.chart.Series[TRAINING_SERIES].Points.Count > 0)
    149149          this.chart.Series[TRAINING_SERIES].Points.DataBindXY(Content.EstimatedTrainingValues.ToArray(), "",
    150             dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TrainingIndizes).ToArray(), "");
     150            dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TrainingIndices).ToArray(), "");
    151151        if (this.chart.Series[TEST_SERIES].Points.Count > 0)
    152152          this.chart.Series[TEST_SERIES].Points.DataBindXY(Content.EstimatedTestValues.ToArray(), "",
    153            dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TestIndizes).ToArray(), "");
     153           dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TestIndices).ToArray(), "");
    154154
    155155        double max = Content.EstimatedTrainingValues.Concat(Content.EstimatedTestValues.Concat(Content.EstimatedValues.Concat(dataset.GetDoubleValues(targetVariableName)))).Max();
     
    196196          case TRAINING_SERIES:
    197197            predictedValues = Content.EstimatedTrainingValues.ToArray();
    198             targetValues = Content.ProblemData.Dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TrainingIndizes).ToArray();
     198            targetValues = Content.ProblemData.Dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TrainingIndices).ToArray();
    199199            break;
    200200          case TEST_SERIES:
    201201            predictedValues = Content.EstimatedTestValues.ToArray();
    202             targetValues = Content.ProblemData.Dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TestIndizes).ToArray();
     202            targetValues = Content.ProblemData.Dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TestIndices).ToArray();
    203203            break;
    204204        }
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Solution Views/DataAnalysisSolutionView.cs

    r7259 r8276  
    137137    }
    138138
     139    protected override void showDetailsCheckBox_CheckedChanged(object sender, EventArgs e) {
     140      if (showDetailsCheckBox.Checked && itemsListView.SelectedItems.Count == 1 && itemsListView.SelectedItems[0].Tag is Type) {
     141        Type viewType = (Type)itemsListView.SelectedItems[0].Tag;
     142        viewHost.ViewType = viewType;
     143        viewHost.Content = Content;
     144        splitContainer.Panel2Collapsed = false;
     145        detailsGroupBox.Enabled = true;
     146      } else base.showDetailsCheckBox_CheckedChanged(sender, e);
     147    }
     148
    139149    #region drag and drop
    140150    protected override void itemsListView_DragEnter(object sender, DragEventArgs e) {
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj

    r8038 r8276  
    155155    <Compile Include="Implementation\Clustering\ClusteringProblemData.cs" />
    156156    <Compile Include="Implementation\Clustering\ClusteringSolution.cs" />
     157    <Compile Include="Implementation\ExtendedHeatMap.cs" />
    157158    <Compile Include="Implementation\Regression\ConstantRegressionModel.cs" />
    158159    <Compile Include="Implementation\Regression\ConstantRegressionSolution.cs" />
     
    213214    <Compile Include="OnlineCalculators\OnlinePearsonsRSquaredCalculator.cs" />
    214215    <Compile Include="Implementation\Regression\RegressionSolution.cs" />
     216    <Compile Include="OnlineCalculators\SpearmansRankCorrelationCoefficientCalculator.cs" />
    215217    <Compile Include="Plugin.cs" />
    216218    <Compile Include="Implementation\Classification\ThresholdCalculators\AccuracyMaximizationThresholdCalculator.cs" />
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationEnsembleSolution.cs

    r7259 r8276  
    3737  [Creatable("Data Analysis - Ensembles")]
    3838  public sealed class ClassificationEnsembleSolution : ClassificationSolution, IClassificationEnsembleSolution {
     39    private readonly Dictionary<int, double> trainingEvaluationCache = new Dictionary<int, double>();
     40    private readonly Dictionary<int, double> testEvaluationCache = new Dictionary<int, double>();
     41
    3942    public new IClassificationEnsembleModel Model {
    4043      get { return (IClassificationEnsembleModel)base.Model; }
     
    8588      }
    8689
     90      trainingEvaluationCache = new Dictionary<int, double>(original.ProblemData.TrainingIndices.Count());
     91      testEvaluationCache = new Dictionary<int, double>(original.ProblemData.TestIndices.Count());
     92
    8793      classificationSolutions = cloner.Clone(original.classificationSolutions);
    8894      RegisterClassificationSolutionsEventHandler();
     
    128134      }
    129135
     136      trainingEvaluationCache = new Dictionary<int, double>(problemData.TrainingIndices.Count());
     137      testEvaluationCache = new Dictionary<int, double>(problemData.TestIndices.Count());
     138
    130139      RegisterClassificationSolutionsEventHandler();
    131140      classificationSolutions.AddRange(solutions);
     
    148157    public override IEnumerable<double> EstimatedTrainingClassValues {
    149158      get {
    150         var rows = ProblemData.TrainingIndizes;
    151         var estimatedValuesEnumerators = (from model in Model.Models
    152                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() })
    153                                          .ToList();
    154         var rowsEnumerator = rows.GetEnumerator();
    155         // aggregate to make sure that MoveNext is called for all enumerators
    156         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    157           int currentRow = rowsEnumerator.Current;
    158 
    159           var selectedEnumerators = from pair in estimatedValuesEnumerators
    160                                     where RowIsTrainingForModel(currentRow, pair.Model) && !RowIsTestForModel(currentRow, pair.Model)
    161                                     select pair.EstimatedValuesEnumerator;
    162           yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current));
     159        var rows = ProblemData.TrainingIndices;
     160        var rowsToEvaluate = rows.Except(trainingEvaluationCache.Keys);
     161        var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     162        var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, (r, m) => RowIsTrainingForModel(r, m) && !RowIsTestForModel(r, m)).GetEnumerator();
     163
     164        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     165          trainingEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
    163166        }
     167
     168        return rows.Select(row => trainingEvaluationCache[row]);
    164169      }
    165170    }
     
    167172    public override IEnumerable<double> EstimatedTestClassValues {
    168173      get {
    169         var rows = ProblemData.TestIndizes;
    170         var estimatedValuesEnumerators = (from model in Model.Models
    171                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() })
    172                                          .ToList();
    173         var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator();
    174         // aggregate to make sure that MoveNext is called for all enumerators
    175         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    176           int currentRow = rowsEnumerator.Current;
    177 
    178           var selectedEnumerators = from pair in estimatedValuesEnumerators
    179                                     where RowIsTestForModel(currentRow, pair.Model)
    180                                     select pair.EstimatedValuesEnumerator;
    181 
    182           yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current));
     174        var rows = ProblemData.TestIndices;
     175        var rowsToEvaluate = rows.Except(testEvaluationCache.Keys);
     176        var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     177        var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, RowIsTestForModel).GetEnumerator();
     178
     179        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     180          testEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
    183181        }
     182
     183        return rows.Select(row => testEvaluationCache[row]);
     184      }
     185    }
     186
     187    private IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows, Func<int, IClassificationModel, bool> modelSelectionPredicate) {
     188      var estimatedValuesEnumerators = (from model in Model.Models
     189                                        select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() })
     190                                       .ToList();
     191      var rowsEnumerator = rows.GetEnumerator();
     192      // aggregate to make sure that MoveNext is called for all enumerators
     193      while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
     194        int currentRow = rowsEnumerator.Current;
     195
     196        var selectedEnumerators = from pair in estimatedValuesEnumerators
     197                                  where modelSelectionPredicate(currentRow, pair.Model)
     198                                  select pair.EstimatedValuesEnumerator;
     199
     200        yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current));
    184201      }
    185202    }
     
    196213
    197214    public override IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows) {
    198       return from xs in GetEstimatedClassValueVectors(ProblemData.Dataset, rows)
    199              select AggregateEstimatedClassValues(xs);
     215      var rowsToEvaluate = rows.Except(evaluationCache.Keys);
     216      var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     217      var valuesEnumerator = (from xs in GetEstimatedClassValueVectors(ProblemData.Dataset, rowsToEvaluate)
     218                              select AggregateEstimatedClassValues(xs))
     219                             .GetEnumerator();
     220
     221      while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     222        evaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
     223      }
     224
     225      return rows.Select(row => evaluationCache[row]);
    200226    }
    201227
     
    223249
    224250    protected override void OnProblemDataChanged() {
     251      trainingEvaluationCache.Clear();
     252      testEvaluationCache.Clear();
     253      evaluationCache.Clear();
     254
    225255      IClassificationProblemData problemData = new ClassificationProblemData(ProblemData.Dataset,
    226256                                                                     ProblemData.AllowedInputVariables,
     
    251281    public void AddClassificationSolutions(IEnumerable<IClassificationSolution> solutions) {
    252282      classificationSolutions.AddRange(solutions);
     283
     284      trainingEvaluationCache.Clear();
     285      testEvaluationCache.Clear();
     286      evaluationCache.Clear();
    253287    }
    254288    public void RemoveClassificationSolutions(IEnumerable<IClassificationSolution> solutions) {
    255289      classificationSolutions.RemoveRange(solutions);
     290
     291      trainingEvaluationCache.Clear();
     292      testEvaluationCache.Clear();
     293      evaluationCache.Clear();
    256294    }
    257295
     
    275313      trainingPartitions[solution.Model] = solution.ProblemData.TrainingPartition;
    276314      testPartitions[solution.Model] = solution.ProblemData.TestPartition;
     315
     316      trainingEvaluationCache.Clear();
     317      testEvaluationCache.Clear();
     318      evaluationCache.Clear();
    277319    }
    278320
     
    282324      trainingPartitions.Remove(solution.Model);
    283325      testPartitions.Remove(solution.Model);
     326
     327      trainingEvaluationCache.Clear();
     328      testEvaluationCache.Clear();
     329      evaluationCache.Clear();
    284330    }
    285331  }
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs

    r7823 r8276  
    207207
    208208    #region parameter properties
    209     public ConstrainedValueParameter<StringValue> TargetVariableParameter {
    210       get { return (ConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
     209    public IConstrainedValueParameter<StringValue> TargetVariableParameter {
     210      get { return (IConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
    211211    }
    212212    public IFixedValueParameter<StringMatrix> ClassNamesParameter {
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolution.cs

    r7259 r8276  
    4444    public ClassificationSolution(IClassificationModel model, IClassificationProblemData problemData)
    4545      : base(model, problemData) {
    46       evaluationCache = new Dictionary<int, double>();
     46      evaluationCache = new Dictionary<int, double>(problemData.Dataset.Rows);
    4747    }
    4848
     
    5151    }
    5252    public override IEnumerable<double> EstimatedTrainingClassValues {
    53       get { return GetEstimatedClassValues(ProblemData.TrainingIndizes); }
     53      get { return GetEstimatedClassValues(ProblemData.TrainingIndices); }
    5454    }
    5555    public override IEnumerable<double> EstimatedTestClassValues {
    56       get { return GetEstimatedClassValues(ProblemData.TestIndizes); }
     56      get { return GetEstimatedClassValues(ProblemData.TestIndices); }
    5757    }
    5858
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionBase.cs

    r7259 r8276  
    8787    protected void CalculateResults() {
    8888      double[] estimatedTrainingClassValues = EstimatedTrainingClassValues.ToArray(); // cache values
    89       double[] originalTrainingClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();
     89      double[] originalTrainingClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).ToArray();
    9090      double[] estimatedTestClassValues = EstimatedTestClassValues.ToArray(); // cache values
    91       double[] originalTestClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();
     91      double[] originalTestClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices).ToArray();
    9292
    9393      OnlineCalculatorError errorState;
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationSolution.cs

    r7259 r8276  
    5959    }
    6060    public override IEnumerable<double> EstimatedTrainingClassValues {
    61       get { return GetEstimatedClassValues(ProblemData.TrainingIndizes); }
     61      get { return GetEstimatedClassValues(ProblemData.TrainingIndices); }
    6262    }
    6363    public override IEnumerable<double> EstimatedTestClassValues {
    64       get { return GetEstimatedClassValues(ProblemData.TestIndizes); }
     64      get { return GetEstimatedClassValues(ProblemData.TestIndices); }
    6565    }
    6666
     
    8282    }
    8383    public override IEnumerable<double> EstimatedTrainingValues {
    84       get { return GetEstimatedValues(ProblemData.TrainingIndizes); }
     84      get { return GetEstimatedValues(ProblemData.TrainingIndices); }
    8585    }
    8686    public override IEnumerable<double> EstimatedTestValues {
    87       get { return GetEstimatedValues(ProblemData.TestIndizes); }
     87      get { return GetEstimatedValues(ProblemData.TestIndices); }
    8888    }
    8989
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationSolutionBase.cs

    r7259 r8276  
    103103    protected void CalculateRegressionResults() {
    104104      double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values
    105       double[] originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();
     105      double[] originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).ToArray();
    106106      double[] estimatedTestValues = EstimatedTestValues.ToArray(); // cache values
    107       double[] originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();
     107      double[] originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices).ToArray();
    108108
    109109      OnlineCalculatorError errorState;
     
    140140      double[] classValues;
    141141      double[] thresholds;
    142       var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
     142      var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
    143143      AccuracyMaximizationThresholdCalculator.CalculateThresholds(ProblemData, EstimatedTrainingValues, targetClassValues, out classValues, out thresholds);
    144144
     
    149149      double[] classValues;
    150150      double[] thresholds;
    151       var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
     151      var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
    152152      NormalDistributionCutPointsThresholdCalculator.CalculateThresholds(ProblemData, EstimatedTrainingValues, targetClassValues, out classValues, out thresholds);
    153153
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ThresholdCalculators/AccuracyMaximizationThresholdCalculator.cs

    r7259 r8276  
    5454    public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
    5555      int slices = 100;
     56      double minThresholdInc = 10e-5; // necessary to prevent infinite loop when maxEstimated - minEstimated is effectively zero (constant model)
    5657      List<double> estimatedValuesList = estimatedValues.ToList();
    5758      double maxEstimatedValue = estimatedValuesList.Max();
    5859      double minEstimatedValue = estimatedValuesList.Min();
    59       double thresholdIncrement = (maxEstimatedValue - minEstimatedValue) / slices;
     60      double thresholdIncrement = Math.Max((maxEstimatedValue - minEstimatedValue) / slices, minThresholdInc);
    6061      var estimatedAndTargetValuePairs =
    6162        estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y })
     
    7071
    7172      // incrementally calculate accuracy of all possible thresholds
    72       int[,] confusionMatrix = new int[nClasses, nClasses];
    73 
    7473      for (int i = 1; i < thresholds.Length; i++) {
    7574        double lowerThreshold = thresholds[i - 1];
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringSolution.cs

    r7259 r8276  
    6868    public virtual IEnumerable<int> TrainingClusterValues {
    6969      get {
    70         return GetClusterValues(ProblemData.TrainingIndizes);
     70        return GetClusterValues(ProblemData.TrainingIndices);
    7171      }
    7272    }
     
    7474    public virtual IEnumerable<int> TestClusterValues {
    7575      get {
    76         return GetClusterValues(ProblemData.TestIndizes);
     76        return GetClusterValues(ProblemData.TestIndices);
    7777      }
    7878    }
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs

    r8038 r8276  
    2323using System.Collections.Generic;
    2424using System.Linq;
    25 using HeuristicLab.Analysis;
    2625using HeuristicLab.Collections;
    2726using HeuristicLab.Common;
     
    5352      get { return (IFixedValueParameter<IntRange>)Parameters[TestPartitionParameterName]; }
    5453    }
    55     public IFixedValueParameter<HeatMap> DatasetHeatMapParameter {
    56       get { return (IFixedValueParameter<HeatMap>)Parameters[DatasetHeatMapParameterName]; }
     54    public IFixedValueParameter<ExtendedHeatMap> DatasetHeatMapParameter {
     55      get { return (IFixedValueParameter<ExtendedHeatMap>)Parameters[DatasetHeatMapParameterName]; }
    5756    }
    5857    #endregion
     
    7978      get { return TestPartitionParameter.Value; }
    8079    }
    81     public HeatMap DatasetHeatMap {
     80    public ExtendedHeatMap DatasetHeatMap {
    8281      get { return DatasetHeatMapParameter.Value; }
    8382    }
    8483
    85     public virtual IEnumerable<int> TrainingIndizes {
     84    public virtual IEnumerable<int> TrainingIndices {
    8685      get {
    8786        return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start))
     
    8988      }
    9089    }
    91     public virtual IEnumerable<int> TestIndizes {
     90    public virtual IEnumerable<int> TestIndices {
    9291      get {
    9392        return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start))
     
    140139      Parameters.Add(new FixedValueParameter<IntRange>(TrainingPartitionParameterName, "", new IntRange(trainingPartitionStart, trainingPartitionEnd)));
    141140      Parameters.Add(new FixedValueParameter<IntRange>(TestPartitionParameterName, "", new IntRange(testPartitionStart, testPartitionEnd)));
    142       Parameters.Add(new FixedValueParameter<HeatMap>(DatasetHeatMapParameterName, "", CalculateHeatMap(dataset)));
     141      Parameters.Add(new FixedValueParameter<ExtendedHeatMap>(DatasetHeatMapParameterName, "", new ExtendedHeatMap(this)));
    143142
    144143      ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false;
    145144      RegisterEventHandlers();
    146     }
    147 
    148     private HeatMap CalculateHeatMap(Dataset dataset) {
    149       IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
    150       OnlineCalculatorError error;
    151       int length = doubleVariableNames.Count;
    152       double[,] elements = new double[length, length];
    153 
    154       for (int i = 0; i < length; i++) {
    155         for (int j = 0; j < i + 1; j++) {
    156           elements[i, j] = OnlinePearsonsRSquaredCalculator.Calculate(dataset.GetDoubleValues(doubleVariableNames[length - 1 - i]), dataset.GetDoubleValues(doubleVariableNames[j]), out error);
    157           elements[j, i] = elements[i, j];
    158           if (!error.Equals(OnlineCalculatorError.None)) {
    159             throw new ArgumentException("Calculator returned " + error);
    160           }
    161         }
    162       }
    163       return new HeatMap(elements, "Hoeffdings Dependence");
    164145    }
    165146
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionEnsembleSolution.cs

    r7738 r8276  
    3737  [Creatable("Data Analysis - Ensembles")]
    3838  public sealed class RegressionEnsembleSolution : RegressionSolution, IRegressionEnsembleSolution {
     39    private readonly Dictionary<int, double> trainingEvaluationCache = new Dictionary<int, double>();
     40    private readonly Dictionary<int, double> testEvaluationCache = new Dictionary<int, double>();
     41
    3942    public new IRegressionEnsembleModel Model {
    4043      get { return (IRegressionEnsembleModel)base.Model; }
     
    5255
    5356    [Storable]
    54     private Dictionary<IRegressionModel, IntRange> trainingPartitions;
     57    private readonly Dictionary<IRegressionModel, IntRange> trainingPartitions;
    5558    [Storable]
    56     private Dictionary<IRegressionModel, IntRange> testPartitions;
     59    private readonly Dictionary<IRegressionModel, IntRange> testPartitions;
    5760
    5861    [StorableConstructor]
     
    8689      }
    8790
     91      trainingEvaluationCache = new Dictionary<int, double>(original.ProblemData.TrainingIndices.Count());
     92      testEvaluationCache = new Dictionary<int, double>(original.ProblemData.TestIndices.Count());
     93
    8894      regressionSolutions = cloner.Clone(original.regressionSolutions);
    8995      RegisterRegressionSolutionsEventHandler();
     
    133139      }
    134140
     141      trainingEvaluationCache = new Dictionary<int, double>(problemData.TrainingIndices.Count());
     142      testEvaluationCache = new Dictionary<int, double>(problemData.TestIndices.Count());
     143
    135144      RegisterRegressionSolutionsEventHandler();
    136145      regressionSolutions.AddRange(solutions);
     
    153162    public override IEnumerable<double> EstimatedTrainingValues {
    154163      get {
    155         var rows = ProblemData.TrainingIndizes;
    156         var estimatedValuesEnumerators = (from model in Model.Models
    157                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() })
    158                                          .ToList();
    159         var rowsEnumerator = rows.GetEnumerator();
    160         // aggregate to make sure that MoveNext is called for all enumerators
    161         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    162           int currentRow = rowsEnumerator.Current;
    163 
    164           var selectedEnumerators = from pair in estimatedValuesEnumerators
    165                                     where RowIsTrainingForModel(currentRow, pair.Model) && !RowIsTestForModel(currentRow, pair.Model)
    166                                     select pair.EstimatedValuesEnumerator;
    167           yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current));
     164        var rows = ProblemData.TrainingIndices;
     165        var rowsToEvaluate = rows.Except(trainingEvaluationCache.Keys);
     166        var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     167        var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, (r, m) => RowIsTrainingForModel(r, m) && !RowIsTestForModel(r, m)).GetEnumerator();
     168
     169        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     170          trainingEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
    168171        }
     172
     173        return rows.Select(row => trainingEvaluationCache[row]);
    169174      }
    170175    }
     
    172177    public override IEnumerable<double> EstimatedTestValues {
    173178      get {
    174         var rows = ProblemData.TestIndizes;
    175         var estimatedValuesEnumerators = (from model in Model.Models
    176                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() })
    177                                          .ToList();
    178         var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator();
    179         // aggregate to make sure that MoveNext is called for all enumerators
    180         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    181           int currentRow = rowsEnumerator.Current;
    182 
    183           var selectedEnumerators = from pair in estimatedValuesEnumerators
    184                                     where RowIsTestForModel(currentRow, pair.Model)
    185                                     select pair.EstimatedValuesEnumerator;
    186 
    187           yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current));
     179        var rows = ProblemData.TestIndices;
     180        var rowsToEvaluate = rows.Except(testEvaluationCache.Keys);
     181        var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     182        var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, RowIsTestForModel).GetEnumerator();
     183
     184        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     185          testEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
    188186        }
     187
     188        return rows.Select(row => testEvaluationCache[row]);
     189      }
     190    }
     191
     192    private IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows, Func<int, IRegressionModel, bool> modelSelectionPredicate) {
     193      var estimatedValuesEnumerators = (from model in Model.Models
     194                                        select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() })
     195                                       .ToList();
     196      var rowsEnumerator = rows.GetEnumerator();
     197      // aggregate to make sure that MoveNext is called for all enumerators
     198      while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
     199        int currentRow = rowsEnumerator.Current;
     200
     201        var selectedEnumerators = from pair in estimatedValuesEnumerators
     202                                  where modelSelectionPredicate(currentRow, pair.Model)
     203                                  select pair.EstimatedValuesEnumerator;
     204
     205        yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current));
    189206      }
    190207    }
     
    201218
    202219    public override IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows) {
    203       return from xs in GetEstimatedValueVectors(ProblemData.Dataset, rows)
    204              select AggregateEstimatedValues(xs);
     220      var rowsToEvaluate = rows.Except(evaluationCache.Keys);
     221      var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     222      var valuesEnumerator = (from xs in GetEstimatedValueVectors(ProblemData.Dataset, rowsToEvaluate)
     223                              select AggregateEstimatedValues(xs))
     224                             .GetEnumerator();
     225
     226      while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     227        evaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
     228      }
     229
     230      return rows.Select(row => evaluationCache[row]);
    205231    }
    206232
     
    223249
    224250    protected override void OnProblemDataChanged() {
     251      trainingEvaluationCache.Clear();
     252      testEvaluationCache.Clear();
     253      evaluationCache.Clear();
    225254      IRegressionProblemData problemData = new RegressionProblemData(ProblemData.Dataset,
    226255                                                                     ProblemData.AllowedInputVariables,
     
    251280    public void AddRegressionSolutions(IEnumerable<IRegressionSolution> solutions) {
    252281      regressionSolutions.AddRange(solutions);
     282
     283      trainingEvaluationCache.Clear();
     284      testEvaluationCache.Clear();
     285      evaluationCache.Clear();
    253286    }
    254287    public void RemoveRegressionSolutions(IEnumerable<IRegressionSolution> solutions) {
    255288      regressionSolutions.RemoveRange(solutions);
     289
     290      trainingEvaluationCache.Clear();
     291      testEvaluationCache.Clear();
     292      evaluationCache.Clear();
    256293    }
    257294
     
    275312      trainingPartitions[solution.Model] = solution.ProblemData.TrainingPartition;
    276313      testPartitions[solution.Model] = solution.ProblemData.TestPartition;
     314
     315      trainingEvaluationCache.Clear();
     316      testEvaluationCache.Clear();
     317      evaluationCache.Clear();
    277318    }
    278319
     
    282323      trainingPartitions.Remove(solution.Model);
    283324      testPartitions.Remove(solution.Model);
     325
     326      trainingEvaluationCache.Clear();
     327      testEvaluationCache.Clear();
     328      evaluationCache.Clear();
    284329    }
    285330  }
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs

    r7823 r8276  
    9595    #endregion
    9696
    97     public ConstrainedValueParameter<StringValue> TargetVariableParameter {
    98       get { return (ConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
     97    public IConstrainedValueParameter<StringValue> TargetVariableParameter {
     98      get { return (IConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
    9999    }
    100100    public string TargetVariable {
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolution.cs

    r7735 r8276  
    5555    }
    5656    public override IEnumerable<double> EstimatedTrainingValues {
    57       get { return GetEstimatedValues(ProblemData.TrainingIndizes); }
     57      get { return GetEstimatedValues(ProblemData.TrainingIndices); }
    5858    }
    5959    public override IEnumerable<double> EstimatedTestValues {
    60       get { return GetEstimatedValues(ProblemData.TestIndizes); }
     60      get { return GetEstimatedValues(ProblemData.TestIndices); }
    6161    }
    6262
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionBase.cs

    r7735 r8276  
    138138        OnlineCalculatorError errorState;
    139139        Add(new Result(TrainingMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the training partition", new DoubleValue()));
    140         double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes), out errorState);
     140        double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices), out errorState);
    141141        TrainingMeanAbsoluteError = errorState == OnlineCalculatorError.None ? trainingMAE : double.NaN;
    142142      }
     
    145145        OnlineCalculatorError errorState;
    146146        Add(new Result(TestMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the test partition", new DoubleValue()));
    147         double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes), out errorState);
     147        double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices), out errorState);
    148148        TestMeanAbsoluteError = errorState == OnlineCalculatorError.None ? testMAE : double.NaN;
    149149      }
     
    152152        OnlineCalculatorError errorState;
    153153        Add(new Result(TrainingMeanErrorResultName, "Mean of errors of the model on the training partition", new DoubleValue()));
    154         double trainingME = OnlineMeanErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes), out errorState);
     154        double trainingME = OnlineMeanErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices), out errorState);
    155155        TrainingMeanError = errorState == OnlineCalculatorError.None ? trainingME : double.NaN;
    156156      }
     
    158158        OnlineCalculatorError errorState;
    159159        Add(new Result(TestMeanErrorResultName, "Mean of errors of the model on the test partition", new DoubleValue()));
    160         double testME = OnlineMeanErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes), out errorState);
     160        double testME = OnlineMeanErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices), out errorState);
    161161        TestMeanError = errorState == OnlineCalculatorError.None ? testME : double.NaN;
    162162      }
     
    166166    protected void CalculateResults() {
    167167      IEnumerable<double> estimatedTrainingValues = EstimatedTrainingValues; // cache values
    168       IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
     168      IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
    169169      IEnumerable<double> estimatedTestValues = EstimatedTestValues; // cache values
    170       IEnumerable<double> originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes);
     170      IEnumerable<double> originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices);
    171171
    172172      OnlineCalculatorError errorState;
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs

    r7259 r8276  
    3636    IntRange TestPartition { get; }
    3737
    38     IEnumerable<int> TrainingIndizes { get; }
    39     IEnumerable<int> TestIndizes { get; }
     38    IEnumerable<int> TrainingIndices { get; }
     39    IEnumerable<int> TestIndices { get; }
    4040
    4141    bool IsTrainingSample(int index);
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IOnlineCalculator.cs

    r7259 r8276  
    2424namespace HeuristicLab.Problems.DataAnalysis {
    2525  [Flags]
    26   public enum OnlineCalculatorError { 
     26  public enum OnlineCalculatorError {
    2727    /// <summary>
    2828    /// No error occurred
    2929    /// </summary>
    30     None = 0, 
     30    None = 0,
    3131    /// <summary>
    3232    /// An invalid value has been added (often +/- Infinity and NaN are invalid values)
    3333    /// </summary>
    34     InvalidValueAdded = 1, 
     34    InvalidValueAdded = 1,
    3535    /// <summary>
    3636    /// The number of elements added to the evaluator is not sufficient to calculate the result value
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/HoeffdingsDependenceCalculator.cs

    r7969 r8276  
    2323using System.Collections.Generic;
    2424using System.Linq;
    25 using HeuristicLab.Common;
    2625
    2726namespace HeuristicLab.Problems.DataAnalysis {
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineLinearScalingParameterCalculator.cs

    r7259 r8276  
    5555    }
    5656
    57     private int cnt;
    5857    private OnlineMeanAndVarianceCalculator targetMeanCalculator;
    5958    private OnlineMeanAndVarianceCalculator originalMeanAndVarianceCalculator;
     
    6867
    6968    public void Reset() {
    70       cnt = 0;
    7169      targetMeanCalculator.Reset();
    7270      originalMeanAndVarianceCalculator.Reset();
     
    8583      originalTargetCovarianceCalculator.Add(original, target);
    8684
    87       cnt++;
    8885    }
    8986
Note: See TracChangeset for help on using the changeset viewer.