Changeset 8276
- Timestamp:
- 07/10/12 15:26:13 (12 years ago)
- Location:
- branches/DatasetFeatureCorrelation
- Files:
-
- 6 added
- 34 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis
- Property svn:mergeinfo changed
/trunk/sources/HeuristicLab.Problems.DataAnalysis (added) merged: 8113,8121,8126,8139,8151-8153,8167,8174
- Property svn:mergeinfo changed
-
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views
- Property svn:mergeinfo changed
/trunk/sources/HeuristicLab.Problems.DataAnalysis.Views (added) merged: 8098,8100,8102,8104-8105,8125,8139,8173,8176
- Property svn:mergeinfo changed
-
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationEnsembleSolutionEstimatedClassValuesView.cs
r7259 r8276 73 73 } 74 74 75 int[] indi zes;75 int[] indices; 76 76 double[] estimatedClassValues; 77 77 78 78 switch (SamplesComboBox.SelectedItem.ToString()) { 79 79 case SamplesComboBoxAllSamples: { 80 indi zes = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).ToArray();80 indices = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).ToArray(); 81 81 estimatedClassValues = Content.EstimatedClassValues.ToArray(); 82 82 break; 83 83 } 84 84 case SamplesComboBoxTrainingSamples: { 85 indi zes = Content.ProblemData.TrainingIndizes.ToArray();85 indices = Content.ProblemData.TrainingIndices.ToArray(); 86 86 estimatedClassValues = Content.EstimatedTrainingClassValues.ToArray(); 87 87 break; 88 88 } 89 89 case SamplesComboBoxTestSamples: { 90 indi zes = Content.ProblemData.TestIndizes.ToArray();90 indices = Content.ProblemData.TestIndices.ToArray(); 91 91 estimatedClassValues = Content.EstimatedTestClassValues.ToArray(); 92 92 break; … … 98 98 int classValuesCount = Content.ProblemData.ClassValues.Count; 99 99 int solutionsCount = Content.ClassificationSolutions.Count(); 100 string[,] values = new string[indi zes.Length, 5 + classValuesCount + solutionsCount];100 string[,] values = new string[indices.Length, 5 + classValuesCount + solutionsCount]; 101 101 double[] target = Content.ProblemData.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToArray(); 102 List<List<double?>> estimatedValuesVector = GetEstimatedValues(SamplesComboBox.SelectedItem.ToString(), indi zes,102 List<List<double?>> estimatedValuesVector = GetEstimatedValues(SamplesComboBox.SelectedItem.ToString(), indices, 103 103 Content.ClassificationSolutions); 104 104 105 for (int i = 0; i < indi zes.Length; i++) {106 int row = indi zes[i];105 for (int i = 0; i < indices.Length; i++) { 106 int row = indices[i]; 107 107 values[i, 0] = row.ToString(); 108 108 values[i, 1] = target[i].ToString(); -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionConfusionMatrixView.cs
r7259 r8276 107 107 double[] predictedValues; 108 108 if (cmbSamples.SelectedItem.ToString() == TrainingSamples) { 109 rows = Content.ProblemData.TrainingIndi zes;109 rows = Content.ProblemData.TrainingIndices; 110 110 predictedValues = Content.EstimatedTrainingClassValues.ToArray(); 111 111 } else if (cmbSamples.SelectedItem.ToString() == TestSamples) { 112 rows = Content.ProblemData.TestIndi zes;112 rows = Content.ProblemData.TestIndices; 113 113 predictedValues = Content.EstimatedTestClassValues.ToArray(); 114 114 } else throw new InvalidOperationException(); -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionEstimatedClassValuesView.cs
r7259 r8276 96 96 var estimatedTraining = Content.EstimatedTrainingClassValues.GetEnumerator(); 97 97 estimatedTraining.MoveNext(); 98 foreach (var trainingRow in Content.ProblemData.TrainingIndi zes) {98 foreach (var trainingRow in Content.ProblemData.TrainingIndices) { 99 99 values[trainingRow, 3] = estimatedTraining.Current.ToString(); 100 100 estimatedTraining.MoveNext(); … … 102 102 var estimatedTest = Content.EstimatedTestClassValues.GetEnumerator(); 103 103 estimatedTest.MoveNext(); 104 foreach (var testRow in Content.ProblemData.TestIndi zes) {104 foreach (var testRow in Content.ProblemData.TestIndices) { 105 105 values[testRow, 4] = estimatedTest.Current.ToString(); 106 106 estimatedTest.MoveNext(); -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/DiscriminantFunctionClassificationRocCurvesView.cs
r7259 r8276 101 101 102 102 if (cmbSamples.SelectedItem.ToString() == TrainingSamples) { 103 rows = Content.ProblemData.TrainingIndi zes;103 rows = Content.ProblemData.TrainingIndices; 104 104 } else if (cmbSamples.SelectedItem.ToString() == TestSamples) { 105 rows = Content.ProblemData.TestIndi zes;105 rows = Content.ProblemData.TestIndices; 106 106 } else throw new InvalidOperationException(); 107 107 -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/DiscriminantFunctionClassificationSolutionThresholdView.cs
r7259 r8276 137 137 var targetValues = Content.ProblemData.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList(); 138 138 139 foreach (int row in Content.ProblemData.TrainingIndi zes) {139 foreach (int row in Content.ProblemData.TrainingIndices) { 140 140 double estimatedValue = estimatedValues[row]; 141 141 double targetValue = targetValues[row]; … … 150 150 } 151 151 152 foreach (int row in Content.ProblemData.TestIndi zes) {152 foreach (int row in Content.ProblemData.TestIndices) { 153 153 double estimatedValue = estimatedValues[row]; 154 154 double targetValue = targetValues[row]; -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/HeuristicLab.Problems.DataAnalysis.Views-3.4.csproj
r8038 r8276 93 93 </PropertyGroup> 94 94 <ItemGroup> 95 <Reference Include="HeuristicLab.Analysis-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL" /> 96 <Reference Include="HeuristicLab.Analysis.Views-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL" /> 95 97 <Reference Include="HeuristicLab.Collections-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 96 98 <Private>False</Private> … … 170 172 <DependentUpon>ClassificationEnsembleSolutionModelView.cs</DependentUpon> 171 173 </Compile> 174 <Compile Include="CorrelationHeatMapView.cs"> 175 <SubType>UserControl</SubType> 176 </Compile> 177 <Compile Include="CorrelationHeatMapView.Designer.cs"> 178 <DependentUpon>CorrelationHeatMapView.cs</DependentUpon> 179 </Compile> 172 180 <Compile Include="DataAnalysisSolutionEvaluationView.cs"> 173 181 <SubType>UserControl</SubType> … … 175 183 <Compile Include="DataAnalysisSolutionEvaluationView.Designer.cs"> 176 184 <DependentUpon>DataAnalysisSolutionEvaluationView.cs</DependentUpon> 185 </Compile> 186 <Compile Include="ExtendedHeatMapView.cs"> 187 <SubType>UserControl</SubType> 188 </Compile> 189 <Compile Include="ExtendedHeatMapView.Designer.cs"> 190 <DependentUpon>ExtendedHeatMapView.cs</DependentUpon> 177 191 </Compile> 178 192 <Compile Include="Plugin.cs" /> -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionErrorCharacteristicsCurveView.Designer.cs
r7967 r8276 36 36 // 37 37 this.chart.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) 38 | System.Windows.Forms.AnchorStyles.Left)39 | System.Windows.Forms.AnchorStyles.Right)));38 | System.Windows.Forms.AnchorStyles.Left) 39 | System.Windows.Forms.AnchorStyles.Right))); 40 40 chartArea1.Name = "ChartArea1"; 41 41 this.chart.ChartAreas.Add(chartArea1); … … 73 73 // RegressionSolutionErrorCharacteristicsCurveView 74 74 // 75 this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);76 75 this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Inherit; 77 76 this.Controls.Add(this.label1); -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionErrorCharacteristicsCurveView.cs
r7701 r8276 26 26 using System.Windows.Forms.DataVisualization.Charting; 27 27 using HeuristicLab.MainForm; 28 using HeuristicLab.MainForm.WindowsForms;29 28 30 29 namespace HeuristicLab.Problems.DataAnalysis.Views { … … 32 31 [Content(typeof(IRegressionSolution))] 33 32 public partial class RegressionSolutionErrorCharacteristicsCurveView : DataAnalysisSolutionEvaluationView { 34 private IRegressionSolution constantModel;35 33 protected const string TrainingSamples = "Training"; 36 34 protected const string TestSamples = "Test"; … … 103 101 if (Content == null) return; 104 102 103 var constantModel = CreateConstantModel(); 105 104 var originalValues = GetOriginalValues().ToList(); 106 constantModel = CreateConstantModel();107 105 var baselineEstimatedValues = GetEstimatedValues(constantModel); 108 106 var baselineResiduals = GetResiduals(originalValues, baselineEstimatedValues); … … 117 115 baselineSeries.ToolTip = "Area over Curve: " + CalculateAreaOverCurve(baselineSeries); 118 116 baselineSeries.Tag = constantModel; 117 baselineSeries.LegendToolTip = "Double-click to open model"; 119 118 chart.Series.Add(baselineSeries); 120 119 … … 131 130 UpdateSeries(estimatedValues, solutionSeries); 132 131 solutionSeries.ToolTip = "Area over Curve: " + CalculateAreaOverCurve(solutionSeries); 132 solutionSeries.LegendToolTip = "Double-click to open model"; 133 133 chart.Series.Add(solutionSeries); 134 134 } … … 169 169 switch (cmbSamples.SelectedItem.ToString()) { 170 170 case TrainingSamples: 171 originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndi zes);171 originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices); 172 172 break; 173 173 case TestSamples: 174 originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndi zes);174 originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices); 175 175 break; 176 176 case AllSamples: … … 201 201 } 202 202 203 protected IEnumerable<double> GetbaselineEstimatedValues(IEnumerable<double> originalValues) {204 double averageTrainingTarget = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).Average();205 return Enumerable.Repeat(averageTrainingTarget, originalValues.Count());206 }207 208 203 protected virtual List<double> GetResiduals(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues) { 209 204 return originalValues.Zip(estimatedValues, (x, y) => Math.Abs(x - y)).ToList(); … … 239 234 240 235 private IRegressionSolution CreateConstantModel() { 241 double averageTrainingTarget = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndi zes).Average();236 double averageTrainingTarget = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).Average(); 242 237 var solution = new ConstantRegressionModel(averageTrainingTarget).CreateRegressionSolution(ProblemData); 243 238 solution.Name = "Baseline"; … … 248 243 private void chart_MouseMove(object sender, MouseEventArgs e) { 249 244 HitTestResult result = chart.HitTest(e.X, e.Y); 250 if (result.ChartElementType == ChartElementType.LegendItem) 245 if (result.ChartElementType == ChartElementType.LegendItem) { 251 246 Cursor = Cursors.Hand; 252 else247 } else { 253 248 Cursor = Cursors.Default; 249 } 254 250 } 255 251 } -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionEstimatedValuesView.cs
r7259 r8276 93 93 var estimated_test = Content.EstimatedTestValues.GetEnumerator(); 94 94 95 foreach (var row in Content.ProblemData.TrainingIndi zes) {95 foreach (var row in Content.ProblemData.TrainingIndices) { 96 96 estimated_training.MoveNext(); 97 97 values[row, 3] = estimated_training.Current.ToString(); 98 98 } 99 99 100 foreach (var row in Content.ProblemData.TestIndi zes) {100 foreach (var row in Content.ProblemData.TestIndices) { 101 101 estimated_test.MoveNext(); 102 102 values[row, 4] = estimated_test.Current.ToString(); -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionLineChartView.cs
r7406 r8276 72 72 this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].ChartType = SeriesChartType.FastLine; 73 73 this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].EmptyPointStyle.Color = this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].Color; 74 this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].Points.DataBindXY(Content.ProblemData.TrainingIndi zes.ToArray(), Content.EstimatedTrainingValues.ToArray());74 this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].Points.DataBindXY(Content.ProblemData.TrainingIndices.ToArray(), Content.EstimatedTrainingValues.ToArray()); 75 75 this.InsertEmptyPoints(this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME]); 76 76 this.chart.Series[ESTIMATEDVALUES_TRAINING_SERIES_NAME].Tag = Content; … … 79 79 this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME].LegendText = ESTIMATEDVALUES_TEST_SERIES_NAME; 80 80 this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME].ChartType = SeriesChartType.FastLine; 81 this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME].Points.DataBindXY(Content.ProblemData.TestIndi zes.ToArray(), Content.EstimatedTestValues.ToArray());81 this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME].Points.DataBindXY(Content.ProblemData.TestIndices.ToArray(), Content.EstimatedTestValues.ToArray()); 82 82 this.InsertEmptyPoints(this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME]); 83 83 this.chart.Series[ESTIMATEDVALUES_TEST_SERIES_NAME].Tag = Content; 84 84 // series of remaining points 85 int[] allIndi zes = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).Except(Content.ProblemData.TrainingIndizes).Except(Content.ProblemData.TestIndizes).ToArray();85 int[] allIndices = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).Except(Content.ProblemData.TrainingIndices).Except(Content.ProblemData.TestIndices).ToArray(); 86 86 var estimatedValues = Content.EstimatedValues.ToArray(); 87 List<double> allEstimatedValues = allIndi zes.Select(index => estimatedValues[index]).ToList();87 List<double> allEstimatedValues = allIndices.Select(index => estimatedValues[index]).ToList(); 88 88 this.chart.Series.Add(ESTIMATEDVALUES_ALL_SERIES_NAME); 89 89 this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME].LegendText = ESTIMATEDVALUES_ALL_SERIES_NAME; 90 90 this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME].ChartType = SeriesChartType.FastLine; 91 this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME].Points.DataBindXY(allIndi zes, allEstimatedValues);91 this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME].Points.DataBindXY(allIndices, allEstimatedValues); 92 92 this.InsertEmptyPoints(this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME]); 93 93 this.chart.Series[ESTIMATEDVALUES_ALL_SERIES_NAME].Tag = Content; … … 170 170 171 171 int[] attr = new int[Content.ProblemData.Dataset.Rows + 1]; // add a virtual last row that is again empty to simplify loop further down 172 foreach (var row in Content.ProblemData.TrainingIndi zes) {172 foreach (var row in Content.ProblemData.TrainingIndices) { 173 173 attr[row] += 1; 174 174 } 175 foreach (var row in Content.ProblemData.TestIndi zes) {175 foreach (var row in Content.ProblemData.TestIndices) { 176 176 attr[row] += 2; 177 177 } … … 223 223 string targetVariableName = Content.ProblemData.TargetVariable; 224 224 225 IEnumerable<int> indi zes = null;225 IEnumerable<int> indices = null; 226 226 IEnumerable<double> predictedValues = null; 227 227 switch (series.Name) { 228 228 case ESTIMATEDVALUES_ALL_SERIES_NAME: 229 indi zes = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).Except(Content.ProblemData.TrainingIndizes).Except(Content.ProblemData.TestIndizes).ToArray();229 indices = Enumerable.Range(0, Content.ProblemData.Dataset.Rows).Except(Content.ProblemData.TrainingIndices).Except(Content.ProblemData.TestIndices).ToArray(); 230 230 var estimatedValues = Content.EstimatedValues.ToArray(); 231 predictedValues = indi zes.Select(index => estimatedValues[index]).ToList();231 predictedValues = indices.Select(index => estimatedValues[index]).ToList(); 232 232 break; 233 233 case ESTIMATEDVALUES_TRAINING_SERIES_NAME: 234 indi zes = Content.ProblemData.TrainingIndizes.ToArray();234 indices = Content.ProblemData.TrainingIndices.ToArray(); 235 235 predictedValues = Content.EstimatedTrainingValues.ToArray(); 236 236 break; 237 237 case ESTIMATEDVALUES_TEST_SERIES_NAME: 238 indi zes = Content.ProblemData.TestIndizes.ToArray();238 indices = Content.ProblemData.TestIndices.ToArray(); 239 239 predictedValues = Content.EstimatedTestValues.ToArray(); 240 240 break; 241 241 } 242 series.Points.DataBindXY(indi zes, predictedValues);242 series.Points.DataBindXY(indices, predictedValues); 243 243 this.InsertEmptyPoints(series); 244 244 chart.Legends[series.Legend].ForeColor = Color.Black; -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionResidualHistogram.Designer.cs
r7967 r8276 41 41 this.chart.Location = new System.Drawing.Point(0, 0); 42 42 this.chart.Name = "chart"; 43 this.chart.Size = new System.Drawing.Size( 358, 225);43 this.chart.Size = new System.Drawing.Size(289, 220); 44 44 this.chart.TabIndex = 0; 45 45 this.chart.CustomizeLegend += new System.EventHandler<System.Windows.Forms.DataVisualization.Charting.CustomizeLegendEventArgs>(this.chart_CustomizeLegend); … … 50 50 // 51 51 this.AllowDrop = true; 52 this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);53 52 this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Inherit; 54 53 this.Controls.Add(this.chart); -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionResidualHistogram.cs
r7503 r8276 39 39 protected const string TEST_SAMPLES = "Test samples"; 40 40 /// <summary> 41 /// used to reduce code duplication42 /// </summary>43 protected static string[] ALL_SERIES = new string[] { ALL_SAMPLES, TRAINING_SAMPLES, TEST_SAMPLES };44 /// <summary>45 41 /// approximate amount of bins 46 42 /// </summary> 47 43 protected const double bins = 25; 48 /// <summary>49 /// keeps for all series a list for every bin with the position of the bin, the relative frequency of the50 /// residuals and the beginning and the end of the interval of the bin51 /// </summary>52 protected Dictionary<string, List<List<double>>> relativeFrequencies;53 44 #endregion 54 45 … … 61 52 : base() { 62 53 InitializeComponent(); 63 relativeFrequencies = new Dictionary<string, List<List<double>>>(); 64 foreach (string series in ALL_SERIES) { 54 foreach (string series in new List<String>() { ALL_SAMPLES, TRAINING_SAMPLES, TEST_SAMPLES }) { 65 55 chart.Series.Add(series); 66 56 chart.Series[series].LegendText = series; … … 71 61 chart.Series[series].BorderColor = Color.Black; 72 62 chart.Series[series].ToolTip = series + " Y = #VALY from #CUSTOMPROPERTY(from) to #CUSTOMPROPERTY(to)"; 73 relativeFrequencies[series] = new List<List<double>>();74 63 } 75 64 //configure axis … … 87 76 88 77 private void RedrawChart() { 89 foreach (string series in ALL_SERIES) { 90 chart.Series[series].Points.Clear(); 91 relativeFrequencies[series].Clear(); 78 foreach (Series series in chart.Series) { 79 series.Points.Clear(); 92 80 } 93 81 if (Content != null) { 94 Dictionary<string, List<double>> residuals = CalculateResiduals(); 95 double realMax = Math.Max(Math.Abs(residuals[ALL_SAMPLES].Min()), Math.Abs(residuals[ALL_SAMPLES].Max())); 96 double roundedMax = HumanRoundMax(realMax); 97 double intervalWidth = (roundedMax * 2.0) / bins; 98 intervalWidth = HumanRoundMax(intervalWidth); 99 // sets roundedMax to a value, so that zero will be in the middle of the x axis 100 double help = realMax / intervalWidth; 101 help = help % 1 < 0.5 ? (int)help : (int)help + 1; 102 roundedMax = help * intervalWidth; 103 104 foreach (string series in ALL_SERIES) { 105 CalculateFrequencies(residuals[series], series, roundedMax, intervalWidth); 106 if (!series.Equals(ALL_SAMPLES)) 107 ShowValues(chart.Series[series], relativeFrequencies[series]); 82 List<double> residuals = CalculateResiduals(Content); 83 84 double max = 0.0; 85 foreach (Series series in chart.Series) { 86 CalculateFrequencies(residuals, series); 87 double seriesMax = series.Points.Select(p => p.YValues.First()).Max(); 88 max = max < seriesMax ? seriesMax : max; 108 89 } 90 91 // ALL_SAMPLES has to be calculated to know its highest frequency, but it is not shown in the beginning 92 chart.Series.First(s => s.Name.Equals(ALL_SAMPLES)).Points.Clear(); 93 94 double roundedMax, intervalWidth; 95 CalculateResidualParameters(residuals, out roundedMax, out intervalWidth); 109 96 110 97 ChartArea chartArea = chart.ChartAreas[0]; … … 112 99 chartArea.AxisX.Maximum = roundedMax + intervalWidth; 113 100 // get the highest frequency of a residual of any series 114 chartArea.AxisY.Maximum = (from series in relativeFrequencies.Values 115 select (from residual in series 116 select residual.ElementAt(1)).Max()).Max(); 101 chartArea.AxisY.Maximum = max; 117 102 if (chartArea.AxisY.Maximum < 0.1) { 118 103 chartArea.AxisY.Interval = 0.01; … … 132 117 } 133 118 134 private Dictionary<string, List<double>> CalculateResiduals() { 135 Dictionary<string, List<double>> residuals = new Dictionary<string, List<double>>(); 136 137 foreach (string series in ALL_SERIES) { 138 residuals[series] = new List<double>(); 139 } 119 private List<double> CalculateResiduals(IRegressionSolution solution) { 120 List<double> residuals = new List<double>(); 121 122 IRegressionProblemData problemdata = solution.ProblemData; 123 List<double> targetValues = problemdata.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList(); 124 List<double> estimatedValues = solution.EstimatedValues.ToList(); 125 126 for (int i = 0; i < solution.ProblemData.Dataset.Rows; i++) { 127 double residual = estimatedValues[i] - targetValues[i]; 128 residuals.Add(residual); 129 } 130 return residuals; 131 } 132 133 private void CalculateFrequencies(List<double> residualValues, Series series) { 134 double roundedMax, intervalWidth; 135 CalculateResidualParameters(residualValues, out roundedMax, out intervalWidth); 136 137 IEnumerable<double> relevantResiduals = residualValues; 140 138 IRegressionProblemData problemdata = Content.ProblemData; 141 List<double> targetValues = problemdata.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList(); 142 List<double> estimatedValues = Content.EstimatedValues.ToList(); 143 144 for (int i = 0; i < Content.ProblemData.Dataset.Rows; i++) { 145 double residual = estimatedValues[i] - targetValues[i]; 146 residuals[ALL_SAMPLES].Add(residual); 147 if (i >= problemdata.TrainingPartition.Start && i < problemdata.TrainingPartition.End) 148 residuals[TRAINING_SAMPLES].Add(residual); 149 if (i >= problemdata.TestPartition.Start && i < problemdata.TestPartition.End) 150 residuals[TEST_SAMPLES].Add(residual); 151 } 152 return residuals; 153 } 154 155 private void CalculateFrequencies(List<double> residualValues, string series, double max, double intervalWidth) { 139 if (series.Name.Equals(TRAINING_SAMPLES)) { 140 relevantResiduals = residualValues.Skip(problemdata.TrainingPartition.Start).Take(problemdata.TrainingPartition.Size); 141 } else if (series.Name.Equals(TEST_SAMPLES)) { 142 relevantResiduals = residualValues.Skip(problemdata.TestPartition.Start).Take(problemdata.TestPartition.Size); 143 } 144 156 145 double intervalCenter = intervalWidth / 2.0; 157 double sampleCount = residualValues.Count(); 158 double current = -max; 146 double sampleCount = relevantResiduals.Count(); 147 double current = -roundedMax; 148 DataPointCollection seriesPoints = series.Points; 159 149 160 150 for (int i = 0; i <= bins; i++) { 161 IEnumerable<double> help = residualValues.Where(x => x >= (current - intervalCenter) && x < (current + intervalCenter)); 162 relativeFrequencies[series].Add(new List<double>() { current, help.Count() / sampleCount, current - intervalCenter, current + intervalCenter }); 151 IEnumerable<double> help = relevantResiduals.Where(x => x >= (current - intervalCenter) && x < (current + intervalCenter)); 152 seriesPoints.AddXY(current, help.Count() / sampleCount); 153 seriesPoints[seriesPoints.Count - 1]["from"] = (current - intervalCenter).ToString(); 154 seriesPoints[seriesPoints.Count - 1]["to"] = (current + intervalCenter).ToString(); 163 155 current += intervalWidth; 164 156 } 165 157 } 166 158 167 private double HumanRoundMax(double max) { 159 private void ToggleSeriesData(Series series) { 160 if (series.Points.Count > 0) { //checks if series is shown 161 if (chart.Series.Any(s => s != series && s.Points.Count > 0)) { 162 series.Points.Clear(); 163 } 164 } else if (Content != null) { 165 List<double> residuals = CalculateResiduals(Content); 166 CalculateFrequencies(residuals, series); 167 chart.Legends[series.Legend].ForeColor = Color.Black; 168 chart.Refresh(); 169 } 170 } 171 172 private static void CalculateResidualParameters(List<double> residuals, out double roundedMax, out double intervalWidth) { 173 double realMax = Math.Max(Math.Abs(residuals.Min()), Math.Abs(residuals.Max())); 174 roundedMax = HumanRoundMax(realMax); 175 intervalWidth = (roundedMax * 2.0) / bins; 176 intervalWidth = HumanRoundMax(intervalWidth); 177 // sets roundedMax to a value, so that zero will be in the middle of the x axis 178 double help = realMax / intervalWidth; 179 help = help % 1 < 0.5 ? (int)help : (int)help + 1; 180 roundedMax = help * intervalWidth; 181 } 182 183 private static double HumanRoundMax(double max) { 168 184 double base10; 169 185 if (max > 0) base10 = Math.Pow(10.0, Math.Floor(Math.Log10(max))); … … 216 232 } 217 233 #endregion 218 219 private void ToggleSeriesData(Series series) {220 if (series.Points.Count > 0) { //checks if series is shown221 if (chart.Series.Any(s => s != series && s.Points.Count > 0)) {222 series.Points.Clear();223 }224 } else if (Content != null) {225 ShowValues(series, relativeFrequencies[series.Name]);226 chart.Legends[series.Legend].ForeColor = Color.Black;227 chart.Refresh();228 }229 }230 private void ShowValues(Series series, List<List<double>> relativeSeriesFrequencies) {231 DataPointCollection seriesPoints = series.Points;232 233 foreach (var valueList in relativeSeriesFrequencies) {234 seriesPoints.AddXY(valueList[0], valueList[1]);235 seriesPoints[seriesPoints.Count - 1]["from"] = valueList[2].ToString();236 seriesPoints[seriesPoints.Count - 1]["to"] = valueList[3].ToString();237 }238 }239 234 } 240 235 } -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionScatterPlotView.cs
r7990 r8276 148 148 if (this.chart.Series[TRAINING_SERIES].Points.Count > 0) 149 149 this.chart.Series[TRAINING_SERIES].Points.DataBindXY(Content.EstimatedTrainingValues.ToArray(), "", 150 dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TrainingIndi zes).ToArray(), "");150 dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TrainingIndices).ToArray(), ""); 151 151 if (this.chart.Series[TEST_SERIES].Points.Count > 0) 152 152 this.chart.Series[TEST_SERIES].Points.DataBindXY(Content.EstimatedTestValues.ToArray(), "", 153 dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TestIndi zes).ToArray(), "");153 dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TestIndices).ToArray(), ""); 154 154 155 155 double max = Content.EstimatedTrainingValues.Concat(Content.EstimatedTestValues.Concat(Content.EstimatedValues.Concat(dataset.GetDoubleValues(targetVariableName)))).Max(); … … 196 196 case TRAINING_SERIES: 197 197 predictedValues = Content.EstimatedTrainingValues.ToArray(); 198 targetValues = Content.ProblemData.Dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TrainingIndi zes).ToArray();198 targetValues = Content.ProblemData.Dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TrainingIndices).ToArray(); 199 199 break; 200 200 case TEST_SERIES: 201 201 predictedValues = Content.EstimatedTestValues.ToArray(); 202 targetValues = Content.ProblemData.Dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TestIndi zes).ToArray();202 targetValues = Content.ProblemData.Dataset.GetDoubleValues(targetVariableName, Content.ProblemData.TestIndices).ToArray(); 203 203 break; 204 204 } -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4/Solution Views/DataAnalysisSolutionView.cs
r7259 r8276 137 137 } 138 138 139 protected override void showDetailsCheckBox_CheckedChanged(object sender, EventArgs e) { 140 if (showDetailsCheckBox.Checked && itemsListView.SelectedItems.Count == 1 && itemsListView.SelectedItems[0].Tag is Type) { 141 Type viewType = (Type)itemsListView.SelectedItems[0].Tag; 142 viewHost.ViewType = viewType; 143 viewHost.Content = Content; 144 splitContainer.Panel2Collapsed = false; 145 detailsGroupBox.Enabled = true; 146 } else base.showDetailsCheckBox_CheckedChanged(sender, e); 147 } 148 139 149 #region drag and drop 140 150 protected override void itemsListView_DragEnter(object sender, DragEventArgs e) { -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj
r8038 r8276 155 155 <Compile Include="Implementation\Clustering\ClusteringProblemData.cs" /> 156 156 <Compile Include="Implementation\Clustering\ClusteringSolution.cs" /> 157 <Compile Include="Implementation\ExtendedHeatMap.cs" /> 157 158 <Compile Include="Implementation\Regression\ConstantRegressionModel.cs" /> 158 159 <Compile Include="Implementation\Regression\ConstantRegressionSolution.cs" /> … … 213 214 <Compile Include="OnlineCalculators\OnlinePearsonsRSquaredCalculator.cs" /> 214 215 <Compile Include="Implementation\Regression\RegressionSolution.cs" /> 216 <Compile Include="OnlineCalculators\SpearmansRankCorrelationCoefficientCalculator.cs" /> 215 217 <Compile Include="Plugin.cs" /> 216 218 <Compile Include="Implementation\Classification\ThresholdCalculators\AccuracyMaximizationThresholdCalculator.cs" /> -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationEnsembleSolution.cs
r7259 r8276 37 37 [Creatable("Data Analysis - Ensembles")] 38 38 public sealed class ClassificationEnsembleSolution : ClassificationSolution, IClassificationEnsembleSolution { 39 private readonly Dictionary<int, double> trainingEvaluationCache = new Dictionary<int, double>(); 40 private readonly Dictionary<int, double> testEvaluationCache = new Dictionary<int, double>(); 41 39 42 public new IClassificationEnsembleModel Model { 40 43 get { return (IClassificationEnsembleModel)base.Model; } … … 85 88 } 86 89 90 trainingEvaluationCache = new Dictionary<int, double>(original.ProblemData.TrainingIndices.Count()); 91 testEvaluationCache = new Dictionary<int, double>(original.ProblemData.TestIndices.Count()); 92 87 93 classificationSolutions = cloner.Clone(original.classificationSolutions); 88 94 RegisterClassificationSolutionsEventHandler(); … … 128 134 } 129 135 136 trainingEvaluationCache = new Dictionary<int, double>(problemData.TrainingIndices.Count()); 137 testEvaluationCache = new Dictionary<int, double>(problemData.TestIndices.Count()); 138 130 139 RegisterClassificationSolutionsEventHandler(); 131 140 classificationSolutions.AddRange(solutions); … … 148 157 public override IEnumerable<double> EstimatedTrainingClassValues { 149 158 get { 150 var rows = ProblemData.TrainingIndizes; 151 var estimatedValuesEnumerators = (from model in Model.Models 152 select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() }) 153 .ToList(); 154 var rowsEnumerator = rows.GetEnumerator(); 155 // aggregate to make sure that MoveNext is called for all enumerators 156 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) { 157 int currentRow = rowsEnumerator.Current; 158 159 var selectedEnumerators = from pair in estimatedValuesEnumerators 160 where RowIsTrainingForModel(currentRow, pair.Model) && !RowIsTestForModel(currentRow, pair.Model) 161 select pair.EstimatedValuesEnumerator; 162 yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current)); 159 var rows = ProblemData.TrainingIndices; 160 var rowsToEvaluate = rows.Except(trainingEvaluationCache.Keys); 161 var rowsEnumerator = rowsToEvaluate.GetEnumerator(); 162 var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, (r, m) => RowIsTrainingForModel(r, m) && !RowIsTestForModel(r, m)).GetEnumerator(); 163 164 while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) { 165 trainingEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current); 163 166 } 167 168 return rows.Select(row => trainingEvaluationCache[row]); 164 169 } 165 170 } … … 167 172 public override IEnumerable<double> EstimatedTestClassValues { 168 173 get { 169 var rows = ProblemData.TestIndizes; 170 var estimatedValuesEnumerators = (from model in Model.Models 171 select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() }) 172 .ToList(); 173 var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator(); 174 // aggregate to make sure that MoveNext is called for all enumerators 175 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) { 176 int currentRow = rowsEnumerator.Current; 177 178 var selectedEnumerators = from pair in estimatedValuesEnumerators 179 where RowIsTestForModel(currentRow, pair.Model) 180 select pair.EstimatedValuesEnumerator; 181 182 yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current)); 174 var rows = ProblemData.TestIndices; 175 var rowsToEvaluate = rows.Except(testEvaluationCache.Keys); 176 var rowsEnumerator = rowsToEvaluate.GetEnumerator(); 177 var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, RowIsTestForModel).GetEnumerator(); 178 179 while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) { 180 testEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current); 183 181 } 182 183 return rows.Select(row => testEvaluationCache[row]); 184 } 185 } 186 187 private IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows, Func<int, IClassificationModel, bool> modelSelectionPredicate) { 188 var estimatedValuesEnumerators = (from model in Model.Models 189 select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() }) 190 .ToList(); 191 var rowsEnumerator = rows.GetEnumerator(); 192 // aggregate to make sure that MoveNext is called for all enumerators 193 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) { 194 int currentRow = rowsEnumerator.Current; 195 196 var selectedEnumerators = from pair in estimatedValuesEnumerators 197 where modelSelectionPredicate(currentRow, pair.Model) 198 select pair.EstimatedValuesEnumerator; 199 200 yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current)); 184 201 } 185 202 } … … 196 213 197 214 public override IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows) { 198 return from xs in GetEstimatedClassValueVectors(ProblemData.Dataset, rows) 199 select AggregateEstimatedClassValues(xs); 215 var rowsToEvaluate = rows.Except(evaluationCache.Keys); 216 var rowsEnumerator = rowsToEvaluate.GetEnumerator(); 217 var valuesEnumerator = (from xs in GetEstimatedClassValueVectors(ProblemData.Dataset, rowsToEvaluate) 218 select AggregateEstimatedClassValues(xs)) 219 .GetEnumerator(); 220 221 while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) { 222 evaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current); 223 } 224 225 return rows.Select(row => evaluationCache[row]); 200 226 } 201 227 … … 223 249 224 250 protected override void OnProblemDataChanged() { 251 trainingEvaluationCache.Clear(); 252 testEvaluationCache.Clear(); 253 evaluationCache.Clear(); 254 225 255 IClassificationProblemData problemData = new ClassificationProblemData(ProblemData.Dataset, 226 256 ProblemData.AllowedInputVariables, … … 251 281 public void AddClassificationSolutions(IEnumerable<IClassificationSolution> solutions) { 252 282 classificationSolutions.AddRange(solutions); 283 284 trainingEvaluationCache.Clear(); 285 testEvaluationCache.Clear(); 286 evaluationCache.Clear(); 253 287 } 254 288 public void RemoveClassificationSolutions(IEnumerable<IClassificationSolution> solutions) { 255 289 classificationSolutions.RemoveRange(solutions); 290 291 trainingEvaluationCache.Clear(); 292 testEvaluationCache.Clear(); 293 evaluationCache.Clear(); 256 294 } 257 295 … … 275 313 trainingPartitions[solution.Model] = solution.ProblemData.TrainingPartition; 276 314 testPartitions[solution.Model] = solution.ProblemData.TestPartition; 315 316 trainingEvaluationCache.Clear(); 317 testEvaluationCache.Clear(); 318 evaluationCache.Clear(); 277 319 } 278 320 … … 282 324 trainingPartitions.Remove(solution.Model); 283 325 testPartitions.Remove(solution.Model); 326 327 trainingEvaluationCache.Clear(); 328 testEvaluationCache.Clear(); 329 evaluationCache.Clear(); 284 330 } 285 331 } -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs
r7823 r8276 207 207 208 208 #region parameter properties 209 public ConstrainedValueParameter<StringValue> TargetVariableParameter {210 get { return ( ConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }209 public IConstrainedValueParameter<StringValue> TargetVariableParameter { 210 get { return (IConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; } 211 211 } 212 212 public IFixedValueParameter<StringMatrix> ClassNamesParameter { -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolution.cs
r7259 r8276 44 44 public ClassificationSolution(IClassificationModel model, IClassificationProblemData problemData) 45 45 : base(model, problemData) { 46 evaluationCache = new Dictionary<int, double>( );46 evaluationCache = new Dictionary<int, double>(problemData.Dataset.Rows); 47 47 } 48 48 … … 51 51 } 52 52 public override IEnumerable<double> EstimatedTrainingClassValues { 53 get { return GetEstimatedClassValues(ProblemData.TrainingIndi zes); }53 get { return GetEstimatedClassValues(ProblemData.TrainingIndices); } 54 54 } 55 55 public override IEnumerable<double> EstimatedTestClassValues { 56 get { return GetEstimatedClassValues(ProblemData.TestIndi zes); }56 get { return GetEstimatedClassValues(ProblemData.TestIndices); } 57 57 } 58 58 -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionBase.cs
r7259 r8276 87 87 protected void CalculateResults() { 88 88 double[] estimatedTrainingClassValues = EstimatedTrainingClassValues.ToArray(); // cache values 89 double[] originalTrainingClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndi zes).ToArray();89 double[] originalTrainingClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).ToArray(); 90 90 double[] estimatedTestClassValues = EstimatedTestClassValues.ToArray(); // cache values 91 double[] originalTestClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndi zes).ToArray();91 double[] originalTestClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices).ToArray(); 92 92 93 93 OnlineCalculatorError errorState; -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationSolution.cs
r7259 r8276 59 59 } 60 60 public override IEnumerable<double> EstimatedTrainingClassValues { 61 get { return GetEstimatedClassValues(ProblemData.TrainingIndi zes); }61 get { return GetEstimatedClassValues(ProblemData.TrainingIndices); } 62 62 } 63 63 public override IEnumerable<double> EstimatedTestClassValues { 64 get { return GetEstimatedClassValues(ProblemData.TestIndi zes); }64 get { return GetEstimatedClassValues(ProblemData.TestIndices); } 65 65 } 66 66 … … 82 82 } 83 83 public override IEnumerable<double> EstimatedTrainingValues { 84 get { return GetEstimatedValues(ProblemData.TrainingIndi zes); }84 get { return GetEstimatedValues(ProblemData.TrainingIndices); } 85 85 } 86 86 public override IEnumerable<double> EstimatedTestValues { 87 get { return GetEstimatedValues(ProblemData.TestIndi zes); }87 get { return GetEstimatedValues(ProblemData.TestIndices); } 88 88 } 89 89 -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationSolutionBase.cs
r7259 r8276 103 103 protected void CalculateRegressionResults() { 104 104 double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values 105 double[] originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndi zes).ToArray();105 double[] originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).ToArray(); 106 106 double[] estimatedTestValues = EstimatedTestValues.ToArray(); // cache values 107 double[] originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndi zes).ToArray();107 double[] originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices).ToArray(); 108 108 109 109 OnlineCalculatorError errorState; … … 140 140 double[] classValues; 141 141 double[] thresholds; 142 var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndi zes);142 var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices); 143 143 AccuracyMaximizationThresholdCalculator.CalculateThresholds(ProblemData, EstimatedTrainingValues, targetClassValues, out classValues, out thresholds); 144 144 … … 149 149 double[] classValues; 150 150 double[] thresholds; 151 var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndi zes);151 var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices); 152 152 NormalDistributionCutPointsThresholdCalculator.CalculateThresholds(ProblemData, EstimatedTrainingValues, targetClassValues, out classValues, out thresholds); 153 153 -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ThresholdCalculators/AccuracyMaximizationThresholdCalculator.cs
r7259 r8276 54 54 public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) { 55 55 int slices = 100; 56 double minThresholdInc = 10e-5; // necessary to prevent infinite loop when maxEstimated - minEstimated is effectively zero (constant model) 56 57 List<double> estimatedValuesList = estimatedValues.ToList(); 57 58 double maxEstimatedValue = estimatedValuesList.Max(); 58 59 double minEstimatedValue = estimatedValuesList.Min(); 59 double thresholdIncrement = (maxEstimatedValue - minEstimatedValue) / slices;60 double thresholdIncrement = Math.Max((maxEstimatedValue - minEstimatedValue) / slices, minThresholdInc); 60 61 var estimatedAndTargetValuePairs = 61 62 estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y }) … … 70 71 71 72 // incrementally calculate accuracy of all possible thresholds 72 int[,] confusionMatrix = new int[nClasses, nClasses];73 74 73 for (int i = 1; i < thresholds.Length; i++) { 75 74 double lowerThreshold = thresholds[i - 1]; -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringSolution.cs
r7259 r8276 68 68 public virtual IEnumerable<int> TrainingClusterValues { 69 69 get { 70 return GetClusterValues(ProblemData.TrainingIndi zes);70 return GetClusterValues(ProblemData.TrainingIndices); 71 71 } 72 72 } … … 74 74 public virtual IEnumerable<int> TestClusterValues { 75 75 get { 76 return GetClusterValues(ProblemData.TestIndi zes);76 return GetClusterValues(ProblemData.TestIndices); 77 77 } 78 78 } -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs
r8038 r8276 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using HeuristicLab.Analysis;26 25 using HeuristicLab.Collections; 27 26 using HeuristicLab.Common; … … 53 52 get { return (IFixedValueParameter<IntRange>)Parameters[TestPartitionParameterName]; } 54 53 } 55 public IFixedValueParameter< HeatMap> DatasetHeatMapParameter {56 get { return (IFixedValueParameter< HeatMap>)Parameters[DatasetHeatMapParameterName]; }54 public IFixedValueParameter<ExtendedHeatMap> DatasetHeatMapParameter { 55 get { return (IFixedValueParameter<ExtendedHeatMap>)Parameters[DatasetHeatMapParameterName]; } 57 56 } 58 57 #endregion … … 79 78 get { return TestPartitionParameter.Value; } 80 79 } 81 public HeatMap DatasetHeatMap {80 public ExtendedHeatMap DatasetHeatMap { 82 81 get { return DatasetHeatMapParameter.Value; } 83 82 } 84 83 85 public virtual IEnumerable<int> TrainingIndi zes {84 public virtual IEnumerable<int> TrainingIndices { 86 85 get { 87 86 return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start)) … … 89 88 } 90 89 } 91 public virtual IEnumerable<int> TestIndi zes {90 public virtual IEnumerable<int> TestIndices { 92 91 get { 93 92 return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start)) … … 140 139 Parameters.Add(new FixedValueParameter<IntRange>(TrainingPartitionParameterName, "", new IntRange(trainingPartitionStart, trainingPartitionEnd))); 141 140 Parameters.Add(new FixedValueParameter<IntRange>(TestPartitionParameterName, "", new IntRange(testPartitionStart, testPartitionEnd))); 142 Parameters.Add(new FixedValueParameter< HeatMap>(DatasetHeatMapParameterName, "", CalculateHeatMap(dataset)));141 Parameters.Add(new FixedValueParameter<ExtendedHeatMap>(DatasetHeatMapParameterName, "", new ExtendedHeatMap(this))); 143 142 144 143 ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false; 145 144 RegisterEventHandlers(); 146 }147 148 private HeatMap CalculateHeatMap(Dataset dataset) {149 IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();150 OnlineCalculatorError error;151 int length = doubleVariableNames.Count;152 double[,] elements = new double[length, length];153 154 for (int i = 0; i < length; i++) {155 for (int j = 0; j < i + 1; j++) {156 elements[i, j] = OnlinePearsonsRSquaredCalculator.Calculate(dataset.GetDoubleValues(doubleVariableNames[length - 1 - i]), dataset.GetDoubleValues(doubleVariableNames[j]), out error);157 elements[j, i] = elements[i, j];158 if (!error.Equals(OnlineCalculatorError.None)) {159 throw new ArgumentException("Calculator returned " + error);160 }161 }162 }163 return new HeatMap(elements, "Hoeffdings Dependence");164 145 } 165 146 -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionEnsembleSolution.cs
r7738 r8276 37 37 [Creatable("Data Analysis - Ensembles")] 38 38 public sealed class RegressionEnsembleSolution : RegressionSolution, IRegressionEnsembleSolution { 39 private readonly Dictionary<int, double> trainingEvaluationCache = new Dictionary<int, double>(); 40 private readonly Dictionary<int, double> testEvaluationCache = new Dictionary<int, double>(); 41 39 42 public new IRegressionEnsembleModel Model { 40 43 get { return (IRegressionEnsembleModel)base.Model; } … … 52 55 53 56 [Storable] 54 private Dictionary<IRegressionModel, IntRange> trainingPartitions;57 private readonly Dictionary<IRegressionModel, IntRange> trainingPartitions; 55 58 [Storable] 56 private Dictionary<IRegressionModel, IntRange> testPartitions;59 private readonly Dictionary<IRegressionModel, IntRange> testPartitions; 57 60 58 61 [StorableConstructor] … … 86 89 } 87 90 91 trainingEvaluationCache = new Dictionary<int, double>(original.ProblemData.TrainingIndices.Count()); 92 testEvaluationCache = new Dictionary<int, double>(original.ProblemData.TestIndices.Count()); 93 88 94 regressionSolutions = cloner.Clone(original.regressionSolutions); 89 95 RegisterRegressionSolutionsEventHandler(); … … 133 139 } 134 140 141 trainingEvaluationCache = new Dictionary<int, double>(problemData.TrainingIndices.Count()); 142 testEvaluationCache = new Dictionary<int, double>(problemData.TestIndices.Count()); 143 135 144 RegisterRegressionSolutionsEventHandler(); 136 145 regressionSolutions.AddRange(solutions); … … 153 162 public override IEnumerable<double> EstimatedTrainingValues { 154 163 get { 155 var rows = ProblemData.TrainingIndizes; 156 var estimatedValuesEnumerators = (from model in Model.Models 157 select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() }) 158 .ToList(); 159 var rowsEnumerator = rows.GetEnumerator(); 160 // aggregate to make sure that MoveNext is called for all enumerators 161 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) { 162 int currentRow = rowsEnumerator.Current; 163 164 var selectedEnumerators = from pair in estimatedValuesEnumerators 165 where RowIsTrainingForModel(currentRow, pair.Model) && !RowIsTestForModel(currentRow, pair.Model) 166 select pair.EstimatedValuesEnumerator; 167 yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current)); 164 var rows = ProblemData.TrainingIndices; 165 var rowsToEvaluate = rows.Except(trainingEvaluationCache.Keys); 166 var rowsEnumerator = rowsToEvaluate.GetEnumerator(); 167 var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, (r, m) => RowIsTrainingForModel(r, m) && !RowIsTestForModel(r, m)).GetEnumerator(); 168 169 while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) { 170 trainingEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current); 168 171 } 172 173 return rows.Select(row => trainingEvaluationCache[row]); 169 174 } 170 175 } … … 172 177 public override IEnumerable<double> EstimatedTestValues { 173 178 get { 174 var rows = ProblemData.TestIndizes; 175 var estimatedValuesEnumerators = (from model in Model.Models 176 select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() }) 177 .ToList(); 178 var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator(); 179 // aggregate to make sure that MoveNext is called for all enumerators 180 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) { 181 int currentRow = rowsEnumerator.Current; 182 183 var selectedEnumerators = from pair in estimatedValuesEnumerators 184 where RowIsTestForModel(currentRow, pair.Model) 185 select pair.EstimatedValuesEnumerator; 186 187 yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current)); 179 var rows = ProblemData.TestIndices; 180 var rowsToEvaluate = rows.Except(testEvaluationCache.Keys); 181 var rowsEnumerator = rowsToEvaluate.GetEnumerator(); 182 var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, RowIsTestForModel).GetEnumerator(); 183 184 while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) { 185 testEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current); 188 186 } 187 188 return rows.Select(row => testEvaluationCache[row]); 189 } 190 } 191 192 private IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows, Func<int, IRegressionModel, bool> modelSelectionPredicate) { 193 var estimatedValuesEnumerators = (from model in Model.Models 194 select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() }) 195 .ToList(); 196 var rowsEnumerator = rows.GetEnumerator(); 197 // aggregate to make sure that MoveNext is called for all enumerators 198 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) { 199 int currentRow = rowsEnumerator.Current; 200 201 var selectedEnumerators = from pair in estimatedValuesEnumerators 202 where modelSelectionPredicate(currentRow, pair.Model) 203 select pair.EstimatedValuesEnumerator; 204 205 yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current)); 189 206 } 190 207 } … … 201 218 202 219 public override IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows) { 203 return from xs in GetEstimatedValueVectors(ProblemData.Dataset, rows) 204 select AggregateEstimatedValues(xs); 220 var rowsToEvaluate = rows.Except(evaluationCache.Keys); 221 var rowsEnumerator = rowsToEvaluate.GetEnumerator(); 222 var valuesEnumerator = (from xs in GetEstimatedValueVectors(ProblemData.Dataset, rowsToEvaluate) 223 select AggregateEstimatedValues(xs)) 224 .GetEnumerator(); 225 226 while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) { 227 evaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current); 228 } 229 230 return rows.Select(row => evaluationCache[row]); 205 231 } 206 232 … … 223 249 224 250 protected override void OnProblemDataChanged() { 251 trainingEvaluationCache.Clear(); 252 testEvaluationCache.Clear(); 253 evaluationCache.Clear(); 225 254 IRegressionProblemData problemData = new RegressionProblemData(ProblemData.Dataset, 226 255 ProblemData.AllowedInputVariables, … … 251 280 public void AddRegressionSolutions(IEnumerable<IRegressionSolution> solutions) { 252 281 regressionSolutions.AddRange(solutions); 282 283 trainingEvaluationCache.Clear(); 284 testEvaluationCache.Clear(); 285 evaluationCache.Clear(); 253 286 } 254 287 public void RemoveRegressionSolutions(IEnumerable<IRegressionSolution> solutions) { 255 288 regressionSolutions.RemoveRange(solutions); 289 290 trainingEvaluationCache.Clear(); 291 testEvaluationCache.Clear(); 292 evaluationCache.Clear(); 256 293 } 257 294 … … 275 312 trainingPartitions[solution.Model] = solution.ProblemData.TrainingPartition; 276 313 testPartitions[solution.Model] = solution.ProblemData.TestPartition; 314 315 trainingEvaluationCache.Clear(); 316 testEvaluationCache.Clear(); 317 evaluationCache.Clear(); 277 318 } 278 319 … … 282 323 trainingPartitions.Remove(solution.Model); 283 324 testPartitions.Remove(solution.Model); 325 326 trainingEvaluationCache.Clear(); 327 testEvaluationCache.Clear(); 328 evaluationCache.Clear(); 284 329 } 285 330 } -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs
r7823 r8276 95 95 #endregion 96 96 97 public ConstrainedValueParameter<StringValue> TargetVariableParameter {98 get { return ( ConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }97 public IConstrainedValueParameter<StringValue> TargetVariableParameter { 98 get { return (IConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; } 99 99 } 100 100 public string TargetVariable { -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolution.cs
r7735 r8276 55 55 } 56 56 public override IEnumerable<double> EstimatedTrainingValues { 57 get { return GetEstimatedValues(ProblemData.TrainingIndi zes); }57 get { return GetEstimatedValues(ProblemData.TrainingIndices); } 58 58 } 59 59 public override IEnumerable<double> EstimatedTestValues { 60 get { return GetEstimatedValues(ProblemData.TestIndi zes); }60 get { return GetEstimatedValues(ProblemData.TestIndices); } 61 61 } 62 62 -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionBase.cs
r7735 r8276 138 138 OnlineCalculatorError errorState; 139 139 Add(new Result(TrainingMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the training partition", new DoubleValue())); 140 double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndi zes), out errorState);140 double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices), out errorState); 141 141 TrainingMeanAbsoluteError = errorState == OnlineCalculatorError.None ? trainingMAE : double.NaN; 142 142 } … … 145 145 OnlineCalculatorError errorState; 146 146 Add(new Result(TestMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the test partition", new DoubleValue())); 147 double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndi zes), out errorState);147 double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices), out errorState); 148 148 TestMeanAbsoluteError = errorState == OnlineCalculatorError.None ? testMAE : double.NaN; 149 149 } … … 152 152 OnlineCalculatorError errorState; 153 153 Add(new Result(TrainingMeanErrorResultName, "Mean of errors of the model on the training partition", new DoubleValue())); 154 double trainingME = OnlineMeanErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndi zes), out errorState);154 double trainingME = OnlineMeanErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices), out errorState); 155 155 TrainingMeanError = errorState == OnlineCalculatorError.None ? trainingME : double.NaN; 156 156 } … … 158 158 OnlineCalculatorError errorState; 159 159 Add(new Result(TestMeanErrorResultName, "Mean of errors of the model on the test partition", new DoubleValue())); 160 double testME = OnlineMeanErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndi zes), out errorState);160 double testME = OnlineMeanErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices), out errorState); 161 161 TestMeanError = errorState == OnlineCalculatorError.None ? testME : double.NaN; 162 162 } … … 166 166 protected void CalculateResults() { 167 167 IEnumerable<double> estimatedTrainingValues = EstimatedTrainingValues; // cache values 168 IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndi zes);168 IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices); 169 169 IEnumerable<double> estimatedTestValues = EstimatedTestValues; // cache values 170 IEnumerable<double> originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndi zes);170 IEnumerable<double> originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices); 171 171 172 172 OnlineCalculatorError errorState; -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs
r7259 r8276 36 36 IntRange TestPartition { get; } 37 37 38 IEnumerable<int> TrainingIndi zes { get; }39 IEnumerable<int> TestIndi zes { get; }38 IEnumerable<int> TrainingIndices { get; } 39 IEnumerable<int> TestIndices { get; } 40 40 41 41 bool IsTrainingSample(int index); -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IOnlineCalculator.cs
r7259 r8276 24 24 namespace HeuristicLab.Problems.DataAnalysis { 25 25 [Flags] 26 public enum OnlineCalculatorError { 26 public enum OnlineCalculatorError { 27 27 /// <summary> 28 28 /// No error occurred 29 29 /// </summary> 30 None = 0, 30 None = 0, 31 31 /// <summary> 32 32 /// An invalid value has been added (often +/- Infinity and NaN are invalid values) 33 33 /// </summary> 34 InvalidValueAdded = 1, 34 InvalidValueAdded = 1, 35 35 /// <summary> 36 36 /// The number of elements added to the evaluator is not sufficient to calculate the result value -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/HoeffdingsDependenceCalculator.cs
r7969 r8276 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using HeuristicLab.Common;26 25 27 26 namespace HeuristicLab.Problems.DataAnalysis { -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineLinearScalingParameterCalculator.cs
r7259 r8276 55 55 } 56 56 57 private int cnt;58 57 private OnlineMeanAndVarianceCalculator targetMeanCalculator; 59 58 private OnlineMeanAndVarianceCalculator originalMeanAndVarianceCalculator; … … 68 67 69 68 public void Reset() { 70 cnt = 0;71 69 targetMeanCalculator.Reset(); 72 70 originalMeanAndVarianceCalculator.Reset(); … … 85 83 originalTargetCovarianceCalculator.Add(original, target); 86 84 87 cnt++;88 85 } 89 86
Note: See TracChangeset
for help on using the changeset viewer.