Changeset 6184 for trunk/sources/HeuristicLab.Problems.DataAnalysis
- Timestamp:
- 05/11/11 15:56:17 (14 years ago)
- Location:
- trunk/sources
- Files:
-
- 7 edited
- 4 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources
- Property svn:mergeinfo changed
/branches/DataAnalysis SolutionEnsembles (added) merged: 5815-5816
- Property svn:mergeinfo changed
-
trunk/sources/HeuristicLab.Problems.DataAnalysis
- Property svn:mergeinfo changed
/branches/DataAnalysis SolutionEnsembles/HeuristicLab.Problems.DataAnalysis (added) merged: 5816
- Property svn:mergeinfo changed
-
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj
r5943 r6184 109 109 <ItemGroup> 110 110 <Compile Include="DoubleLimit.cs" /> 111 <Compile Include="Implementation\Classification\ClassificationEnsembleModel.cs"> 112 <SubType>Code</SubType> 113 </Compile> 111 114 <Compile Include="Implementation\Classification\ClassificationProblemData.cs" /> 112 115 <Compile Include="Implementation\Classification\ClassificationProblem.cs" /> … … 115 118 <Compile Include="Implementation\Clustering\ClusteringProblemData.cs" /> 116 119 <Compile Include="Implementation\Clustering\ClusteringSolution.cs" /> 117 <Compile Include="Implementation\Classification\ClassificationEnsembleModel.cs" /> 120 <Compile Include="Implementation\Regression\RegressionEnsembleModel.cs"> 121 <SubType>Code</SubType> 122 </Compile> 123 <Compile Include="Implementation\Regression\RegressionEnsembleSolution.cs" /> 124 <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs"> 125 <SubType>Code</SubType> 126 </Compile> 127 <Compile Include="Interfaces\Classification\IClassificationEnsembleSolution.cs" /> 118 128 <Compile Include="Interfaces\Classification\IDiscriminantFunctionThresholdCalculator.cs" /> 119 <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs" /> 120 <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs" /> 129 <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs"> 130 <SubType>Code</SubType> 131 </Compile> 132 <Compile Include="Interfaces\Regression\IRegressionEnsembleSolution.cs" /> 121 133 <Compile Include="OnlineCalculators\OnlineLinearScalingParameterCalculator.cs" /> 122 <Compile Include="Implementation\Regression\RegressionEnsembleModel.cs" />123 134 <Compile Include="Implementation\Classification\DiscriminantFunctionClassificationModel.cs" /> 124 135 <Compile Include="Implementation\Classification\DiscriminantFunctionClassificationSolution.cs" /> -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolution.cs
r5942 r6184 30 30 namespace HeuristicLab.Problems.DataAnalysis { 31 31 /// <summary> 32 /// Abstract base class for classification data analysis solutions 32 /// Represents a classification data analysis solution 33 33 /// </summary> 34 34 [StorableClass] 35 public abstract class ClassificationSolution : DataAnalysisSolution, IClassificationSolution { 35 public class ClassificationSolution : DataAnalysisSolution, IClassificationSolution { 36 36 private const string TrainingAccuracyResultName = "Accuracy (training)"; 37 37 private const string TestAccuracyResultName = "Accuracy (test)"; … … 67 67 Add(new Result(TestAccuracyResultName, "Accuracy of the model on the test partition (percentage of correctly classified instances).", new PercentValue())); 68 68 RecalculateResults(); 69 } 70 71 public override IDeepCloneable Clone(Cloner cloner) { 72 return new ClassificationSolution(this, cloner); 69 73 } 70 74 -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringSolution.cs
r5809 r6184 27 27 namespace HeuristicLab.Problems.DataAnalysis { 28 28 /// <summary> 29 /// Abstract base class for clustering data analysis solutions 29 /// Represents a clustering data analysis solution 30 30 /// </summary> 31 31 [StorableClass] 32 public abstract class ClusteringSolution : DataAnalysisSolution, IClusteringSolution { 32 public class ClusteringSolution : DataAnalysisSolution, IClusteringSolution { 33 33 34 34 [StorableConstructor] … … 39 39 public ClusteringSolution(IClusteringModel model, IClusteringProblemData problemData) 40 40 : base(model, problemData) { 41 } 42 43 public override IDeepCloneable Clone(Cloner cloner) { 44 return new ClusteringSolution(this, cloner); 41 45 } 42 46 -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionEnsembleSolution.cs
r6180 r6184 75 75 throw new ArgumentException(); 76 76 } 77 78 RecalculateResults(); 79 } 80 81 private void RecalculateResults() { 82 double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values 83 var trainingIndizes = Enumerable.Range(ProblemData.TrainingPartition.Start, 84 ProblemData.TrainingPartition.End - ProblemData.TrainingPartition.Start); 85 IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, trainingIndizes); 86 double[] estimatedTestValues = EstimatedTestValues.ToArray(); // cache values 87 IEnumerable<double> originalTestValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TestIndizes); 88 89 OnlineCalculatorError errorState; 90 double trainingMSE = OnlineMeanSquaredErrorCalculator.Calculate(estimatedTrainingValues, originalTrainingValues, out errorState); 91 TrainingMeanSquaredError = errorState == OnlineCalculatorError.None ? trainingMSE : double.NaN; 92 double testMSE = OnlineMeanSquaredErrorCalculator.Calculate(estimatedTestValues, originalTestValues, out errorState); 93 TestMeanSquaredError = errorState == OnlineCalculatorError.None ? testMSE : double.NaN; 94 95 double trainingR2 = OnlinePearsonsRSquaredCalculator.Calculate(estimatedTrainingValues, originalTrainingValues, out errorState); 96 TrainingRSquared = errorState == OnlineCalculatorError.None ? trainingR2 : double.NaN; 97 double testR2 = OnlinePearsonsRSquaredCalculator.Calculate(estimatedTestValues, originalTestValues, out errorState); 98 TestRSquared = errorState == OnlineCalculatorError.None ? testR2 : double.NaN; 99 100 double trainingRelError = OnlineMeanAbsolutePercentageErrorCalculator.Calculate(estimatedTrainingValues, originalTrainingValues, out errorState); 101 TrainingRelativeError = errorState == OnlineCalculatorError.None ? 
trainingRelError : double.NaN; 102 double testRelError = OnlineMeanAbsolutePercentageErrorCalculator.Calculate(estimatedTestValues, originalTestValues, out errorState); 103 TestRelativeError = errorState == OnlineCalculatorError.None ? testRelError : double.NaN; 104 105 double trainingNMSE = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(estimatedTrainingValues, originalTrainingValues, out errorState); 106 TrainingNormalizedMeanSquaredError = errorState == OnlineCalculatorError.None ? trainingNMSE : double.NaN; 107 double testNMSE = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(estimatedTestValues, originalTestValues, out errorState); 108 TestNormalizedMeanSquaredError = errorState == OnlineCalculatorError.None ? testNMSE : double.NaN; 77 109 } 78 110 … … 83 115 public override IEnumerable<double> EstimatedTrainingValues { 84 116 get { 117 var rows = Enumerable.Range(ProblemData.TrainingPartition.Start, ProblemData.TrainingPartition.End - ProblemData.TrainingPartition.Start); 85 118 var estimatedValuesEnumerators = (from model in Model.Models 86 select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, ProblemData.TestIndizes).GetEnumerator() })119 select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() }) 87 120 .ToList(); 88 var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator();89 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators. 
All(en => en.EstimatedValuesEnumerator.MoveNext())) {121 var rowsEnumerator = rows.GetEnumerator(); 122 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) { 90 123 int currentRow = rowsEnumerator.Current; 91 124 92 125 var selectedEnumerators = from pair in estimatedValuesEnumerators 93 126 where trainingPartitions == null || !trainingPartitions.ContainsKey(pair.Model) || 94 (trainingPartitions[pair.Model].Start >= currentRow && trainingPartitions[pair.Model].End < currentRow)127 (trainingPartitions[pair.Model].Start <= currentRow && currentRow < trainingPartitions[pair.Model].End) 95 128 select pair.EstimatedValuesEnumerator; 96 129 yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current)); … … 105 138 .ToList(); 106 139 var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator(); 107 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators. All(en => en.EstimatedValuesEnumerator.MoveNext())) {140 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) { 108 141 int currentRow = rowsEnumerator.Current; 109 142 110 143 var selectedEnumerators = from pair in estimatedValuesEnumerators 111 144 where testPartitions == null || !testPartitions.ContainsKey(pair.Model) || 112 (testPartitions[pair.Model].Start >= currentRow && testPartitions[pair.Model].End < currentRow)145 (testPartitions[pair.Model].Start <= currentRow && currentRow < testPartitions[pair.Model].End) 113 146 select pair.EstimatedValuesEnumerator; 114 147 -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolution.cs
r5962 r6184 30 30 namespace HeuristicLab.Problems.DataAnalysis { 31 31 /// <summary> 32 /// Abstract base class for regression data analysis solutions32 /// Represents a regression data analysis solution 33 33 /// </summary> 34 34 [StorableClass] 35 public abstractclass RegressionSolution : DataAnalysisSolution, IRegressionSolution {35 public class RegressionSolution : DataAnalysisSolution, IRegressionSolution { 36 36 private const string TrainingMeanSquaredErrorResultName = "Mean squared error (training)"; 37 37 private const string TestMeanSquaredErrorResultName = "Mean squared error (test)"; … … 55 55 public double TrainingMeanSquaredError { 56 56 get { return ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value; } 57 pr ivateset { ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value = value; }57 protected set { ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value = value; } 58 58 } 59 59 60 60 public double TestMeanSquaredError { 61 61 get { return ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value; } 62 pr ivateset { ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value = value; }62 protected set { ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value = value; } 63 63 } 64 64 65 65 public double TrainingRSquared { 66 66 get { return ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value; } 67 pr ivateset { ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value = value; }67 protected set { ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value = value; } 68 68 } 69 69 70 70 public double TestRSquared { 71 71 get { return ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value; } 72 pr ivateset { ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value = value; }72 protected set { ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value = value; } 73 73 } 74 74 75 75 public double TrainingRelativeError { 76 76 get 
{ return ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value; } 77 pr ivateset { ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value = value; }77 protected set { ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value = value; } 78 78 } 79 79 80 80 public double TestRelativeError { 81 81 get { return ((DoubleValue)this[TestRelativeErrorResultName].Value).Value; } 82 pr ivateset { ((DoubleValue)this[TestRelativeErrorResultName].Value).Value = value; }82 protected set { ((DoubleValue)this[TestRelativeErrorResultName].Value).Value = value; } 83 83 } 84 84 85 85 public double TrainingNormalizedMeanSquaredError { 86 86 get { return ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value; } 87 pr ivateset { ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value = value; }87 protected set { ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value = value; } 88 88 } 89 89 90 90 public double TestNormalizedMeanSquaredError { 91 91 get { return ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value; } 92 pr ivateset { ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value = value; }92 protected set { ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value = value; } 93 93 } 94 94 … … 113 113 } 114 114 115 public override IDeepCloneable Clone(Cloner cloner) { 116 return new RegressionSolution(this, cloner); 117 } 118 115 119 protected override void OnProblemDataChanged(EventArgs e) { 116 120 base.OnProblemDataChanged(e); … … 122 126 } 123 127 124 pr otectedvoid RecalculateResults() {128 private void RecalculateResults() { 125 129 double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values 126 130 IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes); -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Tests/StatisticCalculatorsTest.cs
r5944 r6184 78 78 double variance = calculator.Variance; 79 79 80 Assert.AreEqual(mean_alglib , mean, 1E-6 * scale);81 Assert.AreEqual(variance_alglib , variance, 1E-6 * scale);80 Assert.AreEqual(mean_alglib.ToString(), mean.ToString()); 81 Assert.AreEqual(variance_alglib.ToString(), variance.ToString()); 82 82 } 83 83 } … … 109 109 double r2 = r2Calculator.RSquared; 110 110 111 Assert.AreEqual(r2_alglib , r2, 1E-6 * Math.Max(c1Scale, c2Scale));111 Assert.AreEqual(r2_alglib.ToString(), r2.ToString()); 112 112 } 113 113 } 114 114 } 115 115 } 116 [TestMethod] 117 public void CalculatePearsonsRSquaredOfConstantTest() { 118 System.Random random = new System.Random(31415); 119 int n = 12; 120 int cols = testData.GetLength(1); 121 for (int c1 = 0; c1 < cols; c1++) { 122 double c1Scale = random.NextDouble() * 1E7; 123 double c2Scale = 1.0; 124 IEnumerable<double> x = from rows in Enumerable.Range(0, n) 125 select testData[rows, c1] * c1Scale; 126 IEnumerable<double> y = (new List<double>() { 150494407424305.44 }) 127 .Concat(Enumerable.Repeat(150494407424305.47, n - 1)); 128 double[] xs = x.ToArray(); 129 double[] ys = y.ToArray(); 130 double r2_alglib = alglib.pearsoncorrelation(xs, ys, n); 131 r2_alglib *= r2_alglib; 132 133 var r2Calculator = new OnlinePearsonsRSquaredCalculator(); 134 for (int i = 0; i < n; i++) { 135 r2Calculator.Add(xs[i], ys[i]); 136 } 137 double r2 = r2Calculator.RSquared; 138 139 Assert.AreEqual(r2_alglib.ToString(), r2.ToString()); 140 } 141 } 116 142 } 117 143 }
Note: See TracChangeset
for help on using the changeset viewer.