Changeset 6195 for branches/histogram/HeuristicLab.Problems.DataAnalysis
- Timestamp:
- 05/14/11 16:45:46 (13 years ago)
- Location:
- branches/histogram
- Files:
-
- 12 edited
- 4 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/histogram
-
branches/histogram/HeuristicLab.Problems.DataAnalysis
- Property svn:mergeinfo changed
/branches/DataAnalysis SolutionEnsembles/HeuristicLab.Problems.DataAnalysis (added) merged: 5816 /trunk/sources/HeuristicLab.Problems.DataAnalysis merged: 6092,6095,6099,6184,6186
- Property svn:mergeinfo changed
-
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLabProblemsDataAnalysisPlugin.cs.frame
r5446 r6195 26 26 27 27 namespace HeuristicLab.Problems.DataAnalysis { 28 [Plugin("HeuristicLab.Problems.DataAnalysis", "3.3.3.$WCREV$")] 28 [Plugin("HeuristicLab.Problems.DataAnalysis", "3.3.4.$WCREV$")] 29 29 [PluginFile("HeuristicLab.Problems.DataAnalysis-3.3.dll", PluginFileType.Assembly)] 30 30 [PluginDependency("HeuristicLab.ALGLIB", "3.1")] -
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.3/Properties/AssemblyInfo.frame
r5446 r6195 53 53 // by using the '*' as shown below: 54 54 [assembly: AssemblyVersion("3.3.0.0")] 55 [assembly: AssemblyFileVersion("3.3.3.$WCREV$")] 55 [assembly: AssemblyFileVersion("3.3.4.$WCREV$")] -
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj
r5943 r6195 109 109 <ItemGroup> 110 110 <Compile Include="DoubleLimit.cs" /> 111 <Compile Include="Implementation\Classification\ClassificationEnsembleModel.cs"> 112 <SubType>Code</SubType> 113 </Compile> 111 114 <Compile Include="Implementation\Classification\ClassificationProblemData.cs" /> 112 115 <Compile Include="Implementation\Classification\ClassificationProblem.cs" /> … … 115 118 <Compile Include="Implementation\Clustering\ClusteringProblemData.cs" /> 116 119 <Compile Include="Implementation\Clustering\ClusteringSolution.cs" /> 117 <Compile Include="Implementation\Classification\ClassificationEnsembleModel.cs" /> 120 <Compile Include="Implementation\Regression\RegressionEnsembleModel.cs"> 121 <SubType>Code</SubType> 122 </Compile> 123 <Compile Include="Implementation\Regression\RegressionEnsembleSolution.cs" /> 124 <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs"> 125 <SubType>Code</SubType> 126 </Compile> 127 <Compile Include="Interfaces\Classification\IClassificationEnsembleSolution.cs" /> 118 128 <Compile Include="Interfaces\Classification\IDiscriminantFunctionThresholdCalculator.cs" /> 119 <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs" /> 120 <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs" /> 129 <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs"> 130 <SubType>Code</SubType> 131 </Compile> 132 <Compile Include="Interfaces\Regression\IRegressionEnsembleSolution.cs" /> 121 133 <Compile Include="OnlineCalculators\OnlineLinearScalingParameterCalculator.cs" /> 122 <Compile Include="Implementation\Regression\RegressionEnsembleModel.cs" /> 123 134 <Compile Include="Implementation\Classification\DiscriminantFunctionClassificationModel.cs" /> 124 135 <Compile Include="Implementation\Classification\DiscriminantFunctionClassificationSolution.cs" /> -
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs
r5847 r6195 37 37 private const string ClassNamesParameterName = "ClassNames"; 38 38 private const string ClassificationPenaltiesParameterName = "ClassificationPenalties"; 39 private const int MaximumNumberOfClass = 100; 40 private const int InspectedRowsToDetermineTargets = 500; 39 41 40 42 #region default data … … 252 254 public ClassificationProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable) 253 255 : base(dataset, allowedInputVariables) { 254 var variables = InputVariables.Select(x => x.AsReadOnly()).ToList(); 255 Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>(variables), variables.Where(x => x.Value == targetVariable).First())); 256 var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset).Select(x => new StringValue(x).AsReadOnly()).ToList(); 257 var target = validTargetVariableValues.Where(x => x.Value == targetVariable).DefaultIfEmpty(validTargetVariableValues.First()).First(); 258 259 Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>(validTargetVariableValues), target)); 256 260 Parameters.Add(new FixedValueParameter<StringMatrix>(ClassNamesParameterName, "")); 257 261 Parameters.Add(new FixedValueParameter<DoubleMatrix>(ClassificationPenaltiesParameterName, "")); … … 260 264 RegisterParameterEvents(); 261 265 } 266 267 private static IEnumerable<string> CheckVariablesForPossibleTargetVariables(Dataset dataset) { 268 var validTargetVariables = from v in dataset.VariableNames 269 let DistinctValues = dataset.Rows > InspectedRowsToDetermineTargets ? 
dataset.GetVariableValues(v, 0, InspectedRowsToDetermineTargets).Distinct().Count() 270 : dataset.GetVariableValues(v).Distinct().Count() 271 where DistinctValues < MaximumNumberOfClass 272 select v; 273 274 if (!validTargetVariables.Any()) 275 throw new ArgumentException("Import of classification problem data was not successfull, because no target variable was found." + 276 " A target variable must have at most " + MaximumNumberOfClass + " distinct values to be applicable to classification."); 277 return validTargetVariables; 278 } 279 262 280 263 281 private void ResetTargetVariableDependentMembers() { -
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolution.cs
r5942 r6195 30 30 namespace HeuristicLab.Problems.DataAnalysis { 31 31 /// <summary> 32 /// Abstract base class for classification data analysis solutions 32 /// Represents a classification data analysis solution 33 33 /// </summary> 34 34 [StorableClass] 35 public abstract class ClassificationSolution : DataAnalysisSolution, IClassificationSolution { 35 public class ClassificationSolution : DataAnalysisSolution, IClassificationSolution { 36 36 private const string TrainingAccuracyResultName = "Accuracy (training)"; 37 37 private const string TestAccuracyResultName = "Accuracy (test)"; … … 67 67 Add(new Result(TestAccuracyResultName, "Accuracy of the model on the test partition (percentage of correctly classified instances).", new PercentValue())); 68 68 RecalculateResults(); 69 } 70 71 public override IDeepCloneable Clone(Cloner cloner) { 72 return new ClassificationSolution(this, cloner); 69 73 } 70 74 -
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringSolution.cs
r5809 r6195 27 27 namespace HeuristicLab.Problems.DataAnalysis { 28 28 /// <summary> 29 /// Abstract base class for clustering data analysis solutions 29 /// Represents a clustering data analysis solution 30 30 /// </summary> 31 31 [StorableClass] 32 public abstract class ClusteringSolution : DataAnalysisSolution, IClusteringSolution { 32 public class ClusteringSolution : DataAnalysisSolution, IClusteringSolution { 33 33 34 34 [StorableConstructor] … … 39 39 public ClusteringSolution(IClusteringModel model, IClusteringProblemData problemData) 40 40 : base(model, problemData) { 41 } 42 43 public override IDeepCloneable Clone(Cloner cloner) { 44 return new ClusteringSolution(this, cloner); 41 45 } 42 46 -
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolution.cs
r6011 r6195 30 30 namespace HeuristicLab.Problems.DataAnalysis { 31 31 /// <summary> 32 /// Abstract base class for regression data analysis solutions 32 /// Represents a regression data analysis solution 33 33 /// </summary> 34 34 [StorableClass] 35 public abstract class RegressionSolution : DataAnalysisSolution, IRegressionSolution { 35 public class RegressionSolution : DataAnalysisSolution, IRegressionSolution { 36 36 private const string TrainingMeanSquaredErrorResultName = "Mean squared error (training)"; 37 37 private const string TestMeanSquaredErrorResultName = "Mean squared error (test)"; … … 55 55 public double TrainingMeanSquaredError { 56 56 get { return ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value; } 57 private set { ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value = value; } 57 protected set { ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value = value; } 58 58 } 59 59 60 60 public double TestMeanSquaredError { 61 61 get { return ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value; } 62 private set { ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value = value; } 62 protected set { ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value = value; } 63 63 } 64 64 65 65 public double TrainingRSquared { 66 66 get { return ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value; } 67 private set { ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value = value; } 67 protected set { ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value = value; } 68 68 } 69 69 70 70 public double TestRSquared { 71 71 get { return ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value; } 72 private set { ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value = value; } 72 protected set { ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value = value; } 73 73 } 74 74 75 75 public double TrainingRelativeError { 76 76 get
{ return ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value; } 77 private set { ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value = value; } 77 protected set { ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value = value; } 78 78 } 79 79 80 80 public double TestRelativeError { 81 81 get { return ((DoubleValue)this[TestRelativeErrorResultName].Value).Value; } 82 private set { ((DoubleValue)this[TestRelativeErrorResultName].Value).Value = value; } 82 protected set { ((DoubleValue)this[TestRelativeErrorResultName].Value).Value = value; } 83 83 } 84 84 85 85 public double TrainingNormalizedMeanSquaredError { 86 86 get { return ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value; } 87 private set { ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value = value; } 87 protected set { ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value = value; } 88 88 } 89 89 90 90 public double TestNormalizedMeanSquaredError { 91 91 get { return ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value; } 92 private set { ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value = value; } 92 protected set { ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value = value; } 93 93 } 94 94 … … 113 113 } 114 114 115 public override IDeepCloneable Clone(Cloner cloner) { 116 return new RegressionSolution(this, cloner); 117 } 118 115 119 protected override void OnProblemDataChanged(EventArgs e) { 116 120 base.OnProblemDataChanged(e); … … 122 126 } 123 127 124 protected void RecalculateResults() { 128 private void RecalculateResults() { 125 129 double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values 126 130 IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes); -
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineMeanAndVarianceCalculator.cs
r5945 r6195 73 73 74 74 public void Add(double x) { 75 if (double.IsNaN(x) || double.IsInfinity(x) || (errorState & OnlineCalculatorError.InvalidValueAdded) > 0) { 75 if (double.IsNaN(x) || double.IsInfinity(x) || x > 1E13 || x < -1E13 || (errorState & OnlineCalculatorError.InvalidValueAdded) > 0) { 76 76 errorState = errorState | OnlineCalculatorError.InvalidValueAdded; 77 77 varianceErrorState = errorState | OnlineCalculatorError.InvalidValueAdded; … … 84 84 errorState = errorState & (~OnlineCalculatorError.InsufficientElementsAdded); // n >= 1 85 85 } else { 86 86 87 varianceErrorState = varianceErrorState & (~OnlineCalculatorError.InsufficientElementsAdded); // n >= 2 87 88 m_newM = m_oldM + (x - m_oldM) / n; -
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Tests
- Property svn:ignore
-
old new 2 2 obj 3 3 *.vs10x 4 *.user
-
- Property svn:ignore
-
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Tests/StatisticCalculatorsTest.cs
r5944 r6195 78 78 double variance = calculator.Variance; 79 79 80 Assert.AreEqual(mean_alglib, mean, 1E-6 * scale); 81 Assert.AreEqual(variance_alglib, variance, 1E-6 * scale); 80 Assert.AreEqual(mean_alglib.ToString(), mean.ToString()); 81 Assert.AreEqual(variance_alglib.ToString(), variance.ToString()); 82 82 } 83 83 } … … 109 109 double r2 = r2Calculator.RSquared; 110 110 111 Assert.AreEqual(r2_alglib, r2, 1E-6 * Math.Max(c1Scale, c2Scale)); 111 Assert.AreEqual(r2_alglib.ToString(), r2.ToString()); 112 112 } 113 113 } 114 114 } 115 115 } 116 [TestMethod] 117 public void CalculatePearsonsRSquaredOfConstantTest() { 118 System.Random random = new System.Random(31415); 119 int n = 12; 120 int cols = testData.GetLength(1); 121 for (int c1 = 0; c1 < cols; c1++) { 122 double c1Scale = random.NextDouble() * 1E7; 123 double c2Scale = 1.0; 124 IEnumerable<double> x = from rows in Enumerable.Range(0, n) 125 select testData[rows, c1] * c1Scale; 126 IEnumerable<double> y = (new List<double>() { 150494407424305.44 }) 127 .Concat(Enumerable.Repeat(150494407424305.47, n - 1)); 128 double[] xs = x.ToArray(); 129 double[] ys = y.ToArray(); 130 double r2_alglib = alglib.pearsoncorrelation(xs, ys, n); 131 r2_alglib *= r2_alglib; 132 133 var r2Calculator = new OnlinePearsonsRSquaredCalculator(); 134 for (int i = 0; i < n; i++) { 135 r2Calculator.Add(xs[i], ys[i]); 136 } 137 double r2 = r2Calculator.RSquared; 138 139 Assert.AreEqual(r2_alglib.ToString(), r2.ToString()); 140 } 141 } 116 142 } 117 143 }
Note: See TracChangeset
for help on using the changeset viewer.