- Timestamp:
- 09/22/10 11:22:49 (14 years ago)
- Location:
- trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3
- Files:
-
- 13 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Analyzers/RegressionSolutionAnalyzer.cs
r4068 r4468 121 121 var solution = bestSolution; 122 122 #region update R2,MSE, Rel Error 123 IEnumerable<double> trainingValues = problemData.Dataset.GetEnumeratedVariableValues( 124 problemData.TargetVariable.Value, 125 problemData.TrainingSamplesStart.Value, 126 problemData.TrainingSamplesEnd.Value); 127 IEnumerable<double> testValues = problemData.Dataset.GetEnumeratedVariableValues( 128 problemData.TargetVariable.Value, 129 problemData.TestSamplesStart.Value, 130 problemData.TestSamplesEnd.Value); 123 IEnumerable<double> trainingValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, problemData.TrainingIndizes); 124 IEnumerable<double> testValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, problemData.TestIndizes); 131 125 OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator(); 132 126 OnlineMeanAbsolutePercentageErrorEvaluator relErrorEvaluator = new OnlineMeanAbsolutePercentageErrorEvaluator(); 133 127 OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator(); 128 134 129 #region training 135 130 var originalEnumerator = trainingValues.GetEnumerator(); … … 144 139 double trainingRelError = relErrorEvaluator.MeanAbsolutePercentageError; 145 140 #endregion 141 146 142 mseEvaluator.Reset(); 147 143 relErrorEvaluator.Reset(); 148 144 r2Evaluator.Reset(); 145 149 146 #region test 150 147 originalEnumerator = testValues.GetEnumerator(); … … 159 156 double testRelError = relErrorEvaluator.MeanAbsolutePercentageError; 160 157 #endregion 158 161 159 if (results.ContainsKey(BestSolutionResultName)) { 162 160 results[BestSolutionResultName].Value = solution; -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/BestSymbolicRegressionSolutionAnalyzer.cs
r4125 r4468 91 91 var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(), 92 92 SymbolicExpressionTree[i]); 93 var solution = new SymbolicRegressionSolution(ProblemData, model, lowerEstimationLimit, upperEstimationLimit); 93 DataAnalysisProblemData problemDataClone = (DataAnalysisProblemData)ProblemData.Clone(); 94 var solution = new SymbolicRegressionSolution(problemDataClone, model, lowerEstimationLimit, upperEstimationLimit); 94 95 solution.Name = BestSolutionParameterName; 95 96 solution.Description = "Best solution on validation partition found over the whole run."; 96 97 BestSolutionParameter.ActualValue = solution; 97 98 BestSolutionQualityParameter.ActualValue = Quality[i]; 98 BestSymbolicRegressionSolutionAnalyzer.UpdateSymbolicRegressionBestSolutionResults(solution, ProblemData, Results, VariableFrequencies);99 BestSymbolicRegressionSolutionAnalyzer.UpdateSymbolicRegressionBestSolutionResults(solution, problemDataClone, Results, VariableFrequencies); 99 100 } 100 101 return BestSolutionParameter.ActualValue; -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs
r4415 r4468 212 212 int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value); 213 213 if (count == 0) count = 1; 214 IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count); 214 IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count) 215 .Where(row => row < ProblemData.TestSamplesStart.Value || ProblemData.TestSamplesEnd.Value <= row); 215 216 216 217 double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity; … … 241 242 // calculate scaling parameters and only for the best tree using the full training set 242 243 double alpha, beta; 243 int trainingStart = ProblemData.TrainingSamplesStart.Value;244 int trainingEnd = ProblemData.TrainingSamplesEnd.Value;245 IEnumerable<int> trainingRows = Enumerable.Range(trainingStart, trainingEnd - trainingStart);246 244 SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree, 247 245 lowerEstimationLimit, upperEstimationLimit, 248 246 ProblemData.Dataset, targetVariable, 249 trainingRows, out beta, out alpha);247 ProblemData.TrainingIndizes, out beta, out alpha); 250 248 251 249 // scale tree for solution … … 253 251 var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(), 254 252 scaledTree); 255 var solution = new SymbolicRegressionSolution( ProblemData, model, lowerEstimationLimit, upperEstimationLimit);253 var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)ProblemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit); 256 254 solution.Name = BestSolutionParameterName; 257 255 solution.Description = "Best solution on validation partition found over the whole run."; -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionModelQualityAnalyzer.cs
r4068 r4468 137 137 Analyze(SymbolicExpressionTreeParameter.ActualValue, SymbolicExpressionTreeInterpreterParameter.ActualValue, 138 138 UpperEstimationLimit.Value, LowerEstimationLimit.Value, ProblemDataParameter.ActualValue, 139 ProblemDataParameter.ActualValue.TrainingSamplesStart.Value, ProblemDataParameter.ActualValue.TrainingSamplesEnd.Value,140 ProblemDataParameter.ActualValue.TestSamplesStart.Value, ProblemDataParameter.ActualValue.TestSamplesEnd.Value,141 139 ResultsParameter.ActualValue); 142 140 return base.Apply(); … … 145 143 public static void Analyze(IEnumerable<SymbolicExpressionTree> trees, ISymbolicExpressionTreeInterpreter interpreter, 146 144 double upperEstimationLimit, double lowerEstimationLimit, 147 DataAnalysisProblemData problemData, int trainingStart, int trainingEnd, int testStart, int testEnd,ResultCollection results) {145 DataAnalysisProblemData problemData, ResultCollection results) { 148 146 int targetVariableIndex = problemData.Dataset.GetVariableIndex(problemData.TargetVariable.Value); 149 IEnumerable<double> originalTrainingValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariableIndex, trainingStart, trainingEnd);150 IEnumerable<double> originalTestValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariableIndex, testStart, testEnd);147 IEnumerable<double> originalTrainingValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariableIndex, problemData.TrainingIndizes); 148 IEnumerable<double> originalTestValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariableIndex, problemData.TestIndizes); 151 149 List<double> trainingMse = new List<double>(); 152 150 List<double> trainingR2 = new List<double>(); … … 162 160 foreach (var tree in trees) { 163 161 #region training 164 var estimatedTrainingValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, Enumerable.Range(trainingStart, trainingEnd - trainingStart));162 var estimatedTrainingValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndizes); 165 163 mseEvaluator.Reset(); 166 164 r2Evaluator.Reset(); … … 184 182 #endregion 185 183 #region test 186 var estimatedTestValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, Enumerable.Range(testStart, testEnd - testStart));184 var estimatedTestValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TestIndizes); 187 185 188 186 mseEvaluator.Reset(); -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionModelQualityCalculator.cs
r4068 r4468 20 20 #endregion 21 21 22 using System; 22 23 using HeuristicLab.Core; 23 24 using HeuristicLab.Data; … … 35 36 [Item("SymbolicRegressionModelQualityCalculator", "An operator to calculate the quality values of a symbolic regression solution symbolic expression tree encoding.")] 36 37 [StorableClass] 38 [Obsolete("This class should not be used anymore because of performance reasons and will therefore not be updated.")] 37 39 public sealed class SymbolicRegressionModelQualityCalculator : AlgorithmOperator { 38 40 private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter"; -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs
r4191 r4468 28 28 using HeuristicLab.Optimization; 29 29 using HeuristicLab.Parameters; 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 30 31 using HeuristicLab.Problems.DataAnalysis.Symbolic; 31 32 using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols; 32 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;33 33 34 34 namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers { … … 241 241 double lowerEstimationLimit, double upperEstimationLimit, 242 242 double maxPruningRatio, double qualityGainWeight) { 243 IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart); 243 IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart) 244 .Where(i => i < problemData.TestSamplesStart.Value || problemData.TestSamplesEnd.Value <= i); 244 245 int originalSize = tree.Size; 245 246 double originalQuality = evaluator.Evaluate(interpreter, tree, -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/ValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs
r4068 r4468 39 39 [Item("ValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")] 40 40 [StorableClass] 41 [Obsolete("This class should not be used anymore because of performance reasons and will therefore not be updated.")] 41 42 public sealed class ValidationBestScaledSymbolicRegressionSolutionAnalyzer : AlgorithmOperator, ISymbolicRegressionAnalyzer { 42 43 private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree"; -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Evaluators/MultiObjectiveSymbolicRegressionEvaluator.cs
r4246 r4468 20 20 #endregion 21 21 22 using System;23 22 using System.Collections.Generic; 23 using System.Linq; 24 24 using HeuristicLab.Core; 25 25 using HeuristicLab.Data; … … 122 122 public override IOperation Apply() { 123 123 int seed = Random.Next(); 124 IEnumerable<int> rows = SingleObjectiveSymbolicRegressionEvaluator.GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value); 124 IEnumerable<int> rows = SingleObjectiveSymbolicRegressionEvaluator.GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value) 125 .Where(i => i < RegressionProblemData.TestSamplesStart.Value || RegressionProblemData.TestSamplesEnd.Value <= i); 125 126 double[] qualities = Evaluate(SymbolicExpressionTreeInterpreter, SymbolicExpressionTree, RegressionProblemData.Dataset, 126 127 RegressionProblemData.TargetVariable, rows); -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Evaluators/SingleObjectiveSymbolicRegressionEvaluator.cs
r4246 r4468 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Linq; 24 25 using HeuristicLab.Core; 25 26 using HeuristicLab.Data; … … 140 141 public override IOperation Apply() { 141 142 int seed = Random.Next(); 142 IEnumerable<int> rows = GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value); 143 IEnumerable<int> rows = GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value) 144 .Where(i => i < RegressionProblemData.TestSamplesStart.Value || RegressionProblemData.TestSamplesEnd.Value <= i); 143 145 double quality = Evaluate(SymbolicExpressionTreeInterpreter, SymbolicExpressionTree, LowerEstimationLimit.Value, UpperEstimationLimit.Value, 144 146 RegressionProblemData.Dataset, -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionModel.cs
r4415 r4468 68 68 69 69 public IEnumerable<double> GetEstimatedValues(DataAnalysisProblemData problemData, int start, int end) { 70 return interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, Enumerable.Range(start, end - start)); 70 return GetEstimatedValues(problemData, Enumerable.Range(start, end - start)); 71 } 72 public IEnumerable<double> GetEstimatedValues(DataAnalysisProblemData problemData, IEnumerable<int> rows) { 73 return interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, rows); 71 74 } 72 75 -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs
r4250 r4468 170 170 fixedBestValidationSolutionAnalyzer.BestKnownQualityParameter.ActualName = BestKnownQualityParameter.Name; 171 171 } 172 172 173 var bestValidationSolutionAnalyzer = analyzer as ValidationBestScaledSymbolicRegressionSolutionAnalyzer; 173 174 if (bestValidationSolutionAnalyzer != null) { -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblemBase.cs
r4251 r4468 125 125 } 126 126 public IntValue TrainingSamplesStart { 127 get { return new IntValue(DataAnalysisProblemData.Training SamplesStart.Value); }127 get { return new IntValue(DataAnalysisProblemData.TrainingIndizes.First()); } 128 128 } 129 129 public IntValue TrainingSamplesEnd { 130 130 get { 131 return new IntValue((DataAnalysisProblemData.TrainingSamplesStart.Value +132 DataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);131 int endIndex = (int)(DataAnalysisProblemData.TrainingIndizes.Count() * (1.0 - DataAnalysisProblemData.ValidationPercentage.Value)); 132 return new IntValue(DataAnalysisProblemData.TrainingIndizes.ElementAt(endIndex)); 133 133 } 134 134 } … … 137 137 } 138 138 public IntValue ValidationSamplesEnd { 139 get { return new IntValue(DataAnalysisProblemData.Training SamplesEnd.Value); }139 get { return new IntValue(DataAnalysisProblemData.TrainingIndizes.Last() + 1); } 140 140 } 141 141 public IntValue TestSamplesStart { -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionSolution.cs
r4415 r4468 67 67 get { 68 68 if (estimatedValues == null) RecalculateEstimatedValues(); 69 return estimatedValues .AsEnumerable();69 return estimatedValues; 70 70 } 71 71 } 72 72 73 73 public override IEnumerable<double> EstimatedTrainingValues { 74 get { 75 if (estimatedValues == null) RecalculateEstimatedValues(); 76 int start = ProblemData.TrainingSamplesStart.Value; 77 int n = ProblemData.TrainingSamplesEnd.Value - start; 78 return estimatedValues.Skip(start).Take(n).ToList(); 79 } 74 get { return GetEstimatedValues(ProblemData.TrainingIndizes); } 80 75 } 81 76 82 77 public override IEnumerable<double> EstimatedTestValues { 83 get { 84 if (estimatedValues == null) RecalculateEstimatedValues(); 85 int start = ProblemData.TestSamplesStart.Value; 86 int n = ProblemData.TestSamplesEnd.Value - start; 87 return estimatedValues.Skip(start).Take(n).ToList(); 88 } 78 get { return GetEstimatedValues(ProblemData.TestIndizes); } 79 } 80 81 public virtual IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows) { 82 if (estimatedValues == null) RecalculateEstimatedValues(); 83 foreach (int row in rows) 84 yield return estimatedValues[row]; 89 85 } 90 86 }
Note: See TracChangeset
for help on using the changeset viewer.