Changeset 3995
- Timestamp:
- 07/05/10 13:58:47 (14 years ago)
- Location:
- trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionMeanSquaredErrorEvaluator.cs
r3513 r3995 72 72 73 73 public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end) { 74 int targetVariableIndex = dataset.GetVariableIndex(targetVariable);75 74 var estimatedValues = from x in interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start)) 76 75 let boundedX = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, x)) 77 76 select double.IsNaN(boundedX) ? upperEstimationLimit : boundedX; 78 var originalValues = from row in Enumerable.Range(start, end - start) select dataset[row, targetVariableIndex];77 var originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end); 79 78 return SimpleMSEEvaluator.Calculate(originalValues, estimatedValues); 80 79 } -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionScaledMeanSquaredErrorEvaluator.cs
r3807 r3995 75 75 76 76 public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, out double beta, out double alpha) { 77 var estimatedValues = CalculateScaledEstimatedValues(interpreter, solution, dataset, targetVariable, start, end, out beta, out alpha); 78 estimatedValues = from x in estimatedValues 79 let boundedX = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, x)) 80 select double.IsNaN(boundedX) ? upperEstimationLimit : boundedX; 81 var originalValues = dataset.GetVariableValues(targetVariable, start, end); 82 return SimpleMSEEvaluator.Calculate(originalValues, estimatedValues); 77 IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end); 78 IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start)); 79 CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha); 80 81 return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, start, end, beta, alpha); 83 82 } 84 83 85 84 public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, double beta, double alpha) { 86 var estimatedValues = from x in interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start)) 87 let boundedX = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, x * beta + alpha)) 88 select double.IsNaN(boundedX) ? upperEstimationLimit : boundedX; 89 var originalValues = dataset.GetVariableValues(targetVariable, start, end); 90 return SimpleMSEEvaluator.Calculate(originalValues, estimatedValues); 85 //IEnumerable<double> estimatedValues = from x in interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start)) 86 // let boundedX = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, x * beta + alpha)) 87 // select double.IsNaN(boundedX) ? upperEstimationLimit : boundedX; 88 IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start)); 89 IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end); 90 IEnumerator<double> originalEnumerator = originalValues.GetEnumerator(); 91 IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator(); 92 double cnt = 0; 93 double sse = 0; 94 95 while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) { 96 double estimated = estimatedEnumerator.Current * beta + alpha; 97 double original = originalEnumerator.Current; 98 estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated)); 99 if (double.IsNaN(estimated)) 100 estimated = upperEstimationLimit; 101 if (!double.IsNaN(estimated) && !double.IsInfinity(estimated) && 102 !double.IsNaN(original) && !double.IsInfinity(original)) { 103 double error = estimated - original; 104 sse += error * error; 105 cnt++; 106 } 107 } 108 109 if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) { 110 throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match."); 111 } else if (cnt == 0) { 112 throw new ArgumentException("Mean squared errors is not defined for input vectors of NaN or Inf"); 113 } else { 114 double mse = sse / cnt; 115 return mse; 116 } 91 117 } 92 118 93 private static IEnumerable<double> CalculateScaledEstimatedValues(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, string targetVariable, int start, int end, out double beta, out double alpha) { 94 int targetVariableIndex = dataset.GetVariableIndex(targetVariable); 95 var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start)).ToArray(); 96 var originalValues = dataset.GetVariableValues(targetVariable, start, end); 97 CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha); 98 for (int i = 0; i < estimatedValues.Length; i++) 99 estimatedValues[i] = estimatedValues[i] * beta + alpha; 100 return estimatedValues; 101 } 119 /// <summary> 120 /// Calculates linear scaling parameters in one pass. 121 /// The formulas to calculate the scaling parameters were taken from Scaled Symblic Regression by Maarten Keijzer. 122 /// http://www.springerlink.com/content/x035121165125175/ 123 /// </summary> 124 public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) { 125 IEnumerator<double> originalEnumerator = original.GetEnumerator(); 126 IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator(); 102 127 128 int cnt = 0; 129 double tSum = 0; 130 double ySum = 0; 131 double yySum = 0; 132 double ytSum = 0; 103 133 104 public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) { 105 double[] originalValues = original.ToArray(); 106 double[] estimatedValues = estimated.ToArray(); 107 if (originalValues.Length != estimatedValues.Length) throw new ArgumentException(); 108 var filteredResult = (from row in Enumerable.Range(0, originalValues.Length) 109 let t = originalValues[row] 110 let e = estimatedValues[row] 111 where IsValidValue(t) 112 where IsValidValue(e) 113 select new { Estimation = e, Target = t }) 114 .OrderBy(x => Math.Abs(x.Target)) // make sure small values are considered before large values 115 .ToArray(); 134 while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) { 135 double y = estimatedEnumerator.Current; 136 double t = originalEnumerator.Current; 137 if (IsValidValue(t) && IsValidValue(y)) { 138 cnt++; 139 tSum += t; 140 ySum += y; 141 yySum += y * y; 142 ytSum += t * y; 143 } 144 } 116 145 117 // calculate alpha and beta on the subset of rows with valid values 118 originalValues = filteredResult.Select(x => x.Target).ToArray(); 119 estimatedValues = filteredResult.Select(x => x.Estimation).ToArray(); 120 int n = originalValues.Length; 121 if (n > 2) { 122 double tMean = originalValues.Average(); 123 double xMean = estimatedValues.Average(); 124 double sumXT = 0; 125 double sumXX = 0; 126 for (int i = 0; i < n; i++) { 127 // calculate alpha and beta on the subset of rows with valid values 128 double x = estimatedValues[i]; 129 double t = originalValues[i]; 130 sumXT += (x - xMean) * (t - tMean); 131 sumXX += (x - xMean) * (x - xMean); 132 } 133 if (!sumXX.IsAlmost(0.0)) { 134 beta = sumXT / sumXX; 135 } else { 146 if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) 147 throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match."); 148 if (cnt < 2) { 149 alpha = 0; 150 beta = 1; 151 } else { 152 double tMean = tSum / cnt; 153 double yMean = ySum / cnt; 154 //division by cnt is omited because the variance and covariance are divided afterwards. 155 double yVariance = yySum - 2 * yMean * ySum + cnt * yMean * yMean; 156 double ytCovariance = ytSum - tMean * ySum - yMean * tSum + cnt * yMean * tMean; 157 158 if (yVariance.IsAlmost(0.0)) 136 159 beta = 1; 137 } 138 alpha = tMean - beta * xMean; 139 } else { 140 alpha = 0.0; 141 beta = 1.0; 160 else 161 beta = ytCovariance / yVariance; 162 163 alpha = tMean - beta * yMean; 142 164 } 143 165 }
Note: See TracChangeset
for help on using the changeset viewer.