Changeset 4044 for trunk/sources
- Timestamp: 07/19/10 14:36:11 (14 years ago)
- Location: trunk/sources
- Files: 1 added, 4 edited, 2 copied
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/HeuristicLab.Problems.DataAnalysis.Regression-3.3.csproj
r4028 r4044 104 104 <Compile Include="Symbolic\Analyzers\SymbolicRegressionVariableFrequencyAnalyzer.cs" /> 105 105 <Compile Include="Symbolic\Analyzers\ValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs" /> 106 <Compile Include="Symbolic\SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator.cs" /> 106 107 <Compile Include="Symbolic\SimpleSymbolicRegressionEvaluator.cs" /> 107 108 <Compile Include="Symbolic\SymbolicRegressionScaledMeanSquaredErrorEvaluator.cs" /> -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator.cs
r4038 r4044 38 38 39 39 namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic { 40 [Item("SymbolicRegressionScaledMean SquaredErrorEvaluator", "Calculates the mean squared errorof a linearly scaled symbolic regression solution.")]40 [Item("SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator", "Calculates the mean and the variance of the squared errors of a linearly scaled symbolic regression solution.")] 41 41 [StorableClass] 42 public class SymbolicRegressionScaledMeanSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator { 42 public class SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator { 43 private const string QualityVarianceParameterName = "QualityVariance"; 44 private const string QualitySamplesParameterName = "QualitySamples"; 43 45 44 46 #region parameter properties … … 49 51 get { return (ILookupParameter<DoubleValue>)Parameters["Beta"]; } 50 52 } 53 public ILookupParameter<DoubleValue> QualityVarianceParameter { 54 get { return (ILookupParameter<DoubleValue>)Parameters[QualityVarianceParameterName]; } 55 } 56 public ILookupParameter<IntValue> QualitySamplesParameter { 57 get { return (ILookupParameter<IntValue>)Parameters[QualitySamplesParameterName]; } 58 } 59 51 60 #endregion 52 61 #region properties … … 59 68 set { BetaParameter.ActualValue = value; } 60 69 } 70 public DoubleValue QualityVariance { 71 get { return QualityVarianceParameter.ActualValue; } 72 set { QualityVarianceParameter.ActualValue = value; } 73 } 74 public IntValue QualitySamples { 75 get { return QualitySamplesParameter.ActualValue; } 76 set { QualitySamplesParameter.ActualValue = value; } 77 } 61 78 #endregion 62 public SymbolicRegressionScaledMean SquaredErrorEvaluator()79 public SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator() 63 80 : base() { 64 81 Parameters.Add(new LookupParameter<DoubleValue>("Alpha", "Alpha parameter for linear scaling of the estimated values.")); 65 82 
Parameters.Add(new LookupParameter<DoubleValue>("Beta", "Beta parameter for linear scaling of the estimated values.")); 83 Parameters.Add(new LookupParameter<DoubleValue>(QualityVarianceParameterName, "A parameter which stores the variance of the squared errors.")); 84 Parameters.Add(new LookupParameter<IntValue>(QualitySamplesParameterName, " The number of evaluated samples.")); 66 85 } 67 86 68 87 protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IEnumerable<int> rows) { 69 88 double alpha, beta; 70 double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, rows, out beta, out alpha); 71 AlphaParameter.ActualValue = new DoubleValue(alpha); 72 BetaParameter.ActualValue = new DoubleValue(beta); 89 double meanSE, varianceSE; 90 int count; 91 double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, rows, out beta, out alpha, out meanSE, out varianceSE, out count); 92 Alpha = new DoubleValue(alpha); 93 Beta = new DoubleValue(beta); 94 QualityVariance = new DoubleValue(varianceSE); 95 QualitySamples = new IntValue(count); 73 96 return mse; 74 97 } 75 98 76 public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, out double beta, out double alpha ) {77 IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);99 public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, out double beta, out double alpha, out double meanSE, out double 
varianceSE, out int count) { 100 IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows); 78 101 IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows); 79 102 CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha); 80 103 81 return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, beta, alpha );104 return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, beta, alpha, out meanSE, out varianceSE, out count); 82 105 } 83 106 84 public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, double beta, double alpha ) {107 public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, double beta, double alpha, out double meanSE, out double varianceSE, out int count) { 85 108 IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows); 86 109 IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows); 87 110 IEnumerator<double> originalEnumerator = originalValues.GetEnumerator(); 88 111 IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator(); 89 OnlineMean SquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();112 OnlineMeanAndVarianceCalculator seEvaluator = new OnlineMeanAndVarianceCalculator(); 90 113 91 114 while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) { … … 96 119 else 97 120 estimated = 
Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated)); 98 mseEvaluator.Add(original, estimated); 121 double error = estimated - original; 122 error *= error; 123 seEvaluator.Add(error); 99 124 } 100 125 … … 102 127 throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match."); 103 128 } else { 104 return mseEvaluator.MeanSquaredError; 129 meanSE = seEvaluator.Mean; 130 varianceSE = seEvaluator.Variance; 131 count = seEvaluator.Count; 132 return seEvaluator.Mean; 105 133 } 106 134 } -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/OnlineMeanAndVarianceCalculator.cs
r4022 r4044 47 47 } 48 48 49 public int Count { 50 get { return n; } 51 } 52 49 53 public OnlineMeanAndVarianceCalculator() { 50 54 Reset(); -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLab.Problems.DataAnalysis-3.3.csproj
r4027 r4044 108 108 <Compile Include="Interfaces\IOnlineEvaluator.cs" /> 109 109 <Compile Include="MatrixExtensions.cs" /> 110 <Compile Include="Operators\WeightedParentsQualityVarianceComparator.cs" /> 110 111 <Compile Include="Properties\AssemblyInfo.cs" /> 111 112 <Compile Include="SupportVectorMachine\ParameterAdjustmentProblem\SupportVectorMachineParameterAdjustmentBestSolutionAnalyzer.cs" /> … … 175 176 <Name>HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.3</Name> 176 177 </ProjectReference> 178 <ProjectReference Include="..\..\HeuristicLab.ExtLibs\HeuristicLab.ALGLIB\2.5.0\ALGLIB-2.5.0\ALGLIB-2.5.0.csproj"> 179 <Project>{29E4B033-1FEF-4FE1-AE17-0A9319D7C54E}</Project> 180 <Name>ALGLIB-2.5.0</Name> 181 </ProjectReference> 177 182 <ProjectReference Include="..\..\HeuristicLab.ExtLibs\HeuristicLab.LibSVM\1.6.3\HeuristicLab.LibSVM-1.6.3\HeuristicLab.LibSVM-1.6.3.csproj"> 178 183 <Project>{89B50302-9CEE-4D13-9779-633EADCAE624}</Project> -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLabProblemsDataAnalysisPlugin.cs.frame
r3842 r4044 28 28 [Plugin("HeuristicLab.Problems.DataAnalysis","3.3.0.$WCREV$")] 29 29 [PluginFile("HeuristicLab.Problems.DataAnalysis-3.3.dll", PluginFileType.Assembly)] 30 [PluginDependency("HeuristicLab.ALGLIB", "2.5")] 30 31 [PluginDependency("HeuristicLab.Collections", "3.3.0.0")] 31 32 [PluginDependency("HeuristicLab.Common", "3.3.0.0")] -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Operators/WeightedParentsQualityVarianceComparator.cs
r4038 r4044 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using alglib; 25 26 using HeuristicLab.Core; 26 27 using HeuristicLab.Data; 27 28 using HeuristicLab.Operators; 29 using HeuristicLab.Optimization; 28 30 using HeuristicLab.Parameters; 29 31 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 30 32 31 namespace HeuristicLab. Optimization.Operators {32 [Item("WeightedParentsQuality Comparator", "Comparesthe quality against that of its parents (assumes the parents are subscopes to the child scope). This operator works with any number of subscopes > 0.")]33 namespace HeuristicLab.Problems.DataAnalysis.Operators { 34 [Item("WeightedParentsQualityVarianceComparator", "Compares the quality and variance of the quality against that of its parents (assumes the parents are subscopes to the child scope). This operator works with any number of subscopes > 0.")] 33 35 [StorableClass] 34 public class WeightedParentsQuality Comparator : SingleSuccessorOperator, ISubScopesQualityComparator {36 public class WeightedParentsQualityVarianceComparator : SingleSuccessorOperator, ISubScopesQualityComparator { 35 37 public IValueLookupParameter<BoolValue> MaximizationParameter { 36 38 get { return (IValueLookupParameter<BoolValue>)Parameters["Maximization"]; } 39 } 40 public ILookupParameter<BoolValue> ResultParameter { 41 get { return (ILookupParameter<BoolValue>)Parameters["Result"]; } 42 } 43 public IValueLookupParameter<DoubleValue> ConfidenceIntervalParameter { 44 get { return (IValueLookupParameter<DoubleValue>)Parameters["ConfidenceInterval"]; } 37 45 } 38 46 public ILookupParameter<DoubleValue> LeftSideParameter { 39 47 get { return (ILookupParameter<DoubleValue>)Parameters["LeftSide"]; } 40 48 } 49 public ILookupParameter<DoubleValue> LeftSideVarianceParameter { 50 get { return (ILookupParameter<DoubleValue>)Parameters["LeftSideVariance"]; } 51 } 52 public ILookupParameter<IntValue> LeftSideSamplesParameter { 53 get { return 
(ILookupParameter<IntValue>)Parameters["LeftSideSamples"]; } 54 } 41 55 public ILookupParameter<ItemArray<DoubleValue>> RightSideParameter { 42 56 get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters["RightSide"]; } 43 57 } 44 public ILookupParameter< BoolValue> ResultParameter{45 get { return (ILookupParameter< BoolValue>)Parameters["Result"]; }58 public ILookupParameter<ItemArray<DoubleValue>> RightSideVariancesParameters { 59 get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters["RightSideVariances"]; } 46 60 } 47 public ValueLookupParameter<DoubleValue> ComparisonFactorParameter{48 get { return ( ValueLookupParameter<DoubleValue>)Parameters["ComparisonFactor"]; }61 public ILookupParameter<ItemArray<IntValue>> RightSideSamplesParameters { 62 get { return (ILookupParameter<ItemArray<IntValue>>)Parameters["RightSideSamples"]; } 49 63 } 50 64 51 public WeightedParentsQuality Comparator()65 public WeightedParentsQualityVarianceComparator() 52 66 : base() { 53 67 Parameters.Add(new ValueLookupParameter<BoolValue>("Maximization", "True if the problem is a maximization problem, false otherwise")); 68 Parameters.Add(new LookupParameter<BoolValue>("Result", "The result of the comparison: True means Quality is better, False means it is worse than parents.")); 69 Parameters.Add(new ValueLookupParameter<DoubleValue>("ConfidenceInterval", "The confidence interval used for the test.", new DoubleValue(0.05))); 70 54 71 Parameters.Add(new LookupParameter<DoubleValue>("LeftSide", "The quality of the child.")); 72 Parameters.Add(new LookupParameter<DoubleValue>("LeftSideVariance", "The variances of the quality of the new child.")); 73 Parameters.Add(new LookupParameter<IntValue>("LeftSideSamples", "The number of samples used to calculate the quality of the new child.")); 74 55 75 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("RightSide", "The qualities of the parents.")); 56 Parameters.Add(new LookupParameter<BoolValue>("Result", "The result of 
the comparison: True means Quality is better, False means it is worse thanparents."));57 Parameters.Add(new ValueLookupParameter<DoubleValue>("ComparisonFactor", "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));76 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("RightSideVariances", "The variances of the parents.")); 77 Parameters.Add(new LookupParameter<IntValue>("RightSideSamples", "The number of samples used to calculate the quality of the parent.")); 58 78 } 59 79 60 80 public override IOperation Apply() { 81 double leftQuality = LeftSideParameter.ActualValue.Value; 82 double leftVariance = LeftSideVarianceParameter.ActualValue.Value; 83 int leftSamples = LeftSideSamplesParameter.ActualValue.Value; 84 61 85 ItemArray<DoubleValue> rightQualities = RightSideParameter.ActualValue; 86 ItemArray<DoubleValue> rightVariances = RightSideVariancesParameters.ActualValue; 87 ItemArray<IntValue> rightSamples = RightSideSamplesParameters.ActualValue; 88 62 89 if (rightQualities.Length < 1) throw new InvalidOperationException(Name + ": No subscopes found."); 63 double compFact = ComparisonFactorParameter.ActualValue.Value;64 90 bool maximization = MaximizationParameter.ActualValue.Value; 65 double leftQuality = LeftSideParameter.ActualValue.Value;66 91 67 double threshold = 0; 92 int bestParentIndex; 93 double bestParentQuality; 94 double bestParentVariance; 95 int bestParentSamples; 68 96 69 #region Calculate threshold 70 if (rightQualities.Length == 2) { // this case will probably be used most often 71 double minQuality = Math.Min(rightQualities[0].Value, rightQualities[1].Value); 72 double maxQuality = Math.Max(rightQualities[0].Value, rightQualities[1].Value); 73 if (maximization) 74 threshold = minQuality + (maxQuality - minQuality) * compFact; 75 else 76 threshold = maxQuality - (maxQuality - minQuality) * compFact; 77 } else if (rightQualities.Length == 1) { // 
case for just one parent 78 threshold = rightQualities[0].Value; 79 } else { // general case extended to 3 or more parents 80 List<double> sortedQualities = rightQualities.Select(x => x.Value).ToList(); 81 sortedQualities.Sort(); 82 double minimumQuality = sortedQualities.First(); 97 if (maximization) 98 bestParentQuality = rightQualities.Max(x => x.Value); 99 else 100 bestParentQuality = rightQualities.Min(x => x.Value); 101 bestParentIndex = rightQualities.FindIndex(x => x.Value == bestParentQuality); 102 bestParentVariance = rightVariances[bestParentIndex].Value; 103 bestParentSamples = rightSamples[bestParentIndex].Value; 83 104 84 double integral = 0; 85 for (int i = 0; i < sortedQualities.Count - 1; i++) { 86 integral += (sortedQualities[i] + sortedQualities[i + 1]) / 2.0; // sum of the trapezoid 87 } 88 integral -= minimumQuality * sortedQualities.Count; 89 if (integral == 0) threshold = sortedQualities[0]; // all qualities are equal 90 else { 91 double selectedArea = integral * (maximization ? 
compFact : (1 - compFact)); 92 integral = 0; 93 for (int i = 0; i < sortedQualities.Count - 1; i++) { 94 double currentSliceArea = (sortedQualities[i] + sortedQualities[i + 1]) / 2.0; 95 double windowedSliceArea = currentSliceArea - minimumQuality; 96 if (windowedSliceArea == 0) continue; 97 integral += windowedSliceArea; 98 if (integral >= selectedArea) { 99 double factor = 1 - ((integral - selectedArea) / (windowedSliceArea)); 100 threshold = sortedQualities[i] + (sortedQualities[i + 1] - sortedQualities[i]) * factor; 101 break; 102 } 103 } 104 } 105 } 106 #endregion 105 double xmean = leftQuality; 106 double xvar = leftVariance; 107 int n = leftSamples; 108 double ymean = bestParentQuality; 109 double yvar = bestParentVariance; 110 double m = bestParentSamples; 107 111 108 bool result = maximization && leftQuality > threshold || !maximization && leftQuality < threshold; 112 113 //following code taken from ALGLIB studentttest line 351 114 // Two-sample unpooled test 115 double p = 0; 116 double stat = (xmean - ymean) / Math.Sqrt(xvar / n + yvar / m); 117 double c = xvar / n / (xvar / n + yvar / m); 118 double df = (n - 1) * (m - 1) / ((m - 1) * AP.Math.Sqr(c) + (n - 1) * (1 - AP.Math.Sqr(c))); 119 if ((double)(stat) > (double)(0)) 120 p = 1 - 0.5 * ibetaf.incompletebeta(df / 2, 0.5, df / (df + AP.Math.Sqr(stat))); 121 else 122 p = 0.5 * ibetaf.incompletebeta(df / 2, 0.5, df / (df + AP.Math.Sqr(stat))); 123 double bothtails = 2 * Math.Min(p, 1 - p); 124 double lefttail = p; 125 double righttail = 1 - p; 126 127 bool result = false; 128 if (maximization) 129 result = righttail < ConfidenceIntervalParameter.ActualValue.Value; 130 else 131 result = lefttail < ConfidenceIntervalParameter.ActualValue.Value; 132 109 133 BoolValue resultValue = ResultParameter.ActualValue; 110 134 if (resultValue == null) { … … 114 138 } 115 139 140 141 116 142 return base.Apply(); 117 143 }
Note: See TracChangeset
for help on using the changeset viewer.