Free cookie consent management tool by TermsFeed Policy Generator

Changeset 4044


Ignore:
Timestamp:
07/19/10 14:36:11 (12 years ago)
Author:
mkommend
Message:

added statistical comparator operator for SymReg OSGP (ticket #1082)

Location:
trunk/sources
Files:
1 added
4 edited
2 copied

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/HeuristicLab.Problems.DataAnalysis.Regression-3.3.csproj

    r4028 r4044  
    104104    <Compile Include="Symbolic\Analyzers\SymbolicRegressionVariableFrequencyAnalyzer.cs" />
    105105    <Compile Include="Symbolic\Analyzers\ValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs" />
     106    <Compile Include="Symbolic\SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator.cs" />
    106107    <Compile Include="Symbolic\SimpleSymbolicRegressionEvaluator.cs" />
    107108    <Compile Include="Symbolic\SymbolicRegressionScaledMeanSquaredErrorEvaluator.cs" />
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator.cs

    r4038 r4044  
    3838
    3939namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
    40   [Item("SymbolicRegressionScaledMeanSquaredErrorEvaluator", "Calculates the mean squared error of a linearly scaled symbolic regression solution.")]
     40  [Item("SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator", "Calculates the mean and the variance of the squared errors of a linearly scaled symbolic regression solution.")]
    4141  [StorableClass]
    42   public class SymbolicRegressionScaledMeanSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
     42  public class SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
     43    private const string QualityVarianceParameterName = "QualityVariance";
     44    private const string QualitySamplesParameterName = "QualitySamples";
    4345
    4446    #region parameter properties
     
    4951      get { return (ILookupParameter<DoubleValue>)Parameters["Beta"]; }
    5052    }
     53    public ILookupParameter<DoubleValue> QualityVarianceParameter {
     54      get { return (ILookupParameter<DoubleValue>)Parameters[QualityVarianceParameterName]; }
     55    }
     56    public ILookupParameter<IntValue> QualitySamplesParameter {
     57      get { return (ILookupParameter<IntValue>)Parameters[QualitySamplesParameterName]; }
     58    }
     59
    5160    #endregion
    5261    #region properties
     
    5968      set { BetaParameter.ActualValue = value; }
    6069    }
     70    public DoubleValue QualityVariance {
     71      get { return QualityVarianceParameter.ActualValue; }
     72      set { QualityVarianceParameter.ActualValue = value; }
     73    }
     74    public IntValue QualitySamples {
     75      get { return QualitySamplesParameter.ActualValue; }
     76      set { QualitySamplesParameter.ActualValue = value; }
     77    }
    6178    #endregion
    62     public SymbolicRegressionScaledMeanSquaredErrorEvaluator()
     79    public SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator()
    6380      : base() {
    6481      Parameters.Add(new LookupParameter<DoubleValue>("Alpha", "Alpha parameter for linear scaling of the estimated values."));
    6582      Parameters.Add(new LookupParameter<DoubleValue>("Beta", "Beta parameter for linear scaling of the estimated values."));
     83      Parameters.Add(new LookupParameter<DoubleValue>(QualityVarianceParameterName, "A parameter which stores the variance of the squared errors."));
     84      Parameters.Add(new LookupParameter<IntValue>(QualitySamplesParameterName, " The number of evaluated samples."));
    6685    }
    6786
    6887    protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IEnumerable<int> rows) {
    6988      double alpha, beta;
    70       double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, rows, out beta, out alpha);
    71       AlphaParameter.ActualValue = new DoubleValue(alpha);
    72       BetaParameter.ActualValue = new DoubleValue(beta);
     89      double meanSE, varianceSE;
     90      int count;
     91      double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, rows, out beta, out alpha, out meanSE, out varianceSE, out count);
     92      Alpha = new DoubleValue(alpha);
     93      Beta = new DoubleValue(beta);
     94      QualityVariance = new DoubleValue(varianceSE);
     95      QualitySamples = new IntValue(count);
    7396      return mse;
    7497    }
    7598
    76     public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, out double beta, out double alpha) {
    77       IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable,rows);
     99    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, out double beta, out double alpha, out double meanSE, out double varianceSE, out int count) {
     100      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
    78101      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
    79102      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);
    80103
    81       return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, beta, alpha);
     104      return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, beta, alpha, out meanSE, out varianceSE, out count);
    82105    }
    83106
    84     public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, double beta, double alpha) {
     107    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, double beta, double alpha, out double meanSE, out double varianceSE, out int count) {
    85108      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
    86109      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
    87110      IEnumerator<double> originalEnumerator = originalValues.GetEnumerator();
    88111      IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator();
    89       OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
     112      OnlineMeanAndVarianceCalculator seEvaluator = new OnlineMeanAndVarianceCalculator();
    90113
    91114      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
     
    96119        else
    97120          estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
    98         mseEvaluator.Add(original, estimated);
     121        double error = estimated - original;
     122        error *= error;
     123        seEvaluator.Add(error);
    99124      }
    100125
     
    102127        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
    103128      } else {
    104         return mseEvaluator.MeanSquaredError;
     129        meanSE = seEvaluator.Mean;
     130        varianceSE = seEvaluator.Variance;
     131        count = seEvaluator.Count;
     132        return seEvaluator.Mean;
    105133      }
    106134    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/OnlineMeanAndVarianceCalculator.cs

    r4022 r4044  
    4747    }
    4848
     49    public int Count {
     50      get { return n; }
     51    }
     52
    4953    public OnlineMeanAndVarianceCalculator() {
    5054      Reset();
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLab.Problems.DataAnalysis-3.3.csproj

    r4027 r4044  
    108108    <Compile Include="Interfaces\IOnlineEvaluator.cs" />
    109109    <Compile Include="MatrixExtensions.cs" />
     110    <Compile Include="Operators\WeightedParentsQualityVarianceComparator.cs" />
    110111    <Compile Include="Properties\AssemblyInfo.cs" />
    111112    <Compile Include="SupportVectorMachine\ParameterAdjustmentProblem\SupportVectorMachineParameterAdjustmentBestSolutionAnalyzer.cs" />
     
    175176      <Name>HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.3</Name>
    176177    </ProjectReference>
     178    <ProjectReference Include="..\..\HeuristicLab.ExtLibs\HeuristicLab.ALGLIB\2.5.0\ALGLIB-2.5.0\ALGLIB-2.5.0.csproj">
     179      <Project>{29E4B033-1FEF-4FE1-AE17-0A9319D7C54E}</Project>
     180      <Name>ALGLIB-2.5.0</Name>
     181    </ProjectReference>
    177182    <ProjectReference Include="..\..\HeuristicLab.ExtLibs\HeuristicLab.LibSVM\1.6.3\HeuristicLab.LibSVM-1.6.3\HeuristicLab.LibSVM-1.6.3.csproj">
    178183      <Project>{89B50302-9CEE-4D13-9779-633EADCAE624}</Project>
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLabProblemsDataAnalysisPlugin.cs.frame

    r3842 r4044  
    2828  [Plugin("HeuristicLab.Problems.DataAnalysis","3.3.0.$WCREV$")]
    2929  [PluginFile("HeuristicLab.Problems.DataAnalysis-3.3.dll", PluginFileType.Assembly)]
     30  [PluginDependency("HeuristicLab.ALGLIB", "2.5")]
    3031  [PluginDependency("HeuristicLab.Collections", "3.3.0.0")]
    3132  [PluginDependency("HeuristicLab.Common", "3.3.0.0")]
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Operators/WeightedParentsQualityVarianceComparator.cs

    r4038 r4044  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using alglib;
    2526using HeuristicLab.Core;
    2627using HeuristicLab.Data;
    2728using HeuristicLab.Operators;
     29using HeuristicLab.Optimization;
    2830using HeuristicLab.Parameters;
    2931using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3032
    31 namespace HeuristicLab.Optimization.Operators {
    32   [Item("WeightedParentsQualityComparator", "Compares the quality against that of its parents (assumes the parents are subscopes to the child scope). This operator works with any number of subscopes > 0.")]
     33namespace HeuristicLab.Problems.DataAnalysis.Operators {
     34  [Item("WeightedParentsQualityVarianceComparator", "Compares the quality and variance of the quality against that of its parents (assumes the parents are subscopes to the child scope). This operator works with any number of subscopes > 0.")]
    3335  [StorableClass]
    34   public class WeightedParentsQualityComparator : SingleSuccessorOperator, ISubScopesQualityComparator {
     36  public class WeightedParentsQualityVarianceComparator : SingleSuccessorOperator, ISubScopesQualityComparator {
    3537    public IValueLookupParameter<BoolValue> MaximizationParameter {
    3638      get { return (IValueLookupParameter<BoolValue>)Parameters["Maximization"]; }
     39    }
     40    public ILookupParameter<BoolValue> ResultParameter {
     41      get { return (ILookupParameter<BoolValue>)Parameters["Result"]; }
     42    }
     43    public IValueLookupParameter<DoubleValue> ConfidenceIntervalParameter {
     44      get { return (IValueLookupParameter<DoubleValue>)Parameters["ConfidenceInterval"]; }
    3745    }
    3846    public ILookupParameter<DoubleValue> LeftSideParameter {
    3947      get { return (ILookupParameter<DoubleValue>)Parameters["LeftSide"]; }
    4048    }
     49    public ILookupParameter<DoubleValue> LeftSideVarianceParameter {
     50      get { return (ILookupParameter<DoubleValue>)Parameters["LeftSideVariance"]; }
     51    }
     52    public ILookupParameter<IntValue> LeftSideSamplesParameter {
     53      get { return (ILookupParameter<IntValue>)Parameters["LeftSideSamples"]; }
     54    }
    4155    public ILookupParameter<ItemArray<DoubleValue>> RightSideParameter {
    4256      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters["RightSide"]; }
    4357    }
    44     public ILookupParameter<BoolValue> ResultParameter {
    45       get { return (ILookupParameter<BoolValue>)Parameters["Result"]; }
     58    public ILookupParameter<ItemArray<DoubleValue>> RightSideVariancesParameters {
     59      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters["RightSideVariances"]; }
    4660    }
    47     public ValueLookupParameter<DoubleValue> ComparisonFactorParameter {
    48       get { return (ValueLookupParameter<DoubleValue>)Parameters["ComparisonFactor"]; }
     61    public ILookupParameter<ItemArray<IntValue>> RightSideSamplesParameters {
     62      get { return (ILookupParameter<ItemArray<IntValue>>)Parameters["RightSideSamples"]; }
    4963    }
    5064
    51     public WeightedParentsQualityComparator()
     65    public WeightedParentsQualityVarianceComparator()
    5266      : base() {
    5367      Parameters.Add(new ValueLookupParameter<BoolValue>("Maximization", "True if the problem is a maximization problem, false otherwise"));
     68      Parameters.Add(new LookupParameter<BoolValue>("Result", "The result of the comparison: True means Quality is better, False means it is worse than parents."));
     69      Parameters.Add(new ValueLookupParameter<DoubleValue>("ConfidenceInterval", "The confidence interval used for the test.", new DoubleValue(0.05)));
     70
    5471      Parameters.Add(new LookupParameter<DoubleValue>("LeftSide", "The quality of the child."));
     72      Parameters.Add(new LookupParameter<DoubleValue>("LeftSideVariance", "The variances of the quality of the new child."));
     73      Parameters.Add(new LookupParameter<IntValue>("LeftSideSamples", "The number of samples used to calculate the quality of the new child."));
     74
    5575      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("RightSide", "The qualities of the parents."));
    56       Parameters.Add(new LookupParameter<BoolValue>("Result", "The result of the comparison: True means Quality is better, False means it is worse than parents."));
    57       Parameters.Add(new ValueLookupParameter<DoubleValue>("ComparisonFactor", "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
     76      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("RightSideVariances", "The variances of the parents."));
     77      Parameters.Add(new LookupParameter<IntValue>("RightSideSamples", "The number of samples used to calculate the quality of the parent."));
    5878    }
    5979
    6080    public override IOperation Apply() {
     81      double leftQuality = LeftSideParameter.ActualValue.Value;
     82      double leftVariance = LeftSideVarianceParameter.ActualValue.Value;
     83      int leftSamples = LeftSideSamplesParameter.ActualValue.Value;
     84
    6185      ItemArray<DoubleValue> rightQualities = RightSideParameter.ActualValue;
     86      ItemArray<DoubleValue> rightVariances = RightSideVariancesParameters.ActualValue;
     87      ItemArray<IntValue> rightSamples = RightSideSamplesParameters.ActualValue;
     88
    6289      if (rightQualities.Length < 1) throw new InvalidOperationException(Name + ": No subscopes found.");
    63       double compFact = ComparisonFactorParameter.ActualValue.Value;
    6490      bool maximization = MaximizationParameter.ActualValue.Value;
    65       double leftQuality = LeftSideParameter.ActualValue.Value;
    6691
    67       double threshold = 0;
     92      int bestParentIndex;
     93      double bestParentQuality;
     94      double bestParentVariance;
     95      int bestParentSamples;
    6896
    69       #region Calculate threshold
    70       if (rightQualities.Length == 2) { // this case will probably be used most often
    71         double minQuality = Math.Min(rightQualities[0].Value, rightQualities[1].Value);
    72         double maxQuality = Math.Max(rightQualities[0].Value, rightQualities[1].Value);
    73         if (maximization)
    74           threshold = minQuality + (maxQuality - minQuality) * compFact;
    75         else
    76           threshold = maxQuality - (maxQuality - minQuality) * compFact;
    77       } else if (rightQualities.Length == 1) { // case for just one parent
    78         threshold = rightQualities[0].Value;
    79       } else { // general case extended to 3 or more parents
    80         List<double> sortedQualities = rightQualities.Select(x => x.Value).ToList();
    81         sortedQualities.Sort();
    82         double minimumQuality = sortedQualities.First();
     97      if (maximization)
     98        bestParentQuality = rightQualities.Max(x => x.Value);
     99      else
     100        bestParentQuality = rightQualities.Min(x => x.Value);
     101      bestParentIndex = rightQualities.FindIndex(x => x.Value == bestParentQuality);
     102      bestParentVariance = rightVariances[bestParentIndex].Value;
     103      bestParentSamples = rightSamples[bestParentIndex].Value;
    83104
    84         double integral = 0;
    85         for (int i = 0; i < sortedQualities.Count - 1; i++) {
    86           integral += (sortedQualities[i] + sortedQualities[i + 1]) / 2.0; // sum of the trapezoid
    87         }
    88         integral -= minimumQuality * sortedQualities.Count;
    89         if (integral == 0) threshold = sortedQualities[0]; // all qualities are equal
    90         else {
    91           double selectedArea = integral * (maximization ? compFact : (1 - compFact));
    92           integral = 0;
    93           for (int i = 0; i < sortedQualities.Count - 1; i++) {
    94             double currentSliceArea = (sortedQualities[i] + sortedQualities[i + 1]) / 2.0;
    95             double windowedSliceArea = currentSliceArea - minimumQuality;
    96             if (windowedSliceArea == 0) continue;
    97             integral += windowedSliceArea;
    98             if (integral >= selectedArea) {
    99               double factor = 1 - ((integral - selectedArea) / (windowedSliceArea));
    100               threshold = sortedQualities[i] + (sortedQualities[i + 1] - sortedQualities[i]) * factor;
    101               break;
    102             }
    103           }
    104         }
    105       }
    106       #endregion
     105      double xmean = leftQuality;
     106      double xvar = leftVariance;
     107      int n = leftSamples;
     108      double ymean = bestParentQuality;
     109      double yvar = bestParentVariance;
     110      double m = bestParentSamples;
    107111
    108       bool result = maximization && leftQuality > threshold || !maximization && leftQuality < threshold;
     112
     113      //following code taken from ALGLIB studentttest line 351
     114      // Two-sample unpooled test
     115      double p = 0;
     116      double stat = (xmean - ymean) / Math.Sqrt(xvar / n + yvar / m);
     117      double c = xvar / n / (xvar / n + yvar / m);
     118      double df = (n - 1) * (m - 1) / ((m - 1) * AP.Math.Sqr(c) + (n - 1) * (1 - AP.Math.Sqr(c)));
     119      if ((double)(stat) > (double)(0))
     120        p = 1 - 0.5 * ibetaf.incompletebeta(df / 2, 0.5, df / (df + AP.Math.Sqr(stat)));
     121      else
     122        p = 0.5 * ibetaf.incompletebeta(df / 2, 0.5, df / (df + AP.Math.Sqr(stat)));
     123      double bothtails = 2 * Math.Min(p, 1 - p);
     124      double lefttail = p;
     125      double righttail = 1 - p;
     126
     127      bool result = false;
     128      if (maximization)
     129        result = righttail < ConfidenceIntervalParameter.ActualValue.Value;
     130      else
     131        result = lefttail < ConfidenceIntervalParameter.ActualValue.Value;
     132
    109133      BoolValue resultValue = ResultParameter.ActualValue;
    110134      if (resultValue == null) {
     
    114138      }
    115139
     140
     141
    116142      return base.Apply();
    117143    }
Note: See TracChangeset for help on using the changeset viewer.