Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/19/10 14:36:11 (14 years ago)
Author:
mkommend
Message:

added statistical comparator operator for SymReg OSGP (ticket #1082)

Location:
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Operators
Files:
1 added
1 copied

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Operators/WeightedParentsQualityVarianceComparator.cs

    r4038 r4044  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using alglib;
    2526using HeuristicLab.Core;
    2627using HeuristicLab.Data;
    2728using HeuristicLab.Operators;
     29using HeuristicLab.Optimization;
    2830using HeuristicLab.Parameters;
    2931using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3032
    31 namespace HeuristicLab.Optimization.Operators {
    32   [Item("WeightedParentsQualityComparator", "Compares the quality against that of its parents (assumes the parents are subscopes to the child scope). This operator works with any number of subscopes > 0.")]
     33namespace HeuristicLab.Problems.DataAnalysis.Operators {
     34  [Item("WeightedParentsQualityVarianceComparator", "Compares the quality and variance of the quality against that of its parents (assumes the parents are subscopes to the child scope). This operator works with any number of subscopes > 0.")]
    3335  [StorableClass]
    34   public class WeightedParentsQualityComparator : SingleSuccessorOperator, ISubScopesQualityComparator {
     36  public class WeightedParentsQualityVarianceComparator : SingleSuccessorOperator, ISubScopesQualityComparator {
    3537    public IValueLookupParameter<BoolValue> MaximizationParameter {
    3638      get { return (IValueLookupParameter<BoolValue>)Parameters["Maximization"]; }
     39    }
     40    public ILookupParameter<BoolValue> ResultParameter {
     41      get { return (ILookupParameter<BoolValue>)Parameters["Result"]; }
     42    }
     43    public IValueLookupParameter<DoubleValue> ConfidenceIntervalParameter {
     44      get { return (IValueLookupParameter<DoubleValue>)Parameters["ConfidenceInterval"]; }
    3745    }
    3846    public ILookupParameter<DoubleValue> LeftSideParameter {
    3947      get { return (ILookupParameter<DoubleValue>)Parameters["LeftSide"]; }
    4048    }
     49    public ILookupParameter<DoubleValue> LeftSideVarianceParameter {
     50      get { return (ILookupParameter<DoubleValue>)Parameters["LeftSideVariance"]; }
     51    }
     52    public ILookupParameter<IntValue> LeftSideSamplesParameter {
     53      get { return (ILookupParameter<IntValue>)Parameters["LeftSideSamples"]; }
     54    }
    4155    public ILookupParameter<ItemArray<DoubleValue>> RightSideParameter {
    4256      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters["RightSide"]; }
    4357    }
    44     public ILookupParameter<BoolValue> ResultParameter {
    45       get { return (ILookupParameter<BoolValue>)Parameters["Result"]; }
     58    public ILookupParameter<ItemArray<DoubleValue>> RightSideVariancesParameters {
     59      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters["RightSideVariances"]; }
    4660    }
    47     public ValueLookupParameter<DoubleValue> ComparisonFactorParameter {
    48       get { return (ValueLookupParameter<DoubleValue>)Parameters["ComparisonFactor"]; }
     61    public ILookupParameter<ItemArray<IntValue>> RightSideSamplesParameters {
     62      get { return (ILookupParameter<ItemArray<IntValue>>)Parameters["RightSideSamples"]; }
    4963    }
    5064
    51     public WeightedParentsQualityComparator()
     65    public WeightedParentsQualityVarianceComparator()
    5266      : base() {
    5367      Parameters.Add(new ValueLookupParameter<BoolValue>("Maximization", "True if the problem is a maximization problem, false otherwise"));
     68      Parameters.Add(new LookupParameter<BoolValue>("Result", "The result of the comparison: True means Quality is better, False means it is worse than parents."));
     69      Parameters.Add(new ValueLookupParameter<DoubleValue>("ConfidenceInterval", "The confidence interval used for the test.", new DoubleValue(0.05)));
     70
    5471      Parameters.Add(new LookupParameter<DoubleValue>("LeftSide", "The quality of the child."));
     72      Parameters.Add(new LookupParameter<DoubleValue>("LeftSideVariance", "The variances of the quality of the new child."));
     73      Parameters.Add(new LookupParameter<IntValue>("LeftSideSamples", "The number of samples used to calculate the quality of the new child."));
     74
    5575      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("RightSide", "The qualities of the parents."));
    56       Parameters.Add(new LookupParameter<BoolValue>("Result", "The result of the comparison: True means Quality is better, False means it is worse than parents."));
    57       Parameters.Add(new ValueLookupParameter<DoubleValue>("ComparisonFactor", "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
     76      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("RightSideVariances", "The variances of the parents."));
     77      Parameters.Add(new LookupParameter<IntValue>("RightSideSamples", "The number of samples used to calculate the quality of the parent."));
    5878    }
    5979
    6080    public override IOperation Apply() {
     81      double leftQuality = LeftSideParameter.ActualValue.Value;
     82      double leftVariance = LeftSideVarianceParameter.ActualValue.Value;
     83      int leftSamples = LeftSideSamplesParameter.ActualValue.Value;
     84
    6185      ItemArray<DoubleValue> rightQualities = RightSideParameter.ActualValue;
     86      ItemArray<DoubleValue> rightVariances = RightSideVariancesParameters.ActualValue;
     87      ItemArray<IntValue> rightSamples = RightSideSamplesParameters.ActualValue;
     88
    6289      if (rightQualities.Length < 1) throw new InvalidOperationException(Name + ": No subscopes found.");
    63       double compFact = ComparisonFactorParameter.ActualValue.Value;
    6490      bool maximization = MaximizationParameter.ActualValue.Value;
    65       double leftQuality = LeftSideParameter.ActualValue.Value;
    6691
    67       double threshold = 0;
     92      int bestParentIndex;
     93      double bestParentQuality;
     94      double bestParentVariance;
     95      int bestParentSamples;
    6896
    69       #region Calculate threshold
    70       if (rightQualities.Length == 2) { // this case will probably be used most often
    71         double minQuality = Math.Min(rightQualities[0].Value, rightQualities[1].Value);
    72         double maxQuality = Math.Max(rightQualities[0].Value, rightQualities[1].Value);
    73         if (maximization)
    74           threshold = minQuality + (maxQuality - minQuality) * compFact;
    75         else
    76           threshold = maxQuality - (maxQuality - minQuality) * compFact;
    77       } else if (rightQualities.Length == 1) { // case for just one parent
    78         threshold = rightQualities[0].Value;
    79       } else { // general case extended to 3 or more parents
    80         List<double> sortedQualities = rightQualities.Select(x => x.Value).ToList();
    81         sortedQualities.Sort();
    82         double minimumQuality = sortedQualities.First();
     97      if (maximization)
     98        bestParentQuality = rightQualities.Max(x => x.Value);
     99      else
     100        bestParentQuality = rightQualities.Min(x => x.Value);
     101      bestParentIndex = rightQualities.FindIndex(x => x.Value == bestParentQuality);
     102      bestParentVariance = rightVariances[bestParentIndex].Value;
     103      bestParentSamples = rightSamples[bestParentIndex].Value;
    83104
    84         double integral = 0;
    85         for (int i = 0; i < sortedQualities.Count - 1; i++) {
    86           integral += (sortedQualities[i] + sortedQualities[i + 1]) / 2.0; // sum of the trapezoid
    87         }
    88         integral -= minimumQuality * sortedQualities.Count;
    89         if (integral == 0) threshold = sortedQualities[0]; // all qualities are equal
    90         else {
    91           double selectedArea = integral * (maximization ? compFact : (1 - compFact));
    92           integral = 0;
    93           for (int i = 0; i < sortedQualities.Count - 1; i++) {
    94             double currentSliceArea = (sortedQualities[i] + sortedQualities[i + 1]) / 2.0;
    95             double windowedSliceArea = currentSliceArea - minimumQuality;
    96             if (windowedSliceArea == 0) continue;
    97             integral += windowedSliceArea;
    98             if (integral >= selectedArea) {
    99               double factor = 1 - ((integral - selectedArea) / (windowedSliceArea));
    100               threshold = sortedQualities[i] + (sortedQualities[i + 1] - sortedQualities[i]) * factor;
    101               break;
    102             }
    103           }
    104         }
    105       }
    106       #endregion
     105      double xmean = leftQuality;
     106      double xvar = leftVariance;
     107      int n = leftSamples;
     108      double ymean = bestParentQuality;
     109      double yvar = bestParentVariance;
     110      double m = bestParentSamples;
    107111
    108       bool result = maximization && leftQuality > threshold || !maximization && leftQuality < threshold;
     112
     113      //following code taken from ALGLIB studentttest line 351
     114      // Two-sample unpooled test
     115      double p = 0;
     116      double stat = (xmean - ymean) / Math.Sqrt(xvar / n + yvar / m);
     117      double c = xvar / n / (xvar / n + yvar / m);
     118      double df = (n - 1) * (m - 1) / ((m - 1) * AP.Math.Sqr(c) + (n - 1) * (1 - AP.Math.Sqr(c)));
     119      if ((double)(stat) > (double)(0))
     120        p = 1 - 0.5 * ibetaf.incompletebeta(df / 2, 0.5, df / (df + AP.Math.Sqr(stat)));
     121      else
     122        p = 0.5 * ibetaf.incompletebeta(df / 2, 0.5, df / (df + AP.Math.Sqr(stat)));
     123      double bothtails = 2 * Math.Min(p, 1 - p);
     124      double lefttail = p;
     125      double righttail = 1 - p;
     126
     127      bool result = false;
     128      if (maximization)
     129        result = righttail < ConfidenceIntervalParameter.ActualValue.Value;
     130      else
     131        result = lefttail < ConfidenceIntervalParameter.ActualValue.Value;
     132
    109133      BoolValue resultValue = ResultParameter.ActualValue;
    110134      if (resultValue == null) {
     
    114138      }
    115139
     140
     141
    116142      return base.Apply();
    117143    }
Note: See TracChangeset for help on using the changeset viewer.