
Timestamp:
01/09/15 14:57:28
Author:
gkronber
Message:

#2283 refactoring

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits
Files:
3 added
1 deleted
21 edited
3 moved

  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BernoulliPolicyActionInfo.cs

    r11732 r11742  
    77using HeuristicLab.Common;
    88
    9 namespace HeuristicLab.Algorithms.Bandits {
    10   public class BernoulliPolicyActionInfo : IPolicyActionInfo {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     10  public class BernoulliPolicyActionInfo : IBanditPolicyActionInfo {
    1111    public bool Disabled { get { return NumSuccess == -1; } }
    1212    public int NumSuccess { get; private set; }
    1313    public int NumFailure { get; private set; }
     14    public int Tries { get { return NumSuccess + NumFailure; } }
     15    public double Value { get { return NumSuccess / (double)(Tries); } }
    1416    public void UpdateReward(double reward) {
    1517      Debug.Assert(!Disabled);
     
    2931    }
    3032    public void PrintStats() {
    31       Console.WriteLine("expected value {0,5:F2} disabled {1}", NumSuccess / (double)NumFailure, Disabled);
     33      Console.WriteLine("expected value {0,5:F2} disabled {1}", Value, Disabled);
    3234    }
    3335  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BernoulliThompsonSamplingPolicy.cs

    r11732 r11742  
    77using HeuristicLab.Common;
    88
    9 namespace HeuristicLab.Algorithms.Bandits {
    10   public class BernoulliThompsonSamplingPolicy : IPolicy {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     10  public class BernoulliThompsonSamplingPolicy : IBanditPolicy {
    1111    // parameters of beta prior distribution
    1212    private readonly double alpha = 1.0;
    1313    private readonly double beta = 1.0;
    1414
    15     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    16       var myActionInfos = actionInfos.OfType<BernoulliPolicyActionInfo>(); // TODO: performance
     15    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     16      var myActionInfos = actionInfos.OfType<BernoulliPolicyActionInfo>();
    1717      int bestAction = -1;
    1818      double maxTheta = double.NegativeInfinity;
     
    3232    }
    3333
    34     public IPolicyActionInfo CreateActionInfo() {
     34    public IBanditPolicyActionInfo CreateActionInfo() {
    3535      return new BernoulliPolicyActionInfo();
    3636    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs

    r11732 r11742  
    77using HeuristicLab.Common;
    88
    9 namespace HeuristicLab.Algorithms.Bandits {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    1010  // also called softmax policy
    11   public class BoltzmannExplorationPolicy : IPolicy {
     11  public class BoltzmannExplorationPolicy : IBanditPolicy {
    1212    private readonly double beta;
     13    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
    1314
    14     public BoltzmannExplorationPolicy(double beta) {
     15    public BoltzmannExplorationPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward) { }
     16
     17    public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
    1518      if (beta < 0) throw new ArgumentException();
    1619      this.beta = beta;
     20      this.valueFunction = valueFunction;
    1721    }
    18     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     22    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1923      Debug.Assert(actionInfos.Any());
    2024
    2125      // select best
    22       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
     26      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    2327      Debug.Assert(myActionInfos.Any(a => !a.Disabled));
    24       double[] w = new double[myActionInfos.Length];
    2528
    26       for (int a = 0; a < myActionInfos.Length; a++) {
    27         if (myActionInfos[a].Disabled) {
    28           w[a] = 0; continue;
    29         }
    30         if (myActionInfos[a].Tries == 0) return a;
    31         var sumReward = myActionInfos[a].SumReward;
    32         var tries = myActionInfos[a].Tries;
    33         var avgReward = sumReward / tries;
    34         w[a] = Math.Exp(beta * avgReward);
    35       }
     29      var w = from aInfo in myActionInfos
     30              select aInfo.Disabled
     31                ? 0.0
     32                : Math.Exp(beta * valueFunction(aInfo));
    3633
    37 
    38       var bestAction = Enumerable.Range(0, w.Length).SampleProportional(random, w).First();
     34      var bestAction = myActionInfos
     35        .Select((aInfo, idx) => new { aInfo, idx })
     36        .SampleProportional(random, w)
     37        .Select(p => p.idx)
     38        .First();
    3939      Debug.Assert(bestAction >= 0);
    40       Debug.Assert(bestAction < w.Length);
    41       Debug.Assert(!myActionInfos[bestAction].Disabled);
    4240      return bestAction;
    4341    }
    4442
    45     public IPolicyActionInfo CreateActionInfo() {
     43    public IBanditPolicyActionInfo CreateActionInfo() {
    4644      return new DefaultPolicyActionInfo();
    4745    }
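
The selection step in the new hunk draws an arm index proportionally to the softmax weights w = exp(beta * valueFunction(aInfo)). As a point of reference, a minimal roulette-wheel sketch of that idea (the actual SampleProportional extension method from HeuristicLab.Common is not shown in this changeset and may differ):

    using System;
    using System.Linq;

    static class ProportionalSampling {
      // Returns index i with probability w[i] / sum(w); assumes w[i] >= 0 and sum(w) > 0.
      public static int SampleIndexProportional(Random random, double[] w) {
        double total = w.Sum();
        double r = random.NextDouble() * total; // r in [0, total)
        double acc = 0.0;
        for (int i = 0; i < w.Length; i++) {
          acc += w[i];
          if (r < acc) return i;
        }
        return w.Length - 1; // unreachable for well-formed input; guards against rounding
      }
    }

Disabled arms receive weight 0 in the diff above and therefore cannot be selected.
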
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    99  /* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings  of the 12th
    1010International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */
    1111
    12   public class ChernoffIntervalEstimationPolicy : IPolicy {
     12  public class ChernoffIntervalEstimationPolicy : IBanditPolicy {
    1313    private readonly double delta;
    1414
     
    1616      this.delta = delta;
    1717    }
    18     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     18    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1919      Debug.Assert(actionInfos.Any());
    2020      // select best
    21       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
    22       int k = myActionInfos.Length;
     21      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
     22      int k = myActionInfos.Count(a => !a.Disabled);
    2323      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    2424      int bestAction = -1;
    2525      double bestQ = double.NegativeInfinity;
    26       for (int a = 0; a < myActionInfos.Length; a++) {
    27         if (myActionInfos[a].Disabled) continue;
    28         if (myActionInfos[a].Tries == 0) return a;
     26      var aIdx = -1;
     27      foreach (var aInfo in myActionInfos) {
     28        aIdx++;
     29        if (aInfo.Disabled) continue;
     30        if (aInfo.Tries == 0) return aIdx;
    2931
    30         var sumReward = myActionInfos[a].SumReward;
    31         var tries = myActionInfos[a].Tries;
    32 
    33         var avgReward = sumReward / tries;
     32        var avgReward = aInfo.SumReward / aInfo.Tries;
    3433
    3534        // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
    3635        // var alpha = Math.Log(2 * totalTries * k / delta);
    37         double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
    38         var q = avgReward + (alpha + Math.Sqrt(2 * tries * avgReward * alpha + alpha * alpha)) / tries;
     36        double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
     37        var q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
    3938        if (q > bestQ) {
    4039          bestQ = q;
    41           bestAction = a;
     40          bestAction = aIdx;
    4241        }
    4342      }
     
    4645    }
    4746
    48     public IPolicyActionInfo CreateActionInfo() {
     47    public IBanditPolicyActionInfo CreateActionInfo() {
    4948      return new DefaultPolicyActionInfo();
    5049    }
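
Written out, the index computed in the loop above (page 5 of Streeter and Smith 2006, as implemented here) is

    q_a = \hat{\mu}_a + \frac{\alpha + \sqrt{2 n_a \hat{\mu}_a \alpha + \alpha^2}}{n_a},
    \qquad \alpha = \ln\frac{2 m k}{\delta}

where \hat{\mu}_a is the average reward of arm a, n_a its number of tries, k the number of enabled arms, \delta the confidence parameter, and m the total number of tries over all enabled arms (the original paper uses the maximum number of pulls instead, as the code comment notes).
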
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/DefaultPolicyActionInfo.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    99  // stores information that is relevant for most of the policies
    10   public class DefaultPolicyActionInfo : IPolicyActionInfo {
     10  public class DefaultPolicyActionInfo : IBanditPolicyActionInfo {
    1111    public bool Disabled { get { return Tries == -1; } }
    1212    public double SumReward { get; private set; }
     13    public int Tries { get; private set; }
    1314    public double MaxReward { get; private set; }
    14     public int Tries { get; private set; }
    15 
     15    public double Value { get { return SumReward / Tries; } }
    1616    public DefaultPolicyActionInfo() {
    17       MaxReward = double.NegativeInfinity;
     17      MaxReward = double.MinValue;
    1818    }
    1919
     
    3737      Console.WriteLine("avg reward {0,5:F2} disabled {1}", SumReward / Tries, Disabled);
    3838    }
     39
     40    public static Func<DefaultPolicyActionInfo, double> AverageReward {
     41      get {
     42        return (aInfo) =>
     43          aInfo.Tries == 0 ?
     44          double.PositiveInfinity :
     45          aInfo.SumReward / (double)aInfo.Tries;
     46      }
     47    }
    3948  }
    4049}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EmptyPolicyActionInfo.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class EmptyPolicyActionInfo : IPolicyActionInfo {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  public class EmptyPolicyActionInfo : IBanditPolicyActionInfo {
     10    public double Value { get { return 0.0; } }
     11
    1012    public bool Disabled { get; private set; }
    1113    public void UpdateReward(double reward) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs

    r11732 r11742  
    55using System.Text;
    66using System.Threading.Tasks;
     7using HeuristicLab.Common;
    78
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class EpsGreedyPolicy : IPolicy {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     10  public class EpsGreedyPolicy : IBanditPolicy {
    1011    private readonly double eps;
    1112    private readonly RandomPolicy randomPolicy;
     13    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
     14    private readonly string desc;
    1215
    13     public EpsGreedyPolicy(double eps) {
     16
     17    public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }
     18
     19    public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
    1420      this.eps = eps;
    1521      this.randomPolicy = new RandomPolicy();
     22      this.valueFunction = valueFunction;
     23      this.desc = desc;
    1624    }
    17     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     25
     26    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1827      Debug.Assert(actionInfos.Any());
    1928      if (random.NextDouble() > eps) {
    2029        // select best
    2130        var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    22         int bestAction = -1;
     31        var bestActions = new List<int>();
    2332        double bestQ = double.NegativeInfinity;
     33
    2434        int aIdx = -1;
    2535        foreach (var aInfo in myActionInfos) {
    26 
    2736          aIdx++;
    2837          if (aInfo.Disabled) continue;
    29           if (aInfo.Tries == 0) return aIdx;
    3038
     39          var q = valueFunction(aInfo);
    3140
    32           var avgReward = aInfo.SumReward / aInfo.Tries;         
    33           //var q = avgReward;
    34           var q = aInfo.MaxReward;
    3541          if (q > bestQ) {
     42            bestActions.Clear();
     43            bestActions.Add(aIdx);
    3644            bestQ = q;
    37             bestAction = aIdx;
     45          } else if (q.IsAlmost(bestQ)) {
     46            bestActions.Add(aIdx);
    3847          }
    3948        }
    40         Debug.Assert(bestAction >= 0);
    41         return bestAction;
     49        Debug.Assert(bestActions.Any());
     50        return bestActions.SelectRandom(random);
    4251      } else {
    4352        // select random
     
    4655    }
    4756
    48     public IPolicyActionInfo CreateActionInfo() {
     57    public IBanditPolicyActionInfo CreateActionInfo() {
    4958      return new DefaultPolicyActionInfo();
    5059    }
     
    5261
    5362    public override string ToString() {
    54       return string.Format("EpsGreedyPolicy({0:F2})", eps);
     63      return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
    5564    }
    5665  }
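
A sketch of how the refactored constructor can be used: the value function that was previously hard-coded in SelectAction (the removed lines used aInfo.MaxReward) is now passed in as a delegate. The lambda and description string below are illustrative only, not part of the changeset:

    // hypothetical configuration: greedy w.r.t. the maximum observed reward
    var policy = new EpsGreedyPolicy(
      0.1,                                                  // eps: explore with probability 0.1
      aInfo => aInfo.Tries == 0 ? double.PositiveInfinity   // prefer untried arms,
                                : aInfo.MaxReward,          // otherwise use the best reward seen so far
      "max reward");

Untried arms get positive infinity here, mirroring the DefaultPolicyActionInfo.AverageReward delegate introduced in this changeset.
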
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/Exp3Policy.cs

    r11730 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    99  public class Exp3Policy : BanditPolicy {
    1010    private readonly Random random;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GaussianThompsonSamplingPolicy.cs

    r11732 r11742  
    55using HeuristicLab.Common;
    66
    7 namespace HeuristicLab.Algorithms.Bandits {
     7namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    88
    9   public class GaussianThompsonSamplingPolicy : IPolicy {
     9  [Obsolete("Replaced by GenericThompsonSamplingPolicy(GaussianModel(0.5, 1.0, 0.1))")]
     10  public class GaussianThompsonSamplingPolicy : IBanditPolicy {
    1011    private bool compatibility;
    1112
     
    2223    }
    2324
    24     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     25    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    2526      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    2627      int bestAction = -1;
     
    3839        double theta;
    3940        if (compatibility) {
     41          // old code used for old experiments (preserved because it performed very well)
    4042          if (tries < 2) return aIdx;
    4143          var mu = sampleMean;
     
    6567    }
    6668
    67     public IPolicyActionInfo CreateActionInfo() {
     69    public IBanditPolicyActionInfo CreateActionInfo() {
    6870      return new MeanAndVariancePolicyActionInfo();
    6971    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GenericThompsonSamplingPolicy.cs

    r11732 r11742  
    77using HeuristicLab.Common;
    88
    9 namespace HeuristicLab.Algorithms.Bandits {
    10   public class GenericThompsonSamplingPolicy : IPolicy {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     10  public class GenericThompsonSamplingPolicy : IBanditPolicy {
    1111    private readonly IModel model;
    1212
     
    1515    }
    1616
    17     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     17    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1818      var myActionInfos = actionInfos.OfType<ModelPolicyActionInfo>();
    1919      int bestAction = -1;
     
    3434    }
    3535
    36     public IPolicyActionInfo CreateActionInfo() {
     36    public IBanditPolicyActionInfo CreateActionInfo() {
    3737      return new ModelPolicyActionInfo((IModel)model.Clone());
    3838    }
    3939
    4040    public override string ToString() {
    41       return string.Format("GenericThompsonSamplingPolicy({0})", model);
     41      return string.Format("GenericThompsonSamplingPolicy(\"{0}\")", model);
    4242    }
    4343  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/MeanAndVariancePolicyActionInfo.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class MeanAndVariancePolicyActionInfo : IPolicyActionInfo {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  public class MeanAndVariancePolicyActionInfo : IBanditPolicyActionInfo {
    1010    private bool disabled;
    1111    public bool Disabled { get { return disabled; } }
     
    1515    public double AvgReward { get { return estimator.Avg; } }
    1616    public double RewardVariance { get { return estimator.Variance; } }
     17    public double Value { get { return AvgReward; } }
    1718
    1819    public void UpdateReward(double reward) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ModelPolicyActionInfo.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    99  // uses a statistical model to sample and update posterior distribution p(Reward | Data)
    10   public class ModelPolicyActionInfo : IPolicyActionInfo {
     10  public class ModelPolicyActionInfo : IBanditPolicyActionInfo {
    1111    private readonly IModel model;
    1212    public bool Disabled { get { return Tries == -1; } }
     13    public double Value { get { return model.SampleExpectedReward(new Random()); } }
    1314
    1415    public int Tries { get; private set; }
     
    3839      model.PrintStats();
    3940    }
     41
     42    public override string ToString() {
     43      return string.Format("disabled {0} model {1}", Disabled, model);
     44    }
    4045  }
    4146}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/RandomPolicy.cs

    r11732 r11742  
    77using HeuristicLab.Common;
    88
    9 namespace HeuristicLab.Algorithms.Bandits {
    10   public class RandomPolicy : IPolicy {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     10  public class RandomPolicy : IBanditPolicy {
    1111
    1212    public override string ToString() {
     
    1414    }
    1515
    16     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     16    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1717      return actionInfos
    18         .Select((a, i) => Tuple.Create(a, i))
     18        .Select((aInfo, idx) => Tuple.Create(aInfo, idx))
    1919        .Where(p => !p.Item1.Disabled)
    2020        .SelectRandom(random).Item2;
    2121    }
    2222
    23     public IPolicyActionInfo CreateActionInfo() {
    24       return new EmptyPolicyActionInfo();
     23    public IBanditPolicyActionInfo CreateActionInfo() {
     24      return new DefaultPolicyActionInfo();
    2525    }
    2626  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ThresholdAscentPolicy.cs

    r11730 r11742  
    55using System.Text;
    66using System.Threading.Tasks;
     7using HeuristicLab.Common;
    78
    8 namespace HeuristicLab.Algorithms.Bandits {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    910  /* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings  of the 12th
    1011 International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */
    1112
    12   public class ThresholdAscentPolicy : BanditPolicy {
    13     const int numBins = 101;
    14     const double binSize = 1.0 / (numBins - 1);
     13  public class ThresholdAscentPolicy : IBanditPolicy {
     14    public const int numBins = 101;
     15    public const double binSize = 1.0 / (numBins - 1);
    1516
    16     // for each arm store the number of observed rewards for each bin of size delta
    17     // for delta = 0.01 we have 101 bins
    18     // the first bin is freq of rewards  >= 0 // all
    19     // the second bin is freq of rewards > 0
    20     // the third bin is freq of rewards > 0.01
    21     // the last bin is for rewards > 0.99
    22     //
    23     // (also see RewardBin function)
    24     private readonly int[,] armRewardHistogram; // for performance reasons we store cumulative counts (freq of rewards > lower threshold)
     17    private class ThresholdAscentActionInfo : IBanditPolicyActionInfo {
    2518
     19      // for each arm store the number of observed rewards for each bin of size delta
     20      // for delta = 0.01 we have 101 bins
     21      // the first bin is freq of rewards  >= 0 // all
     22      // the second bin is freq of rewards > 0
     23      // the third bin is freq of rewards > 0.01
     24      // the last bin is for rewards > 0.99
     25      //
     26      // (also see RewardBin function)
     27      public int[] rewardHistogram = new int[numBins];    // for performance reasons we store cumulative counts (freq of rewards > lower threshold)
     28      public int Tries { get; private set; }
     29      public int thresholdBin = 1;
     30      public double Value { get { return rewardHistogram[thresholdBin] / (double)Tries; } }
    2631
    27     private readonly int[] tries;
     32      public bool Disabled { get { return Tries == -1; } }
     33
     34      public void UpdateReward(double reward) {
     35        Tries++;
     36        for (var idx = thresholdBin; idx <= RewardBin(reward); idx++)
     37          rewardHistogram[idx]++;
     38      }
     39
     40      public void Disable() {
     41        Tries = -1;
     42      }
     43
     44      public void Reset() {
     45        Tries = 0;
     46        thresholdBin = 1;
     47        Array.Clear(rewardHistogram, 0, rewardHistogram.Length);
     48      }
     49
     50      public void PrintStats() {
     51        if (Tries >= 0) {
     52          Console.Write("{0,6}", Tries);
     53        } else {
     54          Console.Write("{0,6}", "");
     55        }
     56      }
     57
     58      // maps a reward value to it's bin
     59      private static int RewardBin(double reward) {
     60        Debug.Assert(reward >= 0 && reward <= 1.0);
     61        // reward = 0 => 0
     62        // ]0.00 .. 0.01] => 1
     63        // ]0.01 .. 0.02] => 2
     64        // ...
     65        // ]0.99 .. 1.00] => 100
     66        if (reward <= 0) return 0;
     67        return (int)Math.Ceiling((reward / binSize));
     68      }
     69    }
     70
    2871    private readonly int s;
    2972    private readonly double delta;
    3073
    31     private int totalTries = 0;
    32     private int thresholdBin; // bin index of current threshold
    33     private const double maxTries = 1E6;
    34 
    35     public ThresholdAscentPolicy(int numActions, int s = 100, double delta = 0.05)
    36       : base(numActions) {
    37       this.thresholdBin = 1; // first bin to check is bin idx 1 == freq of rewards > 0
     74    public ThresholdAscentPolicy(int s = 100, double delta = 0.05) {
    3875      this.s = s;
    3976      this.delta = delta;
    40       this.armRewardHistogram = new int[numActions, numBins];
    41       this.tries = new int[numActions];
    4277    }
    4378
    44     // maps a reward value to it's bin
    45     private static int RewardBin(double reward) {
    46       Debug.Assert(reward >= 0 && reward <= 1.0);
    47       // reward = 0 => 0
    48       // ]0.00 .. 0.01] => 1
    49       // ]0.01 .. 0.02] => 2
    50       // ...
    51       // ]0.99 .. 1.00] => 100
    52       if (reward <= 0) return 0;
    53       return (int)Math.Ceiling((reward / binSize));
    54     }
    55 
    56 
    57     private double U(double mu, int n, int k) {
     79    private double U(double mu, int totalTries, int n, int k) {
    5880      //var alpha = Math.Log(2.0 * totalTries * k / delta);
    59       double alpha = Math.Log(2) + Math.Log(maxTries) + Math.Log(k) - Math.Log(delta); // totalTries is max iterations in original paper
     81      double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
    6082      return mu + (alpha + Math.Sqrt(2 * n * mu * alpha + alpha * alpha)) / n;
    6183    }
    6284
    6385
    64     public override int SelectAction() {
    65       Debug.Assert(Actions.Any());
    66       UpdateThreshold();
     86    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     87      Debug.Assert(actionInfos.Any());
     88      var myActionInfos = actionInfos.OfType<ThresholdAscentActionInfo>();
     89      UpdateThreshold(myActionInfos);
     90
    6791      int bestAction = -1;
    6892      double bestQ = double.NegativeInfinity;
    69       int k = Actions.Count();
    70       foreach (var a in Actions) {
    71         if (tries[a] == 0) return a;
    72         double mu = armRewardHistogram[a, thresholdBin] / (double)tries[a]; // probability of rewards > T
    73         double q = U(mu, tries[a], k);
     93      int k = myActionInfos.Count(a => !a.Disabled);
     94      var totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     95      int aIdx = -1;
     96      foreach (var aInfo in myActionInfos) {
     97        aIdx++;
     98        if (aInfo.Disabled) continue;
     99        if (aInfo.Tries == 0) return aIdx;
     100        double mu = aInfo.Value; // probability of rewards > T
     101        double q = U(mu, totalTries, aInfo.Tries, k);          // totalTries is max iterations in original paper
    74102        if (q > bestQ) {
    75103          bestQ = q;
    76           bestAction = a;
     104          bestAction = aIdx;
    77105        }
    78106      }
    79       Debug.Assert(Actions.Contains(bestAction));
     107      Debug.Assert(bestAction > -1);
    80108      return bestAction;
    81109    }
    82110
    83     private void UpdateThreshold() {
    84       while (thresholdBin < (numBins - 1) && Actions.Sum(a => armRewardHistogram[a, thresholdBin]) >= s) {
     111
     112    private void UpdateThreshold(IEnumerable<ThresholdAscentActionInfo> actionInfos) {
     113      var thresholdBin = 1; // first bin to check is bin idx 1 == freq of rewards > 0
     114      while (thresholdBin < (numBins - 1) && actionInfos.Sum(a => a.rewardHistogram[thresholdBin]) >= s) {
    85115        thresholdBin++;
    86116        // Console.WriteLine("New threshold {0:F2}", T);
    87117      }
     118      foreach (var aInfo in actionInfos) {
     119        aInfo.thresholdBin = thresholdBin;
     120      }
    88121    }
    89122
    90     public override void UpdateReward(int action, double reward) {
    91       Debug.Assert(Actions.Contains(action));
    92       totalTries++;
    93       tries[action]++;
    94       // efficiency: we can start at the current threshold bin because all bins below that are not accessed in select-action
    95       for (var idx = thresholdBin; idx <= RewardBin(reward); idx++)
    96         armRewardHistogram[action, idx]++;
     123
     124    public IBanditPolicyActionInfo CreateActionInfo() {
     125      return new ThresholdAscentActionInfo();
    97126    }
    98127
    99     public override void DisableAction(int action) {
    100       base.DisableAction(action);
    101       totalTries -= tries[action];
    102       tries[action] = -1;
    103     }
    104 
    105     public override void Reset() {
    106       base.Reset();
    107       totalTries = 0;
    108       thresholdBin = 1;
    109       Array.Clear(tries, 0, tries.Length);
    110       Array.Clear(armRewardHistogram, 0, armRewardHistogram.Length);
    111     }
    112 
    113     public override void PrintStats() {
    114       for (int i = 0; i < tries.Length; i++) {
    115         if (tries[i] >= 0) {
    116           Console.Write("{0,6}", tries[i]);
    117         } else {
    118           Console.Write("{0,6}", "");
    119         }
    120       }
    121       Console.WriteLine();
    122     }
    123128    public override string ToString() {
    124129      return string.Format("ThresholdAscentPolicy({0},{1:F2})", s, delta);
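
A short worked example of the bin mapping defined by RewardBin above, with binSize = 1.0 / (numBins - 1) = 0.01:

    // RewardBin(0.0)   == 0     (reward == 0)
    // RewardBin(0.005) == 1     (]0.00 .. 0.01])
    // RewardBin(0.01)  == 1     (]0.00 .. 0.01])
    // RewardBin(0.011) == 2     (]0.01 .. 0.02])
    // RewardBin(1.0)   == 100   (]0.99 .. 1.00])
    // UpdateThreshold then advances thresholdBin while at least s observed rewards
    // (summed over all arms) lie above the current threshold bin.
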
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class UCB1Policy : IPolicy {
    10     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  // policy for k-armed bandit (see Auer et al. 2002)
     10  public class UCB1Policy : IBanditPolicy {
     11    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1112      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
    1213      int bestAction = -1;
     
    2728    }
    2829
    29     public IPolicyActionInfo CreateActionInfo() {
     30    public IBanditPolicyActionInfo CreateActionInfo() {
    3031      return new DefaultPolicyActionInfo();
    3132    }
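
The index computation of UCB1Policy is not part of this hunk; for reference, the standard UCB1 index from Auer et al. (2002), which the new comment cites, is

    q_a = \bar{x}_a + \sqrt{\frac{2 \ln n}{n_a}}

with \bar{x}_a the average reward of arm a, n_a its number of tries, and n the total number of tries. The implementation here may differ in detail.
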
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class UCB1TunedPolicy : IPolicy {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  // policy for k-armed bandit (see Auer et al. 2002)
     10  public class UCB1TunedPolicy : IBanditPolicy {
    1011
    11     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    12       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
     12    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     13      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    1314      int bestAction = -1;
    1415      double bestQ = double.NegativeInfinity;
    1516      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    1617
    17       for (int a = 0; a < myActionInfos.Length; a++) {
    18         if (myActionInfos[a].Disabled) continue;
    19         if (myActionInfos[a].Tries == 0) return a;
     18      int aIdx = -1;
     19      foreach (var aInfo in myActionInfos) {
     20        aIdx++;
     21        if (aInfo.Disabled) continue;
     22        if (aInfo.Tries == 0) return aIdx;
    2023
    21         var sumReward = myActionInfos[a].SumReward;
    22         var tries = myActionInfos[a].Tries;
     24        var sumReward = aInfo.SumReward;
     25        var tries = aInfo.Tries;
    2326
    2427        var avgReward = sumReward / tries;
    25         var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(myActionInfos[a], totalTries))); // 1/4 is upper bound of bernoulli distributed variable
     28        var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries))); // 1/4 is upper bound of bernoulli distributed variable
    2629        if (q > bestQ) {
    2730          bestQ = q;
    28           bestAction = a;
     31          bestAction = aIdx;
    2932        }
    3033      }
     
    3336    }
    3437
    35     public IPolicyActionInfo CreateActionInfo() {
     38    public IBanditPolicyActionInfo CreateActionInfo() {
    3639      return new MeanAndVariancePolicyActionInfo();
    3740    }
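
The V(...) helper used in the index above is not included in this hunk; in Auer et al. (2002), UCB1-Tuned uses the variance estimate

    V_a(s) = \Big(\frac{1}{s}\sum_{\tau=1}^{s} x_{a,\tau}^2\Big) - \bar{x}_{a,s}^2 + \sqrt{\frac{2 \ln n}{s}},
    \qquad q_a = \bar{x}_a + \sqrt{\frac{\ln n}{n_a}\,\min\big(\tfrac{1}{4},\, V_a(n_a)\big)}

where the 1/4 term is the variance upper bound of a Bernoulli variable, matching the code comment above; the local V implementation may differ.
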
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCBNormalPolicy.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class UCBNormalPolicy : IPolicy {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  public class UCBNormalPolicy : IBanditPolicy {
    1010
    11     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    12       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
     11    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     12      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    1313      int bestAction = -1;
    1414      double bestQ = double.NegativeInfinity;
    1515      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     16      int aIdx = -1;
     17      foreach (var aInfo in myActionInfos) {
     18        aIdx++;
     19        if (aInfo.Disabled) continue;
     20        if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return aIdx;
    1621
    17       for (int a = 0; a < myActionInfos.Length; a++) {
    18         if (myActionInfos[a].Disabled) continue;
    19         if (totalTries <= 1 || myActionInfos[a].Tries <= 1 || myActionInfos[a].Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return a;
    20 
    21         var tries = myActionInfos[a].Tries;
    22         var avgReward = myActionInfos[a].AvgReward;
    23         var rewardVariance = myActionInfos[a].RewardVariance;
    24         var estVariance = 16 * rewardVariance * (Math.Log(totalTries - 1) / tries);
    25         if (estVariance < 0) estVariance = 0; // numerical problems
    26         var q = avgReward
    27           + Math.Sqrt(estVariance);
     22        var tries = aInfo.Tries;
     23        var avgReward = aInfo.AvgReward;
     24        var rewardVariance = aInfo.RewardVariance;
     25        var estVariance = 16.0 * rewardVariance * (Math.Log(totalTries - 1) / tries);
     26        var q = avgReward + Math.Sqrt(estVariance);
    2827        if (q > bestQ) {
    2928          bestQ = q;
    30           bestAction = a;
     29          bestAction = aIdx;
    3130        }
    3231      }
     
    3534    }
    3635
    37     public IPolicyActionInfo CreateActionInfo() {
     36    public IBanditPolicyActionInfo CreateActionInfo() {
    3837      return new MeanAndVariancePolicyActionInfo();
    3938    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCTPolicy.cs

    r11732 r11742  
    55using System.Text;
    66using System.Threading.Tasks;
    7 
    8 namespace HeuristicLab.Algorithms.Bandits {
     7namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    98  /* Kocsis et al. Bandit based Monte-Carlo Planning */
    10   public class UCTPolicy : IPolicy {
     9  public class UCTPolicy : IBanditPolicy {
    1110    private readonly double c;
    1211
     
    1615
    1716
    18     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    19       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
     17    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     18      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    2019      int bestAction = -1;
    2120      double bestQ = double.NegativeInfinity;
    2221      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    2322
    24       for (int a = 0; a < myActionInfos.Length; a++) {
    25         if (myActionInfos[a].Disabled) continue;
    26         if (myActionInfos[a].Tries == 0) return a;
    27         var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + 2 * c * Math.Sqrt(Math.Log(totalTries) / myActionInfos[a].Tries);
     23      int aIdx = -1;
     24      foreach (var aInfo in myActionInfos) {
     25        aIdx++;
     26        if (aInfo.Disabled) continue;
     27        if (aInfo.Tries == 0) return aIdx;
     28        var q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries);
    2829        if (q > bestQ) {
    2930          bestQ = q;
    30           bestAction = a;
     31          bestAction = aIdx;
    3132        }
    3233      }
     
    3536    }
    3637
    37     public IPolicyActionInfo CreateActionInfo() {
     38    public IBanditPolicyActionInfo CreateActionInfo() {
    3839      return new DefaultPolicyActionInfo();
    3940    }
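
Written out, the index computed above is the UCT rule from Kocsis et al. (Bandit based Monte-Carlo Planning):

    q_a = \frac{\mathrm{SumReward}_a}{n_a} + 2 c \sqrt{\frac{\ln n}{n_a}}

with n the total number of tries over all enabled arms and c the exploration constant passed to the constructor.
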
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

    r11732 r11742  
    4444  <ItemGroup>
    4545    <Compile Include="BanditHelper.cs" />
     46    <Compile Include="BanditPolicies\BernoulliPolicyActionInfo.cs" />
     47    <Compile Include="BanditPolicies\BernoulliThompsonSamplingPolicy.cs" />
     48    <Compile Include="BanditPolicies\BoltzmannExplorationPolicy.cs" />
     49    <Compile Include="BanditPolicies\ChernoffIntervalEstimationPolicy.cs" />
     50    <Compile Include="BanditPolicies\DefaultPolicyActionInfo.cs" />
     51    <Compile Include="BanditPolicies\EpsGreedyPolicy.cs" />
     52    <Compile Include="BanditPolicies\GaussianThompsonSamplingPolicy.cs" />
     53    <Compile Include="BanditPolicies\GenericThompsonSamplingPolicy.cs" />
     54    <Compile Include="BanditPolicies\MeanAndVariancePolicyActionInfo.cs" />
     55    <Compile Include="BanditPolicies\ModelPolicyActionInfo.cs" />
     56    <Compile Include="BanditPolicies\RandomPolicy.cs" />
     57    <Compile Include="BanditPolicies\ThresholdAscentPolicy.cs" />
     58    <Compile Include="BanditPolicies\UCB1Policy.cs" />
     59    <Compile Include="BanditPolicies\UCB1TunedPolicy.cs" />
     60    <Compile Include="BanditPolicies\UCBNormalPolicy.cs" />
     61    <Compile Include="BanditPolicies\UCTPolicy.cs" />
    4662    <Compile Include="Bandits\BernoulliBandit.cs" />
    4763    <Compile Include="Bandits\GaussianBandit.cs" />
     
    4965    <Compile Include="Bandits\IBandit.cs" />
    5066    <Compile Include="Bandits\TruncatedNormalBandit.cs" />
     67    <Compile Include="GrammarPolicies\RandomPolicy.cs" />
     68    <Compile Include="IPolicy.cs" />
     69    <Compile Include="IBanditPolicy.cs" />
     70    <Compile Include="IBanditPolicyActionInfo.cs" />
    5171    <Compile Include="OnlineMeanAndVarianceEstimator.cs" />
    52     <Compile Include="IPolicyActionInfo.cs" />
    5372    <Compile Include="Models\BernoulliModel.cs" />
    5473    <Compile Include="Models\GaussianModel.cs" />
    5574    <Compile Include="Models\IModel.cs" />
    56     <Compile Include="Policies\BernoulliThompsonSamplingPolicy.cs">
    57       <SubType>Code</SubType>
    58     </Compile>
    59     <Compile Include="Policies\BoltzmannExplorationPolicy.cs">
    60       <SubType>Code</SubType>
    61     </Compile>
    62     <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs">
    63       <SubType>Code</SubType>
    64     </Compile>
    65     <Compile Include="Policies\BernoulliPolicyActionInfo.cs" />
    66     <Compile Include="Policies\ModelPolicyActionInfo.cs" />
    67     <Compile Include="Policies\EpsGreedyPolicy.cs">
    68       <SubType>Code</SubType>
    69     </Compile>
    70     <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs">
    71       <SubType>Code</SubType>
    72     </Compile>
    73     <Compile Include="Policies\GenericThompsonSamplingPolicy.cs">
    74       <SubType>Code</SubType>
    75     </Compile>
    76     <Compile Include="Policies\MeanAndVariancePolicyActionInfo.cs" />
    77     <Compile Include="Policies\DefaultPolicyActionInfo.cs" />
    78     <Compile Include="Policies\EmptyPolicyActionInfo.cs" />
    79     <Compile Include="Policies\RandomPolicy.cs" />
    80     <Compile Include="Policies\UCB1Policy.cs" />
    81     <Compile Include="IPolicy.cs" />
    82     <Compile Include="Policies\UCB1TunedPolicy.cs">
    83       <SubType>Code</SubType>
    84     </Compile>
    85     <Compile Include="Policies\UCBNormalPolicy.cs">
    86       <SubType>Code</SubType>
    87     </Compile>
    88     <Compile Include="Policies\UCTPolicy.cs">
    89       <SubType>Code</SubType>
    90     </Compile>
    9175    <Compile Include="Properties\AssemblyInfo.cs" />
    9276  </ItemGroup>
     
    9579      <Project>{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}</Project>
    9680      <Name>HeuristicLab.Common</Name>
     81    </ProjectReference>
     82    <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">
     83      <Project>{cb9dccf6-667e-4a13-b82d-dbd6b45a045e}</Project>
     84      <Name>HeuristicLab.Problems.GrammaticalOptimization</Name>
    9785    </ProjectReference>
    9886  </ItemGroup>
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IBanditPolicy.cs

    r11737 r11742  
    66
    77namespace HeuristicLab.Algorithms.Bandits {
    8   // this interface represents a policy for reinforcement learning
    9   public interface IPolicy {
    10     int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos);
    11     IPolicyActionInfo CreateActionInfo();
     8  // this interface represents a policy for bandit problems
     9  public interface IBanditPolicy {
     10    int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actions);
     11    IBanditPolicyActionInfo CreateActionInfo();
    1212  }
    1313}
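
For orientation, a minimal sketch of how the renamed interface pair is meant to be driven: one action info per arm, then repeatedly select, pull, and feed the reward back. The pullArm delegate is a hypothetical reward source (e.g. a simulated bandit) and is not defined in this changeset:

    using System;
    using System.Linq;
    using HeuristicLab.Algorithms.Bandits;

    static class BanditPolicyUsage {
      // Runs an IBanditPolicy for a number of steps and returns the accumulated reward.
      public static double Run(Random random, IBanditPolicy policy, Func<int, double> pullArm, int numArms, int steps) {
        var actionInfos = Enumerable.Range(0, numArms)
          .Select(i => policy.CreateActionInfo())   // one action info per arm
          .ToArray();
        double totalReward = 0.0;
        for (int step = 0; step < steps; step++) {
          int a = policy.SelectAction(random, actionInfos);  // pick an arm
          double reward = pullArm(a);                        // observe a reward
          actionInfos[a].UpdateReward(reward);               // update the arm's statistics
          totalReward += reward;
        }
        return totalReward;
      }
    }
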
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IBanditPolicyActionInfo.cs

    r11737 r11742  
    66
    77namespace HeuristicLab.Algorithms.Bandits {
    8   public interface IPolicyActionInfo {
     8  public interface IBanditPolicyActionInfo {
    99    bool Disabled { get; }
     10    double Value { get; }
     11    int Tries { get; }
    1012    void UpdateReward(double reward);
    1113    void Disable();
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs

    r11732 r11742  
    99namespace HeuristicLab.Algorithms.Bandits.Models {
    1010  public class BernoulliModel : IModel {
     11
    1112    private int success;
    1213    private int failure;
     
    4748      return new BernoulliModel() { failure = this.failure, success = this.success };
    4849    }
     50
     51    public override string ToString() {
     52      return string.Format("Bernoulli with Beta prior: mu={0:F2}", (success + alpha) / (success + alpha + failure + beta));
     53    }
    4954  }
    5055}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs

    r11732 r11742  
    77  // 2) unknown mean and unknown variance
    88  public class GaussianModel : IModel {
     9
    910    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
    1011
     
    8283      }
    8384
     85
    8486      // sample from the posterior marginal for mu (expected value) equ. 91
    8587      // p(µ|D) = T2αn (µ| µn, βn/(αnκn))
    8688
    87       // sample from Tk distribution : http://stats.stackexchange.com/a/70270
    8889      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
    8990
     
    9192      return theta;
    9293
    93       //return alglib.invnormaldistribution(random.NextDouble()) * + theta;
    94       //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean;
     94
     95      /*
     96       * value function : 0.99-quantile
     97      // sample posterior mean and posterior variance independently
     98      var sampledPrec = Rand.GammaRand(random, posteriorAlpha) * posteriorBeta;
     99      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
     100      var sampledMean = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
     101
     102      return alglib.invnormaldistribution(0.99) / Math.Sqrt(sampledPrec) + sampledMean;
     103       */
    95104    }
    96105
     
    114123        return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
    115124    }
     125
     126    public override string ToString() {
     127      if (knownVariance) {
     128        var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
     129        var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
     130        return string.Format("Gaussian(mu, var=0.1), mu ~ Gaussian(mu'={0:F3}, var'={1:F3})", posteriorMeanMean, posteriorMeanVariance);
     131      } else {
     132        var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
     133        var posteriorK = priorK + estimator.N;
     134        var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
     135        double posteriorBeta;
     136        if (estimator.N > 0) {
     137          posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
     138        } else {
     139          posteriorBeta = precisionPriorBeta;
     140        }
     141        var nu = (int)(2 * posteriorAlpha);
     142        var meanVariance = posteriorBeta / (posteriorAlpha * posteriorK) * (nu / (double)(nu - 2));
     143        return string.Format("Gaussian(mu, var), mu ~ T{0}(mu'={1:F3}, var'={2:F3}), 1.0/var ~ Gamma(mu={3:F3}, var={4:F3})",
     144          nu, posteriorMean, meanVariance,
     145          posteriorAlpha / posteriorBeta, posteriorAlpha / (posteriorBeta * posteriorBeta));
     146      }
     147    }
    116148  }
    117149}
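
The posterior quantities printed by the new ToString above follow the standard conjugate updates for a Gaussian with unknown mean and unknown variance (normal-gamma prior; cf. the "equ. 91" comment in the sampling code). With n observations, sample mean \bar{x} and the variance \widehat{Var} reported by OnlineMeanAndVarianceEstimator, the code computes

    \kappa_n = \kappa_0 + n, \qquad
    \mu_n = \frac{\kappa_0 \mu_0 + n\bar{x}}{\kappa_0 + n}, \qquad
    \alpha_n = \alpha_0 + \frac{n}{2}, \qquad
    \beta_n = \beta_0 + \frac{n\,\widehat{Var}}{2} + \frac{\kappa_0\, n\,(\bar{x}-\mu_0)^2}{2(\kappa_0 + n)}

and the marginal posterior of the mean is a Student-t with 2\alpha_n degrees of freedom, location \mu_n and scale \beta_n/(\alpha_n \kappa_n), which is the distribution the sampling code above draws from.
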