
Changeset 11742


Timestamp:
01/09/15 14:57:28 (9 years ago)
Author:
gkronber
Message:

#2283 refactoring

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization
Files:
4 added
1 deleted
41 edited
3 moved

  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BernoulliPolicyActionInfo.cs

    r11732 r11742  
    77using HeuristicLab.Common;
    88
    9 namespace HeuristicLab.Algorithms.Bandits {
    10   public class BernoulliPolicyActionInfo : IPolicyActionInfo {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     10  public class BernoulliPolicyActionInfo : IBanditPolicyActionInfo {
    1111    public bool Disabled { get { return NumSuccess == -1; } }
    1212    public int NumSuccess { get; private set; }
    1313    public int NumFailure { get; private set; }
     14    public int Tries { get { return NumSuccess + NumFailure; } }
     15    public double Value { get { return NumSuccess / (double)(Tries); } }
    1416    public void UpdateReward(double reward) {
    1517      Debug.Assert(!Disabled);
     
    2931    }
    3032    public void PrintStats() {
    31       Console.WriteLine("expected value {0,5:F2} disabled {1}", NumSuccess / (double)NumFailure, Disabled);
     33      Console.WriteLine("expected value {0,5:F2} disabled {1}", Value, Disabled);
    3234    }
    3335  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BernoulliThompsonSamplingPolicy.cs

    r11732 r11742  
    77using HeuristicLab.Common;
    88
    9 namespace HeuristicLab.Algorithms.Bandits {
    10   public class BernoulliThompsonSamplingPolicy : IPolicy {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     10  public class BernoulliThompsonSamplingPolicy : IBanditPolicy {
    1111    // parameters of beta prior distribution
    1212    private readonly double alpha = 1.0;
    1313    private readonly double beta = 1.0;
    1414
    15     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    16       var myActionInfos = actionInfos.OfType<BernoulliPolicyActionInfo>(); // TODO: performance
     15    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     16      var myActionInfos = actionInfos.OfType<BernoulliPolicyActionInfo>();
    1717      int bestAction = -1;
    1818      double maxTheta = double.NegativeInfinity;
     
    3232    }
    3333
    34     public IPolicyActionInfo CreateActionInfo() {
     34    public IBanditPolicyActionInfo CreateActionInfo() {
    3535      return new BernoulliPolicyActionInfo();
    3636    }
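
    What SelectAction does here: Thompson sampling draws one value from each arm's Beta posterior (alpha + successes, beta + failures) and plays the argmax. A minimal self-contained C# sketch of that idea — not the repository's implementation; SampleBeta uses the uniform order-statistic trick, which suffices for the integer parameters produced by the Beta(1,1) prior:

        using System;
        using System.Linq;

        static class BernoulliThompsonSketch {
          // Beta(a, b) for integer a, b >= 1: distributed like the a-th smallest of (a + b - 1) uniforms
          static double SampleBeta(Random rand, int a, int b) {
            return Enumerable.Range(0, a + b - 1)
                             .Select(_ => rand.NextDouble())
                             .OrderBy(x => x)
                             .ElementAt(a - 1);
          }

          // one Thompson-sampling decision over per-arm success/failure counts
          public static int SelectAction(Random rand, int[] numSuccess, int[] numFailure) {
            int bestAction = -1;
            double maxTheta = double.NegativeInfinity;
            for (int a = 0; a < numSuccess.Length; a++) {
              var theta = SampleBeta(rand, 1 + numSuccess[a], 1 + numFailure[a]); // Beta(1,1) prior
              if (theta > maxTheta) { maxTheta = theta; bestAction = a; }
            }
            return bestAction;
          }
        }
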
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs

    r11732 r11742  
    77using HeuristicLab.Common;
    88
    9 namespace HeuristicLab.Algorithms.Bandits {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    1010  // also called softmax policy
    11   public class BoltzmannExplorationPolicy : IPolicy {
     11  public class BoltzmannExplorationPolicy : IBanditPolicy {
    1212    private readonly double beta;
     13    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
    1314
    14     public BoltzmannExplorationPolicy(double beta) {
     15    public BoltzmannExplorationPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward) { }
     16
     17    public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
    1518      if (beta < 0) throw new ArgumentException();
    1619      this.beta = beta;
     20      this.valueFunction = valueFunction;
    1721    }
    18     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     22    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1923      Debug.Assert(actionInfos.Any());
    2024
    2125      // select best
    22       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
     26      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    2327      Debug.Assert(myActionInfos.Any(a => !a.Disabled));
    24       double[] w = new double[myActionInfos.Length];
    2528
    26       for (int a = 0; a < myActionInfos.Length; a++) {
    27         if (myActionInfos[a].Disabled) {
    28           w[a] = 0; continue;
    29         }
    30         if (myActionInfos[a].Tries == 0) return a;
    31         var sumReward = myActionInfos[a].SumReward;
    32         var tries = myActionInfos[a].Tries;
    33         var avgReward = sumReward / tries;
    34         w[a] = Math.Exp(beta * avgReward);
    35       }
     29      var w = from aInfo in myActionInfos
     30              select aInfo.Disabled
     31                ? 0.0
     32                : Math.Exp(beta * valueFunction(aInfo));
    3633
    37 
    38       var bestAction = Enumerable.Range(0, w.Length).SampleProportional(random, w).First();
     34      var bestAction = myActionInfos
     35        .Select((aInfo, idx) => new { aInfo, idx })
     36        .SampleProportional(random, w)
     37        .Select(p => p.idx)
     38        .First();
    3939      Debug.Assert(bestAction >= 0);
    40       Debug.Assert(bestAction < w.Length);
    41       Debug.Assert(!myActionInfos[bestAction].Disabled);
    4240      return bestAction;
    4341    }
    4442
    45     public IPolicyActionInfo CreateActionInfo() {
     43    public IBanditPolicyActionInfo CreateActionInfo() {
    4644      return new DefaultPolicyActionInfo();
    4745    }
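
    The LINQ rewrite above delegates the actual drawing to SampleProportional; mechanically this is roulette-wheel selection over the weights w_a = exp(beta * value(a)), with disabled arms contributing weight 0. A standalone sketch under that assumption (SampleSoftmax is an illustrative name, not part of this changeset):

        using System;
        using System.Linq;

        static class SoftmaxSketch {
          // roulette-wheel selection over exp(beta * value); returns an arm index
          public static int SampleSoftmax(Random rand, double[] values, double beta) {
            var w = values.Select(v => Math.Exp(beta * v)).ToArray();
            var r = rand.NextDouble() * w.Sum();
            var acc = 0.0;
            for (int a = 0; a < w.Length; a++) {
              acc += w[a];
              if (r <= acc) return a;
            }
            return w.Length - 1; // guard against floating-point round-off
          }
        }
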
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    99  /* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings of the 12th
    1010International Conference, CP 2006, Nantes, France, September 25-29, 2006, pp. 560-574 */
    1111
    12   public class ChernoffIntervalEstimationPolicy : IPolicy {
     12  public class ChernoffIntervalEstimationPolicy : IBanditPolicy {
    1313    private readonly double delta;
    1414
     
    1616      this.delta = delta;
    1717    }
    18     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     18    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1919      Debug.Assert(actionInfos.Any());
    2020      // select best
    21       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
    22       int k = myActionInfos.Length;
     21      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
     22      int k = myActionInfos.Count(a => !a.Disabled);
    2323      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    2424      int bestAction = -1;
    2525      double bestQ = double.NegativeInfinity;
    26       for (int a = 0; a < myActionInfos.Length; a++) {
    27         if (myActionInfos[a].Disabled) continue;
    28         if (myActionInfos[a].Tries == 0) return a;
     26      var aIdx = -1;
     27      foreach (var aInfo in myActionInfos) {
     28        aIdx++;
     29        if (aInfo.Disabled) continue;
     30        if (aInfo.Tries == 0) return aIdx;
    2931
    30         var sumReward = myActionInfos[a].SumReward;
    31         var tries = myActionInfos[a].Tries;
    32 
    33         var avgReward = sumReward / tries;
     32        var avgReward = aInfo.SumReward / aInfo.Tries;
    3433
    3534        // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
    3635        // var alpha = Math.Log(2 * totalTries * k / delta);
    37         double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
    38         var q = avgReward + (alpha + Math.Sqrt(2 * tries * avgReward * alpha + alpha * alpha)) / tries;
     36        double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
     37        var q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
    3938        if (q > bestQ) {
    4039          bestQ = q;
    41           bestAction = a;
     40          bestAction = aIdx;
    4241        }
    4342      }
     
    4645    }
    4746
    48     public IPolicyActionInfo CreateActionInfo() {
     47    public IBanditPolicyActionInfo CreateActionInfo() {
    4948      return new DefaultPolicyActionInfo();
    5049    }
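
    Written out, the score computed in this loop is the distribution-free upper confidence bound from the cited Streeter and Smith paper; with n_a tries of arm a, K enabled arms, n total tries, and confidence parameter delta:

        q_a = \bar{r}_a + \frac{\alpha + \sqrt{2 n_a \bar{r}_a \alpha + \alpha^2}}{n_a},
        \qquad \alpha = \ln\left(\frac{2 n K}{\delta}\right)

    (the code builds alpha as a sum and difference of logs, which is the same quantity).
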
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/DefaultPolicyActionInfo.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    99  // stores information that is relevant for most of the policies
    10   public class DefaultPolicyActionInfo : IPolicyActionInfo {
     10  public class DefaultPolicyActionInfo : IBanditPolicyActionInfo {
    1111    public bool Disabled { get { return Tries == -1; } }
    1212    public double SumReward { get; private set; }
     13    public int Tries { get; private set; }
    1314    public double MaxReward { get; private set; }
    14     public int Tries { get; private set; }
    15 
     15    public double Value { get { return SumReward / Tries; } }
    1616    public DefaultPolicyActionInfo() {
    17       MaxReward = double.NegativeInfinity;
     17      MaxReward = double.MinValue;
    1818    }
    1919
     
    3737      Console.WriteLine("avg reward {0,5:F2} disabled {1}", SumReward / Tries, Disabled);
    3838    }
     39
     40    public static Func<DefaultPolicyActionInfo, double> AverageReward {
     41      get {
     42        return (aInfo) =>
     43          aInfo.Tries == 0 ?
     44          double.PositiveInfinity :
     45          aInfo.SumReward / (double)aInfo.Tries;
     46      }
     47    }
    3948  }
    4049}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EmptyPolicyActionInfo.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class EmptyPolicyActionInfo : IPolicyActionInfo {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  public class EmptyPolicyActionInfo : IBanditPolicyActionInfo {
     10    public double Value { get { return 0.0; } }
     11
    1012    public bool Disabled { get; private set; }
    1113    public void UpdateReward(double reward) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs

    r11732 r11742  
    55using System.Text;
    66using System.Threading.Tasks;
     7using HeuristicLab.Common;
    78
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class EpsGreedyPolicy : IPolicy {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     10  public class EpsGreedyPolicy : IBanditPolicy {
    1011    private readonly double eps;
    1112    private readonly RandomPolicy randomPolicy;
     13    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
     14    private readonly string desc;
    1215
    13     public EpsGreedyPolicy(double eps) {
     16
     17    public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }
     18
     19    public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
    1420      this.eps = eps;
    1521      this.randomPolicy = new RandomPolicy();
     22      this.valueFunction = valueFunction;
     23      this.desc = desc;
    1624    }
    17     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     25
     26    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1827      Debug.Assert(actionInfos.Any());
    1928      if (random.NextDouble() > eps) {
    2029        // select best
    2130        var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    22         int bestAction = -1;
     31        var bestActions = new List<int>();
    2332        double bestQ = double.NegativeInfinity;
     33
    2434        int aIdx = -1;
    2535        foreach (var aInfo in myActionInfos) {
    26 
    2736          aIdx++;
    2837          if (aInfo.Disabled) continue;
    29           if (aInfo.Tries == 0) return aIdx;
    3038
     39          var q = valueFunction(aInfo);
    3140
    32           var avgReward = aInfo.SumReward / aInfo.Tries;         
    33           //var q = avgReward;
    34           var q = aInfo.MaxReward;
    3541          if (q > bestQ) {
     42            bestActions.Clear();
     43            bestActions.Add(aIdx);
    3644            bestQ = q;
    37             bestAction = aIdx;
     45          } else if (q.IsAlmost(bestQ)) {
     46            bestActions.Add(aIdx);
    3847          }
    3948        }
    40         Debug.Assert(bestAction >= 0);
    41         return bestAction;
     49        Debug.Assert(bestActions.Any());
     50        return bestActions.SelectRandom(random);
    4251      } else {
    4352        // select random
     
    4655    }
    4756
    48     public IPolicyActionInfo CreateActionInfo() {
     57    public IBanditPolicyActionInfo CreateActionInfo() {
    4958      return new DefaultPolicyActionInfo();
    5059    }
     
    5261
    5362    public override string ToString() {
    54       return string.Format("EpsGreedyPolicy({0:F2})", eps);
     63      return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
    5564    }
    5665  }
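
    The new constructor makes the greedy criterion pluggable instead of hard-coding it. A usage sketch; the description strings are illustrative, not from the changeset:

        // equivalent to new EpsGreedyPolicy(0.1): explore 10% of the time, otherwise greedy on average reward
        var avgGreedy = new EpsGreedyPolicy(0.1, DefaultPolicyActionInfo.AverageReward, "avg");
        // rank by the best reward seen so far instead (the criterion the old code hard-coded)
        var maxGreedy = new EpsGreedyPolicy(0.1, aInfo => aInfo.MaxReward, "max");
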
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/Exp3Policy.cs

    r11730 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    99  public class Exp3Policy : BanditPolicy {
    1010    private readonly Random random;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GaussianThompsonSamplingPolicy.cs

    r11732 r11742  
    55using HeuristicLab.Common;
    66
    7 namespace HeuristicLab.Algorithms.Bandits {
     7namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    88
    9   public class GaussianThompsonSamplingPolicy : IPolicy {
     9  [Obsolete("Replaced by GenericThompsonSamplingPolicy(GaussianModel(0.5, 1.0, 0.1))")]
     10  public class GaussianThompsonSamplingPolicy : IBanditPolicy {
    1011    private bool compatibility;
    1112
     
    2223    }
    2324
    24     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     25    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    2526      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    2627      int bestAction = -1;
     
    3839        double theta;
    3940        if (compatibility) {
     41          // old code used for old experiments (preserved because it performed very well)
    4042          if (tries < 2) return aIdx;
    4143          var mu = sampleMean;
     
    6567    }
    6668
    67     public IPolicyActionInfo CreateActionInfo() {
     69    public IBanditPolicyActionInfo CreateActionInfo() {
    6870      return new MeanAndVariancePolicyActionInfo();
    6971    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GenericThompsonSamplingPolicy.cs

    r11732 r11742  
    77using HeuristicLab.Common;
    88
    9 namespace HeuristicLab.Algorithms.Bandits {
    10   public class GenericThompsonSamplingPolicy : IPolicy {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     10  public class GenericThompsonSamplingPolicy : IBanditPolicy {
    1111    private readonly IModel model;
    1212
     
    1515    }
    1616
    17     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     17    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1818      var myActionInfos = actionInfos.OfType<ModelPolicyActionInfo>();
    1919      int bestAction = -1;
     
    3434    }
    3535
    36     public IPolicyActionInfo CreateActionInfo() {
     36    public IBanditPolicyActionInfo CreateActionInfo() {
    3737      return new ModelPolicyActionInfo((IModel)model.Clone());
    3838    }
    3939
    4040    public override string ToString() {
    41       return string.Format("GenericThompsonSamplingPolicy({0})", model);
     41      return string.Format("GenericThompsonSamplingPolicy(\"{0}\")", model);
    4242    }
    4343  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/MeanAndVariancePolicyActionInfo.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class MeanAndVariancePolicyActionInfo : IPolicyActionInfo {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  public class MeanAndVariancePolicyActionInfo : IBanditPolicyActionInfo {
    1010    private bool disabled;
    1111    public bool Disabled { get { return disabled; } }
     
    1515    public double AvgReward { get { return estimator.Avg; } }
    1616    public double RewardVariance { get { return estimator.Variance; } }
     17    public double Value { get { return AvgReward; } }
    1718
    1819    public void UpdateReward(double reward) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ModelPolicyActionInfo.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    99  // uses a statistical model to sample and update posterior distribution p(Reward | Data)
    10   public class ModelPolicyActionInfo : IPolicyActionInfo {
     10  public class ModelPolicyActionInfo : IBanditPolicyActionInfo {
    1111    private readonly IModel model;
    1212    public bool Disabled { get { return Tries == -1; } }
     13    public double Value { get { return model.SampleExpectedReward(new Random()); } }
    1314
    1415    public int Tries { get; private set; }
     
    3839      model.PrintStats();
    3940    }
     41
     42    public override string ToString() {
     43      return string.Format("disabled {0} model {1}", Disabled, model);
     44    }
    4045  }
    4146}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/RandomPolicy.cs

    r11732 r11742  
    77using HeuristicLab.Common;
    88
    9 namespace HeuristicLab.Algorithms.Bandits {
    10   public class RandomPolicy : IPolicy {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     10  public class RandomPolicy : IBanditPolicy {
    1111
    1212    public override string ToString() {
     
    1414    }
    1515
    16     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     16    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1717      return actionInfos
    18         .Select((a, i) => Tuple.Create(a, i))
     18        .Select((aInfo, idx) => Tuple.Create(aInfo, idx))
    1919        .Where(p => !p.Item1.Disabled)
    2020        .SelectRandom(random).Item2;
    2121    }
    2222
    23     public IPolicyActionInfo CreateActionInfo() {
    24       return new EmptyPolicyActionInfo();
     23    public IBanditPolicyActionInfo CreateActionInfo() {
     24      return new DefaultPolicyActionInfo();
    2525    }
    2626  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ThresholdAscentPolicy.cs

    r11730 r11742  
    55using System.Text;
    66using System.Threading.Tasks;
     7using HeuristicLab.Common;
    78
    8 namespace HeuristicLab.Algorithms.Bandits {
     9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    910  /* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings of the 12th
    1011 International Conference, CP 2006, Nantes, France, September 25-29, 2006, pp. 560-574 */
    1112
    12   public class ThresholdAscentPolicy : BanditPolicy {
    13     const int numBins = 101;
    14     const double binSize = 1.0 / (numBins - 1);
     13  public class ThresholdAscentPolicy : IBanditPolicy {
     14    public const int numBins = 101;
     15    public const double binSize = 1.0 / (numBins - 1);
    1516
    16     // for each arm store the number of observed rewards for each bin of size delta
    17     // for delta = 0.01 we have 101 bins
    18     // the first bin is freq of rewards  >= 0 // all
    19     // the second bin is freq of rewards > 0
    20     // the third bin is freq of rewards > 0.01
    21     // the last bin is for rewards > 0.99
    22     //
    23     // (also see RewardBin function)
    24     private readonly int[,] armRewardHistogram; // for performance reasons we store cumulative counts (freq of rewards > lower threshold)
     17    private class ThresholdAscentActionInfo : IBanditPolicyActionInfo {
    2518
     19      // for each arm store the number of observed rewards for each bin of size delta
     20      // for delta = 0.01 we have 101 bins
     21      // the first bin is freq of rewards  >= 0 // all
     22      // the second bin is freq of rewards > 0
     23      // the third bin is freq of rewards > 0.01
     24      // the last bin is for rewards > 0.99
     25      //
     26      // (also see RewardBin function)
     27      public int[] rewardHistogram = new int[numBins];    // for performance reasons we store cumulative counts (freq of rewards > lower threshold)
     28      public int Tries { get; private set; }
     29      public int thresholdBin = 1;
     30      public double Value { get { return rewardHistogram[thresholdBin] / (double)Tries; } }
    2631
    27     private readonly int[] tries;
     32      public bool Disabled { get { return Tries == -1; } }
     33
     34      public void UpdateReward(double reward) {
     35        Tries++;
     36        for (var idx = thresholdBin; idx <= RewardBin(reward); idx++)
     37          rewardHistogram[idx]++;
     38      }
     39
     40      public void Disable() {
     41        Tries = -1;
     42      }
     43
     44      public void Reset() {
     45        Tries = 0;
     46        thresholdBin = 1;
     47        Array.Clear(rewardHistogram, 0, rewardHistogram.Length);
     48      }
     49
     50      public void PrintStats() {
     51        if (Tries >= 0) {
     52          Console.Write("{0,6}", Tries);
     53        } else {
     54          Console.Write("{0,6}", "");
     55        }
     56      }
     57
     58      // maps a reward value to its bin
     59      private static int RewardBin(double reward) {
     60        Debug.Assert(reward >= 0 && reward <= 1.0);
     61        // reward = 0 => 0
     62        // ]0.00 .. 0.01] => 1
     63        // ]0.01 .. 0.02] => 2
     64        // ...
     65        // ]0.99 .. 1.00] => 100
     66        if (reward <= 0) return 0;
     67        return (int)Math.Ceiling((reward / binSize));
     68      }
     69    }
     70
    2871    private readonly int s;
    2972    private readonly double delta;
    3073
    31     private int totalTries = 0;
    32     private int thresholdBin; // bin index of current threshold
    33     private const double maxTries = 1E6;
    34 
    35     public ThresholdAscentPolicy(int numActions, int s = 100, double delta = 0.05)
    36       : base(numActions) {
    37       this.thresholdBin = 1; // first bin to check is bin idx 1 == freq of rewards > 0
     74    public ThresholdAscentPolicy(int s = 100, double delta = 0.05) {
    3875      this.s = s;
    3976      this.delta = delta;
    40       this.armRewardHistogram = new int[numActions, numBins];
    41       this.tries = new int[numActions];
    4277    }
    4378
    44     // maps a reward value to its bin
    45     private static int RewardBin(double reward) {
    46       Debug.Assert(reward >= 0 && reward <= 1.0);
    47       // reward = 0 => 0
    48       // ]0.00 .. 0.01] => 1
    49       // ]0.01 .. 0.02] => 2
    50       // ...
    51       // ]0.99 .. 1.00] => 100
    52       if (reward <= 0) return 0;
    53       return (int)Math.Ceiling((reward / binSize));
    54     }
    55 
    56 
    57     private double U(double mu, int n, int k) {
     79    private double U(double mu, int totalTries, int n, int k) {
    5880      //var alpha = Math.Log(2.0 * totalTries * k / delta);
    59       double alpha = Math.Log(2) + Math.Log(maxTries) + Math.Log(k) - Math.Log(delta); // totalTries is max iterations in original paper
     81      double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
    6082      return mu + (alpha + Math.Sqrt(2 * n * mu * alpha + alpha * alpha)) / n;
    6183    }
    6284
    6385
    64     public override int SelectAction() {
    65       Debug.Assert(Actions.Any());
    66       UpdateThreshold();
     86    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     87      Debug.Assert(actionInfos.Any());
     88      var myActionInfos = actionInfos.OfType<ThresholdAscentActionInfo>();
     89      UpdateThreshold(myActionInfos);
     90
    6791      int bestAction = -1;
    6892      double bestQ = double.NegativeInfinity;
    69       int k = Actions.Count();
    70       foreach (var a in Actions) {
    71         if (tries[a] == 0) return a;
    72         double mu = armRewardHistogram[a, thresholdBin] / (double)tries[a]; // probability of rewards > T
    73         double q = U(mu, tries[a], k);
     93      int k = myActionInfos.Count(a => !a.Disabled);
     94      var totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     95      int aIdx = -1;
     96      foreach (var aInfo in myActionInfos) {
     97        aIdx++;
     98        if (aInfo.Disabled) continue;
     99        if (aInfo.Tries == 0) return aIdx;
     100        double mu = aInfo.Value; // probability of rewards > T
     101        double q = U(mu, totalTries, aInfo.Tries, k);          // totalTries is max iterations in original paper
    74102        if (q > bestQ) {
    75103          bestQ = q;
    76           bestAction = a;
     104          bestAction = aIdx;
    77105        }
    78106      }
    79       Debug.Assert(Actions.Contains(bestAction));
     107      Debug.Assert(bestAction > -1);
    80108      return bestAction;
    81109    }
    82110
    83     private void UpdateThreshold() {
    84       while (thresholdBin < (numBins - 1) && Actions.Sum(a => armRewardHistogram[a, thresholdBin]) >= s) {
     111
     112    private void UpdateThreshold(IEnumerable<ThresholdAscentActionInfo> actionInfos) {
     113      var thresholdBin = 1; // first bin to check is bin idx 1 == freq of rewards > 0
     114      while (thresholdBin < (numBins - 1) && actionInfos.Sum(a => a.rewardHistogram[thresholdBin]) >= s) {
    85115        thresholdBin++;
    86116        // Console.WriteLine("New threshold {0:F2}", T);
    87117      }
     118      foreach (var aInfo in actionInfos) {
     119        aInfo.thresholdBin = thresholdBin;
     120      }
    88121    }
    89122
    90     public override void UpdateReward(int action, double reward) {
    91       Debug.Assert(Actions.Contains(action));
    92       totalTries++;
    93       tries[action]++;
    94       // efficiency: we can start at the current threshold bin because all bins below that are not accessed in select-action
    95       for (var idx = thresholdBin; idx <= RewardBin(reward); idx++)
    96         armRewardHistogram[action, idx]++;
     123
     124    public IBanditPolicyActionInfo CreateActionInfo() {
     125      return new ThresholdAscentActionInfo();
    97126    }
    98127
    99     public override void DisableAction(int action) {
    100       base.DisableAction(action);
    101       totalTries -= tries[action];
    102       tries[action] = -1;
    103     }
    104 
    105     public override void Reset() {
    106       base.Reset();
    107       totalTries = 0;
    108       thresholdBin = 1;
    109       Array.Clear(tries, 0, tries.Length);
    110       Array.Clear(armRewardHistogram, 0, armRewardHistogram.Length);
    111     }
    112 
    113     public override void PrintStats() {
    114       for (int i = 0; i < tries.Length; i++) {
    115         if (tries[i] >= 0) {
    116           Console.Write("{0,6}", tries[i]);
    117         } else {
    118           Console.Write("{0,6}", "");
    119         }
    120       }
    121       Console.WriteLine();
    122     }
    123128    public override string ToString() {
    124129      return string.Format("ThresholdAscentPolicy({0},{1:F2})", s, delta);
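
    To make the cumulative-histogram comments concrete, here is the bin mapping spelled out for binSize = 0.01 (a sketch only; RewardBin is private to the nested action-info class):

        // RewardBin(0.0)   == 0    reward of exactly zero
        // RewardBin(0.005) == 1    ]0.00 .. 0.01]
        // RewardBin(0.01)  == 1
        // RewardBin(0.011) == 2    ]0.01 .. 0.02]
        // RewardBin(1.0)   == 100  ]0.99 .. 1.00]
        // rewardHistogram[i] counts rewards falling in bins >= i, so
        // rewardHistogram[thresholdBin] / Tries estimates P(reward > current threshold),
        // the mu that is fed into U() during selection.
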
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class UCB1Policy : IPolicy {
    10     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  // policy for k-armed bandit (see Auer et al. 2002)
     10  public class UCB1Policy : IBanditPolicy {
     11    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1112      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
    1213      int bestAction = -1;
     
    2728    }
    2829
    29     public IPolicyActionInfo CreateActionInfo() {
     30    public IBanditPolicyActionInfo CreateActionInfo() {
    3031      return new DefaultPolicyActionInfo();
    3132    }
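
    The selection loop itself is elided from this hunk. For reference, the classic UCB1 index from the cited Auer et al. 2002 paper is

        q_a = \bar{r}_a + \sqrt{\frac{2 \ln n}{n_a}}

    with n total tries and n_a tries of arm a; whether this class uses exactly that constant is not visible in the diff.
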
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class UCB1TunedPolicy : IPolicy {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  // policy for k-armed bandit (see Auer et al. 2002)
     10  public class UCB1TunedPolicy : IBanditPolicy {
    1011
    11     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    12       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
     12    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     13      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    1314      int bestAction = -1;
    1415      double bestQ = double.NegativeInfinity;
    1516      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    1617
    17       for (int a = 0; a < myActionInfos.Length; a++) {
    18         if (myActionInfos[a].Disabled) continue;
    19         if (myActionInfos[a].Tries == 0) return a;
     18      int aIdx = -1;
     19      foreach (var aInfo in myActionInfos) {
     20        aIdx++;
     21        if (aInfo.Disabled) continue;
     22        if (aInfo.Tries == 0) return aIdx;
    2023
    21         var sumReward = myActionInfos[a].SumReward;
    22         var tries = myActionInfos[a].Tries;
     24        var sumReward = aInfo.SumReward;
     25        var tries = aInfo.Tries;
    2326
    2427        var avgReward = sumReward / tries;
    25         var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(myActionInfos[a], totalTries))); // 1/4 is upper bound of bernoulli distributed variable
     28        var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries))); // 1/4 is upper bound of bernoulli distributed variable
    2629        if (q > bestQ) {
    2730          bestQ = q;
    28           bestAction = a;
     31          bestAction = aIdx;
    2932        }
    3033      }
     
    3336    }
    3437
    35     public IPolicyActionInfo CreateActionInfo() {
     38    public IBanditPolicyActionInfo CreateActionInfo() {
    3639      return new MeanAndVariancePolicyActionInfo();
    3740    }
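
    The helper V referenced above is not shown in the hunk; in Auer et al.'s UCB1-Tuned it is the per-arm variance estimate plus an exploration term,

        V_a(n) = \overline{\sigma_a^2} + \sqrt{\frac{2 \ln n}{n_a}}

    and min(1/4, V_a(n)) caps it at the maximum variance of a Bernoulli variable, as the inline comment notes.
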
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCBNormalPolicy.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class UCBNormalPolicy : IPolicy {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  public class UCBNormalPolicy : IBanditPolicy {
    1010
    11     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    12       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
     11    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     12      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    1313      int bestAction = -1;
    1414      double bestQ = double.NegativeInfinity;
    1515      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     16      int aIdx = -1;
     17      foreach (var aInfo in myActionInfos) {
     18        aIdx++;
     19        if (aInfo.Disabled) continue;
     20        if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return aIdx;
    1621
    17       for (int a = 0; a < myActionInfos.Length; a++) {
    18         if (myActionInfos[a].Disabled) continue;
    19         if (totalTries <= 1 || myActionInfos[a].Tries <= 1 || myActionInfos[a].Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return a;
    20 
    21         var tries = myActionInfos[a].Tries;
    22         var avgReward = myActionInfos[a].AvgReward;
    23         var rewardVariance = myActionInfos[a].RewardVariance;
    24         var estVariance = 16 * rewardVariance * (Math.Log(totalTries - 1) / tries);
    25         if (estVariance < 0) estVariance = 0; // numerical problems
    26         var q = avgReward
    27           + Math.Sqrt(estVariance);
     22        var tries = aInfo.Tries;
     23        var avgReward = aInfo.AvgReward;
     24        var rewardVariance = aInfo.RewardVariance;
     25        var estVariance = 16.0 * rewardVariance * (Math.Log(totalTries - 1) / tries);
     26        var q = avgReward + Math.Sqrt(estVariance);
    2827        if (q > bestQ) {
    2928          bestQ = q;
    30           bestAction = a;
     29          bestAction = aIdx;
    3130        }
    3231      }
     
    3534    }
    3635
    37     public IPolicyActionInfo CreateActionInfo() {
     36    public IBanditPolicyActionInfo CreateActionInfo() {
    3837      return new MeanAndVariancePolicyActionInfo();
    3938    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCTPolicy.cs

    r11732 r11742  
    55using System.Text;
    66using System.Threading.Tasks;
    7 
    8 namespace HeuristicLab.Algorithms.Bandits {
     7namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    98  /* Kocsis et al. Bandit based Monte-Carlo Planning */
    10   public class UCTPolicy : IPolicy {
     9  public class UCTPolicy : IBanditPolicy {
    1110    private readonly double c;
    1211
     
    1615
    1716
    18     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    19       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
     17    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     18      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    2019      int bestAction = -1;
    2120      double bestQ = double.NegativeInfinity;
    2221      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    2322
    24       for (int a = 0; a < myActionInfos.Length; a++) {
    25         if (myActionInfos[a].Disabled) continue;
    26         if (myActionInfos[a].Tries == 0) return a;
    27         var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + 2 * c * Math.Sqrt(Math.Log(totalTries) / myActionInfos[a].Tries);
     23      int aIdx = -1;
     24      foreach (var aInfo in myActionInfos) {
     25        aIdx++;
     26        if (aInfo.Disabled) continue;
     27        if (aInfo.Tries == 0) return aIdx;
     28        var q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries);
    2829        if (q > bestQ) {
    2930          bestQ = q;
    30           bestAction = a;
     31          bestAction = aIdx;
    3132        }
    3233      }
     
    3536    }
    3637
    37     public IPolicyActionInfo CreateActionInfo() {
     38    public IBanditPolicyActionInfo CreateActionInfo() {
    3839      return new DefaultPolicyActionInfo();
    3940    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

    r11732 r11742  
    4444  <ItemGroup>
    4545    <Compile Include="BanditHelper.cs" />
     46    <Compile Include="BanditPolicies\BernoulliPolicyActionInfo.cs" />
     47    <Compile Include="BanditPolicies\BernoulliThompsonSamplingPolicy.cs" />
     48    <Compile Include="BanditPolicies\BoltzmannExplorationPolicy.cs" />
     49    <Compile Include="BanditPolicies\ChernoffIntervalEstimationPolicy.cs" />
     50    <Compile Include="BanditPolicies\DefaultPolicyActionInfo.cs" />
     51    <Compile Include="BanditPolicies\EpsGreedyPolicy.cs" />
     52    <Compile Include="BanditPolicies\GaussianThompsonSamplingPolicy.cs" />
     53    <Compile Include="BanditPolicies\GenericThompsonSamplingPolicy.cs" />
     54    <Compile Include="BanditPolicies\MeanAndVariancePolicyActionInfo.cs" />
     55    <Compile Include="BanditPolicies\ModelPolicyActionInfo.cs" />
     56    <Compile Include="BanditPolicies\RandomPolicy.cs" />
     57    <Compile Include="BanditPolicies\ThresholdAscentPolicy.cs" />
     58    <Compile Include="BanditPolicies\UCB1Policy.cs" />
     59    <Compile Include="BanditPolicies\UCB1TunedPolicy.cs" />
     60    <Compile Include="BanditPolicies\UCBNormalPolicy.cs" />
     61    <Compile Include="BanditPolicies\UCTPolicy.cs" />
    4662    <Compile Include="Bandits\BernoulliBandit.cs" />
    4763    <Compile Include="Bandits\GaussianBandit.cs" />
     
    4965    <Compile Include="Bandits\IBandit.cs" />
    5066    <Compile Include="Bandits\TruncatedNormalBandit.cs" />
     67    <Compile Include="GrammarPolicies\RandomPolicy.cs" />
     68    <Compile Include="IPolicy.cs" />
     69    <Compile Include="IBanditPolicy.cs" />
     70    <Compile Include="IBanditPolicyActionInfo.cs" />
    5171    <Compile Include="OnlineMeanAndVarianceEstimator.cs" />
    52     <Compile Include="IPolicyActionInfo.cs" />
    5372    <Compile Include="Models\BernoulliModel.cs" />
    5473    <Compile Include="Models\GaussianModel.cs" />
    5574    <Compile Include="Models\IModel.cs" />
    56     <Compile Include="Policies\BernoulliThompsonSamplingPolicy.cs">
    57       <SubType>Code</SubType>
    58     </Compile>
    59     <Compile Include="Policies\BoltzmannExplorationPolicy.cs">
    60       <SubType>Code</SubType>
    61     </Compile>
    62     <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs">
    63       <SubType>Code</SubType>
    64     </Compile>
    65     <Compile Include="Policies\BernoulliPolicyActionInfo.cs" />
    66     <Compile Include="Policies\ModelPolicyActionInfo.cs" />
    67     <Compile Include="Policies\EpsGreedyPolicy.cs">
    68       <SubType>Code</SubType>
    69     </Compile>
    70     <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs">
    71       <SubType>Code</SubType>
    72     </Compile>
    73     <Compile Include="Policies\GenericThompsonSamplingPolicy.cs">
    74       <SubType>Code</SubType>
    75     </Compile>
    76     <Compile Include="Policies\MeanAndVariancePolicyActionInfo.cs" />
    77     <Compile Include="Policies\DefaultPolicyActionInfo.cs" />
    78     <Compile Include="Policies\EmptyPolicyActionInfo.cs" />
    79     <Compile Include="Policies\RandomPolicy.cs" />
    80     <Compile Include="Policies\UCB1Policy.cs" />
    81     <Compile Include="IPolicy.cs" />
    82     <Compile Include="Policies\UCB1TunedPolicy.cs">
    83       <SubType>Code</SubType>
    84     </Compile>
    85     <Compile Include="Policies\UCBNormalPolicy.cs">
    86       <SubType>Code</SubType>
    87     </Compile>
    88     <Compile Include="Policies\UCTPolicy.cs">
    89       <SubType>Code</SubType>
    90     </Compile>
    9175    <Compile Include="Properties\AssemblyInfo.cs" />
    9276  </ItemGroup>
     
    9579      <Project>{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}</Project>
    9680      <Name>HeuristicLab.Common</Name>
     81    </ProjectReference>
     82    <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">
     83      <Project>{cb9dccf6-667e-4a13-b82d-dbd6b45a045e}</Project>
     84      <Name>HeuristicLab.Problems.GrammaticalOptimization</Name>
    9785    </ProjectReference>
    9886  </ItemGroup>
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IBanditPolicy.cs

    r11737 r11742  
    66
    77namespace HeuristicLab.Algorithms.Bandits {
    8   // this interface represents a policy for reinforcement learning
    9   public interface IPolicy {
    10     int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos);
    11     IPolicyActionInfo CreateActionInfo();
     8  // this interface represents a policy for bandit problems
     9  public interface IBanditPolicy {
     10    int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actions);
     11    IBanditPolicyActionInfo CreateActionInfo();
    1212  }
    1313}
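
    The interface pair forms a small protocol: policies themselves are stateless, and all per-action statistics live in the IBanditPolicyActionInfo objects they create. A usage sketch (PullArm, nArms, horizon, and random are hypothetical placeholders):

        var policy = new UCB1Policy();
        var infos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();
        for (int step = 0; step < horizon; step++) {
          int a = policy.SelectAction(random, infos);  // pick an arm from the current statistics
          double reward = PullArm(a);                  // hypothetical environment call
          infos[a].UpdateReward(reward);               // feed the observation back
        }
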
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IBanditPolicyActionInfo.cs

    r11737 r11742  
    66
    77namespace HeuristicLab.Algorithms.Bandits {
    8   public interface IPolicyActionInfo {
     8  public interface IBanditPolicyActionInfo {
    99    bool Disabled { get; }
     10    double Value { get; }
     11    int Tries { get; }
    1012    void UpdateReward(double reward);
    1113    void Disable();
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs

    r11732 r11742  
    99namespace HeuristicLab.Algorithms.Bandits.Models {
    1010  public class BernoulliModel : IModel {
     11
    1112    private int success;
    1213    private int failure;
     
    4748      return new BernoulliModel() { failure = this.failure, success = this.success };
    4849    }
     50
     51    public override string ToString() {
     52      return string.Format("Bernoulli with Beta prior: mu={0:F2}", (success + alpha) / (success + alpha + failure + beta));
     53    }
    4954  }
    5055}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs

    r11732 r11742  
    77  // 2) unknown mean and unknown variance
    88  public class GaussianModel : IModel {
     9
    910    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
    1011
     
    8283      }
    8384
     85
    8486      // sample from the posterior marginal for mu (expected value) equ. 91
    8587      // p(µ|D) = T_{2αn}(µ | µn, βn/(αn·κn))
    8688
    87       // sample from Tk distribution : http://stats.stackexchange.com/a/70270
    8889      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
    8990
     
    9192      return theta;
    9293
    93       //return alglib.invnormaldistribution(random.NextDouble()) * + theta;
    94       //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean;
     94
     95      /*
     96       * value function : 0.99-quantile
     97      // sample posterior mean and posterior variance independently
     98      var sampledPrec = Rand.GammaRand(random, posteriorAlpha) * posteriorBeta;
     99      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
     100      var sampledMean = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
     101
     102      return alglib.invnormaldistribution(0.99) / Math.Sqrt(sampledPrec) + sampledMean;
     103       */
    95104    }
    96105
     
    114123        return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
    115124    }
     125
     126    public override string ToString() {
     127      if (knownVariance) {
     128        var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
     129        var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
     130        return string.Format("Gaussian(mu, var=0.1), mu ~ Gaussian(mu'={0:F3}, var'={1:F3})", posteriorMeanMean, posteriorMeanVariance);
     131      } else {
     132        var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
     133        var posteriorK = priorK + estimator.N;
     134        var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
     135        double posteriorBeta;
     136        if (estimator.N > 0) {
     137          posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
     138        } else {
     139          posteriorBeta = precisionPriorBeta;
     140        }
     141        var nu = (int)(2 * posteriorAlpha);
     142        var meanVariance = posteriorBeta / (posteriorAlpha * posteriorK) * (nu / (double)(nu - 2));
     143        return string.Format("Gaussian(mu, var), mu ~ T{0}(mu'={1:F3}, var'={2:F3}), 1.0/var ~ Gamma(mu={3:F3}, var={4:F3})",
     144          nu, posteriorMean, meanVariance,
     145          posteriorAlpha / posteriorBeta, posteriorAlpha / (posteriorBeta * posteriorBeta));
     146      }
     147    }
    116148  }
    117149}
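
    The posterior quantities assembled in the unknown-variance branch of ToString match the standard Normal-Gamma conjugate update; writing x̄ for the sample mean of n observations:

        \kappa_n = \kappa_0 + n, \qquad
        \mu_n = \frac{\kappa_0 \mu_0 + n \bar{x}}{\kappa_n}, \qquad
        \alpha_n = \alpha_0 + \frac{n}{2}, \qquad
        \beta_n = \beta_0 + \frac{1}{2} \sum_{i=1}^{n} (x_i - \bar{x})^2
                + \frac{\kappa_0 n (\bar{x} - \mu_0)^2}{2 (\kappa_0 + n)}

    and the marginal for the mean referenced in the sampling comment (eq. 91) is the scaled Student t

        p(\mu \mid D) = t_{2\alpha_n}\!\left(\mu \mid \mu_n, \frac{\beta_n}{\alpha_n \kappa_n}\right)
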
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs

    r11732 r11742  
    1717    private readonly Random random;
    1818    private readonly int contextLen;
    19     private readonly IPolicy policy;
     19    private readonly IBanditPolicy policy;
    2020
    21     public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IPolicy policy) {
     21    public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IBanditPolicy policy) {
    2222      this.maxLen = maxLen;
    2323      this.problem = problem;
     
    4545
    4646
    47     private Dictionary<string, IPolicyActionInfo[]> contextActionInfos;
     47    private Dictionary<string, IBanditPolicyActionInfo[]> contextActionInfos;
    4848    private List<Tuple<string, int>> updateChain;
    4949
    5050    private void InitPolicies(IGrammar grammar) {
    51       this.contextActionInfos = new Dictionary<string, IPolicyActionInfo[]>();
     51      this.contextActionInfos = new Dictionary<string, IBanditPolicyActionInfo[]>();
    5252      this.updateChain = new List<Tuple<string, int>>();
    5353    }
     
    8282          var endIdx = Math.Min(startIdx + contextLen, ntIdx);
    8383          var lft = phrase.Subsequence(startIdx, endIdx - startIdx + 1).ToString();
    84           lft = problem.Hash(lft);
     84          lft = problem.CanonicalRepresentation(lft);
    8585          if (!contextActionInfos.ContainsKey(lft)) {
    8686            contextActionInfos.Add(lft, g.GetAlternatives(nt).Select(_ => policy.CreateActionInfo()).ToArray());
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs

    r11732 r11742  
    1616    private readonly Random random;
    1717    private readonly IProblem problem;
    18     private readonly IPolicy policy;
     18    private readonly IBanditPolicy policy;
    1919
    20     public AlternativesSampler(IProblem problem, IPolicy policy, int maxLen) {
     20    public AlternativesSampler(IProblem problem, IBanditPolicy policy, int maxLen) {
    2121      this.problem = problem;
    2222      this.maxLen = maxLen;
     
    4343
    4444
    45     private Dictionary<char, IPolicyActionInfo[]> ntActionInfos;
     45    private Dictionary<char, IBanditPolicyActionInfo[]> ntActionInfos;
    4646    private List<Tuple<char, int>> updateChain;
    4747
    4848    private void InitPolicies(IGrammar grammar) {
    49       this.ntActionInfos = new Dictionary<char, IPolicyActionInfo[]>();
     49      this.ntActionInfos = new Dictionary<char, IBanditPolicyActionInfo[]>();
    5050      this.updateChain = new List<Tuple<char, int>>();
    5151      foreach (var nt in grammar.NonTerminalSymbols) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

    r11732 r11742  
    4646    <Compile Include="AlternativesContextSampler.cs" />
    4747    <Compile Include="ExhaustiveRandomFirstSearch.cs" />
     48    <Compile Include="MctsContextualSampler.cs">
     49      <SubType>Code</SubType>
     50    </Compile>
    4851    <Compile Include="MctsSampler.cs" />
    4952    <Compile Include="ExhaustiveDepthFirstSearch.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

    r11732 r11742  
    1212      public string ident;
    1313      public int randomTries;
    14       public int policyTries;
    15       public IPolicyActionInfo actionInfo;
     14      public IBanditPolicyActionInfo actionInfo;
    1615      public TreeNode[] children;
    1716      public bool done = false;
     
    2221
    2322      public override string ToString() {
    24         return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, actionInfo);
     23        return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, actionInfo.Tries, done, actionInfo);
    2524      }
    2625    }
     
    3433    private readonly Random random;
    3534    private readonly int randomTries;
    36     private readonly IPolicy policy;
     35    private readonly IBanditPolicy policy;
    3736
    3837    private List<TreeNode> updateChain;
     
    4746    // }
    4847
    49     public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IPolicy policy) {
     48    public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
    5049      this.maxLen = maxLen;
    5150      this.problem = problem;
     
    7877    public void PrintStats() {
    7978      var n = rootNode;
    80       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
     79      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, n.actionInfo.Tries);
    8180      while (n.children != null) {
    8281        Console.WriteLine();
    8382        Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
    84         Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.randomTries + ch.policyTries))));
     83        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.actionInfo.Value * 10))));
     84        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.actionInfo.Tries.ToString()))));
    8585        //n.policy.PrintStats();
    86         n = n.children.OrderByDescending(c => c.policyTries).First();
     86        n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
    8787      }
    8888      Console.ReadLine();
     
    108108      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
    109109      TreeNode n = rootNode;
    110       bool done = phrase.IsTerminal;
    111110      var curDepth = 0;
    112       while (!done) {
     111      while (!phrase.IsTerminal) {
    113112        updateChain.Add(n);
    114113
     
    127126          if (n.randomTries == randomTries && n.children == null) {
    128127            n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
    129             //n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
    130128            foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
    131129            treeSize += n.children.Length;
    132130          }
    133           n.policyTries++;
    134131          // => select using bandit policy
    135132          int selectedAltIdx = policy.SelectAction(random, n.children.Select(c => c.actionInfo));
     
    140137
    141138          curDepth++;
    142 
    143           done = phrase.IsTerminal;
    144139
    145140          // prepare for next iteration
     
    153148      // the last node is a leaf node (sentence is done), so we never need to visit this node again
    154149      n.done = true;
    155       n.actionInfo.Disable();
    156150
    157151      treeDepth = Math.Max(treeDepth, curDepth);
     
    165159      foreach (var e in updateChain) {
    166160        var node = e;
     161        if (node.done) node.actionInfo.Disable();
    167162        if (node.children != null && node.children.All(c => c.done)) {
    168163          node.done = true;
     
    171166        if (!node.done) {
    172167          node.actionInfo.UpdateReward(reward);
    173           //policy.UpdateReward(action, reward / updateChain.Count);
    174168        }
    175169      }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs

    r11732 r11742  
    88  public static class Extensions {
    99    public static bool IsAlmost(this double x, double y) {
    10       return Math.Abs(x - y) < 1.0e-6;
     10      return Math.Abs(x - y) < 1.0e-12;
    1111    }
    1212
     
    3838      var ssX = 0.0;
    3939      var ssY = 0.0;
    40       foreach (var p in xs.Zip(ys, (x, y) => new { x, y })) {
    41         s += (p.x - meanX) * (p.y - meanY);
    42         ssX += Math.Pow(p.x - meanX, 2);
    43         ssY += Math.Pow(p.y - meanY, 2);
     40      var xEnum = xs.GetEnumerator();
     41      var yEnum = ys.GetEnumerator();
     42      while (xEnum.MoveNext() & yEnum.MoveNext()) {
     43        var x = xEnum.Current;
     44        var y = yEnum.Current;
     45        s += (x - meanX) * (y - meanY);
     46        ssX += Math.Pow(x - meanX, 2);
     47        ssY += Math.Pow(y - meanY, 2);
    4448      }
     49      if (xEnum.MoveNext() | yEnum.MoveNext()) throw new ArgumentException("lengths are not equal");
    4550
    4651      if (s.IsAlmost(0)) return 0;
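
The rewritten loop advances both enumerators with the non-short-circuiting & so neither stream lags behind, and the trailing MoveNext check rejects sequences of different lengths. One caveat: a difference of exactly one element is consumed by the last & evaluation and slips past the final check. A hedged sketch of a per-iteration comparison that catches any mismatch (hypothetical helper, not part of this changeset; the normalization at the end follows the usual Pearson formula, with the means computed by the caller as in the original method):

    using System;
    using System.Collections.Generic;

    public static class SafeCorrelation {
      public static double Pearson(IEnumerable<double> xs, IEnumerable<double> ys,
                                   double meanX, double meanY) {
        double s = 0.0, ssX = 0.0, ssY = 0.0;
        using (var xe = xs.GetEnumerator())
        using (var ye = ys.GetEnumerator()) {
          while (true) {
            bool mx = xe.MoveNext(), my = ye.MoveNext();
            // comparing the flags every round also detects off-by-one mismatches
            if (mx != my) throw new ArgumentException("lengths are not equal");
            if (!mx) break;
            s += (xe.Current - meanX) * (ye.Current - meanY);
            ssX += Math.Pow(xe.Current - meanX, 2);
            ssY += Math.Pow(ye.Current - meanY, 2);
          }
        }
        if (s == 0.0) return 0;
        return s / (Math.Sqrt(ssX) * Math.Sqrt(ssY));
      }
    }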
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg/SymbolicRegressionProblem.cs

    r11732 r11742  
    7474
    7575    // right now only + and * are supported
    76     public string Hash(string terminalPhrase) {
     76    public string CanonicalRepresentation(string terminalPhrase) {
    7777      return terminalPhrase;
    7878      //var terms = terminalPhrase.Split('+');
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

    r11732 r11742  
    44using System.Globalization;
    55using HeuristicLab.Algorithms.Bandits;
     6using HeuristicLab.Algorithms.Bandits.BanditPolicies;
    67using HeuristicLab.Algorithms.Bandits.Models;
    78using Microsoft.VisualStudio.TestTools.UnitTesting;
     
    1617      var nArms = 20;
    1718
    18       // Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(20, 0.01));
    19       // Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(100, 0.01));
    20       // Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(500, 0.01));
    21       // Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(1000, 0.01));
    22       Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1)));
    23       Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 0.1)));
     19      // ThresholdAscent only works for rewards in [0..1] so far
     20
     21      Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 1)));
     22      Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 0.1)));
    2423      Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
    2524      Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy());
     
    9190      var randSeed = 31415;
    9291      var nArms = 20;
     92      Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
     93      Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
     94      Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
     95      Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
     96      Console.WriteLine("Generic Thompson (Gaussian fixed var)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
     97      Console.WriteLine("Generic Thompson (Gaussian unknown var)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
    9398      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
    9499      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy());
    95       Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
     100
    96101      /*
    97102      Console.WriteLine("Random"); TestPolicyNormal(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
     
    123128      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
    124129      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
    125       Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
     130      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));     
    126131      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
    127132      Console.WriteLine("ThresholdAscent(10,0.05)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
     
    141146      var randSeed = 31415;
    142147      var nArms = 20;
     148      Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
     149      Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
     150      Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
     151      Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
    143152      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
    144153      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy());
    145       Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
     154      Console.WriteLine("Generic Thompson (Gaussian fixed variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 0.1)));
     155      Console.WriteLine("Generic Thompson (Gaussian unknown variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
    146156
    147157      /*
     
    167177
    168178
    169     private void TestPolicyBernoulli(int randSeed, int nArms, IPolicy policy) {
     179    private void TestPolicyBernoulli(int randSeed, int nArms, IBanditPolicy policy) {
    170180      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
    171181    }
    172     private void TestPolicyGaussian(int randSeed, int nArms, IPolicy policy) {
     182    private void TestPolicyGaussian(int randSeed, int nArms, IBanditPolicy policy) {
    173183      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
    174184    }
    175     private void TestPolicyGaussianMixture(int randSeed, int nArms, IPolicy policy) {
     185    private void TestPolicyGaussianMixture(int randSeed, int nArms, IBanditPolicy policy) {
    176186      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
    177187    }
    178     private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IPolicy policy) {
     188    private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
    179189      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions));
    180190    }
    181191
    182192
    183     private void TestPolicy(int randSeed, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) {
     193    private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
    184194      var maxIt = 1E5;
    185195      var reps = 10; // independent runs
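
For orientation, the harness these tests drive has roughly the following shape: one action info per arm, then a select/pull/update loop, averaged over independent runs. A sketch assuming the IBanditPolicy and IBanditPolicyActionInfo interfaces shown in this changeset; the bandit's Pull(arm) member is a stand-in name, and the real TestPolicy additionally prints intermediate statistics:

    using System;
    using System.Linq;

    static class PolicyHarness {
      public static void Run(int randSeed, int nArms, IBanditPolicy policy,
                             Func<Random, int, IBandit> banditFactory) {
        var maxIt = 100000;
        var reps = 10; // independent runs
        var totalReward = 0.0;
        for (int r = 0; r < reps; r++) {
          var rand = new Random(randSeed + r);
          var bandit = banditFactory(rand, nArms);
          // one action info per arm, created by the policy under test
          var infos = Enumerable.Range(0, nArms)
                                .Select(_ => policy.CreateActionInfo()).ToArray();
          for (int i = 0; i < maxIt; i++) {
            int arm = policy.SelectAction(rand, infos);
            double reward = bandit.Pull(arm); // Pull is an assumed member name
            infos[arm].UpdateReward(reward);
            totalReward += reward;
          }
        }
        Console.WriteLine("avg reward {0:F4}", totalReward / (reps * (double)maxIt));
      }
    }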
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/EvenParityProblem.cs

    r11732 r11742  
    5151    }
    5252
    53     public string Hash(string terminalPhrase) {
     53    public string CanonicalRepresentation(string terminalPhrase) {
    5454      return terminalPhrase;
    5555    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs

    r11732 r11742  
    3939    }
    4040
    41     public string Hash(string terminalPhrase) {
     41    public string CanonicalRepresentation(string terminalPhrase) {
    4242      return terminalPhrase;
    4343    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs

    r11732 r11742  
    99    IGrammar Grammar { get; }
    1010    double Evaluate(string sentence);
    11     string Hash(string terminalPhrase);
     11    string CanonicalRepresentation(string terminalPhrase);
    1212  }
    1313}
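
The rename from Hash to CanonicalRepresentation makes the intent explicit: the method maps semantically equivalent phrases to a single key rather than computing a hash code. A hypothetical use, not part of this changeset, is an evaluation cache keyed by the canonical form:

    using System.Collections.Generic;

    class CachingEvaluator {
      private readonly IProblem problem;
      private readonly Dictionary<string, double> cache = new Dictionary<string, double>();

      public CachingEvaluator(IProblem problem) { this.problem = problem; }

      // semantically equivalent sentences share a key and are evaluated once
      public double Evaluate(string sentence) {
        var key = problem.CanonicalRepresentation(sentence);
        double quality;
        if (!cache.TryGetValue(key, out quality)) {
          quality = problem.Evaluate(sentence);
          cache[key] = quality;
        }
        return quality;
      }
    }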
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs

    r11732 r11742  
    8080    }
    8181
    82     public string Hash(string terminalPhrase) {
     82    public string CanonicalRepresentation(string terminalPhrase) {
    8383      return terminalPhrase;
    8484    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/ReadonlySequence.cs

    r11732 r11742  
    44  public class ReadonlySequence : Sequence {
    55    public ReadonlySequence(string s)
    6       : base(s) {
     6      : base(s, s.Length) {
    77    }
    88
    99    public ReadonlySequence(char ch)
    10       : base(ch) {
     10      : base(ch, 1) {
    1111    }
    1212
    1313    public ReadonlySequence(Sequence s)
    14       : base(s) {
     14      : base(s, s.Length) {
    1515    }
    1616
     
    3232    public override int GetHashCode() {
    3333      int h = 31 * Length;
    34       foreach (var ch in this) { h += 31 * (byte)h; }
     34      for (int i = 0; i < Length; i++) {
     35        h += 31 * (byte)this[i];
     36      }
    3537      return h;
    3638    }
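
The old loop added 31 * (byte)h, folding the running hash into itself without ever reading a character, so all sequences of equal length collided. The fixed version folds each symbol in, though as a plain sum it is still order-insensitive ("ab" and "ba" hash alike). A sketch of the conventional multiplicative fold that also distinguishes permutations (illustration only, not the committed code):

    public static class SequenceHashDemo {
      // h = ((17*31 + c0)*31 + c1)*31 + ... ; each symbol shifts all earlier
      // contributions, so permutations of the same symbols hash differently
      public static int Hash(string symbols) {
        unchecked {
          int h = 17;
          foreach (var ch in symbols) h = h * 31 + ch;
          return h;
        }
      }
    }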
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPairProblem.cs

    r11732 r11742  
    3434    }
    3535
    36     public string Hash(string terminalPhrase) {
     36    public string CanonicalRepresentation(string terminalPhrase) {
    3737      return terminalPhrase;
    3838    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalRoadProblem.cs

    r11732 r11742  
    2929      throw new NotImplementedException();
    3030    }
    31     public string Hash(string terminalPhrase) {
     31    public string CanonicalRepresentation(string terminalPhrase) {
    3232      return terminalPhrase;
    3333    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSymbolProblem.cs

    r11732 r11742  
    3333      return regex.Matches(sentence).Count;
    3434    }
    35     public string Hash(string terminalPhrase) {
     35    public string CanonicalRepresentation(string terminalPhrase) {
    3636      return terminalPhrase;
    3737    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalTreeProblem.cs

    r11732 r11742  
    2929      throw new NotImplementedException();
    3030    }
    31     public string Hash(string terminalPhrase) {
     31    public string CanonicalRepresentation(string terminalPhrase) {
    3232      return terminalPhrase;
    3333    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs

    r11732 r11742  
    9898    }
    9999
    100     public string Hash(string terminalPhrase) {
     100    public string CanonicalRepresentation(string terminalPhrase) {
    101101      return terminalPhrase.Replace("rl", "").Replace("lr", "");
    102102    }
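
The replacements cancel directly adjacent inverse turns, but a pair that only becomes adjacent after a removal survives a single pass, because string.Replace scans the source once: "rrll" canonicalizes to "rl" rather than "". If full cancellation is the intent (an assumption on my part, since turns still cost the ant time steps), a cascading variant would iterate to a fixpoint:

    public static class TurnCanonicalizer {
      // repeat until no further "rl"/"lr" pair can be cancelled
      public static string Canonicalize(string phrase) {
        string prev;
        do {
          prev = phrase;
          phrase = phrase.Replace("rl", "").Replace("lr", "");
        } while (phrase != prev);
        return phrase;
      }
    }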
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Sequence.cs

    r11732 r11742  
    4646    }
    4747
    48     private Sequence() {
    49       this.symbols = new char[maxIdx + 1];
     48    private Sequence(int maxLength) {
     49      this.symbols = new char[maxLength];
    5050    }
    5151
    5252    // create a sequence from a character
    5353    public Sequence(char ch)
    54       : this() {
     54      : this(ch, maxIdx + 1) {
     55    }
     56
     57    protected Sequence(char ch, int maxLength)
     58      : this(maxLength) {
    5559      this.len = 1;
    5660      symbols[0] = ch;
     
    6165
    6266    // create a sequence from a string
    63     public Sequence(string s)
    64       : this() {
     67    public Sequence(string s) : this(s, maxIdx + 1) { }
     68    protected Sequence(string s, int maxLength)
     69      : this(maxLength) {
    6570      if (string.IsNullOrEmpty(s)) throw new ArgumentException();
    6671      if (s.Length > (maxIdx + 1)) throw new ArgumentException();
     
    7782
    7883    // cloning ctor
    79     public Sequence(Sequence original)
    80       : this() {
     84    public Sequence(Sequence original) : this(original, maxIdx + 1) { }
     85    protected Sequence(Sequence original, int maxLength)
     86      : this(maxLength) {
    8187      this.len = original.len;
    8288      Array.Copy(original.symbols, this.symbols, len);
     
    128134      if (startIdx >= this.len) throw new ArgumentException();
    129135      if (startIdx + len > this.len) throw new ArgumentException();
    130       var subsequence = new Sequence { len = len };
     136      var subsequence = new Sequence(maxIdx + 1) { len = len };
    131137
    132138      Array.Copy(this.symbols, startIdx, subsequence.symbols, 0, len);
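
The new protected constructors thread a maxLength capacity down to the single allocating constructor, so the public API keeps the fixed default capacity while ReadonlySequence (above) allocates exactly the space it needs. The pattern in isolation, with simplified stand-in names rather than the full Sequence class:

    using System;

    class Buffer {
      protected const int DefaultCapacity = 32; // stand-in for maxIdx + 1
      private readonly char[] symbols;
      protected int len;

      private Buffer(int maxLength) { symbols = new char[maxLength]; }

      // public ctor keeps the old default capacity ...
      public Buffer(string s) : this(s, DefaultCapacity) { }

      // ... while the protected overload lets subclasses size exactly
      protected Buffer(string s, int maxLength) : this(maxLength) {
        if (string.IsNullOrEmpty(s)) throw new ArgumentException();
        if (s.Length > maxLength) throw new ArgumentException();
        s.CopyTo(0, symbols, 0, s.Length);
        len = s.Length;
      }
    }

    class ReadonlyBuffer : Buffer {
      // a readonly sequence never grows, so allocate only what is needed
      public ReadonlyBuffer(string s) : base(s, s.Length) { }
    }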
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs

    r11732 r11742  
    2121
    2222    private readonly IGrammar grammar;
    23     private readonly ExpressionInterpreter interpreter;
    2423
    2524    private readonly int N;
     
    2928    public SymbolicRegressionPoly10Problem() {
    3029      this.grammar = new Grammar(grammarString);
    31       this.interpreter = new ExpressionInterpreter();
    3230
    3331      this.N = 500;
     
    6765
    6866    public double Evaluate(string sentence) {
     67      var interpreter = new ExpressionInterpreter();
    6968      return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])).ToArray());
    7069    }
     
    7372
    7473    // right now only + and * are supported
    75     public string Hash(string terminalPhrase) {
     74    public string CanonicalRepresentation(string terminalPhrase) {
    7675      var terms = terminalPhrase.Split('+');
    7776      return string.Join("+", terms.Select(term => string.Join("", term.Replace("*", "").OrderBy(ch => ch)))
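
Since + and * are commutative, sorting the factors inside each term (as above) maps e.g. "b*a" and "a*b" to the same key. The hunk is truncated here; the sketch below shows the idea end to end, with the term-level ordering added as an assumption of mine rather than the committed code:

    using System;
    using System.Linq;

    static class CanonicalDemo {
      public static string Canonical(string phrase) {
        var terms = phrase.Split('+')
          .Select(t => string.Concat(t.Replace("*", "").OrderBy(ch => ch)))
          .OrderBy(t => t, StringComparer.Ordinal);
        return string.Join("+", terms);
      }
    }

    // CanonicalDemo.Canonical("b*a+c") -> "ab+c"
    // CanonicalDemo.Canonical("c+a*b") -> "ab+c"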
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11732 r11742  
    88using System.Threading.Tasks;
    99using HeuristicLab.Algorithms.Bandits;
     10using HeuristicLab.Algorithms.Bandits.BanditPolicies;
    1011using HeuristicLab.Algorithms.Bandits.Models;
    1112using HeuristicLab.Algorithms.GrammaticalOptimization;
     
    2627      //var globalRandom = new Random(31415);
    2728      var localRandSeed = 31415;
    28       var reps = 20;
    29 
    30       var policies = new Func<IPolicy>[]
     29      var reps = 8;
     30
     31      var policies = new Func<IBanditPolicy>[]
    3132        {
    32          () => new GaussianThompsonSamplingPolicy(),
     33         () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
     34         () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
     35         () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
     36         () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
     37         //() => new GaussianThompsonSamplingPolicy(),
    3338         () => new GaussianThompsonSamplingPolicy(true),
    34          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)),
    35          () => new BernoulliThompsonSamplingPolicy(),
     39         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
     40         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
     41         //() => new BernoulliThompsonSamplingPolicy(),
    3642         () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
    3743         () => new RandomPolicy(),
     
    6167         () => new ChernoffIntervalEstimationPolicy( 0.1),
    6268         () => new ChernoffIntervalEstimationPolicy( 0.2),
    63          // (rand) => new ThresholdAscentPolicy(10, 0.01),
    64          // (rand) => new ThresholdAscentPolicy(10, 0.05),
    65          // (rand) => new ThresholdAscentPolicy(10, 0.1),
    66          // (rand) => new ThresholdAscentPolicy(10, 0.2),
    67          // (rand) => new ThresholdAscentPolicy(100, 0.01),
    68          // (rand) => new ThresholdAscentPolicy(100, 0.05),
    69          // (rand) => new ThresholdAscentPolicy(100, 0.1),
    70          // (rand) => new ThresholdAscentPolicy(100, 0.2),
    71          // (rand) => new ThresholdAscentPolicy(1000, 0.01),
    72          // (rand) => new ThresholdAscentPolicy(1000, 0.05),
    73          // (rand) => new ThresholdAscentPolicy(1000, 0.1),
    74          // (rand) => new ThresholdAscentPolicy(1000, 0.2),
    75          // (rand) => new ThresholdAscentPolicy(5000, 0.01),
    76          // (rand) => new ThresholdAscentPolicy(10000, 0.01),
     69         () => new ThresholdAscentPolicy(10, 0.01),
     70         () => new ThresholdAscentPolicy(10, 0.05),
     71         () => new ThresholdAscentPolicy(10, 0.1),
     72         () => new ThresholdAscentPolicy(10, 0.2),
     73         () => new ThresholdAscentPolicy(100, 0.01),
     74         () => new ThresholdAscentPolicy(100, 0.05),
     75         () => new ThresholdAscentPolicy(100, 0.1),
     76         () => new ThresholdAscentPolicy(100, 0.2),
     77         () => new ThresholdAscentPolicy(1000, 0.01),
     78         () => new ThresholdAscentPolicy(1000, 0.05),
     79         () => new ThresholdAscentPolicy(1000, 0.1),
     80         () => new ThresholdAscentPolicy(1000, 0.2),
     81         () => new ThresholdAscentPolicy(5000, 0.01),
     82         () => new ThresholdAscentPolicy(10000, 0.01),
    7783        };
    7884
    7985      foreach (var problem in new Tuple<IProblem, int>[]
    8086        {
    81           Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
     87          //Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
    8288          Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
    8389        })
     
    8793            var localRand = new Random(localRandSeed);
    8894            var options = new ParallelOptions();
    89             options.MaxDegreeOfParallelism = 1;
     95            options.MaxDegreeOfParallelism = 4;
    9096            Parallel.For(0, reps, options, (i) => {
    9197              //var t = Task.Run(() => {
     
    132138    private static void RunDemo() {
    133139      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
    134       // TODO: implement GaussianWithUnknownMeanAndVariance Model for Thompson Sampling (verify with unit test if correct mean and variance is identified)
    135140      // TODO: separate value function from policy
    136       // TODO: debug and verify implementation variants of Gaussian Thompson Sampling with unit test
    137       // TODO: refactor Policies to use banditInfos (policies are factories for bandit infos and bandit info only has an update routine, each policy works only with it's type of banditinfo)
    138141      // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
    139142      // TODO: exhaustive search with priority list
    140       // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the old one? See comparison: old vs. new implementation of GaussianThompsonSampling
     143      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
    141144      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
    142145      // TODO: how else can samplers be compared, i.e. what can be measured to estimate a sampler's quality (apart from solution quality and iterations until the best solution) => goal: faster iterations to a good result
    143       // TODO: is the likelihood for R=1 easy to compute for a Gaussian or GaussianMixture?
    144146      // TODO: research thompson sampling for max bandit?
    145147      // TODO: thorough test of strategies for the k-armed max bandit
    146148      // TODO: verify TA implementation using example from the original paper     
    147       // TODO: compare results for different policies also for the symb-reg problem
    148149      // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
    149150      // TODO: implement thompson sampling for gaussian mixture models
    150151      // TODO: implement inspection for MCTS (possibly an interactive command line to display statistics from the tree)
    151152      // TODO: implement ACO-style bandit policy
    152       // TODO: implement sequences that can be manipulated in-place (instead of strings), alternatives are also stored as sequences, for a sequence the index of the first NT-symb can be stored
    153153      // TODO: simultaneously model transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
    154154      // TODO: compare generating the shortest possible sentences in complete-randomly mode vs. simply picking alternatives at random
     
    165165      var random = new Random();
    166166
    167       var problem = new SymbolicRegressionPoly10Problem();
    168       //var problem = new SantaFeAntProblem(); // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
    169       //var problem = new SymbolicRegressionProblem("Tower"); // very good results e.g. new EpsGreedyPolicy(0.2) using max reward as quality !!!
     167      var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
     168      // Ant
     169      // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
     170      // GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant
     171      //var problem = new SantaFeAntProblem();
     172      //var problem = new SymbolicRegressionProblem("Tower");
    170173      //var problem = new PalindromeProblem();
    171174      //var problem = new HardPalindromeProblem();
    172175      //var problem = new RoyalPairProblem();
    173176      //var problem = new EvenParityProblem();
    174       var alg = new MctsSampler(problem, 23, random, 10, new EpsGreedyPolicy(0.2)); // GaussianModelWithUnknownVariance (and Q= 0.99-quantil) works well for Ant
     177      var alg = new MctsSampler(problem, 25, random, 0, new GaussianThompsonSamplingPolicy(true));
    175178      //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
    176179      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));