Ignore:
Timestamp:
01/07/15 09:21:46 (5 years ago)
Author:
gkronber
Message:

#2283: refactoring and bug fixes

File:
1 edited

Legend:

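Unmodified (line is prefixed with both its r11730 and its r11732 line number)
Added (line is prefixed only with its r11732 line number)
Removed (line is prefixed only with its r11730 line number)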
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GenericThompsonSamplingPolicy.cs

    r11730 r11732  
    8 8
    9 9 namespace HeuristicLab.Algorithms.Bandits {
    10   public class GenericThompsonSamplingPolicy : BanditPolicy {
    11     private readonly Random random;
     10  public class GenericThompsonSamplingPolicy : IPolicy {
    12 11    private readonly IModel model;
    13 12
    14     public GenericThompsonSamplingPolicy(Random random, int numActions, IModel model)
    15       : base(numActions) {
    16       this.random = random;
     13    public GenericThompsonSamplingPolicy(IModel model) {
    17 14      this.model = model;
    18 15    }
    19 16
    20     public override int SelectAction() {
    21       Debug.Assert(Actions.Any());
    22       var maxR = double.NegativeInfinity;
     17    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     18      var myActionInfos = actionInfos.OfType<ModelPolicyActionInfo>();
    23 19      int bestAction = -1;
    24       var expRewards = model.SampleExpectedRewards(random);
    25       foreach (var a in Actions) {
    26         var r = expRewards[a];
    27         if (r > maxR) {
    28           maxR = r;
    29           bestAction = a;
     20      double bestQ = double.NegativeInfinity;
     21      var aIdx = -1;
     22      foreach (var aInfo in myActionInfos) {
     23        aIdx++;
     24        if (aInfo.Disabled) continue;
     25        //if (aInfo.Tries == 0) return aIdx;
     26        var q = aInfo.SampleExpectedReward(random);
     27        if (q > bestQ) {
     28          bestQ = q;
     29          bestAction = aIdx;
    30 30        }
    31 31      }
     32      Debug.Assert(bestAction > -1);
    32 33      return bestAction;
    33 34    }
    34 35
    35     public override void UpdateReward(int action, double reward) {
    36       Debug.Assert(Actions.Contains(action));
    37 
    38       model.Update(action, reward);
    39     }
    40 
    41     public override void DisableAction(int action) {
    42       base.DisableAction(action);
    43       model.Disable(action);
    44     }
    45 
    46     public override void Reset() {
    47       base.Reset();
    48       model.Reset();
    49     }
    50 
    51     public override void PrintStats() {
    52       model.PrintStats();
     36    public IPolicyActionInfo CreateActionInfo() {
     37      return new ModelPolicyActionInfo((IModel)model.Clone());
    53 38    }
    54 39
Note: See TracChangeset for help on using the changeset viewer.