Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GenericThompsonSamplingPolicy.cs @ 11730

Last change on this file since 11730 was 11730, checked in by gkronber, 9 years ago

#2283: several major extensions for grammatical optimization

File size: 1.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Bandit policy that picks, among the currently available actions, the one
  /// with the highest reward value obtained from
  /// <c>model.SampleExpectedRewards</c>. Reward updates, action disabling,
  /// resetting and statistics printing are forwarded to the model.
  /// </summary>
  public class GenericThompsonSamplingPolicy : BanditPolicy {
    // Random source handed to the model each time rewards are sampled.
    private readonly Random random;
    // Model that supplies the sampled rewards and receives all updates.
    private readonly IModel model;

    /// <summary>
    /// Creates the policy.
    /// </summary>
    /// <param name="random">Random source passed to the model when sampling.</param>
    /// <param name="numActions">Number of actions; forwarded to the base constructor.</param>
    /// <param name="model">Model used for sampling and updated with observed rewards.</param>
    public GenericThompsonSamplingPolicy(Random random, int numActions, IModel model)
      : base(numActions) {
      this.model = model;
      this.random = random;
    }

    /// <summary>
    /// Samples rewards from the model once and returns the action in
    /// <c>Actions</c> whose sampled value is strictly greatest; on ties the
    /// action enumerated first wins.
    /// </summary>
    /// <returns>
    /// The selected action, or -1 when no action's sampled value compares
    /// greater than negative infinity (for example when <c>Actions</c> is empty
    /// and the debug assertion is not compiled in).
    /// </returns>
    public override int SelectAction() {
      Debug.Assert(Actions.Any());
      var sampledRewards = model.SampleExpectedRewards(random);
      int chosen = -1;
      var highest = double.NegativeInfinity;
      foreach (var candidate in Actions) {
        var sampled = sampledRewards[candidate];
        // Written as a negated ">" so that NaN samples are skipped exactly
        // like a failed "sampled > highest" test would skip them.
        if (!(sampled > highest)) {
          continue;
        }
        highest = sampled;
        chosen = candidate;
      }
      return chosen;
    }

    /// <summary>
    /// Passes an observed reward for the given action on to the model.
    /// </summary>
    /// <param name="action">Action that was played; asserted (debug only) to be in <c>Actions</c>.</param>
    /// <param name="reward">Reward observed for that action.</param>
    public override void UpdateReward(int action, double reward) {
      Debug.Assert(Actions.Contains(action));

      model.Update(action, reward);
    }

    /// <summary>
    /// Disables the action in the base policy first and then in the model.
    /// </summary>
    /// <param name="action">Action to disable.</param>
    public override void DisableAction(int action) {
      base.DisableAction(action);
      model.Disable(action);
    }

    /// <summary>
    /// Resets the base policy first and then the model.
    /// </summary>
    public override void Reset() {
      base.Reset();
      model.Reset();
    }

    /// <summary>
    /// Delegates statistics printing to the model.
    /// </summary>
    public override void PrintStats() {
      model.PrintStats();
    }

    /// <summary>
    /// Returns the policy name with the model's string form in parentheses.
    /// </summary>
    public override string ToString() {
      const string template = "GenericThompsonSamplingPolicy({0})";
      return string.Format(template, model);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.