source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ModelPolicyActionInfo.cs @ 11744

Last change on this file since 11744 was 11744, checked in by gkronber, 7 years ago

#2283 worked on TD, and models for MCTS

File size: 1.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7
8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
9  // uses a statistical model to sample and update posterior distribution p(Reward | Data)
10  public class ModelPolicyActionInfo : IBanditPolicyActionInfo {
11    private readonly IModel model;
12    public bool Disabled { get { return Tries == -1; } }
13    public double Value { get { return model.SampleExpectedReward(new Random()); } }
14
15    public int Tries { get; private set; }
16    public ModelPolicyActionInfo(IModel model) {
17      this.model = model;
18    }
19
20    public void UpdateReward(double reward) {
21      Debug.Assert(!Disabled);
22      Tries++;
23      model.Update(reward);
24    }
25
26    public double SampleExpectedReward(Random random) {
27      return model.SampleExpectedReward(random);
28    }
29
30    public void Disable() {
31      this.Tries = -1;
32    }
33
34    public void Reset() {
35      Tries = 0;
36      model.Reset();
37    }
38
39    public void PrintStats() {
40      model.PrintStats();
41    }
42
43    public override string ToString() {
44      return string.Format("disabled {0} model {1}", Disabled, model);
45    }
46  }
47}
Note: See TracBrowser for help on using the repository browser.