source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs @ 11728

Last change on this file since 11728 was 11727, checked in by gkronber, 8 years ago

#2283: worked on grammatical optimization problem solvers (simple MCTS done)

File size: 869 bytes
Line 
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Represents a policy for reinforcement learning: it selects actions and is
  /// updated with the rewards observed for those actions.
  /// </summary>
  public interface IPolicy {
    /// <summary>
    /// Gets the actions of this policy, each identified by an integer.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the interface does not state whether actions disabled via
    /// <see cref="DisableAction"/> are still enumerated here; confirm against
    /// the implementations.
    /// </remarks>
    IEnumerable<int> Actions { get; }

    /// <summary>
    /// Selects the next action to take (action selection as usually defined
    /// for bandit policies).
    /// </summary>
    /// <returns>
    /// The identifier of the selected action (presumably one of
    /// <see cref="Actions"/>; verify against the implementations).
    /// </returns>
    int SelectAction();

    /// <summary>
    /// Updates the policy with the reward observed for an action (reward
    /// update as usually defined for bandit policies).
    /// </summary>
    /// <param name="action">The action the reward was observed for.</param>
    /// <param name="reward">The observed reward.</param>
    void UpdateReward(int action, double reward);

    /// <summary>
    /// Disables a potential action.
    /// </summary>
    /// <remarks>
    /// Policies must support disabling because some actions need to be sampled
    /// only once, for instance when an action in a state is known to have a
    /// deterministic reward.
    /// </remarks>
    /// <param name="action">The action to disable.</param>
    void DisableAction(int action);

    /// <summary>
    /// Reinitializes the policy to its initial state (the state it has right
    /// after the constructor call).
    /// </summary>
    void Reset();
  }
}
Note: See TracBrowser for help on using the repository browser.