Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs @ 11728

Last change on this file since 11728 was 11727, checked in by gkronber, 9 years ago

#2283: worked on grammatical optimization problem solvers (simple MCTS done)

File size: 869 bytes
RevLine 
[11708]1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Represents a policy for reinforcement learning.
  /// Actions are identified by <see cref="int"/> values.
  /// </summary>
  public interface IPolicy {
    /// <summary>
    /// The actions of this policy.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the interface does not state whether actions disabled via
    /// <see cref="DisableAction"/> are still enumerated here - confirm against the implementations.
    /// </remarks>
    IEnumerable<int> Actions { get; }

    /// <summary>
    /// Action selection; together with <see cref="UpdateReward"/> it is defined as usual for policies.
    /// </summary>
    /// <returns>
    /// The selected action (presumably one of <see cref="Actions"/> - confirm against the implementations).
    /// </returns>
    int SelectAction();

    /// <summary>
    /// Reward update; together with <see cref="SelectAction"/> it is defined as usual for policies.
    /// </summary>
    /// <param name="action">The action for which the reward is reported.</param>
    /// <param name="reward">The reward for <paramref name="action"/>.</param>
    void UpdateReward(int action, double reward);

    /// <summary>
    /// Disables a potential action.
    /// </summary>
    /// <remarks>
    /// Policies must support disabling of potential actions: for instance, if we know that an
    /// action in a state has a deterministic reward, it is necessary to sample it only once.
    /// </remarks>
    /// <param name="action">The action to disable.</param>
    void DisableAction(int action);

    /// <summary>
    /// Reinitializes the policy to its initial state (as after the constructor call).
    /// </summary>
    void Reset();
  }
}
Note: See TracBrowser for help on using the repository browser.