Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
12/29/14 11:02:36 (8 years ago)
Author:
gkronber
Message:

#2283: worked on grammatical optimization problem solvers (simple MCTS done)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs

    r11708 r11727  
    66
    77namespace HeuristicLab.Algorithms.Bandits {
     8  // this interface represents a policy for reinforcement learning
    89  public interface IPolicy {
    9     int SelectAction();
    10     void UpdateReward(int action, double reward);
     10    IEnumerable<int> Actions { get; }
     11    int SelectAction(); // action selection ...
     12    void UpdateReward(int action, double reward); // ... and reward update are defined as usual
     13
     14    // policies must also support disabling of potential actions
     15    // for instance, if we know that an action in a state has a deterministic
     16    // reward, we need to sample it only once;
     17    // sampling such an action more than once is unnecessary
     18    void DisableAction(int action);
     19
     20    // reset causes the policy to be reinitialized to its initial state (as after constructor-call)
    1121    void Reset();
    1222  }
Note: See TracChangeset for help on using the changeset viewer.