Changeset 11732 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs
- Timestamp: 01/07/15 09:21:46 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs
r11730  r11732
     8       8        // this interface represents a policy for reinforcement learning
     9       9        public interface IPolicy {
    10             -    IEnumerable<int> Actions { get; }
    11             -    int SelectAction(); // action selection ...
    12             -    void UpdateReward(int action, double reward); // ... and reward update are defined as usual
    13             -
    14             -    // policies must also support disabling of potential actions
    15             -    // for instance if we know that an action in a state has a deterministic
    16             -    // reward we need to sample it only once
    17             -    // it is necessary to sample an action only once
    18             -    void DisableAction(int action);
    19             -
    20             -    // reset causes the policy to be reinitialized to it's initial state (as after constructor-call)
    21             -    void Reset();
    22             -
    23             -    void PrintStats();
            10     +    int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos);
            11     +    IPolicyActionInfo CreateActionInfo();
    24      12        }
    25      13      }
Note: See TracChangeset for help on using the changeset viewer.