using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Represents a policy for reinforcement learning.
  /// </summary>
  /// <remarks>
  /// Besides the usual action selection and reward update, a policy must also
  /// support disabling of potential actions. For instance, if we know that an
  /// action in a state has a deterministic reward, it only needs to be sampled once.
  /// </remarks>
  public interface IPolicy {
    /// <summary>
    /// Gets the actions of this policy, identified by the same integer values
    /// that <see cref="SelectAction"/> returns and that
    /// <see cref="UpdateReward"/> and <see cref="DisableAction"/> accept.
    /// </summary>
    /// <remarks>
    /// NOTE(review): whether disabled actions are still enumerated is decided
    /// by the implementing class; confirm against the implementations.
    /// </remarks>
    IEnumerable<int> Actions { get; }

    /// <summary>
    /// Selects the next action.
    /// </summary>
    /// <returns>The selected action.</returns>
    int SelectAction();

    /// <summary>
    /// Updates the policy with the reward observed for an action.
    /// </summary>
    /// <param name="action">The action the reward belongs to.</param>
    /// <param name="reward">The observed reward.</param>
    void UpdateReward(int action, double reward);

    /// <summary>
    /// Disables a potential action, e.g. because its reward is known to be
    /// deterministic and it therefore needs to be sampled only once.
    /// </summary>
    /// <param name="action">The action to disable.</param>
    void DisableAction(int action);

    /// <summary>
    /// Reinitializes the policy to its initial state (as after the constructor call).
    /// </summary>
    void Reset();

    /// <summary>
    /// Prints statistics of the policy.
    /// </summary>
    /// <remarks>
    /// NOTE(review): output target and format are defined by the implementing
    /// class; presumably the console - confirm.
    /// </remarks>
    void PrintStats();
  }
}