using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Represents a policy for reinforcement learning: it selects actions and is
  /// updated with the rewards that were observed for those actions.
  /// </summary>
  public interface IPolicy {
    /// <summary>
    /// Gets the actions of this policy; each action is identified by an integer.
    /// </summary>
    // NOTE(review): the interface does not state whether actions disabled via
    // DisableAction are still enumerated here - confirm against the implementations.
    IEnumerable<int> Actions { get; }
    /// <summary>
    /// Selects the next action.
    /// </summary>
    /// <returns>The integer identifying the selected action.</returns>
    // NOTE(review): presumably the returned value is one of the values in Actions - confirm.
    int SelectAction();
    /// <summary>
    /// Updates the policy with the reward received for an action.
    /// </summary>
    /// <param name="action">The action the reward belongs to.</param>
    /// <param name="reward">The reward received for <paramref name="action"/>.</param>
    void UpdateReward(int action, double reward);

    /// <summary>
    /// Disables a potential action. Policies must support this because, for
    /// instance, an action in a state that is known to have a deterministic
    /// reward needs to be sampled only once.
    /// </summary>
    /// <param name="action">The action to disable.</param>
    // NOTE(review): presumably a disabled action is no longer returned by
    // SelectAction - the interface itself does not enforce this; confirm.
    void DisableAction(int action);

    /// <summary>
    /// Reinitializes the policy to its initial state (the state it has right
    /// after the constructor call).
    /// </summary>
    void Reset();

    /// <summary>
    /// Prints statistics of the policy.
    /// </summary>
    // NOTE(review): output destination and format are not specified by the
    // interface; they are up to the implementation.
    void PrintStats();
  }
}