using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// A multi-armed bandit whose arms are each backed by one <see cref="IModel"/>.
  /// Pulling an arm draws a sample from that arm's model.
  /// </summary>
  public class Bandit : IBandit {
    /// <summary>Number of arms, i.e. the number of models supplied at construction.</summary>
    public int NumArms {
      get { return distributions.Length; }
    }

    /// <summary>
    /// Index of the arm the caller designated as having the best expected reward.
    /// The value is stored as given to the constructor; it is not verified here.
    /// </summary>
    public int OptimalExpectedRewardArm { get; private set; }

    /// <summary>
    /// Index of the arm the caller designated as having the best maximal reward.
    /// The value is stored as given to the constructor; it is not verified here.
    /// </summary>
    public int OptimalMaximalRewardArm { get; private set; }

    // One model per arm; the array index is the arm index.
    private readonly IModel[] distributions;

    // Random source handed to IModel.Sample on every pull.
    private readonly Random random;

    /// <summary>
    /// Creates a bandit with one arm per element of <paramref name="distributions"/>.
    /// </summary>
    /// <param name="random">Random number generator passed to each model when sampling.</param>
    /// <param name="distributions">
    /// The reward models, one per arm. The sequence is copied into an array once,
    /// so later changes to the source sequence do not affect this bandit.
    /// </param>
    /// <param name="bestExpRewardArm">Arm index exposed as <see cref="OptimalExpectedRewardArm"/>.</param>
    /// <param name="bestMaxRewardArm">Arm index exposed as <see cref="OptimalMaximalRewardArm"/>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="distributions"/> is <c>null</c>.
    /// </exception>
    public Bandit(Random random, IEnumerable<IModel> distributions, int bestExpRewardArm, int bestMaxRewardArm) {
      // ToArray would also throw ArgumentNullException, but it would report the
      // parameter as "source"; fail here with the caller-visible parameter name.
      if (distributions == null) throw new ArgumentNullException("distributions");

      this.random = random;
      this.distributions = distributions.ToArray();
      OptimalExpectedRewardArm = bestExpRewardArm;
      OptimalMaximalRewardArm = bestMaxRewardArm;
    }

    /// <summary>
    /// Pulls the given arm and returns the reward sampled from that arm's model.
    /// </summary>
    /// <param name="arm">
    /// Zero-based arm index into the model array. No range check is performed here,
    /// so an invalid index fails on the array access.
    /// </param>
    /// <returns>The value produced by the arm's <c>Sample</c> call.</returns>
    public double Pull(int arm) {
      // The reward distribution is whatever the arm's IModel implements.
      return distributions[arm].Sample(random);
    }
  }
}