using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits {
  // Bandit environment for test case 1 in the Extreme Bandits paper
  // (Carpentier & Valko, NIPS 2014): each arm i yields a reward drawn from a
  // Pareto distribution with scale 1 and shape (tail index) alpha[i],
  // optionally mixed with a point mass at zero (probability pZero[i]).
  public class ParetoBandit : IBandit {
    // Tail index per arm; a smaller alpha means a heavier tail and therefore a
    // larger expected and maximal reward. Only set in the constructors.
    private readonly double[] alpha;
    // Probability of a zero reward per arm (all zeros for the first constructor).
    private readonly double[] pZero;

    public int NumArms { get { return alpha.Length; } }
    public int OptimalExpectedRewardArm { get; private set; }
    public int OptimalMaximalRewardArm { get; private set; }

    private readonly Random random;

    // Creates a bandit with pure Pareto rewards (no zero-reward mass).
    // The arm with the smallest alpha has the heaviest tail, so it is optimal
    // both in expectation and for the maximal reward.
    // BUG FIX: the generic type argument of the parameter had been lost
    // (plain IEnumerable does not support ToArray()/Min()); restored to
    // IEnumerable<double> as required by the usage below.
    public ParetoBandit(Random random, IEnumerable<double> alpha) {
      if (random == null) throw new ArgumentNullException("random");
      if (alpha == null) throw new ArgumentNullException("alpha");
      this.alpha = alpha.ToArray();
      if (this.alpha.Length == 0) throw new ArgumentException("At least one arm is required.", "alpha");
      this.pZero = new double[this.alpha.Length]; // all zero => rewards are strictly positive
      this.random = random;
      // BUG FIX: use the materialized array for Min(); the original called
      // Min() on the input sequence after ToArray(), enumerating it a second
      // time, which fails for one-shot (non-repeatable) sequences.
      OptimalExpectedRewardArm = Array.IndexOf(this.alpha, this.alpha.Min());
      OptimalMaximalRewardArm = OptimalExpectedRewardArm;
    }

    // Creates a bandit where arm i returns 0 with probability pZero[i] and a
    // Pareto(scale 1, shape alpha[i]) sample otherwise. Because the zero mass
    // changes the optimal arms in a non-obvious way, the caller supplies them.
    public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero, int bestExpRewardArm, int bestMaxRewardArm) {
      if (random == null) throw new ArgumentNullException("random");
      if (alpha == null) throw new ArgumentNullException("alpha");
      if (pZero == null) throw new ArgumentNullException("pZero");
      this.alpha = alpha.ToArray();
      this.pZero = pZero.ToArray();
      if (this.alpha.Length == 0) throw new ArgumentException("At least one arm is required.", "alpha");
      if (this.alpha.Length != this.pZero.Length) throw new ArgumentException("alpha and pZero must have the same number of elements.", "pZero");
      this.random = random;
      OptimalExpectedRewardArm = bestExpRewardArm;
      OptimalMaximalRewardArm = bestMaxRewardArm;
    }

    // Pulls the given arm: with probability pZero[arm] the reward is 0,
    // otherwise a Pareto(1, alpha[arm]) sample is drawn via inverse-CDF
    // sampling, F^-1(u) = (1-u)^(-1/alpha) for u ~ U[0,1).
    public double Pull(int arm) {
      if (random.NextDouble() < pZero[arm]) return 0.0;
      var u = random.NextDouble();
      return Math.Pow(1.0 - u, (-1 / alpha[arm]));
    }
  }
}