using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  // Extreme Bandits, NIPS2014
  public class ExtremeHunterPolicy : IBanditPolicy {
    public double E { get; set; }
    public double D { get; set; }
    public double delta { get; set; }
    public double b { get; set; }
    public double n { get; set; }
    public int minPulls { get; set; }

    public ExtremeHunterPolicy(double E = 1.0E-3, double D = 1.0E-2, double b = 1.0,
      double n = 1.0E4, int minPulls = 100) {
      this.E = E; // parameter TODO
      this.D = D; // parameter TODO
      this.b = b; // parameter: set to 1 in original paper "to consider a wide class of distributions"
      // private communication with Alexandra Carpentier:
      // For instance, on our synthetic experiments, we calibrated the constants by
      // cross validation, using exact Pareto distributions and b=1, and we found
      // out that taking E = 1e-3 is acceptable. For all the datasets
      // (exact Pareto, approximate Pareto, and network data), we kept this same constant.

      // minPulls seems to be set to 100 in the experiments in the extreme bandit paper
      this.minPulls = minPulls; // parameter: TODO (there are conditions for N given in the paper)
      this.n = n;
    }

    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<ExtremeHunterActionInfo>();
      double bestQ = double.NegativeInfinity;
      // int totalTries = myActionInfos.Sum(a => a.Tries);
      int K = myActionInfos.Count();
      this.delta = Math.Exp(-Math.Log(Math.Log(n))) / (2.0 * n * K); // TODO
      var bestActions = new List<int>();
      int aIdx = -1;
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        double q;
        if (aInfo.Tries <= minPulls) {
          // force initial exploration: every arm is pulled at least minPulls times
          q = double.PositiveInfinity;
        } else {
          double t = aInfo.Tries;
          double h = aInfo.Value; // estimated tail coefficient of this arm's reward distribution
          if (double.IsInfinity(h)) q = 0;
          else {
            // empirical fraction of rewards above the threshold t^(h / (2b + 1)), rescaled
            var thres = Math.Pow(t, h / (2 * b + 1));
            double c = Math.Pow(t, 1.0 / (2 * b + 1)) * ((1.0 / t) * aInfo.Rewards.Count(r => r >= thres));
            q = Math.Pow((c + B2(t)) * n, h + B1(t)) * Gamma(h, B1(t)); // eqn (5)
            Debug.Assert(q > 0);
          }
        }
        if (q > bestQ) {
          bestQ = q;
          bestActions.Clear();
          bestActions.Add(aIdx);
        } else if (q.IsAlmost(bestQ)) {
          bestActions.Add(aIdx);
        }
      }
      Debug.Assert(bestActions.Any());
      return bestActions.SelectRandom(random);
    }

    public double Gamma(double x, double y) {
      if (1.0 - x - y <= 0) return double.PositiveInfinity;
      else return alglib.gammafunction(1.0 - x - y); // comment on eqn 5
    }

    // eqn 2
    public double B1(double t) {
      return D * Math.Sqrt(Math.Log(1.0 / delta)) * Math.Pow(t, -b / (2 * b + 1));
    }

    // eqn 4
    public double B2(double t) {
      return E * Math.Sqrt(Math.Log(t / delta)) * Math.Log(t) * Math.Pow(t, -b / (2 * b + 1));
    }

    public IBanditPolicyActionInfo CreateActionInfo() {
      return new ExtremeHunterActionInfo();
    }

    public override string ToString() {
      return string.Format("ExtremeHunter(E={0:F2},D={1:F2},b={2:F2},n={3:F0},minPulls={4:F0})",
        E, D, b, n, minPulls);
    }
  }
}
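
// --------------------------------------------------------------------------
// Usage sketch (illustrative, not part of the policy): computes the per-arm
// index of eqn (5) for a single arm directly from a raw reward sample. The
// class name, the fixed delta, and passing the tail estimate h as a plain
// argument are assumptions made to keep the example self-contained; in the
// policy above, h comes from ExtremeHunterActionInfo.Value and delta is
// recomputed from n and K on every call to SelectAction.
// --------------------------------------------------------------------------
public static class ExtremeHunterIndexDemo {
  // eqn 2, same form as ExtremeHunterPolicy.B1
  static double B1(double t, double D, double b, double delta) {
    return D * Math.Sqrt(Math.Log(1.0 / delta)) * Math.Pow(t, -b / (2 * b + 1));
  }

  // eqn 4, same form as ExtremeHunterPolicy.B2
  static double B2(double t, double E, double b, double delta) {
    return E * Math.Sqrt(Math.Log(t / delta)) * Math.Log(t) * Math.Pow(t, -b / (2 * b + 1));
  }

  // Per-arm index of eqn (5). rewards: all observed payoffs of the arm so far,
  // h: the arm's (finite) tail-coefficient estimate, n: the horizon.
  // Default parameter values mirror the policy's constructor; delta = 1e-8 is
  // an arbitrary placeholder of roughly the right order of magnitude for n = 1e4.
  public static double Index(double[] rewards, double h, double n,
    double E = 1.0E-3, double D = 1.0E-2, double b = 1.0, double delta = 1.0E-8) {
    double t = rewards.Length;
    double thres = Math.Pow(t, h / (2 * b + 1));
    double c = Math.Pow(t, 1.0 / (2 * b + 1)) * (rewards.Count(r => r >= thres) / t);
    double b1 = B1(t, D, b, delta);
    double gamma = (1.0 - h - b1 <= 0) ? double.PositiveInfinity
                                       : alglib.gammafunction(1.0 - h - b1);
    return Math.Pow((c + B2(t, E, b, delta)) * n, h + b1) * gamma;
  }

  // Example: index of an arm after 500 pulls with tail estimate h = 0.5 over
  // a horizon of n = 10000 rounds:
  //   double q = ExtremeHunterIndexDemo.Index(rewards, 0.5, 1.0E4);
}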