Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ExtremeHunterPolicy.cs @ 12876

Last change on this file since 12876 was 12876, checked in by gkronber, 8 years ago

#2283: implemented first crude version of extreme hunter algorithm in branch

File size: 3.1 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10  // Extreme Bandits, NIPS2014
11  public class ExtremeHunterPolicy : IBanditPolicy {
12
13
14    public double E { get; set; }
15    public double D { get; set; }
16    public double delta { get; set; }
17    public double b { get; set; }
18
19    public ExtremeHunterPolicy(double E = 1.0E-3, double D = 1.0E-2, double b = 1.0) {
20      this.E = E; // parameter TODO
21      this.D = D; // parameter TODO
22      this.b = b; // parameter TODO
23      // private communication with Alexandra Carpentier:
24      // For instance, on our synthetic experiments, we calibrated the constants by
25      // cross validation, using exact Pareto distributions and b=1, and we found
26      // out that taking E =  1e-3 is acceptable. For all the datasets
27      // (exact Pareto, approximate Pareto, and network data), we kept this same constant
28    }
29
30    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
31      var myActionInfos = actionInfos.OfType<ExtremeHunterActionInfo>();
32      double bestQ = double.NegativeInfinity;
33      int totalTries = myActionInfos.Sum(a => a.Tries);
34      int K = myActionInfos.Count();
35      double n = 1.0E2; // total tries parameter
36      double minPulls = 100; // parameter: TODO (there are conditions for N given in the paper)
37
38      this.delta = Math.Exp(-Math.Log(Math.Log(n))) / (2.0 * n * K); // TODO
39
40      var bestActions = new List<int>();
41      int aIdx = -1;
42      foreach (var aInfo in myActionInfos) {
43        aIdx++;
44        double q;
45        if (aInfo.Tries <= minPulls) {
46          q = double.PositiveInfinity;
47        } else {
48          double t = aInfo.Tries;
49          double h = aInfo.Value;
50          var thres = Math.Pow(t, h / (2 * b + 1));
51          double c = Math.Pow(t, 1.0 / (2 * b + 1)) * ((1.0 / t) * aInfo.Rewards.Count(r => r >= thres));
52          q = Math.Pow((c + B2(t)) * n, h + B1(t)) * Gamma(h, B1(t)); // eqn (5)
53          Debug.Assert(q > 0);
54        }
55        if (q > bestQ) {
56          bestQ = q;
57          bestActions.Clear();
58          bestActions.Add(aIdx);
59        } else if (q.IsAlmost(bestQ)) {
60          bestActions.Add(aIdx);
61        }
62      }
63      Debug.Assert(bestActions.Any());
64      return bestActions.SelectRandom(random);
65    }
66
67    public double Gamma(double x, double y) {
68      if (1.0 - x - y <= 0) return double.PositiveInfinity;
69      else return alglib.gammafunction(1.0 - x - y); // comment on eqn 5
70    }
71
72    // eqn 2
73    public double B1(double t) {
74      return D * Math.Sqrt(Math.Log(1.0 / delta)) * Math.Pow(t, -b / (2 * b + 1));
75    }
76
77    // eqn 4
78    public double B2(double t) {
79      return E * Math.Sqrt(Math.Log(t / delta)) * Math.Log(t) * Math.Pow(t, -b / (2 * b + 1));
80    }
81
82    public IBanditPolicyActionInfo CreateActionInfo() {
83      return new ExtremeHunterActionInfo();
84    }
85    public override string ToString() {
86      return "ExtremeHunter";
87    }
88  }
89}
Note: See TracBrowser for help on using the repository browser.