Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ExtremeHunterPolicy.cs @ 12966

Last change on this file since 12966 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 3.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10  // Extreme Bandits, NIPS2014
11  public class ExtremeHunterPolicy : IBanditPolicy {
12
13
14    public double E { get; set; }
15    public double D { get; set; }
16    public double delta { get; set; }
17    public double b { get; set; }
18    public double n { get; set; }
19    public int minPulls { get; set; }
20
21    public ExtremeHunterPolicy(double E = 1.0E-3, double D = 1.0E-2, double b = 1.0, double n = 1.0E4, int minPulls = 100) {
22      this.E = E; // parameter TODO
23      this.D = D; // parameter TODO
24      this.b = b; // parameter: set to 1 in original paper "to consider a wide class of distributions"
25      // private communication with Alexandra Carpentier:
26      // For instance, on our synthetic experiments, we calibrated the constants by
27      // cross validation, using exact Pareto distributions and b=1, and we found
28      // out that taking E =  1e-3 is acceptable. For all the datasets
29      // (exact Pareto, approximate Pareto, and network data), we kept this same constant
30
31      // minPulls seems to be set to 100 in the experiments in extreme bandit paper
32      this.minPulls = minPulls; // parameter: TODO (there are conditions for N given in the paper)
33      this.n = n;
34    }
35
36    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
37      var myActionInfos = actionInfos.OfType<ExtremeHunterActionInfo>();
38      double bestQ = double.NegativeInfinity;
39      // int totalTries = myActionInfos.Sum(a => a.Tries);
40      int K = myActionInfos.Count();
41
42      this.delta = Math.Exp(-Math.Log(Math.Log(n))) / (2.0 * n * K); // TODO
43
44      var bestActions = new List<int>();
45      int aIdx = -1;
46      foreach (var aInfo in myActionInfos) {
47        aIdx++;
48        double q;
49        if (aInfo.Tries <= minPulls) {
50          q = double.PositiveInfinity;
51        } else {
52          double t = aInfo.Tries;
53          double h = aInfo.Value;
54          if (double.IsInfinity(h)) q = 0;
55          else {
56            var thres = Math.Pow(t, h / (2 * b + 1));
57            double c = Math.Pow(t, 1.0 / (2 * b + 1)) * ((1.0 / t) * aInfo.Rewards.Count(r => r >= thres));
58            q = Math.Pow((c + B2(t)) * n, h + B1(t)) * Gamma(h, B1(t)); // eqn (5)
59            Debug.Assert(q > 0);
60          }
61        }
62        if (q > bestQ) {
63          bestQ = q;
64          bestActions.Clear();
65          bestActions.Add(aIdx);
66        } else if (q.IsAlmost(bestQ)) {
67          bestActions.Add(aIdx);
68        }
69      }
70      Debug.Assert(bestActions.Any());
71      return bestActions.SelectRandom(random);
72    }
73
74    public double Gamma(double x, double y) {
75      if (1.0 - x - y <= 0) return double.PositiveInfinity;
76      else return alglib.gammafunction(1.0 - x - y); // comment on eqn 5
77    }
78
79    // eqn 2
80    public double B1(double t) {
81      return D * Math.Sqrt(Math.Log(1.0 / delta)) * Math.Pow(t, -b / (2 * b + 1));
82    }
83
84    // eqn 4
85    public double B2(double t) {
86      return E * Math.Sqrt(Math.Log(t / delta)) * Math.Log(t) * Math.Pow(t, -b / (2 * b + 1));
87    }
88
89    public IBanditPolicyActionInfo CreateActionInfo() {
90      return new ExtremeHunterActionInfo();
91    }
92    public override string ToString() {
93      return string.Format("ExtremeHunter(E={0:F2},D={1:F2},b={2:F2},n={3:F0},minPulls={4:F0}", E, D, b, n, minPulls);
94    }
95  }
96}
Note: See TracBrowser for help on using the repository browser.