Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ExtremeHunterPolicy.cs @ 13728

Visit:

Last change on this file since 13728 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 3.5 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7	using HeuristicLab.Common;
8
9	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10	// Extreme Bandits, NIPS2014
11	public class ExtremeHunterPolicy : IBanditPolicy {
12
13
14	public double E { get; set; }
15	public double D { get; set; }
16	public double delta { get; set; }
17	public double b { get; set; }
18	public double n { get; set; }
19	public int minPulls { get; set; }
20
21	public ExtremeHunterPolicy(double E = 1.0E-3, double D = 1.0E-2, double b = 1.0, double n = 1.0E4, int minPulls = 100) {
22	this.E = E; // parameter TODO
23	this.D = D; // parameter TODO
24	this.b = b; // parameter: set to 1 in original paper "to consider a wide class of distributions"
25	// private communication with Alexandra Carpentier:
26	// For instance, on our synthetic experiments, we calibrated the constants by
27	// cross validation, using exact Pareto distributions and b=1, and we found
28	// out that taking E = 1e-3 is acceptable. For all the datasets
29	// (exact Pareto, approximate Pareto, and network data), we kept this same constant
30
31	// minPulls seems to be set to 100 in the experiments in extreme bandit paper
32	this.minPulls = minPulls; // parameter: TODO (there are conditions for N given in the paper)
33	this.n = n;
34	}
35
36	public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
37	var myActionInfos = actionInfos.OfType<ExtremeHunterActionInfo>();
38	double bestQ = double.NegativeInfinity;
39	// int totalTries = myActionInfos.Sum(a => a.Tries);
40	int K = myActionInfos.Count();
41
42	this.delta = Math.Exp(-Math.Log(Math.Log(n))) / (2.0 * n * K); // TODO
43
44	var bestActions = new List<int>();
45	int aIdx = -1;
46	foreach (var aInfo in myActionInfos) {
47	aIdx++;
48	double q;
49	if (aInfo.Tries <= minPulls) {
50	q = double.PositiveInfinity;
51	} else {
52	double t = aInfo.Tries;
53	double h = aInfo.Value;
54	if (double.IsInfinity(h)) q = 0;
55	else {
56	var thres = Math.Pow(t, h / (2 * b + 1));
57	double c = Math.Pow(t, 1.0 / (2 * b + 1)) * ((1.0 / t) * aInfo.Rewards.Count(r => r >= thres));
58	q = Math.Pow((c + B2(t)) * n, h + B1(t)) * Gamma(h, B1(t)); // eqn (5)
59	Debug.Assert(q > 0);
60	}
61	}
62	if (q > bestQ) {
63	bestQ = q;
64	bestActions.Clear();
65	bestActions.Add(aIdx);
66	} else if (q.IsAlmost(bestQ)) {
67	bestActions.Add(aIdx);
68	}
69	}
70	Debug.Assert(bestActions.Any());
71	return bestActions.SelectRandom(random);
72	}
73
74	public double Gamma(double x, double y) {
75	if (1.0 - x - y <= 0) return double.PositiveInfinity;
76	else return alglib.gammafunction(1.0 - x - y); // comment on eqn 5
77	}
78
79	// eqn 2
80	public double B1(double t) {
81	return D * Math.Sqrt(Math.Log(1.0 / delta)) * Math.Pow(t, -b / (2 * b + 1));
82	}
83
84	// eqn 4
85	public double B2(double t) {
86	return E * Math.Sqrt(Math.Log(t / delta)) * Math.Log(t) * Math.Pow(t, -b / (2 * b + 1));
87	}
88
89	public IBanditPolicyActionInfo CreateActionInfo() {
90	return new ExtremeHunterActionInfo();
91	}
92	public override string ToString() {
93	return string.Format("ExtremeHunter(E={0:F2},D={1:F2},b={2:F2},n={3:F0},minPulls={4:F0}", E, D, b, n, minPulls);
94	}
95	}
96	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences