using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  // Extreme Bandits, NIPS 2014
  public class ExtremeHunterPolicy : IBanditPolicy {

    public double E { get; set; }
    public double D { get; set; }
    public double delta { get; set; }
    public double b { get; set; }
    public double n { get; set; }
    public int minPulls { get; set; }

    public ExtremeHunterPolicy(double E = 1.0E-3, double D = 1.0E-2, double b = 1.0, double n = 1.0E4, int minPulls = 100) {
      this.E = E; // parameter TODO
      this.D = D; // parameter TODO
      this.b = b; // parameter: set to 1 in the original paper "to consider a wide class of distributions"
      // private communication with Alexandra Carpentier:
      // "For instance, on our synthetic experiments, we calibrated the constants by
      // cross validation, using exact Pareto distributions and b=1, and we found
      // out that taking E = 1e-3 is acceptable. For all the datasets
      // (exact Pareto, approximate Pareto, and network data), we kept this same constant."

      // minPulls seems to be set to 100 in the experiments of the extreme bandits paper
      this.minPulls = minPulls; // parameter: TODO (there are conditions for N given in the paper)
      this.n = n;
    }

    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<ExtremeHunterActionInfo>();
      double bestQ = double.NegativeInfinity;
      // int totalTries = myActionInfos.Sum(a => a.Tries);
      int K = myActionInfos.Count();

      // exp(-log(log(n))) = 1 / log(n), i.e. delta = 1 / (2 n K log(n))
      this.delta = Math.Exp(-Math.Log(Math.Log(n))) / (2.0 * n * K); // TODO

      var bestActions = new List<int>();
      int aIdx = -1;
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        double q;
        if (aInfo.Tries <= minPulls) {
          // force initial exploration: every arm is pulled at least minPulls times
          q = double.PositiveInfinity;
        } else {
          double t = aInfo.Tries;
          double h = aInfo.Value; // current tail estimate h for this arm (cf. eqn (5))
          if (double.IsInfinity(h)) q = 0;
          else {
            var thres = Math.Pow(t, h / (2 * b + 1));
            // scaled empirical probability of rewards exceeding the threshold
            double c = Math.Pow(t, 1.0 / (2 * b + 1)) * ((1.0 / t) * aInfo.Rewards.Count(r => r >= thres));
            q = Math.Pow((c + B2(t)) * n, h + B1(t)) * Gamma(h, B1(t)); // eqn (5)
            Debug.Assert(q > 0);
          }
        }
        if (q > bestQ) {
          bestQ = q;
          bestActions.Clear();
          bestActions.Add(aIdx);
        } else if (q.IsAlmost(bestQ)) {
          bestActions.Add(aIdx);
        }
      }
      Debug.Assert(bestActions.Any());
      return bestActions.SelectRandom(random);
    }

    public double Gamma(double x, double y) {
      // complete gamma function evaluated at 1 - x - y; treated as infinite when 1 - x - y <= 0
      if (1.0 - x - y <= 0) return double.PositiveInfinity;
      else return alglib.gammafunction(1.0 - x - y); // comment on eqn 5
    }

    // eqn 2
    public double B1(double t) {
      return D * Math.Sqrt(Math.Log(1.0 / delta)) * Math.Pow(t, -b / (2 * b + 1));
    }

    // eqn 4
    public double B2(double t) {
      return E * Math.Sqrt(Math.Log(t / delta)) * Math.Log(t) * Math.Pow(t, -b / (2 * b + 1));
    }
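
    // The quantities computed above, written out exactly as this implementation evaluates them
    // (notation follows the code; cf. eqns (2), (4), (5) of the Extreme Bandits paper):
    //   B1(t)   = D * sqrt(log(1/delta)) * t^(-b/(2b+1))
    //   B2(t)   = E * sqrt(log(t/delta)) * log(t) * t^(-b/(2b+1))
    //   c_k     = t^(1/(2b+1)) * (1/t) * |{ r in Rewards_k : r >= t^(h_k/(2b+1)) }|
    //   q_k     = ((c_k + B2(t)) * n)^(h_k + B1(t)) * GammaFunction(1 - h_k - B1(t))
    // where t is the number of pulls of arm k and h_k is the arm's current estimate (Value).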

    public IBanditPolicyActionInfo CreateActionInfo() {
      return new ExtremeHunterActionInfo();
    }
    public override string ToString() {
      return string.Format("ExtremeHunter(E={0:F2},D={1:F2},b={2:F2},n={3:F0},minPulls={4:F0})", E, D, b, n, minPulls);
    }
  }
}
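
// Usage sketch: one way an ExtremeHunterPolicy could drive a bandit loop. Only SelectAction and
// CreateActionInfo are visible in this file, so the reward-update call on the action info
// (UpdateReward below) is an assumed part of the surrounding HeuristicLab bandit API, and
// PullArm, K, and horizon are purely hypothetical placeholders for the environment.
//
//   var random = new Random();
//   var policy = new ExtremeHunterPolicy();
//   var actionInfos = Enumerable.Range(0, K).Select(_ => policy.CreateActionInfo()).ToArray();
//   for (int i = 0; i < horizon; i++) {
//     int a = policy.SelectAction(random, actionInfos);
//     double reward = PullArm(a);            // hypothetical environment call
//     actionInfos[a].UpdateReward(reward);   // assumed update method (not shown in this file)
//   }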