Context Navigation

ParetoBandit.cs @ 13739

Visit:

Last change on this file since 13739 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 1.5 KB

Rev	Line
[12876]	1	using System;
	2	using System.Collections.Generic;
	3	using System.Linq;
	4	using System.Text;
	5	using System.Threading.Tasks;
	6	using HeuristicLab.Common;
	7
	8	namespace HeuristicLab.Algorithms.Bandits {
	9	// for test case 1 in Extreme Bandits paper (Carpentier, NIPS 2014)
	/// <summary>
	/// Multi-armed bandit whose arms return Pareto-distributed rewards (scale 1, shape alpha[arm]),
	/// optionally mixed with a per-arm probability of returning a reward of exactly zero.
	/// </summary>
	public class ParetoBandit : IBandit {
		// Shape parameter of each arm; used as the exponent -1/alpha in Pull.
		private readonly double[] alpha;
		// Per-arm probability that Pull returns 0.0 instead of drawing a Pareto sample.
		private readonly double[] pZero;

		/// <summary>Number of arms, i.e. the number of alpha values supplied at construction.</summary>
		public int NumArms { get { return alpha.Length; } }

		/// <summary>Index of the arm designated as best with respect to expected reward.</summary>
		public int OptimalExpectedRewardArm { get; private set; }

		/// <summary>Index of the arm designated as best with respect to maximal reward.</summary>
		public int OptimalMaximalRewardArm { get; private set; }

		private readonly Random random;

		/// <summary>
		/// Creates a bandit without zero rewards (all zero-reward probabilities are 0).
		/// Both optimal-arm properties are set to the index of the first arm with the smallest alpha.
		/// </summary>
		/// <param name="random">Random number generator used by <see cref="Pull"/>.</param>
		/// <param name="alpha">Shape parameter for each arm; enumerated exactly once.</param>
		/// <exception cref="ArgumentNullException"><paramref name="random"/> or <paramref name="alpha"/> is null.</exception>
		/// <exception cref="InvalidOperationException"><paramref name="alpha"/> is empty.</exception>
		public ParetoBandit(Random random, IEnumerable<double> alpha) {
			if (random == null) throw new ArgumentNullException("random");
			if (alpha == null) throw new ArgumentNullException("alpha");

			this.alpha = alpha.ToArray();
			this.pZero = new double[this.alpha.Length];
			this.random = random;

			// Take the minimum from the materialized array rather than the incoming sequence,
			// so a deferred or one-shot enumerable is not enumerated a second time.
			var smallestAlpha = this.alpha.Min();
			OptimalExpectedRewardArm = Array.IndexOf(this.alpha, smallestAlpha);
			OptimalMaximalRewardArm = OptimalExpectedRewardArm;
		}

		/// <summary>
		/// Creates a bandit with an explicit zero-reward probability per arm and caller-supplied optimal arms.
		/// </summary>
		/// <param name="random">Random number generator used by <see cref="Pull"/>.</param>
		/// <param name="alpha">Shape parameter for each arm.</param>
		/// <param name="pZero">
		/// Probability of a zero reward for each arm.
		/// NOTE(review): Pull indexes this by arm, so it presumably needs at least as many entries as alpha; this is not checked here.
		/// </param>
		/// <param name="bestExpRewardArm">Value stored as <see cref="OptimalExpectedRewardArm"/>; not validated.</param>
		/// <param name="bestMaxRewardArm">Value stored as <see cref="OptimalMaximalRewardArm"/>; not validated.</param>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="random"/>, <paramref name="alpha"/> or <paramref name="pZero"/> is null.
		/// </exception>
		public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero, int bestExpRewardArm, int bestMaxRewardArm) {
			if (random == null) throw new ArgumentNullException("random");
			if (alpha == null) throw new ArgumentNullException("alpha");
			if (pZero == null) throw new ArgumentNullException("pZero");

			this.alpha = alpha.ToArray();
			this.pZero = pZero.ToArray();
			this.random = random;
			OptimalExpectedRewardArm = bestExpRewardArm;
			OptimalMaximalRewardArm = bestMaxRewardArm;
		}

		/// <summary>
		/// Draws one reward from the given arm: 0.0 with probability pZero[arm],
		/// otherwise a sample computed as (1 - u)^(-1 / alpha[arm]) with u uniform in [0, 1).
		/// </summary>
		/// <param name="arm">Zero-based index of the arm to pull.</param>
		/// <returns>The sampled reward.</returns>
		public double Pull(int arm) {
			// The zero-reward check always consumes one random number, even when pZero[arm] is 0.
			if (random.NextDouble() < pZero[arm]) return 0.0;

			// Inverse-transform sampling: 1 - u lies in (0, 1], so for a positive alpha the result is >= 1.
			var u = random.NextDouble();
			return Math.Pow(1.0 - u, -1.0 / alpha[arm]);
		}
	}
	39	}

Note: See TracBrowser for help on using the repository browser.