1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using System.Text;
|
---|
5 | using System.Threading.Tasks;
|
---|
6 | using HeuristicLab.Common;
|
---|
7 |
|
---|
8 | namespace HeuristicLab.Algorithms.Bandits {
|
---|
9 | // for test case 1 in Extreme Bandits paper (Carpentier, NIPS 2014)
|
---|
// Multi-armed bandit whose arms pay out Pareto-distributed rewards,
// optionally mixed with a point mass at zero. Used for test case 1 in the
// Extreme Bandits paper (Carpentier, NIPS 2014).
public class ParetoBandit : IBandit {
  // Tail index of each arm's Pareto distribution; a smaller alpha means a
  // heavier tail and therefore larger (possibly infinite) expected reward.
  private readonly double[] alpha;
  // Per-arm probability of returning a zero reward instead of a Pareto draw.
  private readonly double[] pZero;
  public int NumArms { get { return alpha.Length; } }
  public int OptimalExpectedRewardArm { get; private set; }
  public int OptimalMaximalRewardArm { get; private set; }
  private readonly Random random;

  // Pure Pareto arms (no zero-reward probability). The optimal arm is the
  // one with the smallest tail index alpha.
  public ParetoBandit(Random random, IEnumerable<double> alpha) {
    this.alpha = alpha.ToArray();
    this.pZero = new double[this.alpha.Length];
    this.random = random;
    // Take Min() from the materialized array instead of the IEnumerable
    // parameter: the original re-enumerated the (possibly lazy) sequence a
    // second time, which can yield different values or throw for
    // non-repeatable enumerables.
    OptimalExpectedRewardArm = Array.IndexOf(this.alpha, this.alpha.Min());
    OptimalMaximalRewardArm = OptimalExpectedRewardArm;
  }

  // Pareto arms mixed with a per-arm zero-reward probability. Because the
  // mixture makes the best arm non-obvious, the caller supplies the indices
  // of the arms with the best expected and best maximal reward.
  public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero, int bestExpRewardArm, int bestMaxRewardArm) { // probability of a zero reward
    this.alpha = alpha.ToArray();
    this.pZero = pZero.ToArray();
    this.random = random;
    OptimalExpectedRewardArm = bestExpRewardArm;
    OptimalMaximalRewardArm = bestMaxRewardArm;
  }

  // Samples one reward from the given arm: zero with probability pZero[arm],
  // otherwise a Pareto(alpha[arm]) draw via inverse-transform sampling.
  public double Pull(int arm) {
    if (random.NextDouble() < pZero[arm]) return 0.0;
    // Pareto CDF: F(x) = 1 - x^(-alpha) for x >= 1, so the inverse is
    // x = (1 - u)^(-1/alpha). NextDouble() is in [0, 1), hence 1 - u is in
    // (0, 1] and the result is always >= 1.
    var u = random.NextDouble();
    return Math.Pow(1.0 - u, (-1 / alpha[arm]));
  }
}
|
---|
39 | }
|
---|