Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/ParetoBandit.cs @ 13805

Last change on this file since 13805 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 1.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6using HeuristicLab.Common;
7
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Bandit whose arms produce Pareto-distributed rewards (scale 1, shape <c>alpha[arm]</c>),
  /// optionally replaced by a zero reward with probability <c>pZero[arm]</c>.
  /// For test case 1 in Extreme Bandits paper (Carpentier, NIPS 2014).
  /// </summary>
  public class ParetoBandit : IBandit {
    // Shape parameter of each arm; assigned once in the constructor.
    private readonly double[] alpha;
    // Probability that a pull of the arm yields exactly 0.0; assigned once in the constructor.
    private readonly double[] pZero;
    private readonly Random random;

    /// <summary>Number of arms, i.e. the number of shape parameters supplied.</summary>
    public int NumArms { get { return alpha.Length; } }

    /// <summary>Index of the arm regarded as best with respect to expected reward.</summary>
    public int OptimalExpectedRewardArm { get; private set; }

    /// <summary>Index of the arm regarded as best with respect to maximal reward.</summary>
    public int OptimalMaximalRewardArm { get; private set; }

    /// <summary>
    /// Creates a bandit without zero rewards (all zero-probabilities are 0).
    /// Both optimal-arm properties are set to the first arm with the smallest shape parameter.
    /// </summary>
    /// <param name="random">Random number source used by <see cref="Pull"/>.</param>
    /// <param name="alpha">Shape parameter for each arm; enumerated exactly once.</param>
    /// <exception cref="ArgumentNullException"><paramref name="random"/> or <paramref name="alpha"/> is null.</exception>
    /// <exception cref="InvalidOperationException"><paramref name="alpha"/> is empty.</exception>
    public ParetoBandit(Random random, IEnumerable<double> alpha) {
      if (random == null) throw new ArgumentNullException("random");
      if (alpha == null) throw new ArgumentNullException("alpha");

      this.alpha = alpha.ToArray();
      this.pZero = new double[this.alpha.Length];
      this.random = random;

      // Take the minimum from the materialized copy instead of re-enumerating the parameter:
      // a deferred or non-repeatable sequence could otherwise yield a value that is not in the array.
      OptimalExpectedRewardArm = Array.IndexOf(this.alpha, this.alpha.Min());
      OptimalMaximalRewardArm = OptimalExpectedRewardArm;
    }

    /// <summary>
    /// Creates a bandit with an explicit probability of a zero reward for each arm
    /// and caller-supplied optimal arm indices (stored as given, not validated).
    /// </summary>
    /// <param name="random">Random number source used by <see cref="Pull"/>.</param>
    /// <param name="alpha">Shape parameter for each arm.</param>
    /// <param name="pZero">Probability of a zero reward for each arm; indexed by arm in <see cref="Pull"/>.</param>
    /// <param name="bestExpRewardArm">Value for <see cref="OptimalExpectedRewardArm"/>.</param>
    /// <param name="bestMaxRewardArm">Value for <see cref="OptimalMaximalRewardArm"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="random"/>, <paramref name="alpha"/> or <paramref name="pZero"/> is null.</exception>
    public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero, int bestExpRewardArm, int bestMaxRewardArm) {
      if (random == null) throw new ArgumentNullException("random");
      if (alpha == null) throw new ArgumentNullException("alpha");
      if (pZero == null) throw new ArgumentNullException("pZero");

      this.alpha = alpha.ToArray();
      this.pZero = pZero.ToArray();
      this.random = random;
      OptimalExpectedRewardArm = bestExpRewardArm;
      OptimalMaximalRewardArm = bestMaxRewardArm;
    }

    /// <summary>
    /// Draws one reward from the given arm.
    /// </summary>
    /// <param name="arm">Zero-based arm index.</param>
    /// <returns>
    /// 0.0 with probability <c>pZero[arm]</c>; otherwise <c>(1 - u)^(-1 / alpha[arm])</c>
    /// for a uniform draw <c>u</c>.
    /// </returns>
    public double Pull(int arm) {
      // The first draw decides whether this pull yields the zero reward.
      if (random.NextDouble() < pZero[arm]) return 0.0;

      // Inverse-CDF sampling: NextDouble() is in [0, 1), so (1 - u) is in (0, 1] and the base is never zero.
      var u = random.NextDouble();
      return Math.Pow(1.0 - u, (-1 / alpha[arm]));
    }
  }
}
Note: See TracBrowser for help on using the repository browser.