Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/ParetoBandit.cs @ 12876

Last change on this file since 12876 was 12876, checked in by gkronber, 8 years ago

#2283: implemented first crude version of extreme hunter algorithm in branch

File size: 2.1 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6using HeuristicLab.Common;
7
8namespace HeuristicLab.Algorithms.Bandits {
9  // for test case 1 in Extreme Bandits paper (Carpentier, NIPS 2014)
10  public class ParetoBandit : IBandit {
11    private double[] alpha;
12    private double[] pZero;
13    public int NumArms { get { return alpha.Length; } }
14    public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
15    public int OptimalExpectedRewardArm { get; private set; }
16    public int OptimalMaximalRewardArm { get; private set; }
17    public double MaxReward { get; private set; }
18    public double MinReward { get; private set; }
19    private readonly Random random;
20    public ParetoBandit(Random random, IEnumerable<double> alpha) : this(random, alpha, alpha.Select(_ => 0.0)) { }
21    public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero) { // probability of a zero reward
22      this.alpha = alpha.ToArray();
23      this.pZero = pZero.ToArray();
24      this.random = random;
25
26      // find optimal arms using empirical estimates
27      var bestExpReward = double.NegativeInfinity;
28      var bestMaxReward = double.NegativeInfinity;
29      for (int k = 0; k < NumArms; k++) {
30        double expReward = 0.0;
31        double maxReward = double.NegativeInfinity;
32        for (int i = 0; i < 100000; i++) {
33          var r = Pull(k);
34          expReward += r;
35          maxReward = Math.Max(maxReward, r);
36        }
37        expReward /= 100000;
38
39        if (expReward > bestExpReward) {
40          bestExpReward = expReward;
41          OptimalExpectedRewardArm = k;
42          OptimalExpectedReward = expReward;
43        }
44        if (maxReward > bestMaxReward) {
45          bestMaxReward = maxReward;
46          OptimalMaximalRewardArm = k;
47        }
48      }
49    }
50
51    public double Pull(int arm) {
52      if (random.NextDouble() < pZero[arm]) return 0.0;
53      var u = random.NextDouble();
54      return Math.Pow(1.0 - u, (-1 / alpha[arm]));
55    }
56  }
57}
Note: See TracBrowser for help on using the repository browser.