source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/EpsGreedyPolicy.cs @ 11710

Last change on this file since 11710 was 11710, checked in by gkronber, 8 years ago

#2283: more bandit policies and tests

File size: 1.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Epsilon-greedy bandit policy. Each call to <see cref="SelectAction"/> draws one
  /// random number; if it is greater than eps the action with the highest observed
  /// average reward is chosen (exploitation), otherwise an action is drawn uniformly
  /// at random (exploration).
  /// </summary>
  public class EpsGreedyPolicy : BanditPolicy {
    private readonly Random random;
    // Threshold compared against random.NextDouble(); larger values mean more random picks.
    private readonly double eps;
    // tries[i] counts the rewards recorded for action i.
    private readonly int[] tries;
    // sumReward[i] accumulates the rewards recorded for action i.
    private readonly double[] sumReward;

    /// <summary>
    /// Creates a policy with zeroed statistics for every action.
    /// </summary>
    /// <param name="random">Random number source used for the explore/exploit decision and for random picks.</param>
    /// <param name="numActions">Number of actions; forwarded to the base class constructor.</param>
    /// <param name="eps">
    /// Exploration threshold. The value is not range-checked: a value below 0 makes the
    /// policy always greedy, a value of 1 or above makes it always random.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="random"/> is null.</exception>
    public EpsGreedyPolicy(Random random, int numActions, double eps)
      : base(numActions) {
      // Fail fast instead of surfacing a NullReferenceException on the first SelectAction call.
      if (random == null) throw new ArgumentNullException("random");
      this.random = random;
      this.eps = eps;
      // NOTE(review): NumActions is inherited; presumably it equals numActions after base(numActions) - confirm in BanditPolicy.
      this.tries = new int[NumActions];
      this.sumReward = new double[NumActions];
    }

    /// <summary>
    /// Chooses the next action to play.
    /// </summary>
    /// <returns>
    /// In the greedy branch: the lowest-indexed action that has never been rewarded, or,
    /// if all actions have been tried, the lowest-indexed action with the highest average
    /// reward. In the random branch: a uniformly drawn index in [0, NumActions).
    /// </returns>
    public override int SelectAction() {
      if (random.NextDouble() > eps) {
        // select best
        var maxReward = double.NegativeInfinity;
        int bestAction = -1;
        for (int i = 0; i < NumActions; i++) {
          // Untried actions are played first so that every average below is well defined.
          if (tries[i] == 0) return i;
          var avgReward = sumReward[i] / tries[i];
          // Strict comparison: on ties the action with the lowest index wins.
          if (maxReward < avgReward) {
            maxReward = avgReward;
            bestAction = i;
          }
        }
        // No average compared greater than negative infinity (all averages are NaN or
        // negative infinity). Returning -1 here would hand the caller an invalid index,
        // so fall back to a uniform random pick instead.
        if (bestAction < 0 && NumActions > 0) return random.Next(NumActions);
        return bestAction;
      } else {
        // select random
        return random.Next(NumActions);
      }
    }

    /// <summary>
    /// Records a reward observed for the given action.
    /// </summary>
    /// <param name="action">Index of the action that was played; used directly as an array index.</param>
    /// <param name="reward">Reward to add to the running sum of that action.</param>
    public override void UpdateReward(int action, double reward) {
      tries[action]++;
      sumReward[action] += reward;
    }

    /// <summary>
    /// Clears the try counts and reward sums of all actions.
    /// </summary>
    public override void Reset() {
      Array.Clear(tries, 0, tries.Length);
      Array.Clear(sumReward, 0, sumReward.Length);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.