Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs @ 11728

Last change on this file since 11728 was 11727, checked in by gkronber, 9 years ago

#2283: worked on grammatical optimization problem solvers (simple MCTS done)

File size: 2.0 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Epsilon-greedy bandit policy. On each selection it either exploits the action with the
  /// highest observed average reward or delegates to a <see cref="RandomPolicy"/> for exploration.
  /// </summary>
  /// <remarks>
  /// Per-action statistics are kept in two arrays indexed by action id: the number of tries and
  /// the sum of rewards. NOTE(review): the set of selectable actions (<c>Actions</c>) comes from
  /// the base class, which is not visible in this file; it presumably enumerates the actions that
  /// have not been disabled - confirm against BanditPolicy.
  /// </remarks>
  public class EpsGreedyPolicy : BanditPolicy {
    private readonly Random random;
    // threshold compared against a uniform draw in SelectAction to choose explore vs. exploit
    private readonly double eps;
    // tries[a]: number of rewards recorded for action a; set to -1 when the action is disabled
    private readonly int[] tries;
    // sumReward[a]: sum of all rewards recorded for action a
    private readonly double[] sumReward;
    // policy used for the exploration branch
    private readonly RandomPolicy randomPolicy;

    /// <summary>
    /// Creates a policy for <paramref name="numActions"/> actions.
    /// </summary>
    /// <param name="random">Random number generator; shared with the internal random policy.</param>
    /// <param name="numActions">Number of actions; sizes the statistics arrays.</param>
    /// <param name="eps">Exploration threshold; a draw below this value triggers random selection.</param>
    public EpsGreedyPolicy(Random random, int numActions, double eps)
      : base(numActions) {
      this.random = random;
      this.eps = eps;
      this.randomPolicy = new RandomPolicy(random, numActions);
      this.tries = new int[numActions];
      this.sumReward = new double[numActions];
    }

    /// <summary>
    /// Selects the next action: greedy with probability 1 - eps, random otherwise.
    /// </summary>
    /// <returns>
    /// In the greedy branch, the first action in <c>Actions</c> that has zero tries, or otherwise
    /// the action with the highest average reward (the earliest one wins ties). In the
    /// exploration branch, whatever the random policy selects.
    /// </returns>
    public override int SelectAction() {
      Debug.Assert(Actions.Any());
      // NextDouble() is uniform on [0, 1), so "draw >= eps" holds with probability exactly 1 - eps.
      // A strict ">" would send a draw equal to eps to the random branch, so eps == 0 would not
      // be purely greedy.
      if (random.NextDouble() >= eps) {
        // select best
        var maxReward = double.NegativeInfinity;
        int bestAction = -1;
        foreach (var a in Actions) {
          // an action without any recorded reward is tried before averages are compared
          if (tries[a] == 0) return a;
          var avgReward = sumReward[a] / tries[a];
          if (maxReward < avgReward) {
            maxReward = avgReward;
            bestAction = a;
          }
        }
        Debug.Assert(bestAction >= 0);
        return bestAction;
      } else {
        // select random
        return randomPolicy.SelectAction();
      }
    }

    /// <summary>
    /// Records <paramref name="reward"/> for <paramref name="action"/>.
    /// </summary>
    /// <param name="action">Action id; asserted (debug builds) to be contained in <c>Actions</c>.</param>
    /// <param name="reward">Observed reward, added to the action's reward sum.</param>
    public override void UpdateReward(int action, double reward) {
      Debug.Assert(Actions.Contains(action));

      randomPolicy.UpdateReward(action, reward); // does nothing
      tries[action]++;
      sumReward[action] += reward;
    }

    /// <summary>
    /// Disables <paramref name="action"/> in this policy and in the internal random policy, and
    /// discards its statistics.
    /// </summary>
    /// <param name="action">Action id to disable.</param>
    public override void DisableAction(int action) {
      base.DisableAction(action);
      randomPolicy.DisableAction(action);
      sumReward[action] = 0;
      // -1 (rather than 0) so the action never matches the "tries == 0" shortcut in SelectAction
      tries[action] = -1;
    }

    /// <summary>
    /// Resets the base policy and the internal random policy, and zeroes all per-action statistics.
    /// </summary>
    public override void Reset() {
      base.Reset();
      randomPolicy.Reset();
      Array.Clear(tries, 0, tries.Length);
      Array.Clear(sumReward, 0, sumReward.Length);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.