Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/UCB1Policy.cs @ 11710

Last change on this file since 11710 was 11710, checked in by gkronber, 9 years ago

#2283: more bandit policies and tests

File size: 1.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6
7namespace HeuristicLab.Algorithms.Bandits {
/// <summary>
/// UCB1 bandit policy. Every action is selected once (in index order) before any
/// action is repeated; afterwards the action with the largest upper confidence
/// bound (average reward plus an exploration bonus) is selected.
/// </summary>
public class UCB1Policy : BanditPolicy {
  // Number of rewards recorded for each action via UpdateReward.
  private readonly int[] tries;
  // Sum of the rewards recorded for each action.
  private readonly double[] sumReward;
  // Number of rewards recorded over all actions since construction or the last Reset.
  private int totalTries = 0;

  /// <summary>
  /// Creates a policy with zeroed statistics.
  /// </summary>
  /// <param name="numActions">Number of actions; forwarded to the base constructor.</param>
  public UCB1Policy(int numActions)
    : base(numActions) {
    this.tries = new int[NumActions];
    this.sumReward = new double[NumActions];
  }

  /// <summary>
  /// Chooses the next action to play.
  /// </summary>
  /// <returns>
  /// The lowest index whose action has no recorded reward yet, if the loop reaches one;
  /// otherwise the index maximizing
  /// <c>sumReward[i] / tries[i] + sqrt(2 * ln(totalTries) / tries[i])</c>
  /// (the lowest such index on ties). Returns -1 when the loop never selects an
  /// action, e.g. when <c>NumActions</c> is 0.
  /// </returns>
  public override int SelectAction() {
    // The numerator of the exploration bonus does not depend on the action, so it is
    // computed once. With totalTries == 0 this evaluates to -Infinity, which is
    // harmless: in that state the loop returns at the first action before using it.
    double explorationNumerator = 2 * Math.Log(totalTries);

    int bestAction = -1;
    double bestQ = double.NegativeInfinity;
    for (int i = 0; i < NumActions; i++) {
      // An action without any recorded reward is always played first.
      if (tries[i] == 0) {
        return i;
      }

      double averageReward = sumReward[i] / tries[i];
      double explorationBonus = Math.Sqrt(explorationNumerator / tries[i]);
      double q = averageReward + explorationBonus;

      // Strict comparison keeps the lowest index when several actions tie.
      if (q > bestQ) {
        bestQ = q;
        bestAction = i;
      }
    }
    return bestAction;
  }

  /// <summary>
  /// Records a reward observed for an action.
  /// </summary>
  /// <param name="action">Index of the action that was played.</param>
  /// <param name="reward">Reward observed for that action.</param>
  public override void UpdateReward(int action, double reward) {
    // Index the per-action arrays first: an out-of-range action throws on the first
    // statement, before any counter has changed, so totalTries never runs ahead of
    // the per-action counts.
    tries[action]++;
    sumReward[action] += reward;
    totalTries++;
  }

  /// <summary>
  /// Clears all recorded statistics (per-action counts, reward sums and the total count).
  /// </summary>
  public override void Reset() {
    totalTries = 0;
    Array.Clear(tries, 0, tries.Length);
    Array.Clear(sumReward, 0, sumReward.Length);
  }
}
42}
Note: See TracBrowser for help on using the repository browser.