Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
12/29/14 11:02:36 (9 years ago)
Author:
gkronber
Message:

#2283: worked on grammatical optimization problem solvers (simple MCTS done)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs

    r11711 r11727  
         1      1  using System;
         2      2  using System.Collections.Generic;
                3  using System.Diagnostics;
         3      4  using System.Linq;
         4      5  using System.Text;
     
        13     14      public UCB1TunedPolicy(int numActions)
        14     15        : base(numActions) {
        15               this.tries = new int[NumActions];
        16               this.sumReward = new double[NumActions];
        17               this.sumSqrReward = new double[NumActions];
               16        this.tries = new int[numActions];
               17        this.sumReward = new double[numActions];
               18        this.sumSqrReward = new double[numActions];
        18     19      }
        19     20
     
        25     26
        26     27      public override int SelectAction() {
               28        Debug.Assert(Actions.Any());
        27     29        int bestAction = -1;
        28     30        double bestQ = double.NegativeInfinity;
        29               for (int i = 0; i < NumActions; i++) {
        30                 if (tries[i] == 0) return i;
        31                 var q = sumReward[i] / tries[i] + Math.Sqrt((Math.Log(totalTries) / tries[i]) * Math.Min(1.0 / 4, V(i))); // 1/4 is upper bound of bernoulli distributed variable
               31        foreach (var a in Actions) {
               32          if (tries[a] == 0) return a;
               33          var q = sumReward[a] / tries[a] + Math.Sqrt((Math.Log(totalTries) / tries[a]) * Math.Min(1.0 / 4, V(a))); // 1/4 is upper bound of bernoulli distributed variable
        32     34          if (q > bestQ) {
        33     35            bestQ = q;
        34                   bestAction = i;
               36            bestAction = a;
        35     37          }
        36     38        }
     
        38     40      }
        39     41      public override void UpdateReward(int action, double reward) {
               42        Debug.Assert(Actions.Contains(action));
        40     43        totalTries++;
        41     44        tries[action]++;
     
        43     46        sumSqrReward[action] += reward * reward;
        44     47      }
               48
               49      public override void DisableAction(int action) {
               50        base.DisableAction(action);
               51        totalTries -= tries[action];
               52        tries[action] = -1;
               53        sumReward[action] = 0;
               54        sumSqrReward[action] = 0;
               55      }
               56
        45     57      public override void Reset() {
               58        base.Reset();
        46     59        totalTries = 0;
        47     60        Array.Clear(tries, 0, tries.Length);
Note: See TracChangeset for help on using the changeset viewer.