Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
12/29/14 11:02:36 (9 years ago)
Author:
gkronber
Message:

#2283: worked on grammatical optimization problem solvers (simple MCTS done)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs

    r11711 r11727  
    11using System;
    22using System.Collections.Generic;
     3using System.Diagnostics;
    34using System.Linq;
    45using System.Text;
     
    1112    private readonly int[] tries;
    1213    private readonly double[] sumReward;
     14    private readonly RandomPolicy randomPolicy;
     15
    1316    public EpsGreedyPolicy(Random random, int numActions, double eps)
    1417      : base(numActions) {
    1518      this.random = random;
    1619      this.eps = eps;
    17       this.tries = new int[NumActions];
    18       this.sumReward = new double[NumActions];
     20      this.randomPolicy = new RandomPolicy(random, numActions);
     21      this.tries = new int[numActions];
     22      this.sumReward = new double[numActions];
    1923    }
    2024
    2125    public override int SelectAction() {
     26      Debug.Assert(Actions.Any());
    2227      if (random.NextDouble() > eps) {
    2328        // select best
    2429        var maxReward = double.NegativeInfinity;
    2530        int bestAction = -1;
    26         for (int i = 0; i < NumActions; i++) {
    27           if (tries[i] == 0) return i;
    28           var avgReward = sumReward[i] / tries[i];
     31        foreach (var a in Actions) {
     32          if (tries[a] == 0) return a;
     33          var avgReward = sumReward[a] / tries[a];
    2934          if (maxReward < avgReward) {
    3035            maxReward = avgReward;
    31             bestAction = i;
     36            bestAction = a;
    3237          }
    3338        }
     39        Debug.Assert(bestAction >= 0);
    3440        return bestAction;
    3541      } else {
    3642        // select random
    37         return random.Next(NumActions);
     43        return randomPolicy.SelectAction();
    3844      }
    3945    }
    4046    public override void UpdateReward(int action, double reward) {
     47      Debug.Assert(Actions.Contains(action));
     48
     49      randomPolicy.UpdateReward(action, reward); // does nothing
    4150      tries[action]++;
    4251      sumReward[action] += reward;
    4352    }
     53
     54    public override void DisableAction(int action) {
     55      base.DisableAction(action);
     56      randomPolicy.DisableAction(action);
     57      sumReward[action] = 0;
     58      tries[action] = -1;
     59    }
     60
    4461    public override void Reset() {
     62      base.Reset();
     63      randomPolicy.Reset();
    4564      Array.Clear(tries, 0, tries.Length);
    4665      Array.Clear(sumReward, 0, sumReward.Length);
Note: See TracChangeset for help on using the changeset viewer.