Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs @ 12290

Last change on this file since 12290 was 11806, checked in by gkronber, 10 years ago

#2283: separated value-states from done-states in GenericGrammarPolicy and removed disabling of actions from bandit policies

File size: 1.7 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10  public class ActiveLearningPolicy : IBanditPolicy {
11    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
12      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
13      int totalTries = myActionInfos.Sum(a => a.Tries);
14      const double delta = 0.1;
15      int k = myActionInfos.Count();
16      var bestActions = new List<int>();
17      var us = new List<double>();
18      var ls = new List<double>();
19      int aIdx = -1;
20      foreach (var aInfo in myActionInfos) {
21        aIdx++;
22        double q;
23        double u;
24        double l;
25        if (aInfo.Tries == 0) {
26          u = double.PositiveInfinity;
27          l = double.NegativeInfinity;
28        } else {
29          q = aInfo.SumReward / aInfo.Tries;
30          var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
31          u = q + 0.5 * b;
32          l = q - 0.5 * b;
33        }
34        bestActions.Add(aIdx);
35        us.Add(u);
36        ls.Add(l);
37      }
38      var active = new List<int>();
39      var maxL = ls.Max();
40      for (int i = 0; i < us.Count; i++) {
41        if (us[i] >= maxL) active.Add(bestActions[i]);
42      }
43      Debug.Assert(active.Any());
44      return active.SelectRandom(random);
45    }
46
47    public IBanditPolicyActionInfo CreateActionInfo() {
48      return new DefaultPolicyActionInfo();
49    }
50    public override string ToString() {
51      return "ActiveLearningPolicy";
52    }
53  }
54}
Note: See TracBrowser for help on using the repository browser.