Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ActiveLearningPolicy.cs @ 11747

Last change on this file since 11747 was 11747, checked in by gkronber, 10 years ago

#2283: implemented test problems for MCTS

File size: 1.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Bandit policy that maintains an interval [l, u] around the observed mean reward of
  /// every enabled action and selects uniformly at random among the "active" actions,
  /// i.e. those whose upper bound u is not below the largest lower bound l of any action.
  /// </summary>
  public class ActiveLearningPolicy : IBanditPolicy {
    /// <summary>
    /// Selects the index of the next action to try.
    /// </summary>
    /// <param name="random">Random source passed on to <c>SelectRandom</c> for the final pick.</param>
    /// <param name="actionInfos">
    /// Statistics of the available actions. Only elements of type
    /// <see cref="DefaultPolicyActionInfo"/> are considered.
    /// </param>
    /// <returns>
    /// The index of the chosen action. The index counts positions within the sequence of
    /// <see cref="DefaultPolicyActionInfo"/> elements (disabled ones included), which matches
    /// the position in <paramref name="actionInfos"/> only when every element has that type.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown when there is no enabled action to choose from.
    /// </exception>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      const double delta = 0.1;

      // OfType is lazily evaluated; materialize once because the infos are traversed several times.
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToList();
      var enabledInfos = myActionInfos.Where(a => !a.Disabled).ToList();

      int k = enabledInfos.Count;
      if (k == 0) {
        // Same exception type that Max() on an empty list would raise further down, but with a usable message.
        throw new InvalidOperationException("ActiveLearningPolicy: there is no enabled action to select from.");
      }
      int totalTries = enabledInfos.Sum(a => a.Tries);

      // Loop-invariant part of the interval width. It is only used for actions with Tries > 0.
      double logTerm = Math.Log(2.0 * k * totalTries / delta);

      // Parallel lists: candidateIdx[i] is the action index belonging to us[i] / ls[i].
      var candidateIdx = new List<int>(k);
      var us = new List<double>(k);
      var ls = new List<double>(k);
      for (int aIdx = 0; aIdx < myActionInfos.Count; aIdx++) {
        var aInfo = myActionInfos[aIdx];
        if (aInfo.Disabled) continue;

        double u;
        double l;
        if (aInfo.Tries == 0) {
          // Untried actions get the fixed interval [0, 1].
          // NOTE(review): presumably rewards are scaled to [0, 1] - confirm against the problem definitions.
          u = 1.0;
          l = 0.0;
        } else {
          double q = aInfo.SumReward / aInfo.Tries; // observed mean reward
          double b = Math.Sqrt(logTerm / (2.0 * aInfo.Tries));
          u = q + 0.5 * b;
          l = q - 0.5 * b;
        }
        candidateIdx.Add(aIdx);
        us.Add(u);
        ls.Add(l);
      }

      // Keep every action whose upper bound reaches the best lower bound.
      double maxL = ls.Max();
      var active = new List<int>();
      for (int i = 0; i < us.Count; i++) {
        if (us[i] >= maxL) active.Add(candidateIdx[i]);
      }
      Debug.Assert(active.Any());
      return active.SelectRandom(random);
    }

    /// <summary>
    /// Creates the per-action statistics object used by this policy.
    /// </summary>
    /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>
    /// Returns the display name of this policy.
    /// </summary>
    public override string ToString() {
      return "ActiveLearningPolicy";
    }
  }
}
Note: See TracBrowser for help on using the repository browser.