Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ActiveLearningPolicy.cs @ 11793

Last change on this file since 11793 was 11792, checked in by gkronber, 10 years ago

#2283 work-in-progress commit (does not compile)

File size: 1.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
/// <summary>
/// Bandit policy that computes a lower and an upper bound around the mean reward of every
/// enabled action, keeps the actions whose upper bound is at least the largest lower bound,
/// and picks one of the kept actions via <c>SelectRandom</c>.
/// </summary>
public class ActiveLearningPolicy : IBanditPolicy {
  /// <summary>
  /// Selects the index of the next action to try.
  /// </summary>
  /// <param name="random">Random source handed to <c>SelectRandom</c> for the final pick.</param>
  /// <param name="actionInfos">
  /// Per-action statistics. Only entries of type <c>DefaultPolicyActionInfo</c> are considered.
  /// </param>
  /// <returns>
  /// Zero-based position of the chosen action within the <c>DefaultPolicyActionInfo</c> entries of
  /// <paramref name="actionInfos"/>. Disabled entries occupy a position but are never returned.
  /// </returns>
  /// <exception cref="InvalidOperationException">There is no enabled action to choose from.</exception>
  public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    const double delta = 0.1;

    // Materialize exactly once: the incoming sequence may be lazy, and it is needed for the
    // totals as well as for the per-action bounds. The index is taken before disabled entries
    // are filtered out so that it still refers to the position among all typed entries.
    var candidates = actionInfos.OfType<DefaultPolicyActionInfo>()
      .Select((info, index) => new { Info = info, Index = index })
      .Where(c => !c.Info.Disabled)
      .ToList();

    int k = candidates.Count;
    if (k == 0) throw new InvalidOperationException("ActiveLearningPolicy: there are no enabled actions to select from.");
    int totalTries = candidates.Sum(c => c.Info.Tries);

    var upperBounds = new List<double>(k);
    var lowerBounds = new List<double>(k);
    foreach (var c in candidates) {
      var tries = c.Info.Tries;
      double u;
      double l;
      if (tries == 0) {
        // An untried action gets an unbounded interval, so it always stays a candidate
        // and never raises the largest lower bound.
        u = double.PositiveInfinity;
        l = double.NegativeInfinity;
      } else {
        double q = c.Info.SumReward / tries; // mean reward observed so far
        var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * tries));
        u = q + 0.5 * b;
        l = q - 0.5 * b;
      }
      upperBounds.Add(u);
      lowerBounds.Add(l);
    }

    // Keep every action whose upper bound reaches the best lower bound.
    var maxLower = lowerBounds.Max();
    var active = new List<int>();
    for (int i = 0; i < candidates.Count; i++) {
      if (upperBounds[i] >= maxLower) active.Add(candidates[i].Index);
    }
    Debug.Assert(active.Any());
    return active.SelectRandom(random);
  }

  /// <summary>
  /// Creates the per-action statistics object used by this policy.
  /// </summary>
  /// <returns>A new <c>DefaultPolicyActionInfo</c> instance.</returns>
  public IBanditPolicyActionInfo CreateActionInfo() {
    return new DefaultPolicyActionInfo();
  }

  /// <summary>
  /// Returns the display name of this policy.
  /// </summary>
  public override string ToString() {
    return "ActiveLearningPolicy";
  }
}
55}
Note: See TracBrowser for help on using the repository browser.