Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs @ 13862

Last change on this file since 13862 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 1.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Bandit policy that builds an interval [l, u] around the maximum reward
  /// recorded for each action and selects among the actions whose upper bound
  /// is at least as large as the greatest lower bound of any action.
  /// </summary>
  public class ActiveLearningPolicy : IBanditPolicy {
    /// <summary>
    /// Factor by which the interval half-width is multiplied when the
    /// upper and lower bounds of an action are computed.
    /// </summary>
    public double MaxReward { get; private set; }

    /// <summary>Creates the policy.</summary>
    /// <param name="maxReward">Value stored in <see cref="MaxReward"/> (defaults to 1.0).</param>
    public ActiveLearningPolicy(double maxReward = 1.0) {
      this.MaxReward = maxReward;
    }

    /// <summary>
    /// Selects the index of the next action to try.
    /// </summary>
    /// <param name="random">Random number source handed to <c>SelectRandom</c>.</param>
    /// <param name="actionInfos">Per-action statistics; only elements of type
    /// <c>DefaultPolicyActionInfo</c> are considered.</param>
    /// <returns>Zero-based position (within the considered elements) of the chosen action.</returns>
    /// <exception cref="InvalidOperationException">
    /// No element of <paramref name="actionInfos"/> is a <c>DefaultPolicyActionInfo</c>.
    /// </exception>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      // Materialize once: OfType is deferred, and the sequence is needed for the
      // count, the sum of tries and the per-action bounds. Enumerating the source
      // repeatedly is wasteful and could yield inconsistent values.
      // NOTE(review): the returned index is a position in the filtered list; if callers
      // ever pass other IBanditPolicyActionInfo implementations the indices would shift - confirm.
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToList();
      int k = myActionInfos.Count;
      if (k == 0) {
        // Same exception type the previous Max() call on an empty list produced,
        // but with a message that names the actual problem.
        throw new InvalidOperationException("No actions of type DefaultPolicyActionInfo to select from.");
      }

      int totalTries = myActionInfos.Sum(a => a.Tries);
      const double delta = 0.1;

      var us = new double[k]; // upper bound per action
      var ls = new double[k]; // lower bound per action
      for (int i = 0; i < k; i++) {
        var aInfo = myActionInfos[i];
        if (aInfo.Tries == 0) {
          // Untried actions get an unbounded interval, so they always stay active
          // and never raise the largest lower bound.
          us[i] = double.PositiveInfinity;
          ls[i] = double.NegativeInfinity;
        } else {
          double q = aInfo.MaxReward;
          double b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
          us[i] = q + MaxReward * b;
          ls[i] = q - MaxReward * b;
        }
      }

      // An action remains active while its upper bound reaches the best lower bound.
      double maxL = ls.Max();
      var active = new List<int>(k);
      for (int i = 0; i < k; i++) {
        if (us[i] >= maxL) {
          active.Add(i);
        }
      }

      Debug.Assert(active.Any());
      return active.SelectRandom(random);
    }

    /// <summary>Creates a new <c>DefaultPolicyActionInfo</c> for use with this policy.</summary>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>Returns the fixed name "ActiveLearningPolicy".</summary>
    public override string ToString() {
      return "ActiveLearningPolicy";
    }
  }
}
Note: See TracBrowser for help on using the repository browser.