Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/16/15 18:26:35 (10 years ago)
Author:
gkronber
Message:

#2283 work-in-progress commit (does not compile)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs

    r11742 r11792  
       5    5   using System.Text;
       6    6   using System.Threading.Tasks;
            7 + using HeuristicLab.Common;
       7    8
       8    9   namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
       …    …
      12   13       public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      13   14         var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
      14      -       int bestAction = -1;
      15      -       double bestQ = double.NegativeInfinity;
           15 +
      16   16         int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
      17   17
      18   18         int aIdx = -1;
           19 +       double bestQ = double.NegativeInfinity;
           20 +       var bestActions = new List<int>();
      19   21         foreach (var aInfo in myActionInfos) {
      20   22           aIdx++;
      21   23           if (aInfo.Disabled) continue;
      22      -         if (aInfo.Tries == 0) return aIdx;
           24 +         double q;
           25 +         if (aInfo.Tries == 0) {
           26 +           q = double.PositiveInfinity;
           27 +         } else {
           28 +           var sumReward = aInfo.SumReward;
           29 +           var tries = aInfo.Tries;
      23   30
      24      -         var sumReward = aInfo.SumReward;
      25      -         var tries = aInfo.Tries;
      26      -
      27      -         var avgReward = sumReward / tries;
      28      -         var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries))); // 1/4 is upper bound of bernoulli distributed variable
           31 +           var avgReward = sumReward / tries;
           32 +           q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries)));
           33 +           // 1/4 is upper bound of bernoulli distributed variable
           34 +         }
      29   35           if (q > bestQ) {
      30   36             bestQ = q;
      31      -           bestAction = aIdx;
           37 +           bestActions.Clear();
           38 +           bestActions.Add(aIdx);
           39 +         } else if (q == bestQ) {
           40 +           bestActions.Add(aIdx);
      32   41           }
      33   42         }
      34      -       Debug.Assert(bestAction > -1);
      35      -       return bestAction;
           43 +       Debug.Assert(bestActions.Any());
           44 +
           45 +       return bestActions.SelectRandom(random);
      36   46       }
      37   47
Note: See TracChangeset for help on using the changeset viewer.