Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/02/15 16:08:21 (10 years ago)
Author:
gkronber
Message:

#2283: several major extensions for grammatical optimization

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCBNormalPolicy.cs

    r11727 r11730  
    24 24         double bestQ = double.NegativeInfinity;
    25 25         foreach (var a in Actions) {
    26    -         if (totalTries == 0 || tries[a] == 0 || tries[a] < Math.Ceiling(8 * Math.Log(totalTries))) return a;
       26 +         if (totalTries <= 1 || tries[a] <= 1 || tries[a] <= Math.Ceiling(8 * Math.Log(totalTries))) return a;
    27 27           var avgReward = sumReward[a] / tries[a];
       28 +         var estVariance = 16 * ((sumSqrReward[a] - tries[a] * Math.Pow(avgReward, 2)) / (tries[a] - 1)) * (Math.Log(totalTries - 1) / tries[a]);
       29 +         if (estVariance < 0) estVariance = 0; // numerical problems
    28 30           var q = avgReward
    29    -           + Math.Sqrt(16 * ((sumSqrReward[a] - tries[a] * Math.Pow(avgReward, 2)) / (tries[a] - 1)) * (Math.Log(totalTries - 1) / tries[a]));
       31 +           + Math.Sqrt(estVariance);
    30 32           if (q > bestQ) {
    31 33             bestQ = q;
     
    33 35           }
    34 36         }
       37 +       Debug.Assert(Actions.Contains(bestAction));
    35 38         return bestAction;
    36 39       }
     
    58 61         Array.Clear(sumSqrReward, 0, sumSqrReward.Length);
    59 62       }
       63 +     public override void PrintStats() {
       64 +       for (int i = 0; i < sumReward.Length; i++) {
       65 +         if (tries[i] >= 0) {
       66 +           Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
       67 +         } else {
       68 +           Console.Write("{0,5}", "");
       69 +         }
       70 +       }
       71 +       Console.WriteLine();
       72 +     }
       73 +     public override string ToString() {
       74 +       return "UCBNormalPolicy";
       75 +     }
    60 76     }
    61 77   }
Note: See TracChangeset for help on using the changeset viewer.