
Timestamp:
08/17/15 19:13:19
Author:
gkronber
Message:

#2283: implemented first crude version of extreme hunter algorithm in branch

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies
Files:
3 added
4 edited

  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs

    r11806 → r12876

      namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
        public class ActiveLearningPolicy : IBanditPolicy {
    +     public double MaxReward { get; private set; }
    +     public ActiveLearningPolicy(double maxReward = 1.0) {
    +       this.MaxReward = maxReward;
    +     }
          public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
            var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
      …
                q = aInfo.SumReward / aInfo.Tries;
                var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
    -           u = q + 0.5 * b;
    -           l = q - 0.5 * b;
    +           u = q + MaxReward * b;
    +           l = q - MaxReward * b;
              }
              bestActions.Add(aIdx);
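
    The edit above replaces the hard-coded 0.5 half-width factor with the new MaxReward property, so the confidence interval is scaled to rewards bounded by MaxReward rather than assuming unit-scaled rewards. Below is a minimal standalone sketch of the scaled bound computation; the names ScaledBound and Compute are illustrative and not part of the changeset.

    using System;

    // Minimal sketch (not the changeset code): a Hoeffding-style confidence
    // interval whose half-width is scaled by the maximum possible reward,
    // mirroring the MaxReward * b bound used in ActiveLearningPolicy above.
    public static class ScaledBound {
      // Returns lower and upper bounds for the mean reward of a single arm.
      public static Tuple<double, double> Compute(
          double sumReward, int tries, int totalTries, int k,
          double delta, double maxReward) {
        var q = sumReward / tries;  // empirical mean reward of this arm
        var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * tries));
        // The factor maxReward widens or narrows the interval to match the
        // known reward range; maxReward = 1.0 is the constructor default.
        return Tuple.Create(q - maxReward * b, q + maxReward * b);
      }
    }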
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs

    r11806 → r12876

                var avgReward = aInfo.SumReward / aInfo.Tries;

    -           // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
    +           // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
                // var alpha = Math.Log(2 * totalTries * k / delta);
                double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
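
    Aside from the comment typo fix, the surrounding context shows alpha computed as a sum of logarithms rather than as the logarithm of the product in the commented-out line. The two forms are algebraically identical, but the sum-of-logs form never materializes the possibly huge intermediate product 2 * totalTries * k / delta. A small self-contained check, with arbitrary example values:

    using System;

    // Sketch: Math.Log(2 * n * k / delta) equals
    // Math.Log(2) + Math.Log(n) + Math.Log(k) - Math.Log(delta).
    // The example values below are arbitrary.
    public static class AlphaDemo {
      public static void Main() {
        double totalTries = 1e8, k = 50, delta = 1e-9;
        double direct = Math.Log(2.0 * totalTries * k / delta);
        double summed = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
        Console.WriteLine("{0} vs {1}", direct, summed);  // the two agree up to rounding
      }
    }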
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs

    r11806 → r12876

        // policy for k-armed bandit (see Auer et al. 2002)
        public class UCB1Policy : IBanditPolicy {
    +     public double MaxReward { get; private set; }
    +     public UCB1Policy(double maxReward = 1.0) {
    +       this.MaxReward = maxReward;
    +     }
          public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
            var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
      …
              } else {

    -           q = aInfo.SumReward / aInfo.Tries + 0.5 * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
    +           q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
              }
              if (q > bestQ) {
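
    The UCB1 index from Auer et al. 2002, mean reward plus sqrt(2 ln n / n_i), is derived for rewards in [0, 1]; the change multiplies the exploration term by MaxReward so the policy can be applied when a larger reward bound is known. A minimal sketch of the scaled selection rule, using plain arrays and an illustrative class name (Ucb1Sketch) instead of the changeset's IBanditPolicyActionInfo interface:

    using System;
    using System.Linq;

    // Minimal sketch of the MaxReward-scaled UCB1 selection rule shown above.
    // Arm statistics are passed as plain arrays; the class name is illustrative.
    public static class Ucb1Sketch {
      public static int SelectAction(double[] sumRewards, int[] tries, double maxReward) {
        int totalTries = tries.Sum();
        int best = -1;
        double bestQ = double.NegativeInfinity;
        for (int i = 0; i < tries.Length; i++) {
          if (tries[i] == 0) return i;  // always try an untested arm first
          double q = sumRewards[i] / tries[i]
                   + maxReward * Math.Sqrt(2.0 * Math.Log(totalTries) / tries[i]);
          if (q > bestQ) { bestQ = q; best = i; }
        }
        return best;
      }
    }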
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs

    r11832 → r12876

      namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
        // policy for k-armed bandit (see Auer et al. 2002)
    +   // specific to Bernoulli distributed rewards
        public class UCB1TunedPolicy : IBanditPolicy {

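
    The added comment notes that UCB1-Tuned assumes Bernoulli (or at least [0, 1]-bounded) rewards: in the UCB1-Tuned index of Auer et al. 2002 the per-arm variance estimate is capped at 1/4, the largest variance a Bernoulli variable can have. A hedged sketch of that published index, not the code in this changeset:

    using System;

    // Sketch of the UCB1-Tuned index (Auer et al. 2002), not the changeset code:
    // the variance estimate is capped at 0.25, the maximum variance of a
    // Bernoulli variable, which is why the policy assumes rewards in [0, 1].
    public static class Ucb1TunedSketch {
      public static double Index(double sumReward, double sumSqrReward,
                                 int tries, int totalTries) {
        double avg = sumReward / tries;
        // Sample variance plus an exploration term of its own.
        double v = sumSqrReward / tries - avg * avg
                 + Math.Sqrt(2.0 * Math.Log(totalTries) / tries);
        return avg + Math.Sqrt(Math.Log(totalTries) / tries * Math.Min(0.25, v));
      }
    }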