Context Navigation

HeuristicLab.Algorithms.Bandits

Timestamp:

08/17/15 19:13:19 (9 years ago)

Author:

gkronber

Message:

#2283: implemented first crude version of extreme hunter algorithm in branch

Location:

Files:

-                      r12290
+                      r12876
       MaxReward = Math.Max(MaxReward, reward);
       var delta = reward - avgValue;
+      //var alpha = 0.01;
+      var alpha = Math.Max(1.0/Tries, 0.01);
+      double alpha = 1.0 / Tries;
       avgValue = avgValue + alpha * delta;
+    }

-                      r11851
+                      r12876
   </PropertyGroup>
   <ItemGroup>
+    <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
+      <SpecificVersion>False</SpecificVersion>
+      <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath>
+    </Reference>
     <Reference Include="System" />
     <Reference Include="System.Core" />
 …
   <ItemGroup>
     <Compile Include="ActionInfos\BernoulliPolicyActionInfo.cs" />
+    <Compile Include="ActionInfos\ExtremeHunterActionInfo.cs" />
     <Compile Include="ActionInfos\DefaultPolicyActionInfo.cs" />
     <Compile Include="ActionInfos\MeanAndVariancePolicyActionInfo.cs" />
 …
     <Compile Include="Policies\BoltzmannExplorationPolicy.cs" />
     <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs" />
+    <Compile Include="Policies\IntervalEstimationPolicy.cs" />
+    <Compile Include="Policies\ExtremeHunterPolicy.cs" />
     <Compile Include="Policies\EpsGreedyPolicy.cs" />
     <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs" />

-                      r11806
+                      r12876
 namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
   public class ActiveLearningPolicy : IBanditPolicy {
+    public double MaxReward { get; private set; }
+    public ActiveLearningPolicy(double maxReward = 1.0) {
+      this.MaxReward = maxReward;
+    }
     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
 …
           q = aInfo.SumReward / aInfo.Tries;
           var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
-          u = q + 0.5 * b;
-          l = q - 0.5 * b;
+          u = q + MaxReward * b;
+          l = q - MaxReward * b;
+        }
         bestActions.Add(aIdx);

r11806	r12876
35	35	var avgReward = aInfo.SumReward / aInfo.Tries;
36	36
37		// page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
	37	// page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
38	38	// var alpha = Math.Log(2 * totalTries * k / delta);
39	39	double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);

-                      r11806
+                      r12876
   // policy for k-armed bandit (see Auer et al. 2002)
   public class UCB1Policy : IBanditPolicy {
+    public double MaxReward { get; private set; }
+    public UCB1Policy(double maxReward = 1.0) {
+      this.MaxReward = maxReward;
+    }
     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
 …
         } else {
-          q = aInfo.SumReward / aInfo.Tries + 0.5 * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
+          q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
+        }
         if (q > bestQ) {

r11832	r12876
9	9	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10	10	// policy for k-armed bandit (see Auer et al. 2002)
	11	// specific to Bernoulli distributed rewards
11	12	public class UCB1TunedPolicy : IBanditPolicy {
12	13

Note: See TracChangeset for help on using the changeset viewer.