Context Navigation

← Previous Change
Next Change →

HeuristicLab.Problems.Bandits

Timestamp:

08/24/15 13:56:27 (9 years ago)

Author:

gkronber

Message:

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits

Files:

2 added
3 edited

Bandit.cs (added)
HeuristicLab.Problems.Bandits.csproj (modified) (3 diffs)
IBandit.cs (modified) (1 diff)
MixtureBandit.cs (added)
ParetoBandit.cs (modified) (1 diff)

Legend:

(no marker): Unmodified
+: Added
-: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/HeuristicLab.Problems.Bandits.csproj

-                      r12876
+                      r12893
     <ErrorReport>prompt</ErrorReport>
     <WarningLevel>4</WarningLevel>
+    <UseVSHostingProcess>true</UseVSHostingProcess>
   </PropertyGroup>
   <ItemGroup>
 …
     <Compile Include="BanditHelper.cs" />
     <Compile Include="BernoulliBandit.cs" />
+    <Compile Include="Bandit.cs" />
+    <Compile Include="MixtureBandit.cs" />
     <Compile Include="ParetoBandit.cs" />
     <Compile Include="GaussianBandit.cs" />
 …
       <Name>HeuristicLab.Common</Name>
     </ProjectReference>
+    <ProjectReference Include="..\HeuristicLab.Distributions\HeuristicLab.Distributions.csproj">
+      <Project>{31171165-e16f-4a1a-a8ab-25c6ab3a71b9}</Project>
+      <Name>HeuristicLab.Distributions</Name>
+    </ProjectReference>
   </ItemGroup>
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />

branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/IBandit.cs

r12876	r12893
8	8	public interface IBandit {
9	9	int NumArms { get; }
10		~~double OptimalExpectedReward { get; } // expected reward of the best arm, for calculating regret~~
11	10	int OptimalExpectedRewardArm { get; } // arm which is optimal for optimization of expected reward
12	11	int OptimalMaximalRewardArm { get; } // arm which is optimal for optimization of maximal reward

branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/ParetoBandit.cs

-                      r12876
+                      r12893
     private double[] pZero;
     public int NumArms { get { return alpha.Length; } }
-    public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
     public int OptimalExpectedRewardArm { get; private set; }
     public int OptimalMaximalRewardArm { get; private set; }
-    public double MaxReward { get; private set; }
-    public double MinReward { get; private set; }
     private readonly Random random;
-    public ParetoBandit(Random random, IEnumerable<double> alpha) : this(random, alpha, alpha.Select(_ => 0.0)) { }
-    public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero) { // probability of a zero reward
+    public ParetoBandit(Random random, IEnumerable<double> alpha) {
+      this.alpha = alpha.ToArray();
+      this.pZero = new double[this.alpha.Length];
+      this.random = random;
+      OptimalExpectedRewardArm = Array.IndexOf(this.alpha, alpha.Min());
+      OptimalMaximalRewardArm = OptimalExpectedRewardArm;
+    }
+    public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero, int bestExpRewardArm, int bestMaxRewardArm) { // probability of a zero reward
       this.alpha = alpha.ToArray();
       this.pZero = pZero.ToArray();
       this.random = random;
-      // find optimal arms using empirical estimates
-      var bestExpReward = double.NegativeInfinity;
-      var bestMaxReward = double.NegativeInfinity;
-      for (int k = 0; k < NumArms; k++) {
-        double expReward = 0.0;
-        double maxReward = double.NegativeInfinity;
-        for (int i = 0; i < 100000; i++) {
-          var r = Pull(k);
-          expReward += r;
-          maxReward = Math.Max(maxReward, r);
-        }
-        expReward /= 100000;
-        if (expReward > bestExpReward) {
-          bestExpReward = expReward;
-          OptimalExpectedRewardArm = k;
-          OptimalExpectedReward = expReward;
-        }
-        if (maxReward > bestMaxReward) {
-          bestMaxReward = maxReward;
-          OptimalMaximalRewardArm = k;
-        }
-      }
+      OptimalExpectedRewardArm = bestExpRewardArm;
+      OptimalMaximalRewardArm = bestMaxRewardArm;
+    }

Note: See TracChangeset for help on using the changeset viewer.