Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/24/15 13:56:27 (9 years ago)
Author:
gkronber
Message:

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits
Files:
2 added
3 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/HeuristicLab.Problems.Bandits.csproj

    r12876 r12893  
    29 29    <ErrorReport>prompt</ErrorReport>
    30 30    <WarningLevel>4</WarningLevel>
     31    <UseVSHostingProcess>true</UseVSHostingProcess>
    31 32  </PropertyGroup>
    32 33  <ItemGroup>
     
    41 42    <Compile Include="BanditHelper.cs" />
    42 43    <Compile Include="BernoulliBandit.cs" />
     44    <Compile Include="Bandit.cs" />
     45    <Compile Include="MixtureBandit.cs" />
    43 46    <Compile Include="ParetoBandit.cs" />
    44 47    <Compile Include="GaussianBandit.cs" />
     
    53 56      <Name>HeuristicLab.Common</Name>
    54 57    </ProjectReference>
     58    <ProjectReference Include="..\HeuristicLab.Distributions\HeuristicLab.Distributions.csproj">
     59      <Project>{31171165-e16f-4a1a-a8ab-25c6ab3a71b9}</Project>
     60      <Name>HeuristicLab.Distributions</Name>
     61    </ProjectReference>
    55 62  </ItemGroup>
    56 63  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/IBandit.cs

    r12876 r12893  
    8 8  public interface IBandit {
    9 9    int NumArms { get; }
    10     double OptimalExpectedReward { get; } // expected reward of the best arm, for calculating regret
    11 10    int OptimalExpectedRewardArm { get; } // arm which is optimal for optimization of expected reward
    12 11    int OptimalMaximalRewardArm { get; } // arm which is optimal for optimization of maximal reward
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/ParetoBandit.cs

    r12876 r12893  
    12 12    private double[] pZero;
    13 13    public int NumArms { get { return alpha.Length; } }
    14     public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
    15 14    public int OptimalExpectedRewardArm { get; private set; }
    16 15    public int OptimalMaximalRewardArm { get; private set; }
    17     public double MaxReward { get; private set; }
    18     public double MinReward { get; private set; }
    19 16    private readonly Random random;
    20     public ParetoBandit(Random random, IEnumerable<double> alpha) : this(random, alpha, alpha.Select(_ => 0.0)) { }
    21     public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero) { // probability of a zero reward
     17
     18    public ParetoBandit(Random random, IEnumerable<double> alpha) {
     19      this.alpha = alpha.ToArray();
     20      this.pZero = new double[this.alpha.Length];
     21      this.random = random;
     22      OptimalExpectedRewardArm = Array.IndexOf(this.alpha, alpha.Min());
     23      OptimalMaximalRewardArm = OptimalExpectedRewardArm;
     24    }
     25    public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero, int bestExpRewardArm, int bestMaxRewardArm) { // probability of a zero reward
    22 26      this.alpha = alpha.ToArray();
    23 27      this.pZero = pZero.ToArray();
    24 28      this.random = random;
    25 
    26       // find optimal arms using empirical estimates
    27       var bestExpReward = double.NegativeInfinity;
    28       var bestMaxReward = double.NegativeInfinity;
    29       for (int k = 0; k < NumArms; k++) {
    30         double expReward = 0.0;
    31         double maxReward = double.NegativeInfinity;
    32         for (int i = 0; i < 100000; i++) {
    33           var r = Pull(k);
    34           expReward += r;
    35           maxReward = Math.Max(maxReward, r);
    36         }
    37         expReward /= 100000;
    38 
    39         if (expReward > bestExpReward) {
    40           bestExpReward = expReward;
    41           OptimalExpectedRewardArm = k;
    42           OptimalExpectedReward = expReward;
    43         }
    44         if (maxReward > bestMaxReward) {
    45           bestMaxReward = maxReward;
    46           OptimalMaximalRewardArm = k;
    47         }
    48       }
     29      OptimalExpectedRewardArm = bestExpRewardArm;
     30      OptimalMaximalRewardArm = bestMaxRewardArm;
    49 31    }
    50 32
Note: See TracChangeset for help on using the changeset viewer.