Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/17/15 19:13:19 (9 years ago)
Author:
gkronber
Message:

#2283: implemented first crude version of extreme hunter algorithm in branch

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits
Files:
4 added
6 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/DefaultPolicyActionInfo.cs

    r12290 r12876  
    27 27         MaxReward = Math.Max(MaxReward, reward);
    28 28         var delta = reward - avgValue;
    29    -       //var alpha = 0.01;
    30    -       var alpha = Math.Max(1.0/Tries, 0.01);
       29 +       double alpha = 1.0 / Tries;
    31 30         avgValue = avgValue + alpha * delta;
    32 31       }
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

    r11851 r12876  
    31 31     </PropertyGroup>
    32 32     <ItemGroup>
       33 +     <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
       34 +       <SpecificVersion>False</SpecificVersion>
       35 +       <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath>
       36 +     </Reference>
    33 37       <Reference Include="System" />
    34 38       <Reference Include="System.Core" />
    …
    36 40     <ItemGroup>
    37 41       <Compile Include="ActionInfos\BernoulliPolicyActionInfo.cs" />
       42 +     <Compile Include="ActionInfos\ExtremeHunterActionInfo.cs" />
    38 43       <Compile Include="ActionInfos\DefaultPolicyActionInfo.cs" />
    39 44       <Compile Include="ActionInfos\MeanAndVariancePolicyActionInfo.cs" />
    …
    45 50       <Compile Include="Policies\BoltzmannExplorationPolicy.cs" />
    46 51       <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs" />
       52 +     <Compile Include="Policies\IntervalEstimationPolicy.cs" />
       53 +     <Compile Include="Policies\ExtremeHunterPolicy.cs" />
    47 54       <Compile Include="Policies\EpsGreedyPolicy.cs" />
    48 55       <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs

    r11806 r12876  
     9  9   namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    10 10     public class ActiveLearningPolicy : IBanditPolicy {
       11 +     public double MaxReward { get; private set; }
       12 +     public ActiveLearningPolicy(double maxReward = 1.0) {
       13 +       this.MaxReward = maxReward;
       14 +     }
    11 15       public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    12 16         var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    …
    29 33             q = aInfo.SumReward / aInfo.Tries;
    30 34             var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
    31    -           u = q + 0.5 * b;
    32    -           l = q - 0.5 * b;
       35 +           u = q + MaxReward * b;
       36 +           l = q - MaxReward * b;
    33 37           }
    34 38           bestActions.Add(aIdx);
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs

    r11806 r12876  
    35 35             var avgReward = aInfo.SumReward / aInfo.Tries;
    36 36
    37    -           // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
       37 +           // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
    38 38             // var alpha = Math.Log(2 * totalTries * k / delta);
    39 39             double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs

    r11806 r12876  
    10 10     // policy for k-armed bandit (see Auer et al. 2002)
    11 11     public class UCB1Policy : IBanditPolicy {
       12 +     public double MaxReward { get; private set; }
       13 +     public UCB1Policy(double maxReward = 1.0) {
       14 +       this.MaxReward = maxReward;
       15 +     }
    12 16       public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    13 17         var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    …
    24 28           } else {
    25 29
    26    -           q = aInfo.SumReward / aInfo.Tries + 0.5 * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
       30 +           q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
    27 31           }
    28 32           if (q > bestQ) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs

    r11832 r12876  
     9  9   namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    10 10     // policy for k-armed bandit (see Auer et al. 2002)
       11 +   // specific to Bernoulli distributed rewards
    11 12     public class UCB1TunedPolicy : IBanditPolicy {
    12 13
Note: See TracChangeset for help on using the changeset viewer.