Changeset 12876 for branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits
- Timestamp:
- 08/17/15 19:13:19 (9 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits
- Files:
-
- 4 added
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/DefaultPolicyActionInfo.cs
r12290 r12876 27 27 MaxReward = Math.Max(MaxReward, reward); 28 28 var delta = reward - avgValue; 29 //var alpha = 0.01; 30 var alpha = Math.Max(1.0/Tries, 0.01); 29 double alpha = 1.0 / Tries; 31 30 avgValue = avgValue + alpha * delta; 32 31 } -
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r11851 r12876 31 31 </PropertyGroup> 32 32 <ItemGroup> 33 <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 34 <SpecificVersion>False</SpecificVersion> 35 <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath> 36 </Reference> 33 37 <Reference Include="System" /> 34 38 <Reference Include="System.Core" /> … … 36 40 <ItemGroup> 37 41 <Compile Include="ActionInfos\BernoulliPolicyActionInfo.cs" /> 42 <Compile Include="ActionInfos\ExtremeHunterActionInfo.cs" /> 38 43 <Compile Include="ActionInfos\DefaultPolicyActionInfo.cs" /> 39 44 <Compile Include="ActionInfos\MeanAndVariancePolicyActionInfo.cs" /> … … 45 50 <Compile Include="Policies\BoltzmannExplorationPolicy.cs" /> 46 51 <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs" /> 52 <Compile Include="Policies\IntervalEstimationPolicy.cs" /> 53 <Compile Include="Policies\ExtremeHunterPolicy.cs" /> 47 54 <Compile Include="Policies\EpsGreedyPolicy.cs" /> 48 55 <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs" /> -
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs
r11806 r12876 9 9 namespace HeuristicLab.Algorithms.Bandits.BanditPolicies { 10 10 public class ActiveLearningPolicy : IBanditPolicy { 11 public double MaxReward { get; private set; } 12 public ActiveLearningPolicy(double maxReward = 1.0) { 13 this.MaxReward = maxReward; 14 } 11 15 public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) { 12 16 var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>(); … … 29 33 q = aInfo.SumReward / aInfo.Tries; 30 34 var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries)); 31 u = q + 0.5* b; 32 l = q - 0.5* b; 35 u = q + MaxReward * b; 36 l = q - MaxReward * b; 33 37 } 34 38 bestActions.Add(aIdx); -
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs
r11806 r12876 35 35 var avgReward = aInfo.SumReward / aInfo.Tries; 36 36 37 // page 5 of "A simple distribution-free appr aoch to the max k-armed bandit problem" 37 // page 5 of "A simple distribution-free approach to the max k-armed bandit problem" 38 38 // var alpha = Math.Log(2 * totalTries * k / delta); 39 39 double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs
r11806 r12876 10 10 // policy for k-armed bandit (see Auer et al. 2002) 11 11 public class UCB1Policy : IBanditPolicy { 12 public double MaxReward { get; private set; } 13 public UCB1Policy(double maxReward = 1.0) { 14 this.MaxReward = maxReward; 15 } 12 16 public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) { 13 17 var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>(); … … 24 28 } else { 25 29 26 q = aInfo.SumReward / aInfo.Tries + 0.5* Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries); 30 q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries); 27 31 } 28 32 if (q > bestQ) { -
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs
r11832 r12876 9 9 namespace HeuristicLab.Algorithms.Bandits.BanditPolicies { 10 10 // policy for k-armed bandit (see Auer et al. 2002) 11 // specific to Bernoulli distributed rewards 11 12 public class UCB1TunedPolicy : IBanditPolicy { 12 13
Note: See TracChangeset
for help on using the changeset viewer.