Changeset 11799 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits
- Timestamp: 01/19/15 20:09:12 (10 years ago)
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits
- Files: 6 edited
Legend:
- Unmodified
- Added
- Removed
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs
r11747 r11799 41 41 : Math.Exp(beta * valueFunction(aInfo)); 42 42 43 var bestAction = myActionInfos 44 .Select((aInfo, idx) => new { aInfo, idx }) 45 .SampleProportional(random, w) 46 .Select(p => p.idx) 47 .First(); 43 var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w); 48 44 Debug.Assert(bestAction >= 0); 49 45 return bestAction; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GenericThompsonSamplingPolicy.cs
r11742 r11799 39 39 40 40 public override string ToString() { 41 return string.Format("GenericThompsonSamplingPolicy( \"{0}\")", model);41 return string.Format("GenericThompsonSamplingPolicy({0})", model); 42 42 } 43 43 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Bandits/GaussianMixtureBandit.cs
r11731 r11799 44 44 double x = 0; 45 45 do { 46 var k = Enumerable.Range(0, componentProb[arm].Length).SampleProportional(random, componentProb[arm]) .First();46 var k = Enumerable.Range(0, componentProb[arm].Length).SampleProportional(random, componentProb[arm]); 47 47 48 48 var z = Rand.RandNormal(random); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs
r11793 r11799 14 14 private readonly IProblem problem; 15 15 private readonly IBanditPolicy banditPolicy; 16 //private readonly HashSet<string> done;17 16 18 17 public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) { … … 21 20 this.banditPolicy = banditPolicy; 22 21 this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>(); 23 //this.done = new HashSet<string>();24 22 } 25 23 … … 29 27 // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored) 30 28 31 GetStateInfo(curState).Disable( 0.0); // should the value be max of afterstate values instead of 0.0?29 GetStateInfo(curState).Disable(afterStates.Select(afterState => GetStateInfo(afterState).Value).Max()); 32 30 selectedStateIdx = -1; 33 31 return false; … … 50 48 51 49 public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) { 52 // the last state could be terminal 53 var lastState = stateTrajectory.Last(); 54 if (problem.Grammar.IsTerminal(lastState)) { 55 GetStateInfo(lastState).Disable(reward); 56 } 50 foreach (var state in stateTrajectory) { 51 GetStateInfo(state).UpdateReward(reward); 57 52 58 // update remaining states 59 foreach (var state in stateTrajectory.Reverse().Skip(1)) { 60 GetStateInfo(state).UpdateReward(reward); 53 // only the last state can be terminal 54 if (problem.Grammar.IsTerminal(state)) { 55 GetStateInfo(state).Disable(reward); 56 } 61 57 } 62 58 } … … 64 60 public virtual void Reset() { 65 61 stateInfo.Clear(); 66 //done.Clear();67 62 } 68 63 … … 81 76 protected string CanonicalState(string state) { 82 77 if (useCanonicalState) { 83 if (problem.Grammar.IsTerminal(state)) 84 return problem.CanonicalRepresentation(state); 85 else { 86 // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored 87 // e.g. 
if for the ant problem we have the phrase lllS (and we are limited to 4 symbols) and lllr as well as llll are explored 88 // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored 89 // solution: we disable the state rS4 90 return problem.CanonicalRepresentation(state) + state.Length; 91 } 78 return problem.CanonicalRepresentation(state); 92 79 } else 93 80 return state; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs
r11793 r11799 35 35 return false; 36 36 } 37 throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable 37 throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable (see genericgrammarpolicy) 38 38 39 39 //return epsGreedy.TrySelect(random, curState, afterStates, out selectedState); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianMixtureModel.cs
r11747 r11799 24 24 25 25 public double SampleExpectedReward(Random random) { 26 var k = Enumerable.Range(0, numComponents).SampleProportional(random, componentProbs) .First();26 var k = Enumerable.Range(0, numComponents).SampleProportional(random, componentProbs); 27 27 return alglib.invnormaldistribution(random.NextDouble()) * Math.Sqrt(componentVars[k]) + componentMeans[k]; 28 28 }
Note: See TracChangeset for help on using the changeset viewer.