Changeset 11770 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomNoResamplingPolicy.cs
- Timestamp: 01/15/15 18:59:07 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomNoResamplingPolicy.cs
r11742 r11770   (columns: old line number, new line number, "+" added / "-" removed / blank unmodified)
  1   1    using System;
  2   2    using System.Collections.Generic;
      3  + using System.Configuration;
  3   4    using System.Linq;
      5  + using System.Security.Policy;
  4   6    using System.Text;
  5      - using System.Threading.Tasks;
  6   7    using HeuristicLab.Common;
  7   8    using HeuristicLab.Problems.GrammaticalOptimization;
  8   9
  9  10    namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
 10      -   public class RandomNoResamplingPolicy : IGrammarPolicy {
     11  +   public class RandomNoResamplingPolicy : GrammarPolicy {
 11  12
 12      -     private readonly Dictionary<ReadonlySequence, bool> done;
 13      -     private readonly Dictionary<Tuple<ReadonlySequence, ReadonlySequence>, ReadonlySequence> nextState;
     13  +     private readonly HashSet<string> done;
 14  14
 15      -
 16      -     public RandomNoResamplingPolicy() {
 17      -       this.done = new Dictionary<ReadonlySequence, bool>();
     15  +     public RandomNoResamplingPolicy(IProblem problem, bool useCanonicalRepresentation)
     16  +       : base(problem, useCanonicalRepresentation) {
     17  +       this.done = new HashSet<string>();
 18  18        }
 19  19
 20      -     public ReadonlySequence SelectAction(Random random, ReadonlySequence state, IEnumerable<ReadonlySequence> actions) {
 21      -       var allDone = true;
 22      -       foreach (var a in actions) {
 23      -         var p = Tuple.Create(state, a);
 24      -         allDone &= nextState.ContainsKey(p) && Done(nextState[p]);
 25      -         if (!allDone) break;
     20  +     public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {
     21  +       // only select states that are not yet done
     22  +       afterStates = afterStates.Where(a => !done.Contains(a.ToString())).ToArray();
     23  +       if (!afterStates.Any()) {
     24  +         // fail because all follow states have already been visited => also disable the current state
     25  +         done.Add(CanonicalState(curState.ToString()));
     26  +         selectedState = null;
     27  +         return false;
 26  28          }
 27      -       if(allDone)
 28      -         return actions
 29      -           .Where(a => !nextState.ContainsKey(Tuple.Create(state, a)) || Done(nextState[Tuple.Create(state, a)]))
 30      -           .SelectRandom(random);
     29  +
     30  +       selectedState = afterStates.SelectRandom(random);
     31  +       return true;
 31  32        }
 32  33
 33      -     public void UpdateReward(ReadonlySequence state, ReadonlySequence action, double reward, ReadonlySequence newState) {
 34      -       var key = Tuple.Create(state, action);
 35      -       nextState[key] = newState;
 36      -       if (newState.IsTerminal) done[newState] = true;
 37      -       if
     34  +     public override void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
     35  +       base.UpdateReward(stateTrajectory, reward);
     36  +       // ignore rewards but update the set of visited terminal states
     37  +
     38  +       // the last state could be terminal
     39  +       var lastState = stateTrajectory.Last();
     40  +       if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));
 38  41        }
 39  42
 40      -     public bool Done(ReadonlySequence state) {
 41      -       return done.ContainsKey(state);
     43  +     public override void Reset() {
     44  +       base.Reset();
     45  +       done.Clear();
 42  46        }
 43  47      }
Note: See TracChangeset for help on using the changeset viewer.