Context Navigation

← Previous Change
Next Change →

GrammarPolicies

Timestamp:

01/18/15 18:24:58 (9 years ago)

Author:

gkronber

Message:

#2283 fixed compile errors and refactoring

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies

Files:

5 deleted
5 edited

BoltzmanExplorationPolicy.cs (deleted)
EpsGreedyPolicy.cs (deleted)
GenericGrammarPolicy.cs (modified) (7 diffs)
GrammarPolicy.cs (modified) (3 diffs)
GreedyPolicy.cs (deleted)
IGrammarPolicy.cs (modified) (1 diff)
RandomNoResamplingPolicy.cs (deleted)
RandomPolicy.cs (modified) (1 diff)
TDPolicy.cs (modified) (7 diffs)
UCTPolicy.cs (deleted)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs

-                      r11792
+                      r11793
     private readonly IProblem problem;
     private readonly IBanditPolicy banditPolicy;
     private readonly HashSet<string> done;
+    //private readonly HashSet<string> done;
     public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
 …
       this.banditPolicy = banditPolicy;
       this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
       this.done = new HashSet<string>();
+      //this.done = new HashSet<string>();
+    }
+    public bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates,
+      out ReadonlySequence selectedState) {
+      // only select states that are not yet done
+      afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a))).ToArray();
+      if (!afterStates.Any()) {
+    public bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
+      // fail if all states are done (corresponding state infos are disabled)
+      if (afterStates.All(s => GetStateInfo(s).Disabled)) {
         // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored)
         done.Add(CanonicalState(curState));
         selectedState = null;
+        GetStateInfo(curState).Disable(0.0); // should the value be max of afterstate values instead of 0.0?
+        selectedStateIdx = -1;
         return false;
+      }
+      selectedStateIdx = banditPolicy.SelectAction(random, afterStates.Select(s => GetStateInfo(s)));
-      var selectedIdx = banditPolicy.SelectAction(random, afterStates.Select(s => GetStateInfo(s)));
-      selectedState = afterStates.ElementAt(selectedIdx);
       return true;
+    }
     private IBanditPolicyActionInfo GetStateInfo(ReadonlySequence state) {
+    private IBanditPolicyActionInfo GetStateInfo(string state) {
       var s = CanonicalState(state);
       IBanditPolicyActionInfo info;
 …
+    }
     public virtual void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
+    public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
       // the last state could be terminal
       var lastState = stateTrajectory.Last();
+      if (lastState.IsTerminal) done.Add(CanonicalState(lastState));
+      if (problem.Grammar.IsTerminal(lastState)) {
+        GetStateInfo(lastState).Disable(reward);
+      }
+      foreach (var state in stateTrajectory) {
+      // update remaining states
+      foreach (var state in stateTrajectory.Reverse().Skip(1)) {
         GetStateInfo(state).UpdateReward(reward);
+      }
 …
     public virtual void Reset() {
       stateInfo.Clear();
       done.Clear();
+      //done.Clear();
+    }
     public int GetTries(ReadonlySequence state) {
+    public int GetTries(string state) {
       var s = CanonicalState(state);
       if (stateInfo.ContainsKey(s)) return stateInfo[s].Tries;
 …
+    }
     public double GetValue(ReadonlySequence state) {
+    public double GetValue(string state) {
       var s = CanonicalState(state);
       if (stateInfo.ContainsKey(s)) return stateInfo[s].Value;
 …
+    }
     protected string CanonicalState(ReadonlySequence state) {
+    protected string CanonicalState(string state) {
       if (useCanonicalState) {
         if (state.IsTerminal)
           return problem.CanonicalRepresentation(state.ToString());
+        if (problem.Grammar.IsTerminal(state))
+          return problem.CanonicalRepresentation(state);
         else {
           // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
 …
           // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
           // solution: we disable the state rS4
           return problem.CanonicalRepresentation(state.ToString()) + state.Length;
+          return problem.CanonicalRepresentation(state) + state.Length;
+        }
       } else
         return state.ToString();
+        return state;
+    }
+  }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GrammarPolicy.cs

-                      r11770
+                      r11793
 namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
   // stores: tries, avg reward and max reward for each state
+  // stores: tries, avg reward and max reward for each state (base class for RandomPolicy and TDPolicy
   public abstract class GrammarPolicy : IGrammarPolicy {
     protected Dictionary<string, double> avgReward;
     protected Dictionary<string, int> tries;
     protected Dictionary<string, double> maxReward;
     private readonly bool useCanonicalState;
     private readonly IProblem problem;
+    protected readonly bool useCanonicalState;
+    protected readonly IProblem problem;
     public GrammarPolicy(IProblem problem, bool useCanonicalState = false) {
+    protected GrammarPolicy(IProblem problem, bool useCanonicalState = false) {
       this.useCanonicalState = useCanonicalState;
       this.problem = problem;
 …
+    }
     public abstract bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState);
+    public abstract bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx);
     public virtual void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
+    public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
       foreach (var state in stateTrajectory) {
         var s = CanonicalState(state.ToString());
+        var s = CanonicalState(state);
         if (!tries.ContainsKey(s)) tries.Add(s, 0);
 …
+    }
     public double AvgReward(ReadonlySequence state) {
       var s = CanonicalState(state.ToString());
+    public double AvgReward(string state) {
+      var s = CanonicalState(state);
       if (avgReward.ContainsKey(s)) return avgReward[s];
       else return 0.0;
+    }
     public double MaxReward(ReadonlySequence state) {
       var s = CanonicalState(state.ToString());
+    public double MaxReward(string state) {
+      var s = CanonicalState(state);
       if (maxReward.ContainsKey(s)) return maxReward[s];
       else return 0.0;
+    }
     public virtual int GetTries(ReadonlySequence state) {
       var s = CanonicalState(state.ToString());
+    public virtual int GetTries(string state) {
+      var s = CanonicalState(state);
       if (tries.ContainsKey(s)) return tries[s];
       else return 0;
+    }
     public virtual double GetValue(ReadonlySequence state) {
+    public virtual double GetValue(string state) {
       return AvgReward(state);
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/IGrammarPolicy.cs

r11770	r11793
8	8
9	9	namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
10		public interface IGrammarPolicy : IPolicy<~~ReadonlySequence~~> {
	10	public interface IGrammarPolicy : IPolicy<string> {
11	11	}
12	12	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomPolicy.cs

r11770	r11793
13	13	}
14	14
15		public override bool TrySelect(Random random, ~~ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState~~) {
	15	public override bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
16	16	// never fail => allows re-visits of terminal states
17		selectedState ~~= afterStates.SelectRandom(random~~);
	17	selectedStateIdx = random.Next(afterStates.Count());
18	18	return true;
19	19	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs

-                      r11770
+                      r11793
 using System.Collections.Generic;
 using System.Configuration;
+using System.Diagnostics;
 using System.Linq;
 using System.Security.Policy;
 …
 using System.Threading;
 using System.Threading.Tasks;
+using HeuristicLab.Algorithms.Bandits.BanditPolicies;
 using HeuristicLab.Common;
 using HeuristicLab.Problems.GrammaticalOptimization;
 …
     private readonly HashSet<string> done;
     private readonly Dictionary<string, double> v;
     private EpsGreedyPolicy epsGreedy;
+    private IGrammarPolicy epsGreedy;
     public TDPolicy(IProblem problem, bool useCanonicalRepresentation = false)
 …
       this.done = new HashSet<string>();
       this.v = new Dictionary<string, double>();
       this.epsGreedy = new EpsGreedyPolicy(problem, useCanonicalRepresentation, 0.1);
+      this.epsGreedy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.1), useCanonicalRepresentation);
+    }
     public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {
+    public override bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
       // only select states that are not yet done
       afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a.ToString()))).ToArray();
       if (!afterStates.Any()) {
         // fail because all follow states have already been visited => also disable the current state
         done.Add(CanonicalState(curState.ToString()));
         selectedState = null;
+        done.Add(CanonicalState(curState));
+        selectedStateIdx = -1;
         return false;
+      }
+      throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable
       //return epsGreedy.TrySelect(random, curState, afterStates, out selectedState);
       var bestQ = double.NegativeInfinity;
+      selectedState = null;
+      int idx = -1;
+      selectedStateIdx = -1;
       foreach (var state in afterStates) {
+        idx++;
         // try each state at least once
         if (GetTries(state) == 0) {
           selectedState = state;
+          selectedStateIdx = idx;
           return true;
+        }
 …
         if (q > bestQ) {
           bestQ = q;
           selectedState = state;
+          selectedStateIdx = idx;
+        }
+      }
+      Debug.Assert(selectedStateIdx > -1);
       return true;
+    }
     private double V(ReadonlySequence state) {
       var s = CanonicalState(state.ToString());
+    private double V(string state) {
+      var s = CanonicalState(state);
       if (v.ContainsKey(s)) return v[s];
       else return 0.0;
+    }
     public override void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
+    public override void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
       base.UpdateReward(stateTrajectory, reward);
       epsGreedy.UpdateReward(stateTrajectory, reward);
       // the last state could be terminal
       var lastState = stateTrajectory.Last();
       if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));
+      if (problem.Grammar.IsTerminal(lastState)) done.Add(CanonicalState(lastState));
       v[CanonicalState(lastState.ToString())] = V(lastState) + 1.0 / GetTries(lastState) * (reward - V(lastState));
+      v[CanonicalState(lastState)] = V(lastState) + 1.0 / GetTries(lastState) * (reward - V(lastState));
       foreach (var p in stateTrajectory.Zip(stateTrajectory.Skip(1), Tuple.Create).Reverse()) {
 …
         var next = p.Item2;
         v[CanonicalState(cur.ToString())] = V(cur) + 1.0 / GetTries(cur) * (V(next) - V(cur));
+        v[CanonicalState(cur)] = V(cur) + 1.0 / GetTries(cur) * (V(next) - V(cur));
         //v[CanonicalState(cur.ToString())] = V(cur) + 0.1 * (V(next) - V(cur));
+      }
 …
+    }
     public override double GetValue(ReadonlySequence state) {
+    public override double GetValue(string state) {
       return V(state);
+    }
     public void Reset() {
+    public override void Reset() {
       base.Reset();
       epsGreedy.Reset();

Note: See TracChangeset for help on using the changeset viewer.