Context Navigation

← Previous Changeset
Next Changeset →

Changeset 11793

Timestamp:

01/18/15 18:24:58 (10 years ago)

Author:

gkronber

Message:

#2283 fixed compile errors and refactoring

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization

Files:

: 2 added
: 6 deleted
: 25 edited

HeuristicLab.Algorithms.Bandits/BanditPolicies/EmptyPolicyActionInfo.cs (deleted)
HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/BoltzmanExplorationPolicy.cs (deleted)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/EpsGreedyPolicy.cs (deleted)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs (modified) (7 diffs)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/GrammarPolicy.cs (modified) (3 diffs)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/GreedyPolicy.cs (deleted)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/IGrammarPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomNoResamplingPolicy.cs (deleted)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs (modified) (7 diffs)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/UCTPolicy.cs (deleted)
HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/IPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.GrammaticalOptimization/ContextualMctsSampler.cs (added)
HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveBreadthFirstSearch.cs (modified) (1 diff)
HeuristicLab.Algorithms.GrammaticalOptimization/MctsQLearningSampler.cs (added)
HeuristicLab.Algorithms.GrammaticalOptimization/RandomSearch.cs (modified) (1 diff)
HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs (modified) (9 diffs)
HeuristicLab.Problems.GrammaticalOptimization.Test/TestSequence.cs (modified) (5 diffs)
HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs (modified) (2 diffs)
HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/IGrammar.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/ReadonlySequence.cs (modified) (2 diffs)
HeuristicLab.Problems.GrammaticalOptimization/RoyalPairProblem.cs (modified) (2 diffs)
HeuristicLab.Problems.GrammaticalOptimization/RoyalSymbolProblem.cs (modified) (2 diffs)
HeuristicLab.Problems.GrammaticalOptimization/RoyalTreeProblem.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/Sequence.cs (modified) (6 diffs)
Main/Program.cs (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs

r11742	r11793
26	26	public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
27	27	Debug.Assert(actionInfos.Any());
28		if (random.NextDouble() > ~~eps) {~~
	28	if (random.NextDouble() >= eps) { // eps == 0 should be equivalent to pure exploitation, eps == 1 is pure exploration
29	29	// select best
30	30	var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs

-                      r11792
+                      r11793
     private readonly IProblem problem;
     private readonly IBanditPolicy banditPolicy;
     private readonly HashSet<string> done;
+    //private readonly HashSet<string> done;
     public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
 …
       this.banditPolicy = banditPolicy;
       this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
       this.done = new HashSet<string>();
+      //this.done = new HashSet<string>();
+    }
+    public bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates,
+      out ReadonlySequence selectedState) {
+      // only select states that are not yet done
+      afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a))).ToArray();
+      if (!afterStates.Any()) {
+    public bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
+      // fail if all states are done (corresponding state infos are disabled)
+      if (afterStates.All(s => GetStateInfo(s).Disabled)) {
         // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored)
         done.Add(CanonicalState(curState));
         selectedState = null;
+        GetStateInfo(curState).Disable(0.0); // should the value be max of afterstate values instead of 0.0?
+        selectedStateIdx = -1;
         return false;
+      }
+      selectedStateIdx = banditPolicy.SelectAction(random, afterStates.Select(s => GetStateInfo(s)));
-      var selectedIdx = banditPolicy.SelectAction(random, afterStates.Select(s => GetStateInfo(s)));
-      selectedState = afterStates.ElementAt(selectedIdx);
       return true;
+    }
     private IBanditPolicyActionInfo GetStateInfo(ReadonlySequence state) {
+    private IBanditPolicyActionInfo GetStateInfo(string state) {
       var s = CanonicalState(state);
       IBanditPolicyActionInfo info;
 …
+    }
     public virtual void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
+    public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
       // the last state could be terminal
       var lastState = stateTrajectory.Last();
+      if (lastState.IsTerminal) done.Add(CanonicalState(lastState));
+      if (problem.Grammar.IsTerminal(lastState)) {
+        GetStateInfo(lastState).Disable(reward);
+      }
+      foreach (var state in stateTrajectory) {
+      // update remaining states
+      foreach (var state in stateTrajectory.Reverse().Skip(1)) {
         GetStateInfo(state).UpdateReward(reward);
+      }
 …
     public virtual void Reset() {
       stateInfo.Clear();
       done.Clear();
+      //done.Clear();
+    }
     public int GetTries(ReadonlySequence state) {
+    public int GetTries(string state) {
       var s = CanonicalState(state);
       if (stateInfo.ContainsKey(s)) return stateInfo[s].Tries;
 …
+    }
     public double GetValue(ReadonlySequence state) {
+    public double GetValue(string state) {
       var s = CanonicalState(state);
       if (stateInfo.ContainsKey(s)) return stateInfo[s].Value;
 …
+    }
     protected string CanonicalState(ReadonlySequence state) {
+    protected string CanonicalState(string state) {
       if (useCanonicalState) {
         if (state.IsTerminal)
           return problem.CanonicalRepresentation(state.ToString());
+        if (problem.Grammar.IsTerminal(state))
+          return problem.CanonicalRepresentation(state);
         else {
           // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
 …
           // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
           // solution: we disable the state rS4
           return problem.CanonicalRepresentation(state.ToString()) + state.Length;
+          return problem.CanonicalRepresentation(state) + state.Length;
+        }
       } else
         return state.ToString();
+        return state;
+    }
+  }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GrammarPolicy.cs

-                      r11770
+                      r11793
 namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
   // stores: tries, avg reward and max reward for each state
+  // stores: tries, avg reward and max reward for each state (base class for RandomPolicy and TDPolicy)
   public abstract class GrammarPolicy : IGrammarPolicy {
     protected Dictionary<string, double> avgReward;
     protected Dictionary<string, int> tries;
     protected Dictionary<string, double> maxReward;
     private readonly bool useCanonicalState;
     private readonly IProblem problem;
+    protected readonly bool useCanonicalState;
+    protected readonly IProblem problem;
     public GrammarPolicy(IProblem problem, bool useCanonicalState = false) {
+    protected GrammarPolicy(IProblem problem, bool useCanonicalState = false) {
       this.useCanonicalState = useCanonicalState;
       this.problem = problem;
 …
+    }
     public abstract bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState);
+    public abstract bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx);
     public virtual void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
+    public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
       foreach (var state in stateTrajectory) {
         var s = CanonicalState(state.ToString());
+        var s = CanonicalState(state);
         if (!tries.ContainsKey(s)) tries.Add(s, 0);
 …
+    }
     public double AvgReward(ReadonlySequence state) {
       var s = CanonicalState(state.ToString());
+    public double AvgReward(string state) {
+      var s = CanonicalState(state);
       if (avgReward.ContainsKey(s)) return avgReward[s];
       else return 0.0;
+    }
     public double MaxReward(ReadonlySequence state) {
       var s = CanonicalState(state.ToString());
+    public double MaxReward(string state) {
+      var s = CanonicalState(state);
       if (maxReward.ContainsKey(s)) return maxReward[s];
       else return 0.0;
+    }
     public virtual int GetTries(ReadonlySequence state) {
       var s = CanonicalState(state.ToString());
+    public virtual int GetTries(string state) {
+      var s = CanonicalState(state);
       if (tries.ContainsKey(s)) return tries[s];
       else return 0;
+    }
     public virtual double GetValue(ReadonlySequence state) {
+    public virtual double GetValue(string state) {
       return AvgReward(state);
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/IGrammarPolicy.cs

r11770	r11793
8	8
9	9	namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
10		public interface IGrammarPolicy : IPolicy<~~ReadonlySequence~~> {
	10	public interface IGrammarPolicy : IPolicy<string> {
11	11	}
12	12	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomPolicy.cs

r11770	r11793
13	13	}
14	14
15		public override bool TrySelect(Random random, ~~ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState~~) {
	15	public override bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
16	16	// never fail => allows re-visits of terminal states
17		selectedState ~~= afterStates.SelectRandom(random~~);
	17	selectedStateIdx = random.Next(afterStates.Count());
18	18	return true;
19	19	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs

-                      r11770
+                      r11793
 using System.Collections.Generic;
 using System.Configuration;
+using System.Diagnostics;
 using System.Linq;
 using System.Security.Policy;
 …
 using System.Threading;
 using System.Threading.Tasks;
+using HeuristicLab.Algorithms.Bandits.BanditPolicies;
 using HeuristicLab.Common;
 using HeuristicLab.Problems.GrammaticalOptimization;
 …
     private readonly HashSet<string> done;
     private readonly Dictionary<string, double> v;
     private EpsGreedyPolicy epsGreedy;
+    private IGrammarPolicy epsGreedy;
     public TDPolicy(IProblem problem, bool useCanonicalRepresentation = false)
 …
       this.done = new HashSet<string>();
       this.v = new Dictionary<string, double>();
       this.epsGreedy = new EpsGreedyPolicy(problem, useCanonicalRepresentation, 0.1);
+      this.epsGreedy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.1), useCanonicalRepresentation);
+    }
     public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {
+    public override bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
       // only select states that are not yet done
       afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a.ToString()))).ToArray();
       if (!afterStates.Any()) {
         // fail because all follow states have already been visited => also disable the current state
         done.Add(CanonicalState(curState.ToString()));
         selectedState = null;
+        done.Add(CanonicalState(curState));
+        selectedStateIdx = -1;
         return false;
+      }
+      throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable
       //return epsGreedy.TrySelect(random, curState, afterStates, out selectedState);
       var bestQ = double.NegativeInfinity;
+      selectedState = null;
+      int idx = -1;
+      selectedStateIdx = -1;
       foreach (var state in afterStates) {
+        idx++;
         // try each state at least once
         if (GetTries(state) == 0) {
           selectedState = state;
+          selectedStateIdx = idx;
           return true;
+        }
 …
         if (q > bestQ) {
           bestQ = q;
           selectedState = state;
+          selectedStateIdx = idx;
+        }
+      }
+      Debug.Assert(selectedStateIdx > -1);
       return true;
+    }
     private double V(ReadonlySequence state) {
       var s = CanonicalState(state.ToString());
+    private double V(string state) {
+      var s = CanonicalState(state);
       if (v.ContainsKey(s)) return v[s];
       else return 0.0;
+    }
     public override void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
+    public override void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
       base.UpdateReward(stateTrajectory, reward);
       epsGreedy.UpdateReward(stateTrajectory, reward);
       // the last state could be terminal
       var lastState = stateTrajectory.Last();
       if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));
+      if (problem.Grammar.IsTerminal(lastState)) done.Add(CanonicalState(lastState));
       v[CanonicalState(lastState.ToString())] = V(lastState) + 1.0 / GetTries(lastState) * (reward - V(lastState));
+      v[CanonicalState(lastState)] = V(lastState) + 1.0 / GetTries(lastState) * (reward - V(lastState));
       foreach (var p in stateTrajectory.Zip(stateTrajectory.Skip(1), Tuple.Create).Reverse()) {
 …
         var next = p.Item2;
         v[CanonicalState(cur.ToString())] = V(cur) + 1.0 / GetTries(cur) * (V(next) - V(cur));
+        v[CanonicalState(cur)] = V(cur) + 1.0 / GetTries(cur) * (V(next) - V(cur));
         //v[CanonicalState(cur.ToString())] = V(cur) + 0.1 * (V(next) - V(cur));
+      }
 …
+    }
     public override double GetValue(ReadonlySequence state) {
+    public override double GetValue(string state) {
       return V(state);
+    }
     public void Reset() {
+    public override void Reset() {
       base.Reset();
       epsGreedy.Reset();

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

-                      r11770
+                      r11793
     <Compile Include="Bandits\IBandit.cs" />
     <Compile Include="Bandits\TruncatedNormalBandit.cs" />
-    <Compile Include="GrammarPolicies\BoltzmanExplorationPolicy.cs" />
     <Compile Include="GrammarPolicies\GenericGrammarPolicy.cs">
       <SubType>Code</SubType>
     </Compile>
+    <Compile Include="GrammarPolicies\RandomPolicy.cs">
+      <SubType>Code</SubType>
+    </Compile>
     <Compile Include="GrammarPolicies\TDPolicy.cs" />
-    <Compile Include="GrammarPolicies\UCTPolicy.cs" />
     <Compile Include="GrammarPolicies\GrammarPolicy.cs" />
-    <Compile Include="GrammarPolicies\EpsGreedyPolicy.cs" />
-    <Compile Include="GrammarPolicies\GreedyPolicy.cs" />
     <Compile Include="GrammarPolicies\IGrammarPolicy.cs" />
-    <Compile Include="GrammarPolicies\RandomNoResamplingPolicy.cs" />
-    <Compile Include="GrammarPolicies\RandomPolicy.cs" />
     <Compile Include="IPolicy.cs" />
     <Compile Include="IBanditPolicy.cs" />

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs

-                      r11770
+                      r11793
   // here we assume that a reward is only received at the end of the episode and the update is done only after an episode is complete
   // we also assume that the policy can fail to select one of the followStates
   public interface IPolicy<TState> {
     bool TrySelect(Random random, TState curState, IEnumerable<TState> afterStates, out TState selectedState); // selectedState \in afterStates
+  public interface IPolicy<in TState> {
+    bool TrySelect(Random random, TState curState, IEnumerable<TState> afterStates, out int selectedStateIdx); // selectedState \in afterStates
     // state-trajectory are the states of the episode, at the end we received the reward (only for the terminal state)

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveBreadthFirstSearch.cs

r11732	r11793
42	42
43	43	char nt = phrase.FirstNonTerminal;
44		~~int ntIdx;~~
45	44
46	45	var alts = grammar.GetAlternatives(nt);

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/RandomSearch.cs

-                      r11732
+                      r11793
 using System;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
 using HeuristicLab.Problems.GrammaticalOptimization;

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs

-                      r11792
+                      r11793
   // 3) Collect reward and update policy (feedback: state of visited rewards from step 2)
   public class SequentialSearch {
+    // only for storing states so that it is not necessary to allocate new state strings whenever we select a follow state using the policy
+    private class TreeNode {
+      public int randomTries;
+      public string phrase;
+      public Sequence alternative;
+      public TreeNode[] children;
+      public TreeNode(string phrase, Sequence alternative) {
+        this.alternative = alternative;
+        this.phrase = phrase;
+      }
+    }
     public event Action<string, double> FoundNewBestSolution;
 …
     private readonly IGrammarPolicy behaviourPolicy;
     private readonly IGrammarPolicy greedyPolicy;
+    private TreeNode rootNode;
+    private int tries;
     private int maxSearchDepth;
     private double bestQuality;
     private string bestPhrase;
+    private int tries;
+    private readonly List<ReadonlySequence> stateChain;
+    private readonly List<string> stateChain;
     public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
 …
       this.randomTries = randomTries;
       this.behaviourPolicy = behaviourPolicy;
+      this.greedyPolicy = new GreedyPolicy(problem, false);
+      this.stateChain = new List<ReadonlySequence>();
+      this.cache = new Dictionary<ReadonlySequence, ReadonlySequence[]>();
+      this.greedyPolicy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.0), false);
+      this.stateChain = new List<string>();
+    }
     public void Run(int maxIterations) {
       bestQuality = double.MinValue;
-      //InitPolicies(problem.Grammar);
       Reset();
 …
     private ReadonlySequence SampleSentence(IGrammar grammar) {
       ReadonlySequence phrase;
+    private Sequence SampleSentence(IGrammar grammar) {
+      Sequence phrase;
       do {
         stateChain.Clear();
         phrase = new ReadonlySequence(grammar.SentenceSymbol);
+        phrase = new Sequence(rootNode.phrase);
         //var startPhrase = new Sequence("a*b+c*d+e*f+E");
       } while (!Done() && !TryCompleteSentence(grammar, ref phrase));
 …
+    }
     private bool TryCompleteSentence(IGrammar g, ref ReadonlySequence phrase) {
+    private bool TryCompleteSentence(IGrammar g, ref Sequence phrase) {
       if (phrase.Length > maxLen) throw new ArgumentException();
       if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
       var curDepth = 0;
+      stateChain.Add(phrase);
+      var n = rootNode;
+      stateChain.Add(n.phrase);
       while (!phrase.IsTerminal) {
+        var newPhrases = GenerateFollowStates(g, phrase);
+        throw new NotImplementedException(); // TODO: reintroduce random-trie checking once the tree of all states has been reintroduced
         //if (n.randomTries < randomTries) {
         //  n.randomTries++;
         //  treeDepth = Math.Max(treeDepth, curDepth);
         //  lastNode = n;
         //  return g.CompleteSentenceRandomly(random, phrase, maxLen);
+        //  curDepth = Math.Max(curDepth, curDepth);
+        //  g.CompleteSentenceRandomly(random, phrase, maxLen);
+        //  return true;
         //} else {
+          // => select using bandit policy
+          // failure means we simply restart
+          if (!behaviourPolicy.TrySelect(random, phrase, newPhrases, out phrase)) {
+            return false;
+          }
+        // }
+        stateChain.Add(phrase);
+        // => select using bandit policy
+        // failure means we simply restart
+        GenerateFollowStates(n); // creates child nodes for node n
+        int selectedChildIdx;
+        if (!behaviourPolicy.TrySelect(random, n.phrase, n.children.Select(ch => ch.phrase), out selectedChildIdx)) {
+          return false;
+        }
+        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, n.children[selectedChildIdx].alternative);
+        // prepare for next iteration
+        n = n.children[selectedChildIdx];
+        stateChain.Add(n.phrase);
         curDepth++;
+        //}
       } // while
 …
     private readonly Dictionary<ReadonlySequence, ReadonlySequence[]> cache;
     private IEnumerable<ReadonlySequence> GenerateFollowStates(IGrammar g, ReadonlySequence phrase) {
       throw new NotImplementedException();
       // TODO: Replace caching by a tree of all states. tree is only used for easily retrieving the follow-states of a state
       ReadonlySequence[] follow;
       //if (!cache.TryGetValue(phrase, out follow)) {
+    private IEnumerable<string> GenerateFollowStates(TreeNode n) {
+      // create children on the first visit
+      if (n.children == null) {
+        var g = problem.Grammar;
+        // tree is only used for easily retrieving the follow-states of a state
+        var phrase = new Sequence(n.phrase);
         char nt = phrase.FirstNonTerminal;
 …
         var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
         follow = new ReadonlySequence[alts.Count()];
+        var children = new TreeNode[alts.Count()];
         int idx = 0;
         foreach (var alt in alts) {
           var newPhrase = new Sequence(phrase); // clone
           newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
           follow[idx++] = new ReadonlySequence(newPhrase);
+        }
       //  cache[phrase] = follow;
       //}
       return follow;
+          children[idx++] = new TreeNode(newPhrase.ToString(), alt);
+        }
+        n.children = children;
+      }
+      return n.children.Select(ch => ch.phrase);
+    }
 …
       bestQuality = 0.0;
       tries = 0;
       cache.Clear();
+      rootNode = new TreeNode(problem.Grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
+    }
     public bool Done() {
+      var g = problem.Grammar;
+      var startState = new ReadonlySequence(g.SentenceSymbol);
+      var follow = GenerateFollowStates(g, startState);
+      ReadonlySequence selectedState;
+      return !behaviourPolicy.TrySelect(random, startState, follow, out selectedState);
+      int selectedStateIdx;
+      return !behaviourPolicy.TrySelect(random, rootNode.phrase, GenerateFollowStates(rootNode), out selectedStateIdx);
+    }
 …
       Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);
+      // use greedy strategy to generate the currently preferred sentence
+      var phrase = new ReadonlySequence(problem.Grammar.SentenceSymbol);
+      // use behaviour strategy to generate the currently preferred sentence
       var policy = behaviourPolicy;
+      while (!phrase.IsTerminal) {
+      var n = rootNode;
+      while (n != null) {
+        var phrase = n.phrase;
         Console.ForegroundColor = ConsoleColor.White;
         Console.WriteLine("{0,-30}", phrase);
         var newPhrases = GenerateFollowStates(problem.Grammar, phrase);
         if (!newPhrases.Any()) break;
         var values = newPhrases.Select(p => policy.GetValue(p));
+        var children = n.children;
+        if (children == null || !children.Any()) break;
+        var values = children.Select(ch => policy.GetValue(ch.phrase));
         var maxValue = values.Max();
         if (maxValue == 0) maxValue = 1.0;
         // write phrases
         foreach (var p in newPhrases) {
           SetColorForValue(policy.GetValue(p) / maxValue);
           Console.Write(" {0,-4}", p.Subsequence(Math.Max(0, p.Length - 3), Math.Min(3, p.Length)));
+        foreach (var ch in children) {
+          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+          Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
+        }
         Console.WriteLine();
         // write values
         foreach (var p in newPhrases) {
           SetColorForValue(policy.GetValue(p) / maxValue);
           Console.Write(" {0:F2}", policy.GetValue(p) * 10.0);
+        foreach (var ch in children) {
+          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+          Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
+        }
         Console.WriteLine();
         // write tries
         foreach (var p in newPhrases) {
           SetColorForValue(policy.GetValue(p) / maxValue);
           Console.Write(" {0,4}", policy.GetTries(p));
+        foreach (var ch in children) {
+          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+          Console.Write(" {0,4}", policy.GetTries(ch.phrase));
+        }
         Console.WriteLine();
+        if (!policy.TrySelect(random, phrase, newPhrases, out phrase)) {
+        int selectedChildIdx;
+        if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) {
           break;
+        }
+        n = n.children[selectedChildIdx];
+      }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestSequence.cs

-                      r11730
+                      r11793
           var nt = s.FirstNonTerminal;
           Assert.Fail();
+        }
+        catch (IndexOutOfRangeException e) {
+        } catch (IndexOutOfRangeException e) {
+        }
+      }
 …
           s.ReplaceAt(0, 4, t);
           Assert.Fail();
+        }
+        catch (ArgumentException) { }
+        } catch (ArgumentException) { }
         s.ReplaceAt(0, 2, t); // should work
 …
           s.ReplaceAt(0, 3, t);
           Assert.Fail();
+        }
+        catch (ArgumentException) { }
+        } catch (ArgumentException) { }
         try {
           s.ReplaceAt(1, 2, t);
           Assert.Fail();
+        }
+        catch (ArgumentException) { }
+        } catch (ArgumentException) { }
         s.ReplaceAt(1, 1, new Sequence("A")); // should work
 …
           s.ReplaceAt(-1, 2, t);
           Assert.Fail();
+        }
+        catch (ArgumentException) { }
+        } catch (ArgumentException) { }
+      }
 …
         Assert.AreEqual("AA", sub.ToString());
+      }
+      {
+        var s = new Sequence("aaaAA");
+        var sub = s.Subsequence(2, 3);
+        Assert.AreEqual(1, sub.FirstNonTerminalIndex);
+        Assert.AreEqual("aAA", sub.ToString());
+      }
+    }
+    [TestMethod]
+    public void TestReadonlySequence() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      {
+        var s = new ReadonlySequence("AAaaaAA");
+        var sub = s.Subsequence(0, 7);
+        Assert.AreEqual(0, sub.FirstNonTerminalIndex);
+        Assert.AreEqual("AAaaaAA", sub.ToString());
+      }
+      {
+        var s = new ReadonlySequence("AAaaaAA");
+        var sub = s.Subsequence(0, 3);
+        Assert.AreEqual(0, sub.FirstNonTerminalIndex);
+        Assert.AreEqual("AAa", sub.ToString());
+        sub = sub.Subsequence(1, 2);
+        Assert.AreEqual(0, sub.FirstNonTerminalIndex);
+        Assert.AreEqual("Aa", sub.ToString());
+      }
+      {
+        var s = new ReadonlySequence("AAaaaAA");
+        var sub = s.Subsequence(2, 3);
+        Assert.AreEqual(-1, sub.FirstNonTerminalIndex);
+        Assert.AreEqual("aaa", sub.ToString());
+      }
+      {
+        var s = new ReadonlySequence("AAaaaAA");
+        var sub = s.Subsequence(2, 4);
+        Assert.AreEqual(3, sub.FirstNonTerminalIndex);
+        Assert.AreEqual("aaaA", sub.ToString());
+      }
+      {
+        var s = new ReadonlySequence("AAaaaAA");
+        var sub = s.Subsequence(5, 2);
+        Assert.AreEqual(0, sub.FirstNonTerminalIndex);
+        Assert.AreEqual("AA", sub.ToString());
+      }
+      {
+        var s = new ReadonlySequence("aaaAA");
+        var sub = s.Subsequence(2, 3);
+        Assert.AreEqual(1, sub.FirstNonTerminalIndex);
+        Assert.AreEqual("aAA", sub.ToString());
+      }
+    }
+  }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs

-                      r11770
+                      r11793
     private readonly int numPhrases;
     private readonly int phraseLen;
-    private readonly int numOptimalPhrases;
-    private readonly int numDecoyPhrases;
     private readonly double correctReward;
     private readonly double decoyReward;
 …
                + phrases.Intersect(decoyPhrases).Count() * decoyReward;
       return reward;
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs

-                      r11732
+                      r11793
+    }
+    public bool IsTerminal(string phrase) {
+      // reverse because for our grammars and left-canonical derivation it is more likely that NTs occur near the end of the sequence
+      return phrase.Reverse().All(IsTerminal);
+    }
     public bool IsNonTerminal(char symbol) {
       return nonTerminalSymbols.Contains(symbol);

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs

r11792	r11793
40	40
41	41	public string CanonicalRepresentation(string terminalPhrase) {
42		~~throw new NotImplementedException();~~
43	42	return terminalPhrase;
44	43	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IGrammar.cs

r11730	r11793
23	23
24	24	bool IsTerminal(char symbol);
	25	bool IsTerminal(string phrase);
25	26	bool IsNonTerminal(char symbol);
26	27	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs

r11792	r11793
8	8	double BestKnownQuality(int maxLen);
9	9	IGrammar Grammar { get; }
10		double Evaluate(~~ReadonlySequence~~ sentence);
11		~~ReadonlySequence CanonicalRepresentation(ReadonlySequence~~ terminalPhrase);
	10	double Evaluate(string sentence);
	11	string CanonicalRepresentation(string terminalPhrase);
12	12	}
13	13	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs

r11792	r11793
81	81
82	82	public string CanonicalRepresentation(string terminalPhrase) {
83		~~throw new NotImplementedException();~~
84	83	return terminalPhrase;
85	84	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/ReadonlySequence.cs

-                      r11742
+                      r11793
 using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
 namespace HeuristicLab.Problems.GrammaticalOptimization {
   public class ReadonlySequence : Sequence {
+    private int symbolsOffset = 0; // the sequence does not have to start with the first symbol of the symbols array (when we reuse these arrays)
+    // cloning constructor for readonly sequences
+    // does not allocate the symbols array of the base class
+    // instead: reuse the symbols array (both sequences are readonly)
+    private ReadonlySequence(ReadonlySequence original)
+      : base() {
+      base.symbols = original.symbols;
+      this.symbolsOffset = original.symbolsOffset;
+    }
     public ReadonlySequence(string s)
       : base(s, s.Length) {
 …
+    }
+    public override char this[int idx] { // idx is relative to the start of this sequence, not to the start of the symbols array
+      get {
+        return base[idx + symbolsOffset]; // shift by symbolsOffset to get the position in the underlying symbols array
+      }
+      set {
+        throw new NotSupportedException(); // assigning through the indexer always throws
+      }
+    }
+    public override IEnumerator<char> GetEnumerator() { // enumerates the Length symbols that belong to this sequence
+      return symbols.Skip(symbolsOffset).Take(Length).GetEnumerator(); // skip the part of the symbols array that precedes this sequence, then stop after Length symbols
+    }
+    public override string ToString() { // returns the symbols of this sequence as a string
+      var sb = new StringBuilder(Length); // capacity equals the number of chars appended below
+      sb.Append(symbols, symbolsOffset, Length); // copy Length chars of the symbols array, starting at symbolsOffset
+      return sb.ToString();
+    }
+    public new ReadonlySequence Subsequence(int startIdx, int len) { // returns a sequence of len symbols starting at startIdx; throws ArgumentException when the range is invalid
+      if (startIdx < 0 || len < 0) throw new ArgumentException();
+      if (startIdx >= this.Length) throw new ArgumentException(); // startIdx must address an existing symbol, even when len is 0
+      if (startIdx + len > this.Length) throw new ArgumentException(); // the requested range must not run past the end of this sequence
+      var subsequence = new ReadonlySequence(this) { symbolsOffset = startIdx + this.symbolsOffset, Length = len }; // the cloning constructor reuses this sequence's symbols array; offsets add up so nested subsequences index that array correctly
+      if (FirstNonTerminalIndex < 0) { // negative index on this sequence is propagated to the subsequence as -1
+        subsequence.FirstNonTerminalIndex = -1;
+      } else if (FirstNonTerminalIndex < startIdx) { // the known first non-terminal lies before the requested range
+        // need to find first nt in subsequence
+        subsequence.FirstNonTerminalIndex = -1;
+        for (int i = 0; subsequence.FirstNonTerminalIndex == -1 && i < len; i++) { // stops at the first match or after len symbols
+          if (subsequence[i] >= 'A' && subsequence[i] <= 'Z') subsequence.FirstNonTerminalIndex = i; // characters 'A'..'Z' are treated as non-terminals here
+        }
+      } else if (FirstNonTerminalIndex >= startIdx && FirstNonTerminalIndex < startIdx + len) { // the known first non-terminal lies inside the requested range
+        subsequence.FirstNonTerminalIndex = FirstNonTerminalIndex - startIdx; // re-base the index so it is relative to the subsequence
+      } else { // the known first non-terminal lies behind the requested range
+        Debug.Assert(FirstNonTerminalIndex >= startIdx + len);
+        subsequence.FirstNonTerminalIndex = -1;
+      }
+      return subsequence;
+    }
     public override bool Equals(object obj) {
       var other = obj as Sequence;
+      var other = obj as ReadonlySequence;
       if (other == null) return false;
       if (other.Length != this.Length) return false;

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPairProblem.cs

-                      r11742
+                      r11793
     public RoyalPairProblem() {
       this.grammar = new Grammar(grammarString);
+      // TODO: allow configuration of the number of symbols
+    }
 …
     public string CanonicalRepresentation(string terminalPhrase) {
+      throw new NotImplementedException();
       return terminalPhrase;
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSymbolProblem.cs

-                      r11742
+                      r11793
     public RoyalSymbolProblem() {
       this.grammar = new Grammar(grammarString);
+      //TODO: allow configuration of the number of symbols
+    }
 …
       // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
       Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
       return regex.Matches(sentence).Count;
+      return regex.Matches(sentence.ToString()).Count;
+    }
     public string CanonicalRepresentation(string terminalPhrase) {
+      throw new NotImplementedException();
       return terminalPhrase;
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalTreeProblem.cs

r11742	r11793
30	30	}
31	31	public string CanonicalRepresentation(string terminalPhrase) {
	32	throw new NotImplementedException();
32	33	return terminalPhrase;
33	34	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs

r11770	r11793
101	101
102	102	public string CanonicalRepresentation(string terminalPhrase) {
103		~~//return terminalPhrase;~~
104	103	string oldPhrase;
105	104	do {

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Sequence.cs

-                      r11742
+                      r11793
     private int len;
     private int idxOfFirstNt;
     private readonly char[] symbols;
+    protected char[] symbols;
     public char this[int idx] {
+    public virtual char this[int idx] {
       get { return symbols[idx]; }
       set { throw new NotSupportedException(); }
 …
     public int Length {
       get { return len; }
       private set { len = value; }
+      protected set { len = value; }
+    }
 …
     public int FirstNonTerminalIndex {
       get { return idxOfFirstNt; }
+      protected set { idxOfFirstNt = value; }
+    }
     public char FirstNonTerminal {
       get { return symbols[idxOfFirstNt]; }
+      get { return this[idxOfFirstNt]; }
+    }
 …
+    }
+    // empty constructor does not allocate the symbol array
+    protected Sequence() { }
     public virtual void ReplaceAt(int position, int len, Sequence replacement) {
       if (replacement == null) throw new ArgumentNullException();
 …
         idxOfFirstNt = -1;
         for (int i = startIdxOfRemainingPart; idxOfFirstNt == -1 && i < Length; i++) {
           if (symbols[i] >= 'A' && symbols[i] <= 'Z') idxOfFirstNt = i;
+          if (this[i] >= 'A' && this[i] <= 'Z') idxOfFirstNt = i;
+        }
+      }
+    }
     public IEnumerator<char> GetEnumerator() {
+    public virtual IEnumerator<char> GetEnumerator() {
       return symbols.AsEnumerable().Take(len).GetEnumerator();
+    }
 …
         subsequence.idxOfFirstNt = -1;
         for (int i = 0; subsequence.idxOfFirstNt == -1 && i < len; i++) {
           if (subsequence.symbols[i] >= 'A' && subsequence.symbols[i] <= 'Z') subsequence.idxOfFirstNt = i;
+          if (subsequence[i] >= 'A' && subsequence[i] <= 'Z') subsequence.idxOfFirstNt = i;
+        }
       } else if (idxOfFirstNt >= startIdx && idxOfFirstNt < startIdx + len) {
         subsequence.idxOfFirstNt = idxOfFirstNt;
+        subsequence.idxOfFirstNt = idxOfFirstNt - startIdx;
       } else {
         Debug.Assert(idxOfFirstNt >= startIdx + len);

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

-                      r11792
+                      r11793
       CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
       RunDemo();
+      //RunDemo();
       RunGridTest();
+    }
 …
       // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0.0, phrasesAsSets: true);
       var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
+      //var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
       // Ant
       // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
       // GaussianModelWithUnknownVariance (and Q= 0.99-quantile) also works well for Ant
+      // very good results with:       var alg = new SequentialSearch(problem, 17, random, 0,
+      // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));
+      //var problem = new SantaFeAntProblem();
+      var problem = new SantaFeAntProblem();
       //var problem = new SymbolicRegressionProblem("Tower");
       //var problem = new PalindromeProblem();
 …
       //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
       //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
       var alg = new SequentialSearch(problem, 23, random, 0,
         new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.2), true));
+      var alg = new SequentialSearch(problem, 17, random, 0,
+        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new BoltzmannExplorationPolicy(10), true));
       //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
       //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11793

Legend:

Download in other formats: