Context Navigation

← Previous Changeset
Next Changeset →

Changeset 11770

Timestamp:

01/15/15 18:59:07 (10 years ago)

Author:

gkronber

Message:

#2283: worked on generic sequential search alg with bandit policy as parameter

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization

Files:

: 9 added
: 13 edited

HeuristicLab.Algorithms.Bandits/GrammarPolicies/BoltzmanExplorationPolicy.cs (added)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/EpsGreedyPolicy.cs (added)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs (added)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/GrammarPolicy.cs (added)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/GreedyPolicy.cs (added)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/IGrammarPolicy.cs (added)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomNoResamplingPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs (added)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/UCTPolicy.cs (added)
HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/IBanditPolicyActionInfo.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/IPolicy.cs (modified) (2 diffs)
HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj (modified) (1 diff)
HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs (added)
HeuristicLab.Common/ConsoleEx.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/EvenParityProblem.cs (modified) (3 diffs)
HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs (modified) (3 diffs)
HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs (modified) (2 diffs)
HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs (modified) (2 diffs)
Main/Program.cs (modified) (7 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomNoResamplingPolicy.cs

-                      r11742
+                      r11770
 using System;
 using System.Collections.Generic;
+using System.Configuration;
 using System.Linq;
+using System.Security.Policy;
 using System.Text;
-using System.Threading.Tasks;
 using HeuristicLab.Common;
 using HeuristicLab.Problems.GrammaticalOptimization;
 namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
   public class RandomNoResamplingPolicy : IGrammarPolicy {
+  public class RandomNoResamplingPolicy : GrammarPolicy {
+    private readonly Dictionary<ReadonlySequence, bool> done;
+    private readonly Dictionary<Tuple<ReadonlySequence, ReadonlySequence>, ReadonlySequence> nextState;
+    private readonly HashSet<string> done;
     public RandomNoResamplingPolicy() {
       this.done = new Dictionary<ReadonlySequence, bool>();
+    public RandomNoResamplingPolicy(IProblem problem, bool useCanonicalRepresentation)
+      : base(problem, useCanonicalRepresentation) {
+      this.done = new HashSet<string>();
+    }
+    public ReadonlySequence SelectAction(Random random, ReadonlySequence state, IEnumerable<ReadonlySequence> actions) {
+      var allDone = true;
+      foreach (var a in actions) {
+        var p = Tuple.Create(state, a);
+        allDone &= nextState.ContainsKey(p) && Done(nextState[p]);
+        if (!allDone) break;
+    public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {
+      // only select states that are not yet done
+      afterStates = afterStates.Where(a => !done.Contains(a.ToString())).ToArray();
+      if (!afterStates.Any()) {
+        // fail because all follow states have already been visited => also disable the current state
+        done.Add(CanonicalState(curState.ToString()));
+        selectedState = null;
+        return false;
+      }
+      if(allDone)
+      return actions
+        .Where(a => !nextState.ContainsKey(Tuple.Create(state, a)) || Done(nextState[Tuple.Create(state, a)]))
+        .SelectRandom(random);
+      selectedState = afterStates.SelectRandom(random);
+      return true;
+    }
+    public void UpdateReward(ReadonlySequence state, ReadonlySequence action, double reward, ReadonlySequence newState) {
+      var key = Tuple.Create(state, action);
+      nextState[key] = newState;
+      if (newState.IsTerminal) done[newState] = true;
+      if
+    public override void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
+      base.UpdateReward(stateTrajectory, reward);
+      // ignore rewards but update the set of visited terminal states
+      // the last state could be terminal
+      var lastState = stateTrajectory.Last();
+      if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));
+    }
+    public bool Done(ReadonlySequence state) {
+      return done.ContainsKey(state);
+    public override void Reset() {
+      base.Reset();
+      done.Clear();
+    }
+  }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomPolicy.cs

-                      r11742
+                      r11770
 namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
   public class RandomPolicy : IGrammarPolicy {
     public ReadonlySequence SelectAction(Random random, ReadonlySequence state, IEnumerable<ReadonlySequence> actions) {
       return actions.SelectRandom(random);
+  public class RandomPolicy : GrammarPolicy {
+    public RandomPolicy(IProblem problem, bool useCanonicalRepresentation)
+      : base(problem, useCanonicalRepresentation) {
+    }
+    public void UpdateReward(ReadonlySequence state, ReadonlySequence action, double reward, ReadonlySequence newState) {
+      // ignore
+    }
+    public bool Done(ReadonlySequence state) {
+      return false;
+    public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {
+      // never fail => allows re-visits of terminal states
+      selectedState = afterStates.SelectRandom(random);
+      return true;
+    }
+  }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

-                      r11747
+                      r11770
     <Compile Include="Bandits\IBandit.cs" />
     <Compile Include="Bandits\TruncatedNormalBandit.cs" />
+    <Compile Include="GrammarPolicies\BoltzmanExplorationPolicy.cs" />
+    <Compile Include="GrammarPolicies\GenericGrammarPolicy.cs">
+      <SubType>Code</SubType>
+    </Compile>
+    <Compile Include="GrammarPolicies\TDPolicy.cs" />
+    <Compile Include="GrammarPolicies\UCTPolicy.cs" />
+    <Compile Include="GrammarPolicies\GrammarPolicy.cs" />
+    <Compile Include="GrammarPolicies\EpsGreedyPolicy.cs" />
+    <Compile Include="GrammarPolicies\GreedyPolicy.cs" />
+    <Compile Include="GrammarPolicies\IGrammarPolicy.cs" />
+    <Compile Include="GrammarPolicies\RandomNoResamplingPolicy.cs" />
     <Compile Include="GrammarPolicies\RandomPolicy.cs" />
     <Compile Include="IPolicy.cs" />

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IBanditPolicyActionInfo.cs

-                      r11747
+                      r11770
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+namespace HeuristicLab.Algorithms.Bandits {
+namespace HeuristicLab.Algorithms.Bandits {
   public interface IBanditPolicyActionInfo {
     bool Disabled { get; }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs

-                      r11744
+                      r11770
 using System;
 using System.Collections.Generic;
+using System.Dynamic;
 using System.Linq;
 using System.Text;
 …
 namespace HeuristicLab.Algorithms.Bandits {
+  // this interface represents a policy for reinforcement learning
+  public interface IPolicy<in TState, TAction> {
+    TAction SelectAction(Random random, TState state, IEnumerable<TAction> actions);
+    void UpdateReward(TState state, TAction action, double reward, TState newState); // reward received when after taking action in state and new state
+    bool Done(TState state); // for deterministic MDP with deterministic rewards and goal to find a state with max reward
+  }
+  // this interface represents a policy for episodic reinforcement learning (with afterstates)
+  // here we assume that a reward is only received at the end of the episode and the update is done only after an episode is complete
+  // we also assume that the policy can fail to select one of the followStates
+  public interface IPolicy<TState> {
+    bool TrySelect(Random random, TState curState, IEnumerable<TState> afterStates, out TState selectedState); // selectedState \in afterStates
+  public interface IGrammarPolicy : IPolicy<ReadonlySequence, ReadonlySequence> {
+    // the state trajectory contains the states of the episode; the reward is received at the end (only for the terminal state)
+    void UpdateReward(IEnumerable<TState> stateTrajectory, double reward);
+    void Reset(); // clears all internal state
+    // for introspection
+    double GetValue(TState state);
+    int GetTries(TState state);
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

r11755	r11770
45	45	<Compile Include="AlternativesSampler.cs" />
46	46	<Compile Include="AlternativesContextSampler.cs" />
	47	<Compile Include="SequentialSearch.cs" />
47	48	<Compile Include="TemporalDifferenceTreeSearchSampler.cs" />
48	49	<Compile Include="ExhaustiveRandomFirstSearch.cs" />

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/ConsoleEx.cs

-                      r11745
+                      r11770
     public static ConsoleColor ColorForValue(double d) {
+      Debug.Assert(d >= 0 && d <= 1.0);
+      //Debug.Assert(d >= 0 && d <= 1.0);
+      d = Math.Min(1.0, Math.Max(0.0, d));
       var cIdx = Math.Max(0, (int)Math.Floor(d * 15) - 1);
       return colors[cIdx];

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/EvenParityProblem.cs

-                      r11742
+                      r11770
     private const string grammarString = @"
 G(S):
+S -> N | N*S | N+S | !S | (S)
+N -> a | b | c | d
+S -> a | b | c | d | a*S | b*S | c*S | d*S | a+S | b+S | c+S | d+S | !S | (S)
 ";
 …
     private readonly ExpressionInterpreter interpreter = new ExpressionInterpreter();
     public EvenParityProblem() {
       this.grammar = new Grammar (grammarString);
+      this.grammar = new Grammar(grammarString);
+    }
 …
     public string CanonicalRepresentation(string terminalPhrase) {
+      throw new NotImplementedException();
       return terminalPhrase;
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs

r11755	r11770
143	143	if (!phrases.Contains(phrase)) phrases.Add(phrase);
144	144	}
	145	var remainder = terminalPhrase.Substring(numPhrases * phraseLen, terminalPhrase.Length - (numPhrases * phraseLen));
	146	remainder = CanonicalPhrase(remainder);
	147	if (!phrases.Contains(remainder)) phrases.Add(remainder);
	148
145	149	return string.Join("", phrases);
146	150	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs

-                      r11755
+                      r11770
     private readonly double correctReward;
     private readonly double incorrectReward;
-    private readonly int _numCorrectPhrases;
     private readonly int sequenceLen;
-    private readonly int alphabetSize;
     private readonly int phraseLen;
     private readonly bool phrasesAsSets;
 …
       if (correctReward <= incorrectReward) throw new ArgumentException();
-      this.alphabetSize = alphabetSize;
       this.sequenceLen = sequenceLen;
       this.phraseLen = phraseLen;
-      this._numCorrectPhrases = numCorrectPhrases;
       this.correctReward = correctReward;
       this.incorrectReward = incorrectReward;
 …
+        }
+        var remainder = terminalPhrase.Substring(numPhrases * phraseLen, terminalPhrase.Length - (numPhrases * phraseLen));
+        remainder = CanonicalPhrase(remainder);
+        phrases.Add(remainder);
         return string.Join("", phrases);
       } else

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs

-                      r11747
+                      r11770
 A -> l | r | m | ?(A)(A) | lA | rA | mA
 ";
     // original koza grammar
     // Ant -> left | right | move | if-food-ahead Ant Ant | Ant Ant | Ant Ant Ant
 …
       do {
         oldPhrase = terminalPhrase;
         terminalPhrase.Replace("ll", "rr").Replace("rl", "lr");
+        terminalPhrase = terminalPhrase.Replace("ll", "rr").Replace("rl", "lr").Replace("lr", "").Replace("lll", "r").Replace("rrr", "l");
       } while (terminalPhrase != oldPhrase);
       return terminalPhrase;

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs

-                      r11747
+                      r11770
 using System.Collections.Generic;
 using System.Linq;
+using System.Net;
 using System.Security;
 using System.Security.AccessControl;
 …
     // right now only + and * is supported
+    private Dictionary<string, string> cache = new Dictionary<string, string>();
     public string CanonicalRepresentation(string phrase) {
+      var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
+      var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch)));
+      var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch)));
+      string res;
+      if (!cache.TryGetValue(phrase, out res)) {
+        var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
+        var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch)));
+        var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch)));
+      return string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term))));
+        res = string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term))));
+        cache[phrase] = res;
+      }
+      return res;
+    }
     private string CanonicalTerm(string term) {
       return string.Join("", term.OrderByDescending(ch => (byte)ch));
+      return string.Join("", term.OrderByDescending(ch => (byte)ch)); // we want to have the up-case characters last
+    }
+  }

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

-                      r11755
+                      r11770
     private static void RunDemo() {
+      // TODO: move problem instances into a separate folder
+      // TODO: improve performance of SequentialSearch (memory allocations related to sequences)
+      // TODO: implement bridge to HL-GP
       // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos)
       // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
 …
       int iterations = 0;
       var sw = new Stopwatch();
+      double bestQuality = 0;
+      string bestSentence = "";
       var globalStatistics = new SentenceSetStatistics();
       var random = new Random();
 …
       //var phraseLen = 3;
       //var numPhrases = 5;
       //var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true);
       //var phraseLen = 4;
       //var numPhrases = 5;
       //var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 500, correctReward: 1.0, decoyReward: 0.2, phrasesAsSets: true);
       var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
+      //var problem = new RoyalPhraseSequenceProblem(random, 15, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true);
+      // var phraseLen = 2;
+      // var numPhrases = 5;
+      // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0.0, phrasesAsSets: true);
+      //var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
       // Ant
       // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
       // GaussianModelWithUnknownVariance (and Q = 0.99-quantile) also works well for Ant
+      //var problem = new SantaFeAntProblem();
+      // very good results with:       var alg = new SequentialSearch(problem, 17, random, 0,
+      // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));
+      var problem = new SantaFeAntProblem();
       //var problem = new SymbolicRegressionProblem("Tower");
       //var problem = new PalindromeProblem();
 …
       // symbreg length = 11 q = 0.824522210419616
       //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
+      var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
+      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
+      var alg = new SequentialSearch(problem, 10, random, 0,
+        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new GaussianThompsonSamplingPolicy(true), true));
       //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
       //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
 …
       alg.FoundNewBestSolution += (sentence, quality) => {
-        bestQuality = quality;
-        bestSentence = sentence;
         //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
         //Console.ReadLine();
 …
         globalStatistics.AddSentence(sentence, quality);
         if (iterations % 100 == 0) {
           //if (iterations % 1000 == 0) Console.Clear();
+          if (iterations % 1000 == 0) Console.Clear();
           Console.SetCursorPosition(0, 0);
           alg.PrintStats();
 …
       sw.Stop();
+      Console.WriteLine("{0,10} Best soultion: {1,10:F5} {2}", iterations, bestQuality, bestSentence);
+      Console.Clear();
+      alg.PrintStats();
+      Console.WriteLine(globalStatistics);
       Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
         sw.Elapsed.TotalSeconds,

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11770

Legend:

Download in other formats: