Context Navigation

← Previous Change
Next Change →

Program.cs

Timestamp:

01/20/15 20:25:00 (10 years ago)

Author:

gkronber

Message:

#2283: separated value-states from done-states in GenericGrammarPolicy and removed disabling of actions from bandit policies

File:

: 1 edited

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs (modified) (8 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

-                      r11801
+                      r11806
       CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
       //RunDemo();
       RunGridTest();
+      RunDemo();
+      //RunGridTest();
+    }
 …
          () => new UCTPolicy( 5),
          () => new UCTPolicy( 10),
+         () => new ModifiedUCTPolicy(0.01),
+         () => new ModifiedUCTPolicy(0.05),
+         () => new ModifiedUCTPolicy(0.1),
+         () => new ModifiedUCTPolicy(0.5),
+         () => new ModifiedUCTPolicy(1),
+         () => new ModifiedUCTPolicy(2),
+         () => new ModifiedUCTPolicy( 5),
+         () => new ModifiedUCTPolicy( 10),
          () => new UCB1Policy(),
          () => new UCB1TunedPolicy(),
 …
     private static void RunDemo() {
-      // TODO: move problem instances into a separate folder
       // TODO: implement bridge to HL-GP
       // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos)
 …
       // TODO: warum funktioniert die alte Implementierung von GaussianThompson besser für SantaFe als neue? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling
       // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
-      // TODO: wie kann ich sampler noch vergleichen bzw. was kann man messen um die qualität des samplers abzuschätzen (bis auf qualität und iterationen bis zur besten lösung) => ziel schnellere iterationen zu gutem ergebnis
       // TODO: research thompson sampling for max bandit?
       // TODO: ausführlicher test von strategien für numCorrectPhrases-armed max bandit
 …
       var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
+      //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
       //var phraseLen = 3;
       //var numPhrases = 5;
 …
       //var problem = new SymbolicRegressionPoly10Problem();
       //var problem = new SantaFeAntProblem();
+      var problem = new SantaFeAntProblem();
       //var problem = new SymbolicRegressionProblem("Tower");
       //var problem = new PalindromeProblem();
 …
       //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
       //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
+      var alg = new SequentialSearch(problem, 30, random, 0,
+        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.1), true));
+      //var alg = new SequentialSearch(problem, 23, random, 0,
+      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new ModifiedUCTPolicy(0.1), true));
+      var alg = new SequentialSearch(problem, 17, random, 0,
+        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericTDPolicy(problem, true));
       //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
       //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
 …
         globalStatistics.AddSentence(sentence, quality);
         if (iterations % 1000 == 0) {
           if (iterations % 1000 == 0) Console.Clear();
+          if (iterations % 10000 == 0) Console.Clear();
           Console.SetCursorPosition(0, 0);
           alg.PrintStats();

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11806 for branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

Download in other formats: