Context Navigation

← Previous Change
Next Change →

Program.cs

Timestamp:

01/07/15 09:21:46 (9 years ago)

Author:

gkronber

Message:

#2283: refactoring and bug fixes

File:

: 1 edited

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs (modified) (6 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

-                      r11730
+                      r11732
 using HeuristicLab.Algorithms.GrammaticalOptimization;
 using HeuristicLab.Problems.GrammaticalOptimization;
+using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
 namespace Main {
 …
     private static void RunGridTest() {
       int maxIterations = 100000; // for poly-10 with 50000 evaluations no successful try with hl yet
       // var globalRandom = new Random(31415);
+      int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
+      //var globalRandom = new Random(31415);
       var localRandSeed = 31415;
       var reps = 20;
       var policyFactories = new Func<Random, int, IPolicy>[]
+      var policies = new Func<IPolicy>[]
+        {
+          (rand, numActions) => new GaussianThompsonSamplingPolicy(rand, numActions),
+          (rand, numActions) => new BernoulliThompsonSamplingPolicy(rand, numActions),
+          (rand, numActions) => new RandomPolicy(rand, numActions),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.01),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.05),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.2),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.5),
+          (rand, numActions) => new UCTPolicy(numActions, 0.1),
+          (rand, numActions) => new UCTPolicy(numActions, 0.5),
+          (rand, numActions) => new UCTPolicy(numActions, 1),
+          (rand, numActions) => new UCTPolicy(numActions, 2),
+          (rand, numActions) => new UCTPolicy(numActions, 5),
+          (rand, numActions) => new UCTPolicy(numActions, 10),
+          (rand, numActions) => new UCB1Policy(numActions),
+          (rand, numActions) => new UCB1TunedPolicy(numActions),
+          (rand, numActions) => new UCBNormalPolicy(numActions),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.1),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.5),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 1),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 5),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 10),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 20),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 100),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.01),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.05),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.1),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 5000, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10000, 0.01),
+         () => new GaussianThompsonSamplingPolicy(),
+         () => new GaussianThompsonSamplingPolicy(true),
+         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)),
+         () => new BernoulliThompsonSamplingPolicy(),
+         () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
+         () => new RandomPolicy(),
+         () => new EpsGreedyPolicy(0.01),
+         () => new EpsGreedyPolicy(0.05),
+         () => new EpsGreedyPolicy(0.1),
+         () => new EpsGreedyPolicy(0.2),
+         () => new EpsGreedyPolicy(0.5),
+         () => new UCTPolicy(0.1),
+         () => new UCTPolicy(0.5),
+         () => new UCTPolicy(1),
+         () => new UCTPolicy(2),
+         () => new UCTPolicy( 5),
+         () => new UCTPolicy( 10),
+         () => new UCB1Policy(),
+         () => new UCB1TunedPolicy(),
+         () => new UCBNormalPolicy(),
+         () => new BoltzmannExplorationPolicy(0.1),
+         () => new BoltzmannExplorationPolicy(0.5),
+         () => new BoltzmannExplorationPolicy(1),
+         () => new BoltzmannExplorationPolicy(5),
+         () => new BoltzmannExplorationPolicy(10),
+         () => new BoltzmannExplorationPolicy(20),
+         () => new BoltzmannExplorationPolicy(100),
+         () => new ChernoffIntervalEstimationPolicy( 0.01),
+         () => new ChernoffIntervalEstimationPolicy( 0.05),
+         () => new ChernoffIntervalEstimationPolicy( 0.1),
+         () => new ChernoffIntervalEstimationPolicy( 0.2),
+         // (rand) => new ThresholdAscentPolicy(10, 0.01),
+         // (rand) => new ThresholdAscentPolicy(10, 0.05),
+         // (rand) => new ThresholdAscentPolicy(10, 0.1),
+         // (rand) => new ThresholdAscentPolicy(10, 0.2),
+         // (rand) => new ThresholdAscentPolicy(100, 0.01),
+         // (rand) => new ThresholdAscentPolicy(100, 0.05),
+         // (rand) => new ThresholdAscentPolicy(100, 0.1),
+         // (rand) => new ThresholdAscentPolicy(100, 0.2),
+         // (rand) => new ThresholdAscentPolicy(1000, 0.01),
+         // (rand) => new ThresholdAscentPolicy(1000, 0.05),
+         // (rand) => new ThresholdAscentPolicy(1000, 0.1),
+         // (rand) => new ThresholdAscentPolicy(1000, 0.2),
+         // (rand) => new ThresholdAscentPolicy(5000, 0.01),
+         // (rand) => new ThresholdAscentPolicy(10000, 0.01),
         };
+      var tasks = new List<Task>();
+      foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
+        foreach (var policyFactory in policyFactories) {
+          var myPolicyFactory = policyFactory;
+          var myRandomTries = randomTries;
+          var localRand = new Random(localRandSeed);
+          var options = new ParallelOptions();
+          options.MaxDegreeOfParallelism = 1;
+          Parallel.For(0, reps, options, (i) => {
+            //var t = Task.Run(() => {
+            Random myLocalRand;
+            lock (localRand)
+              myLocalRand = new Random(localRand.Next());
+            //for (int i = 0; i < reps; i++) {
+            int iterations = 0;
+            var sw = new Stopwatch();
+            var globalStatistics = new SentenceSetStatistics();
+            var problem = new SymbolicRegressionPoly10Problem();
+            //var problem = new SantaFeAntProblem();
+            //var problem = new PalindromeProblem();
+            //var problem = new HardPalindromeProblem();
+            //var problem = new RoyalPairProblem();
+            //var problem = new EvenParityProblem();
+            var alg = new MctsSampler(problem, 25, myLocalRand, myRandomTries, myPolicyFactory);
+            //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
+            //var alg = new AlternativesContextSampler(problem, 25);
+            alg.SolutionEvaluated += (sentence, quality) => {
+              iterations++;
+              globalStatistics.AddSentence(sentence, quality);
+              if (iterations % 10000 == 0) {
+                Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, myPolicyFactory(myLocalRand, 1), globalStatistics);
+              }
+            };
+            sw.Start();
+            alg.Run(maxIterations);
+            sw.Stop();
+            //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
+            //}
+            //});
+            //tasks.Add(t);
+          });
+      foreach (var problem in new Tuple<IProblem, int>[]
+        {
+          Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+          Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
+        })
+        foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
+          foreach (var policy in policies) {
+            var myRandomTries = randomTries;
+            var localRand = new Random(localRandSeed);
+            var options = new ParallelOptions();
+            options.MaxDegreeOfParallelism = 1;
+            Parallel.For(0, reps, options, (i) => {
+              //var t = Task.Run(() => {
+              Random myLocalRand;
+              lock (localRand)
+                myLocalRand = new Random(localRand.Next());
+              //for (int i = 0; i < reps; i++) {
+              int iterations = 0;
+              var globalStatistics = new SentenceSetStatistics();
+              // var problem = new SymbolicRegressionPoly10Problem();
+              // var problem = new SantaFeAntProblem();
+              //var problem = new PalindromeProblem();
+              //var problem = new HardPalindromeProblem();
+              //var problem = new RoyalPairProblem();
+              //var problem = new EvenParityProblem();
+              var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each experiment
+              //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
+              //var alg = new AlternativesContextSampler(problem, 25);
+              alg.SolutionEvaluated += (sentence, quality) => {
+                iterations++;
+                globalStatistics.AddSentence(sentence, quality);
+                if (iterations % 10000 == 0) {
+                  Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, policy(), globalStatistics);
+                }
+              };
+              alg.Run(maxIterations);
+              //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
+              //}
+              //});
+              //tasks.Add(t);
+            });
+          }
+        }
+      }
       //Task.WaitAll(tasks.ToArray());
+    }
     private static void RunDemo() {
+      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
+      // TODO: implement GaussianWithUnknownMeanAndVariance Model for Thompson Sampling (verify with unit test if correct mean and variance is identified)
+      // TODO: separate value function from policy
+      // TODO: debug and verify implementation variants of Gaussian Thompson Sampling with unit test
+      // TODO: refactor Policies to use banditInfos (policies are factories for bandit infos and bandit info only has an update routine, each policy works only with it's type of banditinfo)
+      // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
+      // TODO: exhaustive search with priority list
       // TODO: warum funktioniert die alte Implementierung von GaussianThompson besser für SantaFe als alte? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling
       // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
 …
       // TODO: research thompson sampling for max bandit?
       // TODO: ausführlicher test von strategien für k-armed max bandit
+      // TODO: verify TA implementation using example from the original paper
+      // TODO: reference HL.ProblemInstances and try on tower dataset
+      // TODO: verify TA implementation using example from the original paper
       // TODO: compare results for different policies also for the symb-reg problem
       // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
 …
       // TODO: vergleich bei complete-randomly möglichst kurze sätze generieren vs. einfach zufällig alternativen wählen
       // TODO: reward discounting (für veränderliche reward distributions über zeit). speziellen unit-test dafür erstellen
+      int maxIterations = 10000000;
+      // TODO: constant optimization
+      int maxIterations = 100000;
       int iterations = 0;
       var sw = new Stopwatch();
 …
       var random = new Random();
+      //var problem = new SymbolicRegressionPoly10Problem();
+      var problem = new SantaFeAntProblem();
+      var problem = new SymbolicRegressionPoly10Problem();
+      //var problem = new SantaFeAntProblem(); // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
+      //var problem = new SymbolicRegressionProblem("Tower"); // very good results e.g. new EpsGreedyPolicy(0.2) using max reward as quality !!!
       //var problem = new PalindromeProblem();
       //var problem = new HardPalindromeProblem();
       //var problem = new RoyalPairProblem();
       //var problem = new EvenParityProblem();
       //var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new GenericThompsonSamplingPolicy(rand, numActions, new GaussianModel(numActions, 0.5, 10)));
+      var alg = new MctsSampler(problem, 23, random, 10, new EpsGreedyPolicy(0.2)); // GaussianModelWithUnknownVariance (and Q= 0.99-quantil) works well for Ant
       //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
       //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
       //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
       // var alg = new AlternativesSampler(problem, 17);
+      var alg = new RandomSearch(problem, random, 17);
+      // var alg = new RandomSearch(problem, random, 17);
+      // var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);
       alg.FoundNewBestSolution += (sentence, quality) => {
         bestQuality = quality;
         bestSentence = sentence;
         Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
+        Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
       };
       alg.SolutionEvaluated += (sentence, quality) => {
 …
         globalStatistics.AddSentence(sentence, quality);
         if (iterations % 1000 == 0) {
           //alg.PrintStats();
+          alg.PrintStats();
+        }
         if (iterations % 10000 == 0) {
           //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
           //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
           Console.WriteLine(globalStatistics);
+          Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
+        }
       };

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11732 for branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

Download in other formats: