
Timestamp: 01/02/15 16:08:21
Author: gkronber
Message: #2283: several major extensions for grammatical optimization

File: 1 edited

  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

--- branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs (r11727)
+++ branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs (r11730)
@@ -3,8 +3,10 @@
 using System.Data;
 using System.Diagnostics;
+using System.Globalization;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.Models;
 using HeuristicLab.Algorithms.GrammaticalOptimization;
 using HeuristicLab.Problems.GrammaticalOptimization;
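
The new System.Globalization import backs the invariant-culture default that this revision sets at the top of Main (next hunk). A minimal standalone sketch of why this matters for the experiment logs (not part of the changeset):

    using System;
    using System.Globalization;
    using System.Threading.Tasks;

    static class CultureDemo {
      static void Main() {
        // double.ToString() is culture-sensitive ("3,14" under de-AT,
        // "3.14" under en-US). Fixing the default culture makes the
        // printed statistics comparable across machines and across the
        // worker threads started by Parallel.For.
        CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
        Task.Run(() => Console.WriteLine(3.14)).Wait(); // always "3.14"
      }
    }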
     
@@ -13,45 +15,89 @@
   class Program {
     static void Main(string[] args) {
-      // RunDemo();
-      RunGridTest();
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+
+      RunDemo();
+      //RunGridTest();
     }

     private static void RunGridTest() {
-      int maxIterations = 150000;
-      var globalRandom = new Random(31415);
-      var reps = 10;
-      Parallel.ForEach(new int[] { 1, 5, 10, 100, 500, 1000 }, (randomTries) => {
-        Random localRand;
-        lock (globalRandom) {
-          localRand = new Random(globalRandom.Next());
-        }
-        var policyFactories = new Func<int, IPolicy>[]
+      int maxIterations = 100000; // for poly-10 with 50000 evaluations no successful try with hl yet
+      // var globalRandom = new Random(31415);
+      var localRandSeed = 31415;
+      var reps = 20;
+
+      var policyFactories = new Func<Random, int, IPolicy>[]
         {
-          (numActions) => new RandomPolicy(localRand, numActions),
-          (numActions) => new UCB1Policy(numActions),
-          (numActions) => new UCB1TunedPolicy(numActions),
-          (numActions) => new UCBNormalPolicy(numActions),
-          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.01),
-          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.05),
-          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.1),
-          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.2),
-          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.5),
-          (numActions) => new GaussianThompsonSamplingPolicy(localRand, numActions),
-          (numActions) => new BernoulliThompsonSamplingPolicy(localRand, numActions)
+          (rand, numActions) => new GaussianThompsonSamplingPolicy(rand, numActions),
+          (rand, numActions) => new BernoulliThompsonSamplingPolicy(rand, numActions),
+          (rand, numActions) => new RandomPolicy(rand, numActions),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.01),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.05),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.2),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.5),
+          (rand, numActions) => new UCTPolicy(numActions, 0.1),
+          (rand, numActions) => new UCTPolicy(numActions, 0.5),
+          (rand, numActions) => new UCTPolicy(numActions, 1),
+          (rand, numActions) => new UCTPolicy(numActions, 2),
+          (rand, numActions) => new UCTPolicy(numActions, 5),
+          (rand, numActions) => new UCTPolicy(numActions, 10),
+          (rand, numActions) => new UCB1Policy(numActions),
+          (rand, numActions) => new UCB1TunedPolicy(numActions),
+          (rand, numActions) => new UCBNormalPolicy(numActions),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.1),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.5),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 1),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 5),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 10),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 20),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 100),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.01),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.05),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.1),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 5000, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10000, 0.01),
         };

-      foreach (var policyFactory in policyFactories)
-        for (int i = 0; i < reps; i++) {
+      var tasks = new List<Task>();
+      foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
+        foreach (var policyFactory in policyFactories) {
+          var myPolicyFactory = policyFactory;
+          var myRandomTries = randomTries;
+          var localRand = new Random(localRandSeed);
+          var options = new ParallelOptions();
+          options.MaxDegreeOfParallelism = 1;
+          Parallel.For(0, reps, options, (i) => {
+            //var t = Task.Run(() => {
+            Random myLocalRand;
+            lock (localRand)
+              myLocalRand = new Random(localRand.Next());
+
+            //for (int i = 0; i < reps; i++) {
+
             int iterations = 0;
             var sw = new Stopwatch();
             var globalStatistics = new SentenceSetStatistics();

-            // var problem = new SymbolicRegressionPoly10Problem();
-            var problem = new SantaFeAntProblem();
+            var problem = new SymbolicRegressionPoly10Problem();
+            //var problem = new SantaFeAntProblem();
             //var problem = new PalindromeProblem();
             //var problem = new HardPalindromeProblem();
             //var problem = new RoyalPairProblem();
             //var problem = new EvenParityProblem();
-            var alg = new MctsSampler(problem, 17, localRand, randomTries, policyFactory);
+            var alg = new MctsSampler(problem, 25, myLocalRand, myRandomTries, myPolicyFactory);
             //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
             //var alg = new AlternativesContextSampler(problem, 25);
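
Two changes in the hunk above are worth noting. The policy factories now receive the Random instance as a parameter (Func<Random, int, IPolicy> instead of Func<int, IPolicy>), so each repetition can inject its own generator, and each repetition derives a private Random from a shared, seeded parent under a lock, since System.Random is not thread-safe. Because MaxDegreeOfParallelism is pinned to 1, the seeds are drawn in a deterministic order, keeping runs reproducible. A standalone sketch of this seed-derivation pattern (names are illustrative):

    using System;
    using System.Threading.Tasks;

    static class SeedDerivationDemo {
      static void Main() {
        var seedSource = new Random(31415); // reproducible parent generator
        var options = new ParallelOptions { MaxDegreeOfParallelism = 1 };
        Parallel.For(0, 20, options, i => {
          Random localRng;
          lock (seedSource) // System.Random is not thread-safe
            localRng = new Random(seedSource.Next());
          // each repetition now owns an RNG with a distinct, derived seed
          Console.WriteLine("rep {0}: first sample {1:F5}", i, localRng.NextDouble());
        });
      }
    }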
     
@@ -61,5 +107,5 @@
               globalStatistics.AddSentence(sentence, quality);
               if (iterations % 10000 == 0) {
-                Console.WriteLine("{0} {1} {2}", randomTries, policyFactory(1), globalStatistics);
+                Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, myPolicyFactory(myLocalRand, 1), globalStatistics);
               }
             };
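
The widened format string above uses the alignment component of .NET composite formatting: {index,width} right-aligns the value in a field of the given width, which keeps the periodic progress lines column-aligned across policies. For example:

    using System;

    static class FormatDemo {
      static void Main() {
        // "{0,4}" pads 12 to a width of 4, "{1,7}" pads 3456 to 7, etc.
        Console.WriteLine("{0,4} {1,7} {2,5}", 12, 3456, 0.1);
        // prints: "  12    3456   0.1"
      }
    }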
     
@@ -70,11 +116,33 @@

             sw.Stop();
-          }
-      });
+            //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
+            //}
+            //});
+            //tasks.Add(t);
+          });
+        }
+      }
+      //Task.WaitAll(tasks.ToArray());
     }

     private static void RunDemo() {
-      // TODO: implement threshold ascent
-      // TODO: implement inspection for MCTS
+      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
+      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
+      // TODO: how else can samplers be compared, i.e. what can be measured to estimate a sampler's quality (apart from final quality and iterations until the best solution)? => goal: faster iterations to a good result
+      // TODO: is the likelihood for R=1 easy to compute for a Gaussian or a GaussianMixture?
+      // TODO: research thompson sampling for max bandit?
+      // TODO: thorough test of strategies for the k-armed max bandit
+      // TODO: verify TA implementation using example from the original paper
+      // TODO: reference HL.ProblemInstances and try on tower dataset
+      // TODO: compare results for different policies also for the symb-reg problem
+      // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
+      // TODO: implement thompson sampling for gaussian mixture models
+      // TODO: implement inspection for MCTS (possibly show statistics from the tree via an interactive command line)
+      // TODO: implement ACO-style bandit policy
+      // TODO: implement sequences that can be manipulated in-place (instead of strings), alternatives are also stored as sequences, for a sequence the index of the first NT-symb can be stored
+      // TODO: simultaneous modeling of transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
+      // TODO: compare generating the shortest possible sentences completely at random vs. simply picking random alternatives
+      // TODO: reward discounting (for reward distributions that change over time); create a dedicated unit test for this
+

       int maxIterations = 10000000;
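
One of the new TODOs above asks for reward discounting to handle reward distributions that change over time. The changeset does not implement it; a common approach from the bandit literature is an exponential recency-weighted average, sketched below with an illustrative class name and step size:

    using System;

    // Tracks a drifting mean reward: Q <- Q + alpha * (r - Q).
    // Old rewards decay geometrically, so the estimate can follow
    // nonstationary reward distributions.
    class DiscountedRewardEstimate {
      private readonly double alpha; // step size in (0,1]; larger = faster forgetting
      public double Value { get; private set; }

      public DiscountedRewardEstimate(double alpha) { this.alpha = alpha; }

      public void Update(double reward) {
        Value += alpha * (reward - Value);
      }
    }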
     
@@ -84,7 +152,7 @@
       string bestSentence = "";
       var globalStatistics = new SentenceSetStatistics();
-      var random = new Random(31415);
-
-      // var problem = new SymbolicRegressionPoly10Problem();
+      var random = new Random();
+
+      //var problem = new SymbolicRegressionPoly10Problem();
       var problem = new SantaFeAntProblem();
       //var problem = new PalindromeProblem();
     
@@ -92,7 +160,10 @@
       //var problem = new RoyalPairProblem();
       //var problem = new EvenParityProblem();
-      var alg = new MctsSampler(problem, 17, random);
-      //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
-      //var alg = new AlternativesContextSampler(problem, 25);
+      //var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new GenericThompsonSamplingPolicy(rand, numActions, new GaussianModel(numActions, 0.5, 10)));
+      //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
+      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
+      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
+      // var alg = new AlternativesSampler(problem, 17);
+      var alg = new RandomSearch(problem, random, 17);

       alg.FoundNewBestSolution += (sentence, quality) => {
     
@@ -104,7 +175,11 @@
         iterations++;
         globalStatistics.AddSentence(sentence, quality);
+        if (iterations % 1000 == 0) {
+          //alg.PrintStats();
+        }
         if (iterations % 10000 == 0) {
           //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
-          Console.WriteLine(globalStatistics.ToString());
+          //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
+          Console.WriteLine(globalStatistics);
         }
       };
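
RunDemo consumes the samplers through the FoundNewBestSolution event shown above. A minimal stand-in illustrating the same event wiring (the ToySampler below is illustrative; the real samplers live in HeuristicLab.Algorithms.GrammaticalOptimization):

    using System;

    class ToySampler {
      public event Action<string, double> FoundNewBestSolution;
      private double bestQuality = double.NegativeInfinity;

      public void Evaluate(string sentence, double quality) {
        if (quality > bestQuality) {
          bestQuality = quality;
          var handler = FoundNewBestSolution;
          if (handler != null) handler(sentence, quality); // notify subscribers
        }
      }
    }

    static class EventDemo {
      static void Main() {
        var alg = new ToySampler();
        alg.FoundNewBestSolution += (sentence, quality) =>
          Console.WriteLine("new best {0,10:F5} {1}", quality, sentence);
        alg.Evaluate("a+b", 0.5);   // fires: new best
        alg.Evaluate("a*b", 0.3);   // no event, not an improvement
        alg.Evaluate("a*b+c", 0.9); // fires: new best
      }
    }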