Timestamp: 01/09/15 14:57:28
Files: 1 edited
Legend: unmodified lines are shown with a leading space, added lines with '+', removed lines with '-'.
--- branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs (r11732)
+++ branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs (r11742)

@@ -8,4 +8,5 @@
 using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.BanditPolicies;
 using HeuristicLab.Algorithms.Bandits.Models;
 using HeuristicLab.Algorithms.GrammaticalOptimization;

@@ -26,12 +27,17 @@
 //var globalRandom = new Random(31415);
 var localRandSeed = 31415;
-var reps = 20;
+var reps = 8;
 
-var policies = new Func<IPolicy>[]
+var policies = new Func<IBanditPolicy>[]
 {
-  () => new GaussianThompsonSamplingPolicy(),
+  () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
+  () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
+  () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
+  () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
+  //() => new GaussianThompsonSamplingPolicy(),
   () => new GaussianThompsonSamplingPolicy(true),
-  () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)),
-  () => new BernoulliThompsonSamplingPolicy(),
+  () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
+  () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
+  //() => new BernoulliThompsonSamplingPolicy(),
   () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
   () => new RandomPolicy(),

@@ -61,23 +67,23 @@
   () => new ChernoffIntervalEstimationPolicy(0.1),
   () => new ChernoffIntervalEstimationPolicy(0.2),
-  // (rand) => new ThresholdAscentPolicy(10, 0.01),
-  // (rand) => new ThresholdAscentPolicy(10, 0.05),
-  // (rand) => new ThresholdAscentPolicy(10, 0.1),
-  // (rand) => new ThresholdAscentPolicy(10, 0.2),
-  // (rand) => new ThresholdAscentPolicy(100, 0.01),
-  // (rand) => new ThresholdAscentPolicy(100, 0.05),
-  // (rand) => new ThresholdAscentPolicy(100, 0.1),
-  // (rand) => new ThresholdAscentPolicy(100, 0.2),
-  // (rand) => new ThresholdAscentPolicy(1000, 0.01),
-  // (rand) => new ThresholdAscentPolicy(1000, 0.05),
-  // (rand) => new ThresholdAscentPolicy(1000, 0.1),
-  // (rand) => new ThresholdAscentPolicy(1000, 0.2),
-  // (rand) => new ThresholdAscentPolicy(5000, 0.01),
-  // (rand) => new ThresholdAscentPolicy(10000, 0.01),
+  () => new ThresholdAscentPolicy(10, 0.01),
+  () => new ThresholdAscentPolicy(10, 0.05),
+  () => new ThresholdAscentPolicy(10, 0.1),
+  () => new ThresholdAscentPolicy(10, 0.2),
+  () => new ThresholdAscentPolicy(100, 0.01),
+  () => new ThresholdAscentPolicy(100, 0.05),
+  () => new ThresholdAscentPolicy(100, 0.1),
+  () => new ThresholdAscentPolicy(100, 0.2),
+  () => new ThresholdAscentPolicy(1000, 0.01),
+  () => new ThresholdAscentPolicy(1000, 0.05),
+  () => new ThresholdAscentPolicy(1000, 0.1),
+  () => new ThresholdAscentPolicy(1000, 0.2),
+  () => new ThresholdAscentPolicy(5000, 0.01),
+  () => new ThresholdAscentPolicy(10000, 0.01),
 };
 
 foreach (var problem in new Tuple<IProblem, int>[]
 {
-  Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+  //Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
   Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
 })

@@ -87,5 +93,5 @@
 var localRand = new Random(localRandSeed);
 var options = new ParallelOptions();
-options.MaxDegreeOfParallelism = 1;
+options.MaxDegreeOfParallelism = 4;
 Parallel.For(0, reps, options, (i) => {
 //var t = Task.Run(() => {

@@ -132,23 +138,17 @@
 private static void RunDemo() {
 // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
-// TODO: implement GaussianWithUnknownMeanAndVariance model for Thompson sampling (verify with a unit test whether the correct mean and variance are identified)
 // TODO: separate value function from policy
-// TODO: debug and verify implementation variants of Gaussian Thompson sampling with a unit test
-// TODO: refactor policies to use banditInfos (policies are factories for bandit infos and a bandit info only has an update routine; each policy works only with its type of banditInfo)
 // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
 // TODO: exhaustive search with priority list
-// TODO: why does the old implementation of GaussianThompson work better for SantaFe than the old one? See comparison: old vs. new implementation of GaussianThompsonSampling
+// TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
 // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
 // TODO: how else can samplers be compared, i.e. what can be measured to estimate a sampler's quality (apart from solution quality and iterations until the best solution) => goal: faster iterations to a good result
-// TODO: is the likelihood for R=1 easy to compute for a Gaussian or GaussianMixture?
 // TODO: research thompson sampling for max bandit?
 // TODO: extensive test of strategies for the k-armed max bandit
 // TODO: verify TA implementation using example from the original paper
-// TODO: compare results for different policies also for the symb-reg problem
 // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
 // TODO: implement thompson sampling for gaussian mixture models
 // TODO: implement inspection for MCTS (possibly an interactive command line to display statistics from the tree)
 // TODO: implement ACO-style bandit policy
-// TODO: implement sequences that can be manipulated in-place (instead of strings); alternatives are also stored as sequences, and for a sequence the index of the first NT symbol can be stored
 // TODO: simultaneous modeling of transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
 // TODO: compare generating the shortest possible sentences when sampling completely at random vs. simply choosing alternatives at random

@@ -165,12 +165,15 @@
 var random = new Random();
 
-var problem = new SymbolicRegressionPoly10Problem();
-//var problem = new SantaFeAntProblem(); // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
-//var problem = new SymbolicRegressionProblem("Tower"); // very good results e.g. new EpsGreedyPolicy(0.2) using max reward as quality!
+var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
+// Ant
+// good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
+// GaussianModelWithUnknownVariance (and Q = 0.99 quantile) also works well for Ant
+//var problem = new SantaFeAntProblem();
+//var problem = new SymbolicRegressionProblem("Tower");
 //var problem = new PalindromeProblem();
 //var problem = new HardPalindromeProblem();
 //var problem = new RoyalPairProblem();
 //var problem = new EvenParityProblem();
-var alg = new MctsSampler(problem, 23, random, 10, new EpsGreedyPolicy(0.2)); // GaussianModelWithUnknownVariance (and Q = 0.99 quantile) works well for Ant
+var alg = new MctsSampler(problem, 25, random, 0, new GaussianThompsonSamplingPolicy(true));
 //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
 //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
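The most notable change in this revision is the new eps-greedy configurations that value an arm by its maximum observed reward rather than its average (the EpsGreedyPolicy(..., (aInfo)=> aInfo.MaxReward, "max") entries). For readers unfamiliar with the idea, here is a minimal, self-contained C# sketch of such a policy. All names (EpsGreedyMaxPolicy, SelectAction, Update) are hypothetical and do not reflect the HeuristicLab API; this is an illustration of the technique, not the project's implementation.

using System;

// Sketch: eps-greedy bandit policy that scores each arm by the maximum
// reward observed so far instead of the average reward (hypothetical names).
public class EpsGreedyMaxPolicy {
  private readonly double eps;         // exploration probability
  private readonly Random rand;
  private readonly int[] tries;        // pulls per arm
  private readonly double[] maxReward; // best reward seen per arm

  public EpsGreedyMaxPolicy(double eps, int numActions, Random rand) {
    this.eps = eps;
    this.rand = rand;
    tries = new int[numActions];
    maxReward = new double[numActions];
    // start at -inf so arms with negative rewards are handled correctly
    for (int a = 0; a < numActions; a++) maxReward[a] = double.NegativeInfinity;
  }

  public int SelectAction() {
    // pull every arm once before exploiting
    for (int a = 0; a < tries.Length; a++)
      if (tries[a] == 0) return a;
    // explore uniformly with probability eps
    if (rand.NextDouble() < eps) return rand.Next(tries.Length);
    // otherwise exploit the arm with the highest observed reward
    int best = 0;
    for (int a = 1; a < maxReward.Length; a++)
      if (maxReward[a] > maxReward[best]) best = a;
    return best;
  }

  public void Update(int action, double reward) {
    tries[action]++;
    maxReward[action] = Math.Max(maxReward[action], reward);
  }
}

Valuing arms by their best-seen reward fits the max-bandit setting that the TODO comments mention: in grammar-based search the goal is the single best sentence, not a high average reward, which is presumably why this variant "seems to work well for symb-reg".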
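The revision also reworks the Thompson-sampling configurations (GaussianThompsonSamplingPolicy(true), GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1))). The sketch below shows the textbook normal-normal variant of Gaussian Thompson sampling with known observation variance. The constructor arguments of HeuristicLab's GaussianModel are not documented in this changeset, so the class name and the prior parameters here (prior mean 0.5, prior variance 10, observation variance 1) are assumptions chosen to mirror the call sites above.

using System;

// Sketch: Thompson sampling with a conjugate normal prior on each arm's
// mean and known observation variance (hypothetical names and priors).
public class GaussianThompsonSampler {
  private readonly double priorMean, priorVar, obsVar; // assumed priors
  private readonly int[] n;       // observations per arm
  private readonly double[] sum;  // reward sums per arm
  private readonly Random rand;

  public GaussianThompsonSampler(int numActions, Random rand,
      double priorMean = 0.5, double priorVar = 10, double obsVar = 1) {
    this.priorMean = priorMean; this.priorVar = priorVar; this.obsVar = obsVar;
    this.rand = rand;
    n = new int[numActions];
    sum = new double[numActions];
  }

  public int SelectAction() {
    int best = 0; double bestSample = double.NegativeInfinity;
    for (int a = 0; a < n.Length; a++) {
      // conjugate normal-normal update: precisions add
      double postVar = 1.0 / (1.0 / priorVar + n[a] / obsVar);
      double postMean = postVar * (priorMean / priorVar + sum[a] / obsVar);
      // draw one sample from the posterior and pick the argmax
      double sample = postMean + Math.Sqrt(postVar) * NextGaussian();
      if (sample > bestSample) { bestSample = sample; best = a; }
    }
    return best;
  }

  public void Update(int action, double reward) {
    n[action]++; sum[action] += reward;
  }

  private double NextGaussian() {
    // Box-Muller transform for a standard normal sample
    double u1 = 1.0 - rand.NextDouble(), u2 = rand.NextDouble();
    return Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
  }
}

Untried arms are sampled directly from the prior, so exploration emerges from the posterior sampling itself rather than from an explicit eps parameter, one reason Thompson sampling is an attractive alternative to eps-greedy in this benchmark suite.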