Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs @ 11972

Last change on this file since 11972 was 11972, checked in by gkronber, 9 years ago

#2283 new experiments

File size: 19.3 KB
RevLine 
[11795]1using System;
[11659]2using System.Collections.Generic;
3using System.Diagnostics;
[11730]4using System.Globalization;
[11895]5using System.Runtime.Remoting.Messaging;
[11659]6using System.Text;
[11846]7using System.Threading;
[11727]8using System.Threading.Tasks;
9using HeuristicLab.Algorithms.Bandits;
[11742]10using HeuristicLab.Algorithms.Bandits.BanditPolicies;
[11792]11using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
[11730]12using HeuristicLab.Algorithms.Bandits.Models;
[11846]13using HeuristicLab.Algorithms.GeneticProgramming;
[11659]14using HeuristicLab.Algorithms.GrammaticalOptimization;
15using HeuristicLab.Problems.GrammaticalOptimization;
[11895]16using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
[11792]17using BoltzmannExplorationPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.BoltzmannExplorationPolicy;
18using EpsGreedyPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.EpsGreedyPolicy;
[11846]19using IProblem = HeuristicLab.Problems.GrammaticalOptimization.IProblem;
[11792]20using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;
21using UCTPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.UCTPolicy;
[11659]22
23namespace Main {
24  class Program {
25    static void Main(string[] args) {
[11730]26      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
27
[11832]28      //RunDemo();
[11972]29      //RunGpDemo();
30      //RunGridTest();
31      RunGpGridTest();
[11727]32    }
33
34    private static void RunGridTest() {
[11966]35      int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
[11732]36      //var globalRandom = new Random(31415);
[11730]37      var localRandSeed = 31415;
[11966]38      var reps = 20;
[11730]39
[11799]40      var policyFactories = new Func<IBanditPolicy>[]
[11727]41        {
[11792]42         () => new RandomPolicy(),
43          () => new ActiveLearningPolicy(), 
[11742]44         () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
45         () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
46         () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
47         () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
48         //() => new GaussianThompsonSamplingPolicy(),
[11732]49         () => new GaussianThompsonSamplingPolicy(true),
[11742]50         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
51         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
52         //() => new BernoulliThompsonSamplingPolicy(),
[11732]53         () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
54         () => new EpsGreedyPolicy(0.01),
55         () => new EpsGreedyPolicy(0.05),
56         () => new EpsGreedyPolicy(0.1),
57         () => new EpsGreedyPolicy(0.2),
58         () => new EpsGreedyPolicy(0.5),
[11801]59         () => new UCTPolicy(0.01),
60         () => new UCTPolicy(0.05),
[11732]61         () => new UCTPolicy(0.1),
62         () => new UCTPolicy(0.5),
63         () => new UCTPolicy(1),
64         () => new UCTPolicy(2),
65         () => new UCTPolicy( 5),
66         () => new UCTPolicy( 10),
[11806]67         () => new ModifiedUCTPolicy(0.01),
68         () => new ModifiedUCTPolicy(0.05),
69         () => new ModifiedUCTPolicy(0.1),
70         () => new ModifiedUCTPolicy(0.5),
71         () => new ModifiedUCTPolicy(1),
72         () => new ModifiedUCTPolicy(2),
73         () => new ModifiedUCTPolicy( 5),
74         () => new ModifiedUCTPolicy( 10),
[11732]75         () => new UCB1Policy(),
76         () => new UCB1TunedPolicy(),
77         () => new UCBNormalPolicy(),
78         () => new BoltzmannExplorationPolicy(1),
79         () => new BoltzmannExplorationPolicy(10),
80         () => new BoltzmannExplorationPolicy(20),
81         () => new BoltzmannExplorationPolicy(100),
[11792]82         () => new BoltzmannExplorationPolicy(200),
83         () => new BoltzmannExplorationPolicy(500),
[11732]84         () => new ChernoffIntervalEstimationPolicy( 0.01),
85         () => new ChernoffIntervalEstimationPolicy( 0.05),
86         () => new ChernoffIntervalEstimationPolicy( 0.1),
87         () => new ChernoffIntervalEstimationPolicy( 0.2),
[11799]88         () => new ThresholdAscentPolicy(5, 0.01),
89         () => new ThresholdAscentPolicy(5, 0.05),
90         () => new ThresholdAscentPolicy(5, 0.1),
91         () => new ThresholdAscentPolicy(5, 0.2),
[11742]92         () => new ThresholdAscentPolicy(10, 0.01),
93         () => new ThresholdAscentPolicy(10, 0.05),
94         () => new ThresholdAscentPolicy(10, 0.1),
95         () => new ThresholdAscentPolicy(10, 0.2),
[11799]96         () => new ThresholdAscentPolicy(50, 0.01),
97         () => new ThresholdAscentPolicy(50, 0.05),
98         () => new ThresholdAscentPolicy(50, 0.1),
99         () => new ThresholdAscentPolicy(50, 0.2),
[11742]100         () => new ThresholdAscentPolicy(100, 0.01),
101         () => new ThresholdAscentPolicy(100, 0.05),
102         () => new ThresholdAscentPolicy(100, 0.1),
103         () => new ThresholdAscentPolicy(100, 0.2),
[11799]104         () => new ThresholdAscentPolicy(500, 0.01),
105         () => new ThresholdAscentPolicy(500, 0.05),
106         () => new ThresholdAscentPolicy(500, 0.1),
107         () => new ThresholdAscentPolicy(500, 0.2),
[11792]108         //() => new ThresholdAscentPolicy(5000, 0.01),
109         //() => new ThresholdAscentPolicy(10000, 0.01),
[11727]110        };
111
[11799]112      var instanceFactories = new Func<Random, Tuple<IProblem, int>>[]
113      {
[11966]114        (rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
[11832]115        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:false ), 15),
116        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:true ), 15),
117        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:false), 15),
118        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:true), 15),
119        (rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
[11799]120      };
121
122      foreach (var instanceFactory in instanceFactories) {
[11966]123        foreach (var useCanonical in new bool[] { true, false }) {
124          foreach (var randomTries in new int[] { 0, 1, 10 /*, /* 5, 100 /*, 500, 1000 */}) {
[11799]125            foreach (var policyFactory in policyFactories) {
[11792]126              var myRandomTries = randomTries;
127              var localRand = new Random(localRandSeed);
128              var options = new ParallelOptions();
[11799]129              options.MaxDegreeOfParallelism = 4;
[11792]130              Parallel.For(0, reps, options, (i) => {
131                Random myLocalRand;
132                lock (localRand)
133                  myLocalRand = new Random(localRand.Next());
[11730]134
[11792]135                int iterations = 0;
136                var globalStatistics = new SentenceSetStatistics();
[11727]137
[11792]138                // var problem = new SymbolicRegressionPoly10Problem();
139                // var problem = new SantaFeAntProblem();
140                //var problem = new PalindromeProblem();
141                //var problem = new HardPalindromeProblem();
142                //var problem = new RoyalPairProblem();
143                //var problem = new EvenParityProblem();
[11799]144                // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy());
145                var instance = instanceFactory(myLocalRand);
146                var problem = instance.Item1;
147                var maxLen = instance.Item2;
[11966]148                var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
149                  new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
150                // var alg = new SequentialSearch(problem, maxLen, myLocalRand,
151                //   myRandomTries,
152                //   new GenericFunctionApproximationGrammarPolicy(problem,
153                //     useCanonical));
[11792]154                //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
155                //var alg = new AlternativesContextSampler(problem, 25);
[11727]156
[11792]157                alg.SolutionEvaluated += (sentence, quality) => {
158                  iterations++;
159                  globalStatistics.AddSentence(sentence, quality);
[11832]160                  if (iterations % 1000 == 0) {
161                    Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4} {5}", i, myRandomTries, policyFactory(), useCanonical, problem.ToString(), globalStatistics);
[11792]162                  }
163                };
164                alg.FoundNewBestSolution += (sentence, quality) => {
[11799]165                  //Console.WriteLine("{0,5} {1,25} {2} {3}",
166                  //  myRandomTries, policyFactory(), useCanonical,
167                  //  globalStatistics);
[11792]168                };
[11727]169
[11792]170                alg.Run(maxIterations);
171              });
172            }
[11732]173          }
[11799]174        }
175      }
[11727]176    }
177
178    private static void RunDemo() {
[11747]179      // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos)
[11732]180      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
181      // TODO: separate value function from policy
[11742]182      // TODO: warum funktioniert die alte Implementierung von GaussianThompson besser fÃŒr SantaFe als neue? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling
[11730]183      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
184      // TODO: research thompson sampling for max bandit?
[11732]185      // TODO: verify TA implementation using example from the original paper     
[11730]186      // TODO: implement thompson sampling for gaussian mixture models
187      // TODO: gleichzeitige modellierung von transformierter zielvariable (y, 1/y, log(y), exp(y), sqrt(y), ...)
188      // TODO: vergleich bei complete-randomly möglichst kurze sÀtze generieren vs. einfach zufÀllig alternativen wÀhlen
189      // TODO: reward discounting (fÃŒr verÀnderliche reward distributions ÃŒber zeit). speziellen unit-test dafÃŒr erstellen
[11732]190      // TODO: constant optimization
[11727]191
[11730]192
[11832]193      int maxIterations = 1000000;
[11659]194      int iterations = 0;
195      var sw = new Stopwatch();
[11770]196
[11727]197      var globalStatistics = new SentenceSetStatistics();
[11730]198      var random = new Random();
[11659]199
[11799]200
[11806]201      //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
[11832]202      // var phraseLen = 3;
203      // var numPhrases = 5;
204      // var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: false);
205
[11755]206      //var phraseLen = 3;
207      //var numPhrases = 5;
[11832]208      //var problem = new FindPhrasesProblem(random, 10, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0, phrasesAsSets: false);
[11747]209
[11795]210      // good results for symb-reg
211      // prev results: e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
212      // 2015 01 19: grid test with canonical states:
213      // - EpsGreedyPolicy(0.20,max)
214      // - GenericThompsonSamplingPolicy("")
[11801]215      // - UCTPolicy(0.10) (5 of 5 runs, 35000 iters avg.), 10 successful runs of 10 with rand-tries 0, bei 40000 iters 9 / 10, bei 30000 1 / 10
[11832]216      // 2015 01 22: symb-reg: grid test on find-phrases problem showed good results for UCB1TunedPolicy and SequentialSearch with canonical states
217      // - symb-reg: consistent results with UCB1Tuned. finds optimal solution in ~50k iters (new GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));
218      // 2015 01 23: grid test with canonical states:
219      // - UCTPolicy(0.10) und UCBNormalPolicy 10/10 optimale Lösungen bei max. 50k iters, etwas schlechter: generic-thompson with variable sigma und bolzmannexploration (100)
[11799]220
[11832]221
[11795]222      // good results for artificial ant:
223      // prev results:
224      // - var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
225      // - GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant
226      // 2015 01 19: grid test with canonical states (non-canonical slightly worse)
[11832]227      // - ant: Threshold Ascent (best 100, 0.01; all variants relatively good)
228      // - ant: Policies where the variance has a large weight compared to the mean? (Gaussian(compatible), Gaussian with fixed variance, UCT with large c, alle TA)
229      // - ant: UCB1Tuned with canonical states also works very well for the artificial ant! constistent solutions in less than 10k iters     
[11770]230
[11972]231      //var problem = new SymbolicRegressionPoly10Problem();
[11832]232      //var problem = new SantaFeAntProblem();
[11972]233      var problem = new SymbolicRegressionProblem(random, "Breiman");
[11727]234      //var problem = new PalindromeProblem();
235      //var problem = new HardPalindromeProblem();
236      //var problem = new RoyalPairProblem();
237      //var problem = new EvenParityProblem();
[11747]238      // symbreg length = 11 q = 0.824522210419616
[11755]239      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
[11770]240      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
[11806]241      //var alg = new SequentialSearch(problem, 23, random, 0,
[11832]242      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.QLearningGrammarPolicy(problem, new BoltzmannExplorationPolicy(10),
243      //    1, 1, true));
244      //var alg = new SequentialSearch(problem, 23, random, 0,
245      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericContextualGrammarPolicy(problem, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)), true));
[11972]246      var alg = new SequentialSearch(problem, 30, random, 0,
[11832]247        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericFunctionApproximationGrammarPolicy(problem, true));
[11747]248      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
249      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
250      //var alg = new MctsContextualSampler(problem, 23, random, 0); // must visit each canonical solution only once
251      //var alg = new TemporalDifferenceTreeSearchSampler(problem, 30, random, 1);
252      //var alg = new ExhaustiveBreadthFirstSearch(problem, 7);
[11730]253      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
254      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
255      // var alg = new AlternativesSampler(problem, 17);
[11732]256      // var alg = new RandomSearch(problem, random, 17);
[11747]257      //var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);
[11659]258
[11727]259      alg.FoundNewBestSolution += (sentence, quality) => {
[11832]260        //Console.WriteLine("{0}", globalStatistics);
[11745]261        //Console.ReadLine();
[11659]262      };
[11727]263      alg.SolutionEvaluated += (sentence, quality) => {
[11659]264        iterations++;
[11727]265        globalStatistics.AddSentence(sentence, quality);
[11832]266
[11972]267        //if (iterations % 100 == 0) {
268        //  if (iterations % 10000 == 0) Console.Clear();
269        //  Console.SetCursorPosition(0, 0);
270        //  alg.PrintStats();
271        //}
[11832]272
[11747]273        //Console.WriteLine(sentence);
[11745]274
[11972]275        if (iterations % 100 == 0) {
276          Console.WriteLine("{0}", globalStatistics);
277        }
[11659]278      };
279
280
281      sw.Start();
282
[11727]283      alg.Run(maxIterations);
[11659]284
285      sw.Stop();
286
[11770]287      Console.Clear();
288      alg.PrintStats();
289      Console.WriteLine(globalStatistics);
[11659]290      Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
291        sw.Elapsed.TotalSeconds,
292        maxIterations / (double)sw.Elapsed.TotalSeconds,
293        (double)sw.ElapsedMilliseconds * 1000 / maxIterations);
294    }
[11846]295
296    public static void RunGpDemo() {
297      int iterations = 0;
[11865]298      const int seed = 31415;
[11846]299      const int maxIterations = 100000;
[11865]300
[11895]301      //var prob = new SymbolicRegressionProblem(new Random(31415), "Tower");
302      var prob = new SymbolicRegressionPoly10Problem();
303      var sgp = new OffspringSelectionGP(prob, new Random(seed), true);
304      RunGP(sgp, prob, 200000, 500, 0.15, 50);
[11865]305    }
306
307
308    private static void RunGpGridTest() {
309      const int nReps = 20;
[11846]310      const int seed = 31415;
[11972]311      //const int maxIters = 50000;
[11865]312      var rand = new Random(seed);
[11972]313      var problemFactories = new Func<Tuple<int, int, ISymbolicExpressionTreeProblem>>[]
[11865]314      {
[11972]315        () => Tuple.Create(50000, 32, (ISymbolicExpressionTreeProblem)new PermutationProblem()),
316        () => Tuple.Create(50000, 32, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
317        () => Tuple.Create(50000, 32,(ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
318        () => Tuple.Create(50000, 64, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
319        () => Tuple.Create(50000, 64,(ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
320        () => Tuple.Create(50000, 128, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
321        () => Tuple.Create(50000, 128,(ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
322        () => Tuple.Create(50000, 256, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
323        () => Tuple.Create(50000, 256,(ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
324        //() => new RoyalPairProblem(),
325        //() => new FindPhrasesProblem(rand, 20, 5, 3, 5, 0, 1, 0, true),
326        //() => new FindPhrasesProblem(rand, 20, 5, 3, 5, 0, 1, 0, false),
327        //() => new FindPhrasesProblem(rand, 20, 5, 3, 5, 50, 1, 0.8, false),
[11865]328      };
[11972]329
330      foreach (var popSize in new int[] { 100 /*, 250, 500, 1000, 2500, 5000, 10000 */ }) {
[11895]331        foreach (var mutationRate in new double[] { /* 0.05, /* 0.10, */ 0.15, /* 0.25, 0.3 */}) {
[11972]332          // skip experiments that are already done
333          foreach (var problemFactory in problemFactories) {
334            for (int i = 0; i < nReps; i++) {
335              {
336                var solverSeed = rand.Next();
337                var tupel = problemFactory();
338                var maxIters = tupel.Item1;
339                var maxSize = tupel.Item2;
340                var prob = tupel.Item3;
341                var sgp = new StandardGP(prob, new Random(solverSeed));
342                RunGP(sgp, prob, maxIters, popSize, mutationRate, maxSize);
343              }
344              //{
345              //  var prob = problemFactory();
346              //  var osgp = new OffspringSelectionGP(prob, new Random(solverSeed));
347              //  RunGP(osgp, prob, maxIters, popSize, mutationRate, maxSize);
348              //}
[11895]349            }
[11865]350          }
351        }
[11972]352
[11865]353      }
354    }
[11846]355
[11895]356    private static void RunGP(IGPSolver gp, ISymbolicExpressionTreeProblem prob, int maxIters, int popSize, double mutationRate, int maxSize) {
[11865]357      int iterations = 0;
358      var globalStatistics = new SentenceSetStatistics(prob.BestKnownQuality(maxSize));
[11895]359      var gpName = gp.GetType().Name;
360      var probName = prob.GetType().Name;
[11865]361      gp.SolutionEvaluated += (sentence, quality) => {
362        iterations++;
363        globalStatistics.AddSentence(sentence, quality);
364
[11972]365        if (iterations % 100 == 0) {
[11895]366          Console.WriteLine("\"{0,25}\" {1} {2:N2} {3} \"{4,25}\" {5}", gpName, popSize, mutationRate, maxSize, probName, globalStatistics);
[11865]367        }
[11846]368      };
[11865]369
370      gp.PopulationSize = popSize;
371      gp.MutationRate = mutationRate;
372      gp.MaxSolutionSize = maxSize + 2;
373      gp.MaxSolutionDepth = maxSize + 2;
374
375      gp.Run(maxIters);
376    }
[11659]377  }
378}
Note: See TracBrowser for help on using the repository browser.