Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs @ 11869

Last change on this file since 11869 was 11865, checked in by gkronber, 10 years ago

#2283: implemented royal tree problem and grid test for tree-based gp variants

File size: 19.1 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Globalization;
5using System.Text;
6using System.Threading;
7using System.Threading.Tasks;
8using HeuristicLab.Algorithms.Bandits;
9using HeuristicLab.Algorithms.Bandits.BanditPolicies;
10using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
11using HeuristicLab.Algorithms.Bandits.Models;
12using HeuristicLab.Algorithms.GeneticProgramming;
13using HeuristicLab.Algorithms.GrammaticalOptimization;
14using HeuristicLab.Problems.GrammaticalOptimization;
15using BoltzmannExplorationPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.BoltzmannExplorationPolicy;
16using EpsGreedyPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.EpsGreedyPolicy;
17using IProblem = HeuristicLab.Problems.GrammaticalOptimization.IProblem;
18using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;
19using UCTPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.UCTPolicy;
20
21namespace Main {
22  class Program {
23    static void Main(string[] args) {
24      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
25
26      //RunDemo();
27      //RunGpDemo();
28      // RunGridTest();
29      RunGpGridTest();
30    }
31
32    private static void RunGridTest() {
33      int maxIterations = 70000; // for poly-10 with 50000 evaluations no successful try with hl yet
34      //var globalRandom = new Random(31415);
35      var localRandSeed = 31415;
36      var reps = 30;
37
38      var policyFactories = new Func<IBanditPolicy>[]
39        {
40         () => new RandomPolicy(),
41          () => new ActiveLearningPolicy(), 
42         () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
43         () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
44         () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
45         () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
46         //() => new GaussianThompsonSamplingPolicy(),
47         () => new GaussianThompsonSamplingPolicy(true),
48         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
49         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
50         //() => new BernoulliThompsonSamplingPolicy(),
51         () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
52         () => new EpsGreedyPolicy(0.01),
53         () => new EpsGreedyPolicy(0.05),
54         () => new EpsGreedyPolicy(0.1),
55         () => new EpsGreedyPolicy(0.2),
56         () => new EpsGreedyPolicy(0.5),
57         () => new UCTPolicy(0.01),
58         () => new UCTPolicy(0.05),
59         () => new UCTPolicy(0.1),
60         () => new UCTPolicy(0.5),
61         () => new UCTPolicy(1),
62         () => new UCTPolicy(2),
63         () => new UCTPolicy( 5),
64         () => new UCTPolicy( 10),
65         () => new ModifiedUCTPolicy(0.01),
66         () => new ModifiedUCTPolicy(0.05),
67         () => new ModifiedUCTPolicy(0.1),
68         () => new ModifiedUCTPolicy(0.5),
69         () => new ModifiedUCTPolicy(1),
70         () => new ModifiedUCTPolicy(2),
71         () => new ModifiedUCTPolicy( 5),
72         () => new ModifiedUCTPolicy( 10),
73         () => new UCB1Policy(),
74         () => new UCB1TunedPolicy(),
75         () => new UCBNormalPolicy(),
76         () => new BoltzmannExplorationPolicy(1),
77         () => new BoltzmannExplorationPolicy(10),
78         () => new BoltzmannExplorationPolicy(20),
79         () => new BoltzmannExplorationPolicy(100),
80         () => new BoltzmannExplorationPolicy(200),
81         () => new BoltzmannExplorationPolicy(500),
82         () => new ChernoffIntervalEstimationPolicy( 0.01),
83         () => new ChernoffIntervalEstimationPolicy( 0.05),
84         () => new ChernoffIntervalEstimationPolicy( 0.1),
85         () => new ChernoffIntervalEstimationPolicy( 0.2),
86         () => new ThresholdAscentPolicy(5, 0.01),
87         () => new ThresholdAscentPolicy(5, 0.05),
88         () => new ThresholdAscentPolicy(5, 0.1),
89         () => new ThresholdAscentPolicy(5, 0.2),
90         () => new ThresholdAscentPolicy(10, 0.01),
91         () => new ThresholdAscentPolicy(10, 0.05),
92         () => new ThresholdAscentPolicy(10, 0.1),
93         () => new ThresholdAscentPolicy(10, 0.2),
94         () => new ThresholdAscentPolicy(50, 0.01),
95         () => new ThresholdAscentPolicy(50, 0.05),
96         () => new ThresholdAscentPolicy(50, 0.1),
97         () => new ThresholdAscentPolicy(50, 0.2),
98         () => new ThresholdAscentPolicy(100, 0.01),
99         () => new ThresholdAscentPolicy(100, 0.05),
100         () => new ThresholdAscentPolicy(100, 0.1),
101         () => new ThresholdAscentPolicy(100, 0.2),
102         () => new ThresholdAscentPolicy(500, 0.01),
103         () => new ThresholdAscentPolicy(500, 0.05),
104         () => new ThresholdAscentPolicy(500, 0.1),
105         () => new ThresholdAscentPolicy(500, 0.2),
106         //() => new ThresholdAscentPolicy(5000, 0.01),
107         //() => new ThresholdAscentPolicy(10000, 0.01),
108        };
109
110      var instanceFactories = new Func<Random, Tuple<IProblem, int>>[]
111      {
112        //(rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
113        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:false ), 15),
114        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:true ), 15),
115        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:false), 15),
116        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:true), 15),
117        (rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
118      };
119
120      foreach (var instanceFactory in instanceFactories) {
121        foreach (var useCanonical in new bool[] { true /*, false */}) {
122          foreach (var randomTries in new int[] { 0 /*, 1, 10 /*, /* 5, 100 /*, 500, 1000 */}) {
123            foreach (var policyFactory in policyFactories) {
124              var myRandomTries = randomTries;
125              var localRand = new Random(localRandSeed);
126              var options = new ParallelOptions();
127              options.MaxDegreeOfParallelism = 4;
128              Parallel.For(0, reps, options, (i) => {
129                Random myLocalRand;
130                lock (localRand)
131                  myLocalRand = new Random(localRand.Next());
132
133                int iterations = 0;
134                var globalStatistics = new SentenceSetStatistics();
135
136                // var problem = new SymbolicRegressionPoly10Problem();
137                // var problem = new SantaFeAntProblem();
138                //var problem = new PalindromeProblem();
139                //var problem = new HardPalindromeProblem();
140                //var problem = new RoyalPairProblem();
141                //var problem = new EvenParityProblem();
142                // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy());
143                var instance = instanceFactory(myLocalRand);
144                var problem = instance.Item1;
145                var maxLen = instance.Item2;
146                //var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
147                //  new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
148                var alg = new SequentialSearch(problem, maxLen, myLocalRand,
149                  myRandomTries,
150                  new GenericFunctionApproximationGrammarPolicy(problem,
151                    useCanonical));
152                //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
153                //var alg = new AlternativesContextSampler(problem, 25);
154
155                alg.SolutionEvaluated += (sentence, quality) => {
156                  iterations++;
157                  globalStatistics.AddSentence(sentence, quality);
158                  if (iterations % 1000 == 0) {
159                    Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4} {5}", i, myRandomTries, policyFactory(), useCanonical, problem.ToString(), globalStatistics);
160                  }
161                };
162                alg.FoundNewBestSolution += (sentence, quality) => {
163                  //Console.WriteLine("{0,5} {1,25} {2} {3}",
164                  //  myRandomTries, policyFactory(), useCanonical,
165                  //  globalStatistics);
166                };
167
168                alg.Run(maxIterations);
169              });
170            }
171          }
172        }
173      }
174    }
175
176    private static void RunDemo() {
177      // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos)
178      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
179      // TODO: separate value function from policy
180      // TODO: warum funktioniert die alte Implementierung von GaussianThompson besser fÃŒr SantaFe als neue? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling
181      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
182      // TODO: research thompson sampling for max bandit?
183      // TODO: verify TA implementation using example from the original paper     
184      // TODO: implement thompson sampling for gaussian mixture models
185      // TODO: gleichzeitige modellierung von transformierter zielvariable (y, 1/y, log(y), exp(y), sqrt(y), ...)
186      // TODO: vergleich bei complete-randomly möglichst kurze sÀtze generieren vs. einfach zufÀllig alternativen wÀhlen
187      // TODO: reward discounting (fÃŒr verÀnderliche reward distributions ÃŒber zeit). speziellen unit-test dafÃŒr erstellen
188      // TODO: constant optimization
189
190
191      int maxIterations = 1000000;
192      int iterations = 0;
193      var sw = new Stopwatch();
194
195      var globalStatistics = new SentenceSetStatistics();
196      var random = new Random();
197
198
199      //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
200      // var phraseLen = 3;
201      // var numPhrases = 5;
202      // var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: false);
203
204      //var phraseLen = 3;
205      //var numPhrases = 5;
206      //var problem = new FindPhrasesProblem(random, 10, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0, phrasesAsSets: false);
207
208      // good results for symb-reg
209      // prev results: e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
210      // 2015 01 19: grid test with canonical states:
211      // - EpsGreedyPolicy(0.20,max)
212      // - GenericThompsonSamplingPolicy("")
213      // - UCTPolicy(0.10) (5 of 5 runs, 35000 iters avg.), 10 successful runs of 10 with rand-tries 0, bei 40000 iters 9 / 10, bei 30000 1 / 10
214      // 2015 01 22: symb-reg: grid test on find-phrases problem showed good results for UCB1TunedPolicy and SequentialSearch with canonical states
215      // - symb-reg: consistent results with UCB1Tuned. finds optimal solution in ~50k iters (new GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));
216      // 2015 01 23: grid test with canonical states:
217      // - UCTPolicy(0.10) und UCBNormalPolicy 10/10 optimale Lösungen bei max. 50k iters, etwas schlechter: generic-thompson with variable sigma und bolzmannexploration (100)
218
219
220      // good results for artificial ant:
221      // prev results:
222      // - var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
223      // - GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant
224      // 2015 01 19: grid test with canonical states (non-canonical slightly worse)
225      // - ant: Threshold Ascent (best 100, 0.01; all variants relatively good)
226      // - ant: Policies where the variance has a large weight compared to the mean? (Gaussian(compatible), Gaussian with fixed variance, UCT with large c, alle TA)
227      // - ant: UCB1Tuned with canonical states also works very well for the artificial ant! constistent solutions in less than 10k iters     
228
229      var problem = new SymbolicRegressionPoly10Problem();
230      //var problem = new SantaFeAntProblem();
231      //var problem = new SymbolicRegressionProblem(random, "Tower");
232      //var problem = new PalindromeProblem();
233      //var problem = new HardPalindromeProblem();
234      //var problem = new RoyalPairProblem();
235      //var problem = new EvenParityProblem();
236      // symbreg length = 11 q = 0.824522210419616
237      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
238      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
239      //var alg = new SequentialSearch(problem, 23, random, 0,
240      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.QLearningGrammarPolicy(problem, new BoltzmannExplorationPolicy(10),
241      //    1, 1, true));
242      //var alg = new SequentialSearch(problem, 23, random, 0,
243      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericContextualGrammarPolicy(problem, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)), true));
244      var alg = new SequentialSearch(problem, 23, random, 0,
245        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericFunctionApproximationGrammarPolicy(problem, true));
246      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
247      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
248      //var alg = new MctsContextualSampler(problem, 23, random, 0); // must visit each canonical solution only once
249      //var alg = new TemporalDifferenceTreeSearchSampler(problem, 30, random, 1);
250      //var alg = new ExhaustiveBreadthFirstSearch(problem, 7);
251      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
252      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
253      // var alg = new AlternativesSampler(problem, 17);
254      // var alg = new RandomSearch(problem, random, 17);
255      //var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);
256
257      alg.FoundNewBestSolution += (sentence, quality) => {
258        //Console.WriteLine("{0}", globalStatistics);
259        //Console.ReadLine();
260      };
261      alg.SolutionEvaluated += (sentence, quality) => {
262        iterations++;
263        globalStatistics.AddSentence(sentence, quality);
264
265        if (iterations % 1000 == 0) {
266          if (iterations % 10000 == 0) Console.Clear();
267          Console.SetCursorPosition(0, 0);
268          alg.PrintStats();
269        }
270
271        //Console.WriteLine(sentence);
272
273        //if (iterations % 10000 == 0) {
274        //  Console.WriteLine("{0}", globalStatistics);
275        //}
276      };
277
278
279      sw.Start();
280
281      alg.Run(maxIterations);
282
283      sw.Stop();
284
285      Console.Clear();
286      alg.PrintStats();
287      Console.WriteLine(globalStatistics);
288      Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
289        sw.Elapsed.TotalSeconds,
290        maxIterations / (double)sw.Elapsed.TotalSeconds,
291        (double)sw.ElapsedMilliseconds * 1000 / maxIterations);
292    }
293
294    public static void RunGpDemo() {
295      int iterations = 0;
296      const int seed = 31415;
297      const int maxIterations = 100000;
298
299    }
300
301
302    private static void RunGpGridTest() {
303      const int nReps = 20;
304      const int seed = 31415;
305      const int maxIters = 100000;
306      var rand = new Random(seed);
307      var problemFactories = new Func<ISymbolicExpressionTreeProblem>[]
308      {
309        () => new SantaFeAntProblem(),
310        () => new SymbolicRegressionPoly10Problem(),
311      };
312      foreach (var popSize in new int[] { 50, 100, 250, 500, 1000, 2500, 5000 }) {
313        foreach (var mutationRate in new double[] {/* 0.05, 0.10, */ 0.15, /* 0.25, 0.3 */ }) {
314          foreach (var maxSize in new int[] { 30, 50, 100 }) {
315            foreach (var problemFactory in problemFactories)
316              for (int i = 0; i < nReps; i++) {
317                var solverSeed = rand.Next();
318                {
319                  var prob = problemFactory();
320                  RunStandardGP(prob, solverSeed, maxIters, popSize, mutationRate, maxSize);
321                }
322                {
323                  var prob = problemFactory();
324                  RunOSGP(prob, solverSeed, maxIters, popSize, mutationRate, maxSize);
325                }
326              }
327          }
328        }
329      }
330    }
331
332    private static void RunStandardGP(ISymbolicExpressionTreeProblem prob, int solverSeed, int maxIters, int popSize, double mutationRate, int maxSize) {
333      int iterations = 0;
334      var globalStatistics = new SentenceSetStatistics(prob.BestKnownQuality(maxSize));
335
336      var gp = new StandardGP(prob, new Random(solverSeed));
337      gp.SolutionEvaluated += (sentence, quality) => {
338        iterations++;
339        globalStatistics.AddSentence(sentence, quality);
340
341        if (iterations % 10000 == 0) {
342          Console.WriteLine("\"{0,25}\" \"{1,25}\" {2}", gp, prob, globalStatistics);
343        }
344      };
345
346      gp.PopulationSize = popSize;
347      gp.MutationRate = mutationRate;
348      gp.MaxSolutionSize = maxSize + 2;
349      gp.MaxSolutionDepth = maxSize + 2;
350
351      var sw = new Stopwatch();
352
353      sw.Start();
354      gp.Run(maxIters);
355      sw.Stop();
356
357      Console.WriteLine("\"{0,25}\" \"{1,25}\" {2}", gp, prob, globalStatistics);
358
359      // Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
360      //   sw.Elapsed.TotalSeconds,
361      //   maxIters / (double)sw.Elapsed.TotalSeconds,
362      //   (double)sw.ElapsedMilliseconds * 1000 / maxIters);
363    }
364
365    private static void RunOSGP(ISymbolicExpressionTreeProblem prob, int solverSeed, int maxIters, int popSize, double mutationRate, int maxSize) {
366      int iterations = 0;
367      var globalStatistics = new SentenceSetStatistics(prob.BestKnownQuality(maxSize));
368
369      var gp = new OffspringSelectionGP(prob, new Random(solverSeed));
370      gp.SolutionEvaluated += (sentence, quality) => {
371        iterations++;
372        globalStatistics.AddSentence(sentence, quality);
373
374        if (iterations % 10000 == 0) {
375          Console.WriteLine("\"{0,25}\" \"{1,25}\" {2}", gp, prob, globalStatistics);
376        }
377      };
378
379      gp.PopulationSize = popSize;
380      gp.MutationRate = mutationRate;
381      gp.MaxSolutionSize = maxSize + 2;
382      gp.MaxSolutionDepth = maxSize + 2;
383
384      var sw = new Stopwatch();
385
386      sw.Start();
387      gp.Run(maxIters);
388      sw.Stop();
389
390      Console.WriteLine("\"{0,25}\" \"{1,25}\" {2}", gp, prob, globalStatistics);
391
392      // Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
393      //   sw.Elapsed.TotalSeconds,
394      //   maxIters / (double)sw.Elapsed.TotalSeconds,
395      //   (double)sw.ElapsedMilliseconds * 1000 / maxIters);
396    }
397  }
398}
Note: See TracBrowser for help on using the repository browser.