Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/07/15 09:21:46 (8 years ago)
Author:
gkronber
Message:

#2283: refactoring and bug fixes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11730 r11732  
    1111using HeuristicLab.Algorithms.GrammaticalOptimization;
    1212using HeuristicLab.Problems.GrammaticalOptimization;
     13using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
    1314
    1415namespace Main {
     
    2223
    2324    private static void RunGridTest() {
    24       int maxIterations = 100000; // for poly-10 with 50000 evaluations no successful try with hl yet
    25       // var globalRandom = new Random(31415);
     25      int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
     26      //var globalRandom = new Random(31415);
    2627      var localRandSeed = 31415;
    2728      var reps = 20;
    2829
    29       var policyFactories = new Func<Random, int, IPolicy>[]
     30      var policies = new Func<IPolicy>[]
    3031        {
    31           (rand, numActions) => new GaussianThompsonSamplingPolicy(rand, numActions),
    32           (rand, numActions) => new BernoulliThompsonSamplingPolicy(rand, numActions),
    33           (rand, numActions) => new RandomPolicy(rand, numActions),
    34           (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.01),
    35           (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.05),
    36           (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1),
    37           (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.2),
    38           (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.5),
    39           (rand, numActions) => new UCTPolicy(numActions, 0.1),
    40           (rand, numActions) => new UCTPolicy(numActions, 0.5),
    41           (rand, numActions) => new UCTPolicy(numActions, 1),
    42           (rand, numActions) => new UCTPolicy(numActions, 2),
    43           (rand, numActions) => new UCTPolicy(numActions, 5),
    44           (rand, numActions) => new UCTPolicy(numActions, 10),
    45           (rand, numActions) => new UCB1Policy(numActions),
    46           (rand, numActions) => new UCB1TunedPolicy(numActions),
    47           (rand, numActions) => new UCBNormalPolicy(numActions),
    48           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.1),
    49           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.5),
    50           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 1),
    51           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 5),
    52           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 10),
    53           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 20),
    54           (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 100),
    55           (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.01),
    56           (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.05),
    57           (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.1),
    58           (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.2),
    59           (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.01),
    60           (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.05),
    61           (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.1),
    62           (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.2),
    63           (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.01),
    64           (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.05),
    65           (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.1),
    66           (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.2),
    67           (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.01),
    68           (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.05),
    69           (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.1),
    70           (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.2),
    71           (rand, numActions) => new ThresholdAscentPolicy(numActions, 5000, 0.01),
    72           (rand, numActions) => new ThresholdAscentPolicy(numActions, 10000, 0.01),
     32         () => new GaussianThompsonSamplingPolicy(),
     33         () => new GaussianThompsonSamplingPolicy(true),
     34         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)),
     35         () => new BernoulliThompsonSamplingPolicy(),
     36         () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
     37         () => new RandomPolicy(),
     38         () => new EpsGreedyPolicy(0.01),
     39         () => new EpsGreedyPolicy(0.05),
     40         () => new EpsGreedyPolicy(0.1),
     41         () => new EpsGreedyPolicy(0.2),
     42         () => new EpsGreedyPolicy(0.5),
     43         () => new UCTPolicy(0.1),
     44         () => new UCTPolicy(0.5),
     45         () => new UCTPolicy(1),
     46         () => new UCTPolicy(2),
     47         () => new UCTPolicy( 5),
     48         () => new UCTPolicy( 10),
     49         () => new UCB1Policy(),
     50         () => new UCB1TunedPolicy(),
     51         () => new UCBNormalPolicy(),
     52         () => new BoltzmannExplorationPolicy(0.1),
     53         () => new BoltzmannExplorationPolicy(0.5),
     54         () => new BoltzmannExplorationPolicy(1),
     55         () => new BoltzmannExplorationPolicy(5),
     56         () => new BoltzmannExplorationPolicy(10),
     57         () => new BoltzmannExplorationPolicy(20),
     58         () => new BoltzmannExplorationPolicy(100),
     59         () => new ChernoffIntervalEstimationPolicy( 0.01),
     60         () => new ChernoffIntervalEstimationPolicy( 0.05),
     61         () => new ChernoffIntervalEstimationPolicy( 0.1),
     62         () => new ChernoffIntervalEstimationPolicy( 0.2),
     63         // (rand) => new ThresholdAscentPolicy(10, 0.01),
     64         // (rand) => new ThresholdAscentPolicy(10, 0.05),
     65         // (rand) => new ThresholdAscentPolicy(10, 0.1),
     66         // (rand) => new ThresholdAscentPolicy(10, 0.2),
     67         // (rand) => new ThresholdAscentPolicy(100, 0.01),
     68         // (rand) => new ThresholdAscentPolicy(100, 0.05),
     69         // (rand) => new ThresholdAscentPolicy(100, 0.1),
     70         // (rand) => new ThresholdAscentPolicy(100, 0.2),
     71         // (rand) => new ThresholdAscentPolicy(1000, 0.01),
     72         // (rand) => new ThresholdAscentPolicy(1000, 0.05),
     73         // (rand) => new ThresholdAscentPolicy(1000, 0.1),
     74         // (rand) => new ThresholdAscentPolicy(1000, 0.2),
     75         // (rand) => new ThresholdAscentPolicy(5000, 0.01),
     76         // (rand) => new ThresholdAscentPolicy(10000, 0.01),
    7377        };
    7478
    75       var tasks = new List<Task>();
    76       foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
    77         foreach (var policyFactory in policyFactories) {
    78           var myPolicyFactory = policyFactory;
    79           var myRandomTries = randomTries;
    80           var localRand = new Random(localRandSeed);
    81           var options = new ParallelOptions();
    82           options.MaxDegreeOfParallelism = 1;
    83           Parallel.For(0, reps, options, (i) => {
    84             //var t = Task.Run(() => {
    85             Random myLocalRand;
    86             lock (localRand)
    87               myLocalRand = new Random(localRand.Next());
    88 
    89             //for (int i = 0; i < reps; i++) {
    90 
    91             int iterations = 0;
    92             var sw = new Stopwatch();
    93             var globalStatistics = new SentenceSetStatistics();
    94 
    95             var problem = new SymbolicRegressionPoly10Problem();
    96             //var problem = new SantaFeAntProblem();
    97             //var problem = new PalindromeProblem();
    98             //var problem = new HardPalindromeProblem();
    99             //var problem = new RoyalPairProblem();
    100             //var problem = new EvenParityProblem();
    101             var alg = new MctsSampler(problem, 25, myLocalRand, myRandomTries, myPolicyFactory);
    102             //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
    103             //var alg = new AlternativesContextSampler(problem, 25);
    104 
    105             alg.SolutionEvaluated += (sentence, quality) => {
    106               iterations++;
    107               globalStatistics.AddSentence(sentence, quality);
    108               if (iterations % 10000 == 0) {
    109                 Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, myPolicyFactory(myLocalRand, 1), globalStatistics);
    110               }
    111             };
    112 
    113             sw.Start();
    114 
    115             alg.Run(maxIterations);
    116 
    117             sw.Stop();
    118             //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
    119             //}
    120             //});
    121             //tasks.Add(t);
    122           });
     79      foreach (var problem in new Tuple<IProblem, int>[]
     80        {
     81          Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
     82          Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
     83        })
     84        foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
     85          foreach (var policy in policies) {
     86            var myRandomTries = randomTries;
     87            var localRand = new Random(localRandSeed);
     88            var options = new ParallelOptions();
     89            options.MaxDegreeOfParallelism = 1;
     90            Parallel.For(0, reps, options, (i) => {
     91              //var t = Task.Run(() => {
     92              Random myLocalRand;
     93              lock (localRand)
     94                myLocalRand = new Random(localRand.Next());
     95
     96              //for (int i = 0; i < reps; i++) {
     97
     98              int iterations = 0;
     99              var globalStatistics = new SentenceSetStatistics();
     100
     101              // var problem = new SymbolicRegressionPoly10Problem();
     102              // var problem = new SantaFeAntProblem();
     103              //var problem = new PalindromeProblem();
     104              //var problem = new HardPalindromeProblem();
     105              //var problem = new RoyalPairProblem();
     106              //var problem = new EvenParityProblem();
     107              var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each experiment
     108              //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
     109              //var alg = new AlternativesContextSampler(problem, 25);
     110
     111              alg.SolutionEvaluated += (sentence, quality) => {
     112                iterations++;
     113                globalStatistics.AddSentence(sentence, quality);
     114                if (iterations % 10000 == 0) {
     115                  Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, policy(), globalStatistics);
     116                }
     117              };
     118
     119
     120              alg.Run(maxIterations);
     121
     122              //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
     123              //}
     124              //});
     125              //tasks.Add(t);
     126            });
     127          }
    123128        }
    124       }
    125129      //Task.WaitAll(tasks.ToArray());
    126130    }
    127131
    128132    private static void RunDemo() {
     133      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
     134      // TODO: implement GaussianWithUnknownMeanAndVariance Model for Thompson Sampling (verify with unit test if correct mean and variance is identified)
     135      // TODO: separate value function from policy
     136      // TODO: debug and verify implementation variants of Gaussian Thompson Sampling with unit test
     137      // TODO: refactor Policies to use banditInfos (policies are factories for bandit infos and bandit info only has an update routine, each policy works only with it's type of banditinfo)
     138      // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
     139      // TODO: exhaustive search with priority list
    129140      // TODO: warum funktioniert die alte Implementierung von GaussianThompson besser für SantaFe als alte? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling
    130141      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
     
    133144      // TODO: research thompson sampling for max bandit?
    134145      // TODO: ausführlicher test von strategien für k-armed max bandit
    135       // TODO: verify TA implementation using example from the original paper
    136       // TODO: reference HL.ProblemInstances and try on tower dataset
     146      // TODO: verify TA implementation using example from the original paper     
    137147      // TODO: compare results for different policies also for the symb-reg problem
    138148      // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
     
    144154      // TODO: vergleich bei complete-randomly möglichst kurze sätze generieren vs. einfach zufällig alternativen wählen
    145155      // TODO: reward discounting (für veränderliche reward distributions über zeit). speziellen unit-test dafür erstellen
    146 
    147 
    148       int maxIterations = 10000000;
     156      // TODO: constant optimization
     157
     158
     159      int maxIterations = 100000;
    149160      int iterations = 0;
    150161      var sw = new Stopwatch();
     
    154165      var random = new Random();
    155166
    156       //var problem = new SymbolicRegressionPoly10Problem();
    157       var problem = new SantaFeAntProblem();
     167      var problem = new SymbolicRegressionPoly10Problem();
     168      //var problem = new SantaFeAntProblem(); // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
     169      //var problem = new SymbolicRegressionProblem("Tower"); // very good results e.g. new EpsGreedyPolicy(0.2) using max reward as quality !!!
    158170      //var problem = new PalindromeProblem();
    159171      //var problem = new HardPalindromeProblem();
    160172      //var problem = new RoyalPairProblem();
    161173      //var problem = new EvenParityProblem();
    162       //var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new GenericThompsonSamplingPolicy(rand, numActions, new GaussianModel(numActions, 0.5, 10)));
     174      var alg = new MctsSampler(problem, 23, random, 10, new EpsGreedyPolicy(0.2)); // GaussianModelWithUnknownVariance (and Q= 0.99-quantil) works well for Ant
    163175      //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
    164176      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
    165177      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
    166178      // var alg = new AlternativesSampler(problem, 17);
    167       var alg = new RandomSearch(problem, random, 17);
     179      // var alg = new RandomSearch(problem, random, 17);
     180      // var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);
    168181
    169182      alg.FoundNewBestSolution += (sentence, quality) => {
    170183        bestQuality = quality;
    171184        bestSentence = sentence;
    172         Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
     185        Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
    173186      };
    174187      alg.SolutionEvaluated += (sentence, quality) => {
     
    176189        globalStatistics.AddSentence(sentence, quality);
    177190        if (iterations % 1000 == 0) {
    178           //alg.PrintStats();
     191          alg.PrintStats();
    179192        }
    180193        if (iterations % 10000 == 0) {
    181194          //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
    182195          //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
    183           Console.WriteLine(globalStatistics);
     196          Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
    184197        }
    185198      };
Note: See TracChangeset for help on using the changeset viewer.