Timestamp: 01/02/15 16:08:21 (10 years ago)
Author: gkronber
Message: #2283: several major extensions for grammatical optimization

File (1 edited):
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

--- branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs (r11727)
+++ branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs (r11730)
 using System.Data;
 using System.Diagnostics;
+using System.Globalization;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.Models;
 using HeuristicLab.Algorithms.GrammaticalOptimization;
 using HeuristicLab.Problems.GrammaticalOptimization;
     
   class Program {
     static void Main(string[] args) {
-      // RunDemo();
-      RunGridTest();
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+
+      RunDemo();
+      //RunGridTest();
     }
 
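In the rewritten Main, the default thread culture is set to the invariant culture before anything else runs, so numeric output from all worker threads is formatted identically regardless of OS locale (this is what the new using System.Globalization is for). A minimal sketch of the effect; the class around it is illustrative, not part of the changeset:

  using System;
  using System.Globalization;

  class CultureSketch {
    static void Main() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      // Every thread created from here on uses '.' as the decimal separator,
      // so grid-test logs stay machine-readable on any machine.
      Console.WriteLine("{0:F5}", 3.14159); // "3.14159", never "3,14159"
    }
  }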
     private static void RunGridTest() {
-      int maxIterations = 150000;
-      var globalRandom = new Random(31415);
-      var reps = 10;
-      Parallel.ForEach(new int[] { 1, 5, 10, 100, 500, 1000 }, (randomTries) => {
-        Random localRand;
-        lock (globalRandom) {
-          localRand = new Random(globalRandom.Next());
-        }
-        var policyFactories = new Func<int, IPolicy>[]
+      int maxIterations = 100000; // for poly-10 with 50000 evaluations no successful try with hl yet
+      // var globalRandom = new Random(31415);
+      var localRandSeed = 31415;
+      var reps = 20;
+
+      var policyFactories = new Func<Random, int, IPolicy>[]
         {
-          (numActions) => new RandomPolicy(localRand, numActions),
-          (numActions) => new UCB1Policy(numActions),
-          (numActions) => new UCB1TunedPolicy(numActions),
-          (numActions) => new UCBNormalPolicy(numActions),
-          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.01),
-          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.05),
-          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.1),
-          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.2),
-          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.5),
-          (numActions) => new GaussianThompsonSamplingPolicy(localRand, numActions),
-          (numActions) => new BernoulliThompsonSamplingPolicy(localRand, numActions)
+          (rand, numActions) => new GaussianThompsonSamplingPolicy(rand, numActions),
+          (rand, numActions) => new BernoulliThompsonSamplingPolicy(rand, numActions),
+          (rand, numActions) => new RandomPolicy(rand, numActions),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.01),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.05),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.2),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.5),
+          (rand, numActions) => new UCTPolicy(numActions, 0.1),
+          (rand, numActions) => new UCTPolicy(numActions, 0.5),
+          (rand, numActions) => new UCTPolicy(numActions, 1),
+          (rand, numActions) => new UCTPolicy(numActions, 2),
+          (rand, numActions) => new UCTPolicy(numActions, 5),
+          (rand, numActions) => new UCTPolicy(numActions, 10),
+          (rand, numActions) => new UCB1Policy(numActions),
+          (rand, numActions) => new UCB1TunedPolicy(numActions),
+          (rand, numActions) => new UCBNormalPolicy(numActions),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.1),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.5),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 1),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 5),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 10),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 20),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 100),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.01),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.05),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.1),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 5000, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10000, 0.01),
         };
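Note the changed factory signature: Func&lt;Random, int, IPolicy&gt; instead of Func&lt;int, IPolicy&gt;, so the random number generator is passed in per call rather than captured from an enclosing closure. A minimal sketch of the idea with a stand-in policy (the real IPolicy and the concrete policies live in HeuristicLab.Algorithms.Bandits; everything below is illustrative):

  using System;

  // Stand-in for the real HeuristicLab IPolicy; illustrative only.
  interface IPolicy { int SelectAction(); }

  class RandomPolicySketch : IPolicy {
    private readonly Random rand;
    private readonly int numActions;
    public RandomPolicySketch(Random rand, int numActions) {
      this.rand = rand; this.numActions = numActions;
    }
    public int SelectAction() { return rand.Next(numActions); }
  }

  class FactorySketch {
    // The Random is a factory parameter, so every parallel repetition can
    // inject its own deterministically seeded instance.
    static readonly Func<Random, int, IPolicy>[] policyFactories = {
      (rand, numActions) => new RandomPolicySketch(rand, numActions),
    };

    static IPolicy Create(int seed, int numActions) {
      return policyFactories[0](new Random(seed), numActions);
    }
  }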
 
-        foreach (var policyFactory in policyFactories)
-          for (int i = 0; i < reps; i++) {
+      var tasks = new List<Task>();
+      foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
+        foreach (var policyFactory in policyFactories) {
+          var myPolicyFactory = policyFactory;
+          var myRandomTries = randomTries;
+          var localRand = new Random(localRandSeed);
+          var options = new ParallelOptions();
+          options.MaxDegreeOfParallelism = 1;
+          Parallel.For(0, reps, options, (i) => {
+            //var t = Task.Run(() => {
+            Random myLocalRand;
+            lock (localRand)
+              myLocalRand = new Random(localRand.Next());
+
+            //for (int i = 0; i < reps; i++) {
+
             int iterations = 0;
             var sw = new Stopwatch();
             var globalStatistics = new SentenceSetStatistics();
 
-            // var problem = new SymbolicRegressionPoly10Problem();
-            var problem = new SantaFeAntProblem();
+            var problem = new SymbolicRegressionPoly10Problem();
+            //var problem = new SantaFeAntProblem();
             //var problem = new PalindromeProblem();
             //var problem = new HardPalindromeProblem();
             //var problem = new RoyalPairProblem();
             //var problem = new EvenParityProblem();
-            var alg = new MctsSampler(problem, 17, localRand, randomTries, policyFactory);
+            var alg = new MctsSampler(problem, 25, myLocalRand, myRandomTries, myPolicyFactory);
             //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
             //var alg = new AlternativesContextSampler(problem, 25);
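The lock (localRand) block above derives one private child Random per repetition from a deterministically seeded master Random. System.Random is not thread-safe, so sharing a single instance across Parallel.For bodies would corrupt its state; handing out child seeds under a lock keeps the runs both safe and reproducible. A self-contained sketch of just this pattern:

  using System;
  using System.Threading.Tasks;

  class SeedingSketch {
    static void Main() {
      var localRand = new Random(31415); // master RNG, deterministic seed
      Parallel.For(0, 20, i => {
        Random myLocalRand;
        // Hand out a child seed under a lock, then use the private child
        // instance freely within this iteration.
        lock (localRand)
          myLocalRand = new Random(localRand.Next());
        Console.WriteLine("rep {0}: {1:F5}", i, myLocalRand.NextDouble());
      });
    }
  }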
     
               globalStatistics.AddSentence(sentence, quality);
               if (iterations % 10000 == 0) {
-                Console.WriteLine("{0} {1} {2}", randomTries, policyFactory(1), globalStatistics);
+                Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, myPolicyFactory(myLocalRand, 1), globalStatistics);
               }
             };
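The new log line uses alignment components in the composite format string: {index,width} right-aligns the argument in a field of the given width (a negative width would left-align), which keeps the periodic output column-aligned across runs. For example:

  using System;

  class AlignmentSketch {
    static void Main() {
      // "{0,4}" pads the first argument to 4 characters, right-aligned.
      Console.WriteLine("{0,4} {1,7} {2,5}", 12, 3456, 7);
      // prints: "  12    3456     7"
    }
  }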
     
 
             sw.Stop();
-          }
-      });
+            //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
+            //}
+            //});
+            //tasks.Add(t);
+          });
+        }
+      }
+      //Task.WaitAll(tasks.ToArray());
     }
 
     private static void RunDemo() {
-      // TODO: implement threshold ascent
-      // TODO: implement inspection for MCTS
+      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See the comparison: old vs. new implementation of GaussianThompsonSampling
+      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
+      // TODO: how else can samplers be compared, i.e. what can be measured to estimate a sampler's quality (besides the quality of and iterations until the best solution)? => goal: faster iterations to a good result
+      // TODO: is the likelihood for R=1 easy to compute for a Gaussian or a GaussianMixture?
+      // TODO: research thompson sampling for max bandit?
+      // TODO: extensive test of strategies for the k-armed max bandit
+      // TODO: verify TA implementation using example from the original paper
+      // TODO: reference HL.ProblemInstances and try on tower dataset
+      // TODO: compare results for different policies also for the symb-reg problem
+      // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
+      // TODO: implement thompson sampling for gaussian mixture models
+      // TODO: implement inspection for MCTS (possibly display statistics from the tree via an interactive command line)
+      // TODO: implement ACO-style bandit policy
+      // TODO: implement sequences that can be manipulated in-place (instead of strings), alternatives are also stored as sequences, for a sequence the index of the first NT-symb can be stored
+      // TODO: simultaneous modeling of transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
+      // TODO: compare generating the shortest possible sentences when completing randomly vs. simply picking alternatives at random
+      // TODO: reward discounting (for reward distributions that change over time); write a dedicated unit test for this
+
 
       int maxIterations = 10000000;
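The reward-discounting TODO above targets reward distributions that change over time. A common approach, sketched below under that assumption (none of these names are from the changeset), is exponential discounting: every new observation enters with weight 1 while all earlier ones decay by a factor gamma < 1, so the estimate tracks the recent reward level.

  // Discounted running mean: sum(gamma^(t-s) * r_s) / sum(gamma^(t-s)).
  class DiscountedStats {
    private double weightedSum;
    private double weightSum;
    private readonly double gamma; // discount factor, e.g. 0.99

    public DiscountedStats(double gamma) { this.gamma = gamma; }

    public void Add(double reward) {
      // decay all previous observations, then add the new one at weight 1
      weightedSum = gamma * weightedSum + reward;
      weightSum = gamma * weightSum + 1.0;
    }

    public double Mean {
      get { return weightSum > 0 ? weightedSum / weightSum : 0.0; }
    }
  }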
     
       string bestSentence = "";
       var globalStatistics = new SentenceSetStatistics();
-      var random = new Random(31415);
-
-      // var problem = new SymbolicRegressionPoly10Problem();
+      var random = new Random();
+
+      //var problem = new SymbolicRegressionPoly10Problem();
       var problem = new SantaFeAntProblem();
       //var problem = new PalindromeProblem();
     
       //var problem = new RoyalPairProblem();
       //var problem = new EvenParityProblem();
-      var alg = new MctsSampler(problem, 17, random);
-      //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
-      //var alg = new AlternativesContextSampler(problem, 25);
+      //var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new GenericThompsonSamplingPolicy(rand, numActions, new GaussianModel(numActions, 0.5, 10)));
+      //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
+      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
+      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
+      // var alg = new AlternativesSampler(problem, 17);
+      var alg = new RandomSearch(problem, random, 17);
 
       alg.FoundNewBestSolution += (sentence, quality) => {
     
         iterations++;
         globalStatistics.AddSentence(sentence, quality);
+        if (iterations % 1000 == 0) {
+          //alg.PrintStats();
+        }
         if (iterations % 10000 == 0) {
           //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
-          Console.WriteLine(globalStatistics.ToString());
+          //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
+          Console.WriteLine(globalStatistics);
         }
       };
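RunDemo receives progress through the FoundNewBestSolution event instead of polling the sampler. A sketch of that pattern with a skeleton sampler; the event name and handler signature follow the changeset, while the surrounding class and the sample values are illustrative:

  using System;

  class SamplerSkeleton {
    // Raised whenever the search finds a new best sentence.
    public event Action<string, double> FoundNewBestSolution;

    public void Run() {
      // ... search loop; on improvement:
      var handler = FoundNewBestSolution;
      if (handler != null) handler("<expr> -> ...", 0.95); // hypothetical sentence/quality
    }
  }

  class Demo {
    static void Main() {
      var alg = new SamplerSkeleton();
      alg.FoundNewBestSolution += (sentence, quality) =>
        Console.WriteLine("{0,10:F5} {1}", quality, sentence);
      alg.Run();
    }
  }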