
Timestamp: 01/09/15 14:57:28 (9 years ago)
Author: gkronber
Message: #2283 refactoring

File: 1 edited

Legend: in the diff below, lines removed in r11742 are prefixed with "-", added lines with "+", and unchanged context lines with a single space.
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

r11732 → r11742

 using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.BanditPolicies;
 using HeuristicLab.Algorithms.Bandits.Models;
 using HeuristicLab.Algorithms.GrammaticalOptimization;
     
       //var globalRandom = new Random(31415);
       var localRandSeed = 31415;
-      var reps = 20;
-
-      var policies = new Func<IPolicy>[]
+      var reps = 8;
+
+      var policies = new Func<IBanditPolicy>[]
         {
-          () => new GaussianThompsonSamplingPolicy(),
+          () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
+          () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
+          () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
+          () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
+          //() => new GaussianThompsonSamplingPolicy(),
           () => new GaussianThompsonSamplingPolicy(true),
-          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)),
-          () => new BernoulliThompsonSamplingPolicy(),
+          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
+          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
+          //() => new BernoulliThompsonSamplingPolicy(),
           () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
           () => new RandomPolicy(),
     
           () => new ChernoffIntervalEstimationPolicy( 0.1),
           () => new ChernoffIntervalEstimationPolicy( 0.2),
-          // (rand) => new ThresholdAscentPolicy(10, 0.01),
-          // (rand) => new ThresholdAscentPolicy(10, 0.05),
-          // (rand) => new ThresholdAscentPolicy(10, 0.1),
-          // (rand) => new ThresholdAscentPolicy(10, 0.2),
-          // (rand) => new ThresholdAscentPolicy(100, 0.01),
-          // (rand) => new ThresholdAscentPolicy(100, 0.05),
-          // (rand) => new ThresholdAscentPolicy(100, 0.1),
-          // (rand) => new ThresholdAscentPolicy(100, 0.2),
-          // (rand) => new ThresholdAscentPolicy(1000, 0.01),
-          // (rand) => new ThresholdAscentPolicy(1000, 0.05),
-          // (rand) => new ThresholdAscentPolicy(1000, 0.1),
-          // (rand) => new ThresholdAscentPolicy(1000, 0.2),
-          // (rand) => new ThresholdAscentPolicy(5000, 0.01),
-          // (rand) => new ThresholdAscentPolicy(10000, 0.01),
+          () => new ThresholdAscentPolicy(10, 0.01),
+          () => new ThresholdAscentPolicy(10, 0.05),
+          () => new ThresholdAscentPolicy(10, 0.1),
+          () => new ThresholdAscentPolicy(10, 0.2),
+          () => new ThresholdAscentPolicy(100, 0.01),
+          () => new ThresholdAscentPolicy(100, 0.05),
+          () => new ThresholdAscentPolicy(100, 0.1),
+          () => new ThresholdAscentPolicy(100, 0.2),
+          () => new ThresholdAscentPolicy(1000, 0.01),
+          () => new ThresholdAscentPolicy(1000, 0.05),
+          () => new ThresholdAscentPolicy(1000, 0.1),
+          () => new ThresholdAscentPolicy(1000, 0.2),
+          () => new ThresholdAscentPolicy(5000, 0.01),
+          () => new ThresholdAscentPolicy(10000, 0.01),
         };

       foreach (var problem in new Tuple<IProblem, int>[]
         {
-          Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+          //Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
           Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
         })
     
             var localRand = new Random(localRandSeed);
             var options = new ParallelOptions();
-            options.MaxDegreeOfParallelism = 1;
+            options.MaxDegreeOfParallelism = 4;
             Parallel.For(0, reps, options, (i) => {
               //var t = Task.Run(() => {
     
     private static void RunDemo() {
       // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
-      // TODO: implement GaussianWithUnknownMeanAndVariance Model for Thompson Sampling (verify with unit test if correct mean and variance is identified)
       // TODO: separate value function from policy
-      // TODO: debug and verify implementation variants of Gaussian Thompson Sampling with unit test
-      // TODO: refactor Policies to use banditInfos (policies are factories for bandit infos and a bandit info only has an update routine, each policy works only with its type of banditInfo)
       // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
       // TODO: exhaustive search with priority list
-      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the old one? See comparison: old vs. new implementation of GaussianThompsonSampling
+      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
       // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
       // TODO: how else can samplers be compared, i.e. what can be measured to estimate a sampler's quality (apart from solution quality and iterations until the best solution)? => goal: faster iterations to a good result
-      // TODO: is the likelihood for R=1 easy to compute for a Gaussian or a GaussianMixture?
       // TODO: research thompson sampling for max bandit?
       // TODO: thorough test of strategies for the k-armed max bandit
       // TODO: verify TA implementation using example from the original paper
-      // TODO: compare results for different policies also for the symb-reg problem
       // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
       // TODO: implement thompson sampling for gaussian mixture models
       // TODO: implement inspection for MCTS (possibly an interactive command line to show statistics from the tree)
       // TODO: implement ACO-style bandit policy
-      // TODO: implement sequences that can be manipulated in-place (instead of strings), alternatives are also stored as sequences, for a sequence the index of the first NT-symb can be stored
       // TODO: simultaneously model transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
       // TODO: compare generating sentences that are as short as possible when sampling completely at random vs. simply choosing alternatives uniformly at random
     
       var random = new Random();

-      var problem = new SymbolicRegressionPoly10Problem();
-      //var problem = new SantaFeAntProblem(); // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
-      //var problem = new SymbolicRegressionProblem("Tower"); // very good results e.g. new EpsGreedyPolicy(0.2) using max reward as quality!
+      var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 random tries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
+      // Ant
+      // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
+      // GaussianModelWithUnknownVariance (and Q = 0.99 quantile) also works well for Ant
+      //var problem = new SantaFeAntProblem();
+      //var problem = new SymbolicRegressionProblem("Tower");
       //var problem = new PalindromeProblem();
       //var problem = new HardPalindromeProblem();
       //var problem = new RoyalPairProblem();
       //var problem = new EvenParityProblem();
-      var alg = new MctsSampler(problem, 23, random, 10, new EpsGreedyPolicy(0.2)); // GaussianModelWithUnknownVariance (and Q = 0.99 quantile) works well for Ant
+      var alg = new MctsSampler(problem, 25, random, 0, new GaussianThompsonSamplingPolicy(true));
       //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
       //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
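
The changeset replaces the concrete policy list with factory delegates (Func<IBanditPolicy>), adds eps-greedy variants that rank arms by the maximum observed reward, and runs the repetitions with Parallel.For at MaxDegreeOfParallelism = 4. Below is a minimal, self-contained sketch of that pattern; EpsGreedyMaxPolicy and ArmInfo are hypothetical stand-ins for illustration, not the HeuristicLab types. The point it shows: each parallel repetition constructs its own policy instance from a factory, so no per-arm statistics are shared between repetitions.

  using System;
  using System.Linq;
  using System.Threading.Tasks;

  // Per-arm statistics; only the maximum observed reward is used as the arm value (illustrative stand-in).
  class ArmInfo {
    public int Tries;
    public double MaxReward = double.NegativeInfinity;
    public void Update(double reward) { Tries++; if (reward > MaxReward) MaxReward = reward; }
  }

  // Eps-greedy over the maximum reward: explore with probability eps,
  // otherwise pick the arm with the highest reward seen so far (illustrative stand-in).
  class EpsGreedyMaxPolicy {
    private readonly double eps;
    public EpsGreedyMaxPolicy(double eps) { this.eps = eps; }
    public int SelectArm(ArmInfo[] arms, Random rand) {
      if (rand.NextDouble() < eps) return rand.Next(arms.Length);
      int best = 0;
      for (int a = 1; a < arms.Length; a++)
        if (arms[a].MaxReward > arms[best].MaxReward) best = a;
      return best;
    }
  }

  class Program {
    static void Main() {
      const int nArms = 10, horizon = 1000, reps = 8;

      // Factory delegates, analogous to the Func<IBanditPolicy>[] array in the changeset.
      var policyFactories = new Func<EpsGreedyMaxPolicy>[] {
        () => new EpsGreedyMaxPolicy(0.05),
        () => new EpsGreedyMaxPolicy(0.2),
      };

      foreach (var factory in policyFactories) {
        var bestPerRep = new double[reps];
        var options = new ParallelOptions { MaxDegreeOfParallelism = 4 };
        Parallel.For(0, reps, options, rep => {
          var rand = new Random(31415 + rep);   // per-repetition RNG
          var policy = factory();               // fresh policy instance per repetition
          var arms = Enumerable.Range(0, nArms).Select(_ => new ArmInfo()).ToArray();
          for (int t = 0; t < horizon; t++) {
            int a = policy.SelectArm(arms, rand);
            // Toy reward model: arm a pays roughly a / nArms plus uniform noise.
            double reward = (double)a / nArms + 0.1 * rand.NextDouble();
            arms[a].Update(reward);
            if (reward > bestPerRep[rep]) bestPerRep[rep] = reward;
          }
        });
        Console.WriteLine($"best reward over {reps} reps: {bestPerRep.Max():F3}");
      }
    }
  }

Constructing policies through factories rather than passing shared instances keeps one repetition's arm statistics from leaking into another, which matters once MaxDegreeOfParallelism is raised above 1.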