Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/20/15 20:25:00 (10 years ago)
Author:
gkronber
Message:

#2283: separated value-states from done-states in GenericGrammarPolicy and removed disabling of actions from bandit policies

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11801 r11806  
    2424      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
    2525
    26       //RunDemo();
    27       RunGridTest();
     26      RunDemo();
     27      //RunGridTest();
    2828    }
    2929
     
    6161         () => new UCTPolicy( 5),
    6262         () => new UCTPolicy( 10),
     63         () => new ModifiedUCTPolicy(0.01),
     64         () => new ModifiedUCTPolicy(0.05),
     65         () => new ModifiedUCTPolicy(0.1),
     66         () => new ModifiedUCTPolicy(0.5),
     67         () => new ModifiedUCTPolicy(1),
     68         () => new ModifiedUCTPolicy(2),
     69         () => new ModifiedUCTPolicy( 5),
     70         () => new ModifiedUCTPolicy( 10),
    6371         () => new UCB1Policy(),
    6472         () => new UCB1TunedPolicy(),
     
    161169
    162170    private static void RunDemo() {
    163       // TODO: move problem instances into a separate folder
    164171      // TODO: implement bridge to HL-GP
    165172      // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos)
     
    170177      // TODO: warum funktioniert die alte Implementierung von GaussianThompson besser für SantaFe als neue? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling
    171178      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
    172       // TODO: wie kann ich sampler noch vergleichen bzw. was kann man messen um die qualität des samplers abzuschätzen (bis auf qualität und iterationen bis zur besten lösung) => ziel schnellere iterationen zu gutem ergebnis
    173179      // TODO: research thompson sampling for max bandit?
    174180      // TODO: ausführlicher test von strategien für numCorrectPhrases-armed max bandit
     
    192198
    193199
    194       var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
     200      //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
    195201      //var phraseLen = 3;
    196202      //var numPhrases = 5;
     
    218224      //var problem = new SymbolicRegressionPoly10Problem();
    219225
    220       //var problem = new SantaFeAntProblem();
     226      var problem = new SantaFeAntProblem();
    221227      //var problem = new SymbolicRegressionProblem("Tower");
    222228      //var problem = new PalindromeProblem();
     
    227233      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
    228234      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
    229       var alg = new SequentialSearch(problem, 30, random, 0,
    230         new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.1), true));
     235      //var alg = new SequentialSearch(problem, 23, random, 0,
     236      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new ModifiedUCTPolicy(0.1), true));
     237      var alg = new SequentialSearch(problem, 17, random, 0,
     238        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericTDPolicy(problem, true));
    231239      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
    232240      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
     
    248256        globalStatistics.AddSentence(sentence, quality);
    249257        if (iterations % 1000 == 0) {
    250           if (iterations % 1000 == 0) Console.Clear();
     258          if (iterations % 10000 == 0) Console.Clear();
    251259          Console.SetCursorPosition(0, 0);
    252260          alg.PrintStats();
Note: See TracChangeset for help on using the changeset viewer.