Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/27/15 16:34:34 (8 years ago)
Author:
gkronber
Message:

linear value function approximation and good results for poly-10 benchmark

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11806 r11832  
    2424      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
    2525
    26       RunDemo();
    27       //RunGridTest();
     26      //RunDemo();
     27      RunGridTest();
    2828    }
    2929
    3030    private static void RunGridTest() {
    31       int maxIterations = 50000; // for poly-10 with 50000 evaluations no successful try with hl yet
     31      int maxIterations = 70000; // for poly-10 with 50000 evaluations no successful try with hl yet
    3232      //var globalRandom = new Random(31415);
    3333      var localRandSeed = 31415;
    34       var reps = 10;
     34      var reps = 30;
    3535
    3636      var policyFactories = new Func<IBanditPolicy>[]
     
    109109      {
    110110        //(rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
    111         (rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:false ), 15),
    112         (rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:true ), 15),
    113         (rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:false), 15),
    114         (rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:true), 15),
    115         //(rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
     111        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:false ), 15),
     112        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:true ), 15),
     113        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:false), 15),
     114        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:true), 15),
     115        (rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
    116116      };
    117117
    118118      foreach (var instanceFactory in instanceFactories) {
    119         foreach (var useCanonical in new bool[] { true /*, false */ }) {
    120           foreach (var randomTries in new int[] { 0, /* 1, 10, /* 5, 100 /*, 500, 1000 */}) {
     119        foreach (var useCanonical in new bool[] { true /*, false */}) {
     120          foreach (var randomTries in new int[] { 0 /*, 1, 10 /*, /* 5, 100 /*, 500, 1000 */}) {
    121121            foreach (var policyFactory in policyFactories) {
    122122              var myRandomTries = randomTries;
     
    142142                var problem = instance.Item1;
    143143                var maxLen = instance.Item2;
    144                 var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
    145                   new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
     144                //var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
     145                //  new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
     146                var alg = new SequentialSearch(problem, maxLen, myLocalRand,
     147                  myRandomTries,
     148                  new GenericFunctionApproximationGrammarPolicy(problem,
     149                    useCanonical));
    146150                //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
    147151                //var alg = new AlternativesContextSampler(problem, 25);
     
    150154                  iterations++;
    151155                  globalStatistics.AddSentence(sentence, quality);
    152                   if (iterations % 10000 == 0) {
    153                     Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4}", i, myRandomTries, policyFactory(), useCanonical, globalStatistics);
     156                  if (iterations % 1000 == 0) {
     157                    Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4} {5}", i, myRandomTries, policyFactory(), useCanonical, problem.ToString(), globalStatistics);
    154158                  }
    155159                };
     
    190194
    191195
    192       int maxIterations = 100000;
     196      int maxIterations = 1000000;
    193197      int iterations = 0;
    194198      var sw = new Stopwatch();
     
    199203
    200204      //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
     205      // var phraseLen = 3;
     206      // var numPhrases = 5;
     207      // var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: false);
     208
    201209      //var phraseLen = 3;
    202210      //var numPhrases = 5;
    203       //var problem = new RoyalPhraseSequenceProblem(random, 15, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true);
    204 
    205       // var phraseLen = 3;
    206       // var numPhrases = 5;
    207       // var problem = new FindPhrasesProblem(random, 10, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 200, correctReward: 1.0, decoyReward: 0.5, phrasesAsSets: true);
     211      //var problem = new FindPhrasesProblem(random, 10, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0, phrasesAsSets: false);
    208212
    209213      // good results for symb-reg
     
    213217      // - GenericThompsonSamplingPolicy("")
    214218      // - UCTPolicy(0.10) (5 of 5 runs, 35000 iters avg.), 10 successful runs of 10 with rand-tries 0, at 40000 iters 9 / 10, at 30000 1 / 10
     219      // 2015 01 22: symb-reg: grid test on find-phrases problem showed good results for UCB1TunedPolicy and SequentialSearch with canonical states
     220      // - symb-reg: consistent results with UCB1Tuned. finds optimal solution in ~50k iters (new GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));
     221      // 2015 01 23: grid test with canonical states:
     222      // - UCTPolicy(0.10) and UCBNormalPolicy: 10/10 optimal solutions within max. 50k iters; slightly worse: generic-thompson with variable sigma and Boltzmann exploration (100)
     223
    215224
    216225      // good results for artificial ant:
     
    219228      // - GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant
    220229      // 2015 01 19: grid test with canonical states (non-canonical slightly worse)
    221       // - Threshold Ascent (best 100, 0.01; all variants relatively good)
    222       // - Policies where the variance has a large weight compared to the mean? (Gaussian(compatible), Gaussian with fixed variance, UCT with large c, alle TA)
    223 
    224       //var problem = new SymbolicRegressionPoly10Problem();
    225 
    226       var problem = new SantaFeAntProblem();
    227       //var problem = new SymbolicRegressionProblem("Tower");
     230      // - ant: Threshold Ascent (best 100, 0.01; all variants relatively good)
     231      // - ant: Policies where the variance has a large weight compared to the mean? (Gaussian(compatible), Gaussian with fixed variance, UCT with large c, alle TA)
     232      // - ant: UCB1Tuned with canonical states also works very well for the artificial ant! consistent solutions in less than 10k iters
     233
     234      var problem = new SymbolicRegressionPoly10Problem();
     235      //var problem = new SantaFeAntProblem();
     236      //var problem = new SymbolicRegressionProblem(random, "Tower");
    228237      //var problem = new PalindromeProblem();
    229238      //var problem = new HardPalindromeProblem();
     
    234243      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
    235244      //var alg = new SequentialSearch(problem, 23, random, 0,
    236       //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new ModifiedUCTPolicy(0.1), true));
    237       var alg = new SequentialSearch(problem, 17, random, 0,
    238         new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericTDPolicy(problem, true));
     245      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.QLearningGrammarPolicy(problem, new BoltzmannExplorationPolicy(10),
     246      //    1, 1, true));
     247      //var alg = new SequentialSearch(problem, 23, random, 0,
     248      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericContextualGrammarPolicy(problem, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)), true));
     249      var alg = new SequentialSearch(problem, 23, random, 0,
     250        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericFunctionApproximationGrammarPolicy(problem, true));
    239251      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
    240252      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
     
    249261
    250262      alg.FoundNewBestSolution += (sentence, quality) => {
    251         //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
     263        //Console.WriteLine("{0}", globalStatistics);
    252264        //Console.ReadLine();
    253265      };
     
    255267        iterations++;
    256268        globalStatistics.AddSentence(sentence, quality);
     269
    257270        if (iterations % 1000 == 0) {
    258271          if (iterations % 10000 == 0) Console.Clear();
     
    260273          alg.PrintStats();
    261274        }
     275
    262276        //Console.WriteLine(sentence);
    263277
    264         if (iterations % 10000 == 0) {
    265           //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
    266         }
     278        //if (iterations % 10000 == 0) {
     279        //  Console.WriteLine("{0}", globalStatistics);
     280        //}
    267281      };
    268282
Note: See TracChangeset for help on using the changeset viewer.