- Timestamp: 01/07/15 09:21:46 (9 years ago)
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization/Main
- Files: 2 edited
Legend:
- Unmodified: no prefix
- Added: lines marked with +
- Removed: lines marked with -
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Main.csproj
r11727 → r11732

  …
      <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name>
    </ProjectReference>
+   <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization.SymbReg\HeuristicLab.Problems.GrammaticalOptimization.SymbReg.csproj">
+     <Project>{17A7A380-86CE-482D-8D22-CBD70CC97F0D}</Project>
+     <Name>HeuristicLab.Problems.GrammaticalOptimization.SymbReg</Name>
+   </ProjectReference>
    <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">
      <Project>{cb9dccf6-667e-4a13-b82d-dbd6b45a045e}</Project>
  …
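The added ProjectReference wires the SymbReg project into Main, which is what makes the new using directive in Program.cs (next file) resolve. As a minimal sketch of what that enables: SymbolicRegressionProblem and the "Tower" dataset name are taken from comments elsewhere in this changeset, and the wrapper class below is hypothetical.

// Sketch only, not part of the changeset. Assumes SymbolicRegressionProblem
// lives in the SymbReg project and takes a dataset name, as the Program.cs
// comments in this changeset suggest.
using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;

static class SymbRegUsageSketch {
  static void Demo() {
    // construct the symbolic regression problem for the "Tower" dataset
    var problem = new SymbolicRegressionProblem("Tower");
    // ...hand the problem to a sampler such as MctsSampler, as in RunDemo below
  }
}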
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11730 → r11732

    using HeuristicLab.Algorithms.GrammaticalOptimization;
    using HeuristicLab.Problems.GrammaticalOptimization;
+   using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;

    namespace Main {
  …
      private static void RunGridTest() {
-       int maxIterations = 100000; // for poly-10 with 50000 evaluations no successful try with hl yet
-       //
+       int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
+       //var globalRandom = new Random(31415);
        var localRandSeed = 31415;
        var reps = 20;

-       var policyFactories = new Func<Random, int, IPolicy>[]
+       var policies = new Func<IPolicy>[]
        {
-         (rand, numActions) => new GaussianThompsonSamplingPolicy(rand, numActions),
-         (rand, numActions) => new BernoulliThompsonSamplingPolicy(rand, numActions),
-         (rand, numActions) => new RandomPolicy(rand, numActions),
-         (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.01),
-         (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.05),
-         (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1),
-         (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.2),
-         (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.5),
-         (rand, numActions) => new UCTPolicy(numActions, 0.1),
-         (rand, numActions) => new UCTPolicy(numActions, 0.5),
-         (rand, numActions) => new UCTPolicy(numActions, 1),
-         (rand, numActions) => new UCTPolicy(numActions, 2),
-         (rand, numActions) => new UCTPolicy(numActions, 5),
-         (rand, numActions) => new UCTPolicy(numActions, 10),
-         (rand, numActions) => new UCB1Policy(numActions),
-         (rand, numActions) => new UCB1TunedPolicy(numActions),
-         (rand, numActions) => new UCBNormalPolicy(numActions),
-         (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.1),
-         (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.5),
-         (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 1),
-         (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 5),
-         (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 10),
-         (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 20),
-         (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 100),
-         (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.01),
-         (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.05),
-         (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.1),
-         (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.2),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.01),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.05),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.1),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.2),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.01),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.05),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.1),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.2),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.01),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.05),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.1),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.2),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 5000, 0.01),
-         (rand, numActions) => new ThresholdAscentPolicy(numActions, 10000, 0.01),
+         () => new GaussianThompsonSamplingPolicy(),
+         () => new GaussianThompsonSamplingPolicy(true),
+         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)),
+         () => new BernoulliThompsonSamplingPolicy(),
+         () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
+         () => new RandomPolicy(),
+         () => new EpsGreedyPolicy(0.01),
+         () => new EpsGreedyPolicy(0.05),
+         () => new EpsGreedyPolicy(0.1),
+         () => new EpsGreedyPolicy(0.2),
+         () => new EpsGreedyPolicy(0.5),
+         () => new UCTPolicy(0.1),
+         () => new UCTPolicy(0.5),
+         () => new UCTPolicy(1),
+         () => new UCTPolicy(2),
+         () => new UCTPolicy(5),
+         () => new UCTPolicy(10),
+         () => new UCB1Policy(),
+         () => new UCB1TunedPolicy(),
+         () => new UCBNormalPolicy(),
+         () => new BoltzmannExplorationPolicy(0.1),
+         () => new BoltzmannExplorationPolicy(0.5),
+         () => new BoltzmannExplorationPolicy(1),
+         () => new BoltzmannExplorationPolicy(5),
+         () => new BoltzmannExplorationPolicy(10),
+         () => new BoltzmannExplorationPolicy(20),
+         () => new BoltzmannExplorationPolicy(100),
+         () => new ChernoffIntervalEstimationPolicy(0.01),
+         () => new ChernoffIntervalEstimationPolicy(0.05),
+         () => new ChernoffIntervalEstimationPolicy(0.1),
+         () => new ChernoffIntervalEstimationPolicy(0.2),
+         // (rand) => new ThresholdAscentPolicy(10, 0.01),
+         // (rand) => new ThresholdAscentPolicy(10, 0.05),
+         // (rand) => new ThresholdAscentPolicy(10, 0.1),
+         // (rand) => new ThresholdAscentPolicy(10, 0.2),
+         // (rand) => new ThresholdAscentPolicy(100, 0.01),
+         // (rand) => new ThresholdAscentPolicy(100, 0.05),
+         // (rand) => new ThresholdAscentPolicy(100, 0.1),
+         // (rand) => new ThresholdAscentPolicy(100, 0.2),
+         // (rand) => new ThresholdAscentPolicy(1000, 0.01),
+         // (rand) => new ThresholdAscentPolicy(1000, 0.05),
+         // (rand) => new ThresholdAscentPolicy(1000, 0.1),
+         // (rand) => new ThresholdAscentPolicy(1000, 0.2),
+         // (rand) => new ThresholdAscentPolicy(5000, 0.01),
+         // (rand) => new ThresholdAscentPolicy(10000, 0.01),
        };

-       var tasks = new List<Task>();
-       foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
-         foreach (var policyFactory in policyFactories) {
-           var myPolicyFactory = policyFactory;
+       foreach (var problem in new Tuple<IProblem, int>[]
+         {
+           Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+           Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
+         })
+       foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
+         foreach (var policy in policies) {
            var myRandomTries = randomTries;
            var localRand = new Random(localRandSeed);
            var options = new ParallelOptions();
            options.MaxDegreeOfParallelism = 1;
            Parallel.For(0, reps, options, (i) => {
              //var t = Task.Run(() => {
              Random myLocalRand;
              lock (localRand)
                myLocalRand = new Random(localRand.Next());

              //for (int i = 0; i < reps; i++) {

              int iterations = 0;
-             var sw = new Stopwatch();
              var globalStatistics = new SentenceSetStatistics();

-             var problem = new SymbolicRegressionPoly10Problem();
-             //var problem = new SantaFeAntProblem();
+             // var problem = new SymbolicRegressionPoly10Problem();
+             // var problem = new SantaFeAntProblem();
              //var problem = new PalindromeProblem();
              //var problem = new HardPalindromeProblem();
              //var problem = new RoyalPairProblem();
              //var problem = new EvenParityProblem();
-             var alg = new MctsSampler(problem, 25, myLocalRand, myRandomTries, myPolicyFactory);
+             var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each experiment
              //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
              //var alg = new AlternativesContextSampler(problem, 25);

              alg.SolutionEvaluated += (sentence, quality) => {
                iterations++;
                globalStatistics.AddSentence(sentence, quality);
                if (iterations % 10000 == 0) {
-                 Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, myPolicyFactory(myLocalRand, 1), globalStatistics);
+                 Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, policy(), globalStatistics);
                }
              };

-             sw.Start();

              alg.Run(maxIterations);

-             sw.Stop();
              //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
              //}
              //});
              //tasks.Add(t);
            });
          }
        }
        //Task.WaitAll(tasks.ToArray());
      }

      private static void RunDemo() {
+       // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
+       // TODO: implement GaussianWithUnknownMeanAndVariance Model for Thompson Sampling (verify with unit test if correct mean and variance is identified)
+       // TODO: separate value function from policy
+       // TODO: debug and verify implementation variants of Gaussian Thompson Sampling with unit test
+       // TODO: refactor Policies to use banditInfos (policies are factories for bandit infos and bandit info only has an update routine, each policy works only with it's type of banditinfo)
+       // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
+       // TODO: exhaustive search with priority list
        // TODO: warum funktioniert die alte Implementierung von GaussianThompson besser für SantaFe als alte? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling
        // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
  …
        // TODO: research thompson sampling for max bandit?
        // TODO: ausführlicher test von strategien für k-armed max bandit
        // TODO: verify TA implementation using example from the original paper
-       // TODO: reference HL.ProblemInstances and try on tower dataset
        // TODO: compare results for different policies also for the symb-reg problem
        // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
  …
        // TODO: vergleich bei complete-randomly möglichst kurze sätze generieren vs. einfach zufällig alternativen wählen
        // TODO: reward discounting (für veränderliche reward distributions über zeit). speziellen unit-test dafür erstellen
+       // TODO: constant optimization

-       int maxIterations = 10000000;
+       int maxIterations = 100000;
        int iterations = 0;
        var sw = new Stopwatch();
  …
        var random = new Random();

-       //var problem = new SymbolicRegressionPoly10Problem();
-       var problem = new SantaFeAntProblem();
+       var problem = new SymbolicRegressionPoly10Problem();
+       //var problem = new SantaFeAntProblem(); // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
+       //var problem = new SymbolicRegressionProblem("Tower"); // very good results e.g. new EpsGreedyPolicy(0.2) using max reward as quality !!!
        //var problem = new PalindromeProblem();
        //var problem = new HardPalindromeProblem();
        //var problem = new RoyalPairProblem();
        //var problem = new EvenParityProblem();
-       //var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new GenericThompsonSamplingPolicy(rand, numActions, new GaussianModel(numActions, 0.5, 10)));
+       var alg = new MctsSampler(problem, 23, random, 10, new EpsGreedyPolicy(0.2)); // GaussianModelWithUnknownVariance (and Q=0.99-quantil) works well for Ant
        //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
        //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
        //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
        // var alg = new AlternativesSampler(problem, 17);
-       var alg = new RandomSearch(problem, random, 17);
+       // var alg = new RandomSearch(problem, random, 17);
+       // var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);

        alg.FoundNewBestSolution += (sentence, quality) => {
          bestQuality = quality;
          bestSentence = sentence;
-         Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
+         Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
        };
        alg.SolutionEvaluated += (sentence, quality) => {
  …
          globalStatistics.AddSentence(sentence, quality);
          if (iterations % 1000 == 0) {
-           //alg.PrintStats();
+           alg.PrintStats();
          }
          if (iterations % 10000 == 0) {
            //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
            //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
-           Console.WriteLine(globalStatistics);
+           Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
          }
        };
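The most actionable of the new RunDemo TODOs is eps-greedy that scores an action by the maximum reward seen so far rather than the running average ("using max reward as quality"). The sketch below is illustrative only, not the HeuristicLab EpsGreedyPolicy; the class name, its methods, and the update routine are hypothetical.

// Sketch only, not part of this changeset: eps-greedy arm selection where the
// value estimate per arm is the best reward observed so far (max-bandit view)
// instead of the mean, as proposed in the first RunDemo TODO above.
using System;

public class EpsGreedyMaxPolicy {
  private readonly Random rand;
  private readonly double eps;         // exploration probability
  private readonly double[] maxReward; // best reward seen so far per action
  private readonly int[] tries;        // pull count per action

  public EpsGreedyMaxPolicy(Random rand, int numActions, double eps) {
    this.rand = rand;
    this.eps = eps;
    maxReward = new double[numActions];
    tries = new int[numActions];
  }

  public int SelectAction() {
    // with probability eps pick a uniformly random arm (exploration)
    if (rand.NextDouble() < eps) return rand.Next(maxReward.Length);
    // otherwise try each arm once, then exploit the best observed reward
    for (int a = 0; a < tries.Length; a++)
      if (tries[a] == 0) return a;
    int best = 0;
    for (int a = 1; a < maxReward.Length; a++)
      if (maxReward[a] > maxReward[best]) best = a;
    return best;
  }

  public void Update(int action, double reward) {
    tries[action]++;
    if (reward > maxReward[action]) maxReward[action] = reward;
  }
}

Using the max rather than the mean biases the search toward arms that have produced at least one good sentence, which is the max-bandit perspective the "research thompson sampling for max bandit?" TODO also points at.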