Timestamp: 01/09/15 14:57:28
Files: 1 edited
Legend: unmodified lines are shown with a leading space, added lines with '+', removed lines with '-'.
--- branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs (r11732)
+++ branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs (r11742)

@@ -8,4 +8,5 @@
 using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.BanditPolicies;
 using HeuristicLab.Algorithms.Bandits.Models;
 using HeuristicLab.Algorithms.GrammaticalOptimization;

@@ -26,12 +27,17 @@
 //var globalRandom = new Random(31415);
 var localRandSeed = 31415;
-var reps = 20;
+var reps = 8;
 
-var policies = new Func<IPolicy>[]
+var policies = new Func<IBanditPolicy>[]
 {
-  () => new GaussianThompsonSamplingPolicy(),
+  () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
+  () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
+  () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
+  () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
+  //() => new GaussianThompsonSamplingPolicy(),
   () => new GaussianThompsonSamplingPolicy(true),
-  () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)),
-  () => new BernoulliThompsonSamplingPolicy(),
+  () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
+  () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
+  //() => new BernoulliThompsonSamplingPolicy(),
   () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
   () => new RandomPolicy(),

@@ -61,23 +67,23 @@
   () => new ChernoffIntervalEstimationPolicy(0.1),
   () => new ChernoffIntervalEstimationPolicy(0.2),
-  // (rand) => new ThresholdAscentPolicy(10, 0.01),
-  // (rand) => new ThresholdAscentPolicy(10, 0.05),
-  // (rand) => new ThresholdAscentPolicy(10, 0.1),
-  // (rand) => new ThresholdAscentPolicy(10, 0.2),
-  // (rand) => new ThresholdAscentPolicy(100, 0.01),
-  // (rand) => new ThresholdAscentPolicy(100, 0.05),
-  // (rand) => new ThresholdAscentPolicy(100, 0.1),
-  // (rand) => new ThresholdAscentPolicy(100, 0.2),
-  // (rand) => new ThresholdAscentPolicy(1000, 0.01),
-  // (rand) => new ThresholdAscentPolicy(1000, 0.05),
-  // (rand) => new ThresholdAscentPolicy(1000, 0.1),
-  // (rand) => new ThresholdAscentPolicy(1000, 0.2),
-  // (rand) => new ThresholdAscentPolicy(5000, 0.01),
-  // (rand) => new ThresholdAscentPolicy(10000, 0.01),
+  () => new ThresholdAscentPolicy(10, 0.01),
+  () => new ThresholdAscentPolicy(10, 0.05),
+  () => new ThresholdAscentPolicy(10, 0.1),
+  () => new ThresholdAscentPolicy(10, 0.2),
+  () => new ThresholdAscentPolicy(100, 0.01),
+  () => new ThresholdAscentPolicy(100, 0.05),
+  () => new ThresholdAscentPolicy(100, 0.1),
+  () => new ThresholdAscentPolicy(100, 0.2),
+  () => new ThresholdAscentPolicy(1000, 0.01),
+  () => new ThresholdAscentPolicy(1000, 0.05),
+  () => new ThresholdAscentPolicy(1000, 0.1),
+  () => new ThresholdAscentPolicy(1000, 0.2),
+  () => new ThresholdAscentPolicy(5000, 0.01),
+  () => new ThresholdAscentPolicy(10000, 0.01),
 };
 
 foreach (var problem in new Tuple<IProblem, int>[]
 {
-  Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+  //Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
   Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
 })

@@ -87,5 +93,5 @@
 var localRand = new Random(localRandSeed);
 var options = new ParallelOptions();
-options.MaxDegreeOfParallelism = 1;
+options.MaxDegreeOfParallelism = 4;
 Parallel.For(0, reps, options, (i) => {
 //var t = Task.Run(() => {

@@ -132,23 +138,17 @@
 private static void RunDemo() {
 // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
-// TODO: implement GaussianWithUnknownMeanAndVariance model for Thompson sampling (verify with a unit test whether the correct mean and variance are identified)
 // TODO: separate value function from policy
-// TODO: debug and verify implementation variants of Gaussian Thompson sampling with a unit test
-// TODO: refactor policies to use banditInfos (policies are factories for bandit infos and a bandit info only has an update routine; each policy works only with its type of banditInfo)
 // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
 // TODO: exhaustive search with priority list
-// TODO: why does the old implementation of GaussianThompson work better for SantaFe than the old one? See comparison: old vs. new implementation of GaussianThompsonSampling
+// TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
 // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
 // TODO: how else can samplers be compared, i.e. what can be measured to estimate a sampler's quality (apart from solution quality and iterations until the best solution) => goal: faster iterations to a good result
-// TODO: is the likelihood for R=1 easy to compute for a Gaussian or GaussianMixture?
 // TODO: research thompson sampling for max bandit?
 // TODO: extensive test of strategies for the k-armed max bandit
 // TODO: verify TA implementation using example from the original paper
-// TODO: compare results for different policies also for the symb-reg problem
 // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
 // TODO: implement thompson sampling for gaussian mixture models
 // TODO: implement inspection for MCTS (possibly an interactive command line to display statistics from the tree)
 // TODO: implement ACO-style bandit policy
-// TODO: implement sequences that can be manipulated in-place (instead of strings); alternatives are also stored as sequences, and for a sequence the index of the first NT symbol can be stored
 // TODO: simultaneous modeling of transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
 // TODO: compare generating the shortest possible sentences when sampling completely at random vs. simply choosing alternatives at random

@@ -165,12 +165,15 @@
 var random = new Random();
 
-var problem = new SymbolicRegressionPoly10Problem();
-//var problem = new SantaFeAntProblem(); // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
-//var problem = new SymbolicRegressionProblem("Tower"); // very good results e.g. new EpsGreedyPolicy(0.2) using max reward as quality!
+var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
+// Ant
+// good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
+// GaussianModelWithUnknownVariance (and Q = 0.99 quantile) also works well for Ant
+//var problem = new SantaFeAntProblem();
+//var problem = new SymbolicRegressionProblem("Tower");
 //var problem = new PalindromeProblem();
 //var problem = new HardPalindromeProblem();
 //var problem = new RoyalPairProblem();
 //var problem = new EvenParityProblem();
-var alg = new MctsSampler(problem, 23, random, 10, new EpsGreedyPolicy(0.2)); // GaussianModelWithUnknownVariance (and Q = 0.99 quantile) works well for Ant
+var alg = new MctsSampler(problem, 25, random, 0, new GaussianThompsonSamplingPolicy(true));
 //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
 //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
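The most notable change in this revision is the new eps-greedy configurations that value an arm by its maximum observed reward rather than its average (the EpsGreedyPolicy(..., (aInfo)=> aInfo.MaxReward, "max") entries). For readers unfamiliar with the idea, here is a minimal, self-contained C# sketch of such a policy. All names (EpsGreedyMaxPolicy, SelectAction, Update) are hypothetical and do not reflect the HeuristicLab API; this is an illustration of the technique, not the project's implementation.

using System;

// Sketch: eps-greedy bandit policy that scores each arm by the maximum
// reward observed so far instead of the average reward (hypothetical names).
public class EpsGreedyMaxPolicy {
  private readonly double eps;         // exploration probability
  private readonly Random rand;
  private readonly int[] tries;        // pulls per arm
  private readonly double[] maxReward; // best reward seen per arm

  public EpsGreedyMaxPolicy(double eps, int numActions, Random rand) {
    this.eps = eps;
    this.rand = rand;
    tries = new int[numActions];
    maxReward = new double[numActions];
    // start at -inf so arms with negative rewards are handled correctly
    for (int a = 0; a < numActions; a++) maxReward[a] = double.NegativeInfinity;
  }

  public int SelectAction() {
    // pull every arm once before exploiting
    for (int a = 0; a < tries.Length; a++)
      if (tries[a] == 0) return a;
    // explore uniformly with probability eps
    if (rand.NextDouble() < eps) return rand.Next(tries.Length);
    // otherwise exploit the arm with the highest observed reward
    int best = 0;
    for (int a = 1; a < maxReward.Length; a++)
      if (maxReward[a] > maxReward[best]) best = a;
    return best;
  }

  public void Update(int action, double reward) {
    tries[action]++;
    maxReward[action] = Math.Max(maxReward[action], reward);
  }
}

Valuing arms by their best-seen reward fits the max-bandit setting that the TODO comments mention: in grammar-based search the goal is the single best sentence, not a high average reward, which is presumably why this variant "seems to work well for symb-reg".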
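The revision also reworks the Thompson-sampling configurations (GaussianThompsonSamplingPolicy(true), GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1))). The sketch below shows the textbook normal-normal variant of Gaussian Thompson sampling with known observation variance. The constructor arguments of HeuristicLab's GaussianModel are not documented in this changeset, so the class name and the prior parameters here (prior mean 0.5, prior variance 10, observation variance 1) are assumptions chosen to mirror the call sites above.

using System;

// Sketch: Thompson sampling with a conjugate normal prior on each arm's
// mean and known observation variance (hypothetical names and priors).
public class GaussianThompsonSampler {
  private readonly double priorMean, priorVar, obsVar; // assumed priors
  private readonly int[] n;       // observations per arm
  private readonly double[] sum;  // reward sums per arm
  private readonly Random rand;

  public GaussianThompsonSampler(int numActions, Random rand,
      double priorMean = 0.5, double priorVar = 10, double obsVar = 1) {
    this.priorMean = priorMean; this.priorVar = priorVar; this.obsVar = obsVar;
    this.rand = rand;
    n = new int[numActions];
    sum = new double[numActions];
  }

  public int SelectAction() {
    int best = 0; double bestSample = double.NegativeInfinity;
    for (int a = 0; a < n.Length; a++) {
      // conjugate normal-normal update: precisions add
      double postVar = 1.0 / (1.0 / priorVar + n[a] / obsVar);
      double postMean = postVar * (priorMean / priorVar + sum[a] / obsVar);
      // draw one sample from the posterior and pick the argmax
      double sample = postMean + Math.Sqrt(postVar) * NextGaussian();
      if (sample > bestSample) { bestSample = sample; best = a; }
    }
    return best;
  }

  public void Update(int action, double reward) {
    n[action]++; sum[action] += reward;
  }

  private double NextGaussian() {
    // Box-Muller transform for a standard normal sample
    double u1 = 1.0 - rand.NextDouble(), u2 = rand.NextDouble();
    return Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
  }
}

Untried arms are sampled directly from the prior, so exploration emerges from the posterior sampling itself rather than from an explicit eps parameter, one reason Thompson sampling is an attractive alternative to eps-greedy in this benchmark suite.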