- Timestamp: 01/02/15 16:08:21 (10 years ago)
- File: 1 edited
Legend:
- Unmodified (prefixed with a space)
- Added (+)
- Removed (-)
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
(r11727 → r11730)

 using System.Data;
 using System.Diagnostics;
+using System.Globalization;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.Models;
 using HeuristicLab.Algorithms.GrammaticalOptimization;
 using HeuristicLab.Problems.GrammaticalOptimization;
…
 class Program {
   static void Main(string[] args) {
-    // RunDemo();
-    RunGridTest();
+    CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+
+    RunDemo();
+    //RunGridTest();
   }

   private static void RunGridTest() {
-    int maxIterations = 150000;
-    var globalRandom = new Random(31415);
-    var reps = 10;
-    Parallel.ForEach(new int[] { 1, 5, 10, 100, 500, 1000 }, (randomTries) => {
-      Random localRand;
-      lock (globalRandom) {
-        localRand = new Random(globalRandom.Next());
-      }
-      var policyFactories = new Func<int, IPolicy>[]
+    int maxIterations = 100000; // for poly-10 with 50000 evaluations no successful try with hl yet
+    // var globalRandom = new Random(31415);
+    var localRandSeed = 31415;
+    var reps = 20;
+
+    var policyFactories = new Func<Random, int, IPolicy>[]
     {
-      (numActions) => new RandomPolicy(localRand, numActions),
-      (numActions) => new UCB1Policy(numActions),
-      (numActions) => new UCB1TunedPolicy(numActions),
-      (numActions) => new UCBNormalPolicy(numActions),
-      (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.01),
-      (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.05),
-      (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.1),
-      (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.2),
-      (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.5),
-      (numActions) => new GaussianThompsonSamplingPolicy(localRand, numActions),
-      (numActions) => new BernoulliThompsonSamplingPolicy(localRand, numActions)
+      (rand, numActions) => new GaussianThompsonSamplingPolicy(rand, numActions),
+      (rand, numActions) => new BernoulliThompsonSamplingPolicy(rand, numActions),
+      (rand, numActions) => new RandomPolicy(rand, numActions),
+      (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.01),
+      (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.05),
+      (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1),
+      (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.2),
+      (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.5),
+      (rand, numActions) => new UCTPolicy(numActions, 0.1),
+      (rand, numActions) => new UCTPolicy(numActions, 0.5),
+      (rand, numActions) => new UCTPolicy(numActions, 1),
+      (rand, numActions) => new UCTPolicy(numActions, 2),
+      (rand, numActions) => new UCTPolicy(numActions, 5),
+      (rand, numActions) => new UCTPolicy(numActions, 10),
+      (rand, numActions) => new UCB1Policy(numActions),
+      (rand, numActions) => new UCB1TunedPolicy(numActions),
+      (rand, numActions) => new UCBNormalPolicy(numActions),
+      (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.1),
+      (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.5),
+      (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 1),
+      (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 5),
+      (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 10),
+      (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 20),
+      (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 100),
+      (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.01),
+      (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.05),
+      (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.1),
+      (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.2),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.01),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.05),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.1),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.2),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.01),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.05),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.1),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.2),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.01),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.05),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.1),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.2),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 5000, 0.01),
+      (rand, numActions) => new ThresholdAscentPolicy(numActions, 10000, 0.01),
     };

-    foreach (var policyFactory in policyFactories)
-      for (int i = 0; i < reps; i++) {
+    var tasks = new List<Task>();
+    foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
+      foreach (var policyFactory in policyFactories) {
+        var myPolicyFactory = policyFactory;
+        var myRandomTries = randomTries;
+        var localRand = new Random(localRandSeed);
+        var options = new ParallelOptions();
+        options.MaxDegreeOfParallelism = 1;
+        Parallel.For(0, reps, options, (i) => {
+          //var t = Task.Run(() => {
+          Random myLocalRand;
+          lock (localRand)
+            myLocalRand = new Random(localRand.Next());
+
+          //for (int i = 0; i < reps; i++) {
+
         int iterations = 0;
         var sw = new Stopwatch();
         var globalStatistics = new SentenceSetStatistics();

-        //var problem = new SymbolicRegressionPoly10Problem();
-        var problem = new SantaFeAntProblem();
+        var problem = new SymbolicRegressionPoly10Problem();
+        //var problem = new SantaFeAntProblem();
         //var problem = new PalindromeProblem();
         //var problem = new HardPalindromeProblem();
         //var problem = new RoyalPairProblem();
         //var problem = new EvenParityProblem();
-        var alg = new MctsSampler(problem, 17, localRand, randomTries, policyFactory);
+        var alg = new MctsSampler(problem, 25, myLocalRand, myRandomTries, myPolicyFactory);
         //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
         //var alg = new AlternativesContextSampler(problem, 25);
…
           globalStatistics.AddSentence(sentence, quality);
           if (iterations % 10000 == 0) {
-            Console.WriteLine("{0} {1} {2}", randomTries, policyFactory(1), globalStatistics);
+            Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, myPolicyFactory(myLocalRand, 1), globalStatistics);
           }
         };
…

         sw.Stop();
-      }
-    });
+          //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
+          //}
+          //});
+          //tasks.Add(t);
+        });
+      }
+    }
+    //Task.WaitAll(tasks.ToArray());
   }

   private static void RunDemo() {
-    // TODO: implement threshold ascent
-    // TODO: implement inspection for MCTS
+    // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
+    // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
+    // TODO: how else can samplers be compared, i.e. what can be measured to estimate the quality of a sampler (apart from quality and iterations until the best solution)? => goal: faster iterations to a good result
+    // TODO: is the likelihood for R=1 easy to compute for a Gaussian or GaussianMixture?
+    // TODO: research thompson sampling for max bandit?
+    // TODO: extensive test of strategies for the k-armed max bandit
+    // TODO: verify TA implementation using example from the original paper
+    // TODO: reference HL.ProblemInstances and try on tower dataset
+    // TODO: compare results for different policies also for the symb-reg problem
+    // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
+    // TODO: implement thompson sampling for gaussian mixture models
+    // TODO: implement inspection for MCTS (possibly an interactive command line to display statistics from the tree)
+    // TODO: implement ACO-style bandit policy
+    // TODO: implement sequences that can be manipulated in-place (instead of strings), alternatives are also stored as sequences, for a sequence the index of the first NT-symb can be stored
+    // TODO: simultaneous modeling of transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
+    // TODO: comparison: when sampling completely randomly, generate the shortest possible sentences vs. simply choose alternatives at random
+    // TODO: reward discounting (for reward distributions that change over time); create a dedicated unit test for this
+

     int maxIterations = 10000000;
…
     string bestSentence = "";
     var globalStatistics = new SentenceSetStatistics();
-    var random = new Random(31415);
+    var random = new Random();

-    // var problem = new SymbolicRegressionPoly10Problem();
+    //var problem = new SymbolicRegressionPoly10Problem();
     var problem = new SantaFeAntProblem();
     //var problem = new PalindromeProblem();
…
     //var problem = new RoyalPairProblem();
     //var problem = new EvenParityProblem();
-    var alg = new MctsSampler(problem, 17, random);
-    //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
-    //var alg = new AlternativesContextSampler(problem, 25);
+    //var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new GenericThompsonSamplingPolicy(rand, numActions, new GaussianModel(numActions, 0.5, 10)));
+    //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
+    //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
+    //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
+    // var alg = new AlternativesSampler(problem, 17);
+    var alg = new RandomSearch(problem, random, 17);

     alg.FoundNewBestSolution += (sentence, quality) => {
…
       iterations++;
       globalStatistics.AddSentence(sentence, quality);
+      if (iterations % 1000 == 0) {
+        //alg.PrintStats();
+      }
       if (iterations % 10000 == 0) {
         //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
-        Console.WriteLine(globalStatistics.ToString());
+        //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
+        Console.WriteLine(globalStatistics);
       }
     };
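A few notes on the techniques touched by this changeset follow; the sketches are illustrative only and assume interfaces that are not part of the diff.

The reworked RunGridTest no longer lets policy factories capture a shared Random: factories are now Func<Random, int, IPolicy>, and each Parallel.For body derives its own generator from a locked, seeded master. A minimal sketch of that seeding pattern:

    using System;
    using System.Threading.Tasks;

    static class SeedingDemo {
      // One master Random per configuration; System.Random is not thread-safe,
      // so it is locked while handing out child seeds. Each Parallel.For body
      // then owns a private Random, keeping runs reproducible (seed 31415).
      public static void RunReps(int reps) {
        var masterRand = new Random(31415);
        Parallel.For(0, reps, i => {
          Random myLocalRand;
          lock (masterRand)
            myLocalRand = new Random(masterRand.Next());
          // ... run one repetition using myLocalRand ...
        });
      }
    }

Deriving child seeds from a single seeded master avoids both the thread-safety problem and the duplicate sequences that appear when Randoms are created with the same time-based seed.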
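BoltzmannExplorationPolicy is swept over parameter values 0.1 through 100 above, but its implementation is not part of this diff. For reference, a textbook softmax selection rule that such a policy presumably implements; the method shape and parameter naming are assumptions:

    using System;
    using System.Linq;

    static class BoltzmannSketch {
      // Softmax action selection: arms with higher mean reward get
      // exponentially more probability mass. beta -> 0 approaches uniform
      // random choice; large beta is nearly greedy.
      public static int Select(Random rand, double[] meanReward, double beta) {
        var weights = meanReward.Select(m => Math.Exp(beta * m)).ToArray();
        double r = rand.NextDouble() * weights.Sum();
        for (int a = 0; a < weights.Length; a++) {
          r -= weights[a];
          if (r <= 0) return a;
        }
        return weights.Length - 1; // guard against rounding error
      }
    }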
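Likewise, the UCTPolicy entries sweep an exploration constant over 0.1 through 10. A sketch of the UCB1-style selection rule used by UCT (Kocsis & Szepesvári); the signature is hypothetical:

    using System;

    static class UctSketch {
      // Exploit the empirical mean, but add an exploration bonus that grows
      // for rarely tried arms. c = 1/sqrt(2) essentially recovers plain UCB1.
      public static int Select(double[] sumReward, int[] tries, int totalTries, double c) {
        int best = -1; double bestScore = double.NegativeInfinity;
        for (int a = 0; a < tries.Length; a++) {
          if (tries[a] == 0) return a; // try every arm once first
          double score = sumReward[a] / tries[a]
                       + 2 * c * Math.Sqrt(Math.Log(totalTries) / tries[a]);
          if (score > bestScore) { bestScore = score; best = a; }
        }
        return best;
      }
    }

Small c makes the tree search greedy on mean reward; larger c spends more of the budget on rarely visited alternatives.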
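Several TODOs concern GaussianThompsonSampling, and RunDemo experiments with a GenericThompsonSamplingPolicy over a GaussianModel(numActions, 0.5, 10). A minimal sketch of the Gaussian Thompson-sampling step, assuming per-arm posterior mean/variance arrays; the posterior-update rule itself is not shown in this changeset:

    using System;

    static class GaussianThompsonSketch {
      // Draw one plausible mean from each arm's posterior, play the argmax.
      // mu/sigma2 are assumed to be maintained elsewhere by the reward model.
      public static int Select(Random rand, double[] mu, double[] sigma2) {
        int best = -1; double bestSample = double.NegativeInfinity;
        for (int a = 0; a < mu.Length; a++) {
          // Box-Muller transform for a standard normal sample
          double u1 = 1.0 - rand.NextDouble(), u2 = rand.NextDouble();
          double z = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
          double sample = mu[a] + Math.Sqrt(sigma2[a]) * z;
          if (sample > bestSample) { bestSample = sample; best = a; }
        }
        return best;
      }
    }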
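For the final TODO (reward discounting for reward distributions that change over time), one common building block is an exponentially discounted running mean; gamma here is an assumed parameter, and the suggested unit test could feed a reward stream whose mean shifts and assert that the estimate follows:

    class DiscountedMean {
      // gamma = 1 recovers the ordinary running mean; gamma < 1 lets old
      // rewards fade so the estimate tracks non-stationary distributions.
      private double count, mean;
      public void Add(double reward, double gamma) {
        count = gamma * count + 1.0;      // discounted effective sample size
        mean += (reward - mean) / count;  // equal to (gamma*S + r) / (gamma*N + 1)
      }
      public double Mean { get { return mean; } }
    }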