using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Algorithms.Bandits;
using HeuristicLab.Algorithms.Bandits.BanditPolicies;
using HeuristicLab.Algorithms.Bandits.Models;
using HeuristicLab.Algorithms.GrammaticalOptimization;
using HeuristicLab.Problems.GrammaticalOptimization;
using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;

namespace Main {
  class Program {
    static void Main(string[] args) {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;

      RunDemo();
      //RunGridTest();
    }

    private static void RunGridTest() {
      int maxIterations = 200000; // for poly-10 with 50,000 evaluations there has been no successful run with HL yet
      //var globalRandom = new Random(31415);
      var localRandSeed = 31415;
      var reps = 8;

      // policy factories; the policies are assumed to implement IBanditPolicy
      // (cf. HeuristicLab.Algorithms.Bandits)
      var policies = new Func<IBanditPolicy>[]
        {
          // eps-greedy variants that rank actions by the maximum observed reward instead
          // of the mean (a standalone sketch of this selection rule follows after this method)
          () => new EpsGreedyPolicy(0.01, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.05, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.1, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward, "max"),
          //() => new GaussianThompsonSamplingPolicy(),
          () => new GaussianThompsonSamplingPolicy(true),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
          //() => new BernoulliThompsonSamplingPolicy(),
          () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
          () => new RandomPolicy(),
          () => new EpsGreedyPolicy(0.01),
          () => new EpsGreedyPolicy(0.05),
          () => new EpsGreedyPolicy(0.1),
          () => new EpsGreedyPolicy(0.2),
          () => new EpsGreedyPolicy(0.5),
          () => new UCTPolicy(0.1),
          () => new UCTPolicy(0.5),
          () => new UCTPolicy(1),
          () => new UCTPolicy(2),
          () => new UCTPolicy(5),
          () => new UCTPolicy(10),
          () => new UCB1Policy(),
          () => new UCB1TunedPolicy(),
          () => new UCBNormalPolicy(),
          () => new BoltzmannExplorationPolicy(0.1),
          () => new BoltzmannExplorationPolicy(0.5),
          () => new BoltzmannExplorationPolicy(1),
          () => new BoltzmannExplorationPolicy(5),
          () => new BoltzmannExplorationPolicy(10),
          () => new BoltzmannExplorationPolicy(20),
          () => new BoltzmannExplorationPolicy(100),
          () => new ChernoffIntervalEstimationPolicy(0.01),
          () => new ChernoffIntervalEstimationPolicy(0.05),
          () => new ChernoffIntervalEstimationPolicy(0.1),
          () => new ChernoffIntervalEstimationPolicy(0.2),
          () => new ThresholdAscentPolicy(10, 0.01),
          () => new ThresholdAscentPolicy(10, 0.05),
          () => new ThresholdAscentPolicy(10, 0.1),
          () => new ThresholdAscentPolicy(10, 0.2),
          () => new ThresholdAscentPolicy(100, 0.01),
          () => new ThresholdAscentPolicy(100, 0.05),
          () => new ThresholdAscentPolicy(100, 0.1),
          () => new ThresholdAscentPolicy(100, 0.2),
          () => new ThresholdAscentPolicy(1000, 0.01),
          () => new ThresholdAscentPolicy(1000, 0.05),
          () => new ThresholdAscentPolicy(1000, 0.1),
          () => new ThresholdAscentPolicy(1000, 0.2),
          () => new ThresholdAscentPolicy(5000, 0.01),
          () => new ThresholdAscentPolicy(10000, 0.01),
        };

      foreach (var problem in new Tuple<IProblem, int>[]
        {
          //Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
          Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
        })
        foreach (var randomTries in new int[] { 1, 10, /* 5, 100, 500, 1000 */ }) {
          foreach (var policy in policies) {
            var myRandomTries = randomTries;
            var localRand = new Random(localRandSeed);
            var options = new ParallelOptions();
            options.MaxDegreeOfParallelism = 4;
            Parallel.For(0, reps, options, (i) => {
              //var t = Task.Run(() => {
              Random myLocalRand;
              lock (localRand) myLocalRand = new Random(localRand.Next());
              //for (int i = 0; i < reps; i++) {

              int iterations = 0;
              var globalStatistics = new SentenceSetStatistics();

              // var problem = new SymbolicRegressionPoly10Problem();
              // var problem = new SantaFeAntProblem();
              //var problem = new PalindromeProblem();
              //var problem = new HardPalindromeProblem();
              //var problem = new RoyalPairProblem();
              //var problem = new EvenParityProblem();
              var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: make sure we generate the same random numbers for each experiment
              //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
              //var alg = new AlternativesContextSampler(problem, 25);

              alg.SolutionEvaluated += (sentence, quality) => {
                iterations++;
                globalStatistics.AddSentence(sentence, quality);
                if (iterations % 10000 == 0) {
                  // policy() creates a fresh instance only to print its configuration
                  Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, policy(), globalStatistics);
                }
              };

              alg.Run(maxIterations);
              //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
              //}
              //});
              //tasks.Add(t);
            });
          }
        }
      //Task.WaitAll(tasks.ToArray());
    }
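
    // Aside, not part of the original experiments: several grid entries above use
    // EpsGreedyPolicy with (aInfo) => aInfo.MaxReward, i.e. they rank actions by the
    // best reward seen so far instead of the mean. The following self-contained sketch
    // shows that selection rule in isolation; the method and its parameters are
    // illustrative only and do not mirror the HeuristicLab API.
    private static int SelectEpsGreedyMax(Random rand, double eps, List<double>[] rewardHistories) {
      // with probability eps: explore a uniformly random action
      if (rand.NextDouble() < eps) return rand.Next(rewardHistories.Length);
      // otherwise exploit: choose the action with the largest observed reward;
      // untried actions get +infinity so that each action is tried at least once
      int bestAction = 0;
      double bestValue = double.NegativeInfinity;
      for (int a = 0; a < rewardHistories.Length; a++) {
        double value = rewardHistories[a].Count == 0 ? double.PositiveInfinity : rewardHistories[a].Max();
        if (value > bestValue) {
          bestValue = value;
          bestAction = a;
        }
      }
      return bestAction;
    }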

    private static void RunDemo() {
      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
      // TODO: separate the value function from the policy
      // TODO: in contextual MCTS, store a bandit info for each node in the _graph_ and also update the bandit infos of all parents
      // TODO: exhaustive search with priority list
      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? see the comparison: old vs. new implementation of GaussianThompsonSampling
      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
      // TODO: how else can samplers be compared, i.e. what can be measured to estimate the quality of a sampler (apart from solution quality and iterations until the best solution)? => goal: faster iterations to a good result
      // TODO: research Thompson sampling for the max bandit?
      // TODO: thorough test of strategies for the k-armed max bandit
      // TODO: verify the TA implementation using the example from the original paper
      // TODO: separate the policy from the MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
      // TODO: implement Thompson sampling for Gaussian mixture models
      // TODO: implement inspection for MCTS (possibly an interactive command line to display statistics from the tree)
      // TODO: implement an ACO-style bandit policy
      // TODO: simultaneous modeling of transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
      // TODO: for completely random rollouts, compare generating the shortest possible sentences vs. simply picking alternatives at random
      // TODO: reward discounting (for reward distributions that change over time); create a dedicated unit test for this (see the DiscountedActionValue sketch after this method)
      // TODO: constant optimization

      int maxIterations = 100000;
      int iterations = 0;
      var sw = new Stopwatch();
      double bestQuality = 0;
      string bestSentence = "";
      var globalStatistics = new SentenceSetStatistics();
      var random = new Random();

      var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. with 10 random tries and EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward)
      // Ant: good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
      // GaussianModelWithUnknownVariance (and Q = 0.99 quantile) also works well for Ant
      //var problem = new SantaFeAntProblem();
      //var problem = new SymbolicRegressionProblem("Tower");
      //var problem = new PalindromeProblem();
      //var problem = new HardPalindromeProblem();
      //var problem = new RoyalPairProblem();
      //var problem = new EvenParityProblem();

      var alg = new MctsSampler(problem, 25, random, 0, new GaussianThompsonSamplingPolicy(true));
      //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
      // var alg = new AlternativesSampler(problem, 17);
      // var alg = new RandomSearch(problem, random, 17);
      // var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);

      alg.FoundNewBestSolution += (sentence, quality) => {
        bestQuality = quality;
        bestSentence = sentence;
        Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
      };
      alg.SolutionEvaluated += (sentence, quality) => {
        iterations++;
        globalStatistics.AddSentence(sentence, quality);
        if (iterations % 1000 == 0) {
          alg.PrintStats();
        }
        if (iterations % 10000 == 0) {
          //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
          Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
        }
      };

      sw.Start();
      alg.Run(maxIterations);
      sw.Stop();

      Console.WriteLine("{0,10} Best solution: {1,10:F5} {2}", iterations, bestQuality, bestSentence);
      // ElapsedMilliseconds * 1000 / iterations = microseconds per evaluated solution
      Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} µs/sol",
        sw.Elapsed.TotalSeconds,
        maxIterations / sw.Elapsed.TotalSeconds,
        sw.ElapsedMilliseconds * 1000.0 / maxIterations);
    }
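
    // Aside, not part of the original program: the reward-discounting TODO above can be
    // realized with an exponentially weighted moving average, which weights recent
    // rewards more heavily and therefore tracks reward distributions that change over
    // time. This is a minimal sketch under that assumption; the class name and the
    // constant step size are illustrative, not taken from HeuristicLab.
    private sealed class DiscountedActionValue {
      private readonly double stepSize; // in (0, 1]; larger values forget old rewards faster
      private double value;             // current discounted estimate of the action value
      private bool initialized;

      public DiscountedActionValue(double stepSize) {
        this.stepSize = stepSize;
      }

      public double Value { get { return value; } }

      public void Update(double reward) {
        if (!initialized) {
          value = reward; // initialize with the first observation
          initialized = true;
        } else {
          value += stepSize * (reward - value); // exponential recency weighting
        }
      }
    }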
  }
}
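
// Aside, not part of the original program: the GenericThompsonSamplingPolicy entries in
// RunGridTest pair Thompson sampling with a GaussianModel. The sketch below shows the
// core idea for a Gaussian reward model with known observation variance: keep a
// conjugate posterior over each action's mean, draw one sample per action, and play the
// action with the largest draw. All names and parameters are illustrative and do not
// mirror the HeuristicLab GaussianModel constructor.
namespace Main {
  internal sealed class GaussianArmSketch {
    private readonly double priorMean, priorVariance, rewardVariance;
    private int n;      // number of observed rewards
    private double sum; // sum of observed rewards

    public GaussianArmSketch(double priorMean, double priorVariance, double rewardVariance) {
      this.priorMean = priorMean;
      this.priorVariance = priorVariance;
      this.rewardVariance = rewardVariance;
    }

    public void Update(double reward) {
      n++;
      sum += reward;
    }

    // Thompson sampling step: draw one plausible mean from the current posterior.
    public double SampleMean(Random rand) {
      // conjugate update for a Gaussian mean with known observation variance
      double posteriorPrecision = 1.0 / priorVariance + n / rewardVariance;
      double posteriorMean = (priorMean / priorVariance + sum / rewardVariance) / posteriorPrecision;
      // Box-Muller transform for a standard normal deviate
      double u1 = 1.0 - rand.NextDouble(), u2 = rand.NextDouble();
      double z = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2);
      return posteriorMean + z * Math.Sqrt(1.0 / posteriorPrecision);
    }
  }
}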