using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Algorithms.Bandits;
using HeuristicLab.Algorithms.Bandits.BanditPolicies;
using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
using HeuristicLab.Algorithms.Bandits.Models;
using HeuristicLab.Algorithms.GrammaticalOptimization;
using HeuristicLab.Problems.GrammaticalOptimization;
using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
using BoltzmannExplorationPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.BoltzmannExplorationPolicy;
using EpsGreedyPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.EpsGreedyPolicy;
using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;
using UCTPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.UCTPolicy;

namespace Main {
  class Program {
    static void Main(string[] args) {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;

      //RunDemo();
      RunGridTest();
    }

    private static void RunGridTest() {
      int maxIterations = 70000; // for poly-10 with 50000 evaluations no successful run with HL yet
      //var globalRandom = new Random(31415);
      var localRandSeed = 31415;
      var reps = 30;

      // assumption: IBanditPolicy is the common interface of the policies constructed below
      var policyFactories = new Func<IBanditPolicy>[]
        {
          () => new RandomPolicy(),
          () => new ActiveLearningPolicy(),
          () => new EpsGreedyPolicy(0.01, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.05, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.1, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward, "max"),
          //() => new GaussianThompsonSamplingPolicy(),
          () => new GaussianThompsonSamplingPolicy(true),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
          //() => new BernoulliThompsonSamplingPolicy(),
          () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
          () => new EpsGreedyPolicy(0.01),
          () => new EpsGreedyPolicy(0.05),
          () => new EpsGreedyPolicy(0.1),
          () => new EpsGreedyPolicy(0.2),
          () => new EpsGreedyPolicy(0.5),
          () => new UCTPolicy(0.01),
          () => new UCTPolicy(0.05),
          () => new UCTPolicy(0.1),
          () => new UCTPolicy(0.5),
          () => new UCTPolicy(1),
          () => new UCTPolicy(2),
          () => new UCTPolicy(5),
          () => new UCTPolicy(10),
          () => new ModifiedUCTPolicy(0.01),
          () => new ModifiedUCTPolicy(0.05),
          () => new ModifiedUCTPolicy(0.1),
          () => new ModifiedUCTPolicy(0.5),
          () => new ModifiedUCTPolicy(1),
          () => new ModifiedUCTPolicy(2),
          () => new ModifiedUCTPolicy(5),
          () => new ModifiedUCTPolicy(10),
          () => new UCB1Policy(),
          () => new UCB1TunedPolicy(),
          () => new UCBNormalPolicy(),
          () => new BoltzmannExplorationPolicy(1),
          () => new BoltzmannExplorationPolicy(10),
          () => new BoltzmannExplorationPolicy(20),
          () => new BoltzmannExplorationPolicy(100),
          () => new BoltzmannExplorationPolicy(200),
          () => new BoltzmannExplorationPolicy(500),
          () => new ChernoffIntervalEstimationPolicy(0.01),
          () => new ChernoffIntervalEstimationPolicy(0.05),
          () => new ChernoffIntervalEstimationPolicy(0.1),
          () => new ChernoffIntervalEstimationPolicy(0.2),
          () => new ThresholdAscentPolicy(5, 0.01),
          () => new ThresholdAscentPolicy(5, 0.05),
          () => new ThresholdAscentPolicy(5, 0.1),
          () => new ThresholdAscentPolicy(5, 0.2),
          () => new ThresholdAscentPolicy(10, 0.01),
          () => new ThresholdAscentPolicy(10, 0.05),
          () => new ThresholdAscentPolicy(10, 0.1),
          () => new ThresholdAscentPolicy(10, 0.2),
          () => new ThresholdAscentPolicy(50, 0.01),
          () => new ThresholdAscentPolicy(50, 0.05),
          () => new ThresholdAscentPolicy(50, 0.1),
          () => new ThresholdAscentPolicy(50, 0.2),
          () => new ThresholdAscentPolicy(100, 0.01),
          () => new ThresholdAscentPolicy(100, 0.05),
          () => new ThresholdAscentPolicy(100, 0.1),
          () => new ThresholdAscentPolicy(100, 0.2),
          () => new ThresholdAscentPolicy(500, 0.01),
          () => new ThresholdAscentPolicy(500, 0.05),
          () => new ThresholdAscentPolicy(500, 0.1),
          () => new ThresholdAscentPolicy(500, 0.2),
          //() => new ThresholdAscentPolicy(5000, 0.01),
          //() => new ThresholdAscentPolicy(10000, 0.01),
        };

      // each factory maps a Random to a (problem, maximum sentence length) pair
      var instanceFactories = new Func<Random, Tuple<IProblem, int>>[]
        {
          //(rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 0, correctReward: 1, decoyReward: 0, phrasesAsSets: false), 15),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 0, correctReward: 1, decoyReward: 0, phrasesAsSets: true), 15),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 200, correctReward: 1, decoyReward: 0.5, phrasesAsSets: false), 15),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 200, correctReward: 1, decoyReward: 0.5, phrasesAsSets: true), 15),
          (rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
        };

      foreach (var instanceFactory in instanceFactories) {
        foreach (var useCanonical in new bool[] { true /*, false */ }) {
          foreach (var randomTries in new int[] { 0 /*, 1, 5, 10, 100, 500, 1000 */ }) {
            foreach (var policyFactory in policyFactories) {
              var myRandomTries = randomTries;
              var localRand = new Random(localRandSeed);
              var options = new ParallelOptions();
              options.MaxDegreeOfParallelism = 4;
              Parallel.For(0, reps, options, (i) => {
                // derive an independent RNG per repetition from the shared root RNG
                Random myLocalRand;
                lock (localRand)
                  myLocalRand = new Random(localRand.Next());

                int iterations = 0;
                var globalStatistics = new SentenceSetStatistics();

                // var problem = new SymbolicRegressionPoly10Problem();
                // var problem = new SantaFeAntProblem();
                //var problem = new PalindromeProblem();
                //var problem = new HardPalindromeProblem();
                //var problem = new RoyalPairProblem();
                //var problem = new EvenParityProblem();
                // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy());
                var instance = instanceFactory(myLocalRand);
                var problem = instance.Item1;
                var maxLen = instance.Item2;
                //var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
                //  new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
                var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
                  new GenericFunctionApproximationGrammarPolicy(problem, useCanonical));
                //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
                //var alg = new AlternativesContextSampler(problem, 25);

                alg.SolutionEvaluated += (sentence, quality) => {
                  iterations++;
                  globalStatistics.AddSentence(sentence, quality);
                  if (iterations % 1000 == 0) {
                    Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4} {5}", i, myRandomTries, policyFactory(), useCanonical, problem.ToString(), globalStatistics);
                  }
                };
                alg.FoundNewBestSolution += (sentence, quality) => {
                  //Console.WriteLine("{0,5} {1,25} {2} {3}",
                  //  myRandomTries, policyFactory(), useCanonical,
                  //  globalStatistics);
                };

                alg.Run(maxIterations);
              });
            }
          }
        }
      }
    }
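
    // The grid test above derives one Random per repetition from a single root seed so that
    // repetitions stay reproducible while running in parallel. The helper below is only an
    // illustrative sketch of that seeding pattern and is not called anywhere; the method name
    // and parameters are assumptions, the pattern itself mirrors the Parallel.For body above.
    private static void RunReplicationsInParallel(int reps, int rootSeed, int maxDegreeOfParallelism, Action<Random, int> replication) {
      var rootRand = new Random(rootSeed);
      // draw all per-repetition seeds on a single thread, because System.Random is not thread-safe
      var seeds = Enumerable.Range(0, reps).Select(_ => rootRand.Next()).ToArray();
      var options = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };
      Parallel.For(0, reps, options, i => {
        var repRand = new Random(seeds[i]); // independent RNG per repetition
        replication(repRand, i);
      });
    }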

    private static void RunDemo() {
      // TODO: implement bridge to HL-GP
      // TODO: unify MCTS, TD and ContextMCTS solvers (stateInfos)
      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
      // TODO: separate value function from policy
      // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
      // TODO: exhaustive search with priority list
      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
      // TODO: research Thompson sampling for the max bandit?
      // TODO: thorough test of strategies for the numCorrectPhrases-armed max bandit
      // TODO: verify TA implementation using the example from the original paper
      // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
      // TODO: implement Thompson sampling for Gaussian mixture models
      // TODO: implement inspection for MCTS (possibly an interactive command line to show statistics from the tree)
      // TODO: implement ACO-style bandit policy
      // TODO: simultaneously model transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
      // TODO: compare generating the shortest possible sentences when sampling completely randomly vs. simply picking alternatives at random
      // TODO: reward discounting (for reward distributions that change over time); create a dedicated unit test for this
      // TODO: constant optimization

      int maxIterations = 1000000;
      int iterations = 0;
      var sw = new Stopwatch();
      var globalStatistics = new SentenceSetStatistics();
      var random = new Random();

      //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
      // var phraseLen = 3;
      // var numPhrases = 5;
      // var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: false);
      //var phraseLen = 3;
      //var numPhrases = 5;
      //var problem = new FindPhrasesProblem(random, 10, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0, phrasesAsSets: false);

      // good results for symb-reg
      // prev results: e.g. 10 random tries and EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward)
      // 2015 01 19: grid test with canonical states:
      // - EpsGreedyPolicy(0.20, max)
      // - GenericThompsonSamplingPolicy("")
      // - UCTPolicy(0.10) (5 of 5 runs, 35000 iters avg.); 10 successful runs of 10 with rand-tries 0; with 40000 iters 9/10; with 30000 iters 1/10
      // 2015 01 22: symb-reg: grid test on the find-phrases problem showed good results for UCB1TunedPolicy and SequentialSearch with canonical states
      // - symb-reg: consistent results with UCB1Tuned; finds the optimal solution in ~50k iters (new GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true))
      // 2015 01 23: grid test with canonical states:
      // - UCTPolicy(0.10) and UCBNormalPolicy: 10/10 optimal solutions within max. 50k iters; slightly worse: generic Thompson with variable sigma and BoltzmannExploration(100)

      // good results for artificial ant:
      // prev results:
      // - var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
      // - GaussianModelWithUnknownVariance (and Q = 0.99 quantile) also works well for the ant
      // 2015 01 19: grid test with canonical states (non-canonical slightly worse)
      // - ant: ThresholdAscent (best: 100, 0.01; all variants relatively good)
      // - ant: policies where the variance has a large weight compared to the mean? (Gaussian (compatible), Gaussian with fixed variance, UCT with large c, all TA variants)
      // - ant: UCB1Tuned with canonical states also works very well for the artificial ant! consistent solutions in less than 10k iters

      var problem = new SymbolicRegressionPoly10Problem();
      //var problem = new SantaFeAntProblem();
      //var problem = new SymbolicRegressionProblem(random, "Tower");
      //var problem = new PalindromeProblem();
      //var problem = new HardPalindromeProblem();
      //var problem = new RoyalPairProblem();
      //var problem = new EvenParityProblem();
      // symbreg length = 11 q = 0.824522210419616

      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
      //var alg = new SequentialSearch(problem, 23, random, 0,
      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.QLearningGrammarPolicy(problem, new BoltzmannExplorationPolicy(10),
      //    1, 1, true));
      //var alg = new SequentialSearch(problem, 23, random, 0,
      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericContextualGrammarPolicy(problem, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)), true));
      var alg = new SequentialSearch(problem, 23, random, 0,
        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericFunctionApproximationGrammarPolicy(problem, true));
      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
      //var alg = new MctsContextualSampler(problem, 23, random, 0); // must visit each canonical solution only once
      //var alg = new TemporalDifferenceTreeSearchSampler(problem, 30, random, 1);
      //var alg = new ExhaustiveBreadthFirstSearch(problem, 7);
      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
      // var alg = new AlternativesSampler(problem, 17);
      // var alg = new RandomSearch(problem, random, 17);
      //var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);

      alg.FoundNewBestSolution += (sentence, quality) => {
        //Console.WriteLine("{0}", globalStatistics);
        //Console.ReadLine();
      };
      alg.SolutionEvaluated += (sentence, quality) => {
        iterations++;
        globalStatistics.AddSentence(sentence, quality);
        if (iterations % 1000 == 0) {
          if (iterations % 10000 == 0) Console.Clear();
          Console.SetCursorPosition(0, 0);
          alg.PrintStats();
        }
        //Console.WriteLine(sentence);
        //if (iterations % 10000 == 0) {
        //  Console.WriteLine("{0}", globalStatistics);
        //}
      };

      sw.Start();
      alg.Run(maxIterations);
      sw.Stop();

      Console.Clear();
      alg.PrintStats();
      Console.WriteLine(globalStatistics);
      // elapsed ms * 1000 / iterations = microseconds per evaluated solution
      Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} µs/sol",
        sw.Elapsed.TotalSeconds,
        maxIterations / (double)sw.Elapsed.TotalSeconds,
        (double)sw.ElapsedMilliseconds * 1000 / maxIterations);
    }
  }
}