using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using HeuristicLab.Algorithms.Bandits;
using HeuristicLab.Algorithms.Bandits.BanditPolicies;
using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
using HeuristicLab.Algorithms.Bandits.Models;
using HeuristicLab.Algorithms.GeneticProgramming;
using HeuristicLab.Algorithms.GrammaticalOptimization;
using HeuristicLab.Problems.GrammaticalOptimization;
using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
using BoltzmannExplorationPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.BoltzmannExplorationPolicy;
using EpsGreedyPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.EpsGreedyPolicy;
using IProblem = HeuristicLab.Problems.GrammaticalOptimization.IProblem;
using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;
using UCTPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.UCTPolicy;

namespace Main {
  class Program {
    static void Main(string[] args) {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;

      //RunDemo();
      //RunGpDemo();
      //RunGridTest();
      RunGpGridTest();
    }

    private static void RunGridTest() {
      int maxIterations = 200000; // for poly-10 with 50000 evaluations there has been no successful run with HL so far
      //var globalRandom = new Random(31415);
      var localRandSeed = 31415;
      var reps = 20;

      var policyFactories = new Func<IBanditPolicy>[]
        {
          () => new RandomPolicy(),
          () => new ActiveLearningPolicy(),
          () => new EpsGreedyPolicy(0.01, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.05, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.1, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward, "max"),
          //() => new GaussianThompsonSamplingPolicy(),
          () => new GaussianThompsonSamplingPolicy(true),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
          //() => new BernoulliThompsonSamplingPolicy(),
          () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
          () => new EpsGreedyPolicy(0.01),
          () => new EpsGreedyPolicy(0.05),
          () => new EpsGreedyPolicy(0.1),
          () => new EpsGreedyPolicy(0.2),
          () => new EpsGreedyPolicy(0.5),
          () => new UCTPolicy(0.01),
          () => new UCTPolicy(0.05),
          () => new UCTPolicy(0.1),
          () => new UCTPolicy(0.5),
          () => new UCTPolicy(1),
          () => new UCTPolicy(2),
          () => new UCTPolicy(5),
          () => new UCTPolicy(10),
          () => new ModifiedUCTPolicy(0.01),
          () => new ModifiedUCTPolicy(0.05),
          () => new ModifiedUCTPolicy(0.1),
          () => new ModifiedUCTPolicy(0.5),
          () => new ModifiedUCTPolicy(1),
          () => new ModifiedUCTPolicy(2),
          () => new ModifiedUCTPolicy(5),
          () => new ModifiedUCTPolicy(10),
          () => new UCB1Policy(),
          () => new UCB1TunedPolicy(),
          () => new UCBNormalPolicy(),
          () => new BoltzmannExplorationPolicy(1),
          () => new BoltzmannExplorationPolicy(10),
          () => new BoltzmannExplorationPolicy(20),
          () => new BoltzmannExplorationPolicy(100),
          () => new BoltzmannExplorationPolicy(200),
          () => new BoltzmannExplorationPolicy(500),
          () => new ChernoffIntervalEstimationPolicy(0.01),
          () => new ChernoffIntervalEstimationPolicy(0.05),
          () => new ChernoffIntervalEstimationPolicy(0.1),
          () => new ChernoffIntervalEstimationPolicy(0.2),
          () => new ThresholdAscentPolicy(5, 0.01),
          () => new ThresholdAscentPolicy(5, 0.05),
          () => new ThresholdAscentPolicy(5, 0.1),
          () => new ThresholdAscentPolicy(5, 0.2),
          () => new ThresholdAscentPolicy(10, 0.01),
          () => new ThresholdAscentPolicy(10, 0.05),
          () => new ThresholdAscentPolicy(10, 0.1),
          () => new ThresholdAscentPolicy(10, 0.2),
          () => new ThresholdAscentPolicy(50, 0.01),
          () => new ThresholdAscentPolicy(50, 0.05),
          () => new ThresholdAscentPolicy(50, 0.1),
          () => new ThresholdAscentPolicy(50, 0.2),
          () => new ThresholdAscentPolicy(100, 0.01),
          () => new ThresholdAscentPolicy(100, 0.05),
          () => new ThresholdAscentPolicy(100, 0.1),
          () => new ThresholdAscentPolicy(100, 0.2),
          () => new ThresholdAscentPolicy(500, 0.01),
          () => new ThresholdAscentPolicy(500, 0.05),
          () => new ThresholdAscentPolicy(500, 0.1),
          () => new ThresholdAscentPolicy(500, 0.2),
          //() => new ThresholdAscentPolicy(5000, 0.01),
          //() => new ThresholdAscentPolicy(10000, 0.01),
        };

      var instanceFactories = new Func<Random, Tuple<IProblem, int>>[]
        {
          (rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 0, correctReward: 1, decoyReward: 0, phrasesAsSets: false), 15),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 0, correctReward: 1, decoyReward: 0, phrasesAsSets: true), 15),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 200, correctReward: 1, decoyReward: 0.5, phrasesAsSets: false), 15),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 200, correctReward: 1, decoyReward: 0.5, phrasesAsSets: true), 15),
          (rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
        };

      foreach (var instanceFactory in instanceFactories) {
        foreach (var useCanonical in new bool[] { true, false }) {
          foreach (var randomTries in new int[] { 0, 1, 10 /*, 5, 100, 500, 1000 */ }) {
            foreach (var policyFactory in policyFactories) {
              var myRandomTries = randomTries;
              var localRand = new Random(localRandSeed);
              var options = new ParallelOptions();
              options.MaxDegreeOfParallelism = 4;
              Parallel.For(0, reps, options, (i) => {
                Random myLocalRand;
                lock (localRand)
                  myLocalRand = new Random(localRand.Next());

                int iterations = 0;
                var globalStatistics = new SentenceSetStatistics();

                // alternative problems (kept for reference):
                //var problem = new SymbolicRegressionPoly10Problem();
                //var problem = new SantaFeAntProblem();
                //var problem = new PalindromeProblem();
                //var problem = new HardPalindromeProblem();
                //var problem = new RoyalPairProblem();
                //var problem = new EvenParityProblem();
                //var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy());

                var instance = instanceFactory(myLocalRand);
                var problem = instance.Item1;
                var maxLen = instance.Item2;

                var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
                  new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
                //var alg = new SequentialSearch(problem, maxLen, myLocalRand,
                //  myRandomTries,
                //  new GenericFunctionApproximationGrammarPolicy(problem, useCanonical));
                //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
                //var alg = new AlternativesContextSampler(problem, 25);

                alg.SolutionEvaluated += (sentence, quality) => {
                  iterations++;
                  globalStatistics.AddSentence(sentence, quality);
                  if (iterations % 1000 == 0) {
                    Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4} {5}", i, myRandomTries, policyFactory(), useCanonical, problem.ToString(), globalStatistics);
                  }
                };
                alg.FoundNewBestSolution += (sentence, quality) => {
                  //Console.WriteLine("{0,5} {1,25} {2} {3}",
                  //  myRandomTries, policyFactory(), useCanonical,
                  //  globalStatistics);
                };

                alg.Run(maxIterations);
              });
            }
          }
        }
      }
    }
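    // The grid above runs EpsGreedyPolicy both with its default value estimate (average
    // reward) and with a "max" selector ((aInfo) => aInfo.MaxReward). The following
    // self-contained sketch illustrates that idea with simple per-arm statistics; the
    // names ArmStats and SelectArmEpsGreedy are illustrative only and are not part of
    // HeuristicLab.Algorithms.Bandits.
    private sealed class ArmStats {
      public int Tries;
      public double SumReward;
      public double MaxReward;
      public double AvgReward { get { return Tries > 0 ? SumReward / Tries : 0.0; } }
    }

    private static int SelectArmEpsGreedy(Random rand, IList<ArmStats> arms, double eps, Func<ArmStats, double> value) {
      // with probability eps explore a uniformly random arm, otherwise exploit the arm
      // with the best value estimate (average or maximum reward, depending on the selector)
      if (rand.NextDouble() < eps) return rand.Next(arms.Count);
      int best = 0;
      for (int a = 1; a < arms.Count; a++)
        if (value(arms[a]) > value(arms[best])) best = a;
      return best;
    }
    // usage: SelectArmEpsGreedy(rand, arms, 0.2, s => s.MaxReward) prefers arms that have
    // produced a single very good sentence, which is the relevant objective here since we
    // search for the best solution rather than maximizing cumulative reward.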
    private static void RunDemo() {
      // TODO: unify MCTS, TD and ContextMCTS solvers (stateInfos)
      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
      // TODO: separate value function from policy
      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
      // TODO: research thompson sampling for max bandit?
      // TODO: verify TA implementation using the example from the original paper
      // TODO: implement thompson sampling for gaussian mixture models
      // TODO: simultaneous modeling of transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
      // TODO: for complete-random rollouts, compare generating sentences that are as short as possible vs. simply picking alternatives at random
      // TODO: reward discounting (for reward distributions that change over time); create a dedicated unit test for this
      // TODO: constant optimization

      int maxIterations = 1000000;
      int iterations = 0;
      var sw = new Stopwatch();

      var globalStatistics = new SentenceSetStatistics();
      var random = new Random();

      //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
      //var phraseLen = 3;
      //var numPhrases = 5;
      //var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: false);
      //var phraseLen = 3;
      //var numPhrases = 5;
      //var problem = new FindPhrasesProblem(random, 10, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0, phrasesAsSets: false);

      // good results for symb-reg:
      // prev results: e.g. 10 random tries and EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward)
      // 2015 01 19: grid test with canonical states:
      // - EpsGreedyPolicy(0.20, max)
      // - GenericThompsonSamplingPolicy("")
      // - UCTPolicy(0.10) (5 of 5 runs, 35000 iters avg.); 10 of 10 successful runs with rand-tries 0; at 40000 iters 9/10, at 30000 iters 1/10
      // 2015 01 22: symb-reg: grid test on find-phrases problem showed good results for UCB1TunedPolicy and SequentialSearch with canonical states
      // - symb-reg: consistent results with UCB1Tuned; finds the optimal solution in ~50k iters (new GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true))
      // 2015 01 23: grid test with canonical states:
      // - UCTPolicy(0.10) and UCBNormalPolicy: 10/10 optimal solutions within max. 50k iters; slightly worse: generic Thompson sampling with variable sigma, and Boltzmann exploration (100)

      // good results for artificial ant:
      // prev results:
      // - var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
      // - GaussianModelWithUnknownVariance (and Q = 0.99-quantile) also works well for the ant
      // 2015 01 19: grid test with canonical states (non-canonical slightly worse):
      // - ant: Threshold Ascent (best: 100, 0.01; all variants relatively good)
      // - ant: policies where the variance has a large weight compared to the mean? (Gaussian (compatible), Gaussian with fixed variance, UCT with large c, all TA variants)
      // - ant: UCB1Tuned with canonical states also works very well for the artificial ant! consistent solutions in less than 10k iters

      //var problem = new SymbolicRegressionPoly10Problem();
      //var problem = new SantaFeAntProblem();
      var problem = new SymbolicRegressionProblem(random, "Breiman");
      //var problem = new PalindromeProblem();
      //var problem = new HardPalindromeProblem();
      //var problem = new RoyalPairProblem();
      //var problem = new EvenParityProblem();
      // symbreg length = 11 q = 0.824522210419616

      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
      //var alg = new SequentialSearch(problem, 23, random, 0,
      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.QLearningGrammarPolicy(problem, new BoltzmannExplorationPolicy(10), 1, 1, true));
      //var alg = new SequentialSearch(problem, 23, random, 0,
      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericContextualGrammarPolicy(problem, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)), true));
      var alg = new SequentialSearch(problem, 30, random, 0,
        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericFunctionApproximationGrammarPolicy(problem, true));
      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
      //var alg = new MctsContextualSampler(problem, 23, random, 0); // must visit each canonical solution only once
      //var alg = new TemporalDifferenceTreeSearchSampler(problem, 30, random, 1);
      //var alg = new ExhaustiveBreadthFirstSearch(problem, 7);
      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
      //var alg = new AlternativesSampler(problem, 17);
      //var alg = new RandomSearch(problem, random, 17);
      //var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);

      alg.FoundNewBestSolution += (sentence, quality) => {
        //Console.WriteLine("{0}", globalStatistics);
        //Console.ReadLine();
      };
      alg.SolutionEvaluated += (sentence, quality) => {
        iterations++;
        globalStatistics.AddSentence(sentence, quality);

        //if (iterations % 100 == 0) {
        //  if (iterations % 10000 == 0) Console.Clear();
        //  Console.SetCursorPosition(0, 0);
        //  alg.PrintStats();
        //}
        //Console.WriteLine(sentence);

        if (iterations % 100 == 0) {
          Console.WriteLine("{0}", globalStatistics);
        }
      };

      sw.Start();
      alg.Run(maxIterations);
      sw.Stop();

      Console.Clear();
      alg.PrintStats();
      Console.WriteLine(globalStatistics);
      Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} µs/sol",
        sw.Elapsed.TotalSeconds,
        maxIterations / (double)sw.Elapsed.TotalSeconds,
        (double)sw.ElapsedMilliseconds * 1000 / maxIterations); // ms * 1000 = µs per solution
    }
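    // Several notes above refer to GenericThompsonSamplingPolicy with a GaussianModel.
    // The following sketch shows the core of Thompson sampling for Gaussian rewards with
    // known observation noise: sample a mean from each arm's posterior and play the argmax.
    // It is a minimal illustration with assumed hyperparameters (priorMean, priorVariance,
    // observationVariance); it is not the GaussianModel implementation itself.
    private static int SelectArmGaussianThompson(Random rand, int[] tries, double[] sumReward,
      double priorMean = 0.5, double priorVariance = 10, double observationVariance = 1) {
      int best = -1;
      double bestSample = double.NegativeInfinity;
      for (int a = 0; a < tries.Length; a++) {
        // conjugate update for a Gaussian mean with Gaussian prior (known observation variance)
        double precision = 1.0 / priorVariance + tries[a] / observationVariance;
        double posteriorVariance = 1.0 / precision;
        double posteriorMean = posteriorVariance * (priorMean / priorVariance + sumReward[a] / observationVariance);
        // draw one sample from N(posteriorMean, posteriorVariance) via Box-Muller
        double u1 = 1.0 - rand.NextDouble(), u2 = rand.NextDouble();
        double z = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
        double sample = posteriorMean + Math.Sqrt(posteriorVariance) * z;
        if (sample > bestSample) { bestSample = sample; best = a; }
      }
      return best;
    }
    // Arms with few tries keep a wide posterior and are therefore still sampled optimistically
    // from time to time, which is how Thompson sampling balances exploration and exploitation.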
    public static void RunGpDemo() {
      int iterations = 0;
      const int seed = 31415;
      const int maxIterations = 100000;

      //var prob = new SymbolicRegressionProblem(new Random(31415), "Tower");
      var prob = new SymbolicRegressionPoly10Problem();
      var sgp = new OffspringSelectionGP(prob, new Random(seed), true);
      RunGP(sgp, prob, 200000, 500, 0.15, 50);
    }
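    // Both grid drivers (RunGridTest above and RunGpGridTest below) derive one seed per
    // repetition from a master Random (localRand.Next() resp. rand.Next()) so that a whole
    // experiment is reproducible from a single seed while each repetition still gets an
    // independent random stream. A minimal sketch of that pattern; DeriveSeeds and
    // RunReplications are illustrative names, not part of the framework.
    private static int[] DeriveSeeds(int masterSeed, int nReps) {
      var masterRand = new Random(masterSeed);
      var seeds = new int[nReps];
      for (int i = 0; i < nReps; i++) seeds[i] = masterRand.Next(); // fixed order => reproducible
      return seeds;
    }

    private static void RunReplications(int masterSeed, int nReps, Action<Random> runReplication) {
      // deriving all seeds up front guarantees the parallel schedule cannot change the streams
      var seeds = DeriveSeeds(masterSeed, nReps);
      Parallel.For(0, nReps, i => runReplication(new Random(seeds[i])));
    }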
    private static void RunGpGridTest() {
      const int nReps = 20;
      const int seed = 31415;
      //const int maxIters = 50000;
      var rand = new Random(seed);

      var problemFactories = new Func<Tuple<int, int, ISymbolicExpressionTreeProblem>>[]
        {
          () => Tuple.Create(50000, 32, (ISymbolicExpressionTreeProblem)new PermutationProblem()),
          () => Tuple.Create(50000, 32, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
          () => Tuple.Create(50000, 32, (ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
          () => Tuple.Create(50000, 64, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
          () => Tuple.Create(50000, 64, (ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
          () => Tuple.Create(50000, 128, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
          () => Tuple.Create(50000, 128, (ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
          () => Tuple.Create(50000, 256, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
          () => Tuple.Create(50000, 256, (ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
          //() => new RoyalPairProblem(),
          //() => new FindPhrasesProblem(rand, 20, 5, 3, 5, 0, 1, 0, true),
          //() => new FindPhrasesProblem(rand, 20, 5, 3, 5, 0, 1, 0, false),
          //() => new FindPhrasesProblem(rand, 20, 5, 3, 5, 50, 1, 0.8, false),
        };

      foreach (var popSize in new int[] { 100 /*, 250, 500, 1000, 2500, 5000, 10000 */ }) {
        foreach (var mutationRate in new double[] { /* 0.05, 0.10, */ 0.15 /*, 0.25, 0.3 */ }) {
          // skip experiments that are already done
          foreach (var problemFactory in problemFactories) {
            for (int i = 0; i < nReps; i++) {
              {
                var solverSeed = rand.Next();
                var tupel = problemFactory();
                var maxIters = tupel.Item1;
                var maxSize = tupel.Item2;
                var prob = tupel.Item3;
                var sgp = new StandardGP(prob, new Random(solverSeed));
                RunGP(sgp, prob, maxIters, popSize, mutationRate, maxSize);
              }
              //{
              //  var prob = problemFactory();
              //  var osgp = new OffspringSelectionGP(prob, new Random(solverSeed));
              //  RunGP(osgp, prob, maxIters, popSize, mutationRate, maxSize);
              //}
            }
          }
        }
      }
    }

    private static void RunGP(IGPSolver gp, ISymbolicExpressionTreeProblem prob, int maxIters, int popSize, double mutationRate, int maxSize) {
      int iterations = 0;
      var globalStatistics = new SentenceSetStatistics(prob.BestKnownQuality(maxSize));
      var gpName = gp.GetType().Name;
      var probName = prob.GetType().Name;

      gp.SolutionEvaluated += (sentence, quality) => {
        iterations++;
        globalStatistics.AddSentence(sentence, quality);
        if (iterations % 100 == 0) {
          Console.WriteLine("\"{0,25}\" {1} {2:N2} {3} \"{4,25}\" {5}", gpName, popSize, mutationRate, maxSize, probName, globalStatistics);
        }
      };

      gp.PopulationSize = popSize;
      gp.MutationRate = mutationRate;
      gp.MaxSolutionSize = maxSize + 2; // +2 to account for the root and start nodes of the expression tree
      gp.MaxSolutionDepth = maxSize + 2;

      gp.Run(maxIters);
    }
  }
}