using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using HeuristicLab.Algorithms.Bandits;
using HeuristicLab.Algorithms.Bandits.BanditPolicies;
using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
using HeuristicLab.Algorithms.Bandits.Models;
using HeuristicLab.Algorithms.GeneticProgramming;
using HeuristicLab.Algorithms.GrammaticalOptimization;
using HeuristicLab.Problems.GrammaticalOptimization;
using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
using BoltzmannExplorationPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.BoltzmannExplorationPolicy;
using EpsGreedyPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.EpsGreedyPolicy;
using IProblem = HeuristicLab.Problems.GrammaticalOptimization.IProblem;
using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;
using UCTPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.UCTPolicy;

namespace Main {
  class Program {
    static void Main(string[] args) {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;

      //RunDemo();
      //RunGpDemo();
      //RunGridTest();
      RunGpGridTest();
    }

    private static void RunGridTest() {
      int maxIterations = 200000; // for poly-10 with 50000 evaluations there has been no successful run with HL so far
      //var globalRandom = new Random(31415);
      var localRandSeed = 31415;
      var reps = 20;

      var policyFactories = new Func<IBanditPolicy>[]
        {
          () => new RandomPolicy(),
          () => new ActiveLearningPolicy(),
          () => new EpsGreedyPolicy(0.01, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.05, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.1, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward, "max"),
          //() => new GaussianThompsonSamplingPolicy(),
          () => new GaussianThompsonSamplingPolicy(true),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
          //() => new BernoulliThompsonSamplingPolicy(),
          () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
          () => new EpsGreedyPolicy(0.01),
          () => new EpsGreedyPolicy(0.05),
          () => new EpsGreedyPolicy(0.1),
          () => new EpsGreedyPolicy(0.2),
          () => new EpsGreedyPolicy(0.5),
          () => new UCTPolicy(0.01),
          () => new UCTPolicy(0.05),
          () => new UCTPolicy(0.1),
          () => new UCTPolicy(0.5),
          () => new UCTPolicy(1),
          () => new UCTPolicy(2),
          () => new UCTPolicy(5),
          () => new UCTPolicy(10),
          () => new ModifiedUCTPolicy(0.01),
          () => new ModifiedUCTPolicy(0.05),
          () => new ModifiedUCTPolicy(0.1),
          () => new ModifiedUCTPolicy(0.5),
          () => new ModifiedUCTPolicy(1),
          () => new ModifiedUCTPolicy(2),
          () => new ModifiedUCTPolicy(5),
          () => new ModifiedUCTPolicy(10),
          () => new UCB1Policy(),
          () => new UCB1TunedPolicy(),
          () => new UCBNormalPolicy(),
          () => new BoltzmannExplorationPolicy(1),
          () => new BoltzmannExplorationPolicy(10),
          () => new BoltzmannExplorationPolicy(20),
          () => new BoltzmannExplorationPolicy(100),
          () => new BoltzmannExplorationPolicy(200),
          () => new BoltzmannExplorationPolicy(500),
          () => new ChernoffIntervalEstimationPolicy(0.01),
          () => new ChernoffIntervalEstimationPolicy(0.05),
          () => new ChernoffIntervalEstimationPolicy(0.1),
          () => new ChernoffIntervalEstimationPolicy(0.2),
          () => new ThresholdAscentPolicy(5, 0.01),
          () => new ThresholdAscentPolicy(5, 0.05),
          () => new ThresholdAscentPolicy(5, 0.1),
          () => new ThresholdAscentPolicy(5, 0.2),
          () => new ThresholdAscentPolicy(10, 0.01),
          () => new ThresholdAscentPolicy(10, 0.05),
          () => new ThresholdAscentPolicy(10, 0.1),
          () => new ThresholdAscentPolicy(10, 0.2),
          () => new ThresholdAscentPolicy(50, 0.01),
          () => new ThresholdAscentPolicy(50, 0.05),
          () => new ThresholdAscentPolicy(50, 0.1),
          () => new ThresholdAscentPolicy(50, 0.2),
          () => new ThresholdAscentPolicy(100, 0.01),
          () => new ThresholdAscentPolicy(100, 0.05),
          () => new ThresholdAscentPolicy(100, 0.1),
          () => new ThresholdAscentPolicy(100, 0.2),
          () => new ThresholdAscentPolicy(500, 0.01),
          () => new ThresholdAscentPolicy(500, 0.05),
          () => new ThresholdAscentPolicy(500, 0.1),
          () => new ThresholdAscentPolicy(500, 0.2),
          //() => new ThresholdAscentPolicy(5000, 0.01),
          //() => new ThresholdAscentPolicy(10000, 0.01),
        };

      var instanceFactories = new Func<Random, Tuple<IProblem, int>>[]
        {
          (rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 0, correctReward: 1, decoyReward: 0, phrasesAsSets: false), 15),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 0, correctReward: 1, decoyReward: 0, phrasesAsSets: true), 15),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 200, correctReward: 1, decoyReward: 0.5, phrasesAsSets: false), 15),
          //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases: 5, phraseLen: 3, numOptimalPhrases: 5, numDecoyPhrases: 200, correctReward: 1, decoyReward: 0.5, phrasesAsSets: true), 15),
          (rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
        };

      foreach (var instanceFactory in instanceFactories) {
        foreach (var useCanonical in new bool[] { true, false }) {
          foreach (var randomTries in new int[] { 0, 1, 10 /*, 5, 100, 500, 1000 */ }) {
            foreach (var policyFactory in policyFactories) {
              var myRandomTries = randomTries;
              var localRand = new Random(localRandSeed);
              var options = new ParallelOptions();
              options.MaxDegreeOfParallelism = 4;
              Parallel.For(0, reps, options, (i) => {
                Random myLocalRand;
                lock (localRand)
                  myLocalRand = new Random(localRand.Next());

                int iterations = 0;
                var globalStatistics = new SentenceSetStatistics();

                // alternative problems (kept for reference):
                //var problem = new SymbolicRegressionPoly10Problem();
                //var problem = new SantaFeAntProblem();
                //var problem = new PalindromeProblem();
                //var problem = new HardPalindromeProblem();
                //var problem = new RoyalPairProblem();
                //var problem = new EvenParityProblem();
                //var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy());

                var instance = instanceFactory(myLocalRand);
                var problem = instance.Item1;
                var maxLen = instance.Item2;

                var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
                  new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
                //var alg = new SequentialSearch(problem, maxLen, myLocalRand,
                //  myRandomTries,
                //  new GenericFunctionApproximationGrammarPolicy(problem, useCanonical));
                //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
                //var alg = new AlternativesContextSampler(problem, 25);

                alg.SolutionEvaluated += (sentence, quality) => {
                  iterations++;
                  globalStatistics.AddSentence(sentence, quality);
                  if (iterations % 1000 == 0) {
                    Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4} {5}", i, myRandomTries, policyFactory(), useCanonical, problem.ToString(), globalStatistics);
                  }
                };
                alg.FoundNewBestSolution += (sentence, quality) => {
                  //Console.WriteLine("{0,5} {1,25} {2} {3}",
                  //  myRandomTries, policyFactory(), useCanonical,
                  //  globalStatistics);
                };

                alg.Run(maxIterations);
              });
            }
          }
        }
      }
    }
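    // The grid above runs EpsGreedyPolicy both with its default value estimate (average
    // reward) and with a "max" selector ((aInfo) => aInfo.MaxReward). The following
    // self-contained sketch illustrates that idea with simple per-arm statistics; the
    // names ArmStats and SelectArmEpsGreedy are illustrative only and are not part of
    // HeuristicLab.Algorithms.Bandits.
    private sealed class ArmStats {
      public int Tries;
      public double SumReward;
      public double MaxReward;
      public double AvgReward { get { return Tries > 0 ? SumReward / Tries : 0.0; } }
    }

    private static int SelectArmEpsGreedy(Random rand, IList<ArmStats> arms, double eps, Func<ArmStats, double> value) {
      // with probability eps explore a uniformly random arm, otherwise exploit the arm
      // with the best value estimate (average or maximum reward, depending on the selector)
      if (rand.NextDouble() < eps) return rand.Next(arms.Count);
      int best = 0;
      for (int a = 1; a < arms.Count; a++)
        if (value(arms[a]) > value(arms[best])) best = a;
      return best;
    }
    // usage: SelectArmEpsGreedy(rand, arms, 0.2, s => s.MaxReward) prefers arms that have
    // produced a single very good sentence, which is the relevant objective here since we
    // search for the best solution rather than maximizing cumulative reward.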
    private static void RunDemo() {
      // TODO: unify MCTS, TD and ContextMCTS solvers (stateInfos)
      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
      // TODO: separate value function from policy
      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
      // TODO: research thompson sampling for max bandit?
      // TODO: verify TA implementation using the example from the original paper
      // TODO: implement thompson sampling for gaussian mixture models
      // TODO: simultaneous modeling of transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
      // TODO: for complete-random rollouts, compare generating sentences that are as short as possible vs. simply picking alternatives at random
      // TODO: reward discounting (for reward distributions that change over time); create a dedicated unit test for this
      // TODO: constant optimization

      int maxIterations = 1000000;
      int iterations = 0;
      var sw = new Stopwatch();

      var globalStatistics = new SentenceSetStatistics();
      var random = new Random();

      //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
      //var phraseLen = 3;
      //var numPhrases = 5;
      //var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: false);
      //var phraseLen = 3;
      //var numPhrases = 5;
      //var problem = new FindPhrasesProblem(random, 10, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0, phrasesAsSets: false);

      // good results for symb-reg:
      // prev results: e.g. 10 random tries and EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward)
      // 2015 01 19: grid test with canonical states:
      // - EpsGreedyPolicy(0.20, max)
      // - GenericThompsonSamplingPolicy("")
      // - UCTPolicy(0.10) (5 of 5 runs, 35000 iters avg.); 10 of 10 successful runs with rand-tries 0; at 40000 iters 9/10, at 30000 iters 1/10
      // 2015 01 22: symb-reg: grid test on find-phrases problem showed good results for UCB1TunedPolicy and SequentialSearch with canonical states
      // - symb-reg: consistent results with UCB1Tuned; finds the optimal solution in ~50k iters (new GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true))
      // 2015 01 23: grid test with canonical states:
      // - UCTPolicy(0.10) and UCBNormalPolicy: 10/10 optimal solutions within max. 50k iters; slightly worse: generic Thompson sampling with variable sigma, and Boltzmann exploration (100)

      // good results for artificial ant:
      // prev results:
      // - var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
      // - GaussianModelWithUnknownVariance (and Q = 0.99-quantile) also works well for the ant
      // 2015 01 19: grid test with canonical states (non-canonical slightly worse):
      // - ant: Threshold Ascent (best: 100, 0.01; all variants relatively good)
      // - ant: policies where the variance has a large weight compared to the mean? (Gaussian (compatible), Gaussian with fixed variance, UCT with large c, all TA variants)
      // - ant: UCB1Tuned with canonical states also works very well for the artificial ant! consistent solutions in less than 10k iters

      //var problem = new SymbolicRegressionPoly10Problem();
      //var problem = new SantaFeAntProblem();
      var problem = new SymbolicRegressionProblem(random, "Breiman");
      //var problem = new PalindromeProblem();
      //var problem = new HardPalindromeProblem();
      //var problem = new RoyalPairProblem();
      //var problem = new EvenParityProblem();
      // symbreg length = 11 q = 0.824522210419616

      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
      //var alg = new SequentialSearch(problem, 23, random, 0,
      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.QLearningGrammarPolicy(problem, new BoltzmannExplorationPolicy(10), 1, 1, true));
      //var alg = new SequentialSearch(problem, 23, random, 0,
      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericContextualGrammarPolicy(problem, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)), true));
      var alg = new SequentialSearch(problem, 30, random, 0,
        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericFunctionApproximationGrammarPolicy(problem, true));
      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
      //var alg = new MctsContextualSampler(problem, 23, random, 0); // must visit each canonical solution only once
      //var alg = new TemporalDifferenceTreeSearchSampler(problem, 30, random, 1);
      //var alg = new ExhaustiveBreadthFirstSearch(problem, 7);
      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
      //var alg = new AlternativesSampler(problem, 17);
      //var alg = new RandomSearch(problem, random, 17);
      //var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);

      alg.FoundNewBestSolution += (sentence, quality) => {
        //Console.WriteLine("{0}", globalStatistics);
        //Console.ReadLine();
      };
      alg.SolutionEvaluated += (sentence, quality) => {
        iterations++;
        globalStatistics.AddSentence(sentence, quality);

        //if (iterations % 100 == 0) {
        //  if (iterations % 10000 == 0) Console.Clear();
        //  Console.SetCursorPosition(0, 0);
        //  alg.PrintStats();
        //}
        //Console.WriteLine(sentence);

        if (iterations % 100 == 0) {
          Console.WriteLine("{0}", globalStatistics);
        }
      };

      sw.Start();
      alg.Run(maxIterations);
      sw.Stop();

      Console.Clear();
      alg.PrintStats();
      Console.WriteLine(globalStatistics);
      Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} µs/sol",
        sw.Elapsed.TotalSeconds,
        maxIterations / (double)sw.Elapsed.TotalSeconds,
        (double)sw.ElapsedMilliseconds * 1000 / maxIterations); // ms * 1000 = µs per solution
    }
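    // Several notes above refer to GenericThompsonSamplingPolicy with a GaussianModel.
    // The following sketch shows the core of Thompson sampling for Gaussian rewards with
    // known observation noise: sample a mean from each arm's posterior and play the argmax.
    // It is a minimal illustration with assumed hyperparameters (priorMean, priorVariance,
    // observationVariance); it is not the GaussianModel implementation itself.
    private static int SelectArmGaussianThompson(Random rand, int[] tries, double[] sumReward,
      double priorMean = 0.5, double priorVariance = 10, double observationVariance = 1) {
      int best = -1;
      double bestSample = double.NegativeInfinity;
      for (int a = 0; a < tries.Length; a++) {
        // conjugate update for a Gaussian mean with Gaussian prior (known observation variance)
        double precision = 1.0 / priorVariance + tries[a] / observationVariance;
        double posteriorVariance = 1.0 / precision;
        double posteriorMean = posteriorVariance * (priorMean / priorVariance + sumReward[a] / observationVariance);
        // draw one sample from N(posteriorMean, posteriorVariance) via Box-Muller
        double u1 = 1.0 - rand.NextDouble(), u2 = rand.NextDouble();
        double z = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
        double sample = posteriorMean + Math.Sqrt(posteriorVariance) * z;
        if (sample > bestSample) { bestSample = sample; best = a; }
      }
      return best;
    }
    // Arms with few tries keep a wide posterior and are therefore still sampled optimistically
    // from time to time, which is how Thompson sampling balances exploration and exploitation.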
    public static void RunGpDemo() {
      int iterations = 0;
      const int seed = 31415;
      const int maxIterations = 100000;

      //var prob = new SymbolicRegressionProblem(new Random(31415), "Tower");
      var prob = new SymbolicRegressionPoly10Problem();
      var sgp = new OffspringSelectionGP(prob, new Random(seed), true);
      RunGP(sgp, prob, 200000, 500, 0.15, 50);
    }
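    // Both grid drivers (RunGridTest above and RunGpGridTest below) derive one seed per
    // repetition from a master Random (localRand.Next() resp. rand.Next()) so that a whole
    // experiment is reproducible from a single seed while each repetition still gets an
    // independent random stream. A minimal sketch of that pattern; DeriveSeeds and
    // RunReplications are illustrative names, not part of the framework.
    private static int[] DeriveSeeds(int masterSeed, int nReps) {
      var masterRand = new Random(masterSeed);
      var seeds = new int[nReps];
      for (int i = 0; i < nReps; i++) seeds[i] = masterRand.Next(); // fixed order => reproducible
      return seeds;
    }

    private static void RunReplications(int masterSeed, int nReps, Action<Random> runReplication) {
      // deriving all seeds up front guarantees the parallel schedule cannot change the streams
      var seeds = DeriveSeeds(masterSeed, nReps);
      Parallel.For(0, nReps, i => runReplication(new Random(seeds[i])));
    }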
    private static void RunGpGridTest() {
      const int nReps = 20;
      const int seed = 31415;
      //const int maxIters = 50000;
      var rand = new Random(seed);

      var problemFactories = new Func<Tuple<int, int, ISymbolicExpressionTreeProblem>>[]
        {
          () => Tuple.Create(50000, 32, (ISymbolicExpressionTreeProblem)new PermutationProblem()),
          () => Tuple.Create(50000, 32, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
          () => Tuple.Create(50000, 32, (ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
          () => Tuple.Create(50000, 64, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
          () => Tuple.Create(50000, 64, (ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
          () => Tuple.Create(50000, 128, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
          () => Tuple.Create(50000, 128, (ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
          () => Tuple.Create(50000, 256, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
          () => Tuple.Create(50000, 256, (ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
          //() => new RoyalPairProblem(),
          //() => new FindPhrasesProblem(rand, 20, 5, 3, 5, 0, 1, 0, true),
          //() => new FindPhrasesProblem(rand, 20, 5, 3, 5, 0, 1, 0, false),
          //() => new FindPhrasesProblem(rand, 20, 5, 3, 5, 50, 1, 0.8, false),
        };

      foreach (var popSize in new int[] { 100 /*, 250, 500, 1000, 2500, 5000, 10000 */ }) {
        foreach (var mutationRate in new double[] { /* 0.05, 0.10, */ 0.15 /*, 0.25, 0.3 */ }) {
          // skip experiments that are already done
          foreach (var problemFactory in problemFactories) {
            for (int i = 0; i < nReps; i++) {
              {
                var solverSeed = rand.Next();
                var tupel = problemFactory();
                var maxIters = tupel.Item1;
                var maxSize = tupel.Item2;
                var prob = tupel.Item3;
                var sgp = new StandardGP(prob, new Random(solverSeed));
                RunGP(sgp, prob, maxIters, popSize, mutationRate, maxSize);
              }
              //{
              //  var prob = problemFactory();
              //  var osgp = new OffspringSelectionGP(prob, new Random(solverSeed));
              //  RunGP(osgp, prob, maxIters, popSize, mutationRate, maxSize);
              //}
            }
          }
        }
      }
    }

    private static void RunGP(IGPSolver gp, ISymbolicExpressionTreeProblem prob, int maxIters, int popSize, double mutationRate, int maxSize) {
      int iterations = 0;
      var globalStatistics = new SentenceSetStatistics(prob.BestKnownQuality(maxSize));
      var gpName = gp.GetType().Name;
      var probName = prob.GetType().Name;

      gp.SolutionEvaluated += (sentence, quality) => {
        iterations++;
        globalStatistics.AddSentence(sentence, quality);
        if (iterations % 100 == 0) {
          Console.WriteLine("\"{0,25}\" {1} {2:N2} {3} \"{4,25}\" {5}", gpName, popSize, mutationRate, maxSize, probName, globalStatistics);
        }
      };

      gp.PopulationSize = popSize;
      gp.MutationRate = mutationRate;
      gp.MaxSolutionSize = maxSize + 2; // +2 to account for the root and start nodes of the expression tree
      gp.MaxSolutionDepth = maxSize + 2;

      gp.Run(maxIters);
    }
  }
}