using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Algorithms.Bandits;
using HeuristicLab.Algorithms.Bandits.BanditPolicies;
using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
using HeuristicLab.Algorithms.Bandits.Models;
using HeuristicLab.Algorithms.GrammaticalOptimization;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;

namespace HeuristicLab.Problems.GrammaticalOptimization.Test {
  /// <summary>
  /// Demo/benchmark harness: runs <c>SequentialSearch</c> with every bandit policy of a
  /// parameter grid on a list of problem instances and prints progress statistics to the
  /// console. The test method contains no assertions; it only fails if something throws.
  /// </summary>
  [TestClass]
  public class RunDemo {
    /// <summary>
    /// Sweeps problem instance x canonical-flag x number of random tries x bandit policy and
    /// runs <c>reps</c> independent repetitions of <c>maxIterations</c> evaluations for each
    /// combination.
    /// </summary>
    [TestMethod]
    public void RunGridTest() {
      int maxIterations = 20000; // for poly-10 with 50000 evaluations no successful try with hl yet
      //var globalRandom = new Random(31415);
      // NOTE(review): a fresh seed is drawn on every run and its value is never printed,
      // so the output of a particular run cannot be reproduced afterwards.
      var localRandSeed = new Random().Next();
      var reps = 20;

      var policyFactories = CreatePolicyFactories();
      var instanceFactories = CreateInstanceFactories();

      // MaxDegreeOfParallelism = 1: the repetitions of one configuration run one at a time.
      var options = new ParallelOptions { MaxDegreeOfParallelism = 1 };

      foreach (var instanceFactory in instanceFactories) {
        foreach (var useCanonical in new bool[] { true /*, false */ }) {
          foreach (var randomTries in new int[] { 0 /*, 1, 10, 5, 100, 500, 1000 */ }) {
            foreach (var policyFactory in policyFactories) {
              // Copies for the closure below (foreach variables are shared between iterations
              // in older C# compilers).
              var myInstanceFactory = instanceFactory;
              var myUseCanonical = useCanonical;
              var myRandomTries = randomTries;
              var myPolicyFactory = policyFactory;

              // The label only depends on the policy configuration, so it is computed once
              // here instead of constructing a throwaway policy for every progress line.
              var policyLabel = myPolicyFactory().ToString();

              // Re-created from the same seed for every configuration, so each configuration
              // draws the same sequence of per-repetition seeds.
              var localRand = new Random(localRandSeed);

              Parallel.For(0, reps, options, (i) => {
                // localRand is shared by all loop bodies and System.Random is not thread-safe,
                // therefore the per-repetition seed is drawn under a lock.
                Random myLocalRand;
                lock (localRand) {
                  myLocalRand = new Random(localRand.Next());
                }

                RunRepetition(i, myLocalRand, myInstanceFactory, myPolicyFactory, policyLabel,
                  myUseCanonical, myRandomTries, maxIterations);
              });
            }
          }
        }
      }
    }

    /// <summary>
    /// Builds the grid of bandit policy configurations to compare. Factories (instead of
    /// instances) are returned so that every repetition gets its own policy object.
    /// </summary>
    /// <returns>One factory per policy configuration.</returns>
    private static Func<IBanditPolicy>[] CreatePolicyFactories() {
      return new Func<IBanditPolicy>[]
        {
          () => new RandomPolicy(),
          () => new ActiveLearningPolicy(),
          () => new EpsGreedyPolicy(0.01, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.05, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.1, (aInfo) => aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward, "max"),
          //() => new GaussianThompsonSamplingPolicy(),
          () => new GaussianThompsonSamplingPolicy(true),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
          //() => new BernoulliThompsonSamplingPolicy(),
          () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
          () => new EpsGreedyPolicy(0.01),
          () => new EpsGreedyPolicy(0.05),
          () => new EpsGreedyPolicy(0.1),
          () => new EpsGreedyPolicy(0.2),
          () => new EpsGreedyPolicy(0.5),
          () => new UCTPolicy(0.01),
          () => new UCTPolicy(0.05),
          () => new UCTPolicy(0.1),
          () => new UCTPolicy(0.5),
          () => new UCTPolicy(1),
          () => new UCTPolicy(2),
          () => new UCTPolicy(5),
          () => new UCTPolicy(10),
          () => new ModifiedUCTPolicy(0.01),
          () => new ModifiedUCTPolicy(0.05),
          () => new ModifiedUCTPolicy(0.1),
          () => new ModifiedUCTPolicy(0.5),
          () => new ModifiedUCTPolicy(1),
          () => new ModifiedUCTPolicy(2),
          () => new ModifiedUCTPolicy(5),
          () => new ModifiedUCTPolicy(10),
          () => new UCB1Policy(),
          () => new UCB1TunedPolicy(),
          () => new UCBNormalPolicy(),
          () => new BoltzmannExplorationPolicy(1),
          () => new BoltzmannExplorationPolicy(10),
          () => new BoltzmannExplorationPolicy(20),
          () => new BoltzmannExplorationPolicy(100),
          () => new BoltzmannExplorationPolicy(200),
          () => new BoltzmannExplorationPolicy(500),
          () => new ChernoffIntervalEstimationPolicy(0.01),
          () => new ChernoffIntervalEstimationPolicy(0.05),
          () => new ChernoffIntervalEstimationPolicy(0.1),
          () => new ChernoffIntervalEstimationPolicy(0.2),
          () => new ThresholdAscentPolicy(5, 0.01),
          () => new ThresholdAscentPolicy(5, 0.05),
          () => new ThresholdAscentPolicy(5, 0.1),
          () => new ThresholdAscentPolicy(5, 0.2),
          () => new ThresholdAscentPolicy(10, 0.01),
          () => new ThresholdAscentPolicy(10, 0.05),
          () => new ThresholdAscentPolicy(10, 0.1),
          () => new ThresholdAscentPolicy(10, 0.2),
          () => new ThresholdAscentPolicy(50, 0.01),
          () => new ThresholdAscentPolicy(50, 0.05),
          () => new ThresholdAscentPolicy(50, 0.1),
          () => new ThresholdAscentPolicy(50, 0.2),
          () => new ThresholdAscentPolicy(100, 0.01),
          () => new ThresholdAscentPolicy(100, 0.05),
          () => new ThresholdAscentPolicy(100, 0.1),
          () => new ThresholdAscentPolicy(100, 0.2),
          () => new ThresholdAscentPolicy(500, 0.01),
          () => new ThresholdAscentPolicy(500, 0.05),
          () => new ThresholdAscentPolicy(500, 0.1),
          () => new ThresholdAscentPolicy(500, 0.2),
          () => new ThresholdAscentPolicy(5000, 0.01),
          () => new ThresholdAscentPolicy(10000, 0.01),
        };
    }

    /// <summary>
    /// Builds the list of problem instances to run on. Each factory receives the random
    /// number generator of the repetition and returns the problem together with the
    /// maximum length that is handed to the search algorithm.
    /// </summary>
    /// <returns>One factory per problem instance; disabled instances are kept as comments.</returns>
    private static Func<Random, Tuple<IProblem, int>>[] CreateInstanceFactories() {
      return new Func<Random, Tuple<IProblem, int>>[]
      {
        //(rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:false ), 15),
        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:true ), 15),
        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:false), 15),
        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:true), 15),
        //(rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
        (rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17)
      };
    }

    /// <summary>
    /// Executes a single repetition: creates the problem instance and a fresh policy, runs
    /// <c>SequentialSearch</c> for <paramref name="maxIterations"/> iterations and prints
    /// one progress line after every 1000 evaluated solutions.
    /// </summary>
    /// <param name="rep">Index of the repetition (first column of the progress line).</param>
    /// <param name="rand">Random number generator used by this repetition only.</param>
    /// <param name="instanceFactory">Creates the problem and its maximum length.</param>
    /// <param name="policyFactory">Creates the bandit policy used by the grammar policy.</param>
    /// <param name="policyLabel">Text printed for the policy in the progress line.</param>
    /// <param name="useCanonical">Flag passed through to <c>GenericGrammarPolicy</c>.</param>
    /// <param name="randomTries">Number of random tries passed through to <c>SequentialSearch</c>.</param>
    /// <param name="maxIterations">Iteration budget passed to <c>Run</c>.</param>
    private static void RunRepetition(int rep, Random rand, Func<Random, Tuple<IProblem, int>> instanceFactory,
      Func<IBanditPolicy> policyFactory, string policyLabel, bool useCanonical, int randomTries, int maxIterations) {
      int iterations = 0;
      var globalStatistics = new SentenceSetStatistics();

      // Disabled alternatives:
      // var problem = new SymbolicRegressionPoly10Problem();
      // var problem = new SantaFeAntProblem();
      //var problem = new PalindromeProblem();
      //var problem = new HardPalindromeProblem();
      //var problem = new RoyalPairProblem();
      //var problem = new EvenParityProblem();
      // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy());
      var instance = instanceFactory(rand);
      var problem = instance.Item1;
      var maxLen = instance.Item2;
      var alg = new SequentialSearch(problem, maxLen, rand, randomTries,
        new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
      // var alg = new SequentialSearch(problem, maxLen, myLocalRand,
      //   myRandomTries,
      //   new GenericFunctionApproximationGrammarPolicy(problem,
      //     useCanonical));
      //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
      //var alg = new AlternativesContextSampler(problem, 25);

      alg.SolutionEvaluated += (sentence, quality) => {
        iterations++;
        globalStatistics.AddSentence(sentence, quality);
        if (iterations % 1000 == 0) {
          Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4} {5}", rep, randomTries, policyLabel, useCanonical, problem.ToString(), globalStatistics);
        }
      };

      // The handler body is empty on purpose (output disabled). The subscription itself is
      // kept. NOTE(review): it is not visible here whether SequentialSearch checks for
      // subscribers before raising the event - confirm before removing it.
      alg.FoundNewBestSolution += (sentence, quality) => {
        //Console.WriteLine("{0,5} {1,25} {2} {3}",
        //  myRandomTries, policyFactory(), useCanonical,
        //  globalStatistics);
      };

      alg.Run(maxIterations);
    }
  }
}