using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Problems.GrammaticalOptimization;

namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Bandit whose arms are the grammar alternatives for the first non-terminal
  /// of an incomplete phrase. Pulling an arm applies that alternative, completes
  /// the resulting phrase randomly and returns the problem's evaluation of the
  /// completed sentence.
  /// </summary>
  public class SentenceBandit : IBandit {
    /// <summary>Number of alternatives available for the first non-terminal of the phrase.</summary>
    public int NumArms { get; private set; }

    // NOTE(review): the two properties below are never assigned in this class,
    // so they always report the default value 0 - confirm whether that is intended.
    public int OptimalExpectedRewardArm { get; private set; }
    public int OptimalMaximalRewardArm { get; private set; }

    private readonly System.Random random;
    private readonly IProblem problem;
    private readonly IGrammar grammar;
    private readonly int maxLen;
    private readonly ReadonlySequence incompletePhrase;
    private readonly Sequence[] replacements;

    /// <summary>
    /// Creates a bandit for the given incomplete phrase.
    /// </summary>
    /// <param name="random">Random number generator handed to the grammar when completing sentences.</param>
    /// <param name="problem">Problem that supplies the grammar and evaluates completed sentences.</param>
    /// <param name="incompletePhrase">Phrase whose first non-terminal determines the arms.</param>
    /// <param name="maxLen">Length limit passed to the grammar's random sentence completion.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="random"/>, <paramref name="problem"/> or <paramref name="incompletePhrase"/> is null.
    /// </exception>
    public SentenceBandit(System.Random random, IProblem problem, string incompletePhrase, int maxLen = 200) {
      // Fail fast with a descriptive exception instead of a NullReferenceException
      // here (problem.Grammar) or much later inside Pull (random).
      if (random == null) throw new ArgumentNullException("random");
      if (problem == null) throw new ArgumentNullException("problem");
      if (incompletePhrase == null) throw new ArgumentNullException("incompletePhrase");

      this.random = random;
      this.incompletePhrase = new ReadonlySequence(incompletePhrase);
      this.problem = problem;
      this.grammar = problem.Grammar;
      this.maxLen = maxLen;

      // One arm per alternative of the first non-terminal in the phrase.
      this.replacements = grammar.GetNonTerminalAlternatives(this.incompletePhrase.FirstNonTerminal).ToArray();
      NumArms = replacements.Length;
    }

    /// <summary>
    /// Applies the alternative selected by <paramref name="arm"/> to the first
    /// non-terminal of the phrase, completes the result randomly and evaluates it.
    /// </summary>
    /// <param name="arm">Zero-based index of the alternative, in the range [0, NumArms).</param>
    /// <returns>The value returned by the problem for the completed sentence.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="arm"/> is outside [0, NumArms).</exception>
    public double Pull(int arm) {
      if (arm < 0 || arm >= replacements.Length) {
        throw new ArgumentOutOfRangeException("arm", arm, "Arm index must be in the range [0, NumArms).");
      }

      // make the selected replacement on a private copy of the phrase ...
      var s = new Sequence(incompletePhrase);
      s.ReplaceAt(s.FirstNonTerminalIndex, 1, replacements[arm]);

      // ... and complete randomly to evaluate
      var completeSentence = grammar.CompleteSentenceRandomly(random, s, maxLen);
      return problem.Evaluate(completeSentence.ToString());
    }
  }
}