using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Text; using System.Text.RegularExpressions; using HeuristicLab.Common; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; namespace HeuristicLab.Problems.GrammaticalOptimization { // must find one of k*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct symbols and starting at the first symbol // parameters // - alphabetSize: number of different symbols (max=26) // - sequenceLen: length of the correct subsequence // - k: the number of correct symbols at each position // // this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS) public class RoyalSequenceProblem : ISymbolicExpressionTreeProblem { private readonly IGrammar grammar; private readonly double correctReward; private readonly double incorrectReward; private readonly int sequenceLen; private readonly SortedSet[] optimalSymbolsForPos; public string Name { get { return "RoyalSequence"; } } public RoyalSequenceProblem(System.Random rand, int alphabetSize, int sequenceLen, int k = 1, double correctReward = 1.0, double incorrectReward = 0.0) { if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException(); if (sequenceLen <= 0) throw new ArgumentException(); if (k < 1 || k > alphabetSize) throw new ArgumentException(); if (correctReward <= incorrectReward) throw new ArgumentException(); this.sequenceLen = sequenceLen; this.correctReward = correctReward; this.incorrectReward = incorrectReward; const char sentenceSymbol = 'S'; var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray(); var nonTerminalSymbols = new char[] { sentenceSymbol }; { // create grammar for sequential search // S -> a..z | aS .. zS var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString())) .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString()))); this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules); } { // create grammar for sequential search // S -> a..z | SS var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString())) .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString()))); this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules); } this.optimalSymbolsForPos = new SortedSet[sequenceLen]; for (int i = 0; i < sequenceLen; i++) { optimalSymbolsForPos[i] = new SortedSet(); for (int j = 0; j < k; j++) { char ch; do { ch = terminalSymbols.SelectRandom(rand); } while (optimalSymbolsForPos[i].Contains(ch)); optimalSymbolsForPos[i].Add(ch); } } } public double BestKnownQuality(int maxLen) { return Math.Min(maxLen, sequenceLen) * correctReward; } public IGrammar Grammar { get { return grammar; } } public double Evaluate(string sentence) { // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow! Debug.Assert(sentence.Any(c => grammar.IsTerminal(c))); var reward = 0.0; for (int i = 0; i < Math.Min(sentence.Length, sequenceLen); i++) { if (optimalSymbolsForPos[i].Contains(sentence[i])) { reward += correctReward; } else { // reduce reward by number of remaining symbols return Math.Max(0.0, reward + incorrectReward * (sentence.Length - i)); } } return reward; } // in each position there could be multiple correct and incorrect symbols public string CanonicalRepresentation(string phrase) { var sb = new StringBuilder(); for (int i = 0; i < phrase.Length; i++) { if (optimalSymbolsForPos[i].Contains(phrase[i])) { sb.Append(optimalSymbolsForPos[i].First()); // all symbols in the set are equivalent } else { sb.Append(phrase[i]); } } return sb.ToString(); } public IEnumerable GetFeatures(string phrase) { throw new NotImplementedException(); } public IGrammar TreeBasedGPGrammar { get; private set; } public string ConvertTreeToSentence(ISymbolicExpressionTree tree) { var sb = new StringBuilder(); foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) { if (s.Symbol.Name == "S") continue; sb.Append(s.Symbol.Name); } return sb.ToString(); } } }