using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Text; using System.Text.RegularExpressions; using HeuristicLab.Common; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Random; namespace HeuristicLab.Problems.GrammaticalOptimization { // counts the number of times a pair of symbols occurs in a sentence public class RoyalPairProblem : ISymbolicExpressionTreeProblem { private readonly IGrammar grammar; private readonly int numTerminals; public string Name { get { return string.Format("RoyalPair({0})", numTerminals); } } public RoyalPairProblem(int numTerminals = 2) { this.numTerminals = numTerminals; var sentenceSymbol = 'S'; var terminalSymbols = Enumerable.Range(0, numTerminals).Select(off => (char)((byte)'a' + off)).ToList(); terminalSymbols.ShuffleInPlace(new MersenneTwister(31415)); var nonTerminalSymbols = new char[] { sentenceSymbol }; { // create grammar // S -> a..z | aS .. zS var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString())) .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString()))); this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules); } { // create grammar for tree-based GP // S -> a..z | SS var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString())) .Concat(new Tuple[] { Tuple.Create(sentenceSymbol, sentenceSymbol.ToString() + sentenceSymbol) }); this.TreeBasedGPGrammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules); } } public double BestKnownQuality(int maxLen) { return maxLen - 1; } public IGrammar Grammar { get { return grammar; } } private Regex regex = new Regex("(?=ab)|(?=ba)"); // count the number of "ab" and "ba" pairs public double Evaluate(string sentence) { // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow! Debug.Assert(sentence.Any(c => grammar.IsTerminal(c))); return regex.Matches(sentence).Count; } public string CanonicalRepresentation(string phrase) { return phrase; } public IEnumerable GetFeatures(string phrase) { if (phrase.Length <= 1) yield return new Feature("$$", 1.0); else if (phrase.Length == 2) yield return new Feature(phrase, 1.0); else if (phrase.EndsWith("S")) // second to last symbol yield return new Feature(phrase.Substring(phrase.Length - 3, 2), 1.0); else // last symbol yield return new Feature(phrase.Substring(phrase.Length - 2, 2), 1.0); } public IGrammar TreeBasedGPGrammar { get; private set; } public string ConvertTreeToSentence(ISymbolicExpressionTree tree) { var sb = new StringBuilder(); foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) { if (s.Symbol.Name == "S") continue; sb.Append(s.Symbol.Name); } return sb.ToString(); } } }