[11659] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Diagnostics;
|
---|
| 4 | using System.Linq;
|
---|
| 5 | using System.Text;
|
---|
| 6 | using System.Text.RegularExpressions;
|
---|
[12391] | 7 | using HeuristicLab.Common;
|
---|
[11847] | 8 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
[12391] | 9 | using HeuristicLab.Random;
|
---|
[11659] | 10 |
|
---|
| 11 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
| 12 | // counts the number of times a pair of symbols occurs in a sentence
|
---|
[11847] | 13 | public class RoyalPairProblem : ISymbolicExpressionTreeProblem {
|
---|
[11659] | 14 |
|
---|
| 15 | private readonly IGrammar grammar;
|
---|
[12290] | 16 | private readonly int numTerminals;
|
---|
[12391] | 17 | public string Name { get { return string.Format("RoyalPair({0})", numTerminals); } }
|
---|
[12290] | 18 |
|
---|
| 19 | public RoyalPairProblem(int numTerminals = 2) {
|
---|
| 20 | this.numTerminals = numTerminals;
|
---|
| 21 |
|
---|
| 22 | var sentenceSymbol = 'S';
|
---|
[12391] | 23 | var terminalSymbols = Enumerable.Range(0, numTerminals).Select(off => (char)((byte)'a' + off)).ToList();
|
---|
| 24 | terminalSymbols.ShuffleInPlace(new MersenneTwister(31415));
|
---|
| 25 |
|
---|
[12290] | 26 | var nonTerminalSymbols = new char[] { sentenceSymbol };
|
---|
| 27 |
|
---|
| 28 | {
|
---|
| 29 | // create grammar
|
---|
| 30 | // S -> a..z | aS .. zS
|
---|
| 31 | var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
|
---|
| 32 | .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString())));
|
---|
| 33 |
|
---|
| 34 | this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
|
---|
| 35 | }
|
---|
| 36 | {
|
---|
| 37 | // create grammar for tree-based GP
|
---|
| 38 | // S -> a..z | SS
|
---|
| 39 | var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
|
---|
| 40 | .Concat(new Tuple<char, string>[] { Tuple.Create(sentenceSymbol, sentenceSymbol.ToString() + sentenceSymbol) });
|
---|
| 41 |
|
---|
| 42 | this.TreeBasedGPGrammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
|
---|
| 43 | }
|
---|
| 44 |
|
---|
| 45 |
|
---|
[11659] | 46 | }
|
---|
| 47 |
|
---|
[11732] | 48 | public double BestKnownQuality(int maxLen) {
|
---|
[11659] | 49 | return maxLen - 1;
|
---|
| 50 | }
|
---|
| 51 |
|
---|
| 52 | public IGrammar Grammar {
|
---|
| 53 | get { return grammar; }
|
---|
| 54 | }
|
---|
| 55 |
|
---|
| 56 | private Regex regex = new Regex("(?=ab)|(?=ba)"); // count the number of "ab" and "ba" pairs
|
---|
| 57 | public double Evaluate(string sentence) {
|
---|
| 58 | // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
|
---|
| 59 | Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
|
---|
| 60 | return regex.Matches(sentence).Count;
|
---|
| 61 | }
|
---|
[11727] | 62 |
|
---|
[11832] | 63 | public string CanonicalRepresentation(string phrase) {
|
---|
| 64 | return phrase;
|
---|
| 65 | }
|
---|
| 66 |
|
---|
[11857] | 67 | public IEnumerable<Feature> GetFeatures(string phrase) {
|
---|
[12290] | 68 | if (phrase.Length <= 1)
|
---|
| 69 | yield return new Feature("$$", 1.0);
|
---|
| 70 | else if (phrase.Length == 2)
|
---|
| 71 | yield return new Feature(phrase, 1.0);
|
---|
| 72 | else if (phrase.EndsWith("S")) // second to last symbol
|
---|
| 73 | yield return new Feature(phrase.Substring(phrase.Length - 3, 2), 1.0);
|
---|
| 74 | else // last symbol
|
---|
| 75 | yield return new Feature(phrase.Substring(phrase.Length - 2, 2), 1.0);
|
---|
| 76 |
|
---|
[11727] | 77 | }
|
---|
[12893] | 78 | public bool IsOptimalPhrase(string phrase) {
|
---|
| 79 | throw new NotImplementedException();
|
---|
| 80 | }
|
---|
[12290] | 81 |
|
---|
[11857] | 82 | public IGrammar TreeBasedGPGrammar { get; private set; }
|
---|
[11847] | 83 | public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
|
---|
| 84 | var sb = new StringBuilder();
|
---|
| 85 | foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
|
---|
| 86 | if (s.Symbol.Name == "S") continue;
|
---|
| 87 | sb.Append(s.Symbol.Name);
|
---|
| 88 | }
|
---|
| 89 | return sb.ToString();
|
---|
| 90 | }
|
---|
[11659] | 91 | }
|
---|
| 92 | }
|
---|