1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Diagnostics;
|
---|
4 | using System.Linq;
|
---|
5 | using System.Text;
|
---|
6 | using System.Text.RegularExpressions;
|
---|
7 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
8 |
|
---|
9 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
10 | // counts the number of times a pair of symbols occurs in a sentence
|
---|
11 | public class RoyalPairProblem : ISymbolicExpressionTreeProblem {
|
---|
12 |
|
---|
13 | private readonly IGrammar grammar;
|
---|
14 | private readonly int numTerminals;
|
---|
15 | public string Name { get { return "RoyalPair"; } }
|
---|
16 |
|
---|
17 | public RoyalPairProblem(int numTerminals = 2) {
|
---|
18 | this.numTerminals = numTerminals;
|
---|
19 |
|
---|
20 | var sentenceSymbol = 'S';
|
---|
21 | var terminalSymbols = Enumerable.Range(0, numTerminals).Select(off => (char)((byte)'a' + off)).ToArray();
|
---|
22 | var nonTerminalSymbols = new char[] { sentenceSymbol };
|
---|
23 |
|
---|
24 | {
|
---|
25 | // create grammar
|
---|
26 | // S -> a..z | aS .. zS
|
---|
27 | var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
|
---|
28 | .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString())));
|
---|
29 |
|
---|
30 | this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
|
---|
31 | }
|
---|
32 | {
|
---|
33 | // create grammar for tree-based GP
|
---|
34 | // S -> a..z | SS
|
---|
35 | var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
|
---|
36 | .Concat(new Tuple<char, string>[] { Tuple.Create(sentenceSymbol, sentenceSymbol.ToString() + sentenceSymbol) });
|
---|
37 |
|
---|
38 | this.TreeBasedGPGrammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
|
---|
39 | }
|
---|
40 |
|
---|
41 |
|
---|
42 | }
|
---|
43 |
|
---|
44 | public double BestKnownQuality(int maxLen) {
|
---|
45 | return maxLen - 1;
|
---|
46 | }
|
---|
47 |
|
---|
48 | public IGrammar Grammar {
|
---|
49 | get { return grammar; }
|
---|
50 | }
|
---|
51 |
|
---|
52 | private Regex regex = new Regex("(?=ab)|(?=ba)"); // count the number of "ab" and "ba" pairs
|
---|
53 | public double Evaluate(string sentence) {
|
---|
54 | // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
|
---|
55 | Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
|
---|
56 | return regex.Matches(sentence).Count;
|
---|
57 | }
|
---|
58 |
|
---|
59 | public string CanonicalRepresentation(string phrase) {
|
---|
60 | return phrase;
|
---|
61 | }
|
---|
62 |
|
---|
63 | public IEnumerable<Feature> GetFeatures(string phrase) {
|
---|
64 | if (phrase.Length <= 1)
|
---|
65 | yield return new Feature("$$", 1.0);
|
---|
66 | else if (phrase.Length == 2)
|
---|
67 | yield return new Feature(phrase, 1.0);
|
---|
68 | else if (phrase.EndsWith("S")) // second to last symbol
|
---|
69 | yield return new Feature(phrase.Substring(phrase.Length - 3, 2), 1.0);
|
---|
70 | else // last symbol
|
---|
71 | yield return new Feature(phrase.Substring(phrase.Length - 2, 2), 1.0);
|
---|
72 |
|
---|
73 | }
|
---|
74 |
|
---|
75 | public IGrammar TreeBasedGPGrammar { get; private set; }
|
---|
76 | public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
|
---|
77 | var sb = new StringBuilder();
|
---|
78 | foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
|
---|
79 | if (s.Symbol.Name == "S") continue;
|
---|
80 | sb.Append(s.Symbol.Name);
|
---|
81 | }
|
---|
82 | return sb.ToString();
|
---|
83 | }
|
---|
84 | }
|
---|
85 | }
|
---|