1 | using System;
2 | using System.Collections.Generic;
3 | using System.Diagnostics;
4 | using System.Linq;
5 | using System.Text;
6 | using System.Text.RegularExpressions;
7 | using HeuristicLab.Common;
8 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
9 |
10 | namespace HeuristicLab.Problems.GrammaticalOptimization {
// must find one of the k^sequenceLen optimal sequences; the quality of a sequence is the length of its leading run of correct symbols (counted from the first symbol)
12 | // parameters
13 | // - alphabetSize: number of different symbols (max=26)
14 | // - sequenceLen: length of the correct subsequence
15 | // - k: the number of correct symbols at each position
16 | //
17 | // this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS)
18 | public class RoyalSequenceProblem : ISymbolicExpressionTreeProblem {
19 |
20 | private readonly IGrammar grammar;
21 | private readonly double correctReward;
22 | private readonly double incorrectReward;
23 | private readonly int sequenceLen;
24 | private readonly SortedSet<char>[] optimalSymbolsForPos;
25 | public string Name { get { return "RoyalSequence"; } }
26 | public RoyalSequenceProblem(System.Random rand, int alphabetSize, int sequenceLen, int k = 1, double correctReward = 1.0, double incorrectReward = 0.0) {
27 | if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException();
28 | if (sequenceLen <= 0) throw new ArgumentException();
29 | if (k < 1 || k > alphabetSize) throw new ArgumentException();
30 | if (correctReward <= incorrectReward) throw new ArgumentException();
31 | this.sequenceLen = sequenceLen;
32 | this.correctReward = correctReward;
33 | this.incorrectReward = incorrectReward;
34 |
35 | const char sentenceSymbol = 'S';
36 | var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
37 | var nonTerminalSymbols = new char[] { sentenceSymbol };
38 |
39 | {
40 | // create grammar for sequential search
41 | // S -> a..z | aS .. zS
42 | var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
43 | .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString())));
44 | this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
45 | }
46 | {
47 | // create grammar for sequential search
48 | // S -> a..z | SS
49 | var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
50 | .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString())));
51 | this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
52 | }
53 |
54 | this.optimalSymbolsForPos = new SortedSet<char>[sequenceLen];
55 | for (int i = 0; i < sequenceLen; i++) {
56 | optimalSymbolsForPos[i] = new SortedSet<char>();
57 | for (int j = 0; j < k; j++) {
58 | char ch;
59 | do {
60 | ch = terminalSymbols.SelectRandom(rand);
61 | } while (optimalSymbolsForPos[i].Contains(ch));
62 | optimalSymbolsForPos[i].Add(ch);
63 | }
64 | }
65 | }
66 |
67 | public double BestKnownQuality(int maxLen) {
68 | return Math.Min(maxLen, sequenceLen) * correctReward;
69 | }
70 |
71 | public IGrammar Grammar {
72 | get { return grammar; }
73 | }
74 |
75 | public double Evaluate(string sentence) {
76 | // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
77 | Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
78 | var reward = 0.0;
79 | for (int i = 0; i < Math.Min(sentence.Length, sequenceLen); i++) {
80 | if (optimalSymbolsForPos[i].Contains(sentence[i])) {
81 | reward += correctReward;
82 | } else {
83 | // reduce reward by number of remaining symbols
84 | return Math.Max(0.0, reward + incorrectReward * (sentence.Length - i));
85 | }
86 | }
87 | return reward;
88 | }
89 |
90 | // in each position there could be multiple correct and incorrect symbols
91 | public string CanonicalRepresentation(string phrase) {
92 | var sb = new StringBuilder();
93 | for (int i = 0; i < phrase.Length; i++) {
94 | if (optimalSymbolsForPos[i].Contains(phrase[i])) {
95 | sb.Append(optimalSymbolsForPos[i].First()); // all symbols in the set are equivalent
96 | } else {
97 | sb.Append(phrase[i]);
98 | }
99 | }
100 | return sb.ToString();
101 | }
102 |
103 | public IEnumerable<Feature> GetFeatures(string phrase) {
104 | throw new NotImplementedException();
105 | }
106 | public bool IsOptimalPhrase(string phrase) {
107 | throw new NotImplementedException();
108 | }
109 |
110 | public IGrammar TreeBasedGPGrammar { get; private set; }
111 | public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
112 | var sb = new StringBuilder();
113 | foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
114 | if (s.Symbol.Name == "S") continue;
115 | sb.Append(s.Symbol.Name);
116 | }
117 | return sb.ToString();
118 | }
119 | }
120 | }