1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Diagnostics;
|
---|
4 | using System.Linq;
|
---|
5 | using System.Text;
|
---|
6 | using System.Text.RegularExpressions;
|
---|
7 | using HeuristicLab.Common;
|
---|
8 |
|
---|
9 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
// The solver must find one of the k^sequenceLen optimal sequences (k correct symbols at each of the sequenceLen positions).
// The quality of a sequence is the length of its prefix (starting at the first symbol) that contains only correct symbols.
// parameters
// - alphabetSize: number of different symbols (max=26)
// - sequenceLen: length of the correct subsequence
// - k: the number of correct symbols at each position
//
// this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS)
|
---|
/// <summary>
/// Benchmark problem over sentences of lower-case letters generated by the grammar S -> t | tS.
/// For each of the first <c>sequenceLen</c> positions a random set of <c>k</c> terminal symbols is
/// marked as correct; a sentence is rewarded for every correct symbol up to the first incorrect one.
/// </summary>
public class RoyalSequenceProblem : IProblem {

  private readonly IGrammar grammar;
  private readonly double correctReward;
  private readonly double incorrectReward;
  private readonly int sequenceLen;
  // optimalSymbolsForPos[i] holds the k symbols that count as correct at position i (exactly sequenceLen entries)
  private readonly SortedSet<char>[] optimalSymbolsForPos;

  /// <summary>
  /// Creates a problem instance and randomly draws the correct symbols for each position.
  /// </summary>
  /// <param name="rand">Random number generator used to draw the correct symbols.</param>
  /// <param name="alphabetSize">Number of different terminal symbols, 1..26 ('a' upwards).</param>
  /// <param name="sequenceLen">Number of positions that are scored; must be positive.</param>
  /// <param name="k">Number of correct symbols per position, 1..alphabetSize.</param>
  /// <param name="correctReward">Reward added for each correct symbol; must be greater than <paramref name="incorrectReward"/>.</param>
  /// <param name="incorrectReward">Factor applied to the number of remaining symbols when the first incorrect symbol is hit.</param>
  /// <exception cref="ArgumentException">An argument is null or outside its valid range.</exception>
  public RoyalSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int k = 1, double correctReward = 1.0, double incorrectReward = 0.0) {
    // ArgumentNullException derives from ArgumentException, so callers catching ArgumentException still work
    if (rand == null) throw new ArgumentNullException("rand");
    if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException("alphabetSize must be in the range 1..26.", "alphabetSize");
    if (sequenceLen <= 0) throw new ArgumentException("sequenceLen must be positive.", "sequenceLen");
    if (k < 1 || k > alphabetSize) throw new ArgumentException("k must be in the range 1..alphabetSize.", "k");
    if (correctReward <= incorrectReward) throw new ArgumentException("correctReward must be greater than incorrectReward.", "correctReward");
    this.sequenceLen = sequenceLen;
    this.correctReward = correctReward;
    this.incorrectReward = incorrectReward;

    // grammar: S -> t | tS for every terminal t in 'a' .. ('a' + alphabetSize - 1)
    const char sentenceSymbol = 'S';
    var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
    var nonTerminalSymbols = new char[] { sentenceSymbol };
    var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
      .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString())));
    this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);

    // draw k distinct correct symbols for each position (rejection sampling; terminates because k <= alphabetSize)
    this.optimalSymbolsForPos = new SortedSet<char>[sequenceLen];
    for (int i = 0; i < sequenceLen; i++) {
      optimalSymbolsForPos[i] = new SortedSet<char>();
      for (int j = 0; j < k; j++) {
        char ch;
        do {
          ch = terminalSymbols.SelectRandom(rand);
        } while (optimalSymbolsForPos[i].Contains(ch));
        optimalSymbolsForPos[i].Add(ch);
      }
    }
  }

  /// <summary>
  /// Returns the quality of a sentence that is correct in every scored position:
  /// min(maxLen, sequenceLen) * correctReward.
  /// </summary>
  /// <param name="maxLen">Maximum sentence length available to the solver.</param>
  public double BestKnownQuality(int maxLen) {
    return Math.Min(maxLen, sequenceLen) * correctReward;
  }

  /// <summary>The grammar S -> t | tS over the terminal alphabet of this instance.</summary>
  public IGrammar Grammar {
    get { return grammar; }
  }

  /// <summary>
  /// Scores a sentence: adds correctReward for each leading correct symbol. At the first incorrect
  /// symbol (index i) it returns max(0, reward + incorrectReward * (sentence.Length - i)).
  /// Symbols at positions >= sequenceLen are not inspected.
  /// </summary>
  /// <param name="sentence">Sentence consisting of terminal symbols only.</param>
  /// <returns>The non-negative quality of the sentence.</returns>
  public double Evaluate(string sentence) {
    // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
    // (debug builds only; All instead of Any so that a single non-terminal symbol is actually detected)
    Debug.Assert(sentence.All(c => grammar.IsTerminal(c)));
    var reward = 0.0;
    var scoredLen = Math.Min(sentence.Length, sequenceLen);
    for (int i = 0; i < scoredLen; i++) {
      if (optimalSymbolsForPos[i].Contains(sentence[i])) {
        reward += correctReward;
      } else {
        // reduce reward by number of remaining symbols
        return Math.Max(0.0, reward + incorrectReward * (sentence.Length - i));
      }
    }
    return reward;
  }

  /// <summary>
  /// Maps a phrase to a canonical form: every correct symbol within the first sequenceLen positions
  /// is replaced by the smallest correct symbol for that position; all other symbols are copied unchanged.
  /// </summary>
  /// <param name="terminalPhrase">Phrase of terminal symbols; may be longer than sequenceLen.</param>
  // in each position there could be multiple correct and incorrect symbols
  public string CanonicalRepresentation(string terminalPhrase) {
    var sb = new StringBuilder(terminalPhrase.Length);
    for (int i = 0; i < terminalPhrase.Length; i++) {
      // positions beyond sequenceLen have no set of correct symbols (indexing them would be out of range)
      if (i < sequenceLen && optimalSymbolsForPos[i].Contains(terminalPhrase[i])) {
        sb.Append(optimalSymbolsForPos[i].Min); // all symbols in the set are equivalent
      } else {
        sb.Append(terminalPhrase[i]);
      }
    }
    return sb.ToString();
  }
}
|
---|
91 | }
|
---|