[11747] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Diagnostics;
|
---|
| 4 | using System.Linq;
|
---|
| 5 | using System.Text;
|
---|
| 6 | using System.Text.RegularExpressions;
|
---|
| 7 | using HeuristicLab.Common;
|
---|
[11865] | 8 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
[11747] | 9 |
|
---|
| 10 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
| 11 | // must find one of k^sequenceLen optimal sequences; the quality of a sequence is the length of its leading run of correct symbols (starting at the first symbol)
|
---|
| 12 | // parameters
|
---|
| 13 | // - alphabetSize: number of different symbols (max=26)
|
---|
| 14 | // - sequenceLen: length of the correct subsequence
|
---|
| 15 | // - k: the number of correct symbols at each position
|
---|
| 16 | //
|
---|
| 17 | // this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS)
|
---|
// Benchmark problem over sentences of the symbols 'a'.. (alphabetSize letters).
// For each of the first sequenceLen positions, k randomly chosen symbols count as correct;
// a sentence is rewarded for every correct symbol up to the first incorrect one.
public class RoyalSequenceProblem : ISymbolicExpressionTreeProblem {

  // grammar used for sequential search: S -> a..z | aS .. zS
  private readonly IGrammar grammar;
  // reward added for every correct symbol of the leading run
  private readonly double correctReward;
  // reward (usually a penalty or zero) applied once per symbol from the first incorrect symbol onwards
  private readonly double incorrectReward;
  // number of positions that are scored
  private readonly int sequenceLen;
  // optimalSymbolsForPos[i] holds the k symbols that count as correct at position i
  private readonly SortedSet<char>[] optimalSymbolsForPos;

  public string Name { get { return "RoyalSequence"; } }

  // Creates a problem instance.
  // - rand: random source used to draw the correct symbols for each position
  // - alphabetSize: number of different terminal symbols (1..26)
  // - sequenceLen: number of scored positions (> 0)
  // - k: number of correct symbols per position (1..alphabetSize)
  // - correctReward / incorrectReward: rewards per symbol, correctReward must be larger
  // Throws ArgumentException (or the derived ArgumentNullException) for invalid arguments.
  public RoyalSequenceProblem(System.Random rand, int alphabetSize, int sequenceLen, int k = 1, double correctReward = 1.0, double incorrectReward = 0.0) {
    if (rand == null) throw new ArgumentNullException("rand");
    if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException("alphabetSize must be in the range 1..26", "alphabetSize");
    if (sequenceLen <= 0) throw new ArgumentException("sequenceLen must be positive", "sequenceLen");
    if (k < 1 || k > alphabetSize) throw new ArgumentException("k must be in the range 1..alphabetSize", "k");
    if (correctReward <= incorrectReward) throw new ArgumentException("correctReward must be larger than incorrectReward", "correctReward");
    this.sequenceLen = sequenceLen;
    this.correctReward = correctReward;
    this.incorrectReward = incorrectReward;

    const char sentenceSymbol = 'S';
    var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
    var nonTerminalSymbols = new char[] { sentenceSymbol };

    {
      // create grammar for sequential search
      // S -> a..z | aS .. zS
      var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
        .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString())));
      this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
    }
    {
      // create grammar for tree-based GP
      // S -> a..z | SS
      // (previously this block repeated the sequential rules and overwrote this.grammar,
      // which left TreeBasedGPGrammar unassigned)
      var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
        .Concat(new[] { Tuple.Create(sentenceSymbol, new string(sentenceSymbol, 2)) });
      this.TreeBasedGPGrammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
    }

    this.optimalSymbolsForPos = new SortedSet<char>[sequenceLen];
    for (int i = 0; i < sequenceLen; i++) {
      var optimalSymbols = new SortedSet<char>();
      // rejection sampling: one draw per iteration, duplicates leave the set unchanged
      while (optimalSymbols.Count < k) {
        optimalSymbols.Add(terminalSymbols.SelectRandom(rand));
      }
      optimalSymbolsForPos[i] = optimalSymbols;
    }
  }

  // Quality of a sentence in which all scored positions (limited by maxLen) are correct.
  public double BestKnownQuality(int maxLen) {
    return Math.Min(maxLen, sequenceLen) * correctReward;
  }

  public IGrammar Grammar {
    get { return grammar; }
  }

  // Sums correctReward over the leading run of correct symbols. At the first incorrect symbol
  // the result is reward + incorrectReward * (number of symbols from there to the end of the
  // sentence), clamped to be non-negative. Symbols beyond sequenceLen are not scored.
  public double Evaluate(string sentence) {
    // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
    Debug.Assert(sentence.All(c => grammar.IsTerminal(c)));
    var reward = 0.0;
    var scoredLen = Math.Min(sentence.Length, sequenceLen);
    for (int i = 0; i < scoredLen; i++) {
      if (optimalSymbolsForPos[i].Contains(sentence[i])) {
        reward += correctReward;
      } else {
        // reduce reward by number of remaining symbols
        return Math.Max(0.0, reward + incorrectReward * (sentence.Length - i));
      }
    }
    return reward;
  }

  // in each position there could be multiple correct and incorrect symbols
  // Replaces every correct symbol by the first symbol of the set for its position;
  // incorrect symbols and symbols beyond sequenceLen are copied unchanged.
  public string CanonicalRepresentation(string phrase) {
    var sb = new StringBuilder(phrase.Length);
    for (int i = 0; i < phrase.Length; i++) {
      // positions >= sequenceLen have no set of optimal symbols
      if (i < sequenceLen && optimalSymbolsForPos[i].Contains(phrase[i])) {
        sb.Append(optimalSymbolsForPos[i].First()); // all symbols in the set are equivalent
      } else {
        sb.Append(phrase[i]);
      }
    }
    return sb.ToString();
  }

  // Features are not available for this problem.
  public IEnumerable<Feature> GetFeatures(string phrase) {
    throw new NotImplementedException();
  }

  // grammar used for tree-based GP: S -> a..z | SS
  public IGrammar TreeBasedGPGrammar { get; private set; }

  // Concatenates the names of all nodes below the start node in prefix order,
  // skipping the non-terminal "S" nodes, so that only terminal symbols remain.
  public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
    var sb = new StringBuilder();
    foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
      if (s.Symbol.Name == "S") continue;
      sb.Append(s.Symbol.Name);
    }
    return sb.ToString();
  }
}
|
---|
| 117 | }
|
---|