source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs @ 11755

Last change on this file since 11755 was 11755, checked in by gkronber, 7 years ago

#2283: implemented synthetic benchmark problems (modeling symb-reg) with configurable hardness

File size: 6.1 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Text.RegularExpressions;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Problems.GrammaticalOptimization {
10  // must find one of numCorrectPhrases*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position
11  // compared to the RoyalSequence problem this problem is harder because the number of different phrases starting at a position is much larger than the number of symbols (grows exponentially with the phrase-length)
12  // if phraseLen = 1 this is the same as the RoyalSequence problem
13  // parameters
14  // - alphabetSize: number of different symbols (max=26)
15  // - phraseLen: the length of a phrase in number of symbols
16  // - sequenceLen: the number of phrases in the correct subsequence (total sequence length is n * phraseLen
17  // - numCorrectPhrases: the number of correct phrases starting at each position
18  // - phrasesAsSets: switch to determine if the ordering of symbols within a phrase is relevant
19  //
20  // this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS)
21  // for phraseLen > 1 this should be harder than RoyalSymbolProblem
22  // when phrases are symbol sets instead of sequences then value-estimation routines should be better (TD)
23  public class RoyalPhraseSequenceProblem : IProblem {
24
25    private readonly IGrammar grammar;
26    private readonly double correctReward;
27    private readonly double incorrectReward;
28    private readonly int _numCorrectPhrases;
29    private readonly int sequenceLen;
30    private readonly int alphabetSize;
31    private readonly int phraseLen;
32    private readonly bool phrasesAsSets;
33    private readonly SortedSet<string>[] optimalPhrasesForPos;
34
35    public RoyalPhraseSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int numCorrectPhrases = 1, double correctReward = 1.0, double incorrectReward = 0.0, bool phrasesAsSets = false) {
36      if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException();
37      if (sequenceLen <= 0) throw new ArgumentException();
38      if (numCorrectPhrases < 1 || numCorrectPhrases > alphabetSize) throw new ArgumentException();
39      if (phraseLen < 1) throw new ArgumentException();
40      if (correctReward <= incorrectReward) throw new ArgumentException();
41
42      this.alphabetSize = alphabetSize;
43      this.sequenceLen = sequenceLen;
44      this.phraseLen = phraseLen;
45      this._numCorrectPhrases = numCorrectPhrases;
46      this.correctReward = correctReward;
47      this.incorrectReward = incorrectReward;
48      this.phrasesAsSets = phrasesAsSets;
49      var sentenceSymbol = 'S';
50      var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
51      var nonTerminalSymbols = new char[] { 'S' };
52      var rules = terminalSymbols.Select(t => Tuple.Create('S', t.ToString()))
53        .Concat(terminalSymbols.Select(t => Tuple.Create('S', t + "S")));
54
55      this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
56
57      this.optimalPhrasesForPos = new SortedSet<string>[sequenceLen];
58      for (int i = 0; i < sequenceLen; i++) {
59        optimalPhrasesForPos[i] = new SortedSet<string>();
60        for (int j = 0; j < numCorrectPhrases; j++) {
61          string phrase = "";
62          do {
63            for (int l = 0; l < phraseLen; l++) {
64              phrase += terminalSymbols.SelectRandom(rand);
65            }
66            phrase = CanonicalPhrase(phrase);
67          } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases
68          optimalPhrasesForPos[i].Add(phrase);
69        }
70      }
71
72      Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0);
73    }
74
75    public double BestKnownQuality(int maxLen) {
76      return Math.Min(maxLen / phraseLen, sequenceLen) * correctReward; // integer division
77    }
78
79    public string BestKnownSolution {
80      get {
81        string solution = "";
82        for (int i = 0; i < sequenceLen; i++) {
83          solution += optimalPhrasesForPos[i].First();
84        }
85        return solution;
86      }
87    }
88
89    public IGrammar Grammar {
90      get { return grammar; }
91    }
92
93    public double Evaluate(string sentence) {
94      // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
95      Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
96      // as long as only correct symbols are found we increase the reward by +1
97      // on the first incorrect symbol we return
98      var reward = 0.0;
99      for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) {
100        var canonicalPhrase = CanonicalPhrase(sentence.Substring(i * phraseLen, phraseLen));
101        if (optimalPhrasesForPos[i].Contains(canonicalPhrase)) {
102          reward += correctReward;
103        } else {
104          // alternatively reduce reward by number of remaining phrases
105          return Math.Max(0.0, reward + incorrectReward * (sentence.Length / phraseLen - i));
106          // stop on first incorrect symbol and return reward
107          //return reward;
108        }
109      }
110      return reward;
111    }
112
113    private string CanonicalPhrase(string phrase) {
114      if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch));
115      else return phrase;
116    }
117
118    public string CanonicalRepresentation(string terminalPhrase) {
119      if (phrasesAsSets) {
120        var phrases = new List<string>();
121        var numPhrases = terminalPhrase.Length / phraseLen;
122        for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) {
123          var sentenceIdx = phraseIdx * phraseLen;
124          var phrase = terminalPhrase.Substring(sentenceIdx, phraseLen);
125          phrase = CanonicalPhrase(phrase);
126          phrases.Add(phrase);
127        }
128
129        return string.Join("", phrases);
130      } else
131        return terminalPhrase;
132    }
133  }
134}
Note: See TracBrowser for help on using the repository browser.