[11754] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Diagnostics;
|
---|
| 4 | using System.Linq;
|
---|
| 5 | using System.Text;
|
---|
| 6 | using System.Text.RegularExpressions;
|
---|
| 7 | using HeuristicLab.Common;
|
---|
| 8 |
|
---|
| 9 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
| 10 | // must find a set of phrases where the ordering of phrases is irrelevant
|
---|
| 11 | // Parameters
|
---|
| 12 | // - size of the alphabet
|
---|
| 13 | // - phrase length
|
---|
| 14 | // - number of phrases in the sequence
|
---|
| 15 | // - number of optimal phrases
|
---|
| 16 | // - reward for optimal phrases
|
---|
| 17 | // - number of decoy (sub-optimal) phrases
|
---|
| 18 | // - reward for decoy phrases (must be smaller than reward for optimal phrases)
|
---|
| 19 | public class FindPhrasesProblem : IProblem {
|
---|
| 20 |
|
---|
| 21 | private readonly IGrammar grammar;
|
---|
| 22 | private readonly int alphabetSize;
|
---|
| 23 | private readonly int numPhrases;
|
---|
| 24 | private readonly int phraseLen;
|
---|
| 25 | private readonly int numOptimalPhrases;
|
---|
| 26 | private readonly int numDecoyPhrases;
|
---|
| 27 | private readonly double correctReward;
|
---|
| 28 | private readonly double decoyReward;
|
---|
| 29 | private readonly SortedSet<string> optimalPhrases;
|
---|
| 30 | private readonly SortedSet<string> decoyPhrases;
|
---|
| 31 |
|
---|
| 32 | public FindPhrasesProblem(Random rand, int alphabetSize, int numPhrases, int phraseLen, int numOptimalPhrases, int numDecoyPhrases = 1,
|
---|
| 33 | double correctReward = 1.0, double decoyReward = 0.0) {
|
---|
| 34 | if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException();
|
---|
| 35 | if (numPhrases <= 0) throw new ArgumentException();
|
---|
| 36 | if (phraseLen < 1) throw new ArgumentException();
|
---|
| 37 | if (numOptimalPhrases < numPhrases) throw new ArgumentException();
|
---|
| 38 | if (numDecoyPhrases < 0) throw new ArgumentException();
|
---|
| 39 | if (correctReward <= decoyReward) throw new ArgumentException();
|
---|
| 40 |
|
---|
| 41 | this.alphabetSize = alphabetSize;
|
---|
| 42 | this.numPhrases = numPhrases;
|
---|
| 43 | this.phraseLen = phraseLen;
|
---|
| 44 | this.correctReward = correctReward;
|
---|
| 45 | this.decoyReward = decoyReward;
|
---|
| 46 |
|
---|
| 47 | // create grammar
|
---|
| 48 | var sentenceSymbol = 'S';
|
---|
| 49 | var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
|
---|
| 50 | var nonTerminalSymbols = new char[] { 'S' };
|
---|
| 51 | var rules = terminalSymbols.Select(t => Tuple.Create('S', t.ToString()))
|
---|
| 52 | .Concat(terminalSymbols.Select(t => Tuple.Create('S', t + "S")));
|
---|
| 53 |
|
---|
| 54 | this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
|
---|
| 55 |
|
---|
| 56 | this.optimalPhrasesForPos = new SortedSet<string>[sequenceLen];
|
---|
| 57 | for (int i = 0; i < sequenceLen; i++) {
|
---|
| 58 | optimalPhrasesForPos[i] = new SortedSet<string>();
|
---|
| 59 | for (int j = 0; j < k; j++) {
|
---|
| 60 | string phrase = "";
|
---|
| 61 | do {
|
---|
| 62 | for (int l = 0; l < phraseLen; l++) {
|
---|
| 63 | phrase += terminalSymbols.SelectRandom(rand);
|
---|
| 64 | }
|
---|
| 65 | } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases
|
---|
| 66 | optimalPhrasesForPos[i].Add(phrase);
|
---|
| 67 | }
|
---|
| 68 | }
|
---|
| 69 |
|
---|
| 70 | Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0);
|
---|
| 71 | }
|
---|
| 72 |
|
---|
| 73 | public double BestKnownQuality(int maxLen) {
|
---|
| 74 | return Math.Min(maxLen / phraseLen, sequenceLen) * correctReward; // integer division
|
---|
| 75 | }
|
---|
| 76 |
|
---|
| 77 | public string BestKnownSolution {
|
---|
| 78 | get {
|
---|
| 79 | string solution = "";
|
---|
| 80 | for (int i = 0; i < sequenceLen; i++) {
|
---|
| 81 | solution += optimalPhrasesForPos[i].First();
|
---|
| 82 | }
|
---|
| 83 | return solution;
|
---|
| 84 | }
|
---|
| 85 | }
|
---|
| 86 |
|
---|
| 87 | public IGrammar Grammar {
|
---|
| 88 | get { return grammar; }
|
---|
| 89 | }
|
---|
| 90 |
|
---|
| 91 | public double Evaluate(string sentence) {
|
---|
| 92 | // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
|
---|
| 93 | Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
|
---|
| 94 | // as long as only correct symbols are found we increase the reward by +1
|
---|
| 95 | // on the first incorrect symbol we return
|
---|
| 96 | var reward = 0.0;
|
---|
| 97 | for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) {
|
---|
| 98 | if (optimalPhrasesForPos[i].Contains(sentence.Substring(i * phraseLen, phraseLen))) {
|
---|
| 99 | reward += correctReward;
|
---|
| 100 | } else {
|
---|
| 101 | // alternatively reduce reward by number of remaining phrases
|
---|
| 102 | return Math.Max(0.0, reward + incorrectReward * (sentence.Length / phraseLen - i));
|
---|
| 103 | // stop on first incorrect symbol and return reward
|
---|
| 104 | //return reward;
|
---|
| 105 | }
|
---|
| 106 | }
|
---|
| 107 | return reward;
|
---|
| 108 | }
|
---|
| 109 |
|
---|
| 110 | public string CanonicalRepresentation(string terminalPhrase) {
|
---|
| 111 | return terminalPhrase;
|
---|
| 112 | }
|
---|
| 113 | }
|
---|
| 114 | }
|
---|