source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs @ 11754

Last change on this file since 11754 was 11754, checked in by gkronber, 7 years ago

#2283 added phrase-finding problem

File size: 4.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Text.RegularExpressions;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Problems.GrammaticalOptimization {
10  // must find a set of phrases where the ordering of phrases is irrelevant
11  // Parameters
12  // - size of the alphabet
13  // - phrase length
14  // - number of phrases in the sequence
15  // - number of optimal phrases
16  // - reward for optimal phrases
17  // - number of decoy (sub-optimal) phrases
18  // - reward for decoy phrases (must be smaller than reward for optimal phrases)
19  public class FindPhrasesProblem : IProblem {
20
21    private readonly IGrammar grammar;
22    private readonly int alphabetSize;
23    private readonly int numPhrases;
24    private readonly int phraseLen;
25    private readonly int numOptimalPhrases;
26    private readonly int numDecoyPhrases;
27    private readonly double correctReward;
28    private readonly double decoyReward;
29    private readonly SortedSet<string> optimalPhrases;
30    private readonly SortedSet<string> decoyPhrases;
31
32    public FindPhrasesProblem(Random rand, int alphabetSize, int numPhrases, int phraseLen, int numOptimalPhrases, int numDecoyPhrases = 1,
33      double correctReward = 1.0, double decoyReward = 0.0) {
34      if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException();
35      if (numPhrases <= 0) throw new ArgumentException();
36      if (phraseLen < 1) throw new ArgumentException();
37      if (numOptimalPhrases < numPhrases) throw new ArgumentException();
38      if (numDecoyPhrases < 0) throw new ArgumentException();
39      if (correctReward <= decoyReward) throw new ArgumentException();
40
41      this.alphabetSize = alphabetSize;
42      this.numPhrases = numPhrases;
43      this.phraseLen = phraseLen;
44      this.correctReward = correctReward;
45      this.decoyReward = decoyReward;
46
47      // create grammar
48      var sentenceSymbol = 'S';
49      var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
50      var nonTerminalSymbols = new char[] { 'S' };
51      var rules = terminalSymbols.Select(t => Tuple.Create('S', t.ToString()))
52        .Concat(terminalSymbols.Select(t => Tuple.Create('S', t + "S")));
53
54      this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
55
56      this.optimalPhrasesForPos = new SortedSet<string>[sequenceLen];
57      for (int i = 0; i < sequenceLen; i++) {
58        optimalPhrasesForPos[i] = new SortedSet<string>();
59        for (int j = 0; j < k; j++) {
60          string phrase = "";
61          do {
62            for (int l = 0; l < phraseLen; l++) {
63              phrase += terminalSymbols.SelectRandom(rand);
64            }
65          } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases
66          optimalPhrasesForPos[i].Add(phrase);
67        }
68      }
69
70      Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0);
71    }
72
73    public double BestKnownQuality(int maxLen) {
74      return Math.Min(maxLen / phraseLen, sequenceLen) * correctReward; // integer division
75    }
76
77    public string BestKnownSolution {
78      get {
79        string solution = "";
80        for (int i = 0; i < sequenceLen; i++) {
81          solution += optimalPhrasesForPos[i].First();
82        }
83        return solution;
84      }
85    }
86
87    public IGrammar Grammar {
88      get { return grammar; }
89    }
90
91    public double Evaluate(string sentence) {
92      // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
93      Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
94      // as long as only correct symbols are found we increase the reward by +1
95      // on the first incorrect symbol we return
96      var reward = 0.0;
97      for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) {
98        if (optimalPhrasesForPos[i].Contains(sentence.Substring(i * phraseLen, phraseLen))) {
99          reward += correctReward;
100        } else {
101          // alternatively reduce reward by number of remaining phrases
102          return Math.Max(0.0, reward + incorrectReward * (sentence.Length / phraseLen - i));
103          // stop on first incorrect symbol and return reward
104          //return reward;
105        }
106      }
107      return reward;
108    }
109
110    public string CanonicalRepresentation(string terminalPhrase) {
111      return terminalPhrase;
112    }
113  }
114}
Note: See TracBrowser for help on using the repository browser.