Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.GrammaticalOptimization/Problems/RoyalSequenceProblem.cs @ 12448

Last change on this file since 12448 was 12391, checked in by gkronber, 10 years ago

#2283: added shuffling of terminal symbols to the royal pair problem to make sure that there is no bias from order of terminal symbols.

File size: 4.9 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Text.RegularExpressions;
7using HeuristicLab.Common;
8using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
9
namespace HeuristicLab.Problems.GrammaticalOptimization {
  /// <summary>
  /// Royal sequence problem: the solver must find one of the k^sequenceLen optimal sequences,
  /// where the quality of a sentence is determined by the length of its prefix that contains
  /// only correct symbols (counted from the first symbol).
  /// </summary>
  /// <remarks>
  /// Parameters:
  /// - alphabetSize: number of different symbols (max=26)
  /// - sequenceLen: length of the correct subsequence
  /// - k: the number of correct symbols at each position
  ///
  /// This problem should be hard for GP and easy for MCTS
  /// (TD should not have an advantage compared to MCTS).
  /// </remarks>
  public class RoyalSequenceProblem : ISymbolicExpressionTreeProblem {

    // grammar for sequential search (S -> a..z | aS .. zS)
    private readonly IGrammar grammar;
    // reward added for every correct symbol of the leading correct run
    private readonly double correctReward;
    // per-symbol reward applied to the remaining symbols once an incorrect symbol is hit
    private readonly double incorrectReward;
    // number of positions for which correct symbols are defined
    private readonly int sequenceLen;
    // optimalSymbolsForPos[i] holds the k symbols that count as correct at position i
    private readonly SortedSet<char>[] optimalSymbolsForPos;

    /// <summary>Display name of the problem.</summary>
    public string Name { get { return "RoyalSequence"; } }

    /// <summary>
    /// Creates a problem instance with randomly chosen correct symbols for each position.
    /// </summary>
    /// <param name="rand">Random source used to draw the correct symbols for each position.</param>
    /// <param name="alphabetSize">Number of terminal symbols ('a', 'b', ...); must be in 1..26.</param>
    /// <param name="sequenceLen">Number of positions that are scored; must be positive.</param>
    /// <param name="k">Number of correct symbols per position; must be in 1..alphabetSize.</param>
    /// <param name="correctReward">Reward per correct symbol; must be larger than <paramref name="incorrectReward"/>.</param>
    /// <param name="incorrectReward">Reward per remaining symbol after the first incorrect symbol.</param>
    /// <exception cref="ArgumentException">Thrown when an argument is outside of its valid range.</exception>
    public RoyalSequenceProblem(System.Random rand, int alphabetSize, int sequenceLen, int k = 1, double correctReward = 1.0, double incorrectReward = 0.0) {
      if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException("alphabetSize must be in the range 1..26.", "alphabetSize");
      if (sequenceLen <= 0) throw new ArgumentException("sequenceLen must be positive.", "sequenceLen");
      if (k < 1 || k > alphabetSize) throw new ArgumentException("k must be in the range 1..alphabetSize.", "k");
      if (correctReward <= incorrectReward) throw new ArgumentException("correctReward must be larger than incorrectReward.", "correctReward");
      if (rand == null) throw new ArgumentNullException("rand");
      this.sequenceLen = sequenceLen;
      this.correctReward = correctReward;
      this.incorrectReward = incorrectReward;

      const char sentenceSymbol = 'S';
      var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
      var nonTerminalSymbols = new char[] { sentenceSymbol };

      {
        // create grammar for sequential search
        // S -> a..z | aS .. zS
        var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
          .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString())));
        this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
      }
      {
        // create grammar for tree-based GP
        // S -> a..z | SS
        // (previously this block rebuilt the sequential grammar and overwrote this.grammar,
        //  which left TreeBasedGPGrammar unassigned)
        var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
          .Concat(new[] { Tuple.Create(sentenceSymbol, new string(sentenceSymbol, 2)) });
        this.TreeBasedGPGrammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
      }

      // draw k distinct correct symbols for every position (rejection sampling on duplicates)
      this.optimalSymbolsForPos = new SortedSet<char>[sequenceLen];
      for (int i = 0; i < sequenceLen; i++) {
        optimalSymbolsForPos[i] = new SortedSet<char>();
        for (int j = 0; j < k; j++) {
          char ch;
          do {
            ch = terminalSymbols.SelectRandom(rand);
          } while (optimalSymbolsForPos[i].Contains(ch));
          optimalSymbolsForPos[i].Add(ch);
        }
      }
    }

    /// <summary>
    /// Returns the best achievable quality for sentences of at most <paramref name="maxLen"/> symbols.
    /// </summary>
    /// <param name="maxLen">Maximum sentence length.</param>
    /// <returns>min(maxLen, sequenceLen) * correctReward.</returns>
    public double BestKnownQuality(int maxLen) {
      return Math.Min(maxLen, sequenceLen) * correctReward;
    }

    /// <summary>Grammar for sequential search (S -> a..z | aS .. zS).</summary>
    public IGrammar Grammar {
      get { return grammar; }
    }

    /// <summary>
    /// Scores a sentence by the length of its leading run of correct symbols.
    /// </summary>
    /// <param name="sentence">Sentence consisting of terminal symbols only.</param>
    /// <returns>
    /// correctReward for each correct symbol up to the first incorrect one. When an incorrect symbol
    /// is found at index i, incorrectReward * (sentence.Length - i) is added and the result is
    /// clamped to be non-negative. Symbols beyond sequenceLen are ignored.
    /// </returns>
    public double Evaluate(string sentence) {
      // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
      Debug.Assert(sentence.All(c => grammar.IsTerminal(c)));
      var reward = 0.0;
      var scoredLen = Math.Min(sentence.Length, sequenceLen);
      for (int i = 0; i < scoredLen; i++) {
        if (optimalSymbolsForPos[i].Contains(sentence[i])) {
          reward += correctReward;
        } else {
          //  reduce reward by number of remaining symbols
          return Math.Max(0.0, reward + incorrectReward * (sentence.Length - i));
        }
      }
      return reward;
    }

    /// <summary>
    /// Maps a phrase to a canonical form by replacing every correct symbol with the smallest
    /// correct symbol of its position; all other symbols are copied unchanged.
    /// </summary>
    /// <param name="phrase">Phrase to canonicalize; may be longer than sequenceLen.</param>
    /// <returns>The canonical phrase, which has the same length as <paramref name="phrase"/>.</returns>
    // in each position there could be multiple correct and incorrect symbols
    public string CanonicalRepresentation(string phrase) {
      var sb = new StringBuilder(phrase.Length);
      for (int i = 0; i < phrase.Length; i++) {
        // positions at or beyond sequenceLen have no set of correct symbols, copy them as they are
        if (i < sequenceLen && optimalSymbolsForPos[i].Contains(phrase[i])) {
          sb.Append(optimalSymbolsForPos[i].Min); // all symbols in the set are equivalent
        } else {
          sb.Append(phrase[i]);
        }
      }
      return sb.ToString();
    }

    /// <summary>Feature extraction is not implemented for this problem.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public IEnumerable<Feature> GetFeatures(string phrase) {
      throw new NotImplementedException();
    }

    /// <summary>Grammar for tree-based GP (S -> a..z | SS).</summary>
    public IGrammar TreeBasedGPGrammar { get; private set; }

    /// <summary>
    /// Converts a symbolic expression tree to a sentence by concatenating the symbol names of all
    /// nodes in prefix order, skipping nodes whose symbol is named "S".
    /// </summary>
    /// <param name="tree">Tree to convert; iteration starts at tree.Root.GetSubtree(0).GetSubtree(0).</param>
    /// <returns>The concatenated symbol names.</returns>
    public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
      var sb = new StringBuilder();
      // NOTE(review): the two GetSubtree(0) calls presumably skip the program-root and start nodes - confirm
      foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
        if (s.Symbol.Name == "S") continue;
        sb.Append(s.Symbol.Name);
      }
      return sb.ToString();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.