Context Navigation

← Previous Changeset
Next Changeset →

Changeset 11755

Timestamp:

01/13/15 20:02:29 (10 years ago)

Author:

gkronber

Message:

#2283: implemented synthetic benchmark problems (modeling symb-reg) with configurable hardness

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization

Files:

: 4 edited

HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs (modified) (6 diffs)
HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs (modified) (9 diffs)
Main/Program.cs (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

r11747	r11755
45	45	<Compile Include="AlternativesSampler.cs" />
46	46	<Compile Include="AlternativesContextSampler.cs" />
47		~~<Compile Include="MctsQLearningSampler.cs" />~~
48	47	<Compile Include="TemporalDifferenceTreeSearchSampler.cs" />
49	48	<Compile Include="ExhaustiveRandomFirstSearch.cs" />

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs

-                      r11754
+                      r11755
 using System;
 using System.Collections.Generic;
+using System.Data.Odbc;
 using System.Diagnostics;
 using System.Linq;
 …
   // - number of decoy (sub-optimal) phrases
   // - reward for decoy phrases (must be smaller than reward for optimal phrases)
+  // - phrasesAsSets: a switch to determine whether symbols in a phrase can be shuffled (sets) or if the ordering is relevant (non-sets)
+  // this problem should be similar to symbolic regression and should be easier for approaches using a state estimation value and the canonical state
+  // when phrases are symbol sets instead of sequences then value-estimation routines should be better (TD)
   public class FindPhrasesProblem : IProblem {
     private readonly IGrammar grammar;
-    private readonly int alphabetSize;
     private readonly int numPhrases;
     private readonly int phraseLen;
 …
     private readonly double correctReward;
     private readonly double decoyReward;
+    private readonly bool phrasesAsSets;
     private readonly SortedSet<string> optimalPhrases;
     private readonly SortedSet<string> decoyPhrases;
     public FindPhrasesProblem(Random rand, int alphabetSize, int numPhrases, int phraseLen, int numOptimalPhrases, int numDecoyPhrases = 1,
       double correctReward = 1.0, double decoyReward = 0.0) {
+      double correctReward = 1.0, double decoyReward = 0.0, bool phrasesAsSets = false) {
       if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException();
       if (numPhrases <= 0) throw new ArgumentException();
 …
       if (correctReward <= decoyReward) throw new ArgumentException();
-      this.alphabetSize = alphabetSize;
       this.numPhrases = numPhrases;
       this.phraseLen = phraseLen;
       this.correctReward = correctReward;
       this.decoyReward = decoyReward;
+      this.phrasesAsSets = phrasesAsSets;
       // create grammar
 …
       this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
+      this.optimalPhrasesForPos = new SortedSet<string>[sequenceLen];
+      for (int i = 0; i < sequenceLen; i++) {
+        optimalPhrasesForPos[i] = new SortedSet<string>();
+        for (int j = 0; j < k; j++) {
+          string phrase = "";
+          do {
+            for (int l = 0; l < phraseLen; l++) {
+              phrase += terminalSymbols.SelectRandom(rand);
+            }
+          } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases
+          optimalPhrasesForPos[i].Add(phrase);
+      // generate optimal phrases
+      optimalPhrases = new SortedSet<string>();
+      while (optimalPhrases.Count < numOptimalPhrases) {
+        string phrase = "";
+        for (int l = 0; l < phraseLen; l++) {
+          phrase += terminalSymbols.SelectRandom(rand);
+        }
+        phrase = CanonicalPhrase(phrase);
+        // don't allow dups
+        if (!optimalPhrases.Contains(phrase)) optimalPhrases.Add(phrase);
+      }
+      Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0);
+      // generate decoy phrases
+      decoyPhrases = new SortedSet<string>();
+      while (decoyPhrases.Count < numDecoyPhrases) {
+        string phrase = "";
+        for (int l = 0; l < phraseLen; l++) {
+          phrase += terminalSymbols.SelectRandom(rand);
+        }
+        phrase = CanonicalPhrase(phrase);
+        // don't allow dups
+        if (!optimalPhrases.Contains(phrase) && !decoyPhrases.Contains(phrase)) decoyPhrases.Add(phrase);
+      }
+      Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * numPhrases) == 1.0);
+    }
     public double BestKnownQuality(int maxLen) {
       return Math.Min(maxLen / phraseLen, sequenceLen) * correctReward; // integer division
+      return Math.Min(maxLen / phraseLen, numPhrases) * correctReward; // integer division
+    }
     public string BestKnownSolution {
+      get {
+        string solution = "";
+        for (int i = 0; i < sequenceLen; i++) {
+          solution += optimalPhrasesForPos[i].First();
+        }
+        return solution;
+      }
+      get { return string.Join("", optimalPhrases.Take(numPhrases)); }
+    }
 …
       // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
       Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
+      // as long as only correct symbols are found we increase the reward by +1
+      // on the first incorrect symbol we return
       var reward = 0.0;
       for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) {
         if (optimalPhrasesForPos[i].Contains(sentence.Substring(i * phraseLen, phraseLen))) {
           reward += correctReward;
         } else {
           // alternatively reduce reward by number of remaining phrases
           return Math.Max(0.0, reward + incorrectReward * (sentence.Length / phraseLen - i));
           // stop on first incorrect symbol and return reward
           //return reward;
+        }
+      // split the sentence in phrases
+      // phrases must not overlap in the sentence, multiple occurrences of a phrase are not counted
+      // the order of phrases is not relevant
+      var numPhrases = sentence.Length / phraseLen;
+      var phrases = new SortedSet<string>();
+      for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) {
+        var sentenceIdx = phraseIdx * phraseLen;
+        var phrase = sentence.Substring(sentenceIdx, phraseLen);
+        phrase = CanonicalPhrase(phrase);
+        if (!phrases.Contains(phrase)) phrases.Add(phrase);
+      }
+      // add reward for each correct phrase that occurs in the sentence
+      // add reward for each decoy phrase that occurs in the sentence
+      var reward = phrases.Intersect(optimalPhrases).Count() * correctReward
+               + phrases.Intersect(decoyPhrases).Count() * decoyReward;
       return reward;
+    }
+    private string CanonicalPhrase(string phrase) {
+      if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch));
+      else return phrase;
+    }
     public string CanonicalRepresentation(string terminalPhrase) {
+      return terminalPhrase;
+      // as the ordering of phrases does not matter we can reorder the phrases
+      // and remove duplicates
+      var numPhrases = terminalPhrase.Length / phraseLen;
+      var phrases = new SortedSet<string>();
+      for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) {
+        var sentenceIdx = phraseIdx * phraseLen;
+        var phrase = terminalPhrase.Substring(sentenceIdx, phraseLen);
+        phrase = CanonicalPhrase(phrase);
+        if (!phrases.Contains(phrase)) phrases.Add(phrase);
+      }
+      return string.Join("", phrases);
+    }
+  }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs

-                      r11747
+                      r11755
 namespace HeuristicLab.Problems.GrammaticalOptimization {
   // must find one of k*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position
+  // must find one of numCorrectPhrases*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position
   // compared to the RoyalSequence problem this problem is harder because the number of different phrases starting at a position is much larger than the number of symbols (grows exponentially with the phrase-length)
   // if phraseLen = 1 this is the same as the RoyalSequence problem
 …
   // - phraseLen: the length of a phrase in number of symbols
   // - sequenceLen: the number of phrases in the correct subsequence (total sequence length is sequenceLen * phraseLen)
+  // - k: the number of correct phrases starting at each position
+  // - numCorrectPhrases: the number of correct phrases starting at each position
+  // - phrasesAsSets: switch to determine if the ordering of symbols within a phrase is relevant
   //
   // this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS)
   // for phraseLen > 1 this should be harder than RoyalSymbolProblem
+  // when phrases are symbol sets instead of sequences then value-estimation routines should be better (TD)
   public class RoyalPhraseSequenceProblem : IProblem {
 …
     private readonly double correctReward;
     private readonly double incorrectReward;
     private readonly int k;
+    private readonly int _numCorrectPhrases;
     private readonly int sequenceLen;
     private readonly int alphabetSize;
     private readonly int phraseLen;
+    private readonly bool phrasesAsSets;
     private readonly SortedSet<string>[] optimalPhrasesForPos;
     public RoyalPhraseSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int k = 1, double correctReward = 1.0, double incorrectReward = 0.0) {
+    public RoyalPhraseSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int numCorrectPhrases = 1, double correctReward = 1.0, double incorrectReward = 0.0, bool phrasesAsSets = false) {
       if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException();
       if (sequenceLen <= 0) throw new ArgumentException();
       if (k < 1 || k > alphabetSize) throw new ArgumentException();
+      if (numCorrectPhrases < 1 || numCorrectPhrases > alphabetSize) throw new ArgumentException();
       if (phraseLen < 1) throw new ArgumentException();
       if (correctReward <= incorrectReward) throw new ArgumentException();
 …
       this.sequenceLen = sequenceLen;
       this.phraseLen = phraseLen;
       this.k = k;
+      this._numCorrectPhrases = numCorrectPhrases;
       this.correctReward = correctReward;
       this.incorrectReward = incorrectReward;
+      this.phrasesAsSets = phrasesAsSets;
       var sentenceSymbol = 'S';
       var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
 …
       for (int i = 0; i < sequenceLen; i++) {
         optimalPhrasesForPos[i] = new SortedSet<string>();
         for (int j = 0; j < k; j++) {
+        for (int j = 0; j < numCorrectPhrases; j++) {
           string phrase = "";
           do {
 …
               phrase += terminalSymbols.SelectRandom(rand);
+            }
+            phrase = CanonicalPhrase(phrase);
           } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases
           optimalPhrasesForPos[i].Add(phrase);
 …
+      }
       Debug.Assert(Evaluate(BestKnownSolution)/BestKnownQuality(phraseLen * sequenceLen) == 1.0);
+      Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0);
+    }
 …
       var reward = 0.0;
       for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) {
+        if (optimalPhrasesForPos[i].Contains(sentence.Substring(i * phraseLen, phraseLen))) {
+        var canonicalPhrase = CanonicalPhrase(sentence.Substring(i * phraseLen, phraseLen));
+        if (optimalPhrasesForPos[i].Contains(canonicalPhrase)) {
           reward += correctReward;
         } else {
 …
+    }
+    private string CanonicalPhrase(string phrase) {
+      if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch));
+      else return phrase;
+    }
     public string CanonicalRepresentation(string terminalPhrase) {
+      return terminalPhrase;
+      if (phrasesAsSets) {
+        var phrases = new List<string>();
+        var numPhrases = terminalPhrase.Length / phraseLen;
+        for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) {
+          var sentenceIdx = phraseIdx * phraseLen;
+          var phrase = terminalPhrase.Substring(sentenceIdx, phraseLen);
+          phrase = CanonicalPhrase(phrase);
+          phrases.Add(phrase);
+        }
+        return string.Join("", phrases);
+      } else
+        return terminalPhrase;
+    }
+  }

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

-                      r11747
+                      r11755
       // TODO: wie kann ich sampler noch vergleichen bzw. was kann man messen um die qualität des samplers abzuschätzen (bis auf qualität und iterationen bis zur besten lösung) => ziel schnellere iterationen zu gutem ergebnis
       // TODO: research thompson sampling for max bandit?
       // TODO: ausführlicher test von strategien für k-armed max bandit
+      // TODO: ausführlicher test von strategien für numCorrectPhrases-armed max bandit
       // TODO: verify TA implementation using example from the original paper
       // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
 …
       var random = new Random();
+      var phraseLen = 1;
+      var sentenceLen = 25;
+      var numPhrases = sentenceLen / phraseLen;
+      var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: 1, k: 1, correctReward: 1, incorrectReward: 0);
+      //var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
+      //var phraseLen = 3;
+      //var numPhrases = 5;
+      //var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true);
+      //var phraseLen = 4;
+      //var numPhrases = 5;
+      //var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 500, correctReward: 1.0, decoyReward: 0.2, phrasesAsSets: true);
+      var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
       // Ant
       // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
 …
       //var problem = new EvenParityProblem();
       // symbreg length = 11 q = 0.824522210419616
+      var alg = new MctsSampler(problem, sentenceLen, random, 0, new BoltzmannExplorationPolicy(200));
+      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
+      var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
       //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
       //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats:

Update cookies preferences