Free cookie consent management tool by TermsFeed Policy Generator

Changeset 11755


Ignore:
Timestamp:
01/13/15 20:02:29 (10 years ago)
Author:
gkronber
Message:

#2283: implemented synthetic benchmark problems (modeling symb-reg) with configurable hardness

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

    r11747 r11755  
    4545    <Compile Include="AlternativesSampler.cs" />
    4646    <Compile Include="AlternativesContextSampler.cs" />
    47     <Compile Include="MctsQLearningSampler.cs" />
    4847    <Compile Include="TemporalDifferenceTreeSearchSampler.cs" />
    4948    <Compile Include="ExhaustiveRandomFirstSearch.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs

    r11754 r11755  
    11using System;
    22using System.Collections.Generic;
     3using System.Data.Odbc;
    34using System.Diagnostics;
    45using System.Linq;
     
    1718  // - number of decoy (sub-optimal) phrases
    1819  // - reward for decoy phrases (must be smaller than reward for optimal phrases)
     20  // - phrasesAsSets: a switch to determine whether symbols in a phrase can be shuffled (sets) or if the ordering is relevant (non-sets)
     21
     22  // this problem should be similar to symbolic regression and should be easier for approaches using a state estimation value and the canonical state
     23  // when phrases are symbol sets instead of sequences then value-estimation routines should be better (TD)
    1924  public class FindPhrasesProblem : IProblem {
    2025
    2126    private readonly IGrammar grammar;
    22     private readonly int alphabetSize;
    2327    private readonly int numPhrases;
    2428    private readonly int phraseLen;
     
    2731    private readonly double correctReward;
    2832    private readonly double decoyReward;
     33    private readonly bool phrasesAsSets;
    2934    private readonly SortedSet<string> optimalPhrases;
    3035    private readonly SortedSet<string> decoyPhrases;
    3136
    3237    public FindPhrasesProblem(Random rand, int alphabetSize, int numPhrases, int phraseLen, int numOptimalPhrases, int numDecoyPhrases = 1,
    33       double correctReward = 1.0, double decoyReward = 0.0) {
     38      double correctReward = 1.0, double decoyReward = 0.0, bool phrasesAsSets = false) {
    3439      if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException();
    3540      if (numPhrases <= 0) throw new ArgumentException();
     
    3944      if (correctReward <= decoyReward) throw new ArgumentException();
    4045
    41       this.alphabetSize = alphabetSize;
    4246      this.numPhrases = numPhrases;
    4347      this.phraseLen = phraseLen;
    4448      this.correctReward = correctReward;
    4549      this.decoyReward = decoyReward;
     50      this.phrasesAsSets = phrasesAsSets;
    4651
    4752      // create grammar
     
    5459      this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
    5560
    56       this.optimalPhrasesForPos = new SortedSet<string>[sequenceLen];
    57       for (int i = 0; i < sequenceLen; i++) {
    58         optimalPhrasesForPos[i] = new SortedSet<string>();
    59         for (int j = 0; j < k; j++) {
    60           string phrase = "";
    61           do {
    62             for (int l = 0; l < phraseLen; l++) {
    63               phrase += terminalSymbols.SelectRandom(rand);
    64             }
    65           } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases
    66           optimalPhrasesForPos[i].Add(phrase);
     61      // generate optimal phrases
     62      optimalPhrases = new SortedSet<string>();
     63      while (optimalPhrases.Count < numOptimalPhrases) {
     64        string phrase = "";
     65        for (int l = 0; l < phraseLen; l++) {
     66          phrase += terminalSymbols.SelectRandom(rand);
    6767        }
     68        phrase = CanonicalPhrase(phrase);
     69
     70        // don't allow dups
     71        if (!optimalPhrases.Contains(phrase)) optimalPhrases.Add(phrase);
    6872      }
    6973
    70       Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0);
     74      // generate decoy phrases
     75      decoyPhrases = new SortedSet<string>();
     76      while (decoyPhrases.Count < numDecoyPhrases) {
     77        string phrase = "";
     78        for (int l = 0; l < phraseLen; l++) {
     79          phrase += terminalSymbols.SelectRandom(rand);
     80        }
     81        phrase = CanonicalPhrase(phrase);
     82
     83        // don't allow dups
     84        if (!optimalPhrases.Contains(phrase) && !decoyPhrases.Contains(phrase)) decoyPhrases.Add(phrase);
     85      }
     86
     87      Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * numPhrases) == 1.0);
    7188    }
    7289
    7390    public double BestKnownQuality(int maxLen) {
    74       return Math.Min(maxLen / phraseLen, sequenceLen) * correctReward; // integer division
     91      return Math.Min(maxLen / phraseLen, numPhrases) * correctReward; // integer division
    7592    }
    7693
    7794    public string BestKnownSolution {
    78       get {
    79         string solution = "";
    80         for (int i = 0; i < sequenceLen; i++) {
    81           solution += optimalPhrasesForPos[i].First();
    82         }
    83         return solution;
    84       }
     95      get { return string.Join("", optimalPhrases.Take(numPhrases)); }
    8596    }
    8697
     
    92103      // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
    93104      Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
    94       // as long as only correct symbols are found we increase the reward by +1
    95       // on the first incorrect symbol we return
    96       var reward = 0.0;
    97       for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) {
    98         if (optimalPhrasesForPos[i].Contains(sentence.Substring(i * phraseLen, phraseLen))) {
    99           reward += correctReward;
    100         } else {
    101           // alternatively reduce reward by number of remaining phrases
    102           return Math.Max(0.0, reward + incorrectReward * (sentence.Length / phraseLen - i));
    103           // stop on first incorrect symbol and return reward
    104           //return reward;
    105         }
     105
     106
     107      // split the sentence in phrases
     108      // phrases must not overlap in the sentence, multiple occurrences of a phrase are not counted
     109      // the order of phrases is not relevant
     110      var numPhrases = sentence.Length / phraseLen;
     111      var phrases = new SortedSet<string>();
     112      for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) {
     113        var sentenceIdx = phraseIdx * phraseLen;
     114        var phrase = sentence.Substring(sentenceIdx, phraseLen);
     115        phrase = CanonicalPhrase(phrase);
     116        if (!phrases.Contains(phrase)) phrases.Add(phrase);
    106117      }
     118
     119      // add reward for each correct phrase that occurs in the sentence
     120      // add reward for each decoy phrase that occurs in the sentence
     121      var reward = phrases.Intersect(optimalPhrases).Count() * correctReward
     122               + phrases.Intersect(decoyPhrases).Count() * decoyReward;
     123
     124
     125
    107126      return reward;
    108127    }
    109128
     129    private string CanonicalPhrase(string phrase) {
     130      if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch));
     131      else return phrase;
     132    }
     133
    110134    public string CanonicalRepresentation(string terminalPhrase) {
    111       return terminalPhrase;
     135      // as the ordering of phrases does not matter we can reorder the phrases
     136      // and remove duplicates
     137      var numPhrases = terminalPhrase.Length / phraseLen;
     138      var phrases = new SortedSet<string>();
     139      for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) {
     140        var sentenceIdx = phraseIdx * phraseLen;
     141        var phrase = terminalPhrase.Substring(sentenceIdx, phraseLen);
     142        phrase = CanonicalPhrase(phrase);
     143        if (!phrases.Contains(phrase)) phrases.Add(phrase);
     144      }
     145      return string.Join("", phrases);
    112146    }
    113147  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs

    r11747 r11755  
    88
    99namespace HeuristicLab.Problems.GrammaticalOptimization {
    10   // must find one of k*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position
     10  // must find one of numCorrectPhrases*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position
    1111  // compared to the RoyalSequence problem this problem is harder because the number of different phrases starting at a position is much larger than the number of symbols (grows exponentially with the phrase-length)
    1212  // if phraseLen = 1 this is the same as the RoyalSequence problem
     
    1515  // - phraseLen: the length of a phrase in number of symbols
    1616  // - sequenceLen: the number of phrases in the correct subsequence (total sequence length is sequenceLen * phraseLen)
    17   // - k: the number of correct phrases starting at each position
     17  // - numCorrectPhrases: the number of correct phrases starting at each position
     18  // - phrasesAsSets: switch to determine if the ordering of symbols within a phrase is relevant
    1819  //
    1920  // this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS)
    2021  // for phraseLen > 1 this should be harder than RoyalSymbolProblem
     22  // when phrases are symbol sets instead of sequences then value-estimation routines should be better (TD)
    2123  public class RoyalPhraseSequenceProblem : IProblem {
    2224
     
    2426    private readonly double correctReward;
    2527    private readonly double incorrectReward;
    26     private readonly int k;
     28    private readonly int _numCorrectPhrases;
    2729    private readonly int sequenceLen;
    2830    private readonly int alphabetSize;
    2931    private readonly int phraseLen;
     32    private readonly bool phrasesAsSets;
    3033    private readonly SortedSet<string>[] optimalPhrasesForPos;
    3134
    32     public RoyalPhraseSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int k = 1, double correctReward = 1.0, double incorrectReward = 0.0) {
     35    public RoyalPhraseSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int numCorrectPhrases = 1, double correctReward = 1.0, double incorrectReward = 0.0, bool phrasesAsSets = false) {
    3336      if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException();
    3437      if (sequenceLen <= 0) throw new ArgumentException();
    35       if (k < 1 || k > alphabetSize) throw new ArgumentException();
     38      if (numCorrectPhrases < 1 || numCorrectPhrases > alphabetSize) throw new ArgumentException();
    3639      if (phraseLen < 1) throw new ArgumentException();
    3740      if (correctReward <= incorrectReward) throw new ArgumentException();
     
    4043      this.sequenceLen = sequenceLen;
    4144      this.phraseLen = phraseLen;
    42       this.k = k;
     45      this._numCorrectPhrases = numCorrectPhrases;
    4346      this.correctReward = correctReward;
    4447      this.incorrectReward = incorrectReward;
     48      this.phrasesAsSets = phrasesAsSets;
    4549      var sentenceSymbol = 'S';
    4650      var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
     
    5458      for (int i = 0; i < sequenceLen; i++) {
    5559        optimalPhrasesForPos[i] = new SortedSet<string>();
    56         for (int j = 0; j < k; j++) {
     60        for (int j = 0; j < numCorrectPhrases; j++) {
    5761          string phrase = "";
    5862          do {
     
    6064              phrase += terminalSymbols.SelectRandom(rand);
    6165            }
     66            phrase = CanonicalPhrase(phrase);
    6267          } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases
    6368          optimalPhrasesForPos[i].Add(phrase);
     
    6570      }
    6671
    67       Debug.Assert(Evaluate(BestKnownSolution)/BestKnownQuality(phraseLen * sequenceLen) == 1.0);
     72      Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0);
    6873    }
    6974
     
    9398      var reward = 0.0;
    9499      for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) {
    95         if (optimalPhrasesForPos[i].Contains(sentence.Substring(i * phraseLen, phraseLen))) {
     100        var canonicalPhrase = CanonicalPhrase(sentence.Substring(i * phraseLen, phraseLen));
     101        if (optimalPhrasesForPos[i].Contains(canonicalPhrase)) {
    96102          reward += correctReward;
    97103        } else {
     
    105111    }
    106112
     113    private string CanonicalPhrase(string phrase) {
     114      if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch));
     115      else return phrase;
     116    }
     117
    107118    public string CanonicalRepresentation(string terminalPhrase) {
    108       return terminalPhrase;
     119      if (phrasesAsSets) {
     120        var phrases = new List<string>();
     121        var numPhrases = terminalPhrase.Length / phraseLen;
     122        for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) {
     123          var sentenceIdx = phraseIdx * phraseLen;
     124          var phrase = terminalPhrase.Substring(sentenceIdx, phraseLen);
     125          phrase = CanonicalPhrase(phrase);
     126          phrases.Add(phrase);
     127        }
     128
     129        return string.Join("", phrases);
     130      } else
     131        return terminalPhrase;
    109132    }
    110133  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11747 r11755  
    146146      // TODO: wie kann ich sampler noch vergleichen bzw. was kann man messen um die qualität des samplers abzuschätzen (bis auf qualität und iterationen bis zur besten lösung) => ziel schnellere iterationen zu gutem ergebnis
    147147      // TODO: research thompson sampling for max bandit?
    148       // TODO: ausführlicher test von strategien für k-armed max bandit
     148      // TODO: ausführlicher test von strategien für numCorrectPhrases-armed max bandit
    149149      // TODO: verify TA implementation using example from the original paper     
    150150      // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
     
    166166      var random = new Random();
    167167
    168       var phraseLen = 1;
    169       var sentenceLen = 25;
    170       var numPhrases = sentenceLen / phraseLen;
    171       var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: 1, k: 1, correctReward: 1, incorrectReward: 0);
    172 
    173       //var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
     168      //var phraseLen = 3;
     169      //var numPhrases = 5;
     170      //var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true);
     171
     172      //var phraseLen = 4;
     173      //var numPhrases = 5;
     174      //var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 500, correctReward: 1.0, decoyReward: 0.2, phrasesAsSets: true);
     175
     176      var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
    174177      // Ant
    175178      // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
     
    182185      //var problem = new EvenParityProblem();
    183186      // symbreg length = 11 q = 0.824522210419616
    184       var alg = new MctsSampler(problem, sentenceLen, random, 0, new BoltzmannExplorationPolicy(200));
     187      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
     188      var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
    185189      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
    186190      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
Note: See TracChangeset for help on using the changeset viewer.