Changeset 11770


Ignore:
Timestamp:
01/15/15 18:59:07 (5 years ago)
Author:
gkronber
Message:

#2283: worked on generic sequential search alg with bandit policy as parameter

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization
Files:
9 added
13 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomNoResamplingPolicy.cs

    r11742 r11770  
    11using System;
    22using System.Collections.Generic;
     3using System.Configuration;
    34using System.Linq;
     5using System.Security.Policy;
    46using System.Text;
    5 using System.Threading.Tasks;
    67using HeuristicLab.Common;
    78using HeuristicLab.Problems.GrammaticalOptimization;
    89
    910namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
    10   public class RandomNoResamplingPolicy : IGrammarPolicy {
     11  public class RandomNoResamplingPolicy : GrammarPolicy {
    1112
    12     private readonly Dictionary<ReadonlySequence, bool> done;
    13     private readonly Dictionary<Tuple<ReadonlySequence, ReadonlySequence>, ReadonlySequence> nextState;
     13    private readonly HashSet<string> done;
    1414
    15 
    16     public RandomNoResamplingPolicy() {
    17       this.done = new Dictionary<ReadonlySequence, bool>();
     15    public RandomNoResamplingPolicy(IProblem problem, bool useCanonicalRepresentation)
     16      : base(problem, useCanonicalRepresentation) {
     17      this.done = new HashSet<string>();
    1818    }
    1919
    20     public ReadonlySequence SelectAction(Random random, ReadonlySequence state, IEnumerable<ReadonlySequence> actions) {
    21       var allDone = true;
    22       foreach (var a in actions) {
    23         var p = Tuple.Create(state, a);
    24         allDone &= nextState.ContainsKey(p) && Done(nextState[p]);
    25         if (!allDone) break;
     20    public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {
     21      // only select states that are not yet done
     22      afterStates = afterStates.Where(a => !done.Contains(a.ToString())).ToArray();
     23      if (!afterStates.Any()) {
     24        // fail because all follow states have already been visited => also disable the current state
     25        done.Add(CanonicalState(curState.ToString()));
     26        selectedState = null;
     27        return false;
    2628      }
    27       if(allDone)
    28       return actions
    29         .Where(a => !nextState.ContainsKey(Tuple.Create(state, a)) || Done(nextState[Tuple.Create(state, a)]))
    30         .SelectRandom(random);
     29
     30      selectedState = afterStates.SelectRandom(random);
     31      return true;
    3132    }
    3233
    33     public void UpdateReward(ReadonlySequence state, ReadonlySequence action, double reward, ReadonlySequence newState) {
    34       var key = Tuple.Create(state, action);
    35       nextState[key] = newState;
    36       if (newState.IsTerminal) done[newState] = true;
    37       if
     34    public override void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
     35      base.UpdateReward(stateTrajectory, reward);
     36      // ignore rewards but update the set of visited terminal states
     37
     38      // the last state could be terminal
     39      var lastState = stateTrajectory.Last();
     40      if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));
    3841    }
    3942
    40     public bool Done(ReadonlySequence state) {
    41       return done.ContainsKey(state);
     43    public override void Reset() {
     44      base.Reset();
     45      done.Clear();
    4246    }
    4347  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomPolicy.cs

    r11742 r11770  
    88
    99namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
    10   public class RandomPolicy : IGrammarPolicy {
    11     public ReadonlySequence SelectAction(Random random, ReadonlySequence state, IEnumerable<ReadonlySequence> actions) {
    12       return actions.SelectRandom(random);
     10  public class RandomPolicy : GrammarPolicy {
     11    public RandomPolicy(IProblem problem, bool useCanonicalRepresentation)
     12      : base(problem, useCanonicalRepresentation) {
    1313    }
    1414
    15     public void UpdateReward(ReadonlySequence state, ReadonlySequence action, double reward, ReadonlySequence newState) {
    16       // ignore
    17     }
    18 
    19     public bool Done(ReadonlySequence state) {
    20       return false;
     15    public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {
     16      // never fail => allows re-visits of terminal states
     17      selectedState = afterStates.SelectRandom(random);
     18      return true;
    2119    }
    2220  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

    r11747 r11770  
    6666    <Compile Include="Bandits\IBandit.cs" />
    6767    <Compile Include="Bandits\TruncatedNormalBandit.cs" />
     68    <Compile Include="GrammarPolicies\BoltzmanExplorationPolicy.cs" />
     69    <Compile Include="GrammarPolicies\GenericGrammarPolicy.cs">
     70      <SubType>Code</SubType>
     71    </Compile>
     72    <Compile Include="GrammarPolicies\TDPolicy.cs" />
     73    <Compile Include="GrammarPolicies\UCTPolicy.cs" />
     74    <Compile Include="GrammarPolicies\GrammarPolicy.cs" />
     75    <Compile Include="GrammarPolicies\EpsGreedyPolicy.cs" />
     76    <Compile Include="GrammarPolicies\GreedyPolicy.cs" />
     77    <Compile Include="GrammarPolicies\IGrammarPolicy.cs" />
     78    <Compile Include="GrammarPolicies\RandomNoResamplingPolicy.cs" />
    6879    <Compile Include="GrammarPolicies\RandomPolicy.cs" />
    6980    <Compile Include="IPolicy.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IBanditPolicyActionInfo.cs

    r11747 r11770  
    1 using System;
    2 using System.Collections.Generic;
    3 using System.Linq;
    4 using System.Text;
    5 using System.Threading.Tasks;
    6 
    7 namespace HeuristicLab.Algorithms.Bandits {
     1namespace HeuristicLab.Algorithms.Bandits {
    82  public interface IBanditPolicyActionInfo {
    93    bool Disabled { get; }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs

    r11744 r11770  
    11using System;
    22using System.Collections.Generic;
     3using System.Dynamic;
    34using System.Linq;
    45using System.Text;
     
    78
    89namespace HeuristicLab.Algorithms.Bandits {
    9   // this interface represents a policy for reinforcement learning
    10   public interface IPolicy<in TState, TAction> {
    11     TAction SelectAction(Random random, TState state, IEnumerable<TAction> actions);
    12     void UpdateReward(TState state, TAction action, double reward, TState newState); // reward received when after taking action in state and new state
    13     bool Done(TState state); // for deterministic MDP with deterministic rewards and goal to find a state with max reward
    14   }
     10  // this interface represents a policy for episodic reinforcement learning (with afterstates)
     11  // here we assume that a reward is only recieved at the end of the episode and the update is done only after an episode is complete
     12  // we also assume that the policy can fail to select one of the followStates
     13  public interface IPolicy<TState> {
     14    bool TrySelect(Random random, TState curState, IEnumerable<TState> afterStates, out TState selectedState); // selectedState \in afterStates
    1515
    16   public interface IGrammarPolicy : IPolicy<ReadonlySequence, ReadonlySequence> {
     16    // state-trajectory are the states of the episode, at the end we recieved the reward (only for the terminal state)
     17    void UpdateReward(IEnumerable<TState> stateTrajectory, double reward);
    1718
     19    void Reset(); // clears all internal state
     20
     21    // for introspection
     22    double GetValue(TState state);
     23    int GetTries(TState state);
    1824  }
    1925}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

    r11755 r11770  
    4545    <Compile Include="AlternativesSampler.cs" />
    4646    <Compile Include="AlternativesContextSampler.cs" />
     47    <Compile Include="SequentialSearch.cs" />
    4748    <Compile Include="TemporalDifferenceTreeSearchSampler.cs" />
    4849    <Compile Include="ExhaustiveRandomFirstSearch.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/ConsoleEx.cs

    r11745 r11770  
    108108
    109109    public static ConsoleColor ColorForValue(double d) {
    110       Debug.Assert(d >= 0 && d <= 1.0);
     110      //Debug.Assert(d >= 0 && d <= 1.0);
     111      d = Math.Min(1.0, Math.Max(0.0, d));
    111112      var cIdx = Math.Max(0, (int)Math.Floor(d * 15) - 1);
    112113      return colors[cIdx];
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/EvenParityProblem.cs

    r11742 r11770  
    1313    private const string grammarString = @"
    1414G(S):
    15 S -> N | N*S | N+S | !S | (S)
    16 N -> a | b | c | d
     15S -> a | b | c | d | a*S | b*S | c*S | d*S | a+S | b+S | c+S | d+S | !S | (S)
    1716";
    1817
     
    2019    private readonly ExpressionInterpreter interpreter = new ExpressionInterpreter();
    2120    public EvenParityProblem() {
    22       this.grammar = new Grammar (grammarString);
     21      this.grammar = new Grammar(grammarString);
    2322    }
    2423
     
    5251
    5352    public string CanonicalRepresentation(string terminalPhrase) {
     53      throw new NotImplementedException();
    5454      return terminalPhrase;
    5555    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs

    r11755 r11770  
    143143        if (!phrases.Contains(phrase)) phrases.Add(phrase);
    144144      }
     145      var remainder = terminalPhrase.Substring(numPhrases * phraseLen, terminalPhrase.Length - (numPhrases * phraseLen));
     146      remainder = CanonicalPhrase(remainder);
     147      if (!phrases.Contains(remainder)) phrases.Add(remainder);
     148
    145149      return string.Join("", phrases);
    146150    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs

    r11755 r11770  
    2626    private readonly double correctReward;
    2727    private readonly double incorrectReward;
    28     private readonly int _numCorrectPhrases;
    2928    private readonly int sequenceLen;
    30     private readonly int alphabetSize;
    3129    private readonly int phraseLen;
    3230    private readonly bool phrasesAsSets;
     
    4038      if (correctReward <= incorrectReward) throw new ArgumentException();
    4139
    42       this.alphabetSize = alphabetSize;
    4340      this.sequenceLen = sequenceLen;
    4441      this.phraseLen = phraseLen;
    45       this._numCorrectPhrases = numCorrectPhrases;
    4642      this.correctReward = correctReward;
    4743      this.incorrectReward = incorrectReward;
     
    127123        }
    128124
     125        var remainder = terminalPhrase.Substring(numPhrases * phraseLen, terminalPhrase.Length - (numPhrases * phraseLen));
     126        remainder = CanonicalPhrase(remainder);
     127        phrases.Add(remainder);
     128
    129129        return string.Join("", phrases);
    130130      } else
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs

    r11747 r11770  
    1111A -> l | r | m | ?(A)(A) | lA | rA | mA
    1212";
     13
     14
    1315    // original koza grammar
    1416    // Ant -> left | right | move | if-food-ahead Ant Ant | Ant Ant | Ant Ant Ant
     
    103105      do {
    104106        oldPhrase = terminalPhrase;
    105         terminalPhrase.Replace("ll", "rr").Replace("rl", "lr");
     107        terminalPhrase = terminalPhrase.Replace("ll", "rr").Replace("rl", "lr").Replace("lr", "").Replace("lll", "r").Replace("rrr", "l");
    106108      } while (terminalPhrase != oldPhrase);
    107109      return terminalPhrase;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs

    r11747 r11770  
    22using System.Collections.Generic;
    33using System.Linq;
     4using System.Net;
    45using System.Security;
    56using System.Security.AccessControl;
     
    7273
    7374    // right now only + and * is supported
     75    private Dictionary<string, string> cache = new Dictionary<string, string>();
    7476    public string CanonicalRepresentation(string phrase) {
    75       var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
    76       var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch)));
    77       var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch)));
     77      string res;
     78      if (!cache.TryGetValue(phrase, out res)) {
     79        var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
     80        var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch)));
     81        var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch)));
    7882
    79       return string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term))));
     83        res = string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term))));
     84        cache[phrase] = res;
     85      }
     86      return res;
    8087    }
    8188
    8289    private string CanonicalTerm(string term) {
    83       return string.Join("", term.OrderByDescending(ch => (byte)ch));
     90      return string.Join("", term.OrderByDescending(ch => (byte)ch)); // we want to have the up-case characters last
    8491    }
    8592  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11755 r11770  
    137137
    138138    private static void RunDemo() {
     139      // TODO: move problem instances into a separate folder
     140      // TODO: improve performance of SequentialSearch (memory allocations related to sequences)
     141      // TODO: implement bridge to HL-GP
    139142      // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos)
    140143      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
     
    161164      int iterations = 0;
    162165      var sw = new Stopwatch();
    163       double bestQuality = 0;
    164       string bestSentence = "";
     166
    165167      var globalStatistics = new SentenceSetStatistics();
    166168      var random = new Random();
     
    168170      //var phraseLen = 3;
    169171      //var numPhrases = 5;
    170       //var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true);
    171 
    172       //var phraseLen = 4;
    173       //var numPhrases = 5;
    174       //var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 500, correctReward: 1.0, decoyReward: 0.2, phrasesAsSets: true);
    175 
    176       var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
     172      //var problem = new RoyalPhraseSequenceProblem(random, 15, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true);
     173
     174      // var phraseLen = 2;
     175      // var numPhrases = 5;
     176      // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0.0, phrasesAsSets: true);
     177
     178      //var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
    177179      // Ant
    178180      // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
    179181      // GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant
    180       //var problem = new SantaFeAntProblem();
     182      // very good results with:       var alg = new SequentialSearch(problem, 17, random, 0,
     183      // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));
     184
     185      var problem = new SantaFeAntProblem();
    181186      //var problem = new SymbolicRegressionProblem("Tower");
    182187      //var problem = new PalindromeProblem();
     
    186191      // symbreg length = 11 q = 0.824522210419616
    187192      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
    188       var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
     193      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
     194      var alg = new SequentialSearch(problem, 10, random, 0,
     195        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new GaussianThompsonSamplingPolicy(true), true));
    189196      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
    190197      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
     
    199206
    200207      alg.FoundNewBestSolution += (sentence, quality) => {
    201         bestQuality = quality;
    202         bestSentence = sentence;
    203208        //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
    204209        //Console.ReadLine();
     
    208213        globalStatistics.AddSentence(sentence, quality);
    209214        if (iterations % 100 == 0) {
    210           //if (iterations % 1000 == 0) Console.Clear();
     215          if (iterations % 1000 == 0) Console.Clear();
    211216          Console.SetCursorPosition(0, 0);
    212217          alg.PrintStats();
     
    226231      sw.Stop();
    227232
    228       Console.WriteLine("{0,10} Best soultion: {1,10:F5} {2}", iterations, bestQuality, bestSentence);
     233      Console.Clear();
     234      alg.PrintStats();
     235      Console.WriteLine(globalStatistics);
    229236      Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
    230237        sw.Elapsed.TotalSeconds,
Note: See TracChangeset for help on using the changeset viewer.