Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/18/15 18:24:58 (10 years ago)
Author:
gkronber
Message:

#2283 fixed compile errors and refactoring

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization
Files:
2 added
6 deleted
25 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs

    r11742 r11793  
    2626    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    2727      Debug.Assert(actionInfos.Any());
    28       if (random.NextDouble() > eps) {
     28      if (random.NextDouble() >= eps) { // eps == 0 should be equivalent to pure exploitation, eps == 1 is pure exploration
    2929        // select best
    3030        var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs

    r11792 r11793  
    1414    private readonly IProblem problem;
    1515    private readonly IBanditPolicy banditPolicy;
    16     private readonly HashSet<string> done;
     16    //private readonly HashSet<string> done;
    1717
    1818    public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
     
    2121      this.banditPolicy = banditPolicy;
    2222      this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
    23       this.done = new HashSet<string>();
     23      //this.done = new HashSet<string>();
    2424    }
    2525
    26     public bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates,
    27       out ReadonlySequence selectedState) {
    28       // only select states that are not yet done
    29       afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a))).ToArray();
    30       if (!afterStates.Any()) {
     26    public bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
     27      // fail if all states are done (corresponding state infos are disabled)
     28      if (afterStates.All(s => GetStateInfo(s).Disabled)) {
    3129        // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored)
    3230
    33         done.Add(CanonicalState(curState));
    34         selectedState = null;
     31        GetStateInfo(curState).Disable(0.0); // should the value be max of afterstate values instead of 0.0?
     32        selectedStateIdx = -1;
    3533        return false;
    3634      }
    3735
     36      selectedStateIdx = banditPolicy.SelectAction(random, afterStates.Select(s => GetStateInfo(s)));
    3837
    39       var selectedIdx = banditPolicy.SelectAction(random, afterStates.Select(s => GetStateInfo(s)));
    40       selectedState = afterStates.ElementAt(selectedIdx);
    4138      return true;
    4239    }
    4340
    44     private IBanditPolicyActionInfo GetStateInfo(ReadonlySequence state) {
     41    private IBanditPolicyActionInfo GetStateInfo(string state) {
    4542      var s = CanonicalState(state);
    4643      IBanditPolicyActionInfo info;
     
    5249    }
    5350
    54     public virtual void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
     51    public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
    5552      // the last state could be terminal
    5653      var lastState = stateTrajectory.Last();
    57       if (lastState.IsTerminal) done.Add(CanonicalState(lastState));
     54      if (problem.Grammar.IsTerminal(lastState)) {
     55        GetStateInfo(lastState).Disable(reward);
     56      }
    5857
    59       foreach (var state in stateTrajectory) {
     58      // update remaining states
     59      foreach (var state in stateTrajectory.Reverse().Skip(1)) {
    6060        GetStateInfo(state).UpdateReward(reward);
    6161      }
     
    6464    public virtual void Reset() {
    6565      stateInfo.Clear();
    66       done.Clear();
     66      //done.Clear();
    6767    }
    6868
    69     public int GetTries(ReadonlySequence state) {
     69    public int GetTries(string state) {
    7070      var s = CanonicalState(state);
    7171      if (stateInfo.ContainsKey(s)) return stateInfo[s].Tries;
     
    7373    }
    7474
    75     public double GetValue(ReadonlySequence state) {
     75    public double GetValue(string state) {
    7676      var s = CanonicalState(state);
    7777      if (stateInfo.ContainsKey(s)) return stateInfo[s].Value;
     
    7979    }
    8080
    81     protected string CanonicalState(ReadonlySequence state) {
     81    protected string CanonicalState(string state) {
    8282      if (useCanonicalState) {
    83         if (state.IsTerminal)
    84           return problem.CanonicalRepresentation(state.ToString());
     83        if (problem.Grammar.IsTerminal(state))
     84          return problem.CanonicalRepresentation(state);
    8585        else {
    8686          // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
     
    8888          // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
    8989          // solution: we disable the state rS4
    90           return problem.CanonicalRepresentation(state.ToString()) + state.Length;
     90          return problem.CanonicalRepresentation(state) + state.Length;
    9191        }
    9292      } else
    93         return state.ToString();
     93        return state;
    9494    }
    9595  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GrammarPolicy.cs

    r11770 r11793  
    88
    99namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
    10   // stores: tries, avg reward and max reward for each state
     10  // stores: tries, avg reward and max reward for each state (base class for RandomPolicy and TDPolicy
    1111  public abstract class GrammarPolicy : IGrammarPolicy {
    1212    protected Dictionary<string, double> avgReward;
    1313    protected Dictionary<string, int> tries;
    1414    protected Dictionary<string, double> maxReward;
    15     private readonly bool useCanonicalState;
    16     private readonly IProblem problem;
     15    protected readonly bool useCanonicalState;
     16    protected readonly IProblem problem;
    1717
    18     public GrammarPolicy(IProblem problem, bool useCanonicalState = false) {
     18    protected GrammarPolicy(IProblem problem, bool useCanonicalState = false) {
    1919      this.useCanonicalState = useCanonicalState;
    2020      this.problem = problem;
     
    2424    }
    2525
    26     public abstract bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState);
     26    public abstract bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx);
    2727
    28     public virtual void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
     28    public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
    2929      foreach (var state in stateTrajectory) {
    30         var s = CanonicalState(state.ToString());
     30        var s = CanonicalState(state);
    3131
    3232        if (!tries.ContainsKey(s)) tries.Add(s, 0);
     
    4747    }
    4848
    49     public double AvgReward(ReadonlySequence state) {
    50       var s = CanonicalState(state.ToString());
     49    public double AvgReward(string state) {
     50      var s = CanonicalState(state);
    5151      if (avgReward.ContainsKey(s)) return avgReward[s];
    5252      else return 0.0;
    5353    }
    5454
    55     public double MaxReward(ReadonlySequence state) {
    56       var s = CanonicalState(state.ToString());
     55    public double MaxReward(string state) {
     56      var s = CanonicalState(state);
    5757      if (maxReward.ContainsKey(s)) return maxReward[s];
    5858      else return 0.0;
    5959    }
    6060
    61     public virtual int GetTries(ReadonlySequence state) {
    62       var s = CanonicalState(state.ToString());
     61    public virtual int GetTries(string state) {
     62      var s = CanonicalState(state);
    6363      if (tries.ContainsKey(s)) return tries[s];
    6464      else return 0;
    6565    }
    6666
    67     public virtual double GetValue(ReadonlySequence state) {
     67    public virtual double GetValue(string state) {
    6868      return AvgReward(state);
    6969    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/IGrammarPolicy.cs

    r11770 r11793  
    88
    99namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
    10   public interface IGrammarPolicy : IPolicy<ReadonlySequence> {
     10  public interface IGrammarPolicy : IPolicy<string> {
    1111  }
    1212}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomPolicy.cs

    r11770 r11793  
    1313    }
    1414
    15     public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {
     15    public override bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
    1616      // never fail => allows re-visits of terminal states
    17       selectedState = afterStates.SelectRandom(random);
     17      selectedStateIdx = random.Next(afterStates.Count());
    1818      return true;
    1919    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs

    r11770 r11793  
    22using System.Collections.Generic;
    33using System.Configuration;
     4using System.Diagnostics;
    45using System.Linq;
    56using System.Security.Policy;
     
    78using System.Threading;
    89using System.Threading.Tasks;
     10using HeuristicLab.Algorithms.Bandits.BanditPolicies;
    911using HeuristicLab.Common;
    1012using HeuristicLab.Problems.GrammaticalOptimization;
     
    1517    private readonly HashSet<string> done;
    1618    private readonly Dictionary<string, double> v;
    17     private EpsGreedyPolicy epsGreedy;
     19    private IGrammarPolicy epsGreedy;
    1820
    1921    public TDPolicy(IProblem problem, bool useCanonicalRepresentation = false)
     
    2123      this.done = new HashSet<string>();
    2224      this.v = new Dictionary<string, double>();
    23       this.epsGreedy = new EpsGreedyPolicy(problem, useCanonicalRepresentation, 0.1);
     25      this.epsGreedy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.1), useCanonicalRepresentation);
    2426    }
    2527
    26     public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {
     28    public override bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) {
    2729      // only select states that are not yet done
    2830      afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a.ToString()))).ToArray();
    2931      if (!afterStates.Any()) {
    3032        // fail because all follow states have already been visited => also disable the current state
    31         done.Add(CanonicalState(curState.ToString()));
    32         selectedState = null;
     33        done.Add(CanonicalState(curState));
     34        selectedStateIdx = -1;
    3335        return false;
    3436      }
     37      throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable
    3538
    3639      //return epsGreedy.TrySelect(random, curState, afterStates, out selectedState);
    3740
    3841      var bestQ = double.NegativeInfinity;
    39       selectedState = null;
     42      int idx = -1;
     43      selectedStateIdx = -1;
    4044      foreach (var state in afterStates) {
     45        idx++;
    4146        // try each state at least once
    4247        if (GetTries(state) == 0) {
    43           selectedState = state;
     48          selectedStateIdx = idx;
    4449          return true;
    4550        }
     
    4752        if (q > bestQ) {
    4853          bestQ = q;
    49           selectedState = state;
     54          selectedStateIdx = idx;
    5055        }
    5156      }
    5257
     58      Debug.Assert(selectedStateIdx > -1);
    5359      return true;
    5460    }
    5561
    56     private double V(ReadonlySequence state) {
    57       var s = CanonicalState(state.ToString());
     62    private double V(string state) {
     63      var s = CanonicalState(state);
    5864      if (v.ContainsKey(s)) return v[s];
    5965      else return 0.0;
    6066    }
    6167
    62     public override void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
     68    public override void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
    6369      base.UpdateReward(stateTrajectory, reward);
    6470      epsGreedy.UpdateReward(stateTrajectory, reward);
    6571      // the last state could be terminal
    6672      var lastState = stateTrajectory.Last();
    67       if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));
     73      if (problem.Grammar.IsTerminal(lastState)) done.Add(CanonicalState(lastState));
    6874
    69       v[CanonicalState(lastState.ToString())] = V(lastState) + 1.0 / GetTries(lastState) * (reward - V(lastState));
     75      v[CanonicalState(lastState)] = V(lastState) + 1.0 / GetTries(lastState) * (reward - V(lastState));
    7076
    7177      foreach (var p in stateTrajectory.Zip(stateTrajectory.Skip(1), Tuple.Create).Reverse()) {
     
    7379        var next = p.Item2;
    7480
    75         v[CanonicalState(cur.ToString())] = V(cur) + 1.0 / GetTries(cur) * (V(next) - V(cur));
     81        v[CanonicalState(cur)] = V(cur) + 1.0 / GetTries(cur) * (V(next) - V(cur));
    7682        //v[CanonicalState(cur.ToString())] = V(cur) + 0.1 * (V(next) - V(cur));
    7783      }
     
    7985    }
    8086
    81     public override double GetValue(ReadonlySequence state) {
     87    public override double GetValue(string state) {
    8288      return V(state);
    8389    }
    8490
    85     public void Reset() {
     91    public override void Reset() {
    8692      base.Reset();
    8793      epsGreedy.Reset();
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

    r11770 r11793  
    6666    <Compile Include="Bandits\IBandit.cs" />
    6767    <Compile Include="Bandits\TruncatedNormalBandit.cs" />
    68     <Compile Include="GrammarPolicies\BoltzmanExplorationPolicy.cs" />
    6968    <Compile Include="GrammarPolicies\GenericGrammarPolicy.cs">
    7069      <SubType>Code</SubType>
    7170    </Compile>
     71    <Compile Include="GrammarPolicies\RandomPolicy.cs">
     72      <SubType>Code</SubType>
     73    </Compile>
    7274    <Compile Include="GrammarPolicies\TDPolicy.cs" />
    73     <Compile Include="GrammarPolicies\UCTPolicy.cs" />
    7475    <Compile Include="GrammarPolicies\GrammarPolicy.cs" />
    75     <Compile Include="GrammarPolicies\EpsGreedyPolicy.cs" />
    76     <Compile Include="GrammarPolicies\GreedyPolicy.cs" />
    7776    <Compile Include="GrammarPolicies\IGrammarPolicy.cs" />
    78     <Compile Include="GrammarPolicies\RandomNoResamplingPolicy.cs" />
    79     <Compile Include="GrammarPolicies\RandomPolicy.cs" />
    8077    <Compile Include="IPolicy.cs" />
    8178    <Compile Include="IBanditPolicy.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs

    r11770 r11793  
    1111  // here we assume that a reward is only recieved at the end of the episode and the update is done only after an episode is complete
    1212  // we also assume that the policy can fail to select one of the followStates
    13   public interface IPolicy<TState> {
    14     bool TrySelect(Random random, TState curState, IEnumerable<TState> afterStates, out TState selectedState); // selectedState \in afterStates
     13  public interface IPolicy<in TState> {
     14    bool TrySelect(Random random, TState curState, IEnumerable<TState> afterStates, out int selectedStateIdx); // selectedState \in afterStates
    1515
    1616    // state-trajectory are the states of the episode, at the end we recieved the reward (only for the terminal state)
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveBreadthFirstSearch.cs

    r11732 r11793  
    4242
    4343        char nt = phrase.FirstNonTerminal;
    44         int ntIdx;
    4544
    4645        var alts = grammar.GetAlternatives(nt);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/RandomSearch.cs

    r11732 r11793  
    11using System;
    2 using System;
    3 using System.Collections.Generic;
    4 using System.Linq;
    5 using System.Text;
    62using HeuristicLab.Problems.GrammaticalOptimization;
    73
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs

    r11792 r11793  
    2424  // 3) Collect reward and update policy (feedback: state of visited rewards from step 2)
    2525  public class SequentialSearch {
     26    // only for storing states so that it is not necessary to allocate new state strings whenever we select a follow state using the policy
     27    private class TreeNode {
     28      public int randomTries;
     29      public string phrase;
     30      public Sequence alternative;
     31      public TreeNode[] children;
     32
     33      public TreeNode(string phrase, Sequence alternative) {
     34        this.alternative = alternative;
     35        this.phrase = phrase;
     36      }
     37    }
     38
    2639
    2740    public event Action<string, double> FoundNewBestSolution;
     
    3447    private readonly IGrammarPolicy behaviourPolicy;
    3548    private readonly IGrammarPolicy greedyPolicy;
     49    private TreeNode rootNode;
     50
     51    private int tries;
    3652    private int maxSearchDepth;
    3753
    3854    private double bestQuality;
    3955    private string bestPhrase;
    40     private int tries;
    41     private readonly List<ReadonlySequence> stateChain;
     56    private readonly List<string> stateChain;
    4257
    4358    public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
     
    4762      this.randomTries = randomTries;
    4863      this.behaviourPolicy = behaviourPolicy;
    49       this.greedyPolicy = new GreedyPolicy(problem, false);
    50       this.stateChain = new List<ReadonlySequence>();
    51       this.cache = new Dictionary<ReadonlySequence, ReadonlySequence[]>();
     64      this.greedyPolicy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.0), false);
     65      this.stateChain = new List<string>();
    5266    }
    5367
    5468    public void Run(int maxIterations) {
    5569      bestQuality = double.MinValue;
    56       //InitPolicies(problem.Grammar);
    5770      Reset();
    5871
     
    7992
    8093
    81     private ReadonlySequence SampleSentence(IGrammar grammar) {
    82       ReadonlySequence phrase;
     94    private Sequence SampleSentence(IGrammar grammar) {
     95      Sequence phrase;
    8396      do {
    8497        stateChain.Clear();
    85         phrase = new ReadonlySequence(grammar.SentenceSymbol);
     98        phrase = new Sequence(rootNode.phrase);
    8699        //var startPhrase = new Sequence("a*b+c*d+e*f+E");
    87100      } while (!Done() && !TryCompleteSentence(grammar, ref phrase));
     
    89102    }
    90103
    91     private bool TryCompleteSentence(IGrammar g, ref ReadonlySequence phrase) {
     104    private bool TryCompleteSentence(IGrammar g, ref Sequence phrase) {
    92105      if (phrase.Length > maxLen) throw new ArgumentException();
    93106      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
    94107      var curDepth = 0;
    95 
    96       stateChain.Add(phrase);
     108      var n = rootNode;
     109      stateChain.Add(n.phrase);
     110
    97111      while (!phrase.IsTerminal) {
    98112
    99         var newPhrases = GenerateFollowStates(g, phrase);
    100        
    101         throw new NotImplementedException(); // TODO: reintroduce random-trie checking once the tree of all states has been reintroduced
     113
    102114        //if (n.randomTries < randomTries) {
    103115        //  n.randomTries++;
    104         //  treeDepth = Math.Max(treeDepth, curDepth);
    105         //  lastNode = n;
    106         //  return g.CompleteSentenceRandomly(random, phrase, maxLen);
     116        //  curDepth = Math.Max(curDepth, curDepth);
     117        //  g.CompleteSentenceRandomly(random, phrase, maxLen);
     118        //  return true;
    107119        //} else {
    108 
    109 
    110           // => select using bandit policy
    111           // failure means we simply restart
    112           if (!behaviourPolicy.TrySelect(random, phrase, newPhrases, out phrase)) {
    113             return false;
    114           }
    115         // }
    116         stateChain.Add(phrase);
     120        // => select using bandit policy
     121        // failure means we simply restart
     122        GenerateFollowStates(n); // creates child nodes for node n
     123
     124        int selectedChildIdx;
     125        if (!behaviourPolicy.TrySelect(random, n.phrase, n.children.Select(ch => ch.phrase), out selectedChildIdx)) {
     126          return false;
     127        }
     128        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, n.children[selectedChildIdx].alternative);
     129
     130        // prepare for next iteration
     131        n = n.children[selectedChildIdx];
     132        stateChain.Add(n.phrase);
    117133        curDepth++;
     134        //}
    118135      } // while
    119136
     
    123140
    124141
    125     private readonly Dictionary<ReadonlySequence, ReadonlySequence[]> cache;
    126     private IEnumerable<ReadonlySequence> GenerateFollowStates(IGrammar g, ReadonlySequence phrase) {
    127       throw new NotImplementedException();
    128       // TODO: Replace caching by a tree of all states. tree is only used for easily retrieving the follow-states of a state
    129       ReadonlySequence[] follow;
    130       //if (!cache.TryGetValue(phrase, out follow)) {
     142    private IEnumerable<string> GenerateFollowStates(TreeNode n) {
     143      // create children on the first visit
     144      if (n.children == null) {
     145        var g = problem.Grammar;
     146        // tree is only used for easily retrieving the follow-states of a state
     147        var phrase = new Sequence(n.phrase);
    131148        char nt = phrase.FirstNonTerminal;
    132149
     
    137154        var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
    138155
    139         follow = new ReadonlySequence[alts.Count()];
     156        var children = new TreeNode[alts.Count()];
    140157        int idx = 0;
    141158        foreach (var alt in alts) {
    142159          var newPhrase = new Sequence(phrase); // clone
    143160          newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
    144           follow[idx++] = new ReadonlySequence(newPhrase);
    145         }
    146       //  cache[phrase] = follow;
    147       //}
    148       return follow;
     161          children[idx++] = new TreeNode(newPhrase.ToString(), alt);
     162        }
     163        n.children = children;
     164      }
     165      return n.children.Select(ch => ch.phrase);
    149166    }
    150167
     
    161178      bestQuality = 0.0;
    162179      tries = 0;
    163       cache.Clear();
     180      rootNode = new TreeNode(problem.Grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
    164181    }
    165182
    166183    public bool Done() {
    167       var g = problem.Grammar;
    168       var startState = new ReadonlySequence(g.SentenceSymbol);
    169       var follow = GenerateFollowStates(g, startState);
    170       ReadonlySequence selectedState;
    171       return !behaviourPolicy.TrySelect(random, startState, follow, out selectedState);
     184      int selectedStateIdx;
     185      return !behaviourPolicy.TrySelect(random, rootNode.phrase, GenerateFollowStates(rootNode), out selectedStateIdx);
    172186    }
    173187
     
    176190      Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);
    177191
    178       // use greedy strategy to generate the currently prefered sentence
    179       var phrase = new ReadonlySequence(problem.Grammar.SentenceSymbol);
     192      // use behaviour strategy to generate the currently prefered sentence
    180193      var policy = behaviourPolicy;
    181       while (!phrase.IsTerminal) {
     194
     195      var n = rootNode;
     196
     197      while (n != null) {
     198        var phrase = n.phrase;
    182199        Console.ForegroundColor = ConsoleColor.White;
    183200        Console.WriteLine("{0,-30}", phrase);
    184         var newPhrases = GenerateFollowStates(problem.Grammar, phrase);
    185         if (!newPhrases.Any()) break;
    186         var values = newPhrases.Select(p => policy.GetValue(p));
     201        var children = n.children;
     202        if (children == null || !children.Any()) break;
     203        var values = children.Select(ch => policy.GetValue(ch.phrase));
    187204        var maxValue = values.Max();
    188205        if (maxValue == 0) maxValue = 1.0;
    189206
    190207        // write phrases
    191         foreach (var p in newPhrases) {
    192           SetColorForValue(policy.GetValue(p) / maxValue);
    193           Console.Write(" {0,-4}", p.Subsequence(Math.Max(0, p.Length - 3), Math.Min(3, p.Length)));
     208        foreach (var ch in children) {
     209          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
     210          Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
    194211        }
    195212        Console.WriteLine();
    196213
    197214        // write values
    198         foreach (var p in newPhrases) {
    199           SetColorForValue(policy.GetValue(p) / maxValue);
    200           Console.Write(" {0:F2}", policy.GetValue(p) * 10.0);
     215        foreach (var ch in children) {
     216          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
     217          Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
    201218        }
    202219        Console.WriteLine();
    203220
    204221        // write tries
    205         foreach (var p in newPhrases) {
    206           SetColorForValue(policy.GetValue(p) / maxValue);
    207           Console.Write(" {0,4}", policy.GetTries(p));
     222        foreach (var ch in children) {
     223          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
     224          Console.Write(" {0,4}", policy.GetTries(ch.phrase));
    208225        }
    209226        Console.WriteLine();
    210         if (!policy.TrySelect(random, phrase, newPhrases, out phrase)) {
     227        int selectedChildIdx;
     228        if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) {
    211229          break;
    212230        }
     231        n = n.children[selectedChildIdx];
    213232      }
    214233
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestSequence.cs

    r11730 r11793  
    4949          var nt = s.FirstNonTerminal;
    5050          Assert.Fail();
    51         }
    52         catch (IndexOutOfRangeException e) {
     51        } catch (IndexOutOfRangeException e) {
    5352        }
    5453      }
     
    201200          s.ReplaceAt(0, 4, t);
    202201          Assert.Fail();
    203         }
    204         catch (ArgumentException) { }
     202        } catch (ArgumentException) { }
    205203
    206204        s.ReplaceAt(0, 2, t); // should work
     
    209207          s.ReplaceAt(0, 3, t);
    210208          Assert.Fail();
    211         }
    212         catch (ArgumentException) { }
     209        } catch (ArgumentException) { }
    213210
    214211        try {
    215212          s.ReplaceAt(1, 2, t);
    216213          Assert.Fail();
    217         }
    218         catch (ArgumentException) { }
     214        } catch (ArgumentException) { }
    219215
    220216        s.ReplaceAt(1, 1, new Sequence("A")); // should work
     
    223219          s.ReplaceAt(-1, 2, t);
    224220          Assert.Fail();
    225         }
    226         catch (ArgumentException) { }
     221        } catch (ArgumentException) { }
    227222
    228223      }
     
    267262        Assert.AreEqual("AA", sub.ToString());
    268263      }
     264      {
     265        var s = new Sequence("aaaAA");
     266        var sub = s.Subsequence(2, 3);
     267
     268        Assert.AreEqual(1, sub.FirstNonTerminalIndex);
     269        Assert.AreEqual("aAA", sub.ToString());
     270      }
     271    }
     272
     273    [TestMethod]
     274    public void TestReadonlySequence() {
     275      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
     276      {
     277        var s = new ReadonlySequence("AAaaaAA");
     278        var sub = s.Subsequence(0, 7);
     279
     280        Assert.AreEqual(0, sub.FirstNonTerminalIndex);
     281        Assert.AreEqual("AAaaaAA", sub.ToString());
     282      }
     283      {
     284        var s = new ReadonlySequence("AAaaaAA");
     285        var sub = s.Subsequence(0, 3);
     286
     287        Assert.AreEqual(0, sub.FirstNonTerminalIndex);
     288        Assert.AreEqual("AAa", sub.ToString());
     289
     290        sub = sub.Subsequence(1, 2);
     291        Assert.AreEqual(0, sub.FirstNonTerminalIndex);
     292        Assert.AreEqual("Aa", sub.ToString());
     293      }
     294      {
     295        var s = new ReadonlySequence("AAaaaAA");
     296        var sub = s.Subsequence(2, 3);
     297
     298        Assert.AreEqual(-1, sub.FirstNonTerminalIndex);
     299        Assert.AreEqual("aaa", sub.ToString());
     300      }
     301      {
     302        var s = new ReadonlySequence("AAaaaAA");
     303        var sub = s.Subsequence(2, 4);
     304
     305        Assert.AreEqual(3, sub.FirstNonTerminalIndex);
     306        Assert.AreEqual("aaaA", sub.ToString());
     307      }
     308      {
     309        var s = new ReadonlySequence("AAaaaAA");
     310        var sub = s.Subsequence(5, 2);
     311
     312        Assert.AreEqual(0, sub.FirstNonTerminalIndex);
     313        Assert.AreEqual("AA", sub.ToString());
     314      }
     315      {
     316        var s = new ReadonlySequence("aaaAA");
     317        var sub = s.Subsequence(2, 3);
     318
     319        Assert.AreEqual(1, sub.FirstNonTerminalIndex);
     320        Assert.AreEqual("aAA", sub.ToString());
     321      }
     322
    269323    }
    270324  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs

    r11770 r11793  
    2727    private readonly int numPhrases;
    2828    private readonly int phraseLen;
    29     private readonly int numOptimalPhrases;
    30     private readonly int numDecoyPhrases;
    3129    private readonly double correctReward;
    3230    private readonly double decoyReward;
     
    122120               + phrases.Intersect(decoyPhrases).Count() * decoyReward;
    123121
    124 
    125 
    126122      return reward;
    127123    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs

    r11732 r11793  
    201201    }
    202202
     203    public bool IsTerminal(string phrase) {
     204      // reverse because for our grammars and left-canonical derivation it is more likely that NTs occur near the end of the sequence
     205      return phrase.Reverse().All(IsTerminal);
     206    }
     207
    203208    public bool IsNonTerminal(char symbol) {
    204209      return nonTerminalSymbols.Contains(symbol);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs

    r11792 r11793  
    4040
    4141    public string CanonicalRepresentation(string terminalPhrase) {
    42       throw new NotImplementedException();
    4342      return terminalPhrase;
    4443    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IGrammar.cs

    r11730 r11793  
    2323
    2424    bool IsTerminal(char symbol);
     25    bool IsTerminal(string phrase);
    2526    bool IsNonTerminal(char symbol);
    2627  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs

    r11792 r11793  
    88    double BestKnownQuality(int maxLen);
    99    IGrammar Grammar { get; }
    10     double Evaluate(ReadonlySequence sentence);
    11     ReadonlySequence CanonicalRepresentation(ReadonlySequence terminalPhrase);
     10    double Evaluate(string sentence);
     11    string CanonicalRepresentation(string terminalPhrase);
    1212  }
    1313}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs

    r11792 r11793  
    8181
    8282    public string CanonicalRepresentation(string terminalPhrase) {
    83       throw new NotImplementedException();
    8483      return terminalPhrase;
    8584    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/ReadonlySequence.cs

    r11742 r11793  
    11using System;
     2using System.Collections.Generic;
     3using System.Diagnostics;
     4using System.Linq;
     5using System.Text;
    26
    37namespace HeuristicLab.Problems.GrammaticalOptimization {
    48  public class ReadonlySequence : Sequence {
     9    private int symbolsOffset = 0; // the sequence does not have to start with the first symbol of the symbols array (when we reuse these arrays)
     10
     11    // cloning constructor for readonly sequences
     12    // does not allocate the symbols array of the base class
     13    // instead: reuse the symbols array (both sequences are readonly)
     14    private ReadonlySequence(ReadonlySequence original)
     15      : base() {
     16      base.symbols = original.symbols;
     17      this.symbolsOffset = original.symbolsOffset;
     18    }
    519    public ReadonlySequence(string s)
    620      : base(s, s.Length) {
     
    1933    }
    2034
     35    public override char this[int idx] {
     36      get {
     37        return base[idx + symbolsOffset];
     38      }
     39      set {
     40        throw new NotSupportedException();
     41      }
     42    }
     43
     44    public override IEnumerator<char> GetEnumerator() {
     45      return symbols.Skip(symbolsOffset).Take(Length).GetEnumerator();
     46    }
     47
     48    public override string ToString() {
     49      var sb = new StringBuilder(Length);
     50      sb.Append(symbols, symbolsOffset, Length);
     51      return sb.ToString();
     52    }
     53
     54    public new ReadonlySequence Subsequence(int startIdx, int len) {
     55      if (startIdx < 0 || len < 0) throw new ArgumentException();
     56      if (startIdx >= this.Length) throw new ArgumentException();
     57      if (startIdx + len > this.Length) throw new ArgumentException();
     58      var subsequence = new ReadonlySequence(this) { symbolsOffset = startIdx + this.symbolsOffset, Length = len };
     59
     60
     61      if (FirstNonTerminalIndex < 0) {
     62        subsequence.FirstNonTerminalIndex = -1;
     63      } else if (FirstNonTerminalIndex < startIdx) {
     64        // need to find first nt in subsequence
     65        subsequence.FirstNonTerminalIndex = -1;
     66        for (int i = 0; subsequence.FirstNonTerminalIndex == -1 && i < len; i++) {
     67          if (subsequence[i] >= 'A' && subsequence[i] <= 'Z') subsequence.FirstNonTerminalIndex = i;
     68        }
     69      } else if (FirstNonTerminalIndex >= startIdx && FirstNonTerminalIndex < startIdx + len) {
     70        subsequence.FirstNonTerminalIndex = FirstNonTerminalIndex - startIdx;
     71      } else {
     72        Debug.Assert(FirstNonTerminalIndex >= startIdx + len);
     73        subsequence.FirstNonTerminalIndex = -1;
     74      }
     75      return subsequence;
     76    }
     77
    2178    public override bool Equals(object obj) {
    22       var other = obj as Sequence;
     79      var other = obj as ReadonlySequence;
    2380      if (other == null) return false;
    2481      if (other.Length != this.Length) return false;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPairProblem.cs

    r11742 r11793  
    1717    public RoyalPairProblem() {
    1818      this.grammar = new Grammar(grammarString);
     19      // TODO: allow configuration of the number of symbols
    1920    }
    2021
     
    3536
    3637    public string CanonicalRepresentation(string terminalPhrase) {
     38      throw new NotImplementedException();
    3739      return terminalPhrase;
    3840    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSymbolProblem.cs

    r11742 r11793  
    1717    public RoyalSymbolProblem() {
    1818      this.grammar = new Grammar(grammarString);
     19      //TODO: allow configuration of the number of symbols
    1920    }
    2021
     
    3132      // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
    3233      Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
    33       return regex.Matches(sentence).Count;
     34      return regex.Matches(sentence.ToString()).Count;
    3435    }
    3536    public string CanonicalRepresentation(string terminalPhrase) {
     37      throw new NotImplementedException();
    3638      return terminalPhrase;
    3739    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalTreeProblem.cs

    r11742 r11793  
    3030    }
    3131    public string CanonicalRepresentation(string terminalPhrase) {
     32      throw new NotImplementedException();
    3233      return terminalPhrase;
    3334    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs

    r11770 r11793  
    101101
    102102    public string CanonicalRepresentation(string terminalPhrase) {
    103       //return terminalPhrase;
    104103      string oldPhrase;
    105104      do {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Sequence.cs

    r11742 r11793  
    2222    private int len;
    2323    private int idxOfFirstNt;
    24     private readonly char[] symbols;
     24    protected char[] symbols;
    2525
    26     public char this[int idx] {
     26    public virtual char this[int idx] {
    2727      get { return symbols[idx]; }
    2828      set { throw new NotSupportedException(); }
     
    3131    public int Length {
    3232      get { return len; }
    33       private set { len = value; }
     33      protected set { len = value; }
    3434    }
    3535
     
    4040    public int FirstNonTerminalIndex {
    4141      get { return idxOfFirstNt; }
     42      protected set { idxOfFirstNt = value; }
    4243    }
    4344
    4445    public char FirstNonTerminal {
    45       get { return symbols[idxOfFirstNt]; }
     46      get { return this[idxOfFirstNt]; }
    4647    }
    4748
     
    9091    }
    9192
     93    // empty constructor does not allocate the symbol array
     94    protected Sequence() { }
     95
    9296    public virtual void ReplaceAt(int position, int len, Sequence replacement) {
    9397      if (replacement == null) throw new ArgumentNullException();
     
    111115        idxOfFirstNt = -1;
    112116        for (int i = startIdxOfRemainingPart; idxOfFirstNt == -1 && i < Length; i++) {
    113           if (symbols[i] >= 'A' && symbols[i] <= 'Z') idxOfFirstNt = i;
     117          if (this[i] >= 'A' && this[i] <= 'Z') idxOfFirstNt = i;
    114118        }
    115119      }
    116120    }
    117121
    118     public IEnumerator<char> GetEnumerator() {
     122    public virtual IEnumerator<char> GetEnumerator() {
    119123      return symbols.AsEnumerable().Take(len).GetEnumerator();
    120124    }
     
    143147        subsequence.idxOfFirstNt = -1;
    144148        for (int i = 0; subsequence.idxOfFirstNt == -1 && i < len; i++) {
    145           if (subsequence.symbols[i] >= 'A' && subsequence.symbols[i] <= 'Z') subsequence.idxOfFirstNt = i;
     149          if (subsequence[i] >= 'A' && subsequence[i] <= 'Z') subsequence.idxOfFirstNt = i;
    146150        }
    147151      } else if (idxOfFirstNt >= startIdx && idxOfFirstNt < startIdx + len) {
    148         subsequence.idxOfFirstNt = idxOfFirstNt;
     152        subsequence.idxOfFirstNt = idxOfFirstNt - startIdx;
    149153      } else {
    150154        Debug.Assert(idxOfFirstNt >= startIdx + len);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11792 r11793  
    2424      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
    2525
    26       RunDemo();
     26      //RunDemo();
    2727      RunGridTest();
    2828    }
     
    191191      // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0.0, phrasesAsSets: true);
    192192
    193       var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
     193      //var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
    194194      // Ant
    195195      // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
    196196      // GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant
    197       // very good results with:       var alg = new SequentialSearch(problem, 17, random, 0,
    198       // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));
    199 
    200       //var problem = new SantaFeAntProblem();
     197
     198      var problem = new SantaFeAntProblem();
    201199      //var problem = new SymbolicRegressionProblem("Tower");
    202200      //var problem = new PalindromeProblem();
     
    207205      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
    208206      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
    209       var alg = new SequentialSearch(problem, 23, random, 0,
    210         new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.2), true));
     207      var alg = new SequentialSearch(problem, 17, random, 0,
     208        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new BoltzmannExplorationPolicy(10), true));
    211209      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
    212210      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
Note: See TracChangeset for help on using the changeset viewer.