Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/09/15 14:57:28 (9 years ago)
Author:
gkronber
Message:

#2283 refactoring

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs

    r11732 r11742  
    1717    private readonly Random random;
    1818    private readonly int contextLen;
    19     private readonly IPolicy policy;
     19    private readonly IBanditPolicy policy;
    2020
    21     public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IPolicy policy) {
     21    public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IBanditPolicy policy) {
    2222      this.maxLen = maxLen;
    2323      this.problem = problem;
     
    4545
    4646
    47     private Dictionary<string, IPolicyActionInfo[]> contextActionInfos;
     47    private Dictionary<string, IBanditPolicyActionInfo[]> contextActionInfos;
    4848    private List<Tuple<string, int>> updateChain;
    4949
    5050    private void InitPolicies(IGrammar grammar) {
    51       this.contextActionInfos = new Dictionary<string, IPolicyActionInfo[]>();
     51      this.contextActionInfos = new Dictionary<string, IBanditPolicyActionInfo[]>();
    5252      this.updateChain = new List<Tuple<string, int>>();
    5353    }
     
    8282          var endIdx = Math.Min(startIdx + contextLen, ntIdx);
    8383          var lft = phrase.Subsequence(startIdx, endIdx - startIdx + 1).ToString();
    84           lft = problem.Hash(lft);
     84          lft = problem.CanonicalRepresentation(lft);
    8585          if (!contextActionInfos.ContainsKey(lft)) {
    8686            contextActionInfos.Add(lft, g.GetAlternatives(nt).Select(_ => policy.CreateActionInfo()).ToArray());
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs

    r11732 r11742  
    1616    private readonly Random random;
    1717    private readonly IProblem problem;
    18     private readonly IPolicy policy;
     18    private readonly IBanditPolicy policy;
    1919
    20     public AlternativesSampler(IProblem problem, IPolicy policy, int maxLen) {
     20    public AlternativesSampler(IProblem problem, IBanditPolicy policy, int maxLen) {
    2121      this.problem = problem;
    2222      this.maxLen = maxLen;
     
    4343
    4444
    45     private Dictionary<char, IPolicyActionInfo[]> ntActionInfos;
     45    private Dictionary<char, IBanditPolicyActionInfo[]> ntActionInfos;
    4646    private List<Tuple<char, int>> updateChain;
    4747
    4848    private void InitPolicies(IGrammar grammar) {
    49       this.ntActionInfos = new Dictionary<char, IPolicyActionInfo[]>();
     49      this.ntActionInfos = new Dictionary<char, IBanditPolicyActionInfo[]>();
    5050      this.updateChain = new List<Tuple<char, int>>();
    5151      foreach (var nt in grammar.NonTerminalSymbols) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

    r11732 r11742  
    4646    <Compile Include="AlternativesContextSampler.cs" />
    4747    <Compile Include="ExhaustiveRandomFirstSearch.cs" />
     48    <Compile Include="MctsContextualSampler.cs">
     49      <SubType>Code</SubType>
     50    </Compile>
    4851    <Compile Include="MctsSampler.cs" />
    4952    <Compile Include="ExhaustiveDepthFirstSearch.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

    r11732 r11742  
    1212      public string ident;
    1313      public int randomTries;
    14       public int policyTries;
    15       public IPolicyActionInfo actionInfo;
     14      public IBanditPolicyActionInfo actionInfo;
    1615      public TreeNode[] children;
    1716      public bool done = false;
     
    2221
    2322      public override string ToString() {
    24         return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, actionInfo);
     23        return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, actionInfo.Tries, done, actionInfo);
    2524      }
    2625    }
     
    3433    private readonly Random random;
    3534    private readonly int randomTries;
    36     private readonly IPolicy policy;
     35    private readonly IBanditPolicy policy;
    3736
    3837    private List<TreeNode> updateChain;
     
    4746    // }
    4847
    49     public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IPolicy policy) {
     48    public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
    5049      this.maxLen = maxLen;
    5150      this.problem = problem;
     
    7877    public void PrintStats() {
    7978      var n = rootNode;
    80       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
     79      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, n.actionInfo.Tries);
    8180      while (n.children != null) {
    8281        Console.WriteLine();
    8382        Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
    84         Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.randomTries + ch.policyTries))));
     83        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.actionInfo.Value * 10))));
     84        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.actionInfo.Tries.ToString()))));
    8585        //n.policy.PrintStats();
    86         n = n.children.OrderByDescending(c => c.policyTries).First();
     86        n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
    8787      }
    8888      Console.ReadLine();
     
    108108      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
    109109      TreeNode n = rootNode;
    110       bool done = phrase.IsTerminal;
    111110      var curDepth = 0;
    112       while (!done) {
     111      while (!phrase.IsTerminal) {
    113112        updateChain.Add(n);
    114113
     
    127126          if (n.randomTries == randomTries && n.children == null) {
    128127            n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
    129             //n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
    130128            foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
    131129            treeSize += n.children.Length;
    132130          }
    133           n.policyTries++;
    134131          // => select using bandit policy
    135132          int selectedAltIdx = policy.SelectAction(random, n.children.Select(c => c.actionInfo));
     
    140137
    141138          curDepth++;
    142 
    143           done = phrase.IsTerminal;
    144139
    145140          // prepare for next iteration
     
    153148      // the last node is a leaf node (sentence is done), so we never need to visit this node again
    154149      n.done = true;
    155       n.actionInfo.Disable();
    156150
    157151      treeDepth = Math.Max(treeDepth, curDepth);
     
    165159      foreach (var e in updateChain) {
    166160        var node = e;
     161        if (node.done) node.actionInfo.Disable();
    167162        if (node.children != null && node.children.All(c => c.done)) {
    168163          node.done = true;
     
    171166        if (!node.done) {
    172167          node.actionInfo.UpdateReward(reward);
    173           //policy.UpdateReward(action, reward / updateChain.Count);
    174168        }
    175169      }
Note: See TracChangeset for help on using the changeset viewer.