Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/12/15 21:23:01 (9 years ago)
Author:
gkronber
Message:

#2283: implemented test problems for MCTS

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

    r11744 r11747  
    4545    <Compile Include="AlternativesSampler.cs" />
    4646    <Compile Include="AlternativesContextSampler.cs" />
     47    <Compile Include="MctsQLearningSampler.cs" />
    4748    <Compile Include="TemporalDifferenceTreeSearchSampler.cs" />
    4849    <Compile Include="ExhaustiveRandomFirstSearch.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsContextualSampler.cs

    r11745 r11747  
    1515      public int randomTries;
    1616      public int tries;
     17      public List<TreeNode> parents;
    1718      public TreeNode[] children;
    1819      public bool done = false;
     
    2122        this.ident = id;
    2223        this.alt = alt;
     24        this.parents = new List<TreeNode>();
    2325      }
    2426
     
    2830    }
    2931
     32    private Dictionary<string, TreeNode> treeNodes;
     33    private TreeNode GetTreeNode(string id, ReadonlySequence alt) {
     34      TreeNode n;
     35      var canonicalId = problem.CanonicalRepresentation(id);
     36      if (!treeNodes.TryGetValue(canonicalId, out n)) {
     37        n = new TreeNode(canonicalId, alt);
     38        tries.TryGetValue(canonicalId, out n.tries);
     39        treeNodes[canonicalId] = n;
     40      }
     41      return n;
     42    }
    3043
    3144    public event Action<string, double> FoundNewBestSolution;
     
    5164      this.v = new Dictionary<string, double>(1000000);
    5265      this.tries = new Dictionary<string, int>(1000000);
     66      treeNodes = new Dictionary<string, TreeNode>();
    5367    }
    5468
     
    5771      InitPolicies(problem.Grammar);
    5872      for (int i = 0; !rootNode.done && i < maxIterations; i++) {
    59         var sentence = SampleSentence(problem.Grammar).ToString();
    60         var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
    61         Debug.Assert(quality >= 0 && quality <= 1.0);
    62         DistributeReward(quality);
    63 
    64         RaiseSolutionEvaluated(sentence, quality);
    65 
    66         if (quality > bestQuality) {
    67           bestQuality = quality;
    68           RaiseFoundNewBestSolution(sentence, quality);
     73        bool success;
     74        var sentence = SampleSentence(problem.Grammar, out success).ToString();
     75        if (success) {
     76          var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
     77          Debug.Assert(quality >= 0 && quality <= 1.0);
     78          DistributeReward(quality);
     79
     80          RaiseSolutionEvaluated(sentence, quality);
     81
     82          if (quality > bestQuality) {
     83            bestQuality = quality;
     84            RaiseFoundNewBestSolution(sentence, quality);
     85          }
    6986        }
    7087      }
     
    7895      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.tries, V(n), bestQuality);
    7996      while (n.children != null) {
    80         Console.WriteLine("{0}", n.ident);
    81         double maxVForRow = n.children.Select(ch => V(ch)).Max();
     97        Console.WriteLine("{0,-30}", n.ident);
     98        double maxVForRow = n.children.Select(ch => Math.Min(1.0, Math.Max(0.0, V(ch)))).Max();
    8299        if (maxVForRow == 0) maxVForRow = 1.0;
    83100
    84101        for (int i = 0; i < n.children.Length; i++) {
    85102          var ch = n.children[i];
    86           Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
     103          Console.ForegroundColor = ConsoleEx.ColorForValue(Math.Min(1.0, V(ch)) / maxVForRow);
    87104          Console.Write("{0,5}", ch.alt);
    88105        }
     
    90107        for (int i = 0; i < n.children.Length; i++) {
    91108          var ch = n.children[i];
    92           Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
    93           Console.Write("{0,5:F2}", V(ch) * 10);
     109          Console.ForegroundColor = ConsoleEx.ColorForValue(Math.Min(1.0, V(ch)) / maxVForRow);
     110          Console.Write("{0,5:F2}", Math.Min(1.0, V(ch)) * 10);
    94111        }
    95112        Console.WriteLine();
    96113        for (int i = 0; i < n.children.Length; i++) {
    97114          var ch = n.children[i];
    98           Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
     115          Console.ForegroundColor = ConsoleEx.ColorForValue(Math.Min(1.0, V(ch)) / maxVForRow);
    99116          Console.Write("{0,5}", ch.done ? "X" : ch.tries.ToString());
    100117        }
     
    102119        Console.WriteLine();
    103120        //n.policy.PrintStats();
    104         n = n.children.Where(ch => !ch.done).OrderByDescending(c => V(c)).First();
     121        n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.tries).First();
    105122      }
    106123    }
     
    112129      this.tries.Clear();
    113130
    114       rootNode = new TreeNode(grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
     131      rootNode = GetTreeNode(grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
    115132      treeDepth = 0;
    116133      treeSize = 0;
    117134    }
    118135
    119     private Sequence SampleSentence(IGrammar grammar) {
     136    private Sequence SampleSentence(IGrammar grammar, out bool success) {
    120137      updateChain.Clear();
    121138      //var startPhrase = new Sequence("a*b+c*d+e*f+E");
    122139      var startPhrase = new Sequence(grammar.SentenceSymbol);
    123       return CompleteSentence(grammar, startPhrase);
    124     }
    125 
    126     private Sequence CompleteSentence(IGrammar g, Sequence phrase) {
     140      return CompleteSentence(grammar, startPhrase, out success);
     141    }
     142
     143    private Sequence CompleteSentence(IGrammar g, Sequence phrase, out bool success) {
    127144      if (phrase.Length > maxLen) throw new ArgumentException();
    128145      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
     
    136153          n.randomTries++;
    137154          treeDepth = Math.Max(treeDepth, curDepth);
     155          success = true;
    138156          return g.CompleteSentenceRandomly(random, phrase, maxLen);
    139157        } else {
     
    153171              newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
    154172              if (!newPhrase.IsTerminal) newPhrase = newPhrase.Subsequence(0, newPhrase.FirstNonTerminalIndex + 1);
    155               n.children[i++] = new TreeNode(newPhrase.ToString(), new ReadonlySequence(alt));
     173              var treeNode = GetTreeNode(newPhrase.ToString(), new ReadonlySequence(alt));
     174              treeNode.parents.Add(n);
     175              n.children[i++] = treeNode;
    156176            }
    157177            treeSize += n.children.Length;
     178            UpdateDone(n);
     179
     180            // it could happend that we already finished all variations starting from the branch
     181            // stop
     182            if (n.done) {
     183              success = false;
     184              return phrase;
     185            }
    158186          }
     187          //int selectedAltIdx = SelectRandom(random, n.children);
     188
    159189          // => select using eps-greedy
    160190          int selectedAltIdx = SelectEpsGreedy(random, n.children);
     
    167197
    168198          curDepth++;
     199
    169200
    170201          // prepare for next iteration
    171202          parent = n;
    172203          n = n.children[selectedAltIdx];
     204          //UpdateTD(parent, n, 0.0);
    173205        }
    174206      } // while
     
    181213
    182214      treeDepth = Math.Max(treeDepth, curDepth);
     215      success = true;
    183216      return phrase;
    184217    }
    185218
     219
     220    //private void UpdateTD(TreeNode parent, TreeNode child, double reward) {
     221    //  double alpha = 1.0;
     222    //  var vParent = V(parent);
     223    //  var vChild = V(child);
     224    //  if (double.IsInfinity(vParent)) vParent = 0.0;
     225    //  if (double.IsInfinity(vChild)) vChild = 0.0;
     226    //  UpdateV(parent, (alpha * (reward + vChild - vParent)));
     227    //}
     228
    186229    private void DistributeReward(double reward) {
     230
    187231      // iterate in reverse order (bottom up)
    188       updateChain.Reverse();
    189 
     232      //updateChain.Reverse();
     233      UpdateDone(updateChain.Last().Item1);
     234      //UpdateTD(updateChain.Last().Item2, updateChain.Last().Item1, reward);
     235      //return;
     236
     237      BackPropReward(updateChain.Last().Item1, reward);
     238      /*
    190239      foreach (var e in updateChain) {
    191240        var node = e.Item1;
    192         var parent = e.Item2;
     241        //var parent = e.Item2;
    193242        node.tries++;
    194         if (node.children != null && node.children.All(c => c.done)) {
    195           node.done = true;
    196         }
     243        //if (node.children != null && node.children.All(c => c.done)) {
     244        //  node.done = true;
     245        //}
    197246        UpdateV(node, reward);
    198247
    199248        // the reward for the parent is either the just recieved reward or the value of the best action so far
    200         double value = 0.0;
    201         if (parent != null) {
    202           var doneChilds = parent.children.Where(ch => ch.done);
    203           if (doneChilds.Any()) value = doneChilds.Select(ch => V(ch)).Max();
    204         }
     249        //double value = 0.0;
     250        //if (parent != null) {
     251        //  var doneChilds = parent.children.Where(ch => ch.done);
     252        //  if (doneChilds.Any()) value = doneChilds.Select(ch => V(ch)).Max();
     253        //}
    205254        //if (value > reward) reward = value;
    206       }
    207     }
     255      }*/
     256    }
     257
     258    private void BackPropReward(TreeNode n, double reward) {
     259      n.tries++;
     260      UpdateV(n, reward);
     261      foreach (var p in n.parents) BackPropReward(p, reward);
     262    }
     263
     264    private void UpdateDone(TreeNode n) {
     265      if (!n.done && n.children != null && n.children.All(c => c.done)) n.done = true;
     266      if (n.done) foreach (var p in n.parents) UpdateDone(p);
     267    }
     268
    208269
    209270    private Dictionary<string, double> v;
     
    219280        tries.Add(canonicalStr, 1);
    220281      } else {
    221         v[canonicalStr] = stateV + 0.005 * (reward - stateV);
    222         //v[canonicalStr] = stateV + (1.0 / tries[canonicalStr]) * (reward - stateV);
     282        //v[canonicalStr] = stateV + 0.005 * (reward - stateV);
     283        v[canonicalStr] = stateV + (1.0 / tries[canonicalStr]) * (reward - stateV);
    223284        tries[canonicalStr]++;
    224285      }
     
    229290      //var canonicalStr = n.ident;
    230291      double stateV;
    231 
     292      if (!tries.ContainsKey(canonicalStr)) return double.PositiveInfinity;
    232293      if (!v.TryGetValue(canonicalStr, out  stateV)) {
    233294        return 0.0;
     
    237298    }
    238299
     300    private int SelectRandom(Random random, TreeNode[] children) {
     301      return children.Select((ch, i) => Tuple.Create(ch, i)).Where(p => !p.Item1.done).SelectRandom(random).Item2;
     302    }
     303
    239304    private int SelectEpsGreedy(Random random, TreeNode[] children) {
    240305      if (random.NextDouble() < 0.2) {
    241 
    242         return children.Select((ch, i) => Tuple.Create(ch, i)).Where(p => !p.Item1.done).SelectRandom(random).Item2;
     306        return SelectRandom(random, children);
    243307      } else {
    244308        var bestQ = double.NegativeInfinity;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

    r11745 r11747  
    55using System.Text;
    66using HeuristicLab.Algorithms.Bandits;
     7using HeuristicLab.Common;
    78using HeuristicLab.Problems.GrammaticalOptimization;
    89
     
    1314      public int randomTries;
    1415      public IBanditPolicyActionInfo actionInfo;
     16      public TreeNode parent;
    1517      public TreeNode[] children;
    1618      public bool done = false;
    1719
    18       public TreeNode(string id) {
     20      public TreeNode(string id, TreeNode parent) {
    1921        this.ident = id;
     22        this.parent = parent;
    2023      }
    2124
     
    3538    private readonly IBanditPolicy policy;
    3639
    37     private List<TreeNode> updateChain;
     40    private TreeNode lastNode; // the bottom node in one episode
    3841    private TreeNode rootNode;
    3942
     
    7578      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.actionInfo.Tries, n.actionInfo.Value, bestQuality);
    7679      while (n.children != null) {
     80        Console.WriteLine("{0,-30}", n.ident);
     81        double maxVForRow = n.children.Select(ch => ch.actionInfo.Value).Max();
     82        if (maxVForRow == 0) maxVForRow = 1.0;
     83
     84        for (int i = 0; i < n.children.Length; i++) {
     85          var ch = n.children[i];
     86          SetColorForChild(ch, maxVForRow);
     87          Console.Write("{0,5}", ch.ident);
     88        }
    7789        Console.WriteLine();
    78         Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
    79         Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.actionInfo.Value * 10))));
    80         Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.actionInfo.Tries.ToString()))));
     90        for (int i = 0; i < n.children.Length; i++) {
     91          var ch = n.children[i];
     92          SetColorForChild(ch, maxVForRow);
     93          Console.Write("{0,5:F2}", ch.actionInfo.Value * 10);
     94        }
     95        Console.WriteLine();
     96        for (int i = 0; i < n.children.Length; i++) {
     97          var ch = n.children[i];
     98          SetColorForChild(ch, maxVForRow);
     99          Console.Write("{0,5}", ch.done ? "X" : ch.actionInfo.Tries.ToString());
     100        }
     101        Console.ForegroundColor = ConsoleColor.White;
     102        Console.WriteLine();
    81103        //n.policy.PrintStats();
    82         n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
    83       }
     104        //n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
     105        n = n.children.Where(ch=>!ch.done).OrderByDescending(c => c.actionInfo.Value).First();
     106      }
     107      Console.WriteLine("-----------------------");
     108    }
     109
     110    private void SetColorForChild(TreeNode ch, double maxVForRow) {
     111      //if (ch.done) Console.ForegroundColor = ConsoleColor.White;
     112      //else
     113      Console.ForegroundColor = ConsoleEx.ColorForValue(ch.actionInfo.Value / maxVForRow);
    84114    }
    85115
    86116    private void InitPolicies(IGrammar grammar) {
    87       this.updateChain = new List<TreeNode>();
    88 
    89       rootNode = new TreeNode(grammar.SentenceSymbol.ToString());
     117
     118
     119      rootNode = new TreeNode(grammar.SentenceSymbol.ToString(), null);
    90120      rootNode.actionInfo = policy.CreateActionInfo();
    91121      treeDepth = 0;
     
    94124
    95125    private Sequence SampleSentence(IGrammar grammar) {
    96       updateChain.Clear();
     126      lastNode = null;
    97127      var startPhrase = new Sequence(grammar.SentenceSymbol);
     128      //var startPhrase = new Sequence("a*b+c*d+e*f+E");
     129
    98130      return CompleteSentence(grammar, startPhrase);
    99131    }
     
    105137      var curDepth = 0;
    106138      while (!phrase.IsTerminal) {
    107         updateChain.Add(n);
    108139
    109140        if (n.randomTries < randomTries) {
    110141          n.randomTries++;
    111142          treeDepth = Math.Max(treeDepth, curDepth);
     143          lastNode = n;
    112144          return g.CompleteSentenceRandomly(random, phrase, maxLen);
    113145        } else {
     
    120152
    121153          if (n.randomTries == randomTries && n.children == null) {
    122             n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
     154            n.children = alts.Select(alt => new TreeNode(alt.ToString(), n)).ToArray(); // create a new node for each alternative
    123155            foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
    124156            treeSize += n.children.Length;
     
    138170      } // while
    139171
    140       updateChain.Add(n);
     172      lastNode = n;
    141173
    142174
     
    150182    private void DistributeReward(double reward) {
    151183      // iterate in reverse order (bottom up)
    152       updateChain.Reverse();
    153 
    154       foreach (var e in updateChain) {
    155         var node = e;
    156         if (node.done) node.actionInfo.Disable();
     184
     185      var node = lastNode;
     186      while (node != null) {
     187        if (node.done) node.actionInfo.Disable(reward);
    157188        if (node.children != null && node.children.All(c => c.done)) {
    158189          node.done = true;
    159           node.actionInfo.Disable();
     190          var bestActionValue = node.children.Select(c => c.actionInfo.Value).Max();
     191          node.actionInfo.Disable(bestActionValue);
    160192        }
    161193        if (!node.done) {
    162194          node.actionInfo.UpdateReward(reward);
    163195        }
     196        node = node.parent;
    164197      }
    165198    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/TemporalDifferenceTreeSearchSampler.cs

    r11744 r11747  
    3636    private readonly Random random;
    3737    private readonly int randomTries;
    38     private readonly IBanditPolicy policy;
    3938
    4039    private List<TreeNode> updateChain;
     
    4645
    4746
    48     public TemporalDifferenceTreeSearchSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
     47    public TemporalDifferenceTreeSearchSampler(IProblem problem, int maxLen, Random random, int randomTries) {
    4948      this.maxLen = maxLen;
    5049      this.problem = problem;
    5150      this.random = random;
    5251      this.randomTries = randomTries;
    53       this.policy = policy;
    5452    }
    5553
     
    7876      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.tries, n.q, bestQuality);
    7977      while (n.children != null) {
     78        Console.WriteLine("{0,-30}", n.ident);
     79        double maxVForRow = n.children.Select(ch => ch.q).Max();
     80        if (maxVForRow == 0) maxVForRow = 1.0;
     81
     82        for (int i = 0; i < n.children.Length; i++) {
     83          var ch = n.children[i];
     84          Console.ForegroundColor = ConsoleEx.ColorForValue(ch.q / maxVForRow);
     85          Console.Write("{0,5}", ch.ident);
     86        }
    8087        Console.WriteLine();
    81         Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
    82         Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.q * 10))));
    83         Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.tries.ToString()))));
     88        for (int i = 0; i < n.children.Length; i++) {
     89          var ch = n.children[i];
     90          Console.ForegroundColor = ConsoleEx.ColorForValue(ch.q / maxVForRow);
     91          Console.Write("{0,5:F2}", ch.q * 10);
     92        }
     93        Console.WriteLine();
     94        for (int i = 0; i < n.children.Length; i++) {
     95          var ch = n.children[i];
     96          Console.ForegroundColor = ConsoleEx.ColorForValue(ch.q / maxVForRow);
     97          Console.Write("{0,5}", ch.done ? "X" : ch.tries.ToString());
     98        }
     99        Console.ForegroundColor = ConsoleColor.White;
     100        Console.WriteLine();
    84101        //n.policy.PrintStats();
    85102        n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.q).First();
    86103      }
    87       //Console.ReadLine();
    88104    }
    89105
     
    127143          }
    128144          // => select using bandit policy
    129           int selectedAltIdx = SelectAction(random, n.children);
     145          int selectedAltIdx = SelectEpsGreedy(random, n.children);
    130146          Sequence selectedAlt = alts.ElementAt(selectedAltIdx);
    131147
     
    152168
    153169    // eps-greedy
    154     private int SelectAction(Random random, TreeNode[] children) {
     170    private int SelectEpsGreedy(Random random, TreeNode[] children) {
    155171      if (random.NextDouble() < 0.1) {
    156172
     
    158174      } else {
    159175        var bestQ = double.NegativeInfinity;
    160         var bestChildIdx = -1;
     176        var bestChildIdx = new List<int>();
    161177        for (int i = 0; i < children.Length; i++) {
    162178          if (children[i].done) continue;
    163           if (children[i].tries == 0) return i;
    164           if (children[i].q > bestQ) {
    165             bestQ = children[i].q;
    166             bestChildIdx = i;
     179          // if (children[i].tries == 0) return i;
     180          var q = children[i].q;
     181          if (q > bestQ) {
     182            bestQ = q;
     183            bestChildIdx.Clear();
     184            bestChildIdx.Add(i);
     185          } else if (q == bestQ) {
     186            bestChildIdx.Add(i);
    167187          }
    168188        }
    169         Debug.Assert(bestChildIdx > -1);
    170         return bestChildIdx;
     189        Debug.Assert(bestChildIdx.Any());
     190        return bestChildIdx.SelectRandom(random);
    171191      }
    172192    }
    173193
    174194    private void DistributeReward(double reward) {
    175       const double alpha = 0.1;
    176       const double gamma = 1;
    177       // iterate in reverse order (bottom up)
    178195      updateChain.Reverse();
    179       var nextQ = 0.0;
    180       foreach (var e in updateChain) {
    181         var node = e;
    182         node.tries++;
     196      foreach (var node in updateChain) {
    183197        if (node.children != null && node.children.All(c => c.done)) {
    184198          node.done = true;
    185199        }
    186         // reward is recieved only for the last action
    187         if (e == updateChain.First()) {
    188           node.q = node.q + alpha * (reward + gamma * nextQ - node.q);
    189           nextQ = node.q;
    190         } else {
    191           node.q = node.q + alpha * (0 + gamma * nextQ - node.q);
    192           nextQ = node.q;
    193         }
    194       }
     200      }
     201      updateChain.Reverse();
     202
     203      //const double alpha = 0.1;
     204      const double gamma = 1;
     205      double alpha;
     206      foreach (var p in updateChain.Zip(updateChain.Skip(1), Tuple.Create)) {
     207        var parent = p.Item1;
     208        var child = p.Item2;
     209        parent.tries++;
     210        alpha = 1.0 / parent.tries;
     211        //alpha = 0.01;
     212        parent.q = parent.q + alpha * (0 + gamma * child.q - parent.q);
     213      }
     214      // reward is recieved only for the last action
     215      var n = updateChain.Last();
     216      n.tries++;
     217      alpha = 1.0 / n.tries;
     218      //alpha = 0.1;
     219      n.q = n.q + alpha * reward;
    195220    }
    196221
Note: See TracChangeset for help on using the changeset viewer.