Context Navigation

← Previous Changeset
Next Changeset →

Changeset 11799

Timestamp:

01/19/15 20:09:12 (10 years ago)

Author:

gkronber

Message:

#2283: performance tuning and reactivated random-roll-out policy in sequential search

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization

Files:

: 1 added
: 16 edited

HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/BanditPolicies/GenericThompsonSamplingPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/Bandits/GaussianMixtureBandit.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs (modified) (6 diffs)
HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/Models/GaussianMixtureModel.cs (modified) (1 diff)
HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs (modified) (4 diffs)
HeuristicLab.Common/Extensions.cs (modified) (2 diffs)
HeuristicLab.Common/HeuristicLab.Common.csproj (modified) (1 diff)
HeuristicLab.Common/MostRecentlyUsedCache.cs (added)
HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/RoyalSequenceProblem.cs (modified) (4 diffs)
HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs (modified) (2 diffs)
HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs (modified) (2 diffs)
Main/Program.cs (modified) (10 diffs)

Legend:

: Unmodified
: Added
: Removed

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs ¶

-                      r11747
+                      r11799
                 : Math.Exp(beta * valueFunction(aInfo));
+      var bestAction = myActionInfos
+        .Select((aInfo, idx) => new { aInfo, idx })
+        .SampleProportional(random, w)
+        .Select(p => p.idx)
+        .First();
+      var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w);
       Debug.Assert(bestAction >= 0);
       return bestAction;

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GenericThompsonSamplingPolicy.cs ¶

r11742	r11799
39	39
40	40	public override string ToString() {
41		return string.Format("GenericThompsonSamplingPolicy(~~\"{0}\"~~)", model);
	41	return string.Format("GenericThompsonSamplingPolicy({0})", model);
42	42	}
43	43	}

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Bandits/GaussianMixtureBandit.cs ¶

r11731	r11799
44	44	double x = 0;
45	45	do {
46		var k = Enumerable.Range(0, componentProb[arm].Length).SampleProportional(random, componentProb[arm])~~.First()~~;
	46	var k = Enumerable.Range(0, componentProb[arm].Length).SampleProportional(random, componentProb[arm]);
47	47
48	48	var z = Rand.RandNormal(random);

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs ¶

-                      r11793
+                      r11799
     private readonly IProblem problem;
     private readonly IBanditPolicy banditPolicy;
-    //private readonly HashSet<string> done;
     public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
 …
       this.banditPolicy = banditPolicy;
       this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
-      //this.done = new HashSet<string>();
+    }
 …
         // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored)
         GetStateInfo(curState).Disable(0.0); // should the value be max of afterstate values instead of 0.0?
+        GetStateInfo(curState).Disable(afterStates.Select(afterState => GetStateInfo(afterState).Value).Max());
         selectedStateIdx = -1;
         return false;
 …
     public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
+      // the last state could be terminal
+      var lastState = stateTrajectory.Last();
+      if (problem.Grammar.IsTerminal(lastState)) {
+        GetStateInfo(lastState).Disable(reward);
+      }
+      foreach (var state in stateTrajectory) {
+        GetStateInfo(state).UpdateReward(reward);
+      // update remaining states
+      foreach (var state in stateTrajectory.Reverse().Skip(1)) {
+        GetStateInfo(state).UpdateReward(reward);
+        // only the last state can be terminal
+        if (problem.Grammar.IsTerminal(state)) {
+          GetStateInfo(state).Disable(reward);
+        }
+      }
+    }
 …
     public virtual void Reset() {
       stateInfo.Clear();
-      //done.Clear();
+    }
 …
     protected string CanonicalState(string state) {
       if (useCanonicalState) {
+        if (problem.Grammar.IsTerminal(state))
+          return problem.CanonicalRepresentation(state);
+        else {
+          // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
+          // e.g. if for the ant problem we have the phrase lllS (and we are limited to 4 symbols) and lllr as well as llll are explored
+          // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
+          // solution: we disable the state rS4
+          return problem.CanonicalRepresentation(state) + state.Length;
+        }
+        return problem.CanonicalRepresentation(state);
       } else
         return state;

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs ¶

r11793	r11799
35	35	return false;
36	36	}
37		throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable
	37	throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable (see genericgrammarpolicy)
38	38
39	39	//return epsGreedy.TrySelect(random, curState, afterStates, out selectedState);

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianMixtureModel.cs ¶

r11747	r11799
24	24
25	25	public double SampleExpectedReward(Random random) {
26		var k = Enumerable.Range(0, numComponents).SampleProportional(random, componentProbs)~~.First()~~;
	26	var k = Enumerable.Range(0, numComponents).SampleProportional(random, componentProbs);
27	27	return alglib.invnormaldistribution(random.NextDouble()) * Math.Sqrt(componentVars[k]) + componentMeans[k];
28	28	}

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs ¶

-                      r11793
+                      r11799
       Reset();
       for (int i = 0; bestQuality < 1.0 && !Done() && i < maxIterations; i++) {
+      for (int i = 0; /*!bestQuality.IsAlmost(1.0) && */!Done() && i < maxIterations; i++) {
         var phrase = SampleSentence(problem.Grammar);
         // can fail on the last sentence
 …
         stateChain.Clear();
         phrase = new Sequence(rootNode.phrase);
-        //var startPhrase = new Sequence("a*b+c*d+e*f+E");
       } while (!Done() && !TryCompleteSentence(grammar, ref phrase));
       return phrase;
 …
       while (!phrase.IsTerminal) {
+        //if (n.randomTries < randomTries) {
+        //  n.randomTries++;
+        //  curDepth = Math.Max(curDepth, curDepth);
+        //  g.CompleteSentenceRandomly(random, phrase, maxLen);
+        //  return true;
+        //} else {
+        // => select using bandit policy
+        // failure means we simply restart
+        GenerateFollowStates(n); // creates child nodes for node n
+        int selectedChildIdx;
+        if (!behaviourPolicy.TrySelect(random, n.phrase, n.children.Select(ch => ch.phrase), out selectedChildIdx)) {
+          return false;
+        }
+        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, n.children[selectedChildIdx].alternative);
+        // prepare for next iteration
+        n = n.children[selectedChildIdx];
+        stateChain.Add(n.phrase);
+        curDepth++;
+        //}
+        if (n.randomTries < randomTries) {
+          n.randomTries++;
+          maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
+          g.CompleteSentenceRandomly(random, phrase, maxLen);
+          return true;
+        } else {
+          // => select using bandit policy
+          // failure means we simply restart
+          GenerateFollowStates(n); // creates child nodes for node n
+          int selectedChildIdx;
+          if (!behaviourPolicy.TrySelect(random, n.phrase, n.children.Select(ch => ch.phrase), out selectedChildIdx)) {
+            return false;
+          }
+          phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, n.children[selectedChildIdx].alternative);
+          // prepare for next iteration
+          n = n.children[selectedChildIdx];
+          stateChain.Add(n.phrase);
+          curDepth++;
+        }
       } // while
 …
         int idx = 0;
         foreach (var alt in alts) {
+          var newPhrase = new Sequence(phrase); // clone
+          newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
+          children[idx++] = new TreeNode(newPhrase.ToString(), alt);
+          // var newPhrase = new Sequence(phrase); // clone
+          // newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
+          // children[idx++] = new TreeNode(newPhrase.ToString(), alt);
+          // since we are not using a sequence later on we might directly transform the current sequence to a string and replace there
+          var phraseStr = phrase.ToString();
+          var sb = new StringBuilder(phraseStr);
+          sb.Remove(phrase.FirstNonTerminalIndex, 1).Insert(phrase.FirstNonTerminalIndex, alt.ToString());
+          children[idx++] = new TreeNode(sb.ToString(), alt);
+        }
         n.children = children;

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs ¶

-                      r11742
+                      r11799
     public static T SelectRandom<T>(this IEnumerable<T> xs, Random rand) {
       var xsArr = xs.ToArray();
       return xsArr[rand.Next(xsArr.Length)];
+      var n = xs.Count();
+      return xs.ElementAt(rand.Next(n));
+    }
+    public static IEnumerable<T> SampleProportional<T>(this IEnumerable<T> source, Random random, IEnumerable<double> weights) {
+      var sourceArray = source.ToArray();
+      var valueArray = weights.ToArray();
+      double total = valueArray.Sum();
+    public static T SampleProportional<T>(this IEnumerable<T> elements, Random random, IEnumerable<double> weights) {
+      double total = weights.Sum();
+      while (true) {
+        int index = 0;
+        double ball = valueArray[index], sum = random.NextDouble() * total;
+        while (ball < sum)
+          ball += valueArray[++index];
+        yield return sourceArray[index];
+      var elemEnumerator = elements.GetEnumerator();
+      elemEnumerator.MoveNext();
+      var weightEnumerator = weights.GetEnumerator();
+      weightEnumerator.MoveNext();
+      var r = random.NextDouble() * total;
+      var agg = weightEnumerator.Current;
+      while (agg < r) {
+        weightEnumerator.MoveNext();
+        elemEnumerator.MoveNext();
+        agg += weightEnumerator.Current;
+      }
+      return elemEnumerator.Current;
+    }
 …
         var y = yEnum.Current;
         s += (x - meanX) * (y - meanY);
         ssX += Math.Pow(x - meanX, 2);
         ssY += Math.Pow(y - meanY, 2);
+        ssX += (x - meanX) * (x - meanX);
+        ssY += (y - meanY) * (y - meanY);
+      }
       if (xEnum.MoveNext() | yEnum.MoveNext()) throw new ArgumentException("lengths are not equal");

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/HeuristicLab.Common.csproj ¶

r11745	r11799
43	43	<Compile Include="ConsoleEx.cs" />
44	44	<Compile Include="Extensions.cs" />
	45	<Compile Include="MostRecentlyUsedCache.cs" />
45	46	<Compile Include="Properties\AssemblyInfo.cs" />
46	47	<Compile Include="Rand.cs" />

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs ¶

r11793	r11799
123	123	}
124	124
	125	// TODO: cache canonical phrases in most-recently used dictionary for increased performance (see symbolicregressionpoly10problem)
125	126	private string CanonicalPhrase(string phrase) {
126	127	if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch));

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs ¶

-                      r11793
+                      r11799
     public bool IsTerminal(string phrase) {
       // reverse because for our grammars and left-canonical derivation it is more likely that NTs occur near the end of the sequence
+      return phrase.Reverse().All(IsTerminal);
+      for (int i = phrase.Length - 1; i >= 0; i--) {
+        if (!IsTerminal(phrase[i])) return false;
+      }
+      return true;
+    }

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs ¶

r11770	r11799
107	107	}
108	108
	109	// TODO: cache canonical phrases in most-recently used dictionary for increased performance (see symbolicregressionpoly10problem)
109	110	private string CanonicalPhrase(string phrase) {
110	111	if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch));

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSequenceProblem.cs ¶

-                      r11792
+                      r11799
       this.correctReward = correctReward;
       this.incorrectReward = incorrectReward;
       var sentenceSymbol = 'S';
+      const char sentenceSymbol = 'S';
       var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
       var nonTerminalSymbols = new char[] { 'S' };
       var rules = terminalSymbols.Select(t => Tuple.Create('S', t.ToString()))
         .Concat(terminalSymbols.Select(t => Tuple.Create('S', t + "S")));
+      var nonTerminalSymbols = new char[] { sentenceSymbol };
+      var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
+        .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString())));
       //var rules = terminalSymbols.Select(t => Tuple.Create('S', t + "S"))
       //  .Concat(terminalSymbols.Select(t => Tuple.Create('S', t.ToString())));
 …
       // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
       Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
-      // as long as only correct symbols are found we increase the reward by +1
-      // on the first incorrect symbol we return
       var reward = 0.0;
       for (int i = 0; i < Math.Min(sentence.Length, sequenceLen); i++) {
 …
           reward += correctReward;
         } else {
           // alternatively reduce reward by number of remaining symbols
+          //  reduce reward by number of remaining symbols
           return Math.Max(0.0, reward + incorrectReward * (sentence.Length - i));
-          // stop on first incorrect symbol and return reward
-          //return reward;
+        }
+      }
 …
+    }
+    // in each position there could be multiple correct and incorrect symbols
     public string CanonicalRepresentation(string terminalPhrase) {
+      throw new NotImplementedException();
+      return terminalPhrase;
+      var sb = new StringBuilder();
+      for (int i = 0; i < terminalPhrase.Length; i++) {
+        if (optimalSymbolsForPos[i].Contains(terminalPhrase[i])) {
+          sb.Append(optimalSymbolsForPos[i].First()); // all symbols in the set are equivalent
+        } else {
+          sb.Append(terminalPhrase[i]);
+        }
+      }
+      return sb.ToString();
+    }
+  }

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs ¶

-                      r11793
+                      r11799
 using System.Collections.Generic;
 using System.Linq;
+using System.Runtime.InteropServices;
 using System.Runtime.Remoting.Messaging;
 using System.Text;
+using System.Text.RegularExpressions;
 namespace HeuristicLab.Problems.GrammaticalOptimization {
 …
     public string CanonicalRepresentation(string terminalPhrase) {
+      var sb = new StringBuilder(terminalPhrase);
+      string canonicalPhrase = terminalPhrase;
       string oldPhrase;
       do {
+        oldPhrase = terminalPhrase;
+        terminalPhrase = terminalPhrase.Replace("ll", "rr").Replace("rl", "lr").Replace("lr", "").Replace("lll", "r").Replace("rrr", "l");
+      } while (terminalPhrase != oldPhrase);
+      return terminalPhrase;
+        oldPhrase = canonicalPhrase;
+        sb.Replace("ll", "rr").Replace("rl", "lr").Replace("lr", "").Replace("lll", "r").Replace("rrr", "l");
+        canonicalPhrase = sb.ToString();
+      } while (canonicalPhrase != oldPhrase);
+      sb.Append(terminalPhrase.Length - canonicalPhrase.Length);
+      return sb.ToString();
+    }
+  }

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs ¶

-                      r11792
+                      r11799
 using System;
+using System.Collections.Concurrent;
 using System.Collections.Generic;
+using System.Collections.Specialized;
 using System.Linq;
 using System.Net;
 …
+    // most-recently-used caching (with limited capacity) for canonical representations
+    MostRecentlyUsedCache<string, string> canonicalPhraseCache = new MostRecentlyUsedCache<string, string>(100000);
     // right now only + and * is supported
-    //private Dictionary<string, string> cache = new Dictionary<string, string>();
     public string CanonicalRepresentation(string phrase) {
+      string res;
+      //if (!cache.TryGetValue(phrase, out res)) {
+      var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
+      var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch)));
+      var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch)));
+      string canonicalPhrase;
+      if (!canonicalPhraseCache.TryGetValue(phrase, out canonicalPhrase)) {
+        var terms = phrase.Split('+');
+        var canonicalTerms = new SortedSet<string>();
+        // only the last term might contain a NT-symbol. make sure this term is added at the end
+        for (int i = 0; i < terms.Length - 1; i++) {
+          canonicalTerms.Add(CanonicalTerm(terms[i]));
+        }
+      res = string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term))));
+      //cache[phrase] = res;
+      //}
+      return res;
+        var sb = new StringBuilder(phrase.Length);
+        foreach (var t in canonicalTerms)
+          sb.Append(t).Append('+');
+        sb.Append(CanonicalTerm(terms[terms.Length - 1]));
+        sb.Append(phrase.Length - sb.Length);
+        canonicalPhrase = sb.ToString();
+        canonicalPhraseCache.Add(phrase, canonicalPhrase);
+      }
+      return canonicalPhrase;
+    }
+    // cache the canonical form of terms for performance reasons
+    private Dictionary<string, string> canonicalTermDictionary = new Dictionary<string, string>();
     private string CanonicalTerm(string term) {
+      return string.Join("", term.OrderByDescending(ch => (byte)ch)); // we want to have the up-case characters last
+      string canonicalTerm;
+      if (!canonicalTermDictionary.TryGetValue(term, out canonicalTerm)) {
+        // add
+        var chars = term.ToCharArray();
+        Array.Sort(chars);
+        var sb = new StringBuilder(chars.Length);
+        // we want to have the up-case characters last
+        for (int i = chars.Length - 1; i >= 0; i--) {
+          if (chars[i] != '*') sb.Append(chars[i]);
+        }
+        canonicalTerm = sb.ToString();
+        canonicalTermDictionary.Add(term, canonicalTerm);
+      }
+      return canonicalTerm;
+    }
+  }

TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs ¶

-                      r11795
+                      r11799
       CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
       //RunDemo();
       RunGridTest();
+      RunDemo();
+      //RunGridTest();
+    }
 …
       //var globalRandom = new Random(31415);
       var localRandSeed = 31415;
       var reps = 5;
       var policies = new Func<IBanditPolicy>[]
+      var reps = 10;
+      var policyFactories = new Func<IBanditPolicy>[]
+        {
          () => new RandomPolicy(),
 …
          () => new ChernoffIntervalEstimationPolicy( 0.1),
          () => new ChernoffIntervalEstimationPolicy( 0.2),
+         () => new ThresholdAscentPolicy(5, 0.01),
+         () => new ThresholdAscentPolicy(5, 0.05),
+         () => new ThresholdAscentPolicy(5, 0.1),
+         () => new ThresholdAscentPolicy(5, 0.2),
          () => new ThresholdAscentPolicy(10, 0.01),
          () => new ThresholdAscentPolicy(10, 0.05),
          () => new ThresholdAscentPolicy(10, 0.1),
          () => new ThresholdAscentPolicy(10, 0.2),
+         () => new ThresholdAscentPolicy(50, 0.01),
+         () => new ThresholdAscentPolicy(50, 0.05),
+         () => new ThresholdAscentPolicy(50, 0.1),
+         () => new ThresholdAscentPolicy(50, 0.2),
          () => new ThresholdAscentPolicy(100, 0.01),
          () => new ThresholdAscentPolicy(100, 0.05),
          () => new ThresholdAscentPolicy(100, 0.1),
          () => new ThresholdAscentPolicy(100, 0.2),
+         () => new ThresholdAscentPolicy(100, 0.01),
+         () => new ThresholdAscentPolicy(100, 0.05),
+         () => new ThresholdAscentPolicy(100, 0.1),
+         () => new ThresholdAscentPolicy(100, 0.2),
+         //() => new ThresholdAscentPolicy(1000, 0.01),
+         //() => new ThresholdAscentPolicy(1000, 0.05),
+         //() => new ThresholdAscentPolicy(1000, 0.1),
+         //() => new ThresholdAscentPolicy(1000, 0.2),
+         () => new ThresholdAscentPolicy(500, 0.01),
+         () => new ThresholdAscentPolicy(500, 0.05),
+         () => new ThresholdAscentPolicy(500, 0.1),
+         () => new ThresholdAscentPolicy(500, 0.2),
          //() => new ThresholdAscentPolicy(5000, 0.01),
          //() => new ThresholdAscentPolicy(10000, 0.01),
         };
+      foreach (var problem in new Tuple<IProblem, int>[]
+        {
+          Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+          Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
+        })
+        foreach (var useCanonical in new bool[] { true, false })
+          foreach (var randomTries in new int[] { 0, /*1, 10, /* 5, 100 /*, 500, 1000 */}) {
+            foreach (var policy in policies) {
+      var instanceFactories = new Func<Random, Tuple<IProblem, int>>[]
+      {
+        (rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+        (rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:false ), 15),
+        (rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:true ), 15),
+        (rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:false), 15),
+        (rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:true), 15),
+        (rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
+      };
+      foreach (var instanceFactory in instanceFactories) {
+        foreach (var useCanonical in new bool[] { true, false }) {
+          foreach (var randomTries in new int[] { 0, 1, 10, /* 5, 100 /*, 500, 1000 */}) {
+            foreach (var policyFactory in policyFactories) {
               var myRandomTries = randomTries;
               var localRand = new Random(localRandSeed);
               var options = new ParallelOptions();
               options.MaxDegreeOfParallelism = 1;
+              options.MaxDegreeOfParallelism = 4;
               Parallel.For(0, reps, options, (i) => {
-                //var t = Task.Run(() => {
                 Random myLocalRand;
                 lock (localRand)
                   myLocalRand = new Random(localRand.Next());
-                //for (int i = 0; i < reps; i++) {
                 int iterations = 0;
 …
                 //var problem = new RoyalPairProblem();
                 //var problem = new EvenParityProblem();
+                // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each
+                var alg = new SequentialSearch(problem.Item1, problem.Item2, myLocalRand, myRandomTries, new GenericGrammarPolicy(problem.Item1, policy(), useCanonical));
+                // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy());
+                var instance = instanceFactory(myLocalRand);
+                var problem = instance.Item1;
+                var maxLen = instance.Item2;
+                var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
+                  new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
                 //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
                 //var alg = new AlternativesContextSampler(problem, 25);
 …
                   iterations++;
                   globalStatistics.AddSentence(sentence, quality);
                   if (iterations % 1000 == 0) {
                     Console.WriteLine("{0,5} {1,25} {2} {3}", myRandomTries, policy(), useCanonical, globalStatistics);
+                  if (iterations % 10000 == 0) {
+                    Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4}", i, myRandomTries, policyFactory(), useCanonical, globalStatistics);
+                  }
                 };
                 alg.FoundNewBestSolution += (sentence, quality) => {
+                  Console.WriteLine("{0,5} {1,25} {2} {3}", myRandomTries, policy(), useCanonical, globalStatistics);
+                  //Console.WriteLine("{0,5} {1,25} {2} {3}",
+                  //  myRandomTries, policyFactory(), useCanonical,
+                  //  globalStatistics);
                 };
                 alg.Run(maxIterations);
-                //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
-                //}
-                //});
-                //tasks.Add(t);
               });
+            }
+          }
+      //Task.WaitAll(tasks.ToArray());
+        }
+      }
+    }
     private static void RunDemo() {
-      // TODO: clone problem for parallel grid test
       // TODO: move problem instances into a separate folder
-      // TODO: improve performance of SequentialSearch (memory allocations related to sequences)
       // TODO: implement bridge to HL-GP
       // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos)
 …
       var random = new Random();
+      var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
       //var phraseLen = 3;
       //var numPhrases = 5;
       //var problem = new RoyalPhraseSequenceProblem(random, 15, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true);
       // var phraseLen = 2;
+      // var phraseLen = 3;
       // var numPhrases = 5;
       // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0.0, phrasesAsSets: true);
+      // var problem = new FindPhrasesProblem(random, 10, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 200, correctReward: 1.0, decoyReward: 0.5, phrasesAsSets: true);
       // good results for symb-reg
 …
       // - GenericThompsonSamplingPolicy("")
       // - UCTPolicy(0.10) (5 of 5 runs, 35000 iters avg.)
       // good results for artificial ant:
       // prev results:
 …
       // - GaussianModelWithUnknownVariance (and Q= 0.99-quantile) also works well for Ant
       // 2015 01 19: grid test with canonical states (non-canonical slightly worse)
+      // - Threshold Ascent (best 100, 0.01; all variants relatively good
+      //var problem = new SymbolicRegressionPoly10Problem();
+      var problem = new SantaFeAntProblem();
+      // - Threshold Ascent (best 100, 0.01; all variants relatively good)
+      // - Policies where the variance has a large weight compared to the mean? (Gaussian(compatible), Gaussian with fixed variance, UCT with large c, all TA)
+      //var problem = new SymbolicRegressionPoly10Problem();
+      //var problem = new SantaFeAntProblem();
       //var problem = new SymbolicRegressionProblem("Tower");
       //var problem = new PalindromeProblem();
 …
       //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
       //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
       var alg = new SequentialSearch(problem, 17, random, 0,
         new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new BoltzmannExplorationPolicy(10), true));
+      var alg = new SequentialSearch(problem, 30, random, 0,
+        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.1), true));
       //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
       //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
 …
         iterations++;
         globalStatistics.AddSentence(sentence, quality);
         if (iterations % 100 == 0) {
+        if (iterations % 1000 == 0) {
           if (iterations % 1000 == 0) Console.Clear();
           Console.SetCursorPosition(0, 0);

Note: See TracChangeset for help on using the changeset viewer.