Changeset 11730 for branches/HeuristicLab.Problems.GrammaticalOptimization

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Bandits/BernoulliBandit.cs

-                      r11711
+                      r11730
 namespace HeuristicLab.Algorithms.Bandits {
   public class BernoulliBandit {
+  public class BernoulliBandit : IBandit {
     public int NumArms { get; private set; }
     public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
+    public int OptimalExpectedRewardArm { get; private set; }
+    // the arm with the highest expected reward also has the highest probability of returning a reward of 1.0
+    public int OptimalMaximalRewardArm { get { return OptimalExpectedRewardArm; } }
     private readonly Random random;
     private readonly double[] expReward;
 …
       for (int i = 0; i < nArms; i++) {
         expReward[i] = random.NextDouble();
+        if (expReward[i] > OptimalExpectedReward) OptimalExpectedReward = expReward[i];
+        if (expReward[i] > OptimalExpectedReward) {
+          OptimalExpectedReward = expReward[i];
+          OptimalExpectedRewardArm = i;
+        }
+      }
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Bandits/TruncatedNormalBandit.cs

-                      r11711
+                      r11730
 using System.Text;
 using System.Threading.Tasks;
+using HeuristicLab.Common;
 namespace HeuristicLab.Algorithms.Bandits {
   public class TruncatedNormalBandit {
+  public class TruncatedNormalBandit : IBandit {
     public int NumArms { get; private set; }
     public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
+    public int OptimalExpectedRewardArm { get; private set; }
+    // the arm with the highest expected reward also has the highest probability of returning a reward of 1.0
+    public int OptimalMaximalRewardArm { get { return OptimalExpectedRewardArm; } }
     private readonly Random random;
     private readonly double[] expReward;
 …
       OptimalExpectedReward = double.NegativeInfinity;
       for (int i = 0; i < nArms; i++) {
+        expReward[i] = random.NextDouble();
+        if (expReward[i] > OptimalExpectedReward) OptimalExpectedReward = expReward[i];
+        expReward[i] = random.NextDouble() * 0.7;
+        if (expReward[i] > OptimalExpectedReward) {
+          OptimalExpectedReward = expReward[i];
+          OptimalExpectedRewardArm = i;
+        }
+      }
+    }
 …
       double x = 0;
       do {
         var z = Transform(random.NextDouble(), random.NextDouble());
+        var z = Rand.RandNormal(random);
         x = z * 0.1 + expReward[arm];
+      }
 …
       return x;
+    }
-    // box muller transform
-    private double Transform(double u1, double u2) {
-      return Math.Sqrt(-2 * Math.Log(u1)) * Math.Cos(2 * Math.PI * u2);
+    }
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

-                      r11727
+                      r11730
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="BanditHelper.cs" />
     <Compile Include="Bandits\BernoulliBandit.cs" />
+    <Compile Include="Bandits\GaussianMixtureBandit.cs" />
+    <Compile Include="Bandits\IBandit.cs" />
     <Compile Include="Bandits\TruncatedNormalBandit.cs" />
+    <Compile Include="Models\BernoulliModel.cs" />
+    <Compile Include="Models\GaussianModel.cs" />
+    <Compile Include="Models\GaussianMixtureModel.cs" />
+    <Compile Include="Models\IModel.cs" />
     <Compile Include="Policies\BanditPolicy.cs" />
     <Compile Include="Policies\BernoulliThompsonSamplingPolicy.cs" />
+    <Compile Include="Policies\BoltzmannExplorationPolicy.cs" />
+    <Compile Include="Policies\ChernoffIntervalEstimationPolicy.cs" />
+    <Compile Include="Policies\GenericThompsonSamplingPolicy.cs" />
+    <Compile Include="Policies\ThresholdAscentPolicy.cs" />
+    <Compile Include="Policies\UCTPolicy.cs" />
     <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs" />
     <Compile Include="Policies\Exp3Policy.cs" />

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs

r11727	r11730
20	20	// reset causes the policy to be reinitialized to its initial state (as after constructor-call)
21	21	void Reset();
	22
	23	void PrintStats();
22	24	}
23	25	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BanditPolicy.cs

r11727	r11730
28	28	Actions = Enumerable.Range(0, numInitialActions).ToArray();
29	29	}
	30
	31	public abstract void PrintStats();
30	32	}
31	33	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BernoulliThompsonSamplingPolicy.cs

-                      r11727
+                      r11730
       Array.Clear(failure, 0, failure.Length);
+    }
+    // Writes one right-aligned column per arm (the success/failure ratio) followed by a newline.
+    // Arms whose success entry is negative get an empty column instead.
+    public override void PrintStats() {
+      for (int i = 0; i < success.Length; i++) {
+        if (success[i] >= 0) {
+          // NOTE(review): the element type of success/failure is not visible in this hunk.
+          // Dividing in double avoids integer truncation and a DivideByZeroException should they be
+          // integer counters (an arm without failures then prints Infinity/NaN instead of crashing);
+          // the cast is a no-op if they are already double.
+          Console.Write("{0,5:F2}", success[i] / (double)failure[i]);
+        } else {
+          Console.Write("{0,5}", "");
+        }
+      }
+      Console.WriteLine();
+    }
+    public override string ToString() {
+      return "BernoulliThompsonSamplingPolicy";
+    }
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs

-                      r11727
+                      r11730
       if (random.NextDouble() > eps) {
         // select best
         var maxReward = double.NegativeInfinity;
+        var bestQ = double.NegativeInfinity;
         int bestAction = -1;
         foreach (var a in Actions) {
           if (tries[a] == 0) return a;
           var avgReward = sumReward[a] / tries[a];
           if (maxReward < avgReward) {
             maxReward = avgReward;
+          var q = sumReward[a] / tries[a];
+          if (bestQ < q) {
+            bestQ = q;
             bestAction = a;
+          }
 …
       Array.Clear(sumReward, 0, sumReward.Length);
+    }
+    // Writes, for every arm, its average reward and its number of tries on one console line.
+    // Arms with a negative tries entry are shown as "-".
+    public override void PrintStats() {
+      for (int i = 0; i < sumReward.Length; i++) {
+        if (tries[i] >= 0) {
+          Console.Write(" {0,5:F2} {1}", sumReward[i] / tries[i], tries[i]);
+        } else {
+          // plain write: the literal has no placeholder, so no format argument is needed
+          Console.Write("-");
+        }
+      }
+      Console.WriteLine();
+    }
+    // Policy name including the exploration rate eps, formatted with two decimals.
+    public override string ToString() {
+      return string.Format("EpsGreedyPolicy({0:F2})", eps);
+    }
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/Exp3Policy.cs

-                      r11727
+                      r11730
       foreach (var a in Actions) w[a] = 1.0;
+    }
+    // Writes the weight of every arm as a 5-character column on one console line;
+    // arms whose weight is not positive get a blank column.
+    public override void PrintStats() {
+      for (int arm = 0; arm < w.Length; arm++) {
+        if (!(w[arm] > 0)) {
+          Console.Write("{0,5}", "");
+          continue;
+        }
+        Console.Write("{0,5:F2}", w[arm]);
+      }
+      Console.WriteLine();
+    }
+    public override string ToString() {
+      return "Exp3Policy";
+    }
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GaussianThompsonSamplingPolicy.cs

-                      r11727
+                      r11730
 namespace HeuristicLab.Algorithms.Bandits {
   public class GaussianThompsonSamplingPolicy : BanditPolicy {
     private readonly Random random;
     private readonly double[] sumRewards;
     private readonly double[] sumSqrRewards;
+    private readonly double[] sampleMean;
+    private readonly double[] sampleM2;
     private readonly int[] tries;
+    public GaussianThompsonSamplingPolicy(Random random, int numActions)
+    private bool compatibility;
+    // assumes a Gaussian reward distribution with different means but the same variances for each action
+    // the prior for the mean is also Gaussian with the following parameters
+    private readonly double rewardVariance = 0.1; // we assume a known variance
+    private readonly double priorMean = 0.5;
+    private readonly double priorVariance = 1;
+    public GaussianThompsonSamplingPolicy(Random random, int numActions, bool compatibility = false)
       : base(numActions) {
       this.random = random;
       this.sumRewards = new double[numActions];
       this.sumSqrRewards = new double[numActions];
+      this.sampleMean = new double[numActions];
+      this.sampleM2 = new double[numActions];
       this.tries = new int[numActions];
+      this.compatibility = compatibility;
+    }
 …
       int bestAction = -1;
       foreach (var a in Actions) {
+        if (tries[a] == 0) return a;
+        var mu = sumRewards[a] / tries[a];
+        var stdDev = Math.Sqrt(sumSqrRewards[a] / tries[a] - Math.Pow(mu, 2));
+        var theta = Rand.RandNormal(random) * stdDev + mu;
+        if(tries[a] == -1) continue; // skip disabled actions
+        double theta;
+        if (compatibility) {
+          if (tries[a] < 2) return a;
+          var mu = sampleMean[a];
+          var variance = sampleM2[a] / tries[a];
+          var stdDev = Math.Sqrt(variance);
+          theta = Rand.RandNormal(random) * stdDev + mu;
+        } else {
+          // calculate posterior mean and variance (for mean reward)
+          // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
+          var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / priorVariance);
+          var posteriorMean = posteriorVariance * (priorMean / priorVariance + tries[a] * sampleMean[a] / rewardVariance);
+          // sample a mean from the posterior
+          theta = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
+          // theta already represents the expected reward value => nothing else to do
+        }
         if (theta > maxTheta) {
           maxTheta = theta;
 …
+        }
+      }
+      Debug.Assert(Actions.Contains(bestAction));
       return bestAction;
+    }
 …
     public override void UpdateReward(int action, double reward) {
       Debug.Assert(Actions.Contains(action));
-      sumRewards[action] += reward;
-      sumSqrRewards[action] += reward * reward;
       tries[action]++;
+      var delta = reward - sampleMean[action];
+      sampleMean[action] += delta / tries[action];
+      sampleM2[action] += sampleM2[action] + delta * (reward - sampleMean[action]);
+    }
     public override void DisableAction(int action) {
       base.DisableAction(action);
       sumRewards[action] = 0;
       sumSqrRewards[action] = 0;
+      sampleMean[action] = 0;
+      sampleM2[action] = 0;
       tries[action] = -1;
+    }
 …
     public override void Reset() {
       base.Reset();
       Array.Clear(sumRewards, 0, sumRewards.Length);
       Array.Clear(sumSqrRewards, 0, sumSqrRewards.Length);
+      Array.Clear(sampleMean, 0, sampleMean.Length);
+      Array.Clear(sampleM2, 0, sampleM2.Length);
       Array.Clear(tries, 0, tries.Length);
+    }
+    // Writes, for every arm, its running mean reward and its number of tries on one console line.
+    // Arms with a negative tries entry (set by DisableAction) get a blank column.
+    public override void PrintStats() {
+      for (int i = 0; i < sampleMean.Length; i++) {
+        if (tries[i] >= 0) {
+          // sampleMean already holds the average (maintained incrementally in UpdateReward),
+          // so it must not be divided by the number of tries again
+          Console.Write(" {0,5:F2} {1}", sampleMean[i], tries[i]);
+        } else {
+          Console.Write("{0,5}", "");
+        }
+      }
+      Console.WriteLine();
+    }
+    public override string ToString() {
+      return "GaussianThompsonSamplingPolicy";
+    }
+  }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/RandomPolicy.cs

r11727	r11730
23	23	// do nothing
24	24	}
25
	25	public override void PrintStats() {
	26	Console.WriteLine("Random");
	27	}
	28	public override string ToString() {
	29	return "RandomPolicy";
	30	}
26	31	}
27	32	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs

-                      r11727
+                      r11730
       Array.Clear(sumReward, 0, sumReward.Length);
+    }
+    // Writes the average reward of every arm as a 5-character column on one console line;
+    // arms with a negative tries entry get a blank column.
+    public override void PrintStats() {
+      for (int arm = 0; arm < sumReward.Length; arm++) {
+        if (!(tries[arm] >= 0)) {
+          Console.Write("{0,5}", "");
+          continue;
+        }
+        Console.Write("{0,5:F2}", sumReward[arm] / tries[arm]);
+      }
+      Console.WriteLine();
+    }
+    public override string ToString() {
+      return "UCB1Policy";
+    }
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs

-                      r11727
+                      r11730
       Array.Clear(sumSqrReward, 0, sumSqrReward.Length);
+    }
+    // Writes the average reward of every arm as a 5-character column on one console line;
+    // arms with a negative tries entry get a blank column.
+    public override void PrintStats() {
+      for (int arm = 0; arm < sumReward.Length; arm++) {
+        if (!(tries[arm] >= 0)) {
+          Console.Write("{0,5}", "");
+          continue;
+        }
+        Console.Write("{0,5:F2}", sumReward[arm] / tries[arm]);
+      }
+      Console.WriteLine();
+    }
+    public override string ToString() {
+      return "UCB1TunedPolicy";
+    }
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCBNormalPolicy.cs

-                      r11727
+                      r11730
       double bestQ = double.NegativeInfinity;
       foreach (var a in Actions) {
         if (totalTries == 0 || tries[a] == 0 || tries[a] < Math.Ceiling(8 * Math.Log(totalTries))) return a;
+        if (totalTries <= 1 || tries[a] <= 1 || tries[a] <= Math.Ceiling(8 * Math.Log(totalTries))) return a;
         var avgReward = sumReward[a] / tries[a];
+        var estVariance = 16 * ((sumSqrReward[a] - tries[a] * Math.Pow(avgReward, 2)) / (tries[a] - 1)) * (Math.Log(totalTries - 1) / tries[a]);
+        if (estVariance < 0) estVariance = 0; // numerical problems
         var q = avgReward
           + Math.Sqrt(16 * ((sumSqrReward[a] - tries[a] * Math.Pow(avgReward, 2)) / (tries[a] - 1)) * (Math.Log(totalTries - 1) / tries[a]));
+          + Math.Sqrt(estVariance);
         if (q > bestQ) {
           bestQ = q;
 …
+        }
+      }
+      Debug.Assert(Actions.Contains(bestAction));
       return bestAction;
+    }
 …
       Array.Clear(sumSqrReward, 0, sumSqrReward.Length);
+    }
+    // Writes the average reward of every arm as a 5-character column on one console line;
+    // arms with a negative tries entry get a blank column.
+    public override void PrintStats() {
+      for (int arm = 0; arm < sumReward.Length; arm++) {
+        if (!(tries[arm] >= 0)) {
+          Console.Write("{0,5}", "");
+          continue;
+        }
+        Console.Write("{0,5:F2}", sumReward[arm] / tries[arm]);
+      }
+      Console.WriteLine();
+    }
+    public override string ToString() {
+      return "UCBNormalPolicy";
+    }
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs

-                      r11727
+                      r11730
     private readonly Random random;
     private readonly int contextLen;
+    private readonly Func<Random, int, IPolicy> policyFactory;
     public AlternativesContextSampler(IProblem problem, int maxLen) {
+    public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, Func<Random, int, IPolicy> policyFactory) {
       this.maxLen = maxLen;
       this.problem = problem;
+      this.random = new Random(31415);
+      this.contextLen = 25;
+      this.random = random;
+      this.contextLen = contextLen;
+      this.policyFactory = policyFactory;
+    }
 …
       InitPolicies(problem.Grammar);
       for (int i = 0; i < maxIterations; i++) {
         var sentence = SampleSentence(problem.Grammar);
         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
+        var sentence = SampleSentence(problem.Grammar).ToString();
+        var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
         DistributeReward(quality);
 …
     private Dictionary<string, IPolicy> ntPolicy;
     private List<Tuple<string, int>> updateChain;
     private void InitPolicies(IGrammar grammar) {
       this.ntPolicy = new Dictionary<string, IPolicy>();
 …
+    }
     private string SampleSentence(IGrammar grammar) {
+    private Sequence SampleSentence(IGrammar grammar) {
       updateChain.Clear();
       return CompleteSentence(grammar, grammar.SentenceSymbol.ToString());
+      return CompleteSentence(grammar, new Sequence(grammar.SentenceSymbol));
+    }
     public string CompleteSentence(IGrammar g, string phrase) {
+    public Sequence CompleteSentence(IGrammar g, Sequence phrase) {
       if (phrase.Length > maxLen) throw new ArgumentException();
       if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
       bool done = phrase.All(g.IsTerminal); // terminal phrase means we are done
+      bool done = phrase.IsTerminal; // terminal phrase means we are done
       while (!done) {
+        int ntIdx; char nt;
+        Grammar.FindFirstNonTerminal(g, phrase, out nt, out ntIdx);
+        char nt = phrase.FirstNonTerminal;
         int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
 …
         var alts = g.GetAlternatives(nt);
         string selectedAlt;
+        Sequence selectedAlt;
         // if the choice is restricted then one of the allowed alternatives is selected randomly
         if (alts.Any(alt => g.MinPhraseLength(alt) > maxLenOfReplacement)) {
 …
         } else {
           // all alts are allowed => select using bandit policy
+          var ntIdx = phrase.FirstNonTerminalIndex;
           var startIdx = Math.Max(0, ntIdx - contextLen);
           var endIdx = Math.Min(startIdx + contextLen, ntIdx);
           var lft = phrase.Substring(startIdx, endIdx - startIdx + 1);
+          var lft = phrase.Subsequence(startIdx, endIdx - startIdx + 1).ToString();
           lft = problem.Hash(lft);
           if (!ntPolicy.ContainsKey(lft)) {
             ntPolicy.Add(lft, new UCB1TunedPolicy(g.GetAlternatives(nt).Count()));
+            ntPolicy.Add(lft, policyFactory(random, g.GetAlternatives(nt).Count()));
+          }
           var selectedAltIdx = ntPolicy[lft].SelectAction();
 …
         // replace nt with alt
+        phrase = phrase.Remove(ntIdx, 1);
+        phrase = phrase.Insert(ntIdx, selectedAlt);
+        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
         done = phrase.All(g.IsTerminal); // terminal phrase means we are done
+        done = phrase.IsTerminal; // terminal phrase means we are done
+      }
       return phrase;

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs

-                      r11727
+                      r11730
       InitPolicies(problem.Grammar);
       for (int i = 0; i < maxIterations; i++) {
         var sentence = SampleSentence(problem.Grammar);
         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
+        var sentence = SampleSentence(problem.Grammar).ToString();
+        var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
         DistributeReward(quality);
 …
+    }
     private string SampleSentence(IGrammar grammar) {
+    private Sequence SampleSentence(IGrammar grammar) {
       updateChain.Clear();
       return CompleteSentence(grammar, grammar.SentenceSymbol.ToString());
+      return CompleteSentence(grammar, new Sequence(grammar.SentenceSymbol));
+    }
     public string CompleteSentence(IGrammar g, string phrase) {
+    public Sequence CompleteSentence(IGrammar g, Sequence phrase) {
       if (phrase.Length > maxLen) throw new ArgumentException();
       if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
       bool done = phrase.All(g.IsTerminal); // terminal phrase means we are done
+      bool done = phrase.IsTerminal; // terminal phrase means we are done
       while (!done) {
+        int ntIdx; char nt;
+        Grammar.FindFirstNonTerminal(g, phrase, out nt, out ntIdx);
+        char nt = phrase.FirstNonTerminal;
         int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
 …
         var alts = g.GetAlternatives(nt);
         string selectedAlt;
+        Sequence selectedAlt;
         // if the choice is restricted then one of the allowed alternatives is selected randomly
         if (alts.Any(alt => g.MinPhraseLength(alt) > maxLenOfReplacement)) {
 …
         // replace nt with alt
+        phrase = phrase.Remove(ntIdx, 1);
+        phrase = phrase.Insert(ntIdx, selectedAlt);
+        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
         done = phrase.All(g.IsTerminal); // terminal phrase means we are done
+        done = phrase.IsTerminal; // terminal phrase means we are done
+      }
       return phrase;

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveBreadthFirstSearch.cs

-                      r11727
+                      r11730
     private readonly int maxLen;
     private readonly Queue<string> bfsQueue = new Queue<string>();
+    private readonly Queue<Sequence> bfsQueue = new Queue<Sequence>();
     private readonly IProblem problem;
 …
     public void Run(int maxIterations) {
       double bestQuality = double.MinValue;
       bfsQueue.Enqueue(problem.Grammar.SentenceSymbol.ToString());
+      bfsQueue.Enqueue(new Sequence(problem.Grammar.SentenceSymbol));
       var sentences = GenerateLanguage(problem.Grammar);
       var sentenceEnumerator = sentences.GetEnumerator();
       for (int i = 0; sentenceEnumerator.MoveNext() && i < maxIterations; i++) {
         var sentence = sentenceEnumerator.Current;
         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
+        var sentence = sentenceEnumerator.Current.ToString();
+        var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
         RaiseSolutionEvaluated(sentence, quality);
 …
     // create sentences lazily
     private IEnumerable<string> GenerateLanguage(IGrammar grammar) {
+    private IEnumerable<Sequence> GenerateLanguage(IGrammar grammar) {
       while (bfsQueue.Any()) {
         var phrase = bfsQueue.Dequeue();
         char nt;
+        char nt = phrase.FirstNonTerminal;
         int ntIdx;
+        Grammar.FindFirstNonTerminal(grammar, phrase, out nt, out ntIdx);
         var alts = grammar.GetAlternatives(nt);
         foreach (var alt in alts) {
+          var newPhrase = phrase.Remove(ntIdx, 1).Insert(ntIdx, alt);
+          if (newPhrase.All(grammar.IsTerminal) && newPhrase.Length <= maxLen) {
+          var newPhrase = new Sequence(phrase);
+          newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
+          if (newPhrase.IsTerminal && newPhrase.Length <= maxLen) {
             yield return newPhrase;
           } else if (grammar.MinPhraseLength(newPhrase) <= maxLen) {

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveDepthFirstSearch.cs

-                      r11727
+                      r11730
     private readonly int maxLen;
+    private readonly Stack<string> stack = new Stack<string>();
+    private readonly Stack<Sequence> stack = new Stack<Sequence>();
+    private readonly IProblem problem;
     public ExhaustiveDepthFirstSearch(int maxLen) {
+    public ExhaustiveDepthFirstSearch(IProblem problem, int maxLen) {
       this.maxLen = maxLen;
+      this.problem = problem;
+    }
     public void Run(IProblem problem, int maxIterations) {
+    public void Run(int maxIterations) {
       double bestQuality = double.MinValue;
       stack.Push(problem.Grammar.SentenceSymbol.ToString());
+      stack.Push(new Sequence(problem.Grammar.SentenceSymbol));
       var sentences = GenerateLanguage(problem.Grammar);
       var sentenceEnumerator = sentences.GetEnumerator();
       for (int i = 0; sentenceEnumerator.MoveNext() && i < maxIterations; i++) {
         var sentence = sentenceEnumerator.Current;
+        var sentence = sentenceEnumerator.Current.ToString();
         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
         RaiseSolutionEvaluated(sentence, quality);
 …
     // create sentences lazily
     private IEnumerable<string> GenerateLanguage(IGrammar grammar) {
+    private IEnumerable<Sequence> GenerateLanguage(IGrammar grammar) {
       while (stack.Any()) {
         var phrase = stack.Pop();
+        char nt;
+        int ntIdx;
+        Grammar.FindFirstNonTerminal(grammar, phrase, out nt, out ntIdx);
+        char nt = phrase.FirstNonTerminal;
         var alts = grammar.GetAlternatives(nt);
         foreach (var alt in alts) {
+          var newPhrase = phrase.Remove(ntIdx, 1).Insert(ntIdx, alt);
+          if (newPhrase.All(grammar.IsTerminal) && newPhrase.Length <= maxLen) {
+          var newPhrase = new Sequence(phrase);
+          newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
+          if (newPhrase.IsTerminal && newPhrase.Length <= maxLen) {
             yield return newPhrase;
           } else if (grammar.MinPhraseLength(newPhrase) <= maxLen) {

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

-                      r11727
+                      r11730
   public class MctsSampler {
     private class TreeNode {
+      public string ident;
       public int randomTries;
+      public int policyTries;
       public IPolicy policy;
       public TreeNode[] children;
       public bool done = false;
+      public TreeNode(string id) {
+        this.ident = id;
+      }
       public override string ToString() {
         return string.Format("Node(random-tries: {0}, done: {1}, policy: {2})", randomTries, done, policy);
+        return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, policy);
+      }
+    }
     public event Action<string, double> FoundNewBestSolution;
 …
     private readonly Random random;
     private readonly int randomTries;
     private readonly Func<int, IPolicy> policyFactory;
+    private readonly Func<Random, int, IPolicy> policyFactory;
     private List<Tuple<TreeNode, int>> updateChain;
     private TreeNode rootNode;
+    public int treeDepth;
+    public int treeSize;
     public MctsSampler(IProblem problem, int maxLen, Random random) :
       this(problem, maxLen, random, 10, (numActions) => new EpsGreedyPolicy(random, numActions, 0.1)) {
+      this(problem, maxLen, random, 10, (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1)) {
+    }
     public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, Func<int, IPolicy> policyFactory) {
+    public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, Func<Random, int, IPolicy> policyFactory) {
       this.maxLen = maxLen;
       this.problem = problem;
 …
     public void Run(int maxIterations) {
       double bestQuality = double.MinValue;
       InitPolicies();
+      InitPolicies(problem.Grammar);
       for (int i = 0; !rootNode.done && i < maxIterations; i++) {
         var sentence = SampleSentence(problem.Grammar);
+        var sentence = SampleSentence(problem.Grammar).ToString();
         var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
         Debug.Assert(quality >= 0 && quality <= 1.0);
 …
+        }
+      }
+      // clean up
+      InitPolicies(problem.Grammar); GC.Collect();
+    }
+    private void InitPolicies() {
+      this.updateChain = new List<Tuple<TreeNode, int>>();
+      rootNode = new TreeNode();
+    public void PrintStats() { // prints a tree summary, then one block per node along a path starting at the root
+      var n = rootNode;
+      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
+      while (n.policy != null) { // stop at the first node that has no policy yet
+        Console.WriteLine();
+        Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident)))); // idents of the children
+        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.randomTries + ch.policyTries)))); // total tries of each child
+        //n.policy.PrintStats();
+        n = n.children.OrderByDescending(c => c.policyTries).First(); // descend into the child with the most policy tries
+      }
+      Console.ReadLine(); // NOTE(review): blocks until a line is read from stdin — presumably a debugging pause; confirm before unattended use
+    }
+    private string SampleSentence(IGrammar grammar) {
+      updateChain.Clear();
+      return CompleteSentence(grammar, grammar.SentenceSymbol.ToString());
+    private void InitPolicies(IGrammar grammar) {
+      this.updateChain = new List<Tuple<TreeNode, int>>();
+      rootNode = new TreeNode(grammar.SentenceSymbol.ToString());
+      treeDepth = 0;
+      treeSize = 0;
+    }
+    public string CompleteSentence(IGrammar g, string phrase) {
+    private Sequence SampleSentence(IGrammar grammar) {
+      updateChain.Clear();
+      var startPhrase = new Sequence(grammar.SentenceSymbol);
+      return CompleteSentence(grammar, startPhrase);
+    }
+    private Sequence CompleteSentence(IGrammar g, Sequence phrase) {
       if (phrase.Length > maxLen) throw new ArgumentException();
       if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
       TreeNode n = rootNode;
       bool done = phrase.All(g.IsTerminal); // terminal phrase means we are done
+      bool done = phrase.IsTerminal;
       int selectedAltIdx = -1;
+      var curDepth = 0;
       while (!done) {
+        int ntIdx; char nt;
+        Grammar.FindFirstNonTerminal(g, phrase, out nt, out ntIdx);
+        char nt = phrase.FirstNonTerminal;
         int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
 …
         if (n.randomTries < randomTries) {
           n.randomTries++;
+          treeDepth = Math.Max(treeDepth, curDepth);
           return g.CompleteSentenceRandomly(random, phrase, maxLen);
         } else if (n.randomTries == randomTries && n.policy == null) {
+          n.policy = policyFactory(alts.Count());
+          n.children = alts.Select(_ => new TreeNode()).ToArray(); // create a new node for each alternative
+          n.policy = policyFactory(random, alts.Count());
+          //n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
+          n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
+          treeSize += n.children.Length;
+        }
+        n.policyTries++;
         // => select using bandit policy
         selectedAltIdx = n.policy.SelectAction();
+        string selectedAlt = alts.ElementAt(selectedAltIdx);
+        Sequence selectedAlt = alts.ElementAt(selectedAltIdx);
         // replace nt with alt
+        phrase = phrase.Remove(ntIdx, 1);
+        phrase = phrase.Insert(ntIdx, selectedAlt);
+        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
         updateChain.Add(Tuple.Create(n, selectedAltIdx));
+        done = phrase.All(g.IsTerminal); // terminal phrase means we are done
+        curDepth++;
+        done = phrase.IsTerminal;
         if (!done) {
           // prepare for next iteration
 …
       n.children[selectedAltIdx].done = true;
+      treeDepth = Math.Max(treeDepth, curDepth);
       return phrase;
+    }
 …
         var policy = node.policy;
         var action = e.Item2;
+        //policy.UpdateReward(action, reward / updateChain.Count);
         policy.UpdateReward(action, reward);

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/RandomSearch.cs

-                      r11690
+                      r11730
     private readonly int maxLen;
     private readonly Random random;
+    private readonly IProblem problem;
     public RandomSearch(int maxLen) {
+    public RandomSearch(IProblem problem, Random random, int maxLen) {
       this.maxLen = maxLen;
+      this.random = new Random(31415);
+      this.random = random;
+      this.problem = problem;
+    }
     public void Run(IProblem problem, int maxIterations) {
+    public void Run(int maxIterations) {
       double bestQuality = double.MinValue;
       for (int i = 0; i < maxIterations; i++) {
         var sentence = CreateSentence(problem.Grammar);
         var quality = problem.Evaluate(sentence);
+        var sentence = CreateSentence(problem.Grammar).ToString();
+        var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
         RaiseSolutionEvaluated(sentence, quality);
 …
+    }
     private string CreateSentence(IGrammar grammar) {
       var sentence = grammar.SentenceSymbol.ToString();
+    private Sequence CreateSentence(IGrammar grammar) {
+      var sentence = new Sequence(grammar.SentenceSymbol);
       return grammar.CompleteSentenceRandomly(random, sentence, maxLen);
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs

-                      r11727
+                      r11730
       return xsArr[rand.Next(xsArr.Length)];
+    }
+    /// <summary>
+    /// Returns an endless sequence of elements drawn with replacement from <paramref name="source"/>,
+    /// where the element at position i is drawn with probability weights[i] / sum(weights)
+    /// (roulette-wheel selection). The sequence never terminates; callers must limit it (e.g. with Take).
+    /// </summary>
+    /// <param name="source">Elements to sample from; must contain at least one element.</param>
+    /// <param name="random">Random number generator used for every draw.</param>
+    /// <param name="weights">One weight per element of <paramref name="source"/>, in the same order.</param>
+    /// <returns>A lazily evaluated, infinite sequence of sampled elements.</returns>
+    /// <exception cref="ArgumentException">
+    /// Thrown on first enumeration (this is an iterator, so checks are deferred) when
+    /// <paramref name="source"/> is empty or the number of weights differs from the number of elements.
+    /// </exception>
+    public static IEnumerable<T> SampleProportional<T>(this IEnumerable<T> source, Random random, IEnumerable<double> weights) {
+      // materialize both inputs once so they are not re-enumerated for every draw
+      var sourceArray = source.ToArray();
+      var valueArray = weights.ToArray();
+      if (sourceArray.Length == 0)
+        throw new ArgumentException("Cannot sample from an empty sequence.", "source");
+      if (valueArray.Length != sourceArray.Length)
+        throw new ArgumentException("The number of weights must match the number of elements.", "weights");
+      double total = valueArray.Sum();
+      int last = valueArray.Length - 1;
+      while (true) {
+        int index = 0;
+        // 'sum' is the threshold for this draw, 'ball' the running cumulative weight
+        double ball = valueArray[index], sum = random.NextDouble() * total;
+        // stop at the last element so that rounding errors or non-positive weights can never run past the array
+        while (ball < sum && index < last)
+          ball += valueArray[++index];
+        yield return sourceArray[index];
+      }
+    }
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/HeuristicLab.Problems.GrammaticalOptimization.Test.csproj

r11708	r11730
57	57	</Choose>
58	58	<ItemGroup>
	59	<Compile Include="TestSequence.cs" />
59	60	<Compile Include="TestBanditPolicies.cs" />
60	61	<Compile Include="TestInstances.cs" />

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

-                      r11727
+                      r11730
 using System.Globalization;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.Models;
 using Microsoft.VisualStudio.TestTools.UnitTesting;
 …
   [TestClass]
   public class TestBanditPolicies {
     [TestMethod]
     public void ComparePoliciesForBernoulliBandit() {
+      System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
       var globalRand = new Random(31415);
       var seedForPolicy = globalRand.Next();
       var nArms = 10;
+      var nArms = 20;
       //Console.WriteLine("Exp3 (gamma=0.01)");
       //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
       //Console.WriteLine("Exp3 (gamma=0.05)");
       //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
       Console.WriteLine("Thompson (Bernoulli)");
       TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new BernoulliModel(nArms)));
       Console.WriteLine("Random");
       TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
 …
       //Console.WriteLine("Eps(0.5)");
       //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
+    }
+      Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.1));
+      Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.5));
+      Console.WriteLine("UCT(1)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 1));
+      Console.WriteLine("UCT(2)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 2));
+      Console.WriteLine("UCT(5)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 5));
+      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
+      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
+      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
+      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
+      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
+      // not applicable to bernoulli rewards
+      //Console.WriteLine("ThresholdAscent(10, 0.01)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
+      //Console.WriteLine("ThresholdAscent(10, 0.05)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
+      //Console.WriteLine("ThresholdAscent(10, 0.1)   "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
+      //Console.WriteLine("ThresholdAscent(100, 0.01) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
+      //Console.WriteLine("ThresholdAscent(100, 0.05) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
+      //Console.WriteLine("ThresholdAscent(100, 0.1)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
+      //Console.WriteLine("ThresholdAscent(1000, 0.01)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
+      //Console.WriteLine("ThresholdAscent(1000, 0.05)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
+      //Console.WriteLine("ThresholdAscent(1000, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
+    }
     [TestMethod]
     public void ComparePoliciesForNormalBandit() {
+      System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
       var globalRand = new Random(31415);
       var seedForPolicy = globalRand.Next();
+      var nArms = 10;
+      Console.WriteLine("Thompson (Gaussian)");
+      TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("Random");
+      TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("UCB1");
+      TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
+      Console.WriteLine("UCB1Tuned");
+      TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
+      Console.WriteLine("UCB1Normal");
+      TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
+      var nArms = 20;
+      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true));
+      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyNormal(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1)));
+      /*
+      Console.WriteLine("Random"); TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("UCB1"); TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
+      Console.WriteLine("UCB1Tuned"); TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
+      Console.WriteLine("UCB1Normal"); TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
       //Console.WriteLine("Exp3 (gamma=0.01)");
       //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
       //Console.WriteLine("Exp3 (gamma=0.05)");
       //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
+      Console.WriteLine("Eps(0.01)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
+      Console.WriteLine("Eps(0.05)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
+      Console.WriteLine("Eps(0.01)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
+      Console.WriteLine("Eps(0.05)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
       //Console.WriteLine("Eps(0.1)");
       //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
 …
       //Console.WriteLine("Eps(0.5)");
       //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
+    }
+      Console.WriteLine("UCT(0.1)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.1));
+      Console.WriteLine("UCT(0.5)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.5));
+      Console.WriteLine("UCT(1)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 1));
+      Console.WriteLine("UCT(2)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 2));
+      Console.WriteLine("UCT(5)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 5));
+      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
+      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
+      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
+      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
+      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
+      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
+      Console.WriteLine("ThresholdAscent(10,0.05)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
+      Console.WriteLine("ThresholdAscent(10,0.1)   "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
+      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
+      Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
+      Console.WriteLine("ThresholdAscent(100,0.1)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
+      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
+      Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
+      Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
+       */
+    }
+    [TestMethod]
+    public void ComparePoliciesForGaussianMixtureBandit() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var globalRand = new Random(31415);
+      var seedForPolicy = globalRand.Next();
+      var nArms = 20;
+      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true));
+      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1)));
+      /*
+      Console.WriteLine("Random"); TestPolicyGaussianMixture(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("UCB1"); TestPolicyGaussianMixture(globalRand, nArms, new UCB1Policy(nArms));
+      Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(globalRand, nArms, new UCB1TunedPolicy(nArms));
+      Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(globalRand, nArms, new UCBNormalPolicy(nArms));
+      Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
+      Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
+      Console.WriteLine("UCT(1)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 1));
+      Console.WriteLine("UCT(2)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 2));
+      Console.WriteLine("UCT(5)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 5));
+      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
+      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
+      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
+      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
+      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
+      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
+      Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));
+       */
+    }
     private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
+      var maxIt = 1E6;
+      var reps = 10; // 10 independent runs
+      var avgRegretForIteration = new Dictionary<int, double>();
+      TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
+    }
+    private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
+      TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
+    }
+    private void TestPolicyGaussianMixture(Random globalRand, int nArms, IPolicy policy) {
+      TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
+    }
+    private void TestPolicy(Random globalRand, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) {
+      var maxIt = 1E5;
+      var reps = 30; // independent runs
+      var regretForIteration = new Dictionary<int, List<double>>();
+      var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
+      var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
       // calculate statistics
       for (int r = 0; r < reps; r++) {
         var nextLogStep = 1;
         var b = new BernoulliBandit(new Random(globalRand.Next()), 10);
+        var b = banditFactory(new Random(globalRand.Next()), nArms);
         policy.Reset();
         var totalRegret = 0.0;
+        var totalPullsOfSuboptimalArmsExp = 0.0;
+        var totalPullsOfSuboptimalArmsMax = 0.0;
         for (int i = 0; i <= maxIt; i++) {
           var selectedAction = policy.SelectAction();
           var reward = b.Pull(selectedAction);
+          policy.UpdateReward(selectedAction, reward);
+          // collect stats
+          if (selectedAction != b.OptimalExpectedRewardArm) totalPullsOfSuboptimalArmsExp++;
+          if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++;
           totalRegret += b.OptimalExpectedReward - reward;
+          policy.UpdateReward(selectedAction, reward);
           if (i == nextLogStep) {
             nextLogStep *= 10;
             if (!avgRegretForIteration.ContainsKey(i)) {
               avgRegretForIteration.Add(i, 0.0);
+            nextLogStep *= 2;
+            if (!regretForIteration.ContainsKey(i)) {
+              regretForIteration.Add(i, new List<double>());
+            }
+            avgRegretForIteration[i] += totalRegret / i;
+            regretForIteration[i].Add(totalRegret / i);
+            if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
+              numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
+            }
+            numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
+            if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
+              numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
+            }
+            numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
+          }
+        }
+      }
       // print
+      foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
+        Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret
+      }
+    }
+    private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
+      var maxIt = 1E6;
+      var reps = 10; // 10 independent runs
+      var avgRegretForIteration = new Dictionary<int, double>();
+      // calculate statistics
+      for (int r = 0; r < reps; r++) {
+        var nextLogStep = 1;
+        var b = new TruncatedNormalBandit(new Random(globalRand.Next()), 10);
+        policy.Reset();
+        var totalRegret = 0.0;
+        for (int i = 0; i <= maxIt; i++) {
+          var selectedAction = policy.SelectAction();
+          var reward = b.Pull(selectedAction);
+          totalRegret += b.OptimalExpectedReward - reward;
+          policy.UpdateReward(selectedAction, reward);
+          if (i == nextLogStep) {
+            nextLogStep *= 10;
+            if (!avgRegretForIteration.ContainsKey(i)) {
+              avgRegretForIteration.Add(i, 0.0);
+            }
+            avgRegretForIteration[i] += totalRegret / i;
+          }
+        }
+      }
+      // print
+      foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
+        Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret
+      foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
+        Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2}",
+          p,
+          regretForIteration[p].Average(),
+          regretForIteration[p].Min(),
+          regretForIteration[p].Max(),
+          numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
+          numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps
+          );
+      }
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestInstances.cs

-                      r11659
+                      r11730
         Assert.AreEqual(2, g.GetAlternatives('B').Count());
         Assert.IsTrue(g.GetAlternatives('S').Contains("aA"));
         Assert.IsTrue(g.GetAlternatives('S').Contains("bB"));
         Assert.IsTrue(g.GetAlternatives('A').Contains("aA"));
         Assert.IsTrue(g.GetAlternatives('A').Contains("a"));
         Assert.IsTrue(g.GetAlternatives('B').Contains("Bb"));
         Assert.IsTrue(g.GetAlternatives('B').Contains("b"));
         Assert.AreEqual(2, g.MinPhraseLength("S"));
         Assert.AreEqual(short.MaxValue, g.MaxPhraseLength("S"));
         Assert.AreEqual(1, g.MinPhraseLength("A"));
         Assert.AreEqual(short.MaxValue, g.MaxPhraseLength("A"));
         Assert.AreEqual(1, g.MinPhraseLength("B"));
         Assert.AreEqual(short.MaxValue, g.MaxPhraseLength("B"));
+        Assert.IsTrue(g.GetAlternatives('S').Any(s => s.ToString() == "aA"));
+        Assert.IsTrue(g.GetAlternatives('S').Any(s => s.ToString() == "bB"));
+        Assert.IsTrue(g.GetAlternatives('A').Any(s => s.ToString() == "aA"));
+        Assert.IsTrue(g.GetAlternatives('A').Any(s => s.ToString() == "a"));
+        Assert.IsTrue(g.GetAlternatives('B').Any(s => s.ToString() == "Bb"));
+        Assert.IsTrue(g.GetAlternatives('B').Any(s => s.ToString() == "b"));
+        Assert.AreEqual(2, g.MinPhraseLength(new Sequence("S")));
+        Assert.AreEqual(short.MaxValue, g.MaxPhraseLength(new Sequence("S")));
+        Assert.AreEqual(1, g.MinPhraseLength(new Sequence("A")));
+        Assert.AreEqual(short.MaxValue, g.MaxPhraseLength(new Sequence("A")));
+        Assert.AreEqual(1, g.MinPhraseLength(new Sequence("B")));
+        Assert.AreEqual(short.MaxValue, g.MaxPhraseLength(new Sequence("B")));
+      }
 …
         Assert.AreEqual(1, g.GetAlternatives('S').Count());
         Assert.IsTrue(g.GetAlternatives('S').Contains("sS"));
         Assert.AreEqual(short.MaxValue, g.MinPhraseLength("S"));
         Assert.AreEqual(short.MaxValue, g.MaxPhraseLength("S"));
+        Assert.IsTrue(g.GetAlternatives('S').Any(s => s.ToString() == "sS"));
+        Assert.AreEqual(short.MaxValue, g.MinPhraseLength(new Sequence("S")));
+        Assert.AreEqual(short.MaxValue, g.MaxPhraseLength(new Sequence("S")));
+      }
 …
         Assert.AreEqual(2, g.GetAlternatives('S').Count());
         Assert.IsTrue(g.GetAlternatives('S').Contains("sss"));
         Assert.IsTrue(g.GetAlternatives('S').Contains("sS"));
         Assert.AreEqual(3, g.MinPhraseLength("S"));
         Assert.AreEqual(short.MaxValue, g.MaxPhraseLength("S"));
         Assert.AreEqual(4, g.MinPhraseLength("sS"));
         Assert.AreEqual(7, g.MinPhraseLength("sSS"));
         Assert.AreEqual(short.MaxValue, g.MaxPhraseLength("sSS"));
         Assert.AreEqual(3, g.MaxPhraseLength("sss"));
         Assert.AreEqual(3, g.MinPhraseLength("sss"));
+        Assert.IsTrue(g.GetAlternatives('S').Any(s => s.ToString() == "sss"));
+        Assert.IsTrue(g.GetAlternatives('S').Any(s => s.ToString() == "sS"));
+        Assert.AreEqual(3, g.MinPhraseLength(new Sequence("S")));
+        Assert.AreEqual(short.MaxValue, g.MaxPhraseLength(new Sequence("S")));
+        Assert.AreEqual(4, g.MinPhraseLength(new Sequence("sS")));
+        Assert.AreEqual(7, g.MinPhraseLength(new Sequence("sSS")));
+        Assert.AreEqual(short.MaxValue, g.MaxPhraseLength(new Sequence("sSS")));
+        Assert.AreEqual(3, g.MaxPhraseLength(new Sequence("sss")));
+        Assert.AreEqual(3, g.MinPhraseLength(new Sequence("sss")));
+      }
 …
         Assert.AreEqual(2, g.GetAlternatives('S').Count());
         Assert.IsTrue(g.GetAlternatives('S').Contains("T"));
         Assert.IsTrue(g.GetAlternatives('S').Contains("TS"));
         Assert.AreEqual(1, g.MinPhraseLength("S"));
         Assert.AreEqual(short.MaxValue, g.MaxPhraseLength("S"));
         Assert.AreEqual(1, g.MinPhraseLength("T"));
         Assert.AreEqual(1, g.MaxPhraseLength("T"));
+        Assert.IsTrue(g.GetAlternatives('S').Any(s => s.ToString() == "T"));
+        Assert.IsTrue(g.GetAlternatives('S').Any(s => s.ToString() == "TS"));
+        Assert.AreEqual(1, g.MinPhraseLength(new Sequence("S")));
+        Assert.AreEqual(short.MaxValue, g.MaxPhraseLength(new Sequence("S")));
+        Assert.AreEqual(1, g.MinPhraseLength(new Sequence("T")));
+        Assert.AreEqual(1, g.MaxPhraseLength(new Sequence("T")));
+      }
 …
       Assert.AreEqual(0.252718466940018, p.Evaluate("a*b"), 1.0E-7);
+      Assert.AreEqual(0.290635611728845, p.Evaluate("c*d"), 1.0E-7);
+      Assert.AreEqual(0.25737325167716, p.Evaluate("e*f"), 1.0E-7);
       Assert.AreEqual(0.00173739472363473, p.Evaluate("b*c"), 1.0E-7);
       Assert.AreEqual(3.15450564064128E-05, p.Evaluate("d*e"), 1.0E-7);
+      Assert.AreEqual(0.0943358163760454, p.Evaluate("a*g*i"), 1.0E-7);
+      Assert.AreEqual(0.116199534934045, p.Evaluate("c*f*j"), 1.0E-7);
       Assert.AreEqual(1.0, p.Evaluate("a*b+c*d+e*f+a*g*i+c*f*j"), 1.0E-7);
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestSolvers.cs

-                      r11727
+                      r11730
         // E -> V | V+E | V-E | V*E | V/E | (E)
         // V -> a .. j
+        /* grammar has been changed ... unit test not yet adapted
         var prob = new SymbolicRegressionPoly10Problem();
         var comb = 10;
         TestDFS(prob, 1, comb);
         TestDFS(prob, 2, comb);
         comb = comb + 10 * 4 * comb + comb;
         TestDFS(prob, 3, comb);
         TestDFS(prob, 4, comb);
         comb = comb + 10 * 4 * comb + 10; // ((E))
         TestDFS(prob, 5, comb);
         TestDFS(prob, 6, comb);
         comb = comb + 10 * 4 * comb + 10; // (((E)))
+        comb = comb + 10 * 4 * comb + 10; // (((E)))  */
         // takes too long
         //TestDFS(prob, 7, comb);
 …
         // E -> V | V+E | V-E | V*E | V/E | (E)
         // V -> a .. j
+        /* grammar has been changed ... unit test not yet adapted
         var prob = new SymbolicRegressionPoly10Problem();
         var comb = 10;
 …
         TestDFS(prob, 6, comb);
         comb = comb + 10 * 4 * comb + 10; // (((E)))
+        comb = comb + 10 * 4 * comb + 10; // (((E))) */
         // takes too long
         //TestDFS(prob, 7, comb);
 …
     private void TestDFS(IProblem prob, int len, int numExpectedSols) {
       var solver = new ExhaustiveDepthFirstSearch(len);
+      var solver = new ExhaustiveDepthFirstSearch(prob, len);
       int numSols = 0;
       solver.SolutionEvaluated += (s, d) => { numSols++; };
+      solver.SolutionEvaluated += (s, d) => { numSols++; Console.WriteLine(s); };
       solver.Run(prob, int.MaxValue);
+      solver.Run(int.MaxValue);
       Assert.AreEqual(numExpectedSols, numSols);
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs

-                      r11727
+                      r11730
   public class Grammar : IGrammar {
     private readonly Dictionary<char, List<string>> rules;
+    private readonly Dictionary<char, List<Sequence>> rules;
     private readonly HashSet<char> terminalSymbols;
     private readonly char sentenceSymbol;
     private readonly HashSet<char> nonTerminalSymbols;
     private readonly Dictionary<string, int> maxPhraseLength = new Dictionary<string, int>();
     private readonly Dictionary<string, int> minPhraseLength = new Dictionary<string, int>();
+    private readonly Dictionary<Sequence, int> maxPhraseLength = new Dictionary<Sequence, int>();
+    private readonly Dictionary<Sequence, int> minPhraseLength = new Dictionary<Sequence, int>();
     public char SentenceSymbol { get { return sentenceSymbol; } }
 …
     // cloning ctor
     public Grammar(Grammar orig) {
+      this.rules = new Dictionary<char, List<string>>(orig.rules);
+      this.rules = new Dictionary<char, List<Sequence>>();
+      foreach (var r in orig.rules)
+        this.rules.Add(r.Key, new List<Sequence>(r.Value.Select(v => new Sequence(v)))); // clone sequences
       this.terminalSymbols = new HashSet<char>(orig.terminalSymbols);
       this.sentenceSymbol = orig.sentenceSymbol;
       this.nonTerminalSymbols = new HashSet<char>(orig.nonTerminalSymbols);
+      this.maxPhraseLength = new Dictionary<string, int>(orig.maxPhraseLength);
+      this.minPhraseLength = new Dictionary<string, int>(orig.minPhraseLength);
+      this.maxPhraseLength = new Dictionary<Sequence, int>();
+      foreach (var p in orig.maxPhraseLength) this.maxPhraseLength.Add(new Sequence(p.Key), p.Value);
+      this.minPhraseLength = new Dictionary<Sequence, int>();
+      foreach (var p in orig.minPhraseLength) this.minPhraseLength.Add(new Sequence(p.Key), p.Value);
+    }
 …
       this.terminalSymbols = new HashSet<char>(terminalSymbols);
       this.nonTerminalSymbols = new HashSet<char>(nonTerminalSymbols);
       this.rules = new Dictionary<char, List<string>>();
+      this.rules = new Dictionary<char, List<Sequence>>();
       foreach (var r in rules) {
         if (!this.rules.ContainsKey(r.Item1)) this.rules.Add(r.Item1, new List<string>());
         this.rules[r.Item1].Add(r.Item2); // here we store an array of symbols for a phase
+        if (!this.rules.ContainsKey(r.Item1)) this.rules.Add(r.Item1, new List<Sequence>());
+        this.rules[r.Item1].Add(new Sequence(r.Item2)); // here we store an array of symbols for a phase
+      }
 …
           max = Math.Max(max, maxPhraseLength[alt]);
+        }
         minPhraseLength[nt.ToString()] = min;
         maxPhraseLength[nt.ToString()] = max;
+      }
+    }
     public IEnumerable<string> GetAlternatives(char nt) {
+        minPhraseLength[new Sequence(nt)] = min;
+        maxPhraseLength[new Sequence(nt)] = max;
+      }
+    }
+    public IEnumerable<Sequence> GetAlternatives(char nt) {
       return rules[nt];
+    }
     public IEnumerable<string> GetTerminalAlternatives(char nt) {
+    public IEnumerable<Sequence> GetTerminalAlternatives(char nt) {
       return GetAlternatives(nt).Where(alt => alt.All(IsTerminal));
+    }
     public IEnumerable<string> GetNonTerminalAlternatives(char nt) {
+    public IEnumerable<Sequence> GetNonTerminalAlternatives(char nt) {
       return GetAlternatives(nt).Where(alt => alt.Any(IsNonTerminal));
+    }
     // caches for this are built during construction of the object
     public int MinPhraseLength(string phrase) {
+    public int MinPhraseLength(Sequence phrase) {
       int l;
       if (minPhraseLength.TryGetValue(phrase, out l)) return l;
 …
     // caches for this are built during construction of the object
     public int MaxPhraseLength(string phrase) {
+    public int MaxPhraseLength(Sequence phrase) {
       int l;
       if (maxPhraseLength.TryGetValue(phrase, out l)) return l;
 …
+    }
     public string CompleteSentenceRandomly(Random random, string phrase, int maxLen) {
+    public Sequence CompleteSentenceRandomly(Random random, Sequence phrase, int maxLen) {
       if (phrase.Length > maxLen) throw new ArgumentException();
       if (MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
       bool done = phrase.All(IsTerminal); // terminal phrase means we are done
+      bool done = phrase.IsTerminal; // terminal phrase means we are done
       while (!done) {
+        int ntIdx; char nt;
+        FindFirstNonTerminal(this, phrase, out nt, out ntIdx);
+        char nt = phrase.FirstNonTerminal;
         int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
 …
         // replace nt with random alternative
         var selectedAlt = alts.SelectRandom(random);
+        phrase = phrase.Remove(ntIdx, 1);
+        phrase = phrase.Insert(ntIdx, selectedAlt);
+        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
         done = phrase.All(IsTerminal); // terminal phrase means we are done
+      }
       return phrase;
+    }
-    public static void FindFirstNonTerminal(IGrammar g, string phrase, out char nt, out int ntIdx) {
-      ntIdx = 0;
-      while (ntIdx < phrase.Length && g.IsTerminal(phrase[ntIdx])) ntIdx++;
-      if (ntIdx >= phrase.Length) {
-        ntIdx = -1;
-        nt = '\0';
-      } else {
-        nt = phrase[ntIdx];
+      }
+    }
 …
       foreach (var r in rules) {
         foreach (var alt in r.Value) {
+          var phrase = string.Join(" ", alt);
+          sb.AppendFormat("  {0} -> {1} (min: {2}, max {3})", r.Key, phrase, MinPhraseLength(phrase), MaxPhraseLength(phrase))
+          sb.AppendFormat("  {0} -> {1} (min: {2}, max {3})", r.Key, alt, MinPhraseLength(alt), MaxPhraseLength(alt))
             .AppendLine();
+        }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.csproj

r11727	r11730
47	47	<Compile Include="EvenParityProblem.cs" />
48	48	<Compile Include="SentenceSetStatistics.cs" />
	49	<Compile Include="Sequence.cs" />
49	50	<Compile Include="SymbolicRegressionPoly10Problem.cs" />
50	51	<Compile Include="SantaFeAntProblem.cs" />

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IGrammar.cs

-                      r11659
+                      r11730
     IEnumerable<char> Symbols { get; }
     IEnumerable<string> GetAlternatives(char nt);
     IEnumerable<string> GetTerminalAlternatives(char nt);
     IEnumerable<string> GetNonTerminalAlternatives(char nt);
+    IEnumerable<Sequence> GetAlternatives(char nt);
+    IEnumerable<Sequence> GetTerminalAlternatives(char nt);
+    IEnumerable<Sequence> GetNonTerminalAlternatives(char nt);
     int MinPhraseLength(string phrase);
     int MaxPhraseLength(string phrase);
     string CompleteSentenceRandomly(Random random, string phrase, int maxLen);
+    int MinPhraseLength(Sequence phrase);
+    int MaxPhraseLength(Sequence phrase);
+    Sequence CompleteSentenceRandomly(Random random, Sequence phrase, int maxLen);
     bool IsTerminal(char symbol);

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs

r11727	r11730
112	112	private int steps;
113	113	private HeadingEnum heading;
	114
	115
114	116
115	117	public Ant() {

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SentenceSetStatistics.cs

-                      r11727
+                      r11730
 using System.Text;
 using System.Threading.Tasks;
+using HeuristicLab.Common;
 namespace HeuristicLab.Problems.GrammaticalOptimization {
 …
     public string LastSentence { get; private set; }
     public double BestSentenceQuality { get; private set; }
+    public double BestSentenceIndex { get; private set; }
     public double FirstSentenceQuality { get; private set; }
     public double LastSentenceQuality { get; private set; }
 …
     public void AddSentence(string sentence, double quality) {
+      if (NumberOfSentences == 0) {
+      sumQualities += quality;
+      NumberOfSentences++;
+      if (NumberOfSentences == 1) {
         FirstSentence = sentence;
         FirstSentenceQuality = quality;
 …
         BestSentence = sentence;
         BestSentenceQuality = quality;
+        BestSentenceIndex = NumberOfSentences;
+      }
-      sumQualities += quality;
-      NumberOfSentences++;
       LastSentence = sentence;
 …
     public override string ToString() {
       return
         string.Format("Sentences: {0,10} avg.-quality {1,7:F5} best {2,7:F5} {3} first {4,7:F5} {5} last {6,7:F5} {7}",
+        string.Format("Sentences: {0,10} avg.-quality {1,7:F5} best {2,7:F5} {3,2} {4,10} {5} first {6,7:F5} {7} last {8,7:F5} {9}",
       NumberOfSentences, AverageQuality,
+      BestSentenceQuality, BestSentence,
+      BestSentenceQuality, BestSentenceQuality.IsAlmost(1.0)?1.0:0.0,
+      BestSentenceIndex, BestSentence,
       FirstSentenceQuality, FirstSentence,
       LastSentenceQuality, LastSentence

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs

-                      r11727
+                      r11730
     private const string grammarString = @"
     G(E):
     E -> a | b | c | d | e | f | g | h | j | a+E | b+E | c+E | d+E | e+E | f+E | g+E | h+E | j+E | a*E | b*E | c*E | d*E | e*E | f*E | g*E | h*E | j*E
+    E -> a | b | c | d | e | f | g | h | i | j | a+E | b+E | c+E | d+E | e+E | f+E | g+E | h+E | i+E | j+E | a*E | b*E | c*E | d*E | e*E | f*E | g*E | h*E | i*E | j*E
     ";
 …
+        }
         // poly-10 no noise
+        /* a*b + c*d + e*f + a*g*i + c*f*j */
         y[i] = x[i][0] * x[i][1] +
                x[i][2] * x[i][3] +

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

-                      r11727
+                      r11730
 using System.Data;
 using System.Diagnostics;
+using System.Globalization;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.Models;
 using HeuristicLab.Algorithms.GrammaticalOptimization;
 using HeuristicLab.Problems.GrammaticalOptimization;
 …
   class Program {
     static void Main(string[] args) {
+      // RunDemo();
+      RunGridTest();
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      RunDemo();
+      //RunGridTest();
+    }
     private static void RunGridTest() {
+      int maxIterations = 150000;
+      var globalRandom = new Random(31415);
+      var reps = 10;
+      Parallel.ForEach(new int[] { 1, 5, 10, 100, 500, 1000 }, (randomTries) => {
+        Random localRand;
+        lock (globalRandom) {
+          localRand = new Random(globalRandom.Next());
+        }
+        var policyFactories = new Func<int, IPolicy>[]
+      int maxIterations = 100000; // for poly-10 with 50000 evaluations no successful try with hl yet
+      // var globalRandom = new Random(31415);
+      var localRandSeed = 31415;
+      var reps = 20;
+      var policyFactories = new Func<Random, int, IPolicy>[]
+        {
+          (numActions) => new RandomPolicy(localRand, numActions),
+          (numActions) => new UCB1Policy(numActions),
+          (numActions) => new UCB1TunedPolicy(numActions),
+          (numActions) => new UCBNormalPolicy(numActions),
+          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.01),
+          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.05),
+          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.1),
+          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.2),
+          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.5),
+          (numActions) => new GaussianThompsonSamplingPolicy(localRand, numActions),
+          (numActions) => new BernoulliThompsonSamplingPolicy(localRand, numActions)
+          (rand, numActions) => new GaussianThompsonSamplingPolicy(rand, numActions),
+          (rand, numActions) => new BernoulliThompsonSamplingPolicy(rand, numActions),
+          (rand, numActions) => new RandomPolicy(rand, numActions),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.01),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.05),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.2),
+          (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.5),
+          (rand, numActions) => new UCTPolicy(numActions, 0.1),
+          (rand, numActions) => new UCTPolicy(numActions, 0.5),
+          (rand, numActions) => new UCTPolicy(numActions, 1),
+          (rand, numActions) => new UCTPolicy(numActions, 2),
+          (rand, numActions) => new UCTPolicy(numActions, 5),
+          (rand, numActions) => new UCTPolicy(numActions, 10),
+          (rand, numActions) => new UCB1Policy(numActions),
+          (rand, numActions) => new UCB1TunedPolicy(numActions),
+          (rand, numActions) => new UCBNormalPolicy(numActions),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.1),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.5),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 1),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 5),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 10),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 20),
+          (rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 100),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.01),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.05),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.1),
+          (rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.05),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.1),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.2),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 5000, 0.01),
+          (rand, numActions) => new ThresholdAscentPolicy(numActions, 10000, 0.01),
         };
+        foreach (var policyFactory in policyFactories)
+          for (int i = 0; i < reps; i++) {
+      var tasks = new List<Task>();
+      foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
+        foreach (var policyFactory in policyFactories) {
+          var myPolicyFactory = policyFactory;
+          var myRandomTries = randomTries;
+          var localRand = new Random(localRandSeed);
+          var options = new ParallelOptions();
+          options.MaxDegreeOfParallelism = 1;
+          Parallel.For(0, reps, options, (i) => {
+            //var t = Task.Run(() => {
+            Random myLocalRand;
+            lock (localRand)
+              myLocalRand = new Random(localRand.Next());
+            //for (int i = 0; i < reps; i++) {
             int iterations = 0;
             var sw = new Stopwatch();
             var globalStatistics = new SentenceSetStatistics();
             // var problem = new SymbolicRegressionPoly10Problem();
             var problem = new SantaFeAntProblem();
+            var problem = new SymbolicRegressionPoly10Problem();
+            //var problem = new SantaFeAntProblem();
             //var problem = new PalindromeProblem();
             //var problem = new HardPalindromeProblem();
             //var problem = new RoyalPairProblem();
             //var problem = new EvenParityProblem();
             var alg = new MctsSampler(problem, 17, localRand, randomTries, policyFactory);
+            var alg = new MctsSampler(problem, 25, myLocalRand, myRandomTries, myPolicyFactory);
             //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
             //var alg = new AlternativesContextSampler(problem, 25);
 …
               globalStatistics.AddSentence(sentence, quality);
               if (iterations % 10000 == 0) {
                 Console.WriteLine("{0} {1} {2}", randomTries, policyFactory(1), globalStatistics);
+                Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, myPolicyFactory(myLocalRand, 1), globalStatistics);
+              }
             };
 …
             sw.Stop();
+          }
+      });
+            //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
+            //}
+            //});
+            //tasks.Add(t);
+          });
+        }
+      }
+      //Task.WaitAll(tasks.ToArray());
+    }
     private static void RunDemo() {
+      // TODO: implement threshold ascent
+      // TODO: implement inspection for MCTS
+      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
+      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
+      // TODO: how else can samplers be compared, i.e. what can be measured to estimate the quality of a sampler (apart from quality and iterations until the best solution)? => goal: get to a good result faster
+      // TODO: is the likelihood for R=1 easy to calculate for Gaussian or GaussianMixture?
+      // TODO: research thompson sampling for max bandit?
+      // TODO: extensive test of strategies for the k-armed max bandit
+      // TODO: verify TA implementation using example from the original paper
+      // TODO: reference HL.ProblemInstances and try on tower dataset
+      // TODO: compare results for different policies also for the symb-reg problem
+      // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
+      // TODO: implement thompson sampling for gaussian mixture models
+      // TODO: implement inspection for MCTS (possibly an interactive command line to display statistics from the tree)
+      // TODO: implement ACO-style bandit policy
+      // TODO: implement sequences that can be manipulated in-place (instead of strings), alternatives are also stored as sequences, for a sequence the index of the first NT-symb can be stored
+      // TODO: simultaneous modeling of transformed target variables (y, 1/y, log(y), exp(y), sqrt(y), ...)
+      // TODO: for complete-randomly, compare generating sentences that are as short as possible vs. simply choosing alternatives at random
+      // TODO: reward discounting (for reward distributions that change over time); create a dedicated unit test for it
       int maxIterations = 10000000;
 …
       string bestSentence = "";
       var globalStatistics = new SentenceSetStatistics();
       var random = new Random(31415);
       // var problem = new SymbolicRegressionPoly10Problem();
+      var random = new Random();
+      //var problem = new SymbolicRegressionPoly10Problem();
       var problem = new SantaFeAntProblem();
       //var problem = new PalindromeProblem();
 …
       //var problem = new RoyalPairProblem();
       //var problem = new EvenParityProblem();
+      var alg = new MctsSampler(problem, 17, random);
+      //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
+      //var alg = new AlternativesContextSampler(problem, 25);
+      //var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new GenericThompsonSamplingPolicy(rand, numActions, new GaussianModel(numActions, 0.5, 10)));
+      //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
+      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
+      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
+      // var alg = new AlternativesSampler(problem, 17);
+      var alg = new RandomSearch(problem, random, 17);
       alg.FoundNewBestSolution += (sentence, quality) => {
 …
         iterations++;
         globalStatistics.AddSentence(sentence, quality);
+        if (iterations % 1000 == 0) {
+          //alg.PrintStats();
+        }
         if (iterations % 10000 == 0) {
           //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
+          Console.WriteLine(globalStatistics.ToString());
+          //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
+          Console.WriteLine(globalStatistics);
+        }
       };

Context Navigation

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Bandits/BernoulliBandit.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Bandits/TruncatedNormalBandit.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BanditPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BernoulliThompsonSamplingPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/Exp3Policy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GaussianThompsonSamplingPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/RandomPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCBNormalPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveBreadthFirstSearch.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveDepthFirstSearch.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/RandomSearch.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/HeuristicLab.Problems.GrammaticalOptimization.Test.csproj

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestInstances.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestSolvers.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.csproj

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IGrammar.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SentenceSetStatistics.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

Download in other formats: