Changeset 11727
- Timestamp:
- 12/29/14 11:02:36 (10 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization
- Files:
-
- 12 added
- 29 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/GrammaticalOptimization.sln ¶
r11708 r11727 11 11 EndProject 12 12 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Algorithms.Bandits", "HeuristicLab.Algorithms.Bandits\HeuristicLab.Algorithms.Bandits.csproj", "{24408F7D-EE0F-4886-A08B-EC324D662E47}" 13 EndProject 14 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Common", "HeuristicLab.Common\HeuristicLab.Common.csproj", "{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}" 13 15 EndProject 14 16 Global … … 38 40 {24408F7D-EE0F-4886-A08B-EC324D662E47}.Release|Any CPU.ActiveCfg = Release|Any CPU 39 41 {24408F7D-EE0F-4886-A08B-EC324D662E47}.Release|Any CPU.Build.0 = Release|Any CPU 42 {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 43 {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Debug|Any CPU.Build.0 = Debug|Any CPU 44 {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Release|Any CPU.ActiveCfg = Release|Any CPU 45 {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Release|Any CPU.Build.0 = Release|Any CPU 40 46 EndGlobalSection 41 47 GlobalSection(SolutionProperties) = preSolution 42 48 HideSolutionNode = FALSE 43 49 EndGlobalSection 50 GlobalSection(Performance) = preSolution 51 HasPerformanceSessions = true 52 EndGlobalSection 44 53 EndGlobal -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj ¶
r11711 r11727 43 43 <Compile Include="Bandits\TruncatedNormalBandit.cs" /> 44 44 <Compile Include="Policies\BanditPolicy.cs" /> 45 <Compile Include="Policies\BernoulliThompsonSamplingPolicy.cs" /> 46 <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs" /> 47 <Compile Include="Policies\Exp3Policy.cs" /> 45 48 <Compile Include="Policies\EpsGreedyPolicy.cs" /> 46 49 <Compile Include="Policies\RandomPolicy.cs" /> … … 51 54 <Compile Include="Properties\AssemblyInfo.cs" /> 52 55 </ItemGroup> 53 <ItemGroup /> 56 <ItemGroup> 57 <ProjectReference Include="..\HeuristicLab.Common\HeuristicLab.Common.csproj"> 58 <Project>{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}</Project> 59 <Name>HeuristicLab.Common</Name> 60 </ProjectReference> 61 </ItemGroup> 54 62 <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> 55 63 <!-- To modify your build process, add your task inside one of the targets below and uncomment it. -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs ¶
r11708 r11727 6 6 7 7 namespace HeuristicLab.Algorithms.Bandits { 8 // this interface represents a policy for reinforcement learning 8 9 public interface IPolicy { 9 int SelectAction(); 10 void UpdateReward(int action, double reward); 10 IEnumerable<int> Actions { get; } 11 int SelectAction(); // action selection ... 12 void UpdateReward(int action, double reward); // ... and reward update are defined as usual 13 14 // policies must also support disabling of potential actions 15 // for instance if we know that an action in a state has a deterministic 16 // reward we need to sample it only once 17 // it is necessary to sample an action only once 18 void DisableAction(int action); 19 20 // reset causes the policy to be reinitialized to it's initial state (as after constructor-call) 11 21 void Reset(); 12 22 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BanditPolicy.cs ¶
r11711 r11727 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Diagnostics; 3 4 using System.Linq; 4 5 using System.Text; … … 7 8 namespace HeuristicLab.Algorithms.Bandits { 8 9 public abstract class BanditPolicy : IPolicy { 9 public int NumActions { get; private set; } 10 public BanditPolicy(int numActions) { 11 this.NumActions = numActions; 10 public IEnumerable<int> Actions { get; private set; } 11 private readonly int numInitialActions; 12 13 protected BanditPolicy(int numActions) { 14 this.numInitialActions = numActions; 15 Actions = Enumerable.Range(0, numActions).ToArray(); 12 16 } 13 17 14 18 public abstract int SelectAction(); 15 19 public abstract void UpdateReward(int action, double reward); 16 public abstract void Reset(); 20 21 public virtual void DisableAction(int action) { 22 Debug.Assert(Actions.Contains(action)); 23 24 Actions = Actions.Where(a => a != action).ToArray(); 25 } 26 27 public virtual void Reset() { 28 Actions = Enumerable.Range(0, numInitialActions).ToArray(); 29 } 17 30 } 18 31 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs ¶
r11711 r11727 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Diagnostics; 3 4 using System.Linq; 4 5 using System.Text; … … 11 12 private readonly int[] tries; 12 13 private readonly double[] sumReward; 14 private readonly RandomPolicy randomPolicy; 15 13 16 public EpsGreedyPolicy(Random random, int numActions, double eps) 14 17 : base(numActions) { 15 18 this.random = random; 16 19 this.eps = eps; 17 this.tries = new int[NumActions]; 18 this.sumReward = new double[NumActions]; 20 this.randomPolicy = new RandomPolicy(random, numActions); 21 this.tries = new int[numActions]; 22 this.sumReward = new double[numActions]; 19 23 } 20 24 21 25 public override int SelectAction() { 26 Debug.Assert(Actions.Any()); 22 27 if (random.NextDouble() > eps) { 23 28 // select best 24 29 var maxReward = double.NegativeInfinity; 25 30 int bestAction = -1; 26 for (int i = 0; i < NumActions; i++) {27 if (tries[ i] == 0) return i;28 var avgReward = sumReward[ i] / tries[i];31 foreach (var a in Actions) { 32 if (tries[a] == 0) return a; 33 var avgReward = sumReward[a] / tries[a]; 29 34 if (maxReward < avgReward) { 30 35 maxReward = avgReward; 31 bestAction = i;36 bestAction = a; 32 37 } 33 38 } 39 Debug.Assert(bestAction >= 0); 34 40 return bestAction; 35 41 } else { 36 42 // select random 37 return random .Next(NumActions);43 return randomPolicy.SelectAction(); 38 44 } 39 45 } 40 46 public override void UpdateReward(int action, double reward) { 47 Debug.Assert(Actions.Contains(action)); 48 49 randomPolicy.UpdateReward(action, reward); // does nothing 41 50 tries[action]++; 42 51 sumReward[action] += reward; 43 52 } 53 54 public override void DisableAction(int action) { 55 base.DisableAction(action); 56 randomPolicy.DisableAction(action); 57 sumReward[action] = 0; 58 tries[action] = -1; 59 } 60 44 61 public override void Reset() { 62 base.Reset(); 63 randomPolicy.Reset(); 45 64 Array.Clear(tries, 0, tries.Length); 46 65 Array.Clear(sumReward, 0, sumReward.Length); -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/RandomPolicy.cs ¶
r11711 r11727 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Diagnostics; 3 4 using System.Linq; 4 5 using System.Text; 5 6 using System.Threading.Tasks; 7 using HeuristicLab.Common; 6 8 7 9 namespace HeuristicLab.Algorithms.Bandits { 8 10 public class RandomPolicy : BanditPolicy { 9 11 private readonly Random random; 12 10 13 public RandomPolicy(Random random, int numActions) 11 14 : base(numActions) { … … 14 17 15 18 public override int SelectAction() { 16 return random.Next(NumActions); 19 Debug.Assert(Actions.Any()); 20 return Actions.SelectRandom(random); 17 21 } 18 22 public override void UpdateReward(int action, double reward) { 19 23 // do nothing 20 24 } 21 public override void Reset() { 22 // do nothing 23 } 25 24 26 } 25 27 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs ¶
r11711 r11727 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Diagnostics; 3 4 using System.Linq; 4 5 using System.Text; … … 12 13 public UCB1Policy(int numActions) 13 14 : base(numActions) { 14 this.tries = new int[ NumActions];15 this.sumReward = new double[ NumActions];15 this.tries = new int[numActions]; 16 this.sumReward = new double[numActions]; 16 17 } 17 18 … … 19 20 int bestAction = -1; 20 21 double bestQ = double.NegativeInfinity; 21 for (int i = 0; i < NumActions; i++) {22 if (tries[ i] == 0) return i;23 var q = sumReward[ i] / tries[i] + Math.Sqrt((2 * Math.Log(totalTries)) / tries[i]);22 foreach (var a in Actions) { 23 if (tries[a] == 0) return a; 24 var q = sumReward[a] / tries[a] + Math.Sqrt((2 * Math.Log(totalTries)) / tries[a]); 24 25 if (q > bestQ) { 25 26 bestQ = q; 26 bestAction = i;27 bestAction = a; 27 28 } 28 29 } … … 30 31 } 31 32 public override void UpdateReward(int action, double reward) { 33 Debug.Assert(Actions.Contains(action)); 32 34 totalTries++; 33 35 tries[action]++; 34 36 sumReward[action] += reward; 35 37 } 38 39 public override void DisableAction(int action) { 40 base.DisableAction(action); 41 totalTries -= tries[action]; 42 tries[action] = -1; 43 sumReward[action] = 0; 44 } 45 36 46 public override void Reset() { 47 base.Reset(); 37 48 totalTries = 0; 38 49 Array.Clear(tries, 0, tries.Length); -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs ¶
r11711 r11727 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Diagnostics; 3 4 using System.Linq; 4 5 using System.Text; … … 13 14 public UCB1TunedPolicy(int numActions) 14 15 : base(numActions) { 15 this.tries = new int[ NumActions];16 this.sumReward = new double[ NumActions];17 this.sumSqrReward = new double[ NumActions];16 this.tries = new int[numActions]; 17 this.sumReward = new double[numActions]; 18 this.sumSqrReward = new double[numActions]; 18 19 } 19 20 … … 25 26 26 27 public override int SelectAction() { 28 Debug.Assert(Actions.Any()); 27 29 int bestAction = -1; 28 30 double bestQ = double.NegativeInfinity; 29 for (int i = 0; i < NumActions; i++) {30 if (tries[ i] == 0) return i;31 var q = sumReward[ i] / tries[i] + Math.Sqrt((Math.Log(totalTries) / tries[i]) * Math.Min(1.0 / 4, V(i))); // 1/4 is upper bound of bernoulli distributed variable31 foreach (var a in Actions) { 32 if (tries[a] == 0) return a; 33 var q = sumReward[a] / tries[a] + Math.Sqrt((Math.Log(totalTries) / tries[a]) * Math.Min(1.0 / 4, V(a))); // 1/4 is upper bound of bernoulli distributed variable 32 34 if (q > bestQ) { 33 35 bestQ = q; 34 bestAction = i;36 bestAction = a; 35 37 } 36 38 } … … 38 40 } 39 41 public override void UpdateReward(int action, double reward) { 42 Debug.Assert(Actions.Contains(action)); 40 43 totalTries++; 41 44 tries[action]++; … … 43 46 sumSqrReward[action] += reward * reward; 44 47 } 48 49 public override void DisableAction(int action) { 50 base.DisableAction(action); 51 totalTries -= tries[action]; 52 tries[action] = -1; 53 sumReward[action] = 0; 54 sumSqrReward[action] = 0; 55 } 56 45 57 public override void Reset() { 58 base.Reset(); 46 59 totalTries = 0; 47 60 Array.Clear(tries, 0, tries.Length); -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCBNormalPolicy.cs ¶
r11711 r11727 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Diagnostics; 3 4 using System.Linq; 4 5 using System.Text; … … 13 14 public UCBNormalPolicy(int numActions) 14 15 : base(numActions) { 15 this.tries = new int[ NumActions];16 this.sumReward = new double[ NumActions];17 this.sumSqrReward = new double[ NumActions];16 this.tries = new int[numActions]; 17 this.sumReward = new double[numActions]; 18 this.sumSqrReward = new double[numActions]; 18 19 } 19 20 20 private double V(int arm) {21 var s = tries[arm];22 return sumSqrReward[arm] / s - Math.Pow(sumReward[arm] / s, 2) + Math.Sqrt(2 * Math.Log(totalTries) / s);23 }24 25 26 21 public override int SelectAction() { 22 Debug.Assert(Actions.Any()); 27 23 int bestAction = -1; 28 24 double bestQ = double.NegativeInfinity; 29 for (int i = 0; i < NumActions; i++) {30 if (totalTries == 0 || tries[ i] == 0 || tries[i] < Math.Ceiling(8 * Math.Log(totalTries))) return i;31 var avgReward = sumReward[ i] / tries[i];25 foreach (var a in Actions) { 26 if (totalTries == 0 || tries[a] == 0 || tries[a] < Math.Ceiling(8 * Math.Log(totalTries))) return a; 27 var avgReward = sumReward[a] / tries[a]; 32 28 var q = avgReward 33 + Math.Sqrt(16 * ((sumSqrReward[ i] - tries[i] * Math.Pow(avgReward, 2)) / (tries[i] - 1)) * (Math.Log(totalTries - 1) / tries[i]));29 + Math.Sqrt(16 * ((sumSqrReward[a] - tries[a] * Math.Pow(avgReward, 2)) / (tries[a] - 1)) * (Math.Log(totalTries - 1) / tries[a])); 34 30 if (q > bestQ) { 35 31 bestQ = q; 36 bestAction = i;32 bestAction = a; 37 33 } 38 34 } … … 40 36 } 41 37 public override void UpdateReward(int action, double reward) { 38 Debug.Assert(Actions.Contains(action)); 42 39 totalTries++; 43 40 tries[action]++; … … 45 42 sumSqrReward[action] += reward * reward; 46 43 } 44 45 public override void DisableAction(int action) { 46 base.DisableAction(action); 47 totalTries -= tries[action]; 48 tries[action] = -1; 49 sumReward[action] = 0; 50 sumSqrReward[action] = 0; 51 } 52 47 53 public override void Reset() { 54 base.Reset(); 48 55 totalTries = 0; 49 56 Array.Clear(tries, 0, tries.Length); -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveBreadthFirstSearch.cs ¶
r11708 r11727 12 12 private readonly int maxLen; 13 13 private readonly Queue<string> bfsQueue = new Queue<string>(); 14 private readonly IProblem problem; 14 15 15 public ExhaustiveBreadthFirstSearch(int maxLen) { 16 public ExhaustiveBreadthFirstSearch(IProblem problem, int maxLen) { 17 this.problem = problem; 16 18 this.maxLen = maxLen; 17 19 } 18 20 19 public void Run( IProblem problem,int maxIterations) {21 public void Run(int maxIterations) { 20 22 double bestQuality = double.MinValue; 21 23 bfsQueue.Enqueue(problem.Grammar.SentenceSymbol.ToString()); … … 24 26 for (int i = 0; sentenceEnumerator.MoveNext() && i < maxIterations; i++) { 25 27 var sentence = sentenceEnumerator.Current; 26 var quality = problem.Evaluate(sentence) ;28 var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen); 27 29 RaiseSolutionEvaluated(sentence, quality); 28 30 … … 39 41 var phrase = bfsQueue.Dequeue(); 40 42 41 var nt = phrase.First(grammar.IsNonTerminal); 42 var ntIdx = phrase.IndexOf(nt); // TODO perf 43 char nt; 44 int ntIdx; 45 Grammar.FindFirstNonTerminal(grammar, phrase, out nt, out ntIdx); 43 46 var alts = grammar.GetAlternatives(nt); 44 47 foreach (var alt in alts) { -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveDepthFirstSearch.cs ¶
r11708 r11727 24 24 for (int i = 0; sentenceEnumerator.MoveNext() && i < maxIterations; i++) { 25 25 var sentence = sentenceEnumerator.Current; 26 var quality = problem.Evaluate(sentence) ;26 var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen); 27 27 RaiseSolutionEvaluated(sentence, quality); 28 28 … … 39 39 var phrase = stack.Pop(); 40 40 41 var nt = phrase.First(grammar.IsNonTerminal); 42 var ntIdx = phrase.IndexOf(nt); // TODO perf 41 char nt; 42 int ntIdx; 43 Grammar.FindFirstNonTerminal(grammar, phrase, out nt, out ntIdx); 43 44 var alts = grammar.GetAlternatives(nt); 44 45 foreach (var alt in alts) { -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj ¶
r11690 r11727 43 43 </ItemGroup> 44 44 <ItemGroup> 45 <Compile Include="AlternativesSampler.cs" /> 46 <Compile Include="AlternativesContextSampler.cs" /> 47 <Compile Include="MctsSampler.cs" /> 45 48 <Compile Include="ExhaustiveDepthFirstSearch.cs" /> 46 49 <Compile Include="ExhaustiveBreadthFirstSearch.cs" /> … … 49 52 </ItemGroup> 50 53 <ItemGroup> 54 <ProjectReference Include="..\HeuristicLab.Algorithms.Bandits\HeuristicLab.Algorithms.Bandits.csproj"> 55 <Project>{24408F7D-EE0F-4886-A08B-EC324D662E47}</Project> 56 <Name>HeuristicLab.Algorithms.Bandits</Name> 57 </ProjectReference> 58 <ProjectReference Include="..\HeuristicLab.Common\HeuristicLab.Common.csproj"> 59 <Project>{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}</Project> 60 <Name>HeuristicLab.Common</Name> 61 </ProjectReference> 51 62 <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj"> 52 63 <Project>{cb9dccf6-667e-4a13-b82d-dbd6b45a045e}</Project> -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs ¶
r11689 r11727 5 5 using System.Threading.Tasks; 6 6 7 namespace HeuristicLab. Problems.GrammaticalOptimization {7 namespace HeuristicLab.Common { 8 8 public static class Extensions { 9 9 public static bool IsAlmost(this double x, double y) { -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs ¶
r11710 r11727 15 15 var seedForPolicy = globalRand.Next(); 16 16 var nArms = 10; 17 //Console.WriteLine("Exp3 (gamma=0.01)"); 18 //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1)); 19 //Console.WriteLine("Exp3 (gamma=0.05)"); 20 //estPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1)); 21 Console.WriteLine("Thompson (Bernoulli)"); 22 TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms)); 17 23 Console.WriteLine("Random"); 18 TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));24 TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms)); 19 25 Console.WriteLine("UCB1"); 20 TestPolicyBernoulli(globalRand, nArms, new UCB1Policy( 10));26 TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(nArms)); 21 27 Console.WriteLine("UCB1Tuned"); 22 TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy( 10));28 TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(nArms)); 23 29 Console.WriteLine("UCB1Normal"); 24 TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy( 10));30 TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(nArms)); 25 31 Console.WriteLine("Eps(0.01)"); 26 TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));32 TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01)); 27 33 Console.WriteLine("Eps(0.05)"); 28 TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));29 Console.WriteLine("Eps(0.1)");30 TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));31 Console.WriteLine("Eps(0.2)");32 TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));33 Console.WriteLine("Eps(0.5)");34 TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));34 TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05)); 35 //Console.WriteLine("Eps(0.1)"); 36 //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1)); 37 //Console.WriteLine("Eps(0.2)"); 38 //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2)); 39 //Console.WriteLine("Eps(0.5)"); 40 //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5)); 35 41 } 36 42 [TestMethod] … … 40 46 var seedForPolicy = globalRand.Next(); 41 47 var nArms = 10; 48 Console.WriteLine("Thompson (Gaussian)"); 49 TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms)); 42 50 Console.WriteLine("Random"); 43 TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));51 TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms)); 44 52 Console.WriteLine("UCB1"); 45 TestPolicyNormal(globalRand, nArms, new UCB1Policy( 10));53 TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms)); 46 54 Console.WriteLine("UCB1Tuned"); 47 TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy( 10));55 TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms)); 48 56 Console.WriteLine("UCB1Normal"); 49 TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(10)); 57 TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms)); 58 //Console.WriteLine("Exp3 (gamma=0.01)"); 59 //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01)); 60 //Console.WriteLine("Exp3 (gamma=0.05)"); 61 //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05)); 50 62 Console.WriteLine("Eps(0.01)"); 51 TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));63 TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01)); 52 64 Console.WriteLine("Eps(0.05)"); 53 TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));54 Console.WriteLine("Eps(0.1)");55 TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));56 Console.WriteLine("Eps(0.2)");57 TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));58 Console.WriteLine("Eps(0.5)");59 TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));65 TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05)); 66 //Console.WriteLine("Eps(0.1)"); 67 //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1)); 68 //Console.WriteLine("Eps(0.2)"); 69 //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2)); 70 //Console.WriteLine("Eps(0.5)"); 71 //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5)); 60 72 } 61 73 -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestSolvers.cs ¶
r11708 r11727 57 57 58 58 private void TestBFS(IProblem prob, int len, int numExpectedSols) { 59 var solver = new ExhaustiveBreadthFirstSearch( len);59 var solver = new ExhaustiveBreadthFirstSearch(prob, len); 60 60 int numSols = 0; 61 61 62 62 solver.SolutionEvaluated += (s, d) => { numSols++; }; 63 63 64 solver.Run( prob,int.MaxValue);64 solver.Run(int.MaxValue); 65 65 Assert.AreEqual(numExpectedSols, numSols); 66 66 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/EvenParityProblem.cs ¶
r11659 r11727 50 50 return nCorrect; 51 51 } 52 53 public string Hash(string terminalPhrase) { 54 return terminalPhrase; 55 } 52 56 } 53 57 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/ExpressionInterpreter.cs ¶
r11659 r11727 6 6 using System.Text; 7 7 using System.Threading.Tasks; 8 using HeuristicLab.Common; 8 9 9 10 namespace HeuristicLab.Problems.GrammaticalOptimization { -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs ¶
r11659 r11727 8 8 using System.Text.RegularExpressions; 9 9 using System.Xml.Linq; 10 using HeuristicLab.Common; 10 11 11 12 namespace HeuristicLab.Problems.GrammaticalOptimization { … … 157 158 while (!done) { 158 159 int ntIdx; char nt; 159 FindFirstNonTerminal( phrase, out nt, out ntIdx);160 FindFirstNonTerminal(this, phrase, out nt, out ntIdx); 160 161 161 162 int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2 … … 175 176 } 176 177 177 p rivate void FindFirstNonTerminal(string phrase, out char nt, out int ntIdx) {178 public static void FindFirstNonTerminal(IGrammar g, string phrase, out char nt, out int ntIdx) { 178 179 ntIdx = 0; 179 while (ntIdx < phrase.Length && IsTerminal(phrase[ntIdx])) ntIdx++;180 while (ntIdx < phrase.Length && g.IsTerminal(phrase[ntIdx])) ntIdx++; 180 181 if (ntIdx >= phrase.Length) { 181 182 ntIdx = -1; -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs ¶
r11659 r11727 39 39 } 40 40 41 public string Hash(string terminalPhrase) { 42 return terminalPhrase; 43 } 41 44 } 42 45 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.csproj ¶
r11659 r11727 44 44 <ItemGroup> 45 45 <Compile Include="ExpressionInterpreter.cs" /> 46 <Compile Include="Extensions.cs" />47 46 <Compile Include="Grammar.cs" /> 48 47 <Compile Include="EvenParityProblem.cs" /> 48 <Compile Include="SentenceSetStatistics.cs" /> 49 49 <Compile Include="SymbolicRegressionPoly10Problem.cs" /> 50 50 <Compile Include="SantaFeAntProblem.cs" /> … … 59 59 <Compile Include="RoyalTreeProblem.cs" /> 60 60 </ItemGroup> 61 <ItemGroup> 62 <ProjectReference Include="..\HeuristicLab.Common\HeuristicLab.Common.csproj"> 63 <Project>{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}</Project> 64 <Name>HeuristicLab.Common</Name> 65 </ProjectReference> 66 </ItemGroup> 61 67 <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> 62 68 <!-- To modify your build process, add your task inside one of the targets below and uncomment it. -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs ¶
r11659 r11727 9 9 IGrammar Grammar { get; } 10 10 double Evaluate(string sentence); 11 string Hash(string terminalPhrase); 11 12 } 12 13 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs ¶
r11659 r11727 79 79 return result.ToString(); 80 80 } 81 82 public string Hash(string terminalPhrase) { 83 return terminalPhrase; 84 } 81 85 } 82 86 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPairProblem.cs ¶
r11659 r11727 33 33 return regex.Matches(sentence).Count; 34 34 } 35 36 public string Hash(string terminalPhrase) { 37 return terminalPhrase; 38 } 35 39 } 36 40 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalRoadProblem.cs ¶
r11659 r11727 29 29 throw new NotImplementedException(); 30 30 } 31 public string Hash(string terminalPhrase) { 32 return terminalPhrase; 33 } 31 34 32 35 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSymbolProblem.cs ¶
r11659 r11727 33 33 return regex.Matches(sentence).Count; 34 34 } 35 public string Hash(string terminalPhrase) { 36 return terminalPhrase; 37 } 38 35 39 } 36 40 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalTreeProblem.cs ¶
r11659 r11727 29 29 throw new NotImplementedException(); 30 30 } 31 public string Hash(string terminalPhrase) { 32 return terminalPhrase; 33 } 31 34 32 35 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs ¶
r11708 r11727 96 96 p++; 97 97 } 98 } 99 100 public string Hash(string terminalPhrase) { 101 return terminalPhrase.Replace("rl", "").Replace("lr", ""); 98 102 } 99 103 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs ¶
r11708 r11727 5 5 using System.Security.AccessControl; 6 6 using System.Text; 7 using HeuristicLab.Common; 7 8 8 9 namespace HeuristicLab.Problems.GrammaticalOptimization { 9 10 public class SymbolicRegressionPoly10Problem : IProblem { 11 // private const string grammarString = @" 12 // G(E): 13 // E -> V | V+E | V-E | V*E | (E) 14 // V -> a .. j 15 // "; 10 16 private const string grammarString = @" 11 G(E): 12 E -> V | V+E | V-E | V*E | V/E | (E) 13 V -> a .. j 14 "; 17 G(E): 18 E -> a | b | c | d | e | f | g | h | j | a+E | b+E | c+E | d+E | e+E | f+E | g+E | h+E | j+E | a*E | b*E | c*E | d*E | e*E | f*E | g*E | h*E | j*E 19 "; 15 20 16 21 … … 82 87 return s * s / (ssX * ssY); 83 88 } 89 90 91 // right now only + and * is supported 92 public string Hash(string terminalPhrase) { 93 var terms = terminalPhrase.Split('+'); 94 return string.Join("+", terms.Select(term => string.Join("", term.Replace("*", "").OrderBy(ch => ch))) 95 .OrderBy(term => term)); 96 } 84 97 } 85 98 } -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Main.csproj ¶
r11659 r11727 49 49 </ItemGroup> 50 50 <ItemGroup> 51 <ProjectReference Include="..\HeuristicLab.Algorithms.Bandits\HeuristicLab.Algorithms.Bandits.csproj"> 52 <Project>{24408F7D-EE0F-4886-A08B-EC324D662E47}</Project> 53 <Name>HeuristicLab.Algorithms.Bandits</Name> 54 </ProjectReference> 51 55 <ProjectReference Include="..\HeuristicLab.Algorithms.GrammaticalOptimization\HeuristicLab.Algorithms.GrammaticalOptimization.csproj"> 52 56 <Project>{eea07488-1a51-412a-a52c-53b754a628b3}</Project> -
TabularUnified branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs ¶
r11690 r11727 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Data; 3 4 using System.Diagnostics; 4 5 using System.Linq; 5 6 using System.Text; 7 using System.Threading.Tasks; 8 using HeuristicLab.Algorithms.Bandits; 6 9 using HeuristicLab.Algorithms.GrammaticalOptimization; 7 10 using HeuristicLab.Problems.GrammaticalOptimization; … … 10 13 class Program { 11 14 static void Main(string[] args) { 15 // RunDemo(); 16 RunGridTest(); 17 } 18 19 private static void RunGridTest() { 20 int maxIterations = 150000; 21 var globalRandom = new Random(31415); 22 var reps = 10; 23 Parallel.ForEach(new int[] { 1, 5, 10, 100, 500, 1000 }, (randomTries) => { 24 Random localRand; 25 lock (globalRandom) { 26 localRand = new Random(globalRandom.Next()); 27 } 28 var policyFactories = new Func<int, IPolicy>[] 29 { 30 (numActions) => new RandomPolicy(localRand, numActions), 31 (numActions) => new UCB1Policy(numActions), 32 (numActions) => new UCB1TunedPolicy(numActions), 33 (numActions) => new UCBNormalPolicy(numActions), 34 (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.01), 35 (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.05), 36 (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.1), 37 (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.2), 38 (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.5), 39 (numActions) => new GaussianThompsonSamplingPolicy(localRand, numActions), 40 (numActions) => new BernoulliThompsonSamplingPolicy(localRand, numActions) 41 }; 42 43 foreach (var policyFactory in policyFactories) 44 for (int i = 0; i < reps; i++) { 45 int iterations = 0; 46 var sw = new Stopwatch(); 47 var globalStatistics = new SentenceSetStatistics(); 48 49 // var problem = new SymbolicRegressionPoly10Problem(); 50 var problem = new SantaFeAntProblem(); 51 //var problem = new PalindromeProblem(); 52 //var problem = new HardPalindromeProblem(); 53 //var problem = new RoyalPairProblem(); 54 //var problem = new EvenParityProblem(); 55 var alg = new MctsSampler(problem, 17, localRand, randomTries, policyFactory); 56 //var alg = new ExhaustiveBreadthFirstSearch(problem, 25); 57 //var alg = new AlternativesContextSampler(problem, 25); 58 59 alg.SolutionEvaluated += (sentence, quality) => { 60 iterations++; 61 globalStatistics.AddSentence(sentence, quality); 62 if (iterations % 10000 == 0) { 63 Console.WriteLine("{0} {1} {2}", randomTries, policyFactory(1), globalStatistics); 64 } 65 }; 66 67 sw.Start(); 68 69 alg.Run(maxIterations); 70 71 sw.Stop(); 72 } 73 }); 74 } 75 76 private static void RunDemo() { 77 // TODO: implement threshold ascent 78 // TODO: implement inspection for MCTS 79 12 80 int maxIterations = 10000000; 13 81 int iterations = 0; … … 15 83 double bestQuality = 0; 16 84 string bestSentence = ""; 85 var globalStatistics = new SentenceSetStatistics(); 86 var random = new Random(31415); 17 87 18 var rs = new ExhaustiveDepthFirstSearch(17); 88 // var problem = new SymbolicRegressionPoly10Problem(); 89 var problem = new SantaFeAntProblem(); 90 //var problem = new PalindromeProblem(); 91 //var problem = new HardPalindromeProblem(); 92 //var problem = new RoyalPairProblem(); 93 //var problem = new EvenParityProblem(); 94 var alg = new MctsSampler(problem, 17, random); 95 //var alg = new ExhaustiveBreadthFirstSearch(problem, 25); 96 //var alg = new AlternativesContextSampler(problem, 25); 19 97 20 rs.FoundNewBestSolution += (sentence, quality) => {98 alg.FoundNewBestSolution += (sentence, quality) => { 21 99 bestQuality = quality; 22 100 bestSentence = sentence; 23 101 Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence); 24 102 }; 25 rs.SolutionEvaluated += (sentence, quality) => {103 alg.SolutionEvaluated += (sentence, quality) => { 26 104 iterations++; 105 globalStatistics.AddSentence(sentence, quality); 27 106 if (iterations % 10000 == 0) { 28 Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence); 107 //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence); 108 Console.WriteLine(globalStatistics.ToString()); 29 109 } 30 110 }; … … 33 113 sw.Start(); 34 114 35 rs.Run(new SymbolicRegressionPoly10Problem(),maxIterations);115 alg.Run(maxIterations); 36 116 37 117 sw.Stop();
Note: See TracChangeset
for help on using the changeset viewer.