Changeset 11732 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test
- Timestamp: 01/07/15 09:21:46 (9 years ago)
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test
- Files:
  - 1 added
  - 2 edited
Legend:
- Unmodified
- Added
- Removed
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/HeuristicLab.Problems.GrammaticalOptimization.Test.csproj
r11730 r11732 39 39 </PropertyGroup> 40 40 <ItemGroup> 41 <Reference Include="HeuristicLab.Problems.Instances-3.3"> 42 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath> 43 </Reference> 44 <Reference Include="HeuristicLab.Problems.Instances.DataAnalysis-3.3"> 45 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances.DataAnalysis-3.3.dll</HintPath> 46 </Reference> 41 47 <Reference Include="System" /> 42 48 <Reference Include="System.Core"> … … 57 63 </Choose> 58 64 <ItemGroup> 65 <Compile Include="TestSymbRegInstances.cs" /> 59 66 <Compile Include="TestSequence.cs" /> 60 67 <Compile Include="TestBanditPolicies.cs" /> … … 71 78 <Project>{eea07488-1a51-412a-a52c-53b754a628b3}</Project> 72 79 <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name> 80 </ProjectReference> 81 <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization.SymbReg\HeuristicLab.Problems.GrammaticalOptimization.SymbReg.csproj"> 82 <Project>{17a7a380-86ce-482d-8d22-cbd70cc97f0d}</Project> 83 <Name>HeuristicLab.Problems.GrammaticalOptimization.SymbReg</Name> 73 84 </ProjectReference> 74 85 <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj"> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs
r11730 r11732 10 10 [TestClass] 11 11 public class TestBanditPolicies { 12 [TestMethod] 13 public void ComparePoliciesForGaussianUnknownVarianceBandit() { 14 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 15 var randSeed = 31415; 16 var nArms = 20; 17 18 // Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(20, 0.01)); 19 // Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(100, 0.01)); 20 // Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(500, 0.01)); 21 // Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new ThresholdAscent(1000, 0.01)); 22 Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1))); 23 Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 0.1))); 24 Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true)); 25 Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy()); 26 Console.WriteLine("UCBNormal"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy()); 27 Console.WriteLine("Random"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy()); 28 29 } 12 30 13 31 … … 15 33 public void ComparePoliciesForBernoulliBandit() { 16 34 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 17 18 var globalRand = new Random(31415); 19 var seedForPolicy = globalRand.Next(); 35 var randSeed = 31415; 20 36 var nArms = 20; 21 37 //Console.WriteLine("Exp3 (gamma=0.01)"); … 
… 23 39 //Console.WriteLine("Exp3 (gamma=0.05)"); 24 40 //estPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1)); 25 Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli( globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));26 Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli( globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new BernoulliModel(nArms)));41 Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new BernoulliThompsonSamplingPolicy()); 42 Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new GenericThompsonSamplingPolicy(new BernoulliModel())); 27 43 Console.WriteLine("Random"); 28 TestPolicyBernoulli( globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));44 TestPolicyBernoulli(randSeed, nArms, new RandomPolicy()); 29 45 Console.WriteLine("UCB1"); 30 TestPolicyBernoulli( globalRand, nArms, new UCB1Policy(nArms));46 TestPolicyBernoulli(randSeed, nArms, new UCB1Policy()); 31 47 Console.WriteLine("UCB1Tuned"); 32 TestPolicyBernoulli( globalRand, nArms, new UCB1TunedPolicy(nArms));48 TestPolicyBernoulli(randSeed, nArms, new UCB1TunedPolicy()); 33 49 Console.WriteLine("UCB1Normal"); 34 TestPolicyBernoulli( globalRand, nArms, new UCBNormalPolicy(nArms));50 TestPolicyBernoulli(randSeed, nArms, new UCBNormalPolicy()); 35 51 Console.WriteLine("Eps(0.01)"); 36 TestPolicyBernoulli( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms,0.01));52 TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.01)); 37 53 Console.WriteLine("Eps(0.05)"); 38 TestPolicyBernoulli( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms,0.05));54 TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.05)); 39 55 //Console.WriteLine("Eps(0.1)"); 40 //TestPolicyBernoulli( globalRand, nArms, new EpsGreedyPolicy(new 
Random(seedForPolicy), nArms,0.1));56 //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.1)); 41 57 //Console.WriteLine("Eps(0.2)"); 42 //TestPolicyBernoulli( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms,0.2));58 //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.2)); 43 59 //Console.WriteLine("Eps(0.5)"); 44 //TestPolicyBernoulli( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms,0.5));45 Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli( globalRand, nArms, new UCTPolicy(nArms,0.1));46 Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli( globalRand, nArms, new UCTPolicy(nArms,0.5));47 Console.WriteLine("UCT(1) "); TestPolicyBernoulli( globalRand, nArms, new UCTPolicy(nArms,1));48 Console.WriteLine("UCT(2) "); TestPolicyBernoulli( globalRand, nArms, new UCTPolicy(nArms,2));49 Console.WriteLine("UCT(5) "); TestPolicyBernoulli( globalRand, nArms, new UCTPolicy(nArms,5));50 Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms,0.1));51 Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms,0.5));52 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyBernoulli( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms,1));53 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms,10));54 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms,100));55 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms,0.01));56 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli( 
globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms,0.05));57 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms,0.1));60 //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.5)); 61 Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.1)); 62 Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.5)); 63 Console.WriteLine("UCT(1) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(1)); 64 Console.WriteLine("UCT(2) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(2)); 65 Console.WriteLine("UCT(5) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(5)); 66 Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.1)); 67 Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.5)); 68 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(1)); 69 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(10)); 70 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(100)); 71 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.01)); 72 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.05)); 73 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.1)); 58 74 59 75 // not applicable to bernoulli rewards … … 70 86 71 87 [TestMethod] 72 public void ComparePoliciesForNormalBandit() { 73 CultureInfo.DefaultThreadCurrentCulture = 
CultureInfo.InvariantCulture; 74 75 var globalRand = new Random(31415); 76 var seedForPolicy = globalRand.Next(); 77 var nArms = 20; 78 Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true)); 79 Console.WriteLine("Thompson (Gaussian new)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms)); 80 Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyNormal(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1))); 88 public void ComparePoliciesForGaussianBandit() { 89 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 90 91 var randSeed = 31415; 92 var nArms = 20; 93 Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy(true)); 94 Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy()); 95 Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1))); 81 96 /* 82 Console.WriteLine("Random"); TestPolicyNormal( globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));83 Console.WriteLine("UCB1"); TestPolicyNormal( globalRand, nArms, new UCB1Policy(nArms));84 Console.WriteLine("UCB1Tuned"); TestPolicyNormal( globalRand, nArms, new UCB1TunedPolicy(nArms));85 Console.WriteLine("UCB1Normal"); TestPolicyNormal( globalRand, nArms, new UCBNormalPolicy(nArms));97 Console.WriteLine("Random"); TestPolicyNormal(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms)); 98 Console.WriteLine("UCB1"); TestPolicyNormal(randSeed, nArms, new UCB1Policy(nArms)); 99 Console.WriteLine("UCB1Tuned"); TestPolicyNormal(randSeed, nArms, new UCB1TunedPolicy(nArms)); 100 Console.WriteLine("UCB1Normal"); 
TestPolicyNormal(randSeed, nArms, new UCBNormalPolicy(nArms)); 86 101 //Console.WriteLine("Exp3 (gamma=0.01)"); 87 //TestPolicyNormal( globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));102 //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01)); 88 103 //Console.WriteLine("Exp3 (gamma=0.05)"); 89 //TestPolicyNormal( globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));90 Console.WriteLine("Eps(0.01)"); TestPolicyNormal( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));91 Console.WriteLine("Eps(0.05)"); TestPolicyNormal( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));104 //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05)); 105 Console.WriteLine("Eps(0.01)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01)); 106 Console.WriteLine("Eps(0.05)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05)); 92 107 //Console.WriteLine("Eps(0.1)"); 93 //TestPolicyNormal( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));108 //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1)); 94 109 //Console.WriteLine("Eps(0.2)"); 95 //TestPolicyNormal( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));110 //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2)); 96 111 //Console.WriteLine("Eps(0.5)"); 97 //TestPolicyNormal( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));98 Console.WriteLine("UCT(0.1)"); TestPolicyNormal( globalRand, nArms, new UCTPolicy(nArms, 0.1));99 Console.WriteLine("UCT(0.5)"); TestPolicyNormal( globalRand, nArms, new UCTPolicy(nArms, 0.5));100 Console.WriteLine("UCT(1) "); TestPolicyNormal( globalRand, nArms, new UCTPolicy(nArms, 
1));101 Console.WriteLine("UCT(2) "); TestPolicyNormal( globalRand, nArms, new UCTPolicy(nArms, 2));102 Console.WriteLine("UCT(5) "); TestPolicyNormal( globalRand, nArms, new UCTPolicy(nArms, 5));103 Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));104 Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));105 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyNormal( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));106 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));107 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));108 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));109 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));110 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal( globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));111 Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));112 Console.WriteLine("ThresholdAscent(10,0.05) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));113 Console.WriteLine("ThresholdAscent(10,0.1) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));114 Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));115 
Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));116 Console.WriteLine("ThresholdAscent(100,0.1) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));117 Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));118 Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));119 Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal( globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));112 //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5)); 113 Console.WriteLine("UCT(0.1)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.1)); 114 Console.WriteLine("UCT(0.5)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.5)); 115 Console.WriteLine("UCT(1) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 1)); 116 Console.WriteLine("UCT(2) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 2)); 117 Console.WriteLine("UCT(5) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 5)); 118 Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1)); 119 Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5)); 120 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1)); 121 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10)); 122 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new 
Random(seedForPolicy), nArms, 100)); 123 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01)); 124 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05)); 125 Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1)); 126 Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01)); 127 Console.WriteLine("ThresholdAscent(10,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05)); 128 Console.WriteLine("ThresholdAscent(10,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1)); 129 Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01)); 130 Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05)); 131 Console.WriteLine("ThresholdAscent(100,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1)); 132 Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01)); 133 Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05)); 134 Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1)); 120 135 */ 121 136 } … … 124 139 public void ComparePoliciesForGaussianMixtureBandit() { 125 140 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 126 127 var globalRand = new Random(31415); 128 var seedForPolicy = globalRand.Next(); 129 var nArms = 20; 130 Console.WriteLine("Thompson (Gaussian 
orig)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true)); 131 Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms)); 132 Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1))); 141 var randSeed = 31415; 142 var nArms = 20; 143 Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true)); 144 Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy()); 145 Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1))); 133 146 134 147 /* 135 Console.WriteLine("Random"); TestPolicyGaussianMixture( globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));136 Console.WriteLine("UCB1"); TestPolicyGaussianMixture( globalRand, nArms, new UCB1Policy(nArms));137 Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture( globalRand, nArms, new UCB1TunedPolicy(nArms));138 Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture( globalRand, nArms, new UCBNormalPolicy(nArms));139 Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));140 Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture( globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));141 Console.WriteLine("UCT(1) "); TestPolicyGaussianMixture( globalRand, nArms, new UCTPolicy(nArms, 1));142 Console.WriteLine("UCT(2) "); TestPolicyGaussianMixture( globalRand, nArms, new UCTPolicy(nArms, 2));143 Console.WriteLine("UCT(5) "); 
TestPolicyGaussianMixture( globalRand, nArms, new UCTPolicy(nArms, 5));144 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyGaussianMixture( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));145 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));146 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture( globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));147 148 Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyGaussianMixture( globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));149 Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture( globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));150 Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture( globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));151 Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture( globalRand, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));148 Console.WriteLine("Random"); TestPolicyGaussianMixture(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms)); 149 Console.WriteLine("UCB1"); TestPolicyGaussianMixture(randSeed, nArms, new UCB1Policy(nArms)); 150 Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(randSeed, nArms, new UCB1TunedPolicy(nArms)); 151 Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(randSeed, nArms, new UCBNormalPolicy(nArms)); 152 Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01)); 153 Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05)); 154 Console.WriteLine("UCT(1) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 1)); 155 
Console.WriteLine("UCT(2) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 2)); 156 Console.WriteLine("UCT(5) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 5)); 157 Console.WriteLine("BoltzmannExploration(1) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1)); 158 Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10)); 159 Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100)); 160 161 Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01)); 162 Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01)); 163 Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01)); 164 Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01)); 152 165 */ 153 166 } 154 167 155 168 156 private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) { 157 TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions)); 158 } 159 private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) { 160 TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions)); 161 } 162 private void TestPolicyGaussianMixture(Random globalRand, int nArms, IPolicy policy) { 163 TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions)); 164 } 165 166 167 private void TestPolicy(Random 
globalRand, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) { 169 private void TestPolicyBernoulli(int randSeed, int nArms, IPolicy policy) { 170 TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions)); 171 } 172 private void TestPolicyGaussian(int randSeed, int nArms, IPolicy policy) { 173 TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions)); 174 } 175 private void TestPolicyGaussianMixture(int randSeed, int nArms, IPolicy policy) { 176 TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions)); 177 } 178 private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IPolicy policy) { 179 TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions)); 180 } 181 182 183 private void TestPolicy(int randSeed, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) { 168 184 var maxIt = 1E5; 169 var reps = 30; // independent runs185 var reps = 10; // independent runs 170 186 var regretForIteration = new Dictionary<int, List<double>>(); 171 187 var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>(); 172 188 var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>(); 189 var globalRandom = new Random(randSeed); 190 var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test 191 var policyRandom = new Random(globalRandom.Next()); 192 173 193 // calculate statistics 174 194 for (int r = 0; r < reps; r++) { 175 195 var nextLogStep = 1; 176 var b = banditFactory(new Random(globalRand.Next()), nArms); 177 policy.Reset(); 196 var b = banditFactory(banditRandom, nArms); 178 197 var totalRegret = 0.0; 179 198 var totalPullsOfSuboptimalArmsExp = 0.0; 180 199 var totalPullsOfSuboptimalArmsMax = 0.0; 200 var actionInfos = Enumerable.Range(0, 
nArms).Select(_ => policy.CreateActionInfo()).ToArray(); 181 201 for (int i = 0; i <= maxIt; i++) { 182 var selectedAction = policy.SelectAction( );202 var selectedAction = policy.SelectAction(policyRandom, actionInfos); 183 203 var reward = b.Pull(selectedAction); 184 policy.UpdateReward(selectedAction,reward);204 actionInfos[selectedAction].UpdateReward(reward); 185 205 186 206 // collect stats
Note: See TracChangeset for help on using the changeset viewer.