Changeset 11744
- Timestamp: 01/09/15 16:54:05 (10 years ago)
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization
- Files: 3 added, 7 edited
Legend: lines prefixed with + were added, lines prefixed with - were removed, unprefixed lines are unchanged context.
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ModelPolicyActionInfo.cs
r11742 → r11744

     public void UpdateReward(double reward) {
       Debug.Assert(!Disabled);
+      Tries++;
       model.Update(reward);
     }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ThresholdAscentPolicy.cs
r11742 → r11744

     }

-    private double U(double mu, int totalTries, int n, int k) {
+    private double U(double mu, double totalTries, int n, int k) {
       //var alpha = Math.Log(2.0 * totalTries * k / delta);
       double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
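Note: the only signature change here is that totalTries is now passed as a double; the active alpha line is the commented-out product form expanded via log rules. A small standalone check (not part of the changeset; the values are arbitrary) that the two expressions agree:

    using System;

    class AlphaCheck {
      static void Main() {
        // arbitrary example values
        double totalTries = 1e6, k = 100, delta = 0.01;
        double productForm = Math.Log(2.0 * totalTries * k / delta);
        double sumForm = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
        // both print ~23.719136, since log(2*t*k/delta) = log 2 + log t + log k - log delta
        Console.WriteLine("{0:F6} vs {1:F6}", productForm, sumForm);
      }
    }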
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r11742 → r11744

     <Compile Include="IBanditPolicy.cs" />
     <Compile Include="IBanditPolicyActionInfo.cs" />
+    <Compile Include="Models\GaussianMixtureModel.cs" />
+    <Compile Include="Models\LogitNormalModel.cs" />
     <Compile Include="OnlineMeanAndVarianceEstimator.cs" />
     <Compile Include="Models\BernoulliModel.cs" />
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianMixtureModel.cs
r11730 → r11744

     namespace HeuristicLab.Algorithms.Bandits.Models {
       public class GaussianMixtureModel : IModel {
-        private readonly int numActions;
-        private readonly double[][] meanMean; // mean of mean for each arm and component
-        private readonly double[][] meanVariance; // variance of mean for each arm and component
-        private readonly double[][] componentProb;
+        private readonly double[] componentMeans;
+        private readonly double[] componentVars;
+        private readonly double[] componentProbs;

-        // parameters of beta prior distribution
         private int numComponents;
-        private double priorMean;

-        public GaussianMixtureModel(int numActions, double priorMean = 0.5, int nComponents = 5) {
-          this.numActions = numActions;
+        public GaussianMixtureModel(int nComponents = 5) {
           this.numComponents = nComponents;
-          this.priorMean = priorMean;
-          this.meanMean = new double[numActions][];
-          this.meanVariance = new double[numActions][];
-          this.componentProb = new double[numActions][];
-          for (int a = 0; a < numActions; a++) {
-            // TODO: probably need to initizalize this randomly to allow learning
-            meanMean[a] = Enumerable.Repeat(priorMean, nComponents).ToArray();
-            meanVariance[a] = Enumerable.Repeat(1.0, nComponents).ToArray(); // prior variance of mean variance = 1
-            componentProb[a] = Enumerable.Repeat(1.0 / nComponents, nComponents).ToArray(); // uniform prior for component probabilities
-          }
+          this.componentProbs = new double[nComponents];
+          this.componentMeans = new double[nComponents];
+          this.componentVars = new double[nComponents];
         }


-        public double[] SampleExpectedRewards(Random random) {
-          // sample mean foreach action and component from the prior
-          var exp = new double[numActions];
-          for (int a = 0; a < numActions; a++) {
-            var sumReward = 0.0;
-            var numSamples = 10000;
-            var sampledComponents = Enumerable.Range(0, numComponents).SampleProportional(random, componentProb[a]).Take(numSamples);
-            foreach (var k in sampledComponents) {
-              sumReward += Rand.RandNormal(random) * Math.Sqrt(meanVariance[a][k]) + meanMean[a][k];
-            }
-            exp[a] = sumReward / (double)numSamples;
-          }
-
-          return exp;
+        public double SampleExpectedReward(Random random) {
+          var k = Enumerable.Range(0, numComponents).SampleProportional(random, componentProbs).First();
+          return alglib.invnormaldistribution(random.NextDouble()) * Math.Sqrt(componentVars[k]) + componentMeans[k];
         }

-        public void Update(int action, double reward) {
+        public void Update(double reward) {
           // see http://www.cs.toronto.edu/~mackay/itprnn/ps/302.320.pdf Algorithm 22.2 soft k-means
           throw new NotImplementedException();
         }

-        public void Disable(int action) {
-          Array.Clear(meanMean[action], 0, meanMean[action].Length);
-          Array.Clear(meanVariance[action], 0, meanVariance[action].Length);
+        public void Disable() {
+          Array.Clear(componentMeans, 0, numComponents);
+          for (int i = 0; i < numComponents; i++)
+            componentVars[i] = 0.0;
+        }
+
+        public object Clone() {
+          return new GaussianMixtureModel(numComponents);
         }

         public void Reset() {
-          Array.Clear(meanMean, 0, meanMean.Length);
-          Array.Clear(meanVariance, 0, meanVariance.Length);
+          Array.Clear(componentMeans, 0, numComponents);
+          Array.Clear(componentVars, 0, numComponents);
+          Array.Clear(componentProbs, 0, numComponents);
         }
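Note: the committed Update still throws NotImplementedException and only points to MacKay's Algorithm 22.2 (soft k-means). The following is a hypothetical sketch of what an online, responsibility-weighted update over the new componentMeans/componentVars/componentProbs arrays could look like; the random mean initialization, decaying step size, and variance floor are assumptions and not part of this revision.

    using System;

    // Hypothetical sketch, not the committed code: an online soft k-means / EM-style update.
    class SoftKMeansMixtureSketch {
      private readonly double[] componentMeans;
      private readonly double[] componentVars;
      private readonly double[] componentProbs;
      private int updateCount;

      public SoftKMeansMixtureSketch(int nComponents, Random random) {
        componentMeans = new double[nComponents];
        componentVars = new double[nComponents];
        componentProbs = new double[nComponents];
        for (int k = 0; k < nComponents; k++) {
          componentMeans[k] = random.NextDouble(); // break symmetry between components
          componentVars[k] = 1.0;
          componentProbs[k] = 1.0 / nComponents;
        }
      }

      public void Update(double reward) {
        updateCount++;
        double eta = 1.0 / updateCount; // decaying step size (assumption)
        int n = componentMeans.Length;

        // E-step: responsibilities, i.e. how strongly each component "claims" the observed reward
        var resp = new double[n];
        double sum = 0.0;
        for (int k = 0; k < n; k++) {
          double v = Math.Max(componentVars[k], 1e-6); // variance floor avoids division by zero
          double d = reward - componentMeans[k];
          resp[k] = componentProbs[k] * Math.Exp(-0.5 * d * d / v) / Math.Sqrt(v);
          sum += resp[k];
        }

        // M-step (stochastic): nudge each component toward the observation, weighted by responsibility
        for (int k = 0; k < n; k++) {
          double r = sum > 0 ? resp[k] / sum : 1.0 / n;
          componentMeans[k] += eta * r * (reward - componentMeans[k]);
          double d = reward - componentMeans[k];
          componentVars[k] += eta * r * (d * d - componentVars[k]);
          componentProbs[k] += eta * (r - componentProbs[k]);
        }
      }

      static void Main() {
        var rnd = new Random(0);
        var m = new SoftKMeansMixtureSketch(3, rnd);
        for (int i = 0; i < 1000; i++)
          m.Update(rnd.NextDouble() < 0.5 ? 0.2 : 0.8); // rewards drawn from two clusters
      }
    }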
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj
r11742 → r11744

     <Compile Include="AlternativesSampler.cs" />
     <Compile Include="AlternativesContextSampler.cs" />
+    <Compile Include="TemporalDifferenceTreeSearchSampler.cs" />
     <Compile Include="ExhaustiveRandomFirstSearch.cs" />
     <Compile Include="MctsContextualSampler.cs">
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs
r11742 → r11744

     public int treeDepth;
     public int treeSize;
+    private double bestQuality;

     // public MctsSampler(IProblem problem, int maxLen, Random random) :
   …
     public void Run(int maxIterations) {
-      double bestQuality = double.MinValue;
+      bestQuality = double.MinValue;
       InitPolicies(problem.Grammar);
       for (int i = 0; !rootNode.done && i < maxIterations; i++) {
   …
     public void PrintStats() {
       var n = rootNode;
-      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10} ", treeDepth, treeSize, n.actionInfo.Tries);
+      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.actionInfo.Tries, n.actionInfo.Value, bestQuality);
       while (n.children != null) {
         Console.WriteLine();
   …
         n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
       }
-      Console.ReadLine();
     }
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11742 → r11744

     //var problem = new RoyalPairProblem();
     //var problem = new EvenParityProblem();
-    var alg = new MctsSampler(problem, 25, random, 0, new GaussianThompsonSamplingPolicy(true));
+    var alg = new MctsSampler(problem, 25, random, 0, new GenericThompsonSamplingPolicy(new LogitNormalModel()));
+    //var alg = new TemporalDifferenceTreeSearchSampler(problem, 23, random, 0, new RandomPolicy());
     //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
     //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
   …
       iterations++;
       globalStatistics.AddSentence(sentence, quality);
-      if (iterations % 1000 == 0) {
+      if (iterations % 100 == 0) {
+        Console.Clear();
         alg.PrintStats();
       }
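Note: Program.cs now plugs a GenericThompsonSamplingPolicy wrapping a LogitNormalModel into the MctsSampler. As a rough illustration of the generic Thompson sampling idea (one posterior sample per action's model, play the arg-max), here is a simplified, hypothetical sketch; IRewardModel and the toy Gaussian model are illustrative only and not the actual HeuristicLab.Algorithms.Bandits types.

    using System;
    using System.Linq;

    // Hypothetical sketch of generic Thompson sampling action selection.
    interface IRewardModel {
      double SampleExpectedReward(Random random);
      void Update(double reward);
    }

    static class ThompsonSelect {
      // Draw one expected-reward sample from each action's model and play the best-looking action.
      public static int SelectAction(Random random, IRewardModel[] actionModels) {
        return Enumerable.Range(0, actionModels.Length)
          .OrderByDescending(a => actionModels[a].SampleExpectedReward(random))
          .First();
      }
    }

    // Toy Gaussian model just for the demo: sampling noise shrinks as more rewards are seen.
    class GaussianToyModel : IRewardModel {
      private double mean; private double n = 1;
      public GaussianToyModel(double prior) { mean = prior; }
      public double SampleExpectedReward(Random random) {
        double u1 = 1.0 - random.NextDouble(), u2 = random.NextDouble();
        double gauss = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2); // Box-Muller
        return mean + gauss / Math.Sqrt(n);
      }
      public void Update(double reward) { n++; mean += (reward - mean) / n; } // running mean; prior counts as one pseudo-observation
    }

    class Demo {
      static void Main() {
        var rnd = new Random(0);
        var models = new IRewardModel[] { new GaussianToyModel(0.0), new GaussianToyModel(0.0) };
        for (int i = 0; i < 1000; i++) {
          int a = ThompsonSelect.SelectAction(rnd, models);
          double reward = (a == 1 ? 0.7 : 0.3) + 0.1 * (rnd.NextDouble() - 0.5); // arm 1 pays better
          models[a].Update(reward);
        }
        Console.WriteLine("preferred action after learning: " + ThompsonSelect.SelectAction(rnd, models)); // usually 1
      }
    }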