Context Navigation

← Previous Change
Next Change →

Changeset 11974 for branches/HeuristicLab.Problems.GrammaticalOptimization

Timestamp:

02/10/15 02:05:31 (10 years ago)

Author:

gkronber

Message:

#2283: eurocast experiments

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization

Files:

6 edited

HeuristicLab.Algorithms.Bandits/Policies/GaussianThompsonSamplingPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/Policies/GenericThompsonSamplingPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.GrammaticalOptimization/SequentialDecisionPolicies/GenericFunctionApproximationGrammarPolicy.cs (modified) (4 diffs)
HeuristicLab.Common/Extensions.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/Problems/SantaFeAntProblem.cs (modified) (1 diff)
Main/Program.cs (modified) (4 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GaussianThompsonSamplingPolicy.cs

r11832	r11974
57	57	}
58	58
	59	// very unlikely to be the same (and we don't care)
59	60	if (theta > bestQ) {
60	61	bestQ = theta;

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GenericThompsonSamplingPolicy.cs

r11806	r11974
23	23	aIdx++;
24	24	var q = aInfo.SampleExpectedReward(random);
	25	// very unlikely to be equal and we don't care
25	26	if (q > bestQ) {
26	27	bestQ = q;

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialDecisionPolicies/GenericFunctionApproximationGrammarPolicy.cs

-                      r11832
+                      r11974
         originalIdx++;
+      }
+      const double beta = 20.0;
+      var w = from q in activeAfterStates
+              select Math.Exp(beta * q);
+      var bestAction = Enumerable.Range(0, maxIdx).SampleProportional(random, w);
+      selectedStateIdx = actionIndexMap[bestAction];
+      Debug.Assert(selectedStateIdx >= 0);
+      /*
       if (random.NextDouble() < 0.2) {
         selectedStateIdx = actionIndexMap[random.Next(maxIdx)];
 …
         // find max
         var bestQ = double.NegativeInfinity;
         var bestIdx = -1;
+        var bestIdxs = new List<int>();
         for (int i = 0; i < maxIdx; i++) {
           if (activeAfterStates[i] > bestQ) {
+            bestIdxs.Clear();
+            bestIdxs.Add(i);
             bestQ = activeAfterStates[i];
+            bestIdx = i;
+          } else if (activeAfterStates[i].IsAlmost(bestQ)) {
+            bestIdxs.Add(i);
+          }
+        }
         selectedStateIdx = actionIndexMap[bestIdx];
+        selectedStateIdx = actionIndexMap[bestIdxs[random.Next(bestIdxs.Count)]];
+      }
+      */
       return true;
 …
     public double GetValue(string state) {
       return problem.GetFeatures(state).Sum(feature => GetWeight(feature)) ;
+      return problem.GetFeatures(state).Sum(feature => GetWeight(feature));
+    }
 …
       double w;
       if (featureWeigths.TryGetValue(feature.Id, out w)) return w * feature.Value;
       else return 0.0;
+      else return 0.0; // TODO: alternatives?
+    }
     private void UpdateWeights(string state, double reward) {

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs

-                      r11846
+                      r11974
 namespace HeuristicLab.Common {
   public static class Extensions {
     public static bool IsAlmost(this double x, double y) {
+      if (double.IsNaN(x) || double.IsNaN(y)) return false;
+      if (double.IsPositiveInfinity(x) && double.IsPositiveInfinity(y)) return true;
+      if (double.IsNegativeInfinity(x) && double.IsNegativeInfinity(y)) return true;
       return Math.Abs(x - y) < 1.0e-12;
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SantaFeAntProblem.cs

-                      r11857
+                      r11974
     public IEnumerable<Feature> GetFeatures(string phrase) {
+      return Enumerable.Repeat(new Feature(CanonicalRepresentation(phrase), 1.0), 1);
+      yield return new Feature(CanonicalRepresentation(phrase), 1.0);
+      // yield return new Feature("Length", phrase.Length); //
+      // foreach (var pair in phrase.Zip(phrase.Skip(1), Tuple.Create)) {
+      //   yield return new Feature(pair.Item1.ToString() + pair.Item2, 1.0);
+      // }
+      // number of occurrences for each symbol
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

-                      r11973
+                      r11974
       //RunGridTest();
       //RunGpGridTest();
       RunFunApproxTest();
+     RunFunApproxTest();
+    }
 …
       int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
       //var globalRandom = new Random(31415);
       var localRandSeed = 31415;
+      var localRandSeed = new Random().Next();
       var reps = 20;
       var policyFactories = new Func<IBanditPolicy>[]
+        {
          () => new RandomPolicy(),
           () => new ActiveLearningPolicy(),
          () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
          () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
          //() => new GaussianThompsonSamplingPolicy(),
          () => new GaussianThompsonSamplingPolicy(true),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
          //() => new BernoulliThompsonSamplingPolicy(),
          () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
          () => new EpsGreedyPolicy(0.01),
          () => new EpsGreedyPolicy(0.05),
          () => new EpsGreedyPolicy(0.1),
          () => new EpsGreedyPolicy(0.2),
          () => new EpsGreedyPolicy(0.5),
          () => new UCTPolicy(0.01),
          () => new UCTPolicy(0.05),
          () => new UCTPolicy(0.1),
          () => new UCTPolicy(0.5),
          () => new UCTPolicy(1),
          () => new UCTPolicy(2),
          () => new UCTPolicy( 5),
          () => new UCTPolicy( 10),
          () => new ModifiedUCTPolicy(0.01),
          () => new ModifiedUCTPolicy(0.05),
          () => new ModifiedUCTPolicy(0.1),
          () => new ModifiedUCTPolicy(0.5),
          () => new ModifiedUCTPolicy(1),
          () => new ModifiedUCTPolicy(2),
          () => new ModifiedUCTPolicy( 5),
          () => new ModifiedUCTPolicy( 10),
          () => new UCB1Policy(),
          () => new UCB1TunedPolicy(),
          () => new UCBNormalPolicy(),
          () => new BoltzmannExplorationPolicy(1),
          () => new BoltzmannExplorationPolicy(10),
          () => new BoltzmannExplorationPolicy(20),
          () => new BoltzmannExplorationPolicy(100),
          () => new BoltzmannExplorationPolicy(200),
          () => new BoltzmannExplorationPolicy(500),
+         //() => new RandomPolicy(),
+         // () => new ActiveLearningPolicy(),
+         //() => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
+         //() => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
+         //() => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
+         //() => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
+         ////() => new GaussianThompsonSamplingPolicy(),
+         //() => new GaussianThompsonSamplingPolicy(true),
+         //() => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
+         //() => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
+         ////() => new BernoulliThompsonSamplingPolicy(),
+         //() => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
+         //() => new EpsGreedyPolicy(0.01),
+         //() => new EpsGreedyPolicy(0.05),
+         //() => new EpsGreedyPolicy(0.1),
+         //() => new EpsGreedyPolicy(0.2),
+         //() => new EpsGreedyPolicy(0.5),
+         //() => new UCTPolicy(0.01),
+         //() => new UCTPolicy(0.05),
+         //() => new UCTPolicy(0.1),
+         //() => new UCTPolicy(0.5),
+         //() => new UCTPolicy(1),
+         //() => new UCTPolicy(2),
+         //() => new UCTPolicy( 5),
+         //() => new UCTPolicy( 10),
+         //() => new ModifiedUCTPolicy(0.01),
+         //() => new ModifiedUCTPolicy(0.05),
+         //() => new ModifiedUCTPolicy(0.1),
+         //() => new ModifiedUCTPolicy(0.5),
+         //() => new ModifiedUCTPolicy(1),
+         //() => new ModifiedUCTPolicy(2),
+         //() => new ModifiedUCTPolicy( 5),
+         //() => new ModifiedUCTPolicy( 10),
+         //() => new UCB1Policy(),
+         //() => new UCB1TunedPolicy(),
+         //() => new UCBNormalPolicy(),
+         //() => new BoltzmannExplorationPolicy(1),
+         //() => new BoltzmannExplorationPolicy(10),
+         //() => new BoltzmannExplorationPolicy(20),
+         //() => new BoltzmannExplorationPolicy(100),
+         //() => new BoltzmannExplorationPolicy(200),
+         //() => new BoltzmannExplorationPolicy(500),
          () => new ChernoffIntervalEstimationPolicy( 0.01),
          () => new ChernoffIntervalEstimationPolicy( 0.05),
          () => new ChernoffIntervalEstimationPolicy( 0.1),
          () => new ChernoffIntervalEstimationPolicy( 0.2),
          () => new ThresholdAscentPolicy(5, 0.01),
          () => new ThresholdAscentPolicy(5, 0.05),
          () => new ThresholdAscentPolicy(5, 0.1),
          () => new ThresholdAscentPolicy(5, 0.2),
          () => new ThresholdAscentPolicy(10, 0.01),
          () => new ThresholdAscentPolicy(10, 0.05),
          () => new ThresholdAscentPolicy(10, 0.1),
          () => new ThresholdAscentPolicy(10, 0.2),
          () => new ThresholdAscentPolicy(50, 0.01),
          () => new ThresholdAscentPolicy(50, 0.05),
          () => new ThresholdAscentPolicy(50, 0.1),
          () => new ThresholdAscentPolicy(50, 0.2),
          () => new ThresholdAscentPolicy(100, 0.01),
          () => new ThresholdAscentPolicy(100, 0.05),
          () => new ThresholdAscentPolicy(100, 0.1),
          () => new ThresholdAscentPolicy(100, 0.2),
          () => new ThresholdAscentPolicy(500, 0.01),
          () => new ThresholdAscentPolicy(500, 0.05),
          () => new ThresholdAscentPolicy(500, 0.1),
          () => new ThresholdAscentPolicy(500, 0.2),
+         //() => new ThresholdAscentPolicy(5, 0.01),
+         //() => new ThresholdAscentPolicy(5, 0.05),
+         //() => new ThresholdAscentPolicy(5, 0.1),
+         //() => new ThresholdAscentPolicy(5, 0.2),
+         //() => new ThresholdAscentPolicy(10, 0.01),
+         //() => new ThresholdAscentPolicy(10, 0.05),
+         //() => new ThresholdAscentPolicy(10, 0.1),
+         //() => new ThresholdAscentPolicy(10, 0.2),
+         //() => new ThresholdAscentPolicy(50, 0.01),
+         //() => new ThresholdAscentPolicy(50, 0.05),
+         //() => new ThresholdAscentPolicy(50, 0.1),
+         //() => new ThresholdAscentPolicy(50, 0.2),
+         //() => new ThresholdAscentPolicy(100, 0.01),
+         //() => new ThresholdAscentPolicy(100, 0.05),
+         //() => new ThresholdAscentPolicy(100, 0.1),
+         //() => new ThresholdAscentPolicy(100, 0.2),
+         //() => new ThresholdAscentPolicy(500, 0.01),
+         //() => new ThresholdAscentPolicy(500, 0.05),
+         //() => new ThresholdAscentPolicy(500, 0.1),
+         //() => new ThresholdAscentPolicy(500, 0.2),
          //() => new ThresholdAscentPolicy(5000, 0.01),
          //() => new ThresholdAscentPolicy(10000, 0.01),
 …
               var localRand = new Random(localRandSeed);
               var options = new ParallelOptions();
               options.MaxDegreeOfParallelism = 4;
+              options.MaxDegreeOfParallelism = 1;
               Parallel.For(0, reps, options, (i) => {
                 Random myLocalRand;
 …
       var problemFactories = new Func<Tuple<int, int, ISymbolicExpressionTreeProblem>>[]
+      {
         () => Tuple.Create(100000, 23,  (ISymbolicExpressionTreeProblem)new SymbolicRegressionPoly10Problem()),
         //() => Tuple.Create(100000, 17, (ISymbolicExpressionTreeProblem)new SantaFeAntProblem()),
+        //() => Tuple.Create(100000, 23,  (ISymbolicExpressionTreeProblem)new SymbolicRegressionPoly10Problem()),
+        () => Tuple.Create(100000, 17, (ISymbolicExpressionTreeProblem)new SantaFeAntProblem()),
         //() => Tuple.Create(50000, 32,(ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
         //() => Tuple.Create(50000, 64, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11974 for branches/HeuristicLab.Problems.GrammaticalOptimization

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GaussianThompsonSamplingPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GenericThompsonSamplingPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialDecisionPolicies/GenericFunctionApproximationGrammarPolicy.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SantaFeAntProblem.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

Download in other formats: