Changeset 11976

Timestamp: 02/11/15 02:22:18 (10 years ago)
Location:  branches/HeuristicLab.Problems.GrammaticalOptimization
Files:     8 edited
Legend: unchanged lines are shown without a prefix, added lines are prefixed with "+", removed lines with "-".
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/ThresholdAscentPolicy.cs
r11806 → r11976

     public double Value {
       get {
-        if (Tries == 0.0) return 0.0;
+        if (Tries == 0.0) return double.PositiveInfinity;
         return rewardHistogram[thresholdBin] / (double)Tries;
       }
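The only change here is the value reported for an arm that has never been tried: instead of 0.0 it now returns double.PositiveInfinity, so a policy that greedily maximizes arm values samples every untried arm before it relies on observed reward statistics. With the old return value of 0.0, an untried arm could look worse than an arm with positive observed rewards and might never be sampled. A minimal standalone sketch of that selection pattern, with hypothetical ArmStats/SelectArm names that are not part of the changeset:

  using System;

  class ArmStats {
    public int Tries;
    public double SumReward;
    // An untried arm reports +infinity, so the argmax below always prefers it.
    public double Value { get { return Tries == 0 ? double.PositiveInfinity : SumReward / Tries; } }
  }

  static class ArmSelectionDemo {
    // Picks the arm with the highest value estimate; ties keep the first index.
    static int SelectArm(ArmStats[] arms) {
      int best = 0;
      for (int i = 1; i < arms.Length; i++)
        if (arms[i].Value > arms[best].Value) best = i;
      return best;
    }

    static void Main() {
      var arms = new[] { new ArmStats { Tries = 3, SumReward = 2.0 }, new ArmStats() };
      Console.WriteLine(SelectArm(arms)); // 1: the untried arm wins because its value is +infinity
    }
  }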
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialDecisionPolicies/GenericFunctionApproximationGrammarPolicy.cs
r11974 → r11976

   public sealed class GenericFunctionApproximationGrammarPolicy : IGrammarPolicy {
     private Dictionary<string, double> featureWeigths; // stores the necessary information for bandit policies for each state (=canonical phrase)
+    private Dictionary<string, int> featureTries;
     private HashSet<string> done;
     private readonly bool useCanonicalPhrases;
     private readonly IProblem problem;
…
       this.problem = problem;
       this.featureWeigths = new Dictionary<string, double>();
+      this.featureTries = new Dictionary<string, int>();
       this.done = new HashSet<string>();
     }
…
         originalIdx++;
       }
-
-      const double beta = 20.0;
-      var w = from q in activeAfterStates
-              select Math.Exp(beta * q);
+
+      /*
+      const double beta = 1;
+      var w = from idx in Enumerable.Range(0, maxIdx)
+              let afterStateQ = activeAfterStates[idx]
+              select Math.Exp(beta * afterStateQ);

       var bestAction = Enumerable.Range(0, maxIdx).SampleProportional(random, w);
       selectedStateIdx = actionIndexMap[bestAction];
       Debug.Assert(selectedStateIdx >= 0);
-
-      /*
+      */
+
       if (random.NextDouble() < 0.2) {
         selectedStateIdx = actionIndexMap[random.Next(maxIdx)];
…
         selectedStateIdx = actionIndexMap[bestIdxs[random.Next(bestIdxs.Count)]];
       }
-      */
…
     public int GetTries(string state) {
-      return 1;
+      return 0;
+    }
+
+    public int GetFeatureTries(string featureId) {
+      int t;
+      if (featureTries.TryGetValue(featureId, out t)) {
+        return t;
+      } else return 0;
     }

     public double GetValue(string state) {
-      return problem.GetFeatures(state).Sum(feature => GetWeight(feature));
+      return problem.GetFeatures(state).Average(feature => GetWeight(feature));
     }
…
       double w;
       if (featureWeigths.TryGetValue(feature.Id, out w)) return w * feature.Value;
-      else return 0.0; // TODO: alternatives?
+      else return 0.0;
     }
     private void UpdateWeights(string state, double reward) {
-      const double alpha = 0.01;
       double delta = reward - GetValue(state);
+      delta /= problem.GetFeatures(state).Count();
+      const double alpha = 0.001;
       foreach (var feature in problem.GetFeatures(state)) {
+        featureTries[feature.Id] = GetFeatureTries(feature.Id) + 1;
+        Debug.Assert(GetFeatureTries(feature.Id) >= 1);
+        //double alpha = 1.0 / GetFeatureTries(feature.Id);
+        //alpha = Math.Max(alpha, 0.01);
+
         double w;
         if (!featureWeigths.TryGetValue(feature.Id, out w)) {
-          featureWeigths[feature.Id] = alpha * delta;
+          featureWeigths[feature.Id] = alpha * delta * feature.Value;
         } else {
-          featureWeigths[feature.Id] += alpha * delta;
+          featureWeigths[feature.Id] += alpha * delta * feature.Value;
         }
       }
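Taken together, the reworked UpdateWeights is a delta-rule (stochastic-gradient) step for a linear value function over phrase features: the prediction error delta = reward - GetValue(state) is divided by the number of features (matching the Average used in GetValue), and each weight moves by alpha * delta * feature.Value. A self-contained sketch of that update under these assumptions, using hypothetical names (LinearValueSketch, Predict, Update) that are not part of the changeset:

  using System;
  using System.Collections.Generic;
  using System.Linq;

  public static class LinearValueSketch {
    // V(s) is the average of weight * feature value over the features of s (mirrors GetValue above).
    public static double Predict(Dictionary<string, double> weights, IList<KeyValuePair<string, double>> features) {
      return features.Average(f => {
        double wf;
        return (weights.TryGetValue(f.Key, out wf) ? wf : 0.0) * f.Value;
      });
    }

    // One delta-rule step: w_f += alpha * ((reward - V(s)) / n) * x_f for every feature f of s.
    public static void Update(Dictionary<string, double> weights, IList<KeyValuePair<string, double>> features, double reward, double alpha) {
      double delta = (reward - Predict(weights, features)) / features.Count;
      foreach (var f in features) {
        double wf;
        weights.TryGetValue(f.Key, out wf);
        weights[f.Key] = wf + alpha * delta * f.Value;
      }
    }

    public static void Main() {
      var weights = new Dictionary<string, double>();
      var features = new List<KeyValuePair<string, double>> { new KeyValuePair<string, double>("phrase:mlm", 1.0) };
      for (int i = 0; i < 1000; i++) Update(weights, features, 1.0, 0.001);
      Console.WriteLine(Predict(weights, features)); // moves toward the observed reward of 1.0
    }
  }

With a single always-on feature the weight simply relaxes toward the observed reward at rate alpha, which is why lowering alpha from 0.01 to 0.001 slows adaptation but smooths the estimates.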
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/Solvers/SequentialSearch.cs
r11850 → r11976

     private void DistributeReward(double reward) {
       behaviourPolicy.UpdateReward(stateChain, reward);
-      greedyPolicy.UpdateReward(stateChain, reward);
+      //greedyPolicy.UpdateReward(stateChain, reward);
     }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Distributions/GaussianModel.cs
r11851 → r11976

       this.meanPriorMu = meanPriorMu;
       this.meanPriorVariance = meanPriorVariance;

       this.knownVariance = false;
       this.precisionPriorAlpha = precisionPriorAlpha;
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Feature.cs
r11832 → r11976

       this.Value = val;
     }
+
+    public override string ToString() {
+      return string.Format("{0} {1:N3}", Id, Value);
+    }
   }
 }
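The new ToString override prints the feature id followed by its value with three decimal places (the "N3" format, so the exact string depends on the current culture). A tiny usage example with assumed values:

  var f = new Feature("ll", 0.25);
  Console.WriteLine(f); // prints "ll 0.250" under an English culture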
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SantaFeAntProblem.cs
r11974 → r11976

     }

-    private void Run(Ant ant, string sentence, ref int p) {
+    private static void Run(Ant ant, string sentence, ref int p, bool stopAfterFirst = false) {
       while (!ant.Done()) {
-        if (p >= sentence.Length) p = 0; // restart
+        if (p >= sentence.Length) {
+          if (stopAfterFirst) return;
+          p = 0; // restart
+        }
         switch (sentence[p]) {
           case 'r': {
…
             break;
           }
+          case '.': {
+            // nop
+            p++;
+            ant.Nop();
+            break;
+          }
           case ')': {
             p++; // skip
…
     }

-    private void Skip(string sentence, ref int p) {
+    private static void Skip(string sentence, ref int p) {
       int openCount = 1;
       while (openCount > 0) {
…

     public string CanonicalRepresentation(string phrase) {
+      phrase = phrase.Replace("A", ".");
       var sb = new StringBuilder(phrase);
       string canonicalPhrase = phrase;
…
       do {
         oldPhrase = canonicalPhrase;
-        sb.Replace("ll", "rr").Replace("rl", "lr").Replace("lr", "").Replace("lll", "r").Replace("rrr", "l");
+        sb.Replace("ll", "rr").Replace("rl", "").Replace("lr", "").Replace("lll", "r").Replace("rrr", "l");
+        sb.Replace("?(m)(m)", "?()()m").Replace("?(l)(l)", "?()()l").Replace("?(r)(r)", "?()()r").Replace("?()()", "");
         canonicalPhrase = sb.ToString();
       } while (canonicalPhrase != oldPhrase);
…

     public IEnumerable<Feature> GetFeatures(string phrase) {
-      yield return new Feature(CanonicalRepresentation(phrase), 1.0);
+      phrase = CanonicalRepresentation(phrase);
+      var isTerminal = grammar.IsTerminal(phrase);
+
+      //yield return new Feature("const", 0.0);
+      //if (phrase.Length > 0) {
+      //  var ant = new Ant(recordTrail: true);
+      //  int pos = 0;
+      //  Run(ant, phrase, ref pos, true);
+      //  //yield return new Feature("food", ant.FoodEaten);
+      //  yield return new Feature(ant.Trail, 1.0);
+      //}
+      //yield return new Feature(isTerminal + "const", 0.0);
+      //yield return new Feature(isTerminal.ToString() + phrase.Length, 1.0);
+      //int ntIdx;
+      //for (ntIdx = 0; ntIdx < phrase.Length; ntIdx++) if (grammar.IsNonTerminal(phrase[ntIdx])) break;
+      //for (int l = ntIdx-2; l >= 0; l--) {
+      //  yield return new Feature(phrase.Substring(l, ntIdx-l-1), 1.0);
+      //}
+      //
+      ////yield return new Feature("$" + phrase[0], 1.0);
+      // if (!isTerminal) {
+      //   for (int i = 4; i < phrase.Length; i++) {
+      //     if (grammar.IsNonTerminal(phrase[i])) {
+      //       yield return new Feature(phrase[i - 4].ToString() + phrase[i - 3].ToString() + phrase[i - 2] + phrase[i - 1], 0.1);
+      //       break;
+      //     }
+      //   }
+      // }
+      // var d = 0;
+      // var ls = 0;
+      // var rs = 0;
+      // var qs = 0;
+      // foreach (var ch in phrase) if (ch == 'l') { d--; ls++; } else if (ch == 'r') { d++; rs++; } else if (ch == '?') qs++;
+      // yield return new Feature(isTerminal + "D" + Math.Abs(d), 1.0);
+      // yield return new Feature(isTerminal + "R" + rs, 1.0);
+      // yield return new Feature(isTerminal + "L" + ls, 1.0);
+      // yield return new Feature(isTerminal + "?" + qs, 1.0);
+
+      yield return new Feature(phrase, 1.0);
+      //for (int i = 0; i < phrase.Length; i++)
+      //  yield return new Feature(i.ToString() + phrase[i].ToString(), 1.0);
       // yield return new Feature("Length", phrase.Length); //
       // foreach (var pair in phrase.Zip(phrase.Skip(1), Tuple.Create)) {
…

   public class Ant {
-    private const int maxSteps = 600;
+    private int maxSteps = 600;
+    public int MaxSteps { get { return maxSteps; } set { maxSteps = value; } }
     public enum HeadingEnum { North, East, South, West };
     public int FoodEaten { get; private set; }
…
     private int steps;
     private HeadingEnum heading;
-
-
-
-    public Ant() {
+    public int PosX { get { return posX; } }
+    public int PosY { get { return posY; } }
+    public HeadingEnum Heading { get { return heading; } }
+    private bool recordTrail = false;
+    private StringBuilder trailBuilder;
+
+    public string Trail {
+      get {
+        if (!recordTrail) throw new NotSupportedException();
+        else return trailBuilder.ToString() + heading; // add final heading as state
+      }
+    }
+
+    public Ant(bool recordTrail = false) {
       world[00] = " ### ".ToCharArray();
       world[01] = " # ".ToCharArray();
…
       FoodEaten = 0;
       steps = 0;
+
+      this.recordTrail = recordTrail;
+      if (this.recordTrail) trailBuilder = new StringBuilder();
     }

…
         world[posY][posX] = '.';
       }
+      if (recordTrail) trailBuilder.Append("m" + posX + "x" + posY); // record position change
+      }
+    }
+
+    public void Nop() {
+      // wait one time step
+      if (steps <= maxSteps) {
+        steps++;
       }
     }
…
       int nextPosY = posY;
       MoveInternal(ref nextPosX, ref nextPosY);
+
+      if (recordTrail) trailBuilder.Append("?" + nextPosX + "x" + nextPosY); // record check
+
       return world[nextPosY][nextPosX] == '#';
     }
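CanonicalRepresentation now first maps the non-terminal A to the no-op symbol '.' and then applies the replacement rules repeatedly until the phrase stops changing, so phrases that steer the ant identically (for example an immediately cancelled left/right turn pair) collapse to the same canonical string before GetFeatures hands them to the policy. A standalone sketch of that fixed-point loop, reduced to the turn rules only (the CanonicalSketch name and the example inputs are assumptions, not part of the changeset):

  using System;
  using System.Text;

  public static class CanonicalSketch {
    // Applies the turn-cancellation rewrites until the phrase stops changing.
    public static string Canonical(string phrase) {
      var sb = new StringBuilder(phrase);
      string canonical = phrase, old;
      do {
        old = canonical;
        sb.Replace("ll", "rr").Replace("rl", "").Replace("lr", "").Replace("lll", "r").Replace("rrr", "l");
        canonical = sb.ToString();
      } while (canonical != old);
      return canonical;
    }

    public static void Main() {
      Console.WriteLine(Canonical("mlrm")); // "mm": the cancelled left/right pair disappears
      Console.WriteLine(Canonical("mrlm")); // "mm": same behaviour, same canonical phrase
      Console.WriteLine(Canonical("rrr"));  // "l": three right turns equal one left turn
    }
  }

The additional rule "?()()" → "" removes conditionals whose branches become empty, which the "?(m)(m)", "?(l)(l)" and "?(r)(r)" rewrites produce by hoisting the common action out of both branches.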
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SymbolicRegressionPoly10Problem.cs
r11857 → r11976

         canonicalTerms.Add(CanonicalTerm(t));
       }
-      return canonicalTerms.Select(entry => new Feature(entry, 1.0)).Concat(new Feature[] { new Feature(CanonicalRepresentation(phrase), 1.0) });
+      return canonicalTerms.Select(entry => new Feature(entry, 1.0))
+        .Concat(new Feature[] { new Feature(CanonicalRepresentation(phrase), 1.0) });
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11974 → r11976

       //RunDemo();
       //RunGpDemo();
       // RunGridTest();
       //RunGpGridTest();
       RunFunApproxTest();
…
         //() => new BoltzmannExplorationPolicy(200),
         //() => new BoltzmannExplorationPolicy(500),
-        () => new ChernoffIntervalEstimationPolicy( 0.01),
-        () => new ChernoffIntervalEstimationPolicy( 0.05),
-        () => new ChernoffIntervalEstimationPolicy( 0.1),
-        () => new ChernoffIntervalEstimationPolicy( 0.2),
+        // () => new ChernoffIntervalEstimationPolicy( 0.01),
+        // () => new ChernoffIntervalEstimationPolicy( 0.05),
+        // () => new ChernoffIntervalEstimationPolicy( 0.1),
+        // () => new ChernoffIntervalEstimationPolicy( 0.2),
         //() => new ThresholdAscentPolicy(5, 0.01),
         //() => new ThresholdAscentPolicy(5, 0.05),
…
         //() => new ThresholdAscentPolicy(50, 0.2),
         //() => new ThresholdAscentPolicy(100, 0.01),
-        //() => new ThresholdAscentPolicy(100, 0.05),
+        () => new ThresholdAscentPolicy(100, 0.05),
         //() => new ThresholdAscentPolicy(100, 0.1),
         //() => new ThresholdAscentPolicy(100, 0.2),
…
       var instanceFactories = new Func<Random, Tuple<IProblem, int>>[]
       {
-        (rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+        //(rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
         //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:false ), 15),
         //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:true ), 15),
…

       foreach (var instanceFactory in instanceFactories) {
-        foreach (var useCanonical in new bool[] { true, false }) {
-          foreach (var randomTries in new int[] { 0, 1, 10 /*, /* 5, 100 /*, 500, 1000 */ }) {
+        foreach (var useCanonical in new bool[] { true /*, false */ }) {
+          foreach (var randomTries in new int[] { 1 /*, 1, 10 /*, /* 5, 100 /*, 500, 1000 */ }) {
             foreach (var policyFactory in policyFactories) {
               var myRandomTries = randomTries;
…
       const int seed = 31415;
       //const int maxIters = 50000;
-      var rand = new Random(seed);
+      var rand = new Random();

       var problemFactories = new Func<Tuple<int, int, ISymbolicExpressionTreeProblem>>[]
…
         iterations++;
         globalStatistics.AddSentence(sentence, quality);
-
-        if (iterations % 1000 == 0) {
+        //if (iterations % 100 == 0) {
+        //  Console.Clear();
+        //  Console.SetCursorPosition(0, 0);
+        //  alg.PrintStats();
+        //}
+        //Console.WriteLine("{0:N5} {1}", quality, sentence);
+        if (iterations % 200 == 0) {
           Console.WriteLine("\"{0,25}\" {1} \"{2,25}\" {3}", algName, maxSize, probName, globalStatistics);
         }